1 Star 0 Fork 2

zhouyuan24 / scraper

forked from Wekson / scraper 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
index.php 3.73 KB
一键复制 编辑 原始数据 按行查看 历史
dreeye 提交于 2016-04-25 16:34 . menu regular debug
<?php
require 'vendor/autoload.php';
use Goutte\Client;
// Absolute path of the directory that holds this script; the Yaf config is
// resolved relative to it.
define('APP_PATH', realpath(__DIR__));

// Boot the Yaf application from its ini configuration.
$app = new Yaf_Application(APP_PATH . '/conf/application.ini');

// Exactly one scraping stage is enabled at a time. The other stages are kept
// here, commented out, so they can be switched on by hand.
// NOTE(review): mall() reads rows via getZone() and menu() via getMall(), so
// the stages presumably have to be run in the order zone -> mall -> menu.
#$app->execute("zone");
#$app->execute("mall");
$app->execute('menu');
// Home-page districts and their links
/**
 * Scrapes the district links from the site's home page and stores each one
 * through DaojiaModel::addZone().
 *
 * For every anchor matched by ".hfc a" a record with the keys `name`
 * (text of the nested <b>, or '' when there is none), `url` (site base URL
 * plus the anchor's href) and `city_id` is built, saved and echoed inside
 * <pre> tags.
 */
function zone() {
    $baseUrl = 'http://beijing.daojia.com.cn';

    $client = new Client();
    $crawler = $client->request('GET', $baseUrl);

    // One model instance is shared by every anchor instead of constructing a
    // new one per node, matching how mall() and menu() pass theirs in.
    $djMod = new DaojiaModel();

    $crawler->filter(".hfc a")->each(function ($node, $i) use ($baseUrl, $djMod) {
        $label = $node->filter('b');

        $zone = array(
            'name' => $label->count() ? $label->text() : '',
            'url' => $baseUrl . $node->attr('href'),
            // Hard-coded city id; presumably the id of Beijing - TODO confirm.
            'city_id' => 1,
        );

        $djMod->addZone($zone);
        echo '<pre>';print_r($zone);echo '</pre>';
    });
}
// Fetch the shops listed in each district
/**
 * Visits every district page returned by DaojiaModel::getZone() and stores
 * each shop listing found there through DaojiaModel::addMall().
 *
 * A listing is every element matched by "div[class='sort_yi']". Listings
 * without a shop link are skipped (and logged), because calling attr() on an
 * empty DomCrawler node list throws and would otherwise abort the whole run.
 * Successfully stored listings are echoed inside <pre> tags.
 * The function pauses two seconds between district pages.
 */
function mall() {
    $djMod = new DaojiaModel();
    $mallData = $djMod->getZone();

    foreach ($mallData as $mData) {
        $zoneId = $mData['id'];
        $cityId = $mData['city_id'];

        $client = new Client();
        $crawler = $client->request('GET', $mData['url']);

        $crawler->filter("div[class='sort_yi']")->each(function ($node, $i) use ($zoneId, $cityId, $djMod) {
            $link = $node->filter('.sort_rest_img a');
            if (!$link->count()) {
                // Without a link there is no shop URL to store; skip the
                // listing instead of letting attr() throw on the empty list.
                error_log('SKIP MALL WITHOUT LINK:'.$i.'--'.$zoneId);
                return;
            }

            $logo = $node->filter('.sort_rest_img img');
            $title = $node->filter('.sort_left a');
            $tags = $node->filter('.sign span');
            $dish = $tags->eq(0);
            $cost = $tags->eq(1);

            $mall = array(
                'url' => 'http://beijing.daojia.com.cn'.$link->attr('href'),
                // Shop logo
                'logo' => $logo->count() ? $logo->attr('src') : '',
                // Shop name
                'name' => $title->count() ? $title->text() : '',
                // Cuisine
                'dish' => $dish->count() ? $dish->text() : '',
                // Average cost per person
                'cost_one' => $cost->count() ? $cost->text() : '',
                'zone_id' => $zoneId,
                'city_id' => $cityId,
            );

            if ($djMod->addMall($mall)) {
                // Printed keyed by the listing index, as before.
                echo '<pre>';print_r(array($i => $mall));echo '</pre>';
            }
        });

        // Pause between district pages.
        sleep(2);
    }
}
// Fetch the menu of every shop
/**
 * Visits the page of every shop returned by DaojiaModel::getMall() and stores
 * each table row found under ".hottest_dishes" through DaojiaModel::addMenu().
 *
 * Shops whose `scrap_count` is already greater than zero are skipped. After a
 * shop's rows are processed DaojiaModel::scrapCount() is called with the shop
 * id; if that call reports failure the script logs the shop id and terminates
 * with a non-zero exit status. The function pauses two seconds between shops.
 */
function menu() {
    $djMod = new DaojiaModel();
    $mallData = $djMod->getMall();

    foreach ($mallData as $mData) {
        $zoneId = $mData['zone_id'];
        $cityId = $mData['city_id'];
        $mallId = $mData['id'];
        $scrap_count = $mData['scrap_count'];

        // Already scraped once: skip it.
        if ($scrap_count > 0) {
            continue;
        }

        $client = new Client();
        $crawler = $client->request('GET', $mData['url']);

        $crawler->filter(".hottest_dishes")->filter("tr")->each(function ($node, $i) use ($zoneId, $cityId, $mallId, $djMod) {
            $anchor = $node->filter('a');
            $hasAnchor = $anchor->count() > 0;

            // attr() throws on an empty node list and returns null when the
            // attribute is absent, so guard the lookup and normalise to a
            // string before handing it to preg_match().
            $hover = $hasAnchor ? (string) $anchor->attr('onmouseover') : '';

            // The image URL is the first single-quoted http:// string inside
            // the onmouseover handler.
            $image = '';
            if (preg_match("/'http:\/\/.*?'/i", $hover, $matches)) {
                $image = trim($matches[0], "'");
            }

            $price = $node->filter('.td_two');
            $note = $node->filter('.td_three');

            $menu = array(
                'menu_img' => $image,
                'menu_name' => $hasAnchor ? $anchor->text() : '',
                'menu_price' => $price->count() ? $price->text() : '',
                'remark' => $note->count() ? $note->text() : '',
                'zone_id' => $zoneId,
                'city_id' => $cityId,
                'mall_id' => $mallId,
            );

            if ($djMod->addMenu($menu)) {
                error_log('SUCCESS ADD MENU:'.$i.'--'.$menu['menu_name'].'--'.$menu['menu_price'].'--'.$menu['mall_id']);
            }
        });

        if (!$djMod->scrapCount($mallId)) {
            // Stop the run, but say why and signal failure to the caller
            // instead of exiting silently with status 0.
            error_log('FAILED TO UPDATE SCRAP COUNT:'.$mallId);
            exit(1);
        }

        // Pause between shop pages.
        sleep(2);
    }
}
PHP
1
https://gitee.com/zhouyuan24/scraper.git
git@gitee.com:zhouyuan24/scraper.git
zhouyuan24
scraper
scraper
master

搜索帮助