# -*- coding: utf-8 -*-
"""Randomly pick a restaurant from dianping.com keyword-search results."""
import random
import sys
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

# dianping.com shows 15 shops per search-result page.
PAGE_SIZE = 15

default_user_agent = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/59.0.3071.115 Safari/537.36')


def visit_page(url):
    """GET *url* and return the page parsed as BeautifulSoup.

    Returns None on a non-200 response; network errors propagate to the
    caller.
    """
    req = urllib.request.Request(
        url=url,
        headers={'User-Agent': default_user_agent}
    )
    resp = urllib.request.urlopen(req, timeout=120)
    if resp.getcode() != 200:
        return None

    html = resp.read().decode()
    # Name the parser explicitly: bare BeautifulSoup(html) emits a warning
    # and may pick a different parser depending on what is installed.
    return BeautifulSoup(html, 'html.parser')


def total(seed_url):
    """Return an upper bound on the number of search results.

    The last 'PageLink' anchor carries the page count; multiply by
    PAGE_SIZE.  Returns 0 when the page could not be fetched or carries
    no pagination links.
    """
    html = visit_page(seed_url)
    if not html:
        return 0
    links = html.find_all('a', class_='PageLink')
    if not links:
        return 0
    return PAGE_SIZE * int(links[-1].text)


def get_food(url, idx):
    """Scrape the *idx*-th (1-based) shop from the result page at *url*.

    Returns a dict that may contain the keys: name, tag, addr, price,
    recommends, comments.  'price' is always present ('未知' when the
    page shows no average price); the other keys are optional, so
    callers must use dict.get().
    """
    html = visit_page(url)
    shop_list_container = html.find('div', id='shop-all-list')
    shop_list = shop_list_container.ul.find_all('li', recursive=False)
    # Clamp idx to the shops actually present on this page (the last page
    # is usually shorter than PAGE_SIZE).
    if not 1 <= idx <= len(shop_list):
        idx = len(shop_list)
    food_item = shop_list[idx - 1]
    food = {}

    name = food_item.find('div', class_='tit').a['title']
    if name:
        food['name'] = name

    addr_info = food_item.find('div', class_='tag-addr').find_all('span')
    if addr_info:
        food['tag'] = addr_info[0].text
        if len(addr_info) > 2:
            food['addr'] = addr_info[2].text

    price = [data
             for data in food_item.find('div', class_='comment').find_all('a')
             if data['data-click-name'] == 'shop_avgprice_click']
    if price:
        food['price'] = price[0].text.replace('\n', '').replace(' ', '')
    else:
        food['price'] = '未知'

    recommend = food_item.find('div', class_='recommend').find_all('a')
    if recommend:
        food['recommends'] = [data.text for data in recommend]

    comment = food_item.find('span', class_='comment-list').find_all('span')
    if comment:
        # Each span holds a label node followed by a count node,
        # e.g. taste-rating: score.
        food['comments'] = [data.contents[0].string + ': ' + data.contents[1].text
                            for data in comment]

    return food


def random_food(seed_url, max_idx=0):
    """Pick a random shop from the search rooted at *seed_url*.

    *max_idx*, when non-zero, caps how deep into the result list the
    random pick may go.  Returns the dict from get_food(), or None when
    the search yielded nothing.
    """
    food_size = total(seed_url)
    if food_size == 0:
        return None

    if max_idx != 0:
        food_size = min(food_size, max_idx)
    food_idx = random.randint(1, food_size)
    # Map the 1-based result number to a 1-based page and a 1-based index
    # on that page.  (The previous food_idx // 15 + 1 / food_idx % 15
    # sent e.g. result 15 to page 2, index 0 — off by one at every page
    # boundary.)
    page = (food_idx - 1) // PAGE_SIZE + 1
    idx = (food_idx - 1) % PAGE_SIZE + 1

    print('the %sth food(%s, %s)' % (food_idx, page, idx))
    page_url = seed_url + 'p' + str(page)
    return get_food(page_url, idx)


def parse_args(args):
    """Parse sys.argv-style *args* into a (name, max) tuple.

    A single argument is taken as the search keyword (a leading 'name='
    prefix is stripped).  Otherwise arguments are key=value pairs;
    'max' must parse as an int.  Arguments without '=' are skipped
    instead of crashing.
    """
    if len(args) == 2:
        name = args[1].strip()
        if name.startswith('name='):
            name = name[len('name='):]
        return (name, 0)

    parsed = {'name': None, 'max': 0}
    for arg in args[1:]:
        # partition() never raises on malformed input, unlike indexing
        # the result of split().
        key, sep, value = arg.partition('=')
        if sep:
            parsed[key.strip()] = value.strip()
    return (parsed['name'], int(parsed['max']))


if __name__ == '__main__':
    args = sys.argv
    name = None
    max_idx = 0
    if len(args) > 1:
        name, max_idx = parse_args(args)
        print(name, max_idx)

    # Food search by keyword.
    search_url = 'https://www.dianping.com/search/keyword/7/10_'

    # Search by business district instead:
    # seed_url = 'http://www.dianping.com/shenzhen/ch10/r1953'
    keyword = name if name else input('请输入商圈/地址/菜名等: ')

    keyword = urllib.parse.quote(keyword)
    seed_url = search_url + keyword + '/'
    print(seed_url)
    food = random_food(seed_url, max_idx)
    if food:
        # Several keys in get_food()'s result are optional; fall back to
        # '未知' / empty lists instead of raising KeyError (the previous
        # version crashed whenever e.g. 'addr' was absent).
        print('商户\t\t:%s\n类型\t\t:%s\n价格\t\t:%s\n地址\t\t:%s\n推荐菜\t\t:%s\n评价\t\t:%s\n'
              % (food.get('name', '未知'),
                 food.get('tag', '未知'),
                 food.get('price', '未知'),
                 food.get('addr', '未知'),
                 ', '.join(food.get('recommends', [])),
                 ', '.join(food.get('comments', []))))
    else:
        print('未找到相关商户')