The repository was fetched successfully.
This action will force a synchronization from 耿直的小爬虫/Python爬虫. It will overwrite any changes you have made since you forked the repository, and those changes cannot be recovered!
The synchronization will run in the background, and the page will refresh when it finishes. Please be patient.
import requests
from pyquery import PyQuery as pq
from fake_useragent import UserAgent
import time
import random
import pymongo
# Connect to MongoDB. The host string is a placeholder (it reads "change this
# to your own database IP") and must be edited before the script is run.
clien = pymongo.MongoClient(host='改成自己的数据库IP')

# Scraped records are written to the "text" collection of the "trave_guide"
# database.
db = clien['trave_guide']
coll = db['text']

# A single random User-Agent string is chosen here, at module load, and the
# resulting header dict is shared by the requests made in request_HTML.
ua = UserAgent()
headers = {'User-Agent': ua.random}
def request_HTML(page, timeout=10):
    """Download one page of the travel.qunar.com travel-book listing.

    Args:
        page: Page number substituted into the ``page`` query parameter of
            the listing URL.
        timeout: Seconds to wait for the server before giving up. Without an
            explicit timeout ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole crawl.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    url = 'http://travel.qunar.com/travelbook/list.htm?page={}&order=hot_heat'.format(page)
    # ``headers`` is the module-level dict carrying the User-Agent.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def Parsing_data(html):
    """Extract every entry from a listing page, store it, and print it.

    Each element matching ``.list_item`` yields one document whose fields are
    the text of fixed CSS selectors inside that element. The document is
    inserted into the module-level ``coll`` collection and then printed.

    Args:
        html: HTML text of one listing page.
    """
    # NOTE(review): the insert and print are assumed to run once per list
    # item (a fresh dict is created for every item) -- confirm against the
    # intended layout of the original loop.
    document = pq(html)
    for entry in document('.list_item ').items():
        record = {
            '标题': entry.find('.tit a').text(),         # title
            '出发时间': entry.find('.date').text(),       # departure time
            '共几天': entry.find('.days').text(),         # trip length in days
            '几张照片': entry.find('.photo_nums').text(),  # number of travel photos
            '途径': entry.find('.places').text(),         # places along the route
            '人均': entry.find('.fee').text(),            # cost per person
            '浏览人数': entry.find('.iconfont').text(),    # number of viewers
        }
        coll.insert_one(record)
        print(record)
# Crawl listing pages 1 through 199. After each page, pause for a random
# whole number of seconds between 0 and 6 before requesting the next one.
for page_number in range(1, 200):
    page_html = request_HTML(page_number)
    Parsing_data(page_html)
    pause_seconds = random.randint(0, 6)
    time.sleep(pause_seconds)

# Printed once, after the last page has been processed.
print('已全部保存到数据库中 请查看...')
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。