1 Star 0 Fork 185

不清白 / Python爬虫

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
爬取JD手机信息.py 2.19 KB
一键复制 编辑 原始数据 按行查看 历史
import requests
from pyquery import PyQuery as pq
from fake_useragent import UserAgent
import time
import random
import pymongo
# MongoDB host. Change this to the address of your own database.
# The original passed an undefined placeholder name here, which raised
# NameError as soon as the script started.
MONGO_HOST = 'localhost'

clien = pymongo.MongoClient(host=MONGO_HOST)
# Database name
db = clien.Mobile_phone
# Collection name
coll = db.phone

ua = UserAgent()
# ua.random is evaluated once, here, so every request that uses `headers`
# sends the same User-Agent string for the lifetime of the process.
headers = {
    'User-Agent': ua.random,
}
def Parsing(url):
    """Extract phone records from a search-result page and store them.

    For every ``.gl-i-wrap`` element on the page this reads the price and
    picture attribute, follows the link to the detail page, and inserts one
    document into ``coll`` for each ``.p-parameter-list`` found there. Each
    inserted document is also printed.

    NOTE(review): the source this block was recovered from had lost its
    indentation. Building and inserting the record inside the inner loop
    (one document per parameter list) is the assumed nesting -- confirm.

    Args:
        url: Despite the name, this is the HTML text of the search page
            (the caller passes ``response.text``), not an address.
    """
    search_page = pq(url)

    for item in search_page('.gl-i-wrap').items():
        # Link to the detail page
        detail_href = item.find('.p-img a').attr('href')
        # Phone price
        price = item.find('.p-price').text()
        # Phone picture; select the <img> directly instead of re-parsing the
        # inner HTML of .p-img, which fails when that element is missing.
        picture = item.find('.p-img img').attr('source-data-lazy-img')

        if not detail_href:
            # Without a detail link there is nothing to follow. The original
            # reached the same outcome through a swallowed TypeError.
            continue

        # Only prefix a scheme when the href does not already carry one.
        if detail_href.startswith(('http://', 'https://')):
            detail_link = detail_href
        else:
            detail_link = 'http:' + detail_href

        try:
            # A timeout keeps one stalled detail page from hanging the crawl.
            detail_html = requests.get(
                url=detail_link, headers=headers, timeout=10
            ).text
            detail_page = pq(detail_html)
            # Phone resolution
            resolution = detail_page('.detail').find('p').attr('title')
            for parameter_list in detail_page('.p-parameter-list').items():
                data = {
                    '价格': price,
                    '手机图片': picture,
                    '手机分辨率': resolution,
                    '全部信息': parameter_list.find('li').text(),
                }
                coll.insert_one(data)
                print(data)
        except Exception as e:
            # Stay best-effort (one bad item must not stop the crawl), but
            # report the failure instead of silently discarding it.
            print('Skipping {}: {!r}'.format(detail_link, e))
def The_JD(page):
    """Download one JD search-result page and pass its HTML to ``Parsing``.

    Args:
        page: Value substituted into the ``page`` query parameter of the
            search URL.

    Raises:
        requests.RequestException: Propagated from ``requests.get``. This
            now includes a timeout error when the server does not answer
            within 10 seconds, instead of blocking indefinitely.
    """
    search_url = 'https://search.jd.com/Search?keyword=%E6%99%BA%E8%83%BD%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%99%BA%E8%83%BD%E6%89%8B%E6%9C%BA&page={}&s=170&click=0'.format(page)
    response = requests.get(search_url, headers=headers, timeout=10)
    # Force UTF-8 decoding of the body before reading .text.
    response.encoding = 'utf8'
    Parsing(response.text)
if __name__ == '__main__':
    # Request only the odd page numbers from 1 to 99, as the original
    # `i % 2 == 1` filter did.
    for page in range(1, 101, 2):
        The_JD(page)
        # Pause a random whole number of seconds (0-9) between pages.
        time.sleep(random.randint(0, 9))
Python
1
https://gitee.com/bqb/python_reptilian.git
git@gitee.com:bqb/python_reptilian.git
bqb
python_reptilian
Python爬虫
master

搜索帮助