# (Stray page notice from the original paste: "Code pull complete; the page will refresh automatically.")
# spider.py 更新
# 爬取百度热搜数据
def get_baidu_hot():
    """Fetch the Baidu hot-search board and return its entries.

    Downloads ``http://top.baidu.com/?vit=1``, decodes the response body as
    GBK, and walks every ``<li>`` under ``<ul id="hot-list">``. For each item
    the ``title`` attribute of its ``<a>`` child (the keyword) is concatenated
    with the text of its ``icon-rose`` / ``icon-fair`` ``<span>`` child (the
    search index).

    Each keyword/index pair is also printed to stdout as it is collected.

    Returns:
        list[str]: One ``keyword + index`` string per list item that carries
        both pieces, in page order. Items missing either piece are skipped.

    Raises:
        Any exception raised by ``requests.get`` (e.g. on timeout), and
        ``UnicodeDecodeError`` if the body is not valid GBK.
    """
    url = 'http://top.baidu.com/?vit=1'
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
    }
    resp = requests.get(url, headers=headers, timeout=30)
    data = resp.content.decode('gbk')
    html = etree.HTML(data)

    entries = []
    for li in html.xpath('//ul[@id="hot-list"]/li'):
        titles = li.xpath("a/@title")  # keyword
        indexes = li.xpath(
            "span[@class='icon-rose' or @class='icon-fair']/text()"
        )  # search index
        if not titles or not indexes:
            # A row without a title or an index span would otherwise raise
            # IndexError and abort the whole scrape; skip it instead.
            continue
        content, rise = titles[0], indexes[0]
        print(content, rise)
        entries.append(content + rise)
    return entries