爬取百度热搜数据接口失效

# spider.py 更新
# 爬取百度热搜数据
def get_baidu_hot():
    """Scrape the Baidu hot-search board and return its entries.

    Requests ``http://top.baidu.com/?vit=1`` with a desktop Chrome
    user-agent, decodes the response body as GBK, and walks every ``<li>``
    directly under ``<ul id="hot-list">``. For each item the ``title``
    attribute of its ``<a>`` child (the keyword) and the text of its
    ``icon-rose`` / ``icon-fair`` ``<span>`` (the search index, per the
    original author's note) are printed and joined into one string.

    Items that lack either piece are skipped instead of raising
    ``IndexError``, so one malformed entry no longer discards the rest.

    Returns:
        list: one string per usable entry, formed as keyword immediately
        followed by the span text (no separator). Empty when the page has
        no matching list items.

    Raises:
        Any exception raised by ``requests.get`` (the call uses a
        30-second timeout) is propagated unchanged.
    """
    url = 'http://top.baidu.com/?vit=1'
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"
    }
    resp = requests.get(url, headers=headers, timeout=30)
    # errors='replace': a single byte that is not valid GBK must not abort
    # the whole scrape with UnicodeDecodeError.
    data = resp.content.decode('gbk', errors='replace')
    html = etree.HTML(data)
    if html is None:
        # Guard in case the parser produced no root element (e.g. an empty
        # body); there is nothing to query then.
        return []

    entries = []
    for li in html.xpath('//ul[@id="hot-list"]/li'):
        titles = li.xpath("a/@title")  # keyword
        rises = li.xpath("span[@class='icon-rose' or @class='icon-fair']/text()")  # search index
        if not titles or not rises:
            # Entry without a keyword or without a recognised index span.
            continue
        content, rise = titles[0], rises[0]
        print(content, rise)
        entries.append(content + rise)
    return entries

简笔轩客 / Cov

内容风险标识

评论 (0)

简笔轩客 / Cov

内容风险标识

爬取百度热搜数据 接口失效

评论 (0)

搜索帮助

简笔轩客 / Cov

爬取百度热搜数据接口失效