# Scraper for the free proxy lists published at xicidaili.com.
import requests
from bs4 import BeautifulSoup
nnUrl = 'https://www.xicidaili.com/nn/{page}' # high-anonymity proxies
ntUrl = 'https://www.xicidaili.com/nt/{page}' # ordinary (transparent) proxies
wnUrl = 'https://www.xicidaili.com/wn/{page}' # HTTPS proxies
wtUrl = 'https://www.xicidaili.com/wt/{page}' # HTTP proxies
def getIps(url=nnUrl.format(page=1)):
    """Fetch one xicidaili.com listing page and parse its proxy table.

    Parameters
    ----------
    url : str
        Full listing-page URL; defaults to page 1 of the high-anonymity list.

    Returns
    -------
    list[dict]
        One dict per proxy row with keys ``ip``, ``port``, ``isHttps``,
        ``anonymous``, ``lifeTime``, ``verifyTime``.  Empty list when the
        expected ``ip_list`` table is not present on the page.

    Raises
    ------
    requests.RequestException
        On network failure, timeout, or a non-2xx HTTP response.
    """
    header = {
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    }
    # timeout prevents the request from hanging forever on an unresponsive host
    res = requests.get(url, headers=header, timeout=10)
    # fail loudly on HTTP errors instead of silently parsing an error page
    res.raise_for_status()
    html = BeautifulSoup(res.text, 'html.parser')
    table = html.find('table', attrs={'id': 'ip_list'})
    if table is None:
        # page layout changed or the request was blocked -- nothing to parse
        return []
    Proxies = []
    for tr in table.find_all('tr'):
        td = tr.find_all('td')
        if not td:
            # header rows use <th> cells only; skip them
            continue
        Proxies.append({
            'ip': td[1].text,
            'port': td[2].text,
            # td[5] is the protocol column; accept any capitalization of HTTPS
            'isHttps': td[5].text.upper() == 'HTTPS',
            # td[4] is the anonymity column; '高匿' marks high-anonymity proxies
            'anonymous': td[4].text == '高匿',
            'lifeTime': td[8].text,
            'verifyTime': td[9].text,
        })
    return Proxies
def main():
    """Scrape page 2 of the high-anonymity listing and return its proxies.

    Returns the list of proxy dicts produced by ``getIps``.
    """
    # Collected into a fresh list so callers may mutate it freely.
    collected = []
    collected += getIps(nnUrl.format(page=2))
    return collected
if __name__ == '__main__':
    # Run a sample scrape and dump the raw result for manual inspection.
    result = main()
    print(result)
# NOTE: trailing hosting-site moderation notice removed; it was page chrome
# accidentally captured with the source, not part of the program.