1 Star 0 Fork 0

一朵灼灼华 / sitemap

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
getXicidaili.py 1.67 KB
一键复制 编辑 原始数据 按行查看 历史
一朵灼灼华 提交于 2019-10-25 16:00 . 新增获取更多的代理
import requests
from bs4 import BeautifulSoup
nnUrl = 'https://www.xicidaili.com/nn/{page}' # high-anonymity proxies
ntUrl = 'https://www.xicidaili.com/nt/{page}' # regular proxies
wnUrl = 'https://www.xicidaili.com/wn/{page}' # HTTPS proxies
wtUrl = 'https://www.xicidaili.com/wt/{page}' # HTTP proxies
def getIps(url=nnUrl.format(page=1), timeout=10):
    """Download one proxy-list page and parse its table rows.

    Args:
        url: Address of the page to fetch. Defaults to page 1 of ``nnUrl``.
        timeout: Seconds handed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        A list of dicts, one per data row, with the keys ``ip``, ``port``,
        ``isHttps``, ``anonymous``, ``lifeTime`` and ``verifyTime``. The list
        is empty when the page contains no ``<table id="ip_list">``.
    """
    headers = {
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    html = BeautifulSoup(res.text, 'html.parser')
    table = html.find('table', attrs={'id': 'ip_list'})
    if table is None:
        # No proxy table in the response; nothing to parse.
        return []

    proxies = []
    for tr in table.find_all('tr'):
        # Strip surrounding whitespace so the comparisons below are reliable.
        cells = [td.text.strip() for td in tr.find_all('td')]
        # Rows without <td> cells, or with fewer than the 10 columns indexed
        # below, are skipped instead of raising IndexError.
        if len(cells) < 10:
            continue
        proxies.append({
            'ip': cells[1],
            'port': cells[2],
            'isHttps': cells[5].lower() == 'https',
            'anonymous': cells[4] == '高匿',
            'lifeTime': cells[8],
            'verifyTime': cells[9],
        })
    return proxies
def main():
    """Collect the proxies listed on page 2 of the ``nnUrl`` listing.

    Returns:
        A new list holding every entry ``getIps`` returned for that page.
    """
    # To cover several pages, call getIps(nnUrl.format(page=i)) for each page
    # number i and extend `collected` with every result.
    collected = []
    collected.extend(getIps(nnUrl.format(page=2)))
    return collected
if __name__ == '__main__':
    # Run as a script: fetch the proxies and dump the raw list to stdout.
    Proxies = main()
    print(Proxies)
XML
1
https://gitee.com/heanny/sitemap.git
git@gitee.com:heanny/sitemap.git
heanny
sitemap
sitemap
master

搜索帮助