# sitemap / getXicidaili.py

import requests
from bs4 import BeautifulSoup
# URL templates for the xicidaili listing pages; fill in {page} with str.format.
nnUrl = 'https://www.xicidaili.com/nn/{page}'  # high-anonymity proxies
ntUrl = 'https://www.xicidaili.com/nt/{page}'  # ordinary proxies
wnUrl = 'https://www.xicidaili.com/wn/{page}'  # HTTPS proxies
wtUrl = 'https://www.xicidaili.com/wt/{page}'  # HTTP proxies


def getIps(url=nnUrl.format(page=1), timeout=10):
    """Download one proxy listing page and return the proxies found in it.

    Args:
        url: Listing page to fetch; defaults to page 1 of ``nnUrl``.
        timeout: Seconds handed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        A list of dicts, one per usable table row, with the keys ``ip``,
        ``port``, ``isHttps``, ``anonymous``, ``lifeTime`` and
        ``verifyTime``. The list is empty when the page contains no
        ``<table id="ip_list">``.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    header = {
        'Connection': 'keep-alive',
        # Browser-like User-Agent sent in place of the requests default.
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    }
    res = requests.get(url, headers=header, timeout=timeout)
    # Surface HTTP errors directly rather than failing later while parsing
    # an error page that has no proxy table.
    res.raise_for_status()

    html = BeautifulSoup(res.text, 'html.parser')
    table = html.find('table', attrs={'id': 'ip_list'})
    if table is None:
        # No listing table in the response: nothing to report.
        return []

    proxies = []
    for tr in table.find_all('tr'):
        cells = [td.text.strip() for td in tr.find_all('td')]
        # The highest index read below is 9, so rows with fewer than ten
        # <td> cells (e.g. a row without any <td>) are skipped.
        if len(cells) < 10:
            continue
        proxies.append({
            'ip': cells[1],
            'port': cells[2],
            # Case-insensitive so both 'HTTPS' and 'https' are recognised.
            'isHttps': cells[5].upper() == 'HTTPS',
            # '高匿' is the label the page uses for high-anonymity proxies.
            'anonymous': cells[4] == '高匿',
            'lifeTime': cells[8],
            'verifyTime': cells[9],
        })
    return proxies
def main():
    """Collect the proxies listed on page 2 of the ``nnUrl`` listing.

    Returns:
        A new list holding every entry ``getIps`` yields for that page.
    """
    # Only this single page is requested; no other pages are crawled.
    pageUrl = nnUrl.format(page=2)
    return list(getIps(pageUrl))

if __name__ == '__main__':
    # When executed as a script, fetch the proxies via main() and print the list.
    Proxies=main()
    print(Proxies)