
nico / domain-crawling

domain-crawling.py 4.25 KB
nico committed on 2018-06-14 18:24 · bug fixed
import urllib.request
import itertools
import json
import sys
import time
import requests
import argparse
## Baidu Cloud API for querying domain name registration status
domainapi = 'https://cloud.baidu.com/api/bcd/search/status'
## The data5u.com order id for the proxy service
## Replace this id with your own
orderid = 'd6745b2f3ced66d5c21621f43dc65f5d'
## The API for fetching a proxy IP
proxyapi = 'http://api.ip.data5u.com/dynamic/get.html?order=' + orderid
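## Assumption: the proxy API returns a bare "host:port" line as plain text
## (pushget() below only strips newlines before using it), e.g.:
##   1.2.3.4:8080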
## Total number of domain labels to search
domainlistlength = 0
"""
Split the list
"""
def chunks(l, n):
return [l[i:i + n] for i in range(0, len(l), n)]
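## For example (values are illustrative):
##   chunks([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]]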
"""
Crawling by domain length
"""
def crawling(length, suffix, path, openproxy, delayed):
# Open stroge file
strogefile = open(path + ".txt", 'wb+')
# abcdefghigklmnopqrstuvwxyz
# Gets all the domain combinations in a-z0-9
domainlist = list(itertools.product('abcdefghigklmnopqrstuvwxyz0123456789', repeat = length))
# Gets domain list length
global domainlistlength
domainlistlength = len(domainlist)
print('Prepare to retrieve domain name length:', domainlistlength)
# Block
#domainlistblocks = chunks(domainlist, 3)
index = 0
domainNames = []
for domain in domainlist:
try:
if index % 3 == 0:
pushget(domainNames, openproxy, index, strogefile)
domainNames = []
time.sleep(delayed)
label = {
"label": "".join(domain),
"tld": suffix
}
domainNames.append(label)
except:
print('An exception has occurred:', sys.exc_info()[0])
index += 1
strogefile.close()
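## Each batch handed to pushget() serializes to a JSON body of the form
## (label values here are illustrative):
##   {"domainNames": [{"label": "aaaa", "tld": "com"}, {"label": "aaab", "tld": "com"}]}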
"""
Push the request data to domain API
"""
def pushget(domainNames, openproxy, index, strogefile):
# When the agent is opened, the agent is changed every ten times
if openproxy and index % 6 == 0:
proxyip = requests.get(proxyapi)
proxyip = proxyip.text.replace('\n', '')
proxy={
"http":"http://" + proxyip,
"https":"http://" + proxyip
}
proxy = urllib.request.ProxyHandler(proxy)
opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
urllib.request.install_opener(opener)
print('Proxy by IP: ', proxyip)
# Get the agent no more than 10 times per second
time.sleep(0.1)
# Construct request data
requestdata = {
"domainNames":domainNames
}
requestdatajson = str(json.dumps(requestdata))
# Construction request header
headers = {
'Cache-Control':'no-cache',
'Content-Encoding':'gzip',
'Content-Length':len(requestdatajson),
'Connection':'keep-alive',
'Content-Type':'application/json;charset=UTF-8',
'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36'
}
# Initiate a request for data
request = urllib.request.Request(domainapi, data = bytes(requestdatajson, encoding='utf-8'), headers = headers)
result = urllib.request.urlopen(request)
data = result.read()
data = data.decode()
data = json.loads(data)
if data.get('status') == 200:
domaininfos = data.get('result').get('accurate')
for domaininfo in domaininfos:
if domaininfo.get('status') == 'UNREGISTERED':
strogefile.writelines([str(domaininfo).encode(), "\r\n".encode()])
strogefile.flush()
print(domaininfo, "-", index, "/", domainlistlength)
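## The parsing above assumes a response shaped roughly like the following
## (only the "status", "result" and "accurate" keys are confirmed by the code;
## other field names and values are illustrative):
##   {"status": 200, "result": {"accurate": [{"domain": "aaaa.com", "status": "UNREGISTERED"}]}}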
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Nico domain name crawler script')
    parser.add_argument('-p', '--path', help='Storage path for the available domain names found by the scan.')
    parser.add_argument('-l', '--length', type=int, default=4, help='Label length to scan; all combinations of a-z0-9 of this length are tried.')
    parser.add_argument('-o', '--openproxy', choices=['y', 'n'], default='n', help='Enable the IP proxy mode.')
    parser.add_argument('-d', '--delayed', type=float, default=0.1, help='Interval between batches, in seconds.')
    parser.add_argument('-s', '--suffix', default='com', help='Domain suffix (TLD).')
    args = parser.parse_args()
    # Default the output file name to the current timestamp in milliseconds
    if not args.path:
        args.path = str(int(time.time() * 1000))
    args.openproxy = (args.openproxy == 'y')
    crawling(length=args.length, suffix=args.suffix, path=args.path, openproxy=args.openproxy, delayed=args.delayed)
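## Example invocation (a minimal sketch; flags as defined above, output path
## "results" is illustrative):
##   python domain-crawling.py -l 3 -s com -d 1 -o n -p results
## This tries all 36**3 = 46656 three-character labels under .com and writes
## the unregistered ones to results.txt.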