代码拉取完成,页面将自动刷新
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""
把包含动态的文件下载下来
"""
import requests
import os
import sys
import time
import util
import get_moods_detail
import traceback
class Get_moods(object):
    '''Download the paged mood responses of one QQ account using the cookie headers from util.'''

    def __init__(self):
        # A single HTTP session is reused for every request made by this object.
        self.session = requests.Session()
        # NOTE: this is the dict object exposed by util itself (no copy is made),
        # so the Referer assigned in get_moods() is stored in that same dict.
        self.headers = util.headers
        self.g_tk = util.g_tk

    @staticmethod
    def _append_log(message):
        '''Append message, exactly as given, to crawler_log.log.'''
        with open('crawler_log.log', 'a', encoding="utf-8") as log_file:
            log_file.write(message)

    def get_moods(self, qqnumber):
        '''Fetch every mood page of qqnumber and save each raw response to disk.

        Each response body is written to mood_result/<qqnumber>/<pos>, where
        pos starts at 0 and grows by 20 per request. Pages that are not
        flagged as "msgnum":0 are also handed to get_moods_detail.start_write.

        qqnumber -- the QQ number as a string (it is concatenated into the
                    Referer URL and the output path).

        The loop ends once a response contains "msglist":null or "msgnum":0.
        The process is terminated through sys.exit() when the HTTP status is
        not OK or when the response contains "subcode":-4001; both cases are
        recorded in crawler_log.log first.
        '''
        referer = 'http://user.qzone.qq.com/' + qqnumber
        self.headers['Referer'] = referer

        # Create a folder named after the QQ number to hold its result files.
        util.check_path('mood_result/' + qqnumber)

        # Base request URL; only the position argument is still missing.
        url_base = util.parse_moods_url(qqnumber)

        pos = 0
        key = True  # loop flag: cleared when no further page should be requested

        while key:
            print("\tDealing with position:\t%d" % pos)
            url = url_base + "&pos=%d" % pos
            # print(url) # for debug use
            res = self.session.get(url, headers=self.headers)
            con = res.text

            # The raw body is stored before any check, so error responses are
            # kept on disk as well.
            with open('mood_result/' + qqnumber + '/' + str(pos), 'w', encoding="utf-8") as f:
                f.write(con)

            if res.status_code != requests.codes.ok:
                # Trailing newline added so that the next log entry starts on
                # its own line, like every other message written to this log.
                self._append_log(
                    'Request Fail! Response Code is {} , Time is {} , Please check yuor cookies\n'.format(
                        str(res.status_code), time.ctime()))
                print("Request Fail! Get More in log file.")
                sys.exit()

            # NOTE(review): this only clears the loop flag; unless the response
            # also contains "msgnum":0, the page is still passed to
            # start_write() below. Confirm that start_write copes with a null
            # msglist.
            if '''"msglist":null''' in con:
                key = False

            # Cookie expired
            if '''"subcode":-4001''' in con:
                self._append_log('Cookie Expried! Time is %s\n' % time.ctime())
                print("Cookie Expried!Get More in log file.")
                sys.exit()

            # Cannot access...
            if '''"msgnum":0''' in con:
                self._append_log("%s Cannot access..\n" % qqnumber)
                key = False
            else:
                # Data was received: parse it and write it to the database
                # (translated from the original comment; start_write is
                # defined in get_moods_detail).
                get_moods_detail.start_write(qqnumber, con)

            pos += 20
            # Delay before the next iteration; it also runs after the last page.
            time.sleep(20)
#below method only make for me to get the friend's mood
#which havn't download yet.
#
#def get_rest_number(self):
# exists_number = os.listdir('mood_result')
# with open('qqnumber_backup.inc') as f:
# con = f.read()
# con = eval(con)
# for item in con:
# qq = item['data']
# if qq not in exists_number:
# print("Dealing with:\t%s" % qq)
# self.get_moods(qq)
# else:
# print('Finish!')
class Get_moods_start(object):
    '''Run Get_moods.get_moods for every account listed in qqnumber.inc.'''

    def __init__(self):
        print('Start to get all friend\'s mood file and save it to the mood_result folder')

    def get_moods_start(self):
        '''Process the accounts in qqnumber.inc one by one, starting from the end of the list.

        qqnumber.inc is read as a Python literal: a list whose items are
        indexed with the key 'data' to obtain the QQ number. For each item a
        start line is appended to crawler_log.log and Get_moods.get_moods is
        called with that number.

        When get_moods raises an Exception, the traceback is appended to
        crawler_log.log and qqnumber.inc is rewritten with the accounts that
        have not been processed yet followed by every account that has failed
        so far in this run; the loop then continues with the next account.
        A KeyboardInterrupt ends the program through sys.exit().
        '''
        # Function-scope import: used only for parsing the account list below.
        import ast

        app = Get_moods()
        #app.get_rest_number()

        with open('qqnumber.inc', encoding="utf-8") as qnumber_file:
            qnumber_string = qnumber_file.read()
        # SECURITY: this used to be eval(), which executes any expression found
        # in the file. This method itself rewrites the file as str(list), i.e.
        # a plain literal, so ast.literal_eval reads it without running code.
        qnumber_list = ast.literal_eval(qnumber_string)

        # Make sure the mood_result folder for the result files exists.
        util.check_path('mood_result')

        # Accounts whose download raised in this run. They are written back
        # together with the remaining accounts, so a later failure does not
        # drop an earlier failed account from qqnumber.inc.
        failed_items = []

        while qnumber_list:
            item = qnumber_list.pop()
            qq = item['data']
            print("Dealing with:\t%s" % qq)

            start_time = time.ctime()
            with open('crawler_log.log', 'a', encoding="utf-8") as log_file:
                log_file.write("Program run at: %s\tGetting %s data...\n" % (start_time, qq))

            try:
                app.get_moods(qq)
            except KeyboardInterrupt:
                print('User Interrupt, program will exit')
                sys.exit()
            except Exception:
                failed_items.append(item)

                # Write the remaining items plus all failed ones back to
                # qqnumber.inc. For a single failure this is the same content
                # as the list before the failed item was popped.
                with open('qqnumber.inc', 'w', encoding="utf-8") as qnumber_file:
                    qnumber_file.write(str(qnumber_list + failed_items))

                # Write the log
                with open('crawler_log.log', 'a', encoding="utf-8") as log_file:
                    exception_time = time.ctime()
                    log_file.write("Exception occured: %s\n%s\n" % (exception_time, traceback.format_exc()))
            else:
                print("%s Finish!" % qq)

        # The loop has no break, so this runs whenever the list is exhausted.
        print("Finish All!")
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。