1 Star 2 Fork 1

equationl/QQzoneMood_crawler

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
.gitignore
README.md
cookie_file
get_full_data.py
get_moods.py
get_moods_detail.py
get_my_friends.py
get_qq_number.py
main.py
operate_table.py
util.py
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
get_moods.py 4.70 KB
一键复制 编辑 原始数据 按行查看 历史
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""
把包含动态的文件下载下来
"""
import ast
import os
import sys
import time
import traceback

import requests

import get_moods_detail
import util
class Get_moods(object):
    '''Download the raw mood pages of a QQ user with the shared cookie and headers.'''

    def __init__(self):
        # A single session is reused for every request made by this instance.
        self.session = requests.Session()
        # NOTE(review): this is the very dict exported by util, not a copy, so
        # the Referer set in get_moods() is also visible through util.headers.
        self.headers = util.headers
        self.g_tk = util.g_tk

    @staticmethod
    def _write_log(message):
        '''Append message verbatim to crawler_log.log.'''
        with open('crawler_log.log', 'a', encoding="utf-8") as log_file:
            log_file.write(message)

    def get_moods(self, qqnumber, timeout=30, delay=20):
        '''Fetch the mood pages of one user, save each raw page and parse it.

        Pages are requested 20 positions apart, starting at position 0. The
        body of every response is written to mood_result/<qqnumber>/<pos>.
        Every page that does not contain "msgnum":0 is handed to
        get_moods_detail.start_write. The loop ends after a page containing
        "msglist":null or "msgnum":0.

        qqnumber -- QQ number as a string (it is concatenated into the
                    Referer header and the result path).
        timeout  -- seconds to wait for each HTTP response; passed straight
                    to requests so a stalled connection raises instead of
                    blocking the crawler forever.
        delay    -- seconds to sleep after each page.

        Calls sys.exit() when the response status is not OK or when the
        page contains "subcode":-4001 (treated as an expired cookie).
        '''
        self.headers['Referer'] = 'http://user.qzone.qq.com/' + qqnumber

        # Create a folder named after the QQ number to hold its result files.
        result_dir = 'mood_result/' + qqnumber
        util.check_path(result_dir)

        # Target URL without the position argument.
        url_base = util.parse_moods_url(qqnumber)

        pos = 0
        has_more = True
        while has_more:
            print("\tDealing with position:\t%d" % pos)
            url = url_base + "&pos=%d" % pos
            # print(url)  # for debug use
            res = self.session.get(url, headers=self.headers, timeout=timeout)
            con = res.text

            # The raw body is kept on disk even for a failed request.
            with open(result_dir + '/' + str(pos), 'w', encoding="utf-8") as f:
                f.write(con)

            if res.status_code != requests.codes.ok:
                # Trailing newline keeps this entry from running into the
                # next one, like every other entry written to the log.
                self._write_log(
                    'Request Fail! Response Code is {} , Time is {} , Please check yuor cookies\n'.format(
                        str(res.status_code), time.ctime()))
                print("Request Fail! Get More in log file.")
                sys.exit()

            # No message list on this page: this is the last request.
            if '"msglist":null' in con:
                has_more = False

            # Cookie expired
            if '"subcode":-4001' in con:
                self._write_log('Cookie Expried! Time is %s\n' % time.ctime())
                print("Cookie Expried!Get More in log file.")
                sys.exit()

            if '"msgnum":0' in con:
                # Cannot access this user's moods.
                self._write_log("%s Cannot access..\n" % qqnumber)
                has_more = False
            else:
                # Parse the page and write it to the database.
                # NOTE(review): a page with "msglist":null but a non-zero
                # msgnum is still passed on here - confirm that
                # start_write copes with an empty message list.
                get_moods_detail.start_write(qqnumber, con)

            pos += 20
            # Pause between pages.
            time.sleep(delay)
# The method below exists only for my own use: it fetches the moods of
# friends that have not been downloaded yet.
#
#def get_rest_number(self):
# exists_number = os.listdir('mood_result')
# with open('qqnumber_backup.inc') as f:
# con = f.read()
# con = eval(con)
# for item in con:
# qq = item['data']
# if qq not in exists_number:
# print("Dealing with:\t%s" % qq)
# self.get_moods(qq)
# else:
# print('Finish!')
class Get_moods_start(object):
    '''Run Get_moods over every QQ number listed in qqnumber.inc.'''

    def __init__(self):
        print('Start to get all friend\'s mood file and save it to the mood_result folder')

    def get_moods_start(self):
        '''Download the moods of every entry in qqnumber.inc.

        qqnumber.inc must hold the text of a Python list literal whose items
        are dicts with a 'data' key (the QQ number). Entries are taken from
        the end of the list. Each run is announced in crawler_log.log.

        If get_moods raises an Exception, the list as it was before the
        current entry was removed is written back to qqnumber.inc, the
        traceback is appended to crawler_log.log, and the loop moves on to
        the next entry. A KeyboardInterrupt ends the program.
        '''
        app = Get_moods()
        #app.get_rest_number()
        with open('qqnumber.inc', encoding="utf-8") as qnumber_file:
            qnumber_string = qnumber_file.read()
        # SECURITY: this used to be eval(), which executes any code found in
        # the file. literal_eval only accepts Python literals, which is what
        # the save-back below writes with str(list).
        qnumber_list = ast.literal_eval(qnumber_string)

        # Make sure the mood_result folder for the result files exists.
        util.check_path('mood_result')

        while qnumber_list:
            # Snapshot taken before pop() so a failed entry is kept too.
            save_back_qnumber = qnumber_list[:]
            item = qnumber_list.pop()
            qq = item['data']
            print("Dealing with:\t%s" % qq)

            start_time = time.ctime()
            with open('crawler_log.log', 'a', encoding="utf-8") as log_file:
                log_file.write("Program run at: %s\tGetting %s data...\n" % (start_time, qq))

            try:
                app.get_moods(qq)
            except KeyboardInterrupt:
                print('User Interrupt, program will exit')
                sys.exit()
            except Exception:
                # Write the remaining entries back to qqnumber.inc.
                with open('qqnumber.inc', 'w', encoding="utf-8") as qnumber_file:
                    qnumber_file.write(str(save_back_qnumber))

                # Record the failure in the log.
                with open('crawler_log.log', 'a', encoding="utf-8") as log_file:
                    exception_time = time.ctime()
                    log_file.write("Exception occured: %s\n%s\n" % (exception_time, traceback.format_exc()))
            else:
                print("%s Finish!" % qq)

        # The loop has no break, so this always runs once the list is empty.
        print("Finish All!")
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/equation/QQzoneMood_crawler.git
git@gitee.com:equation/QQzoneMood_crawler.git
equation
QQzoneMood_crawler
QQzoneMood_crawler
master

搜索帮助