1 Star 0 Fork 0

sorossvp/copy herrkun Financial data collection and pdfplumber

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
download_filesFromcsv_wyk.py 2.17 KB
一键复制 编辑 原始数据 按行查看 历史
herrkun 提交于 2019-04-30 16:27 . Add files via upload
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 16:02:00 2019
Download every PDF listed in a CSV file.
@author: herr_kun
"""
#!/usr/bin/python
# coding = utf-8
# __author__='Lilly'
# description: download the PDF annual reports listed in a csv file
import csv
import os
import time
import requests
def download_pdf(path, MAX_COUNT=5, timeout=30):
    """Download every file listed in the CSV file at *path*.

    Each CSV row is read as ``file name, URL``: the first column is the name
    the download is saved under and the second column is the address fetched
    with ``requests.get``.  Files are written into the directory that
    contains the CSV file.  Rows with fewer than two columns (for example
    blank lines) are skipped.

    A row whose download fails on every attempt is appended to ``error.log``
    in the same directory and processing continues with the next row.

    Args:
        path: Path of the CSV list file.
        MAX_COUNT: Maximum number of download attempts per row.
        timeout: Seconds to wait for the server before an attempt is
            treated as failed.

    Raises:
        AssertionError: If *path* does not exist.
    """
    if not os.path.exists(path):
        # AssertionError is kept for backward compatibility with the former
        # ``assert``; raising explicitly keeps the check under ``python -O``.
        raise AssertionError('No such list file "' + path + '"!')

    # dirname() of a bare file name is '', which means the current directory.
    dst_dir = os.path.dirname(path) or '.'

    # Read the list of files to download.
    with open(path, 'r', newline='') as csv_in:
        for row_no, row in enumerate(csv.reader(csv_in), start=1):
            if len(row) < 2:
                # Nothing to download without both a name and a URL.
                continue
            file_name, url = row[0], row[1]

            response = None
            for attempt in range(1, MAX_COUNT + 1):
                try:
                    response = requests.get(url, timeout=timeout)
                    # An HTTP error page must not be saved as the document.
                    response.raise_for_status()
                    break
                except requests.RequestException:
                    # Report the failed attempt, then retry after a pause.
                    response = None
                    print(str(row_no) + '::' + str(attempt) + ':"' + file_name + '" failed!')
                    if attempt < MAX_COUNT:
                        time.sleep(3)

            if response is not None:
                # Download succeeded: save the payload.
                with open(os.path.join(dst_dir, file_name), 'wb') as file:
                    file.write(response.content)
                print(str(row_no) + ': "' + file_name + '" downloaded.')
            else:
                # Every attempt failed: record the row in the error log.
                with open(os.path.join(dst_dir, 'error.log'), 'a') as log_file:
                    log_file.write(
                        time.strftime('[%Y/%m/%d %H:%M:%S] ', time.localtime(time.time()))
                        + 'Failed to download"' + file_name + '"\n')
                print('...' + str(row_no) + ':"' + file_name + '" finally failed ...')
if __name__ == '__main__':
    import sys

    # Default list file of the original script; a path passed as the first
    # command-line argument overrides it.
    LIST_FILE = r'D:\XML\wyk年度报告/part_data_wyk.csv'
    if len(sys.argv) > 1:
        LIST_FILE = sys.argv[1]
    download_pdf(LIST_FILE)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/sorossvp/copy-herrkun-Financial-data-collection-and-pdfplumber.git
git@gitee.com:sorossvp/copy-herrkun-Financial-data-collection-and-pdfplumber.git
sorossvp
copy-herrkun-Financial-data-collection-and-pdfplumber
copy herrkun Financial data collection and pdfplumber
master

搜索帮助