1 Star 0 Fork 0

陈发强 / pycharmfile2

Create your Gitee Account
Explore and code with more than 6 million developers. Free private repositories! :)
Sign up
This repository doesn't specify a license. Without the author's permission, this code is for learning only and cannot be used for other purposes.
Clone or download
python爬虫票房.py 1.23 KB
Copy Edit Web IDE Raw Blame History
陈发强 authored 2020-06-14 22:16 . python爬虫票房
import requests
import re
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is made with a 30 second timeout, and
    ``raise_for_status()`` is called on the response before its text is
    returned. The text is decoded with the encoding detected from the
    response content (``apparent_encoding``).
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Decode with the encoding guessed from the body itself.
    response.encoding = response.apparent_encoding
    return response.text
def _parse_box_office(text):
    """Convert a digits-and-dots string to ``int`` or ``float``.

    Raises ``ValueError`` when ``text`` is not a valid number (e.g. ``"1.2.3"``).
    """
    return float(text) if '.' in text else int(text)


def parsePage(ilt, html):
    """Extract ``[box_office, title]`` pairs from ``html`` and append them to ``ilt``.

    Box-office figures and ranked titles are located with two independent
    regular expressions and paired by position: the first figure goes with
    the first title, and so on.

    Args:
        ilt: List that receives one ``[price, title]`` entry per pair. It is
            mutated in place; nothing is returned.
        html: Page text to search.
    """
    prices = re.findall(r'全球票房:[\d\.]{2,7}', html)
    titles = re.findall(r'第[\d]*名:《.{1,15}》', html)
    # zip stops at the shorter list, so a page with more titles than
    # box-office figures no longer raises IndexError.
    for price_text, title_text in zip(prices, titles):
        try:
            # Parse the number explicitly instead of eval(): the text comes
            # from a downloaded page and must not be executed as code.
            price = _parse_box_office(price_text.partition(':')[2])
        except ValueError:
            # The pattern also matches strings such as "1.2.3" or "..";
            # skip the pair rather than abort the whole page.
            continue
        # Split on the first colon only, so a title that itself contains a
        # colon is kept whole.
        title = title_text.partition(':')[2]
        ilt.append([price, title])
def printGoodsList(ilt):
    """Print ``ilt`` as a tab-separated table with a running row number.

    A header row is printed first. Each following row shows a 1-based
    index, then the first and second element of the entry.

    Args:
        ilt: Iterable of indexable entries; element 0 is printed in the
            box-office column and element 1 in the title column.
    """
    row_format = "{:4}\t{:8}\t{:16}"
    print(row_format.format("序号", "票房", "电影名称"))
    # enumerate supplies the 1-based row number directly.
    for index, entry in enumerate(ilt, start=1):
        print(row_format.format(index, entry[0], entry[1]))
def main():
    """Download the six article pages, parse them, and print the table.

    The first page has its own URL; pages 2 through 6 share a common prefix
    followed by the page number. The text of all pages is concatenated in
    order and parsed in a single pass.
    """
    first_page = '''https://mini.eastday.com/a/190505123024855.html?qid=02263'''
    page_prefix = '''https://mini.eastday.com/a/190505123024855-'''

    # First page, then the numbered continuation pages 2..6.
    urls = [first_page]
    urls.extend(page_prefix + str(page) + '.html' for page in range(2, 7))

    html = "".join(getHTMLText(page_url) for page_url in urls)

    infoList = []
    parsePage(infoList, html)
    printGoodsList(infoList)
# Run the scraper only when this file is executed as a script, so that
# importing it does not trigger network requests.
if __name__ == "__main__":
    main()

Comment ( 0 )

Sign in to post a comment

1
https://gitee.com/cfqlovem-521/pycharmfile2.git
git@gitee.com:cfqlovem-521/pycharmfile2.git
cfqlovem-521
pycharmfile2
pycharmfile2
master

Search