Ai
1 Star 0 Fork 0

xlelou/爬虫

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
doubai.py 3.33 KB
一键复制 编辑 原始数据 按行查看 历史
xlelou 提交于 2018-06-25 16:56 +08:00 . 'doubai'
import html
import json
import webbrowser
from pathlib import Path

import requests
from bs4 import BeautifulSoup
# Douban "now playing" listing page that getM.getNowPlaying is pointed at below.
# NOTE(review): the last path segment ("dongying") is presumably a city slug —
# confirm before changing it to another location.
nowplayingUrl = 'https://movie.douban.com/cinema/nowplaying/dongying/'
class getM():
    """Scrapers that turn Douban movie listing pages into text reports.

    Both public methods download a page, pick the listing entries out of the
    HTML and return one string in which every field ends with ``'\\r\\n'``.
    Links are emitted as ``<a target="_blank" ...>`` anchors so the report can
    be dropped straight into an HTML page.

    The methods are static: call them on the class, e.g.
    ``getM.getNowPlaying(url)``.
    """

    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a stalled connection would block the script forever.
    _TIMEOUT = 10

    @staticmethod
    def _fetchSoup(url):
        """Download *url* and return it parsed with Python's built-in parser.

        Raises:
            requests.RequestException: on connection problems, timeouts or a
                non-2xx HTTP status.
        """
        response = requests.get(url, timeout=getM._TIMEOUT)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')

    @staticmethod
    def _anchor(href):
        """Return an ``<a target="_blank">`` element whose text is *href*."""
        # The URL comes from a scraped page: quote and escape it so that it
        # cannot break out of the attribute or inject markup.
        safe = html.escape(href.strip(), quote=True)
        return '<a target="_blank" href="' + safe + '">' + safe + '</a>'

    @staticmethod
    def _innerLink(item, cssClass):
        """Return the first ``<a>`` inside ``<li class=cssClass>`` or None."""
        cell = item.find("li", attrs={"class": cssClass})
        return cell.find('a') if cell is not None else None

    @staticmethod
    def getNowPlaying(url):
        """Return a text report of the movies listed on a "now playing" page.

        Every ``<li class="list-item">`` below the element with
        ``id="nowplaying"`` contributes four lines: title, rating, a link to
        the movie page and a link to the ticket page. A field that cannot be
        found is replaced by its placeholder text instead of raising.

        Args:
            url: Address of the listing page.

        Returns:
            The report as one string; ``''`` when the page has no
            ``id="nowplaying"`` element or no list items.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
        """
        soup = getM._fetchSoup(url)
        container = soup.find(id='nowplaying')
        if container is None:
            return ''

        parts = []
        for item in container.find_all("li", attrs={"class": "list-item"}):
            titleLink = getM._innerLink(item, 'stitle')
            ticketLink = getM._innerLink(item, 'sbtn')
            ratingCell = item.find("li", attrs={"class": 'srating'})

            title = titleLink.get('title') if titleLink is not None else None
            parts.append('电影名称:' + (title if title is not None else '暂无名称') + '\r\n')

            rate = None
            if ratingCell is not None:
                rate = ratingCell.find('span', 'subject-rate')
            parts.append('评分:' + (rate.text if rate is not None else '暂无评分') + '\r\n')

            detailHref = titleLink.get('href') if titleLink is not None else None
            if detailHref is not None:
                parts.append('电影简介:' + getM._anchor(detailHref) + '\r\n')
            else:
                parts.append('电影简介:' + '暂无简介' + '\r\n')

            ticketHref = ticketLink.get('href') if ticketLink is not None else None
            if ticketHref is not None:
                parts.append('购票地址:' + getM._anchor(ticketHref) + '\r\n')
            else:
                parts.append('购票地址:' + '暂无地址' + '\r\n')
        return ''.join(parts)

    @staticmethod
    def getComing(url):
        """Return a text report of the movies listed on a "coming soon" page.

        Rows are read from ``<table class="coming_list">``. The first five
        cells of each row are reported as release date, title, genre, region
        and "want to see" count; the link inside the title cell is reported
        as the synopsis link. Every movie is followed by two empty lines.

        Args:
            url: Address of the listing page.

        Returns:
            The report as one string; ``''`` when the table is missing or
            has no usable rows.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
        """
        soup = getM._fetchSoup(url)
        table = soup.find('table', 'coming_list')
        if table is None:
            return ''
        # html.parser does not invent a <tbody> that is absent from the
        # markup, so fall back to the table itself.
        body = table.find('tbody')
        if body is None:
            body = table

        labels = ('上映日期:', '片名:', '类型:', '制片地区:', '想看:')
        parts = []
        for row in body.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) < len(labels):
                # Header or malformed row: nothing sensible to report.
                continue
            for label, cell in zip(labels, cells):
                parts.append(label + cell.text.strip() + '\r\n')

            link = cells[1].find('a')
            href = link.get('href') if link is not None else None
            if href is not None:
                parts.append('简介:' + getM._anchor(href) + '\r\n')
            else:
                parts.append('简介:' + '暂无简介' + '\r\n')
            parts.append('\r\n' + '\r\n')
        return ''.join(parts)
# Name of the HTML report written to the current working directory.
GEN_HTML = 'asd.html'

# Scrape each page once and reuse the text for both the console output and
# the HTML report; this halves the number of requests and guarantees that the
# printed text and the page show the same data.
nowplayingText = getM.getNowPlaying(nowplayingUrl)
print(nowplayingText)
comingText = getM.getComing('https://movie.douban.com/coming')
print(comingText)

# The scrapers separate fields with CRLF; a browser needs explicit breaks.
content = nowplayingText.replace('\r\n', '<br/>')
coming = comingText.replace('\r\n', '<br/>')

message = """
<html>
<head>
<meta name="renderer" content="webkit" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<p>Hello,World!</p>
<p>豆瓣电影</p>
<div>
<p>热映</p>
%s
</div>
<div>
<p>即将上映</p>
%s
</div>
</body>
</html>""" % (content, coming)

# The context manager closes the file even if the write fails.
with open(GEN_HTML, 'w', encoding='utf8') as f:
    f.write(message)

# webbrowser.open expects a URL; passing a bare relative filename is neither
# supported nor portable, so hand it an absolute file:// URI instead.
webbrowser.open(Path(GEN_HTML).resolve().as_uri(), new=1)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/xlelou/spider.git
git@gitee.com:xlelou/spider.git
xlelou
spider
爬虫
master

搜索帮助