python_reptilian
/
爬取猫眼电影榜单(maoyan.com/board/4)的一些信息:上演时间、片名、主演

import re
import urllib.request as r
def test(url, page):
    """Download one board page and print the stars, titles and release times in it.

    Args:
        url: Address of the page to fetch; the body is decoded as UTF-8.
        page: Value interpolated into the closing "current page" message.

    Returns:
        None. Everything found is written to standard output.
    """
    # Close the response as soon as the body has been read instead of
    # leaving the connection to the garbage collector.
    with r.urlopen(url) as response:
        html = response.read().decode('utf-8')

    # Release times: every match is kept, in page order.
    release_times = re.compile('<p class="releasetime">(.*?)</p>').findall(html)

    # Titles: the capture group includes the surrounding double quotes.
    # dict.fromkeys removes duplicates like a set would, but keeps the
    # first-seen order so the output is reproducible between runs.
    titles = dict.fromkeys(
        re.compile('<a href=".*?" title=(".*?")').findall(html)
    )

    # Stars: re.S lets "." match newlines, so a paragraph spread over
    # several lines is still captured (whitespace included).
    stars = dict.fromkeys(
        re.compile('<p class="star">(.*?)</p>', re.S).findall(html)
    )

    for star in stars:
        print(star)

    for title in titles:
        print('片名:', title)
    print('一共%s个片名' % len(titles))

    for release_time in release_times:
        print(release_time)
    print('当前是第%s页'%page)
# Request offsets 0, 10, ..., 90 of the board, one page per request.
for page_number, offset in enumerate(range(0, 100, 10), start=1):
    url = 'http://maoyan.com/board/4?offset=%s' % offset
    # Hand over the 1-based page number rather than the raw offset, so the
    # "current page" message reads 1..10 instead of 0, 10, ..., 90.
    test(url, page_number)