# GameCrawler_4399/4399GameCrawler.py

import os
import re
import threading

from bs4 import BeautifulSoup as bs
import requests


def getAllGameUrl():
    """
    Collect the name and info-page link of every game on the listing page.

    Downloads 'http://www.4399.com/flash/gamehw.htm', selects the game
    ``<li>`` entries and reads each entry's name (text of ``a > b``) and
    link (``href`` of ``a``, prefixed with the global ``indexUrl``).

    :return: list of dicts with the keys 'gameName' and 'gameInfoUrl';
        entries missing the name element or the link are skipped.
    """
    # A timeout keeps a stalled connection from blocking the crawler forever.
    response = requests.get('http://www.4399.com/flash/gamehw.htm', timeout=30)
    dom = bs(response.content, 'html.parser')

    gameUrlList = []
    for item in dom.select('#skinbody > div:nth-child(6) > ul > li'):
        nameTag = item.select_one('a > b')
        linkTag = item.select_one('a')
        # select_one returns None when nothing matches; skip such entries
        # instead of letting one malformed <li> abort the whole listing.
        if nameTag is None or linkTag is None or not linkTag.get('href'):
            continue
        gameUrlList.append({
            'gameName': nameTag.get_text(),
            # Resulting link looks like 'http://www.4399.com/flash/212103.htm'
            'gameInfoUrl': indexUrl + linkTag['href'],
        })
    return gameUrlList


def downloadIfAvailable(game):
    """
    Start downloading a game if its info page exposes a local .swf path.

    Fetches the game's info page and searches the page text for the value
    assigned to ``_strGamePath`` ending in ``.swf``. When found, the full
    download link (global ``gameBaseUrl`` + that relative path) is stored in
    ``game['gameUrl']`` and ``downloadAGame`` is started in a new thread.
    When not found, nothing happens.

    :param game: dict with the key 'gameInfoUrl'; gains the key 'gameUrl'
        when a .swf path is found.
    :return: None
    """
    # A timeout keeps a stalled connection from pinning this thread forever.
    response = requests.get(game['gameInfoUrl'], timeout=30)

    # Only the first occurrence is needed, so search instead of findall.
    match = re.search(r'(?<=_strGamePath=").+?\.swf', response.text)
    if match is None:
        return

    game['gameUrl'] = gameBaseUrl + match.group(0)
    threading.Thread(target=downloadAGame, args=(game,)).start()

def downloadAGame(game):
    """
    Download a game from its download link and save it as a .swf file.

    The file is written to the 'games/' directory (created if missing) and
    is named after the game, with characters that are not allowed in file
    names replaced by '_'.

    :param game: dict with the keys 'gameName' and 'gameUrl'.
    :return: None
    :raises requests.HTTPError: if the server answers with an error status.
    """
    downloadPath = 'games/'
    # exist_ok avoids the check-then-create race between download threads.
    os.makedirs(downloadPath, exist_ok=True)

    # Download before opening the file so a failed request does not leave
    # an empty .swf behind, and reject error pages instead of saving them.
    response = requests.get(game['gameUrl'], timeout=60)
    response.raise_for_status()

    # Path separators and other reserved characters in the scraped name
    # would break the target path or redirect it outside 'games/'.
    fileName = re.sub(r'[\\/:*?"<>|]', '_', game['gameName']) + '.swf'
    with open(os.path.join(downloadPath, fileName), 'wb') as file:
        file.write(response.content)
    print(game['gameName'] + '下载完成')


# Base URLs read as globals by the functions above. They are defined at
# module level (not only under the __main__ guard) so the functions also work
# when this file is imported rather than run as a script.
indexUrl = 'http://www.4399.com'
gameBaseUrl = 'http://sxiao.4399.com/4399swf'


if __name__ == '__main__':
    # Check every listed game in its own thread; each check starts a download
    # thread itself when the game offers a .swf file.
    gameUrlList = getAllGameUrl()
    for game in gameUrlList:
        threading.Thread(target=downloadIfAvailable, args=(game,)).start()