
JackHan / MediaCrawler

main.py 2.75 KB
Qiuyu Wu committed on 2023-11-16 18:47 · feat: to api

import argparse
import asyncio
import sys

from fastapi import FastAPI, HTTPException, Request

import config
import db
from base import proxy_account_pool
from media_platform.douyin import DouYinCrawler
from media_platform.xhs import XiaoHongShuCrawler

app = FastAPI()


class CrawlerFactory:
    """Factory that maps a platform key to its crawler implementation."""

    @staticmethod
    def create_crawler(platform: str):
        if platform == "xhs":
            return XiaoHongShuCrawler()
        elif platform == "dy":
            return DouYinCrawler()
        else:
            raise ValueError("Invalid media platform; currently only xhs or dy are supported.")


async def main():
    # Define command line params
    parser = argparse.ArgumentParser(description='Media crawler program.')
    parser.add_argument('--platform', type=str, help='Media platform select (xhs|dy)',
                        choices=["xhs", "dy"], default=config.PLATFORM)
    parser.add_argument('--lt', type=str, help='Login type (qrcode | phone | cookie)',
                        choices=["qrcode", "phone", "cookie"], default=config.LOGIN_TYPE)

    # Init account pool
    account_pool = proxy_account_pool.create_account_pool()

    # Init db
    if config.IS_SAVED_DATABASED:
        await db.init_db()

    args = parser.parse_args()
    crawler = CrawlerFactory.create_crawler(platform=args.platform)
    crawler.init_config(
        platform=args.platform,
        login_type=args.lt,
        account_pool=account_pool
    )
    await crawler.start()


# Variant of main() that accepts its parameters directly instead of parsing argv
async def _main(platform: str, login_type: str):
    account_pool = proxy_account_pool.create_account_pool()
    if config.IS_SAVED_DATABASED:
        await db.init_db()
    crawler = CrawlerFactory.create_crawler(platform=platform)
    crawler.init_config(
        platform=platform,
        login_type=login_type,
        account_pool=account_pool
    )
    await crawler.start()


# Define the API routes
@app.post("/api/v1/search")
async def search(request: Request):
    body = await request.json()
    _ = body.get("user_id")
    _ = body.get("key_word")
    _ = body.get("desc")
    # Call the crawler entry point; the response is returned only after the crawl finishes
    platform = "xhs"  # or read it from the request body
    login_type = "qrcode"  # or read it from the request body
    await _main(platform, login_type)
    return {"message": "Crawler started"}
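
# Example request once the server below is running (host/port assumed from the
# uvicorn.run call further down; the JSON fields mirror the ones read above):
#   curl -X POST http://localhost:8000/api/v1/search \
#        -H "Content-Type: application/json" \
#        -d '{"user_id": "u123", "key_word": "coffee", "desc": "test search"}'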


# Run the API server
if __name__ == '__main__':
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
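    # Alternatively (assuming this file is saved as main.py), the server can be
    # started from the shell with: uvicorn main:app --host 0.0.0.0 --port 8000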

# Previous CLI entry point, kept for reference:
# if __name__ == '__main__':
#     try:
#         # asyncio.run(main())
#         asyncio.get_event_loop().run_until_complete(main())
#     except KeyboardInterrupt:
#         sys.exit()

# TODO:
# 1. Turn the main function into a service and call it via the API
# 2. Pass parameters in through the API and persist the search parameters (see the sketch below)
# 3. Implement a callback for the search results
# 4. Send the login QR code to the phone (WeChat, email, Telegram, etc.)
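
# A rough sketch for TODO item 2, kept commented out. Assumptions: the SearchRequest
# model, the /api/v1/search_v2 route, and the db.save_search_params helper are
# hypothetical names that do not exist in the project yet.
#
# from pydantic import BaseModel
#
# class SearchRequest(BaseModel):
#     user_id: str
#     key_word: str
#     desc: str = ""
#     platform: str = "xhs"
#     login_type: str = "qrcode"
#
# @app.post("/api/v1/search_v2")
# async def search_v2(req: SearchRequest):
#     # Persist the incoming search parameters (hypothetical helper on the db module)
#     # await db.save_search_params(req)
#     await _main(req.platform, req.login_type)
#     return {"message": "Crawler started", "key_word": req.key_word}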