1 Star 3 Fork 1

senbinge / comic_downloader_cli

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
book_info_spider.py 3.83 KB
一键复制 编辑 原始数据 按行查看 历史
senbinge 提交于 2021-09-27 16:34 . fix setup
# -*- coding: utf-8 -*-
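"""
Created on 2021-09-23 15:58:13
---------
@summary: Spider that reads a comic's detail page and records its title
          and, for each new chapter, the image URL prefix, first image id
          and image count.
---------
@author: chyy
"""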
import time
import feapder
import feapder.utils.tools as tools
from book import Book,Chapter
from feapder.utils.log import log
class BookInfoSpider(feapder.AirSpider):
    """Collect a comic's title and per-chapter image metadata into a ``Book``.

    The spider requests ``book.url``, extracts the title and the chapter
    list from that page, then requests every chapter page that is not yet
    in ``book.chapters`` and registers a ``Chapter`` for it via
    ``book.add_chapter``.
    """

    def __init__(self, book: Book, thread_count=None):
        """Store the target book.

        Args:
            book: The book to fill in; its ``url`` is the crawl entry point.
            thread_count: Forwarded unchanged to ``feapder.AirSpider``.
        """
        super().__init__(thread_count=thread_count)
        self.book = book
        # Becomes True once the chapter list shows missing chapters;
        # end_callback only persists the book in that case.
        self.save_book = False

    @staticmethod
    def _log_failure(message, request):
        """Log ``message`` followed by the URL of the request that failed."""
        log.error(message)
        log.error('url:' + request.url)

    def start_requests(self):
        """Create the save directory (if configured) and request the book page."""
        if self.book.save_path:
            tools.mkdir(self.book.save_path)
        yield feapder.Request(self.book.url, callback=self.parse_chapters)

    def parse_chapters(self, request, response):
        """Parse the book page and yield one request per new chapter.

        Fills in ``book.title`` when it is empty and sets
        ``book.chapter_count``. Returns without yielding anything when the
        title or the chapter list cannot be extracted, or when
        ``book.is_missing_chapter()`` reports nothing is missing.
        """
        if not self.book.title:
            book_name = response.css('div.cy_title>h1::text').extract_first()
            if not book_name:
                self._log_failure('提取漫画标题失败!', request)
                return
            self.book.title = book_name

        tools.mkdir(self.book.get_book_folder())
        log.info('开始解析漫画:' + self.book.title)

        chapters = response.css('div.cy_plist>ul>li')
        if not chapters:
            self._log_failure('提取章节失败!', request)
            return

        log.info('发现章节数量:' + str(len(chapters)))
        self.book.chapter_count = len(chapters)
        # Kept as an explicit comparison: Book is defined elsewhere, so a
        # None return must keep behaving as "continue".
        if self.book.is_missing_chapter() == False:  # noqa: E712
            log.info('漫画章节未更新')
            return

        self.save_book = True
        # The page's chapter list is walked in reverse order.
        for chapter in chapters[::-1]:
            # extract_first() returns None when the node is missing, so fall
            # back to '' before stripping instead of crashing on None.strip().
            title = (chapter.css('a>p::text').extract_first() or '').strip()
            if not title:
                self._log_failure('提取章节名失败!', request)
                continue
            if title in self.book.chapters:
                # Chapter already recorded on the book; nothing to fetch.
                continue
            url = (chapter.css('a::attr(href)').extract_first() or '').strip()
            if not url:
                self._log_failure('提取章节url失败', request)
                continue
            yield feapder.Request(
                url, callback=self.parse_chapter, chapter_title=title
            )

    def parse_chapter(self, request, response):
        """Parse one chapter page and add a ``Chapter`` to the book.

        The image URL prefix is the first image's ``src`` up to and
        including its last ``/``. The first image id and the image count are
        read from the page text with a regex.
        NOTE(review): this relies on ``response.re_first`` returning both
        capture groups as a pair -- confirm against feapder's selector.
        """
        chapter_title = request.chapter_title
        log.info('开始解析章节信息:' + chapter_title)

        url_pattern = response.css(
            'div.uk-container>a>img::attr(src)'
        ).extract_first()
        if not url_pattern:
            self._log_failure('提取图片url模板失败!', request)
            return

        cut = url_pattern.rfind('/')
        if cut < 0:
            self._log_failure('解析图片url模板失败:' + url_pattern, request)
            return
        url_pattern = url_pattern[:cut + 1]

        try:
            first_id, total_count = response.re_first(r'>本章.+?(\d)P/(\d+)P<')
            first_id = int(first_id)
            total_count = int(total_count)
        except Exception:
            # Best effort: a missing or malformed counter skips the chapter,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            self._log_failure('提取图片数量信息失败!', request)
            return
        else:
            log.info(f'提取图片数量信息成功,起始id:{first_id},总数:{total_count}')

        chapter = Chapter(chapter_title, total_count, url_pattern, first_id)
        self.book.add_chapter(chapter)

    def end_callback(self):
        """Persist the book if new chapters were found, then log completion."""
        if self.save_book:
            self.book.save()
        log.info('漫画解析完成')
if __name__ == "__main__":
    # Manual run against a sample comic page.
    # Other pages used for testing:
    #   https://www.dongman.la/manhua/detail/13576/
    #   https://www.dongman.la/manhua/detail/12074/
    book = Book(url='https://www.dongman.la/manhua/detail/12382/')
    spider = BookInfoSpider(book=book)
    spider.start()

    # Poll every 3 seconds until the spider reports all threads done.
    while not spider.all_thread_is_done():
        time.sleep(3)

    book.save()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/senbinge/comic_downloader_cli.git
git@gitee.com:senbinge/comic_downloader_cli.git
senbinge
comic_downloader_cli
comic_downloader_cli
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891