# (Web page residue, not part of the program) Code pull completed; the page will refresh automatically.
# -*- coding: utf-8 -*-
"""
Created on 2021-09-23 15:58:13
---------
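@summary: feapder AirSpider that parses a comic's chapter list and each chapter's image count / image URL prefix into a Book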
---------
@author: chyy
"""
import time
import feapder
import feapder.utils.tools as tools
from book import Book,Chapter
from feapder.utils.log import log
class BookInfoSpider(feapder.AirSpider):
    """Spider that fills a ``Book`` with chapter metadata scraped from its page.

    The crawl has two stages: ``parse_chapters`` reads the book page, finds
    the chapter list and schedules one request per chapter not yet recorded
    in ``book.chapters``; ``parse_chapter`` reads a chapter page and records
    its image URL prefix, first image id and image count as a ``Chapter``.
    """

    def __init__(self, book: Book, thread_count=None):
        """Create a spider for one book.

        Args:
            book: The book to populate; its ``url`` is the crawl entry point.
            thread_count: Forwarded unchanged to ``feapder.AirSpider``.
        """
        super().__init__(thread_count=thread_count)
        self.book = book
        # Flipped to True once the chapter list shows missing chapters, so
        # that end_callback knows the book has to be persisted.
        self.save_book = False

    @staticmethod
    def _log_failure(message, url):
        """Log an error message followed by the URL of the page that caused it."""
        log.error(message)
        log.error('url:' + url)

    def start_requests(self):
        """Ensure the save directory exists and request the book page."""
        if self.book.save_path:
            tools.mkdir(self.book.save_path)
        yield feapder.Request(self.book.url, callback=self.parse_chapters)

    def parse_chapters(self, request, response):
        """Parse the book page and schedule a request for every new chapter.

        Fills in ``book.title`` when it is not set yet, updates
        ``book.chapter_count``, and stops early when
        ``book.is_missing_chapter()`` reports nothing missing.

        Yields:
            feapder.Request: One request per chapter whose title is not
            already in ``book.chapters``, carrying ``chapter_title``.
        """
        if not self.book.title:
            book_name = response.css('div.cy_title>h1::text').extract_first()
            if not book_name:
                self._log_failure('提取漫画标题失败!', request.url)
                return
            self.book.title = book_name
        tools.mkdir(self.book.get_book_folder())
        log.info('开始解析漫画:' + self.book.title)

        chapters = response.css('div.cy_plist>ul>li')
        if not chapters:
            self._log_failure('提取章节失败!', request.url)
            return
        log.info('发现章节数量:' + str(len(chapters)))
        self.book.chapter_count = len(chapters)
        if not self.book.is_missing_chapter():
            log.info('漫画章节未更新')
            return

        self.save_book = True
        # The list is walked in reverse of page order.
        # NOTE(review): presumably the page lists newest chapters first - confirm.
        for chapter in chapters[::-1]:
            # extract_first() gives None when the node is absent; fall back to
            # '' so the failure branch below runs instead of an AttributeError.
            title = (chapter.css('a>p::text').extract_first() or '').strip()
            if not title:
                self._log_failure('提取章节名失败!', request.url)
                continue
            if title in self.book.chapters:
                continue
            url = (chapter.css('a::attr(href)').extract_first() or '').strip()
            if not url:
                self._log_failure('提取章节url失败', request.url)
                continue
            yield feapder.Request(
                url, callback=self.parse_chapter, chapter_title=title
            )

    def parse_chapter(self, request, response):
        """Parse one chapter page and add the resulting ``Chapter`` to the book.

        Reads the first image's ``src`` to derive the URL prefix (everything
        up to and including the last ``/``), then reads the first image id
        and total image count from the page text.
        """
        chapter_title = request.chapter_title
        log.info('开始解析章节信息:' + chapter_title)

        url_pattern = response.css(
            'div.uk-container>a>img::attr(src)'
        ).extract_first()
        if not url_pattern:
            self._log_failure('提取图片url模板失败!', request.url)
            return
        cut = url_pattern.rfind('/')
        if cut < 0:
            self._log_failure('解析图片url模板失败:' + url_pattern, request.url)
            return
        url_pattern = url_pattern[:cut + 1]

        try:
            # NOTE(review): the unpacking relies on re_first yielding both
            # capture groups as a pair - confirm against feapder's Response.
            # NOTE(review): the first group is a single digit (\d); verify
            # that a multi-digit starting id cannot occur.
            first_id, total_count = response.re_first(r'>本章.+?(\d)P/(\d+)P<')
            first_id = int(first_id)
            total_count = int(total_count)
        except Exception:
            # Any extraction/conversion failure is reported and the chapter is
            # skipped; KeyboardInterrupt/SystemExit are no longer swallowed.
            self._log_failure('提取图片数量信息失败!', request.url)
            return
        log.info(f'提取图片数量信息成功,起始id:{first_id},总数:{total_count}')

        chapter = Chapter(chapter_title, total_count, url_pattern, first_id)
        self.book.add_chapter(chapter)

    def end_callback(self):
        """Save the book if new chapters were found, then log completion."""
        if self.save_book:
            self.book.save()
        log.info('漫画解析完成')
if __name__ == "__main__":
    # Manual run: crawl one comic detail page and persist the result.
    target_book = Book(url='https://www.dongman.la/manhua/detail/12382/')
    crawler = BookInfoSpider(book=target_book)
    # Other detail pages previously tried here (kept disabled):
    # crawler.start_url='https://www.dongman.la/manhua/detail/13576/'
    # crawler.start_url='https://www.dongman.la/manhua/detail/12074/'
    crawler.start()
    # Poll every 3 seconds until the spider reports all threads done.
    while not crawler.all_thread_is_done():
        time.sleep(3)
    target_book.save()
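# (Web page residue, not part of the program) This section may contain content unsuitable for display, so the page does not show it. You can review and revise it through the relevant editing functions.
# If you confirm the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates relevant national laws and regulations, you may click Submit to appeal and it will be handled as soon as possible.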