验证中...
pipeline里面踩过的坑
Raw Copy
from scrapy.utils.project import get_project_settings
from twisted.enterprise import adbapi
import pymysql
import pymysql.cursors
class MysqlTwistedPipeline(object):
    """Scrapy item pipeline that inserts articles into MySQL asynchronously.

    Inserts are dispatched through a Twisted ``adbapi.ConnectionPool`` so
    that the blocking database call runs in the pool's worker threads
    instead of in the crawl loop.
    """

    # Parameterized statement: the driver quotes every bound value itself,
    # so values must be passed raw (never pre-escaped).
    INSERT_SQL = """
        insert into article_info(
            title, url, front_image_url, front_image_path, create_time,
            fav_nums, praise_nums, comment_nums, tags, content, url_object_id
        ) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    # Item keys, in the same order as the columns of INSERT_SQL.
    INSERT_FIELDS = (
        'title', 'url', 'front_image_url', 'front_image_path', 'create_time',
        'fav_nums', 'praise_nums', 'comment_nums', 'tags', 'content',
        'url_object_id',
    )

    def __init__(self):
        """Create the connection pool from the Scrapy project settings.

        The pool is built directly in the constructor rather than in a
        ``from_setting`` classmethod factory. NOTE(review): the author
        reports that the factory variant hid a misspelled setting name,
        while building the pool here made the real error visible.
        """
        settings = get_project_settings()
        self.dbpool = self._create_pool(settings)

    @staticmethod
    def _create_pool(settings):
        """Return an ``adbapi.ConnectionPool`` configured from ``settings``.

        Reads ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_DBNAME``,
        ``MYSQL_USER`` and ``MYSQL_PASSWORD`` from the given settings.
        """
        params = dict(
            host=settings['MYSQL_HOST'],
            port=settings['MYSQL_PORT'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWORD'],
            charset="utf8",
            use_unicode=True,
            cursorclass=pymysql.cursors.DictCursor,
        )
        return adbapi.ConnectionPool("pymysql", **params)

    def process_item(self, item, spider):
        """Schedule an asynchronous insert of ``item`` and pass the item on.

        Returns:
            The unchanged ``item``, so that later pipelines still receive
            it (returning nothing would hand them ``None``).
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Failures surface on the deferred, not as exceptions raised here.
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        """Log a failure raised by the asynchronous insert of ``item``."""
        spider.logger.error("MySQL insert failed: %s", failure)

    def do_insert(self, cursor, item):
        """Insert one article row using the cursor supplied by the pool.

        Called by ``runInteraction``. Values are bound as query parameters
        and therefore passed through untouched: escaping ``content`` by
        hand as well would store the escape characters in the database.
        """
        values = tuple(item[field] for field in self.INSERT_FIELDS)
        cursor.execute(self.INSERT_SQL, values)

    def close_spider(self, spider):
        """Close the connection pool when the spider finishes."""
        self.dbpool.close()
注意:第一次写配置文件时,我把 user 误写成了 uesr,而 scrapy 并没有报出具体的错误。在排查原因的过程中,我分析出问题在于连接池没有建立成功,
但一直以为是 twisted 的问题,于是去找 twisted 方面的解决办法;网上有说法是缺少 pywin32 组件,结果还是不行。
最终我将 from_setting 方法写入 __init__,才报出了错误的真正原因。

Comment list( 0 )

Sign in to post a comment

Help Search