1 Star 0 Fork 0

fuwu360 / fuwu360-tools

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
taobao_assistant_2_shop.py 37.50 KB
一键复制 编辑 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
# !/usr/bin/python3
# -*- coding: utf-8 -*-
"""
丛淘宝助理导出的数据再导入到网店
操作之前必须先备份数据库,以防万一
注意导出的文件名需要是:taobao.csv
请在下面的__init__方法里面修改数据库连接的账号和相关信息
"""
import argparse
import datetime
import json
import logging
import os
import random
import sys
import time
import pandas as pd
import pymysql
import lxml.html.clean as clean
from sshtunnel import SSHTunnelForwarder
# 如果需要ssh方式连接数据库,则需要下面这个
# server = SSHTunnelForwarder(("192.168.0.2", 22),ssh_password="Pg123456!@#$%^ping",ssh_username='root',remote_bind_address=("pc-2zeyi062q7v17kb07.rwlb.rds.aliyuncs.com", 3306))
parser = argparse.ArgumentParser(description='命令行参数测试')
# 如果只打算单独导入一条数据,则带上下面的参数,表示的是淘宝的id
parser.add_argument('--tb-id', type=str, default='')
args = parser.parse_args()
tb_id = args.tb_id
class TaobaoAssistant2Shop():
def __init__(self):
# server.start()
# 设置日志
self.set_logging()
# 数据库ip
self.host = 'www.yunte.cn'
# 数据库名称
self.database_name = 'fuwu360'
# 数据库用户名
user = 'fuwu360'
# 数据库密码
passwd = 'xuanwei123'
# 数据库表前缀
self.table_prifix = 'ht_'
# csv文件名
self.csv_file_name = '../taobao.csv'
# 相册是否直接直接用淘宝图片,不适用,则需要自己ftp传图片到网站空间,然后登录网站:商品管理-->图片批量处理。这样就可以处理水印和缩略图。
# 使用则用的是淘宝的图片链接。使用淘宝图片链接的缺点是没法压缩,首页加载慢一点 使用是True,不使用是False
self.use_taobao_pictures = False
# 数据库连接,如果是ssh方式则用这个
# self.conn = pymysql.connect(host='127.0.0.1', user=user, passwd=passwd,
# db=self.database_name,
# port=server.local_bind_port, charset='utf8',
# cursorclass=pymysql.cursors.DictCursor,
# connect_timeout=7200)
# self.cursor = self.conn.cursor()
self.conn = pymysql.connect(host=self.host, user=user, passwd=passwd,
db=self.database_name,
port=3306, charset='utf8',
cursorclass=pymysql.cursors.DictCursor,
connect_timeout=7200)
self.conn.autocommit(False)
self.cursor = self.conn.cursor()
self.category_dict = self.get_category_dict()
self.brand_dict = self.get_brand_dict()
self.goods_type_dict = self.get_goods_type_dict()
# 数据库支持json的情况查询
self.db_support_json = self.db_support_json()
# 默认的商品分类 例如1042是category表的“家庭散装”的id
self.default_cat_id = 1042
self.default_goods_type = 18
# https://developer.alibaba.com/docs/api.htm?apiId=21
self.taobao_item_dict = {'prd_license_no': '生产许可证号', 'design_code': '产品标准号', 'factory': '厂名',
'factory_site': '厂址', 'contact': '厂家联系方式', 'mix': '配料表', 'plan_storage': '储藏方法',
'period': '保质期(天)', 'food_additive': ' 食品添加剂', 'supplier': '供货商',
'product_date_start': '生产开始日期', 'product_date_end': '生产结束日期',
'stock_date_start': '进货开始日期', 'stock_date_end': '进货结束日期'}
# 淘宝店的分类 https://siteadmin.taobao.com/category/index.htm
self.seller_cids_dict = {'1457344118': '美妆/个人护肤', '1457342895': '面部护理', '1457342896': '口腔护理',
'1457342897': '身体护理', '1457342898': '洗发护发', '1457344114': '宣威火腿',
'1457344115': '熟食罐头', '1457344116': '家庭散装', '1457344117': '火腿礼盒',
'1457344112': '食品/特产', '1457344113': '茶叶', '1542461584': '月饼', '': '火腿饼',
'1542461586': '鲜花饼', '1542461587': '其它'}
# 检查MySQl是否支持json格式,5.7以上支持
def db_support_json(self) -> bool:
check_sql = "select version() as version"
self.cursor.execute(check_sql)
v_data = self.cursor.fetchone()
if v_data:
v_str = v_data.get('version')
if '-' in v_str:
v_str = v_str.split('-')[0]
v_array = v_str.split('.')
v_str = v_array[0] + '.' + v_array[1]
return float(v_str) > 5.7
return False
# 检查数据库里哪些商品在淘宝店找不到对应的id
def check_not_in_taobao_id(self):
# 注意编码是utf-16
df = pd.read_csv(self.csv_file_name, sep='\t', encoding='utf-16', header=1)
taobao_id_list = []
for row_index, row in df.iterrows():
if 0 == row_index:
# 标题跳过
continue
# 数字ID 数据例如:601258429194
num_id = row.get('num_id')
taobao_id_list.append(num_id)
check_sql = f"""
SELECT goods_id, goods_name, extension_info, seller_note from {self.table_prifix}goods WHERE extension_info != ''
and is_delete = 0 and is_on_sale = 1;
"""
self.cursor.execute(check_sql)
extension_info_list = self.cursor.fetchall()
if extension_info_list:
not_in_taobao_id_list = []
for data in extension_info_list:
# 解析json
extension_info_dict = json.loads(data.get('extension_info'))
taobao_id = extension_info_dict.get('taobao_id')
if not taobao_id:
continue
taobao_id = str(taobao_id)
goods_id = str(data.get('goods_id'))
seller_note = data.get('seller_note')
goods_name = data.get('goods_name')
if taobao_id not in taobao_id_list:
print(f"网站id为{goods_id}的商品:{goods_name}在淘宝店找不到!,seller_note为:{seller_note},更新")
not_in_taobao_id_list.append(goods_id)
if len(not_in_taobao_id_list) >= 1:
not_in_taobao_ids = ','.join(not_in_taobao_id_list)
update_seller_note_sql = f"""
UPDATE {self.table_prifix}goods
SET seller_note = '此商品在淘宝店找不到'
WHERE goods_id in ({not_in_taobao_ids})
"""
self.cursor.execute(update_seller_note_sql)
# 检查数据表是否有extension_info字段,类型是json,没有则创建
def check_column(self) -> None:
check_column_sql = f"""
SELECT * FROM information_schema.columns WHERE TABLE_SCHEMA = '{self.database_name}'
and TABLE_NAME = '{self.table_prifix}goods' AND COLUMN_NAME = 'extension_info'
"""
self.cursor.execute(check_column_sql)
column_list = self.cursor.fetchall()
if not column_list or len(column_list) == 0:
# 创建字段
add_column_sql = f"""
ALTER TABLE {self.table_prifix}goods ADD extension_info json NULL COMMENT '淘宝、拼多多、天猫、京东等第三方平台的商品id';
"""
if not self.db_support_json:
add_column_sql = f"""
ALTER TABLE {self.table_prifix}goods ADD extension_info varchar(255) NULL COMMENT '淘宝、拼多多、天猫、京东等第三方平台的商品id';
"""
self.cursor.execute(add_column_sql)
# 设置日志
def set_logging(self) -> None:
today = datetime.date.today().strftime("%Y-%m-%d")
logging.basicConfig(
# 控制台打印的日志级别
level=logging.INFO,
# 日志格式
format='%(asctime)s %(funcName)s [line:%(lineno)d] %(levelname)s %(message)s',
# 日志文件名
filename=os.path.join(os.getcwd(), 'taobao_assistant_2_shop' + '-' + today + '.txt'),
# 模式,有w和a,w就是写模式,每次都会重新写日志,覆盖之前的日志, a是追加模式,默认如果不写的话,就是追加模式
filemode='a'
)
def read_csv(self):
# 注意编码是utf-16
df = pd.read_csv(self.csv_file_name, sep='\t', encoding='utf-16', header=1)
# df.head(): 显示数据框 df 的前5行
# print(df.head(3))
# df.info(): 显示数据摘要
# print(df.info())
# describe统计下数据量、标准值、平均值、最大值等
# print(df.describe())
# 获取行数
# number_of_rows = len(df)
c_month = datetime.date.today().strftime("%Y%m")
pictures_catalog = f'images/{c_month}/'
for row_index, row in df.iterrows():
# data = row.to_dict()
# print(data)
if 0 == row_index:
# 标题跳过
continue
# 数字ID 数据例如:601258429194
num_id = row.get('num_id')
if tb_id and isinstance(tb_id, int):
# 如果运行时传了taobao_id参数,那么直处理这个id的数据,其它的都跳过不处理。如果没有传这个参数,则是处理全部数据
if num_id != tb_id:
continue
# 宝贝名称 数据例如:包邮 银杏茶(红茶) 勝境關牌 128克 云南银杏
title = row.get('title')
# 宝贝类目 这是淘宝发布商品时不选的类目,这里用不到 数据例如:125252011
cid = row.get('cid')
# 店铺类目 淘宝网店上自己维护的分类 在这里维护:https://siteadmin.taobao.com/category/index.htm 数据例如:1457344113
seller_cids = row.get('seller_cids')
# 新旧程度 用不到的字段 数据例如:1
stuff_status = row.get('stuff_status')
# 省 数据例如:云南
location_state = row.get('location_state')
# 城市 数据例如:曲靖
location_city = row.get('location_city')
# 出售方式 数据例如:1
item_type = row.get('item_type')
# 宝贝价格 数据例如:186
price = row.get('price')
# 加价幅度 数据例如:0.00
auction_increment = row.get('auction_increment')
# 宝贝数量 数据例如:588
num = row.get('num')
# mysql 的SMALLINT,无符号的最大值是65535
if num and int(num) > 65535:
num = 65535
# 有效期 数据例如:7
valid_thru = row.get('valid_thru')
# 运费承担 数据例如:1
freight_payer = row.get('freight_payer')
# 平邮 数据例如:1.17447e+10
post_fee = row.get('post_fee')
# EMS 数据例如:3.21792e-37
ems_fee = row.get('ems_fee')
# 快递 数据例如:0
express_fee = row.get('express_fee')
# 发票 数据例如:0
has_invoice = row.get('has_invoice')
# 保修 数据例如:0
has_warranty = row.get('has_warranty')
# 放入仓库 数据例如:1
approve_status = row.get('approve_status')
# 橱窗推荐 数据例如:0
has_showcase = row.get('has_showcase')
# 开始时间 数据例如:2020-06-18 09:22:26
list_time = row.get('list_time')
# 宝贝描述 数据例如:<p><strong style="color:#daa520;font-size:28.0px;font-style:normal;letter-spacing:normal;orphans:2;text-align:start;text-indent:0.0px;text-transform:none;white-space:normal;widows:2;word-spacing:0.0px;background-color:#ffffff;">&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;128g/盒*90盒/件</strong> <img align="absmiddle" src="https://img.alicdn.com/imgextra/i1/3052091257/O1CN012ts8Gf1L9kPR9MiEG_!!3052091257.jpg" style="max-width:none;width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i2/3052091257/O1CN01u8LY4V1L9kPN3ATkS_!!3052091257.jpg" style="max-width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i2/3052091257/O1CN016p8Fnu1L9kPP1oT39_!!3052091257.jpg" style="max-width:none;width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i4/3052091257/O1CN01PaBu5I1L9kPOwVABE_!!3052091257.jpg" style="max-width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i2/3052091257/O1CN016I2hY81L9kPP1qooH_!!3052091257.jpg" style="max-width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i3/3052091257/O1CN01ZUuJ6G1L9kShKMBYd_!!3052091257.jpg" style="max-width:none;width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i3/3052091257/O1CN01Y02Qec1L9kShc4m3o_!!3052091257.jpg" style="max-width:none;width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i3/3052091257/O1CN01l47h3z1L9kSj4jTLT_!!3052091257.jpg" style="max-width:none;width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i1/3052091257/O1CN01HYYgI91L9kPim1HnR_!!3052091257.jpg" style="max-width:750.0px;"><img align="absmiddle" src="https://img.alicdn.com/imgextra/i4/3052091257/O1CN01cfDuot1L9kPlqtoNh_!!3052091257.gif" style="max-width:750.0px;"></p>
description = row.get('description')
# 清理掉不必要的样式
description = self.clean_attrs(row.get('description'))
# 添加自适应图片样式
description = description.replace('<img ', '<img class="img-fluid" ').replace('<span><span>', '')
# 宝贝属性 需要到input_custom_cpv里面找
cateProps = row.get('cateProps')
# 邮费模版ID 数据例如:11489891460
postage_id = row.get('postage_id')
# 会员打折 数据例如:0
has_discount = row.get('has_discount')
# 修改时间 数据例如:2020-06-23 22:11:01
modified = row.get('modified')
# 上传状态 数据例如:200
upload_fail_msg = row.get('upload_fail_msg')
# 图片状态 数据例如:1;1;1;1;1;
picture_status = row.get('picture_status')
# 返点比例 数据例如:0
auction_point = row.get('auction_point')
# 新图片 数据例如:21d081a080ce8ce6834c476d5e470225:1:0:|https://img.alicdn.com/bao/uploaded/i3/3052091257/O1CN01xkxHrc1L9kSdG6UhG_!!3052091257.jpg;9e37773991b10e67f6cd154be5553cbb:1:1:|https://img.alicdn.com/bao/uploaded/i2/3052091257/O1CN016p8Fnu1L9kPP1oT39_!!3052091257.jpg;54c5ed06a53b43c7f6538e6374382c41:1:2:|https://img.alicdn.com/bao/uploaded/i4/3052091257/O1CN01PaBu5I1L9kPOwVABE_!!3052091257.jpg;b3681ee8ceaf3fe49d9ac2fde0f9a644:1:3:|https://img.alicdn.com/bao/uploaded/i2/3052091257/O1CN016I2hY81L9kPP1qooH_!!3052091257.jpg;3415090ce94e738411ce0e14fc47ad05:1:4:|https://img.alicdn.com/bao/uploaded/i2/3052091257/O1CN01u8LY4V1L9kPN3ATkS_!!3052091257.jpg;
picture = row.get('picture')
# 视频 数据例如:nan
video = row.get('video')
# 销售属性组合 数据例如:nan
skuProps = row.get('skuProps')
# 用户输入ID串 数据例如:nan
inputPids = row.get('inputPids')
# 用户输入名-值对 数据例如:nan
inputValues = row.get('inputValues')
# 商家编码 数据例如:nan
outer_id = row.get('outer_id')
# 销售属性别名 数据例如:nan
propAlias = row.get('propAlias')
# 代充类型 数据例如:0
auto_fill = row.get('auto_fill')
# 本地ID 数据例如:0
local_cid = row.get('local_cid')
# 宝贝分类 数据例如:2
navigation_type = row.get('navigation_type')
# 用户名称 数据例如:云特宣威火腿
user_name = row.get('user_name')
# 宝贝状态 数据例如:1
syncStatus = row.get('syncStatus')
# 闪电发货 数据例如:0
is_lighting_consigment = row.get('is_lighting_consigment')
# 新品 数据例如:0
is_xinpin = row.get('is_xinpin')
# 食品专项 数据例如:contact:0874-8970360;design_code:;factory:宣威市宣特火腿有限公司;factory_site:云南省曲靖市宣威市西宁街道复兴村、云南省曲靖市宣威市西泽乡睦乐村委会;food_additive:无;mix:宣威火腿;period:360;plan_storage:阴凉干燥处;prd_license_no:SC10953038100426;product_date_end:2020-06-12;product_date_start:2020-06-01;stock_date_end:2020-06-13;stock_date_start:2020-06-13;supplier:宣威市宣特火腿有限公司
foodparame = row.get('foodparame')
# 尺码库 数据例如:tags:381698
features = row.get('features')
# 采购地 数据例如:0
buyareatype = row.get('buyareatype')
# 库存类型 数据例如:-1
global_stock_type = row.get('global_stock_type')
# 国家地区 数据例如:nan
global_stock_country = row.get('global_stock_country')
# 库存计数 数据例如:1
sub_stock_type = row.get('sub_stock_type')
# 物流体积 数据例如:nan
item_size = row.get('item_size')
# 物流重量 数据例如:nan
item_weight = row.get('item_weight')
# 退换货承诺 数据例如:1
sell_promise = row.get('sell_promise')
# 定制工具 数据例如:nan
custom_design_flag = row.get('custom_design_flag')
# 无线详情 数据例如:nan
wireless_desc = row.get('wireless_desc')
# 商品条形码 数据例如:6970839992286
barcode = row.get('barcode')
# sku 条形码 数据例如:nan
sku_barcode = row.get('sku_barcode')
# 7天退货 数据例如:1
newprepay = row.get('newprepay')
# 宝贝卖点 数据例如:nan
subtitle = row.get('subtitle')
# 属性值备注 数据例如:nan
cpv_memo = row.get('cpv_memo')
# 自定义属性值 数据例如:nan
input_custom_cpv = row.get('input_custom_cpv')
# 商品资质 数据例如:%7B%7D
qualification = row.get('qualification')
# 增加商品资质 数据例如:1
add_qualification = row.get('add_qualification')
# 关联线下服务 数据例如:nan
o2o_bind_service = row.get('o2o_bind_service')
# tmall扩展字段 数据例如:nan
tmall_extend = row.get('tmall_extend')
# 产品组合 数据例如:nan
product_combine = row.get('product_combine')
# tmall属性组合 数据例如:nan
tmall_item_prop_combine = row.get('tmall_item_prop_combine')
# taoschema扩展字段 数据例如:nan
taoschema_extend = row.get('taoschema_extend')
# 开始往数据库写入数据
# 第一步 是查询分类,如果不存在则使用默认分类
cat_id = self.get_seller_cid(seller_cids)
brand_id = 0
# 第二步查询品牌,先从foodparame(食品专项)里面查找,找不到再从inputValues(用户输入名-值对)查找
# 分号分割
if not isinstance(foodparame, str):
foodparame = None
foodparame_dict = dict()
if foodparame:
foodparame_list = foodparame.split(';')
for foodparame_str in foodparame_list:
f_list = foodparame_str.split(':')
if not f_list[1]:
continue
foodparame_name = f_list[0]
# 英文转为中文
foodparame_name = self.taobao_item_dict.get(foodparame_name)
foodparame_dict.update({foodparame_name: f_list[1]})
# 厂名
factory = foodparame_dict.get('厂名')
# 根据厂名获取品牌id
brand_id = self.brand_dict.get(factory)
if brand_id is None:
brand_id = 0
if inputValues:
factory = inputValues.split(';')[0]
if factory:
for k, v in self.brand_dict.items():
if factory in k:
brand_id = v
break
if not outer_id:
# 唯一的货号,如果不存在,则用淘宝的id
outer_id = num_id
if self.use_taobao_pictures:
img = picture.split(';')[0].split('|')[1]
# 原始图片,用tbi文件改后缀为jpg
original_img = img
goods_thumb = img
goods_img = img
else:
img = picture.split(';')[0].split(':')[0] + '.jpg'
# 原始图片,用tbi文件改后缀为jpg
original_img = pictures_catalog + 'source_img/' + img
goods_thumb = pictures_catalog + 'thumb_img/' + img
goods_img = pictures_catalog + 'goods_img/' + img
# add_time
# 先转换修改时间为时间数组
if modified.endswith(' '):
modified = modified[0: len(modified) - 1]
time_array = time.strptime(modified, "%Y-%m-%d %H:%M:%S")
# 转换为时间戳
add_time = int(time.mktime(time_array))
# goods_type
goods_type = self.default_goods_type
for k, v in self.goods_type_dict.items():
if k in title:
goods_type = v
break
# 注意non值
goods_sn = outer_id
goods_weight = item_weight
keywords = inputValues
goods_brief = subtitle
if goods_sn and isinstance(goods_sn, float) and str(goods_sn) == 'nan':
goods_sn = ''
if goods_weight and isinstance(goods_weight, float) and str(goods_weight) == 'nan':
goods_weight = 0
if keywords and isinstance(keywords, float) and str(keywords) == 'nan':
keywords = ''
if goods_brief and isinstance(goods_brief, float) and str(goods_brief) == 'nan':
goods_brief = ''
# json数据 淘宝 拼多多等id
extension_info = '{"taobao_id":' + num_id + '}'
# 保存商品
goods_dict = {
'cat_id': cat_id,
# 商品的唯一货号,跟淘宝店保持一致
'goods_sn': goods_sn,
'goods_name': title,
'goods_name_style': '+',
# 点击量,随机数
'click_count': random.randint(100, 10000),
'brand_id': brand_id,
# 供应商id,默认11
'provider_name': 1,
# 商品库存数量
'goods_number': num,
# 商品的重量,以千克为单位
'goods_weight': goods_weight,
# 市场价格 乘以1.2 取整数
'market_price': int(float(price) * 1.2),
'shop_price': price,
# 促销价格
'promote_price': 0,
# 促销价格开始日期
'promote_start_date': 0,
# 促销价格结束日期
'promote_end_date': 0,
# 商品报警数量 10
'warn_number': 10,
# 用inputValues作为关键词
'keywords': keywords,
# 商品的简短描述 用卖点
'goods_brief': goods_brief,
'goods_desc': description,
# 商品在前台显示的微缩图片,如在分类筛选时显示的小图片
'goods_thumb': goods_thumb,
# 商品的实际大小图片,如进入该商品页时介绍商品属性所显示的大图片
'goods_img': goods_img,
# 应该是上传的商品的原始图片
'original_img': original_img,
# 是否是实物,1,是;0,否;比如虚拟卡就为0,不是实物
'is_real': 1,
# 商品的扩展属性,比如像虚拟卡
'extension_code': '',
# 该商品是否开放销售,1,是;0,否
'is_on_sale': syncStatus,
# 是否能单独销售,1,是;0,否;如果不能单独销售,则只能作为某商品的配件或者赠品销售
'is_alone_sale': 1,
# 购买该商品可以使用的积分数量,估计应该是用积分代替金额消费 价格乘以100
'integral': int(float(price) * 100),
'add_time': add_time,
# 商品的显示顺序
'sort_order': 100,
# 商品是否已经删除,0,否;1,已删除
'is_delete': 0,
# 是否是精品;0,否;1,是
'is_best': 0,
# 是否是新品
'is_new': 0,
# 是否热销,0,否;1,是
'is_hot': 0,
# 是否特价促销;0,否;1,是
'is_promote': has_showcase,
# 购买该商品所能领到的红包类型
'bonus_type_id': 0,
'last_update': add_time,
# 商品所属类型id,取值表goods_type的cat_id
'goods_type': goods_type,
# 商品的商家备注,仅商家可见 把淘宝id写入商家备注
'seller_note': '来自淘宝',
# 购买该商品时每笔成功交易赠送的积分数量
'give_integral': -1,
'rank_integral': -1,
'suppliers_id': 0,
'is_check': 1,
'is_shipping': 0,
'virtual_sales': 0,
'extension_info': extension_info
}
# 先查询是否存在,存在则更新
# select_goods_sql = f"""SELECT goods_id FROM {self.table_prifix}goods WHERE extension_info -> '$.taobao_id'
# = {num_id}"""
# 如果数据库不支持json,只能用like来查询
select_goods_sql = f"""SELECT goods_id FROM {self.table_prifix}goods WHERE extension_info like
'%{num_id}%'"""
self.cursor.execute(select_goods_sql)
goods = self.cursor.fetchone()
if goods:
goods_id = goods.get('goods_id')
self.update_goods(goods_dict, goods_id)
# 删除商品属性
del_goods_attr_sql = f"""
delete from {self.table_prifix}goods_attr where goods_id = {goods_id}
"""
# 删除相册和属性
del_goods_gallery_sql = f"""
delete from {self.table_prifix}goods_gallery where goods_id = {goods_id}
"""
self.cursor.execute(del_goods_gallery_sql)
self.cursor.execute(del_goods_attr_sql)
print(f"更新商品{goods_id}{title}")
else:
# 保存,获取id
goods_id = self.insert_goods(goods_dict)
print(f"保存商品{goods_id},{num_id}{title}")
# 然后去保存相册
insert_goods_gallery_sql = f"""
INSERT INTO {self.table_prifix}goods_gallery ( goods_id, img_url, img_desc, thumb_url, img_original,
sort_order )
VALUES
(%(goods_id)s, %(img_url)s, %(img_desc)s, %(thumb_url)s, %(img_original)s, %(sort_order)s);
"""
goods_gallery_list = []
for pic_info in picture.split(';'):
if not pic_info:
continue
# img_url = pic_info_array[1]
if self.use_taobao_pictures:
pic = pic_info.split('|')[0].split('|')[1]
original_img = pic
img_url = pic
thumb_url = pic
else:
pic = pic_info.split('|')[0].split(':')[0] + '.jpg'
original_img = pictures_catalog + 'source_img/' + pic
img_url = pictures_catalog + 'goods_img/' + pic
thumb_url = pictures_catalog + 'thumb_img/' + pic
goods_gallery = {'goods_id': goods_id, 'img_url': img_url, 'img_desc': title, 'thumb_url': thumb_url,
'img_original': original_img, 'sort_order': 100}
goods_gallery_list.append(goods_gallery)
self.cursor.executemany(insert_goods_gallery_sql, goods_gallery_list)
if foodparame:
# 开始保存属性
# attr_input_type 当添加商品时,该属性的添加类别; 0为手功输入;1为选择输入;2为多行文本输入
# attr_values 即选择输入,则attr_name对应的值的取值就是该这字段值
select_attribute_sql = f"""
SELECT attr_id, attr_name, attr_values from {self.table_prifix}attribute WHERE
cat_id = {goods_type} and attr_input_type = 0
"""
self.cursor.execute(select_attribute_sql)
attribute_list = self.cursor.fetchall()
attribute_dict = dict()
for attribute in attribute_list:
attribute_dict.update({attribute.get('attr_name'): attribute.get('attr_id')})
insert_goods_attr_sql = f"""
INSERT INTO {self.table_prifix}goods_attr ( goods_id, attr_id, attr_value, attr_price )
VALUES
( %(goods_id)s, %(attr_id)s, %(attr_value)s, %(attr_price)s );
"""
goods_attr_list = []
for k, v in foodparame_dict.items():
attr_id = attribute_dict.get(k)
if not attr_id:
continue
goods_attr = {'goods_id': goods_id, 'attr_id': attr_id, 'attr_value': v, 'attr_price': 0}
goods_attr_list.append(goods_attr)
self.cursor.executemany(insert_goods_attr_sql, goods_attr_list)
def get_seller_cid(self, seller_cids: int) -> int:
cat_name = self.seller_cids_dict.get(str(seller_cids))
if not cat_name:
return self.default_cat_id
seller_cid = self.category_dict.get(cat_name)
if not seller_cid:
return self.default_cat_id
return seller_cid
# 清理商品描述里面的html属性
def clean_attrs(self, html_string: str) -> str:
if html_string:
safe_attrs = set(['src', 'href', 'colspan', 'rowspan'])
cleaner = clean.Cleaner(safe_attrs=safe_attrs)
return cleaner.clean_html(html_string)
return html_string
def update_goods(self, goods_dict: dict, goods_id: int) -> None:
# 更新数据 大部分字段都不做更新
update_goods_sql = f"""
update {self.table_prifix}goods set
goods_name = %(goods_name)s,
goods_number = %(goods_number)s,
goods_weight = %(goods_weight)s,
market_price = %(market_price)s,
shop_price = %(shop_price)s,
promote_price = %(promote_price)s,
keywords = %(keywords)s,
goods_brief = %(goods_brief)s,
goods_desc = %(goods_desc)s,
goods_thumb = %(goods_thumb)s,
goods_img = %(goods_img)s,
original_img = %(original_img)s,
add_time = %(add_time)s,
last_update = %(last_update)s
WHERE goods_id = {goods_id}
"""
self.cursor.execute(update_goods_sql, goods_dict)
def insert_goods(self, goods_dict: dict) -> int:
insert_sql = f"""
INSERT INTO {self.table_prifix}goods (
cat_id,
goods_sn,
goods_name,
goods_name_style,
click_count,
brand_id,
provider_name,
goods_number,
goods_weight,
market_price,
shop_price,
promote_price,
promote_start_date,
promote_end_date,
warn_number,
keywords,
goods_brief,
goods_desc,
goods_thumb,
goods_img,
original_img,
is_real,
extension_code,
is_on_sale,
is_alone_sale,
integral,
add_time,
sort_order,
is_delete,
is_best,
is_new,
is_hot,
is_promote,
bonus_type_id,
last_update,
goods_type,
seller_note,
give_integral,
rank_integral,
suppliers_id,
is_check,
is_shipping,
virtual_sales,
extension_info
)
VALUES
(
%(cat_id)s,
%(goods_sn)s,
%(goods_name)s,
%(goods_name_style)s,
%(click_count)s,
%(brand_id)s,
%(provider_name)s,
%(goods_number)s,
%(goods_weight)s,
%(market_price)s,
%(shop_price)s,
%(promote_price)s,
%(promote_start_date)s,
%(promote_end_date)s,
%(warn_number)s,
%(keywords)s,
%(goods_brief)s,
%(goods_desc)s,
%(goods_thumb)s,
%(goods_img)s,
%(original_img)s,
%(is_real)s,
%(extension_code)s,
%(is_on_sale)s,
%(is_alone_sale)s,
%(integral)s,
%(add_time)s,
%(sort_order)s,
%(is_delete)s,
%(is_best)s,
%(is_new)s,
%(is_hot)s,
%(is_promote)s,
%(bonus_type_id)s,
%(last_update)s,
%(goods_type)s,
%(seller_note)s,
%(give_integral)s,
%(rank_integral)s,
%(suppliers_id)s,
%(is_check)s,
%(is_shipping)s,
%(virtual_sales)s,
%(extension_info)s
);
"""
self.cursor.execute(insert_sql, goods_dict)
id = self.conn.insert_id()
return id
# 网店的商品分类
def get_category_dict(self) -> dict:
# 从数据库查询
select_sql = f"SELECT cat_id, cat_name from {self.table_prifix}category"
self.cursor.execute(select_sql)
rows = self.cursor.fetchall()
category_dict = dict()
for row in rows:
category_dict.update({row.get('cat_name'): row.get('cat_id')})
return category_dict
def get_goods_type_dict(self) -> dict:
# 从数据库查询
select_sql = f"SELECT cat_id, cat_name from {self.table_prifix}goods_type"
self.cursor.execute(select_sql)
rows = self.cursor.fetchall()
goods_type_dict = dict()
for row in rows:
cat_name = row.get('cat_name')
if '(' in cat_name:
cat_name = cat_name.split('(')[0]
goods_type_dict.update({cat_name: row.get('cat_id')})
return goods_type_dict
def get_brand_dict(self) -> dict:
# 从数据库查询
select_sql = f"SELECT brand_id, brand_desc from {self.table_prifix}brand"
self.cursor.execute(select_sql)
rows = self.cursor.fetchall()
brand_dict = dict()
for row in rows:
brand_desc = row.get('brand_desc')
if '联系电话' in brand_desc:
brand_desc = brand_desc.split('联系电话')[0]
brand_desc = brand_desc.replace(' ', '')
brand_dict.update({brand_desc: row.get('brand_id')})
return brand_dict
if __name__ == "__main__":
taobao_assistant_2_shop = TaobaoAssistant2Shop()
# 检查数据库表goods是否已经有extension_info字段,没有则添加
taobao_assistant_2_shop.check_column()
# 检查数据库里哪些商品在淘宝店找不到对应的id
taobao_assistant_2_shop.check_not_in_taobao_id()
# 读取csv,然后导入数据库
taobao_assistant_2_shop.read_csv()
# 数据库连接提交
taobao_assistant_2_shop.conn.commit()
# 关闭游标
taobao_assistant_2_shop.cursor.close()
# 关闭数据库连接
taobao_assistant_2_shop.conn.close()
# server.close()
# 退出程序
sys.exit(0)
Python
1
https://gitee.com/fuwu360/fuwu360-tools.git
git@gitee.com:fuwu360/fuwu360-tools.git
fuwu360
fuwu360-tools
fuwu360-tools
master

搜索帮助