代码拉取完成,页面将自动刷新
#coding=utf-8
'''
Create by Hanjt and date is 2018-12-24
脚本用于导入excel表格数据入mysql数据库,excel表格文件为运营部门同事整理的伪评论数据
执行脚本时,需安装以上导入模块
执行命令为:python mkcm.py
Python 版本为 3.7(最低3.0以上)
'''
import pymysql
import xlrd
import sys
import logging
import random
import datetime
import hashlib
import time
import tkinter as tk
import tkinter.messagebox
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
# logging.basicConfig(filename='mkcm.log', level=logging.DEBUG, format=LOG_FORMAT)
# the data for comment from the file of excel to imported into database.
data_list_from_excel = []
global length_data_list
length_data_list = 0
'''
连接数据库
'''
def connect_db(db):
if "pro" == db:
return pymysql.connect(host='****',
port=3306,
user='followenjoy',
password='****',
database='followenjoy',
charset='utf8')
else:
return pymysql.connect(host='****',
port=3306,
user='root',
password='****',
database='followenjoy_db',
charset='utf8')
'''
随机生成一个订单ID
'''
def order_id_gen():
df = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
return str(df) + str(random.randint(100000,999999))
'''
随机生成一个快照ID
'''
def snapshot_id_gen():
# 此处借用随机生成的订单ID进行md5
m = hashlib.md5()
m.update(bytes(str(order_id_gen()), encoding="utf-8"))
return m.hexdigest()
'''
随机生成一个用户ID
'''
def user_id_gen():
r = random.randint(0,9)
if r == 0:
return random.randint(15000000000, 15099999999)
elif r == 1:
return random.randint(18900000000, 18999999999)
elif r == 2:
return random.randint(13900000000, 13999999999)
elif r == 3:
return random.randint(18800000000, 18899999999)
elif r == 4:
return random.randint(13500000000, 13599999999)
elif r == 5:
return random.randint(13800000000, 13899999999)
elif r == 6:
return random.randint(13700000000, 13799999999)
elif r == 7:
return random.randint(15200000000, 15299999999)
elif r == 8:
return random.randint(18000000000, 18099999999)
elif r == 9:
return random.randint(15700000000, 15799999999)
'''
随机生成一个评论时间,10-100天
'''
def add_time_gen():
return int(time.time()) - random.randint(864000,864000*20)
'''
重装数据
'''
def repaire_data():
length_data = len(data_list_from_excel)
for i in range(length_data):
count = random.randint(1,4)
add_time = add_time_gen()
data_dict = {
"user_id": user_id_gen(),
"order_id": order_id_gen(),
"snapshot_id": snapshot_id_gen(),
"satisfaction_num": random.randint(4,5),
"service_star": random.randint(3,5),
"logistic_star": random.randint(3,5),
"admire_num": random.randint(0,50),
"add_time": add_time,
"revert_time": add_time + random.randint(86400, 86400*10), # 10内回复
"is_recomment": 0,
"is_quintessence": 0,
"count": count,
"s1": str(count) + "套",
"s2": "国内发货"
}
data_list_from_excel[i].update(data_dict)
'''
获取excel表格数据
'''
def read_excel(excel_dir):
workbook = xlrd.open_workbook(excel_dir)
# sheet_default_name = workbook.sheet_names()[1]
# sheet_default = workbook.sheet_by_index(1)
sheets = workbook.sheets()
l = len(sheets)
for i in range(l):
sheet = sheets[i]
length_sheet = sheet.nrows
# logging.info("the rows of sheet: " + str(sheet.nrows))
# logging.info("the cols of sheet: " + str(sheet.ncols))
for i in range(1,length_sheet):
logging.info("loop the row of no." + str(i))
# get the data of this row
row = sheet.row_values(i)
# the object of one comment to imported into database.
user_head = row[3]
if ".jpg" not in user_head:
user_head = user_head + ".jpg"
data_dict = {
"goods_id": int(str(row[0])[:-2]),
"goods_title": row[1],
"content": str(row[2]).strip(),
"user_head": str(user_head).strip(),
"user_name": str(row[4]).strip(),
"revert_content": str(row[5]).strip()
}
# logging.info("data: " + str(data_dict))
data_list_from_excel.append(data_dict)
global length_data_list
length_data_list = len(data_list_from_excel)
# logging.info(len(data_list_from_excel))
'''
将数据存入数据库
'''
def insert_into_database(db):
params = []
for i in range(length_data_list):
t = (
data_list_from_excel[i].get("goods_id"),
data_list_from_excel[i].get("goods_title"),
data_list_from_excel[i].get("content"),
data_list_from_excel[i].get("user_head"),
data_list_from_excel[i].get("user_name"),
data_list_from_excel[i].get("revert_content"),
data_list_from_excel[i].get("user_id"),
data_list_from_excel[i].get("order_id"),
data_list_from_excel[i].get("snapshot_id"),
data_list_from_excel[i].get("satisfaction_num"),
data_list_from_excel[i].get("service_star"),
data_list_from_excel[i].get("logistic_star"),
data_list_from_excel[i].get("admire_num"),
data_list_from_excel[i].get("add_time"),
data_list_from_excel[i].get("revert_time"),
data_list_from_excel[i].get("is_recomment"),
data_list_from_excel[i].get("is_quintessence"),
data_list_from_excel[i].get("count"),
data_list_from_excel[i].get("s1"),
data_list_from_excel[i].get("s2")
)
params.append(t)
conn = connect_db(db)
cur = conn.cursor()
try:
cur.executemany("INSERT INTO c_order_comment_tmp (goods_id,\
goods_title,\
content,\
user_head,\
user_name,\
revert_content,\
user_id,\
order_id,\
snapshot_id,\
satisfaction_num,\
service_star,\
logistic_star,\
admire_num,\
add_time,\
revert_time,\
is_recomment,\
is_quintessence,\
count,\
s1,\
s2) \
VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", params)
assert cur.rowcount == len(data_list_from_excel), 'execute sql error'
conn.commit()
except Exception:
conn.rollback()
logging.exception('Insert into database error')
finally:
cur.close()
conn.close()
def _main(excel_dir, pd):
# logging.info('The directory of excel is: ' + excel_dir)
read_excel(excel_dir)
repaire_data()
# for d in data_list_from_excel:
# logging.info(str(d))
insert_into_database(pd)
'''
桌面应用入口
'''
def main():
excel_dir = var_excel_dir.get()
db = var_db.get()
print(str(excel_dir))
print(str(db))
try:
read_excel(excel_dir)
repaire_data()
insert_into_database(db)
tk.messagebox.showinfo('提示','导入成功')
except:
tk.messagebox.showerror('警告','导入失败')
window = tk.Tk()
window.title('伪评论导入辅助插件')
window.geometry('600x400')
canvas = tk.Canvas(window, height=200, width=500)
image_file = tk.PhotoImage(file='welcome.gif')
image = canvas.create_image(0,0, anchor='nw', image=image_file)
canvas.pack(side='top')
tk.Label(window, text='填写Excel文件路径: ').place(x=150, y= 150)
var_excel_dir = tk.StringVar()
var_excel_dir.set('excel.xls')
entry_excel_dir = tk.Entry(window, textvariable=var_excel_dir)
entry_excel_dir.place(x=260, y=150)
dbs = [
('开发库', 'dev'),
('正式库', 'pro')
]
var_db = tk.StringVar()
var_db.set('dev')
for d,v in dbs:
r = tk.Radiobutton(window, text=d,variable=var_db,value=v)
r.pack()
btn_import = tk.Button(window, text='开始导入', command=main)
btn_import.place(x=270, y=280)
window.mainloop()
if __name__ == '__main__':
# excel_dir = sys.argv[1]
# excel_dir = 'excel.xls'
# excel_dir = input("请输入文件路径:")
# pd = input("请选择导入的目标数据库类型 dev/pro?")
# if "dev" == pd or "pro" == pd:
# print("正在执行...")
# s = time.time()
# _main(excel_dir,pd)
# e = time.time()
# print("执行结束,总计耗时:" + str(e-s) + "秒, 共导入数据:" + str(len(data_list_from_excel)) + "条.")
# else:
# print("请正确选择数据库类型.")
pass
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。