2 Star 0 Fork 0

tf/ZjdxCKDataHandelPyThon

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main5-6.py 10.03 KB
一键复制 编辑 原始数据 按行查看 历史
Muzi 提交于 2021-03-31 18:28 . 第六步修改为 uuid
import asyncio
from aiochclient import ChClient
from aiohttp import ClientSession
import lcs
import uuid as UUID
async def main():
async with ClientSession() as s:
client = ChClient(s, url='http://115.29.55.141:8123/', user='zxcs', password='123123', database='datahouse')
alive = await client.is_alive() # returns True if connection is Ok
# step5
await client.fetch(
"ALTER TABLE datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew ADD COLUMN keywords String;")
await client.fetch(
"ALTER TABLE datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew ADD COLUMN subjects String;")
await client.fetch(
"ALTER TABLE datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew ADD COLUMN journals String;")
sql8 = (
" SELECT email from datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew " +
" where email is not null and email !='' " +
" group by email " +
" HAVING count(*) > 1 "
)
sql8_rows = await client.fetch(sql8)
if sql8_rows is not None and len(sql8_rows) > 0:
for sql8_rows_item in sql8_rows:
if sql8_rows_item != '':
sql8_1 = (
" SELECT " +
" `uuid`,`name`,`organization`,`Second_organization`,`email`,`reprintauthor`,`altname`,`country`,`firstauthor`,`organizationdept`,`keywords`,`subjects`,`journals` " +
" from datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew " +
" where email ='" + str(sql8_rows_item['email']) + "' ORDER BY LENGTH(name) DESC"
)
sql8_1_rows = await client.fetch(sql8_1)
if sql8_1_rows is not None and len(sql8_1_rows) > 0:
sameArr = []
for j in range(len(sql8_1_rows)):
itemj = sql8_1_rows[j]
namej = itemj['name']
for k in range(len(sql8_1_rows)):
if k > j:
itemk = sql8_1_rows[k]
namek = itemk['name']
if namej == namek or lcs.isLCS(namej, namek):
if itemj not in sameArr:
sameArr.append(itemj)
if itemk not in sameArr:
sameArr.append(itemk)
if len(sameArr) > 0:
# uuid = str(sameArr[0]['uuid'] or '')
uuid = str(UUID.uuid4())
uuidArr = []
newOrganization = ''
for j in range(len(sameArr)):
sameItem = sameArr[j]
newOrganization += sameItem['organization']
if j < len(sameArr) - 1:
newOrganization += ','
itemUuid = str(sameItem['uuid'])
uuidArr.append(itemUuid)
sql8_2 = (
" ALTER TABLE datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew " +
" DELETE WHERE uuid in ("
)
sql8_3 = (
"insert into datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew values(" +
"'" + uuid + "'," +
"'" + str(sameArr[0]['name'] or '') + "'," +
"'" + str(newOrganization) + "'," +
"'" + str(sameArr[0]['Second_organization'] or '') + "'," +
"'" + str(sameArr[0]['email'] or '') + "'," +
"'" + str(sameArr[0]['reprintauthor'] or '') + "'," +
"'" + str(sameArr[0]['altname'] or '') + "'," +
"'" + str(sameArr[0]['country'] or '') + "'," +
"'" + str(sameArr[0]['firstauthor'] or '') + "'," +
"'" + str(sameArr[0]['organizationdept'] or '') + "'," +
"''," +
"''," +
"''" +
")"
)
sql8_4 = (
" SELECT " +
" `uuid`,`guid`" +
" FROM datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_Experts_Relationship " +
" WHERE uuid in ("
)
sql8_5 = (
" ALTER TABLE datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_Experts_Relationship " +
" DELETE WHERE uuid in ("
)
for j in range(len(uuidArr)):
itemUuid = uuidArr[j]
itemSql = "'" + itemUuid + "'"
if j < len(uuidArr) - 1:
itemSql += ','
sql8_2 += itemSql
sql8_4 += itemSql
sql8_5 += itemSql
sql8_2 += ')'
# 删除专家
# print(f"sql8_2 -> {sql8_2}")
await client.execute(sql8_2)
# 新增新专家
# print(f"sql8_3 -> {sql8_3}")
await client.execute(sql8_3)
sql8_4 += ')'
# 查询专家关系
sql8_4_rows = await client.fetch(sql8_4)
if sql8_4_rows is not None:
sql8_5 += ')'
# 删除旧专家关系
await client.execute(sql8_5)
sql8_6 = (
"insert into datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_Experts_Relationship values"
)
for j in range(len(sql8_4_rows)):
item = sql8_4_rows[j]
guid = str(item['guid'])
sql8_6_item = "('" + uuid + "','" + guid + "')"
if j < len(sql8_4_rows) - 1:
sql8_6_item += ','
sql8_6 += sql8_6_item
await client.execute(sql8_6)
print('step 5 finish')
# step6
sql9_1 = (
" SELECT b.uuid,a.guid,a.DE,a.SC,a.SO " +
" FROM " +
" (SELECT guid,DE,SC,SO FROM datahouse.T_SCI_WOS_2020_ISSN_China WHERE guid in (SELECT guid FROM datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_Experts_Relationship GROUP BY guid)) a " +
" JOIN " +
" (SELECT uuid,guid FROM datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_Experts_Relationship GROUP BY uuid,guid) b " +
" ON a.guid = b.guid " +
" ORDER BY b.uuid"
)
# print(f"sql9_1 -> {sql9_1}")
sql9_1_rows = await client.fetch(sql9_1)
uuids = {}
for row in sql9_1_rows:
itemData = {}
itemKeywords = {}
itemSubjects = {}
itemJournals = {}
itemUuid = row["uuid"]
if itemUuid is not None and itemUuid in uuids.keys():
itemData = uuids[itemUuid]
itemKeywords = itemData['keywords']
itemSubjects = itemData['subjects']
itemJournals = itemData['journals']
de = row["DE"]
if de is not None and de != '':
if itemKeywords.get(de) is None:
itemKeywords[de] = 1
else:
itemKeywords[de] = itemKeywords[de] + 1
itemData['keywords'] = itemKeywords
sc = row["SC"]
if sc is not None and sc != '':
if itemSubjects.get(sc) is None:
itemSubjects[sc] = 1
else:
itemSubjects[sc] = itemSubjects[sc] + 1
itemData['subjects'] = itemSubjects
so = row["SO"]
if so is not None and so != '':
if itemJournals.get(so) is None:
itemJournals[so] = 1
else:
itemJournals[so] = itemJournals[so] + 1
itemData['journals'] = itemJournals
uuids[itemUuid] = itemData
for itemUuid in uuids.keys():
itemData = uuids[itemUuid]
keywords = itemData['keywords']
subjects = itemData['subjects']
journals = itemData['journals']
sql9_1_keywords_v = ",".join(keywords.keys())
sql9_1_subjects_v = ",".join(subjects.keys())
sql9_1_journals_v = ",".join(journals.keys())
sql9_2 = (
" ALTER TABLE datahouse.T_SCI_WOS_2020_ISSN_China_OutPut_ExpertNew " +
" UPDATE " +
" keywords='" + sql9_1_keywords_v + "' ," +
" subjects = '" + sql9_1_subjects_v + "' ," +
" journals='" + sql9_1_journals_v + "' " +
" where uuid = '" + itemUuid + "' "
)
# print(f"sql9_2 -> {sql9_2}")
await client.execute(sql9_2)
print('step 6 finish')
if __name__ == '__main__':
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/tf001/ZjdxCKDataHandelPyThon.git
git@gitee.com:tf001/ZjdxCKDataHandelPyThon.git
tf001
ZjdxCKDataHandelPyThon
ZjdxCKDataHandelPyThon
master

搜索帮助

D67c1975 1850385 1daf7b77 1850385