2 Star 0 Fork 0

邓龙 / Gaokao-Application

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
utils.py 3.98 KB
一键复制 编辑 原始数据 按行查看 历史
import os
import pandas as pd
import unicodedata as ud
from pandas import DataFrame
from xlsxwriter.utility import xl_range
# Input directories containing the raw pages, one per data source.
DIR_ADMISSION_SCORE_PAGES = './raw_pages/投档线'
DIR_SCORE_RANK_TABLE_PAGES = './raw_pages/一分一档表'

# Directories for the extracted data files (see the *_data_path helpers).
DIR_ADMISSION_SCORE_DATA = './data/admission_score'
DIR_SCORE_RANK_TABLE_DATA = './data/rank_score_table'

# Directory for the generated release workbooks.
DIR_RELEASE = './release'

PATH_USTC_LOGO = './image/ustc_logo_fig_2100x2100.jpg'

# Bounds of a valid total score.
MIN_SCORE, MAX_SCORE = 0, 750
# Column name -> dtype name for the admission score table.
ADMISSION_DTYPES = {
    '院校代码': 'object',
    '院校名称': 'object',
    '文史类': 'Int64',
    '理工类': 'Int64',
}

# Column name -> dtype name for the score/rank table; every column is 'Int64'.
SCORE_RANK_DTYPES = dict.fromkeys(('分值', '人数', '累计人数', '名次'), 'Int64')
# Columns required from the admission score table, each mapped to the
# names that column may carry in the HTML source.
ADMISSION_COLUMNS_NAMES = {
    '院校代码': ['院校代号', '院校代码'],
    '院校名称': ['院校名称'],
    '文史类': ['文史类', '文史类投档线'],
    '理工类': ['理工类', '理工类投档线'],
}

# Columns required from the score/rank table, each mapped to the names
# that column may carry in the HTML source.
SCORE_RANK_COLUMNS_NAMES = {
    '分值': ['分值', '总分'],
    '人数': ['人数'],
    '累计人数': ['累计人数'],
    '名次': ['名次'],
}
# Full batch name -> numeric batch id.
BATCH_NAMES = {'本科第一批': 1, '本科第二批': 2}

# Short batch name -> numeric batch id.
BATCH_SHORT_NAMES = {'一本': 1, '二本': 2}
def batch_name(batch):
    """Return the full batch name whose id in ``BATCH_NAMES`` equals *batch*.

    Raises:
        ValueError: If no entry of ``BATCH_NAMES`` has the value *batch*.
    """
    # Reverse lookup: first key (in insertion order) mapped to this id.
    found = next(
        (label for label, number in BATCH_NAMES.items() if number == batch),
        None,
    )
    if found is None:
        raise ValueError(f'未知的批次{batch}')
    return found
def batch_short_name(batch):
    """Return the short batch name whose id in ``BATCH_SHORT_NAMES`` equals *batch*.

    Raises:
        ValueError: If no entry of ``BATCH_SHORT_NAMES`` has the value *batch*.
    """
    # Reverse lookup: first key (in insertion order) mapped to this id.
    found = next(
        (label for label, number in BATCH_SHORT_NAMES.items() if number == batch),
        None,
    )
    if found is None:
        raise ValueError(f'未知的批次{batch}')
    return found
def admission_data_path(year, batch, ext='.csv'):
    """Build the path of the admission score data file for *year* and *batch*.

    The file lives under ``DIR_ADMISSION_SCORE_DATA`` and is named
    ``{year}_batch{batch}_admission_score`` followed by *ext*.
    """
    filename = f'{year}_batch{batch}_admission_score' + ext
    return os.path.join(DIR_ADMISSION_SCORE_DATA, filename)
def score_rank_data_path(year, subject, ext='.csv'):
    """Build the path of the score/rank table data file for *year* and *subject*.

    The file lives under ``DIR_SCORE_RANK_TABLE_DATA`` and is named
    ``{year}_rank_score_table_{subject}`` followed by *ext*.
    """
    filename = f'{year}_rank_score_table_{subject}' + ext
    return os.path.join(DIR_SCORE_RANK_TABLE_DATA, filename)
def rank_file_path(years, batch):
    """Build the path of the released rank-trend workbook for *batch*.

    The file name embeds the short batch name and the smallest and largest
    entries of *years*; the file is placed under ``DIR_RELEASE``.
    """
    # Resolve the batch first so an unknown batch is reported before
    # min()/max() look at *years*.
    short_name = batch_short_name(batch)
    first_year = min(years)
    last_year = max(years)
    filename = f'{short_name}高校历年录取名次趋势表({first_year}年~{last_year}年).xlsx'
    return os.path.join(DIR_RELEASE, filename)
def east_asian_len(text):
    """Return the display width of *text* in character cells.

    Characters whose East Asian Width property is ``W`` (wide) or ``F``
    (fullwidth) count as 2 cells; every other category (``Na``, ``N``,
    ``H``, ``A``) counts as 1.
    """
    wide_categories = ("W", "F")
    # Generator expression: no intermediate list is needed just to sum.
    return sum(
        2 if ud.east_asian_width(ch) in wide_categories else 1
        for ch in text
    )
def write_to_excel_table(df: DataFrame, writer, sheet_name, title, startrow=0, startcol=0):
    """Write *df* to a worksheet as a titled Excel table with fitted column widths.

    Layout, relative to ``startrow``: one title row, then the table header
    row, then the data rows.  All of it starts at column ``startcol``.

    Args:
        df: Data to write; its column labels become the table headers.
        writer: Excel writer exposing ``sheets`` and ``book``; the worksheet
            and workbook methods used here (``add_table``, ``set_column``,
            ``merge_range``, ``add_format``) are those of xlsxwriter.
        sheet_name: Name of the worksheet to write into.
        title: Text shown in the title row above the table.
        startrow: Zero-based row index of the title row.
        startcol: Zero-based index of the table's leftmost column.

    Returns:
        ``(header_row, data_col, end_row, end_col)``: the zero-based bounds
        passed to ``add_table``.
    """
    nrow, ncol = df.shape
    title_row = startrow
    header_row = startrow + 1
    data_row = startrow + 2
    end_row = data_row + nrow - 1
    data_col = startcol
    end_col = data_col + ncol - 1
    # NOTE(review): for an empty frame end_row equals header_row, i.e. the
    # table range has no data row; xlsxwriter is believed to reject such a
    # table with a warning -- confirm whether callers can pass empty frames.

    # Write the cell values only; the header row is supplied by the table.
    # startcol must be forwarded, otherwise the values land in column 0
    # while the table and title are placed at ``startcol``.
    df.to_excel(writer, sheet_name=sheet_name, startrow=data_row,
                startcol=data_col, header=False, index=False)
    worksheet = writer.sheets[sheet_name]

    # Custom column headers for the table.
    column_settings = [{'header': col} for col in df.columns]
    worksheet.add_table(header_row, data_col, end_row, end_col,
                        {'columns': column_settings})

    # Fit each column to the widest displayed text.  Computed per column in
    # plain Python rather than with DataFrame.applymap (deprecated since
    # pandas 2.1) so the same code runs on old and new pandas alike.
    for offset, label in enumerate(df.columns):
        # The header needs 2 extra cells for the filter drop-down arrow.
        header_width = east_asian_len(str(label)) + 2
        value_width = max(
            (east_asian_len(str(value)) for value in df.iloc[:, offset]),
            default=0,
        )
        # +1 for the cell borders, then clamp to the range [4, 24].
        width = min(max(max(header_width, value_width) + 1, 4), 24)
        col = data_col + offset
        worksheet.set_column(col, col, width)

    workbook = writer.book
    title_format = workbook.add_format({
        'font_name': '等线',
        'font_size': 16,
        'font_color': 'white',
        'bg_color': '#4F81BD',
        'bold': 1,
        'bottom': 6,  # double-line border
        'bottom_color': 'white',
        'align': 'center',
        'valign': 'vcenter'
    })
    if end_col == data_col:
        # A one-column table gives a single-cell range, which merge_range
        # refuses (the title would be silently dropped); write it directly.
        worksheet.write(title_row, data_col, title, title_format)
    else:
        worksheet.merge_range(title_row, data_col, title_row, end_col,
                              title, title_format)
    return header_row, data_col, end_row, end_col
1
https://gitee.com/ldeng1997/gaokao-application.git
git@gitee.com:ldeng1997/gaokao-application.git
ldeng1997
gaokao-application
Gaokao-Application
master

搜索帮助