# [web page residue, not part of the script] Code pull complete; the page will refresh automatically.
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
from pathlib import Path
from rapidocr import RapidOCR
from wired_table_rec import WiredTableRecognition
from wired_table_rec.main import WiredTableInput
from wired_table_rec.utils.utils import VisTable
# Directory that receives every artefact written by this demo; it is created
# up front so the later save steps can assume it exists.
output_dir = Path("outputs")
output_dir.mkdir(exist_ok=True, parents=True)

# Wired-table recognizer, configured from a WiredTableInput built with no
# arguments.
input_args = WiredTableInput()
table_engine = WiredTableRecognition(input_args)

# OCR engine whose boxes/texts/scores are handed to the table recognizer.
ocr_engine = RapidOCR()

# Callable used at the end of the demo to write the visualisation outputs.
viser = VisTable()
if __name__ == "__main__":
    # Demo: OCR one image, recognize its wired table, then write the HTML and
    # the two visualisation images under ``output_dir``.
    img_path = "tests/test_files/wired/bad_case_1.png"

    # OCR pass. The table engine is given one (box, text, score) triple per
    # detected text region.
    rapid_ocr_output = ocr_engine(img_path, return_word_box=True)
    ocr_result = list(
        zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores)
    )
    # Alternative: use single-character recognition results instead.
    # word_results = rapid_ocr_output.word_results
    # ocr_result = [[word_result[2], word_result[0], word_result[1]] for word_result in word_results]

    # Table recognition.
    table_results = table_engine(img_path, ocr_result)
    # Not used further below; kept to show which fields carry the predicted
    # HTML and the cell bounding boxes.
    table_html_str, table_cell_bboxes = (
        table_results.pred_html,
        table_results.cell_bboxes,
    )

    # Build the save paths from the module-level ``output_dir`` (already
    # created above) so the files and the final message always agree.
    img_file = Path(img_path)
    save_html_path = f"{output_dir}/{img_file.stem}.html"
    save_drawed_path = f"{output_dir}/{img_file.stem}_table_vis{img_file.suffix}"
    save_logic_path = (
        f"{output_dir}/{img_file.stem}_table_vis_logic{img_file.suffix}"
    )

    # Visualize the table recognition result; the visualizer receives the
    # three target paths.
    vis_imged = viser(
        img_path, table_results, save_html_path, save_drawed_path, save_logic_path
    )

    print(f"The results has been saved under {output_dir}")
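# [web page residue, not part of the script] This section may contain content unsuitable for display, so the page does not show it. You can review and modify it with the relevant editing tools.
# [web page residue, not part of the script] If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or valueless content, or anything violating national laws and regulations, you may submit an appeal and it will be handled as soon as possible.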