# pdf2word

**Repository Path**: enzoism/pdf2word

## Basic Information

- **Project Name**: pdf2word
- **Description**: pdf2word-可以批量完成PDF转Word
- **Primary Language**: Unknown
- **License**: Not specified
- **Default Branch**: master
- **Homepage**: None
- **GVP Project**: No

## Statistics

- **Stars**: 0
- **Forks**: 1
- **Created**: 2025-03-04
- **Last Updated**: 2025-05-13

## Categories & Tags

**Categories**: Uncategorized

**Tags**: None

## README

# 2025-03-04-PDF转Word

> DeepSeek等大模型从来都不是简单地写一个静态博客这么肤浅(太多博主都只讲这个内容了)。借助全网大神的奇思妙想,拓展我狭隘的思维边界。

@[toc]

---

## 1-参考网址

- DeepSeek编写一个PDF转Word软件(参考当前):https://blog.csdn.net/henanlion/article/details/145861672
- 个人尝试代码仓库:https://gitee.com/enzoism/pdf2word

---

## 2-学习要点

- 1)思维拓展-DeepSeek如何辅助编程
- 2)Python-编写PDF转Word:https://blog.csdn.net/2301_77717148/article/details/146015704
- 3)Python-使用tkinter:https://blog.csdn.net/2301_77717148/article/details/146015225
- 4)Python-使用threading和queue:https://blog.csdn.net/2301_77717148/article/details/146013352
- 5)Python-编写Requirements.txt:https://blog.csdn.net/2301_77717148/article/details/146012907

---

## 3-核心逻辑

- 1)使用pdf2docx库将PDF文件转换为Word文件
- 2)使用tkinter实现GUI界面
- 3)使用threading和queue实现多线程和消息队列通信
- 4)使用Requirements.txt管理依赖库

---

## 4-核心代码

- DeepSeek编写一个PDF转Word软件(参考当前):https://blog.csdn.net/henanlion/article/details/145861672

```python
import os
import queue
import subprocess
import sys
import threading
import tkinter as tk
from tkinter import ttk, filedialog, messagebox

from pdf2docx import Converter


class PDFToWordConverter:
    """Tkinter window that batch-converts the PDF files of a folder to .docx.

    The conversion runs on a background thread so the window stays
    responsive. The worker never touches widgets: it only posts
    ``(msg_type, data)`` tuples on ``self.queue``, and ``process_queue``
    (scheduled on the Tk main loop) applies them to the GUI.

    Queue messages:
        ("file_progress", (current, total, path))  -- file ``current`` (1-based) starts
        ("page_progress", percent)                 -- 0-100 within the current file
        ("complete", output_dir)                   -- whole batch finished
        ("empty", input_dir)                       -- no PDF found, nothing converted
        ("error", message)                         -- batch aborted by an exception
    """

    # Delay between two polls of the message queue, in milliseconds.
    POLL_INTERVAL_MS = 100

    def __init__(self, master):
        """Build the widgets on *master* (a Tk root/toplevel) and start polling."""
        self.master = master
        master.title("PDF批量转Word")
        master.geometry("610x295")

        # Input folder row.
        self.lbl_input = tk.Label(master, text="输入文件夹:")
        self.ent_input = tk.Entry(master, width=30)
        self.btn_input = tk.Button(master, text="选择", command=self.select_input)

        # Output folder row.
        self.lbl_output = tk.Label(master, text="输出文件夹:")
        self.ent_output = tk.Entry(master, width=30)
        self.btn_output = tk.Button(master, text="选择", command=self.select_output)

        # Option check boxes.
        self.var_subdir = tk.BooleanVar()
        self.var_open = tk.BooleanVar(value=True)
        self.chk_subdir = tk.Checkbutton(master, text="包含子文件夹", variable=self.var_subdir)
        self.chk_open = tk.Checkbutton(master, text="转换完成后打开目标文件夹", variable=self.var_open)

        # Convert button.
        self.btn_convert = tk.Button(master, text="开始转换", command=self.start_conversion)

        # Progress widgets.
        self.progress_label = tk.Label(master, text="准备就绪")
        self.progress_bar = ttk.Progressbar(master, orient=tk.HORIZONTAL, mode='determinate')

        # Layout.
        self.lbl_input.grid(row=0, column=0, padx=10, pady=10, sticky=tk.W)
        self.ent_input.grid(row=0, column=1, padx=5, pady=10, sticky=tk.EW)
        self.btn_input.grid(row=0, column=2, padx=10, pady=10)

        self.lbl_output.grid(row=1, column=0, padx=10, pady=10, sticky=tk.W)
        self.ent_output.grid(row=1, column=1, padx=5, pady=10, sticky=tk.EW)
        self.btn_output.grid(row=1, column=2, padx=10, pady=10)

        self.chk_subdir.grid(row=2, column=1, padx=5, pady=5, sticky=tk.W)
        self.chk_open.grid(row=3, column=1, padx=5, pady=5, sticky=tk.W)
        self.btn_convert.grid(row=4, column=1, pady=10)

        self.progress_label.grid(row=5, column=0, columnspan=3, padx=10, pady=5, sticky=tk.W)
        self.progress_bar.grid(row=6, column=0, columnspan=3, padx=10, pady=10, sticky=tk.EW)

        # Let the entry column absorb horizontal resizing.
        master.columnconfigure(1, weight=1)

        # Files found by the last scan (written by the worker thread).
        self.pdf_files = []
        # Progress-bar value at the start of the current file, and the share
        # of the bar (in percent) that one file occupies. Main thread only.
        self._file_base = 0.0
        self._file_share = 0.0

        # Worker -> GUI message channel.
        self.queue = queue.Queue()
        master.after(self.POLL_INTERVAL_MS, self.process_queue)

    def select_input(self):
        """Ask for the input folder and put the choice into the input entry."""
        path = filedialog.askdirectory()
        if path:
            self.ent_input.delete(0, tk.END)
            self.ent_input.insert(0, path)

    def select_output(self):
        """Ask for the output folder and put the choice into the output entry."""
        path = filedialog.askdirectory()
        if path:
            self.ent_output.delete(0, tk.END)
            self.ent_output.insert(0, path)

    def start_conversion(self):
        """Validate the two folders, then launch the background conversion."""
        input_dir = self.ent_input.get().strip()
        output_dir = self.ent_output.get().strip()

        # Validate before touching the progress widgets, so a rejected
        # request does not leave a stale "scanning" label behind.
        if not input_dir or not output_dir:
            messagebox.showerror("错误", "请先选择输入和输出文件夹!")
            return
        if not os.path.isdir(input_dir):
            messagebox.showerror("错误", f"输入文件夹不存在:{input_dir}")
            return

        self.progress_bar['value'] = 0
        self.progress_label.config(text="正在扫描PDF文件...")
        # Prevent a second batch from being started while this one runs.
        self.btn_convert.config(state=tk.DISABLED)

        # Read the check box here, on the main thread, and hand the plain
        # bool to the worker so it does not have to touch Tk variables.
        threading.Thread(
            target=self.convert_files,
            args=(input_dir, output_dir, self.var_subdir.get()),
            daemon=True,
        ).start()

    def get_pdf_list(self, input_dir, include_subdirs=None):
        """Return the paths of the ``.pdf`` files (case-insensitive) in *input_dir*.

        Args:
            input_dir: Folder to scan.
            include_subdirs: Whether to descend into sub-folders. ``None``
                (the default) reads the "包含子文件夹" check box.

        Returns:
            A list of file paths; empty when nothing matches or the folder
            cannot be read.
        """
        if include_subdirs is None:
            include_subdirs = self.var_subdir.get()

        pdf_list = []
        for root, _dirs, files in os.walk(input_dir):
            pdf_list.extend(
                os.path.join(root, name)
                for name in sorted(files)
                if name.lower().endswith('.pdf')
            )
            if not include_subdirs:
                # os.walk yields the top folder first; stop there instead
                # of walking (and discarding) the whole tree.
                break
        return pdf_list

    def convert_files(self, input_dir, output_dir, include_subdirs=None):
        """Convert every PDF under *input_dir* into *output_dir* (worker thread).

        With sub-folders included, the relative folder structure is
        recreated below *output_dir*. Progress and the final outcome are
        reported through ``self.queue``; the first exception aborts the
        batch and is posted as an ``"error"`` message.

        Args:
            input_dir: Folder holding the source PDFs.
            output_dir: Folder receiving the .docx files (created as needed).
            include_subdirs: Whether to descend into sub-folders. ``None``
                (the default) reads the "包含子文件夹" check box.
        """
        try:
            if include_subdirs is None:
                include_subdirs = self.var_subdir.get()

            self.pdf_files = self.get_pdf_list(input_dir, include_subdirs)
            total_files = len(self.pdf_files)
            if total_files == 0:
                self.queue.put(("empty", input_dir))
                return

            for index, pdf_path in enumerate(self.pdf_files):
                self.queue.put(("file_progress", (index + 1, total_files, pdf_path)))

                # Mirror the source sub-folder below the output folder.
                if include_subdirs:
                    relative_path = os.path.relpath(os.path.dirname(pdf_path), input_dir)
                else:
                    relative_path = ""
                output_path = os.path.join(output_dir, relative_path)
                os.makedirs(output_path, exist_ok=True)

                stem = os.path.splitext(os.path.basename(pdf_path))[0]
                docx_path = os.path.join(output_path, f"{stem}.docx")

                cv = Converter(pdf_path)
                try:
                    # NOTE(review): pdf2docx's documented convert() options do
                    # not list progress_callback -- confirm the installed
                    # version actually invokes it. The progress bar still
                    # advances per file if it never fires.
                    cv.convert(docx_path, progress_callback=self.update_page_progress)
                finally:
                    # Release the PDF handle even when the conversion fails.
                    cv.close()

            self.queue.put(("complete", output_dir))
        except Exception as e:  # thread boundary: report instead of dying silently
            self.queue.put(("error", str(e)))

    def update_page_progress(self, current, total):
        """Post the progress within the current file as a 0-100 percentage."""
        progress = (current / total) * 100 if total != 0 else 0
        self.queue.put(("page_progress", progress))

    def process_queue(self):
        """Apply all pending worker messages to the GUI, then reschedule itself."""
        try:
            while True:
                msg_type, data = self.queue.get_nowait()

                if msg_type == "file_progress":
                    current, total, path = data
                    # Each file owns an equal slice of the bar; the bar sits
                    # at the start of the slice while the file is converted.
                    self._file_share = 100 / total
                    self._file_base = (current - 1) * self._file_share
                    self.progress_bar['value'] = self._file_base
                    self.progress_label.config(
                        text=f"正在转换 {current}/{total}:{os.path.basename(path)}")

                elif msg_type == "page_progress":
                    # Overall = start of this file's slice + fraction of the slice.
                    self.progress_bar['value'] = (
                        self._file_base + data / 100 * self._file_share)

                elif msg_type == "complete":
                    self.progress_bar['value'] = 100
                    self.btn_convert.config(state=tk.NORMAL)
                    self.progress_label.config(text="转换完成")
                    messagebox.showinfo("完成", "转换完成!")
                    if self.var_open.get():
                        # Open the folder that was actually written to, not
                        # whatever the entry contains by now.
                        self._open_folder(data or self.ent_output.get())

                elif msg_type == "empty":
                    self.btn_convert.config(state=tk.NORMAL)
                    self.progress_label.config(text="准备就绪")
                    messagebox.showinfo("提示", "所选文件夹中没有找到PDF文件。")

                elif msg_type == "error":
                    messagebox.showerror("错误", f"转换出错:{data}")
                    self.btn_convert.config(state=tk.NORMAL)
                    self.progress_label.config(text="转换出错")
        except queue.Empty:
            pass
        finally:
            self.master.after(self.POLL_INTERVAL_MS, self.process_queue)

    @staticmethod
    def _open_folder(path):
        """Show *path* in the platform's file manager; failures are non-fatal."""
        try:
            if sys.platform.startswith("win"):
                os.startfile(path)  # only exists on Windows
            elif sys.platform == "darwin":
                subprocess.Popen(["open", path])
            else:
                subprocess.Popen(["xdg-open", path])
        except OSError as e:
            messagebox.showwarning("提示", f"无法打开目标文件夹:{e}")


def main():
    """Create the main window and run the Tk event loop."""
    root = tk.Tk()
    app = PDFToWordConverter(root)  # keep a reference for the window's lifetime
    root.mainloop()


if __name__ == "__main__":
    main()
```