master

分支 (1)

管理

管理

master

python_learn
/
修改 abs.py

import pandas as pd
import os
from datetime import datetime


# Characters that would break the syntax of the generated query. A title
# containing any of them is skipped rather than escaped. The double quote is
# included because every title is wrapped in double quotes in the output.
_QUERY_BREAKING_CHARS = frozenset('()[]@"')


def _build_title_clauses(raw_titles):
    """Return one ``SO="<title>"`` clause for each usable value in *raw_titles*.

    Missing values, titles that are empty after stripping whitespace, and
    titles containing a query-breaking character are left out.
    """
    clauses = []
    for raw_title in raw_titles:
        if not pd.notna(raw_title):
            continue
        title_str = str(raw_title).strip()
        # A whitespace-only cell is not NaN, so it must be filtered here;
        # otherwise it would produce a meaningless SO="" clause.
        if not title_str:
            continue
        if not _QUERY_BREAKING_CHARS.isdisjoint(title_str):
            continue
        clauses.append(f'SO="{title_str}"')
    return clauses


def get_titles_string(input_file_path: str, title_column: str = "title"):
    """
    Build an OR-joined query string from an Excel title column and save it.

    Each usable title becomes ``SO="<title>"``; the clauses are joined with
    `` OR `` and written (UTF-8) to a timestamped ``.txt`` file placed in the
    same directory as the input file.

    Args:
        input_file_path (str): Path of the Excel file to read.
        title_column (str): Name of the column holding the titles.
            Defaults to 'title'.

    Returns:
        The path of the written text file, or None if anything went wrong
        (missing file, missing column, no usable titles, read/write failure).
        Errors are printed, not raised.
    """
    try:
        # Fail early with a clear message when the input file is absent.
        if not os.path.exists(input_file_path):
            raise FileNotFoundError(f"文件不存在：{input_file_path}")

        df = pd.read_excel(input_file_path)

        # When the requested column is missing, list the columns that do
        # exist so the caller can correct the name.
        if title_column not in df.columns:
            print("可用的列名：")
            for col in df.columns:
                print(f"- {col}")
            raise ValueError(f"列名 '{title_column}' 不存在")

        titles = _build_title_clauses(df[title_column])
        if not titles:
            raise ValueError("没有找到有效的标题")

        result = " OR ".join(titles)

        # Output goes next to the input: <input stem>_titles_<timestamp>.txt
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        input_dir = os.path.dirname(input_file_path)
        input_filename = os.path.splitext(os.path.basename(input_file_path))[0]
        output_file = os.path.join(
            input_dir, f"{input_filename}_titles_{timestamp}.txt"
        )

        with open(output_file, "w", encoding="utf-8") as f:
            f.write(result)

        print(f"标题字符串已保存至：{output_file}")
        return output_file

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and signal
        # failure with None instead of propagating the exception.
        print(f"错误：{str(e)}")
        return None


if __name__ == "__main__":
    # Example run against a local workbook. The raw-string prefix keeps the
    # Windows backslashes literal; doubling every backslash in a regular
    # string literal would be equivalent.
    example_workbook = r"C:\Users\jefeer\Documents\python_learn\ABS.xlsx"
    get_titles_string(example_workbook)