From 98bea8f401b83bf976d0fbb0c545f91141392f68 Mon Sep 17 00:00:00 2001
From: petermouse666 <708975811@qq.com>
Date: Sat, 20 Sep 2025 20:42:21 +0800
Subject: [PATCH 1/2] update ci-bot for auto generating translation comment

---
 ci/tools/comment/comment_agent.py    | 958 +++++++++++++++++++++++++++
 ci/tools/comment/create_comment.py   | 372 +++++++++++
 ci/tools/comment/create_comment.yaml |  38 ++
 3 files changed, 1368 insertions(+)
 create mode 100644 ci/tools/comment/comment_agent.py
 create mode 100644 ci/tools/comment/create_comment.py
 create mode 100644 ci/tools/comment/create_comment.yaml

diff --git a/ci/tools/comment/comment_agent.py b/ci/tools/comment/comment_agent.py
new file mode 100644
index 00000000..25dbe385
--- /dev/null
+++ b/ci/tools/comment/comment_agent.py
@@ -0,0 +1,958 @@
+import json
+import re
+import logging
+import urllib.parse
+from typing import List, Dict, Any, Optional, Tuple, Literal
+from dataclasses import dataclass
+from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError as FutureTimeoutError
+from pathlib import Path
+import sys
+import time
+# LangChain imports
+from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
+from langchain_core.runnables import RunnableLambda, RunnablePassthrough
+from pydantic import BaseModel, Field, SecretStr
+from langchain_community.llms import Ollama
+from langchain_ollama import ChatOllama
+from langchain.chains import TransformChain, SequentialChain
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_openai import ChatOpenAI
+import yaml
+
+# ==================== 配置加载 ====================
+
+def load_config(config_file="create_comment.yaml"):
+    """从YAML文件加载配置"""
+    try:
+        with open(config_file, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f)
+        return config.get('comment_agent', {})
+    except FileNotFoundError:
+        print(f"配置文件 {config_file} 不存在")
+        raise
+    except yaml.YAMLError as e:
+        print(f"解析配置文件时发生错误: {e}")
+        raise
+
+# 加载配置
+_config = load_config()
+
+# ==================== 配置常量 ====================
+
+BACKEND_TYPE = _config.get('backend', {}).get('type', 'siliconflow')
+MODEL_NAME = _config.get('model', {}).get('name', 'Qwen/Qwen3-8B')
+MODEL_TEMPERATURE = _config.get('model', {}).get('temperature', 0.1)
+MODEL_MAX_RETRY = _config.get('model', {}).get('max_retry', 5)
+PROCESSING_MAX_WORKERS = _config.get('processing', {}).get('max_workers', 8)
+SINGLE_FILE_TIMEOUT = _config.get('processing', {}).get('single_file_timeout', 180)
+TOTAL_COMMENT_TIMEOUT = _config.get('processing', {}).get('total_comment_timeout', 300)
+LOGGING_LEVEL = _config.get('logging', {}).get('level', 'INFO')
+SILICONFLOW_API_KEY = ''
+SILICONFLOW_API_BASE = ''
+
+# 配置日志
+logging.basicConfig(level=getattr(logging, LOGGING_LEVEL.upper()))
+logger = logging.getLogger(__name__)
+
+# ==================== 数据模型定义 ====================
+
+class PRAnalysisResult(BaseModel):
+    """PR分析结果的结构化输出"""
+    has_text_changes: bool = Field(description="是否涉及英文文本改动", default=False)
+    text_change_type: Literal["无文本改动", "仅标点符号改动", "英文内容改动", "代码注释改动", "混合改动"] = Field(description="文本改动类型")
+    has_grammar_errors: bool = Field(description="是否存在语法语病错误", default=False)
+    grammar_errors: List[str] = Field(description="具体的语法语病错误列表", default=[])
+    detailed_analysis: str = Field(description="详细分析说明")
+    suggestions: List[str] = Field(description="改进建议列表", default=[])
+
+class FileTextAnalysis(BaseModel):
+    """单个文件的文本分析"""
+    file_path: str = Field(description="文件路径", default="")
+    has_text_changes: bool = Field(description="是否涉及英文文本改动", default=False)
+    text_lines: List[str] = Field(description="涉及文本改动的行", default=[])
+    grammar_issues: List[str] = Field(description="语法问题列表", default=[])
+    analysis_details: str = Field(description="分析详情")
+
+@dataclass
+class DiffFileInfo:
+    """单个文件的diff信息"""
+    file_path: str
+    diff_content: str
+    lines_added: int
+    lines_deleted: int
+
+@dataclass
+class CommentResult:
+    """评论生成结果"""
+    pr_analysis: Optional[PRAnalysisResult]
+    file_analyses: List[FileTextAnalysis]
+    processed_files: int
+    total_files: int
+    error: Optional[str] = None
+
+# ==================== Token 统计工具 ====================
+
+
+# ==================== 工具函数 ====================
+
+class DiffParser:
+    """Git Diff 解析器"""
+
+    @staticmethod
+    def parse_git_diff(diff_content: str) -> List[DiffFileInfo]:
+        """
+        解析git diff内容,提取每个文件的改动信息
+
+        Args:
+            diff_content: git diff的原始内容
+
+        Returns:
+            包含文件路径和对应diff内容的列表
+        """
+
+        files = []
+        current_file = None
+        current_diff = []
+
+        lines = diff_content.strip().split('\n')
+
+        for line in lines:
+            # 匹配文件路径行
+            if line.startswith('diff --git'):
+                # 保存前一个文件的信息
+                if current_file and current_diff:
+                    diff_info = DiffParser._create_diff_file_info(current_file, current_diff)
+                    if diff_info:
+                        files.append(diff_info)
+
+                # 提取文件路径 - 改进的解析逻辑
+                current_file = DiffParser._extract_file_path(line)
+                if current_file:
+                    current_diff = [line]
+                else:
+                    current_diff = []
+            elif current_file:
+                current_diff.append(line)
+
+        # 添加最后一个文件
+        if current_file and current_diff:
+            diff_info = DiffParser._create_diff_file_info(current_file, current_diff)
+            if diff_info:
+                files.append(diff_info)
+
+        return files
+
+    @staticmethod
+    def _extract_file_path(diff_line: str) -> Optional[str]:
+        """
+        从git diff行中提取文件路径,支持包含汉字的文件名
+
+        Args:
+            diff_line: git diff的文件头行,格式如 "diff --git a/path/to/file b/path/to/file"
+
+        Returns:
+            提取出的文件路径,如果解析失败则返回None
+        """
+        try:
+            # 方法1: 处理引号包围的路径(Git对特殊字符的处理)
+            # 格式: diff --git "a/path/to/file" "b/path/to/file"
+            quoted_pattern = r'diff --git "a/(.+?)" "b/(.+?)"'
+            quoted_match = re.match(quoted_pattern, diff_line)
+
+            if quoted_match:
+                file_path_a = quoted_match.group(1)
+                file_path_b = quoted_match.group(2)
+                # 通常a和b路径相同,使用a路径(旧文件路径)
+                file_path = file_path_a
+            else:
+                # 方法2: 使用正则表达式匹配标准的git diff格式
+                # 格式: diff --git a/path/to/file b/path/to/file
+                pattern = r'diff --git a/(.+?) b/(.+?)(?:\s|$)'
+                match = re.match(pattern, diff_line)
+
+                if match:
+                    file_path_a = match.group(1)
+                    file_path_b = match.group(2)
+                    # 通常a和b路径相同,使用a路径(旧文件路径)
+                    file_path = file_path_a
+                else:
+                    # 方法3: 如果正则匹配失败,尝试更简单的解析
+                    # 处理可能包含空格和特殊字符的文件名
+                    if ' a/' in diff_line and ' b/' in diff_line:
+                        # 找到 a/ 和 b/ 的位置
+                        a_pos = diff_line.find(' a/')
+                        b_pos = diff_line.find(' b/')
+
+                        if a_pos != -1 and b_pos != -1 and a_pos < b_pos:
+                            # 提取a/和b/之间的路径
+                            a_start = a_pos + 3  # 跳过 ' a/'
+                            file_path = diff_line[a_start:b_pos]
+                        else:
+                            return None
+                    else:
+                        # 方法4: 最后的备选方案,简单的字符串分割
+                        parts = diff_line.split()
+                        if len(parts) >= 3:
+                            a_path = parts[2]
+                            if a_path.startswith('a/'):
+                                file_path = a_path[2:]  # 移除'a/'前缀
+                            else:
+                                return None
+                        else:
+                            return None
+
+            # 处理文件名编码
+            return DiffParser._decode_file_path(file_path)
+
+        except Exception as e:
+            logger.warning(f"解析文件路径时发生错误: {e}, diff行: {diff_line}")
+            return None
+
+    @staticmethod
+    def _decode_file_path(file_path: str) -> str:
+        """
+        解码文件路径,处理各种编码情况
+
+        Args:
+            file_path: 原始文件路径
+
+        Returns:
+            解码后的文件路径
+        """
+        try:
+            # 首先尝试URL解码,处理Git编码的文件名
+            decoded_path = urllib.parse.unquote(file_path, encoding='utf-8')
+
+            # 处理Git对特殊字符的引号包装
+            if decoded_path.startswith('"') and decoded_path.endswith('"'):
+                decoded_path = decoded_path[1:-1]
+                # Git使用反斜杠转义,需要处理转义序列
+                decoded_path = decoded_path.replace('\\"', '"')
+                decoded_path = decoded_path.replace('\\\\', '\\')
+
+            # 无论是否有引号包装,都尝试处理八进制编码
+            # 检查是否包含八进制转义序列
+            if '\\' in decoded_path and re.search(r'\\[0-7]{3}', decoded_path):
+                decoded_path = DiffParser._decode_octal_sequences(decoded_path)
+
+            return decoded_path
+
+        except Exception as e:
+            logger.warning(f"解码文件路径时发生错误: {e}, 原始路径: {file_path}")
+            return file_path
+
+    @staticmethod
+    def _decode_octal_sequences(text: str) -> str:
+        """
+        解码文本中的八进制转义序列
+
+        Args:
+            text: 包含八进制转义序列的文本
+
+        Returns:
+            解码后的文本
+        """
+        try:
+            # 查找八进制转义序列模式:\xxx
+            pattern = r'\\([0-7]{3})'
+
+            # 找到所有八进制序列
+            matches = list(re.finditer(pattern, text))
+            if not matches:
+                return text
+
+            # 收集所有字节值
+            result = ""
+            last_end = 0
+            bytes_buffer = []
+
+            for i, match in enumerate(matches):
+                # 添加匹配前的文本
+                if match.start() > last_end:
+                    # 如果有缓冲的字节,先处理它们
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                            result += decoded_bytes
+                            bytes_buffer = []
+                        except UnicodeDecodeError:
+                            # 如果解码失败,保持原始形式
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                            bytes_buffer = []
+
+                    result += text[last_end:match.start()]
+
+                # 处理当前八进制序列
+                octal_str = match.group(1)
+                try:
+                    byte_value = int(octal_str, 8)
+                    bytes_buffer.append(byte_value)
+                except ValueError:
+                    # 如果转换失败,添加原始字符串
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                            result += decoded_bytes
+                            bytes_buffer = []
+                        except UnicodeDecodeError:
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                            bytes_buffer = []
+                    result += match.group(0)
+
+                last_end = match.end()
+
+                # 检查是否是最后一个匹配或下一个匹配不连续
+                is_last = (i == len(matches) - 1)
+                is_next_non_consecutive = (not is_last and
+                                           matches[i + 1].start() != match.end())
+
+                if is_last or is_next_non_consecutive:
+                    # 处理缓冲的字节
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                        except UnicodeDecodeError:
+                            # 如果解码失败,保持原始形式
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                        bytes_buffer = []
+
+            # 添加剩余的文本
+            if last_end < len(text):
+                result += text[last_end:]
+
+            return result
+
+        except Exception as e:
+            logger.warning(f"解码八进制序列时发生错误: {e}, 原始文本: {text}")
+            return text
+
+    @staticmethod
+    def _create_diff_file_info(file_path: str, diff_lines: List[str]) -> Optional[DiffFileInfo]:
+        """创建DiffFileInfo对象"""
+        diff_content = '\n'.join(diff_lines)
+        lines_added, lines_deleted = DiffParser._count_lines_changed(diff_content)
+
+        return DiffFileInfo(
+            file_path=file_path,
+            diff_content=diff_content,
+            lines_added=lines_added,
+            lines_deleted=lines_deleted
+        )
+
+    @staticmethod
+    def _count_lines_changed(diff_content: str) -> Tuple[int, int]:
+        """统计git diff中改动的行数"""
+        lines_added, lines_deleted = 0, 0
+        lines = diff_content.strip().split('\n')
+
+        for line in lines:
+            # 统计新增行(以+开头,但不是+++)
+            if line.startswith('+') and not line.startswith('+++'):
+                lines_added += 1
+            # 统计删除行(以-开头,但不是---)
+            elif line.startswith('-') and not line.startswith('---'):
+                lines_deleted += 1
+
+        return lines_added, lines_deleted
+
+# ==================== LangChain 组件 ====================
+
+class LLMFactory:
+    """LLM工厂类"""
+
+    @staticmethod
+    def create_chat_llm(model_name: str = None, base_url: str = None):
+        """创建LLM实例"""
+        if model_name is None:
+            model_name = MODEL_NAME
+
+        if BACKEND_TYPE == "siliconflow":
+            return ChatOpenAI(
+                model=model_name,
+                api_key=SecretStr(SILICONFLOW_API_KEY),
+                base_url=SILICONFLOW_API_BASE,
+                temperature=MODEL_TEMPERATURE
+            )
+        else:
+            raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}")
+
+class PromptTemplates:
+    """提示模板集合"""
+
+    @staticmethod
+    def get_file_text_analysis_prompt() -> ChatPromptTemplate:
+        """获取单文件文本分析提示模板"""
+        return ChatPromptTemplate.from_messages([
+            ("system", f"""
+你是一个专业的代码审查和语言专家,专注于分析Gitee文档仓库的翻译PR中的英文文本内容。每条PR都是人工生成的文档改动。请忽略中文、格式和代码的审计,专注于识别英文文本变更。
+
+注意:请忽略中文、格式和代码的审计,专注于识别英文文本变更。如果文档的变更不涉及英文文本,你只需要输出“不涉及英文改动”即可,不需要额外输出任何分析结果。
+同时:对于专有名词,例如openEuler、GitHub等,你不能将其纳入英文文本变更的纠错范围内,而是应该自动识别专有名词。对于代码的相关变更,也不应该纳入分析内容范围。
+
+你需要遵循**能不提修改意见就不提修改意见**的原则进行审查!!!
+
+请仔细分析这个文件的改动,并按照以下要求进行分析:
+
+**分析重点:**
+
+1. 英文文本变更识别:
+   - 检查是否涉及英文文本内容的改动
+   - 区分代码逻辑变更和英文文本内容变更
+   - 识别注释、文档字符串、用户显示文本等英文文本内容
+   - 标识出具体的英文文本变更行
+
+2. 语法错误检测:
+   - 检查英文文本的语法、拼写错误
+
+**分析类型判断:**
+- 如果改动不涉及任何英文文本内容,标记为"无英文文本改动"
+- 如果涉及代码注释的英文文本变更,标记为"代码注释改动"
+- 如果涉及文档、界面文本等英文内容变更,标记为"英文内容改动"
+
+**语法检查重点:**
+- 英文:主谓一致、时态、拼写、标点、语序
+
+**输出要求:**
+- 如果存在英文文本变更但变更不存在语法问题,则直接输出“不存在语法问题”,不需要任何额外输出
+- 详细列出发现的语法错误(如果有)
+- 不能超过100个汉字字符
+
+            """),
+            ("human", """
+文件路径: {file_path}
+
+Git Diff 内容:
+{diff_content}
+
+            """)
+        ])
+
+    @staticmethod
+    def get_pr_analysis_prompt() -> ChatPromptTemplate:
+        """获取整体PR分析提示模板"""
+        return ChatPromptTemplate.from_messages([
+            ("system", """
+你是一个专业的PR审查专家,专门分析Gitee文档仓库的翻译PR中的英文文本变更和语法问题。每条PR都是人工生成的文档改动。
+
+请分析所有文件的改动,并生成一个综合评估,要求:
+
+1. 整体文本变更评估:
+   - 统计涉及文本变更的文件数量
+   - 分析文本变更的类型分布
+   - 评估变更的重要性和影响范围
+   - 如果文本变更不涉及英文,或涉及英文但使用正确不需要改动,则**直接忽略**,无需对其进行总结
+
+2. 语法错误汇总:
+   - **仅汇总改动中的硬伤,如单词拼写错误、英语语法(时态语态)错误等**
+   - **对于一些可以优化但称不上错误的点,以最小化改动为原则,选择忽略**
+   - 提高报错阈值,忽略可优化翻译的点
+   - 提供优先修复建议
+
+3. 质量评估:
+   - 对整个PR的文本质量给出评分
+   - 分析文本变更的一致性
+   - 评估对用户体验的影响
+
+4. 改进建议:
+   - 提供具体的修改建议
+   - 推荐最佳实践
+   - 建议后续的质量控制措施
+
+**输出格式要求:**
+- 提供清晰的分析结论
+- 按优先级排列发现的问题
+- 给出可操作的改进建议
+
+            """),
+            ("human", """
+各个文件的分析结果:
+{file_analyses}
+
+总文件数: {total_files}
+涉及文本变更的文件数: {text_changed_files}
+            """)
+        ])
+
+class FileTextAnalysisChain:
+    """单文件文本分析任务链"""
+
+    def __init__(self, llm: ChatOpenAI):
+        self.llm = llm
+
+        # 创建输出解析器
+        self.output_parser = JsonOutputParser(pydantic_object=FileTextAnalysis)
+
+        # 为硅基流动平台添加输出格式说明
+        format_instructions = """
+请以JSON格式输出,包含以下字段:
+{{
+    "has_text_changes": "是否涉及英文文本改动(布尔值)",
+    "text_lines": "涉及文本改动的行(字符串列表)",
+    "grammar_issues": "语法问题列表(字符串列表)",
+    "analysis_details": "分析详情(字符串)"
+}}
+"""
+        # 创建新的prompt模板
+        system_template = """
+你是一个专业的代码审查和语言专家,专注于分析Gitee文档仓库的翻译PR中的英文文本内容。每条PR都是人工生成的文档改动。
+
+**核心原则:只关注必然存在明显错误的地方,其他文件都不需要关注!**
+
+**严格过滤条件:**
+1. 如果文档的变更不涉及英文文本,直接标记为"无英文文本改动",无需任何分析
+2. 如果涉及英文文本但语法完全正确,直接标记为"语法正确,无需关注"
+3. 如果仅涉及标点符号的微小调整,直接标记为"仅标点符号改动,无需关注"
+4. 对于专有名词(如openEuler、GitHub等),自动识别并忽略,不纳入纠错范围
+5. 对于代码相关变更,不纳入分析内容范围
+
+**只关注以下明显错误:**
+- 明显的单词拼写错误(如:recieve -> receive)
+- 严重的语法错误(如:主谓不一致、时态错误)
+- 明显的标点符号错误(如:缺少句号、逗号使用错误)
+- 明显的语序错误
+
+**忽略以下情况:**
+- 语法正确但可以优化的表达
+- 风格偏好问题
+- 轻微的标点符号调整
+- 术语选择的差异
+- 表达方式的个人偏好
+
+**输出要求:**
+- 如果不存在明显错误,直接输出"语法正确,无需关注"
+- 只有发现明显错误时才详细列出
+- 不能超过100个汉字字符
+- 遵循"能不提修改意见就不提修改意见"的原则
+
+{format_instructions}
+"""
+        human_template = """
+文件路径: {file_path}
+
+Git Diff 内容:
+{diff_content}
+"""
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", system_template.format(format_instructions=format_instructions)),
+            ("human", human_template)
+        ])
+        self.chain = self.prompt | self.llm | self.output_parser
+
+    def analyze(self, diff_file_info: DiffFileInfo) -> Optional[FileTextAnalysis]:
+        """分析单个文件的文本变更"""
+        max_retry = MODEL_MAX_RETRY
+        for attempt in range(1, max_retry + 1):
+            # 如果不是第一次尝试,等待一段时间再重试,避免连续失败
+            if attempt > 1:
+                delay = min(attempt * 2, 10)  # 递增延迟,最多10秒
+                logger.info(f"第{attempt}次尝试分析文件 {diff_file_info.file_path},等待{delay}秒...")
+                time.sleep(delay)
+
+            try:
+                # 构造prompt字符串
+                prompt_args = {
+                    "file_path": diff_file_info.file_path,
+                    "diff_content": diff_file_info.diff_content
+                }
+
+                # 直接调用,简化超时控制
+                invoke_args = {
+                    "file_path": diff_file_info.file_path,
+                    "diff_content": diff_file_info.diff_content
+                }
+                result = self.chain.invoke(invoke_args)
+                # 验证结果有效性
+                if isinstance(result, (dict, FileTextAnalysis)):
+                    if isinstance(result, dict):
+                        result = FileTextAnalysis(**result)
+
+                    # 检查结果完整性
+                    if result and hasattr(result, 'analysis_details') and result.analysis_details:
+
+                        # 设置准确值
+                        result.file_path = diff_file_info.file_path
+
+                        # 检查是否只关注明显错误
+                        analysis_text = result.analysis_details.lower()
+                        if any(phrase in analysis_text for phrase in [
+                            "语法正确,无需关注",
+                            "无英文文本改动",
+                            "仅标点符号改动,无需关注",
+                            "不存在语法问题"
+                        ]):
+                            # 如果无问题,设置has_text_changes为False
+                            result.has_text_changes = False
+                            result.grammar_issues = []
+
+                        return result
+
+                # 结果无效,记录并重试
+                logger.warning(f"分析文件 {diff_file_info.file_path} 返回无效结果,第{attempt}次尝试")
+                if attempt < max_retry:
+                    continue
+            except Exception as e:
+                err_str = str(e)
+                # 检查是否为HTTP错误(如404、5xx),常见关键字有status code、HTTP、response等
+                is_http_error = False
+                for code in ["404", "500", "502", "503", "504"]:
+                    if code in err_str:
+                        is_http_error = True
+                        break
+                if ("status code" in err_str or "HTTP" in err_str or "response" in err_str) and any(code in err_str for code in ["404", "500", "502", "503", "504"]):
+                    is_http_error = True
+                if is_http_error:
+                    logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e},第{attempt}次尝试,10秒后重试...")
+                    if attempt < max_retry:
+                        time.sleep(10)
+                        continue
+                else:
+                    logger.error(f"分析文件 {diff_file_info.file_path} 时发生错误: {e},第{attempt}次尝试")
+                # 其它异常直接进入下一次重试
+                if attempt < max_retry:
+                    logger.info(f"第{attempt}次尝试失败,准备重试...")
+        logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry}次均未获得结构化输出,放弃。")
+        return None
+
+class PRAnalysisChain:
+    """PR整体分析任务链"""
+
+    def __init__(self, llm: ChatOllama | ChatOpenAI):
+        self.llm = llm
+
+        # 创建输出解析器
+        self.output_parser = JsonOutputParser(pydantic_object=PRAnalysisResult)
+
+        # 为硅基流动平台添加输出格式说明
+        format_instructions = """
+请以JSON格式输出,包含以下字段:
+{{
+    "has_text_changes": "是否涉及英文文本改动(布尔值)",
+    "text_change_type": "文本改动类型(字符串)",
+    "has_grammar_errors": "是否存在语法语病错误(布尔值)",
+    "grammar_errors": "具体的语法语病错误列表(字符串列表)",
+    "detailed_analysis": "详细分析说明(字符串)",
+    "suggestions": "改进建议列表(字符串列表)"
+}}
+"""
+        # 创建新的prompt模板
+        system_template = """
+你是一个专业的PR审查专家,专门分析Pull Request中的文本变更和语法问题。
+
+**核心原则:只关注必然存在明显错误的地方,其他文件都不需要关注!**
+
+请基于各个文件的分析结果,生成整个PR的综合评估,要求:
+
+1. 严格过滤文件:
+   - 只统计存在明显错误的文件
+   - 忽略"语法正确,无需关注"的文件
+   - 忽略"无英文文本改动"的文件
+   - 忽略"仅标点符号改动,无需关注"的文件
+
+2. 只汇总明显错误:
+   - 仅汇总硬伤:明显的单词拼写错误、严重的语法错误
+   - 忽略可优化但称不上错误的点
+   - 忽略风格偏好问题
+   - 忽略轻微的标点符号调整
+
+3. 质量评估:
+   - 只对存在明显错误的文件进行质量评估
+   - 如果所有文件都无问题,直接标记为"无问题文件"
+
+4. 改进建议:
+   - 只对存在明显错误的文件提供修改建议
+   - 建议优先修复明显的拼写和语法错误
+
+**输出格式要求:**
+- 如果所有文件都无问题,直接输出"所有文件语法正确,无需关注"
+- 只列出存在明显错误的文件
+- 按优先级排列发现的问题
+- 给出可操作的改进建议
+
+{format_instructions}
+"""
+        human_template = """
+各个文件的分析结果:
+{file_analyses}
+
+总文件数: {total_files}
+涉及文本变更的文件数: {text_changed_files}
+"""
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", system_template.format(format_instructions=format_instructions)),
+            ("human", human_template)
+        ])
+        self.chain = self.prompt | self.llm | self.output_parser
+
+    def generate(self, file_analyses: List[FileTextAnalysis]) -> Optional[PRAnalysisResult]:
+        """生成PR整体分析"""
+        try:
+            total_files = len(file_analyses)
+
+            # 过滤出只关注存在明显错误的文件
+            problematic_files = []
+            for analysis in file_analyses:
+                # 检查是否存在明显错误
+                has_obvious_errors = (
+                    analysis.has_text_changes and
+                    analysis.grammar_issues and
+                    len(analysis.grammar_issues) > 0 and
+                    analysis.analysis_details and
+                    not any(phrase in analysis.analysis_details for phrase in [
+                        "语法正确,无需关注",
+                        "无英文文本改动",
+                        "仅标点符号改动,无需关注",
+                        "不存在语法问题"
+                    ])
+                )
+
+                if has_obvious_errors:
+                    problematic_files.append(analysis)
+
+            # 如果所有文件都无问题,直接返回无问题结果
+            if not problematic_files:
+                return PRAnalysisResult(
+                    has_text_changes=False,
+                    text_change_type="无文本改动",
+                    has_grammar_errors=False,
+                    grammar_errors=[],
+                    detailed_analysis="所有文件语法正确,无需关注",
+                    suggestions=[]
+                )
+
+            text_changed_files = len(problematic_files)
+
+            file_analyses_info = []
+            for analysis in problematic_files:
+                file_analyses_info.append({
+                    'file_path': analysis.file_path,
+                    'has_text_changes': analysis.has_text_changes,
+                    'text_lines': analysis.text_lines,
+                    'grammar_issues': analysis.grammar_issues,
+                    'analysis_details': analysis.analysis_details
+                })
+
+            # 构造prompt字符串
+            prompt_args = {
+                "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2),
+                "total_files": total_files,
+                "text_changed_files": text_changed_files
+            }
+
+            # 使用线程池执行器为PR分析添加超时控制
+            timeout_executor = None
+            try:
+                timeout_executor = ThreadPoolExecutor(max_workers=1)
+                invoke_args = {
+                    "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2),
+                    "total_files": total_files,
+                    "text_changed_files": text_changed_files
+                }
+                result = self.chain.invoke(invoke_args)
+                # 验证结果有效性
+                if isinstance(result, (dict, PRAnalysisResult)):
+                    # 如果是dict(来自JsonOutputParser),转换为PRAnalysisResult
+                    if isinstance(result, dict):
+                        result = PRAnalysisResult(**result)
+                    return result
+                else:
+                    logger.error(f"生成PR分析时返回类型错误: {type(result)}")
+                    return None
+            except Exception as e:
+                logger.error(f"生成PR分析时发生错误: {e}")
+                return None
+        except Exception as e:
+            logger.error(f"生成PR分析时发生错误: {e}")
+            return None
+
+# ==================== 主处理类 ====================
+
+class PRCommentAnalyzer:
+    """PR评论分析器"""
+
+    def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", model_name: str = None, base_url: str = None):
+        if model_name is None:
+            model_name = MODEL_NAME
+
+        # 设置siliconflow API配置
+        global SILICONFLOW_API_KEY, SILICONFLOW_API_BASE
+        if siliconflow_api_key:
+            SILICONFLOW_API_KEY = siliconflow_api_key
+        if siliconflow_api_base:
+            SILICONFLOW_API_BASE = siliconflow_api_base
+
+        self.llm = LLMFactory.create_chat_llm(model_name)
+        self.file_analysis_chain = FileTextAnalysisChain(self.llm)
+        self.pr_analysis_chain = PRAnalysisChain(self.llm)
+
+    def cleanup(self):
+        """清理资源,确保程序能正确退出"""
+        try:
+            # 清理 LLM 连接
+            if hasattr(self.llm, 'client') and hasattr(self.llm.client, 'close'):
+                self.llm.client.close()
+            elif hasattr(self.llm, '_client') and hasattr(self.llm._client, 'close'):
+                self.llm._client.close()
+
+            # 如果是 ChatOpenAI,尝试关闭底层的 HTTP 客户端
+            if BACKEND_TYPE == "siliconflow" and hasattr(self.llm, 'client'):
+                try:
+                    # 强制关闭 httpx 客户端
+                    if hasattr(self.llm.client, '_client'):
+                        self.llm.client._client.close()
+                except Exception as e:
+                    logger.debug(f"关闭 HTTP 客户端时发生错误: {e}")
+
+            logger.info("资源清理完成")
+        except Exception as e:
+            logger.warning(f"清理资源时发生错误: {e}")
+
+    def analyze_pr_diff(self, diff_content: str, max_workers: int = None) -> CommentResult:
+        if max_workers is None:
+            max_workers = PROCESSING_MAX_WORKERS
+
+        logger.info("开始解析PR diff...")
+        files = DiffParser.parse_git_diff(diff_content)
+        logger.info(f"解析到 {len(files)} 个文件的改动")
+        if not files:
+            logger.warning("未找到任何文件改动")
+            return CommentResult(
+                pr_analysis=None,
+                file_analyses=[],
+                processed_files=0,
+                total_files=0,
+                error='未找到任何文件改动'
+            )
+
+        logger.info("开始并行处理各个文件的文本分析...")
+        file_analyses = []
+        # 使用更健壮的并发处理机制
+        executor = None
+        try:
+            executor = ThreadPoolExecutor(max_workers=max_workers)
+            future_to_file = {
+                executor.submit(self.file_analysis_chain.analyze, file_info): file_info.file_path
+                for file_info in files
+            }
+
+            # 设置更长的整体超时时间,避免与单个文件超时冲突
+            overall_timeout = SINGLE_FILE_TIMEOUT * len(files) + 600  # 给每个文件的时间 + 额外缓冲
+
+            completed_count = 0
+            total_count = len(future_to_file)
+
+            try:
+                for future in as_completed(future_to_file, timeout=overall_timeout):
+                    file_path = future_to_file[future]
+                    completed_count += 1
+                    try:
+                        analysis = future.result(timeout=5)  # 短暂缓冲时间,因为任务已经完成
+                        if analysis:
+                            file_analyses.append(analysis)
+                            logger.info(f"完成文件 {file_path} 的文本分析 ({completed_count}/{total_count})")
+                        else:
+                            logger.warning(f"文件 {file_path} 的文本分析失败 ({completed_count}/{total_count})")
+                    except (FutureTimeoutError, TimeoutError) as e:
+                        logger.error(f"文件 {file_path} 的文本分析获取超时,跳过该文件: {type(e).__name__} ({completed_count}/{total_count})")
+                        try:
+                            future.cancel()
+                        except Exception as cancel_e:
+                            logger.warning(f"取消任务时发生错误: {cancel_e}")
+                    except Exception as e:
+                        logger.error(f"处理文件 {file_path} 时发生异常: {e} ({completed_count}/{total_count})")
+            except (FutureTimeoutError, TimeoutError) as overall_e:
+                logger.error(f"整体处理超时({overall_timeout}秒),已完成{completed_count}/{total_count}个文件")
+                # 取消所有未完成的任务
+                for future in future_to_file:
+                    if not future.done():
+                        try:
+                            future.cancel()
+                        except Exception as cancel_e:
+                            logger.warning(f"取消未完成任务时发生错误: {cancel_e}")
+        finally:
+            # 确保线程池被正确关闭
+            if executor:
+                try:
+                    executor.shutdown(wait=True)
+                except Exception as shutdown_e:
+                    logger.warning(f"关闭主线程池时发生错误: {shutdown_e}")
+
+        logger.info(f"成功生成 {len(file_analyses)} 个文件的文本分析")
+        logger.info("开始生成PR整体分析...")
+        pr_analysis = None
+        if file_analyses:
+            logger.info(f"基于 {len(file_analyses)} 个成功处理的文件生成PR分析...")
+            try:
+                pr_analysis = self.pr_analysis_chain.generate(file_analyses)
+                if pr_analysis:
+                    logger.info("PR整体分析生成成功")
+                else:
+                    logger.warning("PR整体分析生成失败")
+            except Exception as e:
+                logger.error(f"生成PR分析时发生未预期的错误: {e}")
+        else:
+            logger.warning("没有成功处理的文件,跳过PR分析生成")
+
+        return CommentResult(
+            pr_analysis=pr_analysis,
+            file_analyses=file_analyses,
+            processed_files=len(file_analyses),
+            total_files=len(files)
+        )
+
+# ==================== 主函数 ====================
+
+def get_comment_analysis(sample_diff, siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1"):
+
+    analyzer = PRCommentAnalyzer(siliconflow_api_key, siliconflow_api_base)
+    result = None
+    try:
+        result = analyzer.analyze_pr_diff(sample_diff)
+    finally:
+        # 确保在函数退出前清理资源
+        analyzer.cleanup()
+
+    if not result:
+        print("处理失败,无法获取结果")
+        return None
+
+    if result.error:
+        print(f"错误: {result.error}")
+
+    print("\n=== 单文件文本分析 ===")
+    problematic_files = [f for f in result.file_analyses if f.has_text_changes and f.grammar_issues]
+    if problematic_files:
+        for analysis in problematic_files:
+            print(f"文件: {analysis.file_path}")
+            print(f"涉及文本变更: {analysis.has_text_changes}")
+            print(f"文本变更行: {analysis.text_lines}")
+            print(f"语法问题: {analysis.grammar_issues}")
+            print(f"分析详情: {analysis.analysis_details}")
+            print("-" * 50)
+    else:
+        print("所有文件语法正确,无需关注")
+
+    print("=== 处理结果 ===")
+    print(f"总文件数: {result.total_files}")
+    print(f"成功处理文件数: {result.processed_files}")
+
+    if result.pr_analysis:
+        print("\n=== PR整体分析 ===")
+        pr = result.pr_analysis
+        print(f"涉及文本变更: {pr.has_text_changes}")
+        print(f"文本变更类型: {pr.text_change_type}")
+        print(f"存在语法错误: {pr.has_grammar_errors}")
+        print(f"语法错误列表: {pr.grammar_errors}")
+        print(f"详细分析: {pr.detailed_analysis}")
+        print(f"改进建议: {pr.suggestions}")
+
+
+    return result
+
+if __name__ == "__main__":
+    # 微服务接口逻辑: 传递进来的就是 sample_diff 的内容
+    sample_diff = sys.argv[1]
+    result = get_comment_analysis(sample_diff)
+    print(result)
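A note on the trickiest part of comment_agent.py above: when a path contains non-ASCII characters (for example Chinese document names), git quotes the path and prints each raw byte as a \NNN octal escape, so consecutive escapes must be collected into one byte string before UTF-8 decoding; that is what DiffParser._decode_octal_sequences handles. A minimal standalone sketch of the same idea (illustrative only, not part of the patched module):

import re

def decode_octal_path(path: str) -> str:
    """Decode git-style \\NNN octal escapes (raw UTF-8 bytes) back into text."""
    def _sub(match: re.Match) -> str:
        octets = re.findall(r'\\([0-7]{3})', match.group(0))
        return bytes(int(o, 8) for o in octets).decode('utf-8', errors='replace')
    # Substitute each run of escapes at once so multi-byte characters survive.
    return re.sub(r'(?:\\[0-7]{3})+', _sub, path)

print(decode_octal_path(r'docs/\346\226\207\346\241\243.md'))  # -> docs/文档.md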
diff --git a/ci/tools/comment/create_comment.py b/ci/tools/comment/create_comment.py
new file mode 100644
index 00000000..481c7a2f
--- /dev/null
+++ b/ci/tools/comment/create_comment.py
@@ -0,0 +1,372 @@
+import argparse
+import json
+import logging
+import re
+import sys
+from dataclasses import dataclass, field
+from difflib import SequenceMatcher
+from typing import TypeVar, Generic
+from comment_agent import get_comment_analysis
+
+import requests
+import yaml
+
+logging.basicConfig(level=logging.INFO, stream=sys.stdout,
+                    format='%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s')
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Org:
+    org_name: str
+    comment_target_owner: str
+    comment_target_repo: str
+    auto_comment_enabled: bool = field(default=True)
+    confidence_threshold: float = field(default=0.7)
+    text_check_enabled: bool = field(default=True)
+    grammar_check_enabled: bool = field(default=True)
+
+
+@dataclass
+class CommentAgentConfig:
+    backend: dict = field(default_factory=dict)
+    model: dict = field(default_factory=dict)
+    processing: dict = field(default_factory=dict)
+    logging: dict = field(default_factory=dict)
+
+
+@dataclass
+class Config:
+    orgs: list[dict | Org]
+    comment_agent: dict | CommentAgentConfig = field(default_factory=dict)
+
+    def __post_init__(self):
+        tmp_orgs: list[Org] = []
+        for item in self.orgs:
+            tmp_orgs.append(Org(**item))
+        self.orgs = tmp_orgs
+
+        if isinstance(self.comment_agent, dict) and self.comment_agent:
+            self.comment_agent = CommentAgentConfig(**self.comment_agent)
+
+
+@dataclass
+class ReqArgs:
+    method: str
+    url: str
+    headers: dict[str, str]
+    params: dict[str, str] | None = field(default=None)
+    data: str | None = field(default=None)
+    timeout: int = field(default=180)
+
+
+T = TypeVar('T')
+content_type_is_text = "text/plain"
+content_type_is_json_dict = {}
+content_type_is_json_list = []
+
+
+def send_request(args: ReqArgs, t: Generic[T]) -> T:
+    error_count = 0
+    while error_count < 3:
+        try:
+            resp = requests.request(**args.__dict__)
+            resp.raise_for_status()
+            if type(t) is dict or type(t) is list:
+                res_data: dict | list = resp.json()
+            else:
+                res_data: str = resp.text
+        except requests.exceptions.RequestException as e:
+            if e.response.status_code in [400, 401, 403, 404, 405]:
+                logger.error("[ERROR] client error {}".format(e))
+                break
+            logger.error("[ERROR] server error: {}".format(e))
+            error_count += 1
+        else:
+            logger.info("[OK] [{}], {}".format(args.method, args.url))
+            return res_data
+    return None
+
+
+class GiteeClient:
+    """
+    Gitee OpenAPI 客户端
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    def __init__(self, developer_token: str):
+        """
+        构造函数
+        :param developer_token: Gitee v5 token
+        """
+        self.headers["Authorization"] = "Bearer {}".format(developer_token)
+
+    def get_diff_content(self, owner: str, repo: str, number: int) -> str | None:
+        req_url = "https://gitee.com/{}/{}/pulls/{}.diff".format(owner, repo, number)
+        req_args = ReqArgs(method="GET", url=req_url, headers=self.headers)
+        result: str | None = send_request(req_args, "")
+        if result is None:
+            logger.error("can not get diff file from PR: {}".format(req_url))
+        return result
+
+    def add_pr_comment(self, owner, repo, number, body):
+        req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number)
+        req_body = {
+            "body": "### 🤖 AI审查反馈 \n {} ".format(body)
+        }
+        req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body))
+        result: dict | None = send_request(req_args, {})
+        return result is not None
+
+
+def get_diff_file_list(diff_content: str) -> list[str]:
+    diff_files_list = []
+    diff_files = [x.split(' ')[0][2:] for x in diff_content.split('diff --git ')[1:]]
+    for diff_file in diff_files:
+        if diff_file.endswith('\"'):
+            d = re.compile(r'/[\d\s\S]+')
+            diff_file = d.findall(diff_file)
+            diff_file = diff_file[0].replace('/', '', 1).replace('\"', '')
+            diff_files_list.append(diff_file)
+        else:
+            diff_files_list.append(diff_file)
+    return diff_files_list
+
+
+def generate_comment_content(comment_result, pr_url: str, analysis_status: str = "success") -> str:
+    """根据分析结果生成评论内容"""
+    comment_body = ""
+
+    # 根据分析状态添加不同的状态标识
+    if analysis_status == "error":
+        comment_body += "### 分析状态:处理失败\n"
+        comment_body += "**分析过程中发生错误,无法生成详细反馈。请手动审查文本变更。**\n\n"
+    elif analysis_status == "low_confidence":
+        comment_body += "### 分析状态:置信度较低\n"
+        comment_body += "**当前分析置信度较低,结果仅供参考。建议进行人工审查。**\n\n"
+    elif analysis_status == "no_text_changes":
+        comment_body += "### 分析状态:无文本问题\n"
+        comment_body += "**AI分析结果显示本次PR未发现明显的文本变更或语法问题。无需改动。**\n\n"
+    elif analysis_status == "no_grammar_errors":
+        comment_body += "### 分析状态:文本质量良好\n"
+        comment_body += "**检测到文本变更,但未发现明显的语法错误,文本质量良好。无需改动。**\n\n"
+    else:  # success with issues
+        comment_body += "### 分析状态:发现需要关注的问题\n"
+        comment_body += "**AI分析发现了一些文本变更或语法问题,请查看下方详细信息。**\n\n"
+
+    # 如果有分析结果,添加详细信息
+    if comment_result and not comment_result.error:
+        # 如果有PR整体分析
+        if comment_result.pr_analysis:
+            pr_analysis = comment_result.pr_analysis
+
+            # 添加整体评估摘要
+            comment_body += "## 整体评估\n"
+            comment_body += f"- 涉及文本变更: {'是' if pr_analysis.has_text_changes else '否'}\n"
+            comment_body += f"- 文本变更类型: {pr_analysis.text_change_type}\n"
+            comment_body += f"- 存在语法错误: {'是' if pr_analysis.has_grammar_errors else '否'}\n\n"
+
+            # 添加详细分析
+            if pr_analysis.detailed_analysis:
+                comment_body += "## 详细分析\n"
+                comment_body += f"{pr_analysis.detailed_analysis}\n\n"
+
+            # 添加语法错误列表
+            if pr_analysis.grammar_errors:
+                comment_body += "## 语法问题\n"
+                for i, error in enumerate(pr_analysis.grammar_errors, 1):
+                    comment_body += f"{i}. {error}\n"
+                comment_body += "\n"
+
+            # 添加改进建议
+            if pr_analysis.suggestions:
+                comment_body += "## 改进建议\n"
+                for i, suggestion in enumerate(pr_analysis.suggestions, 1):
+                    comment_body += f"{i}. {suggestion}\n"
+                comment_body += "\n"
+
+        # 添加文件级别的分析结果
+        if comment_result.file_analyses:
+            # comment_body += "## 文件分析\n"
+
+            # 统计有问题的文件
+            files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues]
+            files_without_issues = [f for f in comment_result.file_analyses if not f.has_text_changes and not f.grammar_issues]
+
+            if files_with_issues:
+                comment_body += f"### 需要关注的文件 ({len(files_with_issues)} 个)\n"
+                for i, file_analysis in enumerate(files_with_issues, 1):
+                    comment_body += f"\n**{i}. {file_analysis.file_path}**\n"
+
+                    if file_analysis.has_text_changes:
+                        comment_body += f"- 文本变更: 检测到英文文本改动\n"
+                        if file_analysis.text_lines:
+                            comment_body += f"- 涉及行数: {len(file_analysis.text_lines)} 行\n"
+
+                    if file_analysis.grammar_issues:
+                        comment_body += f"- 语法问题: 发现 {len(file_analysis.grammar_issues)} 个问题\n"
+                        for j, issue in enumerate(file_analysis.grammar_issues, 1):
+                            comment_body += f" {j}. {issue}\n"
+
+                    if file_analysis.analysis_details:
+                        comment_body += f"- 分析详情: {file_analysis.analysis_details}\n"
+
+            if files_without_issues:
+                comment_body += f"\n### 无问题的文件 ({len(files_without_issues)} 个)\n"
+                for file_analysis in files_without_issues:
+                    comment_body += f"- {file_analysis.file_path}\n"
+
+    # 添加处理统计
+    # comment_body += f"\n### 处理统计\n"
+    # comment_body += f"- 总文件数: {comment_result.total_files}\n"
+    # comment_body += f"- 成功分析: {comment_result.processed_files}\n"
+    # comment_body += f"- 有文本变更: {len([f for f in comment_result.file_analyses if f.has_text_changes])}\n"
+    # comment_body += f"- 有语法问题: {len([f for f in comment_result.file_analyses if f.grammar_issues])}\n"
+
+    # 添加免责声明
+    comment_body += "## 免责声明\n"
+    comment_body += "本评论内容基于AI Agent技术自动生成,仅供参考。请开发者根据实际情况进行判断和修改。\n"
+
+    return comment_body
+
+
+class Args:
+    gitee_token: str
+    pr_owner: str
+    pr_repo: str
+    pr_number: int
+    siliconflow_api_key: str = ""
+    siliconflow_api_base: str = "https://api.siliconflow.cn/v1"
+
+    def validate(self):
+        valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number
+        if not valid:
+            logger.error("Invalid Command Arguments")
+            sys.exit(1)
+
+
+def load_config_yaml(yaml_path):
+    with open(yaml_path, "r", encoding="utf-8") as config_in:
+        data = yaml.safe_load(config_in)
+
+    if data is None:
+        return None
+    return Config(**data)
+
+
+def create_comment_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str,
+                                               pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str):
+    pr_html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number)
+
+    for org_item in conf.orgs:
+        if org_item.org_name != pr_owner:
+            continue
+
+        if not org_item.auto_comment_enabled:
+            logger.info(f"组织 {org_item.org_name} 未启用自动评论功能")
+            continue
+
+        # 移除文件筛选逻辑,对所有PR平等处理
+        logger.info("开始对PR进行全面文本分析(不限制文件类型和路径)")
+
+        # 获取diff内容
+        diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number)
+        if diff_content is None:
+            logger.error("无法获取PR的diff内容")
+            sys.exit(1)
+
+        # 调用AI Agent进行分析
+        logger.info("开始进行AI代码审查分析...")
+        comment_result = get_comment_analysis(diff_content, siliconflow_api_key, siliconflow_api_base)
+
+        if not comment_result:
+            logger.error("AI分析失败,将发布错误状态评论")
+            # 创建一个错误结果对象,确保能发布评论
+            from comment_agent import CommentResult
+            comment_result = CommentResult(
+                pr_analysis=None,
+                file_analyses=[],
+                processed_files=0,
+                total_files=0,
+                error="AI分析过程失败"
+            )
+
+        # 确定分析状态和评论内容
+        analysis_status = "success"
+
+        if comment_result.error:
+            analysis_status = "error"
+            logger.info("AI分析过程出错,将发布错误状态评论")
+        elif comment_result.pr_analysis:
+            pr_analysis = comment_result.pr_analysis
+
+            # 检查是否有文本变更或语法错误
+            if pr_analysis.has_text_changes and pr_analysis.has_grammar_errors:
+                analysis_status = "success"  # 有问题,正常处理
+                logger.info("检测到文本变更和语法错误,将发布问题报告评论")
+            elif pr_analysis.has_text_changes and not pr_analysis.has_grammar_errors:
+                analysis_status = "no_grammar_errors"
+                logger.info("检测到文本变更但无语法错误,将发布文本质量良好的评论")
+            elif not pr_analysis.has_text_changes:
+                analysis_status = "no_text_changes"
+                logger.info("未检测到文本变更,将发布无文本问题的评论")
+            else:
+                analysis_status = "success"
+                logger.info("检测到需要关注的问题,将发布详细分析评论")
+        else:
+            # 如果没有整体分析,检查是否有文件级别的问题
+            files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues]
+            if files_with_issues:
+                analysis_status = "success"
+                logger.info(f"检测到 {len(files_with_issues)} 个文件有文本问题,将发布文件级别问题评论")
+            else:
+                analysis_status = "no_text_changes"
+                logger.info("未检测到文件级别问题,将发布无问题评论")
+
+        # 总是生成和发布评论
+        comment_content = generate_comment_content(
+            comment_result,
+            pr_html_url,
+            analysis_status
+        )
+
+        # 发布评论
+        success = cli.add_pr_comment(pr_owner, pr_repo, pr_number, comment_content)
+        if success:
+            logger.info(f"AI代码审查评论发布成功 - 状态: {analysis_status}")
+        else:
+            logger.error(f"AI代码审查评论发布失败 - 状态: {analysis_status}")
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Create AI-powered PR comment based on text analysis')
+    parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token')
+    parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner')
+    parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo')
+    parser.add_argument('--pr_number', type=str, required=True, help='the PR number')
+    parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow')
+    parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow')
+    args = Args()
+    parser.parse_args(args=sys.argv[1:], namespace=args)
+    args.validate()
+
+    exec_py = sys.argv[0]
+    config_yaml_path = exec_py[:-2] + 'yaml'
+    conf = load_config_yaml(config_yaml_path)
+
+    cli = GiteeClient(args.gitee_token)
+
+    pr_owner = args.pr_owner
+    pr_repo = args.pr_repo
+    pr_number = args.pr_number
+    siliconflow_api_key = args.siliconflow_api_key
+    siliconflow_api_base = args.siliconflow_api_base
+    create_comment_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base)
+
+
+if __name__ == '__main__':
+    main()
logger.info("未检测到文件级别问题,将发布无问题评论") + + # 总是生成和发布评论 + comment_content = generate_comment_content( + comment_result, + pr_html_url, + analysis_status + ) + + # 发布评论 + success = cli.add_pr_comment(pr_owner, pr_repo, pr_number, comment_content) + if success: + logger.info(f"AI代码审查评论发布成功 - 状态: {analysis_status}") + else: + logger.error(f"AI代码审查评论发布失败 - 状态: {analysis_status}") + + +def main(): + parser = argparse.ArgumentParser(description='Create AI-powered PR comment based on text analysis') + parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token') + parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner') + parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') + parser.add_argument('--pr_number', type=str, required=True, help='the PR number') + parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') + parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') + args = Args() + parser.parse_args(args=sys.argv[1:], namespace=args) + args.validate() + + exec_py = sys.argv[0] + config_yaml_path = exec_py[:-2] + 'yaml' + conf = load_config_yaml(config_yaml_path) + + cli = GiteeClient(args.gitee_token) + + pr_owner = args.pr_owner + pr_repo = args.pr_repo + pr_number = args.pr_number + siliconflow_api_key = args.siliconflow_api_key + siliconflow_api_base = args.siliconflow_api_base + create_comment_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) + + +if __name__ == '__main__': + main() diff --git a/ci/tools/comment/create_comment.yaml b/ci/tools/comment/create_comment.yaml new file mode 100644 index 00000000..32ac269a --- /dev/null +++ b/ci/tools/comment/create_comment.yaml @@ -0,0 +1,38 @@ +# Comment Agent Configuration +comment_agent: + # Backend Configuration + backend: + type: "siliconflow" # Options: "ollama" or "siliconflow" + # siliconflow配置现在通过命令行参数传入 + + # Model Configuration + model: + name: "Qwen/Qwen3-8B" + temperature: 0.1 + max_retry: 5 # For siliconflow backend + + # Processing Configuration + processing: + max_workers: 8 # Number of parallel workers for file processing + single_file_timeout: 180 # Timeout for single file analysis (seconds) + total_comment_timeout: 300 # Timeout for total PR analysis (seconds) + + # Logging Configuration + logging: + level: "INFO" + +# PR Comment Configuration +orgs: + - org_name: openeuler + comment_target_owner: openeuler + comment_target_repo: docs + auto_comment_enabled: true + text_check_enabled: true # 是否启用文本变更检测 + grammar_check_enabled: true # 是否启用语法错误检测 + + - org_name: src-openeuler + comment_target_owner: openeuler + comment_target_repo: globalization + auto_comment_enabled: true + text_check_enabled: true + grammar_check_enabled: true \ No newline at end of file -- Gitee From e11aacffb261b30d184868d2396c31fad07c2e63 Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Fri, 24 Oct 2025 13:48:31 +0800 Subject: [PATCH 2/2] update ci-bot for auto generating comment --- ci/tools/comment/comment_agent.py | 1960 +++++++++++++------------- ci/tools/comment/create_comment.py | 154 +- ci/tools/comment/create_comment.yaml | 2 +- 3 files changed, 1060 insertions(+), 1056 deletions(-) diff --git a/ci/tools/comment/comment_agent.py b/ci/tools/comment/comment_agent.py index 25dbe385..797a2522 100644 --- a/ci/tools/comment/comment_agent.py +++ 

From e11aacffb261b30d184868d2396c31fad07c2e63 Mon Sep 17 00:00:00 2001
From: petermouse666 <708975811@qq.com>
Date: Fri, 24 Oct 2025 13:48:31 +0800
Subject: [PATCH 2/2] update ci-bot for auto generating comment

---
 ci/tools/comment/comment_agent.py    | 1960 +++++++++--------
 ci/tools/comment/create_comment.py   |  154 +-
 ci/tools/comment/create_comment.yaml |    2 +-
 3 files changed, 1060 insertions(+), 1056 deletions(-)

diff --git a/ci/tools/comment/comment_agent.py b/ci/tools/comment/comment_agent.py
index 25dbe385..797a2522 100644
--- a/ci/tools/comment/comment_agent.py
+++ b/ci/tools/comment/comment_agent.py
@@ -1,958 +1,1002 @@
+import json
+import re
+import logging
+import urllib.parse
+from typing import List, Dict, Any, Optional, Tuple, Literal
+from dataclasses import dataclass
+from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError as FutureTimeoutError
+from pathlib import Path
+import sys
+import time
+# LangChain imports
+from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
+from langchain_core.runnables import RunnableLambda, RunnablePassthrough
+from pydantic import BaseModel, Field, SecretStr
+from langchain_community.llms import Ollama
+from langchain_ollama import ChatOllama
+from langchain.chains import TransformChain, SequentialChain
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_openai import ChatOpenAI
+import yaml
+
+# ==================== 配置加载 ====================
+
+def load_config(config_file="create_comment.yaml"):
+    """从YAML文件加载配置"""
+    try:
+        with open(config_file, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f)
+        return config.get('comment_agent', {})
+    except FileNotFoundError:
+        print(f"配置文件 {config_file} 不存在")
+        raise
+    except yaml.YAMLError as e:
+        print(f"解析配置文件时发生错误: {e}")
+        raise
+
+# 加载配置
+_config = load_config()
+
+# ==================== 配置常量 ====================
+
+BACKEND_TYPE = _config.get('backend', {}).get('type', 'siliconflow')
+MODEL_NAME = _config.get('model', {}).get('name', 'Qwen/Qwen3-8B')
+MODEL_TEMPERATURE = _config.get('model', {}).get('temperature', 0.1)
+MODEL_MAX_RETRY = _config.get('model', {}).get('max_retry', 5)
+PROCESSING_MAX_WORKERS = _config.get('processing', {}).get('max_workers', 8)
+SINGLE_FILE_TIMEOUT = _config.get('processing', {}).get('single_file_timeout', 180)
+TOTAL_COMMENT_TIMEOUT = _config.get('processing', {}).get('total_comment_timeout', 300)
+LOGGING_LEVEL = _config.get('logging', {}).get('level', 'INFO')
+SILICONFLOW_API_KEY = ''
+SILICONFLOW_API_BASE = ''
+
+# 配置日志
+logging.basicConfig(level=getattr(logging, LOGGING_LEVEL.upper()))
+logger = logging.getLogger(__name__)
+
+# ==================== 数据模型定义 ====================
+
+class 
PRAnalysisResult(BaseModel): + """PR分析结果的结构化输出""" + has_text_changes: bool = Field(description="是否有文本变更", default=True) + text_change_type: str = Field(description="文本变更类型", default="") + has_grammar_errors: bool = Field(description="是否存在语法错误", default=False) + grammar_errors: List[str] = Field(description="语法错误列表", default=[]) + detailed_analysis: str = Field(description="详细分析说明") + suggestions: List[str] = Field(description="改进建议列表", default=[]) + +class FileTextAnalysis(BaseModel): + """单个文件的文本分析""" + file_path: str = Field(description="文件路径", default="") + grammar_issues: List[str] = Field(description="语法问题列表", default=[]) + analysis_details: str = Field(description="分析详情") + +@dataclass +class DiffFileInfo: + """单个文件的diff信息""" + file_path: str + diff_content: str + lines_added: int + lines_deleted: int + +@dataclass +class CommentResult: + """评论生成结果""" + pr_analysis: Optional[PRAnalysisResult] + file_analyses: List[FileTextAnalysis] + processed_files: int + total_files: int + error: Optional[str] = None + + +# ==================== 工具函数 ==================== + +class DiffParser: + """Git Diff 解析器""" + + @staticmethod + def filter_docs_en_files(diff_content: str) -> str: + """ + 过滤diff内容,只保留docs/en路径下的文件变更 + """ + if not diff_content: + return "" + + lines = diff_content.split('\n') + filtered_lines = [] + current_file_section = [] + in_docs_en_file = False + current_file_path = "" + + for line in lines: + if line.startswith('diff --git'): + # 处理前一个文件 + if in_docs_en_file and current_file_section: + filtered_lines.extend(current_file_section) + logger.info(f"包含docs/en路径下的文件: {current_file_path}") + + # 检查新文件是否在docs/en路径下 + current_file_section = [line] + in_docs_en_file = False + current_file_path = "" + + # 提取文件路径 + if ' a/' in line and ' b/' in line: + # 找到 a/ 和 b/ 的位置 + a_pos = line.find(' a/') + b_pos = line.find(' b/') + + if a_pos != -1 and b_pos != -1 and a_pos < b_pos: + # 提取a/和b/之间的路径 + a_start = a_pos + 3 # 跳过 ' a/' + current_file_path = line[a_start:b_pos] + + # 检查是否在docs/en路径下 + if current_file_path.startswith('docs/en/'): + in_docs_en_file = True + else: + # 继续当前文件的内容 + current_file_section.append(line) + + # 处理最后一个文件 + if in_docs_en_file and current_file_section: + filtered_lines.extend(current_file_section) + logger.info(f"包含docs/en路径下的文件: {current_file_path}") + + return '\n'.join(filtered_lines) + + @staticmethod + def parse_git_diff(diff_content: str) -> List[DiffFileInfo]: + """ + 解析git diff内容,提取每个文件的改动信息 + + Args: + diff_content: git diff的原始内容 + + Returns: + 包含文件路径和对应diff内容的列表 + """ + + files = [] + current_file = None + current_diff = [] + + lines = diff_content.strip().split('\n') + + for line in lines: + # 匹配文件路径行 + if line.startswith('diff --git'): + # 保存前一个文件的信息 + if current_file and current_diff: + diff_info = DiffParser._create_diff_file_info(current_file, current_diff) + if diff_info: + files.append(diff_info) + + # 提取文件路径 - 改进的解析逻辑 + current_file = DiffParser._extract_file_path(line) + if current_file: + current_diff = [line] + else: + current_diff = [] + elif current_file: + current_diff.append(line) + + # 添加最后一个文件 + if current_file and current_diff: + diff_info = DiffParser._create_diff_file_info(current_file, current_diff) + if diff_info: + files.append(diff_info) + + return files + + @staticmethod + def _extract_file_path(diff_line: str) -> Optional[str]: + """ + 从git diff行中提取文件路径,支持包含汉字的文件名 + + Args: + diff_line: git diff的文件头行,格式如 "diff --git a/path/to/file b/path/to/file" + + Returns: + 提取出的文件路径,如果解析失败则返回None + """ + try: + # 方法1: 处理引号包围的路径(Git对特殊字符的处理) 
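+            # 示例(假设的文件名,仅作说明):Git会把含非ASCII字符的路径
+            # 写成带引号、八进制转义的形式,例如:
+            #   diff --git "a/docs/zh/\346\226\207\346\241\243.md" "b/docs/zh/\346\226\207\346\241\243.md"
+            # 这类行先由下面的带引号正则匹配,其中的八进制序列再由
+            # _decode_file_path/_decode_octal_sequences还原为 docs/zh/文档.md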
+ # 格式: diff --git "a/path/to/file" "b/path/to/file" + quoted_pattern = r'diff --git "a/(.+?)" "b/(.+?)"' + quoted_match = re.match(quoted_pattern, diff_line) + + if quoted_match: + file_path_a = quoted_match.group(1) + file_path_b = quoted_match.group(2) + # 通常a和b路径相同,使用a路径(旧文件路径) + file_path = file_path_a + else: + # 方法2: 使用正则表达式匹配标准的git diff格式 + # 格式: diff --git a/path/to/file b/path/to/file + pattern = r'diff --git a/(.+?) b/(.+?)(?:\s|$)' + match = re.match(pattern, diff_line) + + if match: + file_path_a = match.group(1) + file_path_b = match.group(2) + # 通常a和b路径相同,使用a路径(旧文件路径) + file_path = file_path_a + else: + # 方法3: 如果正则匹配失败,尝试更简单的解析 + # 处理可能包含空格和特殊字符的文件名 + if ' a/' in diff_line and ' b/' in diff_line: + # 找到 a/ 和 b/ 的位置 + a_pos = diff_line.find(' a/') + b_pos = diff_line.find(' b/') + + if a_pos != -1 and b_pos != -1 and a_pos < b_pos: + # 提取a/和b/之间的路径 + a_start = a_pos + 3 # 跳过 ' a/' + file_path = diff_line[a_start:b_pos] + else: + return None + else: + # 方法4: 最后的备选方案,简单的字符串分割 + parts = diff_line.split() + if len(parts) >= 3: + a_path = parts[2] + if a_path.startswith('a/'): + file_path = a_path[2:] # 移除'a/'前缀 + else: + return None + else: + return None + + # 处理文件名编码 + return DiffParser._decode_file_path(file_path) + + except Exception as e: + logger.warning(f"解析文件路径时发生错误: {e}, diff行: {diff_line}") + return None + + @staticmethod + def _decode_file_path(file_path: str) -> str: + """ + 解码文件路径,处理各种编码情况 + + Args: + file_path: 原始文件路径 + + Returns: + 解码后的文件路径 + """ + try: + # 首先尝试URL解码,处理Git编码的文件名 + decoded_path = urllib.parse.unquote(file_path, encoding='utf-8') + + # 处理Git对特殊字符的引号包装 + if decoded_path.startswith('"') and decoded_path.endswith('"'): + decoded_path = decoded_path[1:-1] + # Git使用反斜杠转义,需要处理转义序列 + decoded_path = decoded_path.replace('\\"', '"') + decoded_path = decoded_path.replace('\\\\', '\\') + + # 无论是否有引号包装,都尝试处理八进制编码 + # 检查是否包含八进制转义序列 + if '\\' in decoded_path and re.search(r'\\[0-7]{3}', decoded_path): + decoded_path = DiffParser._decode_octal_sequences(decoded_path) + + return decoded_path + + except Exception as e: + logger.warning(f"解码文件路径时发生错误: {e}, 原始路径: {file_path}") + return file_path + + @staticmethod + def _decode_octal_sequences(text: str) -> str: + """ + 解码文本中的八进制转义序列 + + Args: + text: 包含八进制转义序列的文本 + + Returns: + 解码后的文本 + """ + try: + # 查找八进制转义序列模式:\xxx + pattern = r'\\([0-7]{3})' + + # 找到所有八进制序列 + matches = list(re.finditer(pattern, text)) + if not matches: + return text + + # 收集所有字节值 + result = "" + last_end = 0 + bytes_buffer = [] + + for i, match in enumerate(matches): + # 添加匹配前的文本 + if match.start() > last_end: + # 如果有缓冲的字节,先处理它们 + if bytes_buffer: + try: + decoded_bytes = bytes(bytes_buffer).decode('utf-8') + result += decoded_bytes + bytes_buffer = [] + except UnicodeDecodeError: + # 如果解码失败,保持原始形式 + for byte_val in bytes_buffer: + result += f"\\{oct(byte_val)[2:].zfill(3)}" + bytes_buffer = [] + + result += text[last_end:match.start()] + + # 处理当前八进制序列 + octal_str = match.group(1) + try: + byte_value = int(octal_str, 8) + bytes_buffer.append(byte_value) + except ValueError: + # 如果转换失败,添加原始字符串 + if bytes_buffer: + try: + decoded_bytes = bytes(bytes_buffer).decode('utf-8') + result += decoded_bytes + bytes_buffer = [] + except UnicodeDecodeError: + for byte_val in bytes_buffer: + result += f"\\{oct(byte_val)[2:].zfill(3)}" + bytes_buffer = [] + result += match.group(0) + + last_end = match.end() + + # 检查是否是最后一个匹配或下一个匹配不连续 + is_last = (i == len(matches) - 1) + is_next_non_consecutive = (not is_last and + matches[i + 1].start() != match.end()) + + if 
is_last or is_next_non_consecutive:
+                    # 处理缓冲的字节
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                            # 修复:成功解码后必须写回result,否则缓冲的字节会被静默丢弃
+                            result += decoded_bytes
+                        except UnicodeDecodeError:
+                            # 如果解码失败,保持原始形式
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                        bytes_buffer = []
+
+            # 添加剩余的文本
+            if last_end < len(text):
+                result += text[last_end:]
+
+            return result
+
+        except Exception as e:
+            logger.warning(f"解码八进制序列时发生错误: {e}, 原始文本: {text}")
+            return text
+
+    @staticmethod
+    def _create_diff_file_info(file_path: str, diff_lines: List[str]) -> Optional[DiffFileInfo]:
+        """创建DiffFileInfo对象"""
+        diff_content = '\n'.join(diff_lines)
+        lines_added, lines_deleted = DiffParser._count_lines_changed(diff_content)
+
+        return DiffFileInfo(
+            file_path=file_path,
+            diff_content=diff_content,
+            lines_added=lines_added,
+            lines_deleted=lines_deleted
+        )
+
+    @staticmethod
+    def _count_lines_changed(diff_content: str) -> Tuple[int, int]:
+        """统计git diff中改动的行数"""
+        lines_added, lines_deleted = 0, 0
+        lines = diff_content.strip().split('\n')
+
+        for line in lines:
+            # 统计新增行(以+开头,但不是+++)
+            if line.startswith('+') and not line.startswith('+++'):
+                lines_added += 1
+            # 统计删除行(以-开头,但不是---)
+            elif line.startswith('-') and not line.startswith('---'):
+                lines_deleted += 1
+
+        return lines_added, lines_deleted
+
+    @staticmethod
+    def is_punctuation_only_change(diff_content: str) -> bool:
+        """判断一个 diff 是否仅包含标点/空白改动(不包含英文字母数字层面的实质变化)
+
+        核心逻辑:
+        - 提取added_text和removed_text
+        - 只保留英文字母数字和下划线进行对比
+        - 如果这部分相同,说明英文内容没变,只是标点/空白/中文改了
+        - 如果这部分不同,说明有英文内容变更,不是"仅标点改动"
+
+        注意:此函数主要用于过滤纯标点/空白改动,避免对这类改动进行语法检查
+        """
+        try:
+            added_parts = []
+            removed_parts = []
+            for raw_line in diff_content.strip().split('\n'):
+                if raw_line.startswith('+++') or raw_line.startswith('---'):
+                    continue
+                if raw_line.startswith('+'):
+                    added_parts.append(raw_line[1:])
+                elif raw_line.startswith('-'):
+                    removed_parts.append(raw_line[1:])
+
+            added_text = '\n'.join(added_parts)
+            removed_text = '\n'.join(removed_parts)
+
+            # 如果没有改动,返回False
+            if added_text == removed_text:
+                return False
+
+            # 只保留英文字母数字和下划线
+            def keep_word_chars(s: str) -> str:
+                return re.sub(r'[^A-Za-z0-9_]', '', s)
+
+            added_word_chars = keep_word_chars(added_text)
+            removed_word_chars = keep_word_chars(removed_text)
+
+            # 核心判断:如果英文字母数字部分完全相同,才认为是"仅标点改动"
+            #
+            # 会被识别为"仅标点改动"(返回True,跳过语法检查):
+            # 1. 纯标点改动(如逗号改句号)
+            # 2. 空白改动(如空格、换行)
+            # 3. 中文标点改动
+            #
+            # 不会被识别为"仅标点改动"(返回False,进入语法检查):
+            # 1. 新增/删除英文字母
+            # 2. 英文单词拼写改动
+            # 3. 英文内容的任何实质性改动
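+            #
+            # 用法示意(假设的最小diff输入,仅作说明):
+            #   is_punctuation_only_change("-Hello, world\n+Hello. world")
+            #       -> True(字母数字序列相同,仅标点不同,跳过语法检查)
+            #   is_punctuation_only_change("-recieve data\n+receive data")
+            #       -> False(字母序列发生变化,进入语法检查)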
+            if added_word_chars == removed_word_chars and added_text != removed_text:
+                # 额外检查:如果两者都没有英文字母数字(纯中文/标点改动)
+                # 并且原始文本长度差异很大,可能不只是标点改动
+                if not added_word_chars and not removed_word_chars:
+                    # 如果都是纯中文/标点,检查长度差异
+                    # 长度差异超过10个字符,可能是中文内容的实质性改动
+                    if abs(len(added_text) - len(removed_text)) > 10:
+                        return False
+                return True
+
+            return False
+        except Exception as e:
+            logger.debug(f"判定仅标点改动时发生错误: {e}")
+            return False
+
+# ==================== LangChain 组件 ====================
+
+class LLMFactory:
+    """LLM工厂类"""
+
+    @staticmethod
+    def create_chat_llm(model_name: str = None, base_url: str = None):
+        """创建LLM实例"""
+        if model_name is None:
+            model_name = MODEL_NAME
+
+        if BACKEND_TYPE == "siliconflow":
+            return ChatOpenAI(
+                model=model_name,
+                api_key=SecretStr(SILICONFLOW_API_KEY),
+                base_url=SILICONFLOW_API_BASE,
+                temperature=0  # 使用0温度确保最大确定性和一致性
+            )
+        else:
+            raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}")
+
+class FileTextAnalysisChain:
+    """单文件文本分析任务链"""
+
+    def __init__(self, llm: ChatOpenAI):
+        self.llm = llm
+
+        # 创建输出解析器
+        self.output_parser = JsonOutputParser(pydantic_object=FileTextAnalysis)
+
+        # 输出格式说明
+        format_instructions = """
+请以JSON格式输出,包含以下字段:
+{{
+    "grammar_issues": "语法问题列表(字符串列表,如无问题则为空列表)",
+    "analysis_details": "分析详情(字符串)"
+}}
+"""
+        # 创建新的prompt模板
+        system_template = """
+你是英文语法检查专家,专门审查文档中英文文本的明显拼写和语法错误。
+
+【核心原则:严格、一致、客观】
+必须对所有文件使用完全相同的判断标准!对同类错误必须给出一致的结论!
+
+【必须检查的错误类型】
+严格按照以下标准判断,不得有任何主观性:
+
+1. 明显的拼写错误:
+   - 常见单词拼写错误(如:recieve → receive, teh → the, seperate → separate)
+   - 随机字符串/无意义字符序列(如:awfawfwafaw, asvasvasv, xyzabc等)
+   - 判断标准:如果一个英文字符串不是:
+     * 技术词典中正确拼写的英文单词
+     * 技术术语(如:JSON, API, HTTP)
+     * 专有名词(如:GitHub, openEuler)
+     * 缩写词(如:PR, CI, CD)
+     * 文件名/路径/命令等
+     则认定为拼写错误
+
+2. 明显的时态错误:
+   - He go yesterday → He went yesterday
+   - She don't went → She didn't go
+   - 必须是显而易见的时态不匹配
+
+3. 严重的主谓不一致:
+   - They is → They are
+   - He are → He is
+   - It is sings → It sings/It is singing
+   - 必须是显而易见的主谓不匹配
+
+4. 其他明显的语法错误:
+   - 动词形式错误(如:He can goes → He can go)
+   - 名词单复数错误(如:many book → many books)
+   - 介词使用错误(如:depend in → depend on)
+   - 冠词使用错误(如:a apple → an apple)
+   - 语态使用错误(如:主动语态和被动语态混淆)
+
+【完全忽略以下内容,直接输出"无需关注"】
+- 任何中文文本(包括中文列表项、中文注释、中文文档)
+- 所有格式问题:链接格式、大小写格式、标点符号、空格、缩进、换行
+- 代码/命令/文件名/路径/配置/脚本/Shell命令(如:/etc/yum.conf, npm install)
+- Markdown语法:标题、列表、表格、链接、图片
+- 专有名词:GitHub、openEuler、Gitee、GVP、CVE、CWE等
+- 口语化表达或技术文档中的简化表达
+- 缺少冠词的表达(口语化和技术文档中常见且可接受)
+- 句子结构简化(技术文档中常见且可接受)
+
+【判定流程 - 严格执行】
+对于每个新增的英文文本:
+1. 是否在代码/配置/路径/命令中?→ 是 → "无需关注"
+2. 是否是专有名词/技术术语/缩写?→ 是 → "无需关注"
+3. 是否是标准英文单词?→ 否 → 检查是否为随机字符串
+4. 如果是随机无意义字符串(无法识别字符串的含义)→ 报告为拼写错误
+5. 如果是完整句子 → 检查时态、主谓一致性、动词形式、名词单复数、介词、冠词等
+6. 
其他情况 → "无需关注" + +【一致性要求】 +对于以下情况必须一致判断: +- awfawfwafaw, asvasvasv, xyzabc 等无意义的随机字符串 → 必须全部识别 +- recieve, teh, seperate 等常见拼写错误 → 必须全部识别 +- 同样的语法错误在不同文件中 → 必须得出相同结论 + +【输出要求】 +- 必须使用中文输出所有分析内容 +- analysis_details字段必须用中文解释问题 +- grammar_issues列表中的每一项都必须用中文描述 +- 对于随机字符串,明确指出"随机无意义字符串"或"拼写错误" +- 保持判断的一致性和可重复性 +- 确保准确完整地识别所有问题类型,不遗漏任何明显的错误 + +{format_instructions} +""" + human_template = """ +文件路径: {file_path} + +Git Diff 内容: +{diff_content} +""" + self.prompt = ChatPromptTemplate.from_messages([ + ("system", system_template.format(format_instructions=format_instructions)), + ("human", human_template) + ]) + self.chain = self.prompt | self.llm | self.output_parser + + def analyze(self, diff_file_info: DiffFileInfo) -> Optional[FileTextAnalysis]: + """分析单个文件的文本变更""" + logger.info(f"开始分析文件: {diff_file_info.file_path}") + max_retry = MODEL_MAX_RETRY + + for attempt in range(1, max_retry + 1): + # 重试时采用指数退避策略 + if attempt > 1: + delay = min(2 ** (attempt - 1), 10) # 2, 4, 8, 10, 10... + logger.info(f"第{attempt}次尝试,等待{delay}秒...") + time.sleep(delay) + + try: + # 调用LLM分析 + invoke_args = { + "file_path": diff_file_info.file_path, + "diff_content": diff_file_info.diff_content + } + result = self.chain.invoke(invoke_args) + + # 验证结果有效性 + if isinstance(result, dict): + result = FileTextAnalysis(**result) + + if isinstance(result, FileTextAnalysis) and result.analysis_details: + result.file_path = diff_file_info.file_path + # 确保grammar_issues为列表 + if not result.grammar_issues: + result.grammar_issues = [] + return result + + # 结果无效,重试 + logger.warning(f"分析返回无效结果,第{attempt}/{max_retry}次尝试") + + except Exception as e: + err_str = str(e) + # 判断是否为HTTP错误 + is_http_error = any(code in err_str for code in ["404", "500", "502", "503", "504"]) + + if is_http_error: + logger.error(f"HTTP错误: {e},第{attempt}/{max_retry}次尝试") + if attempt < max_retry: + time.sleep(10) # HTTP错误等待更长时间 + else: + logger.error(f"分析错误: {e},第{attempt}/{max_retry}次尝试") + + logger.error(f"分析文件 {diff_file_info.file_path} 失败,已重试{max_retry}次") + return None + +class PRAnalysisChain: + """PR整体分析任务链""" + + def __init__(self, llm: ChatOllama | ChatOpenAI): + self.llm = llm + + # 创建输出解析器 + self.output_parser = JsonOutputParser(pydantic_object=PRAnalysisResult) + + # 输出格式说明 + format_instructions = """ +请以JSON格式输出,包含以下字段: +{{ + "has_text_changes": "是否有文本变更(布尔值)", + "text_change_type": "文本变更类型(字符串)", + "has_grammar_errors": "是否存在语法错误(布尔值)", + "grammar_errors": "语法错误列表(字符串列表)", + "detailed_analysis": "详细分析说明(字符串)", + "suggestions": "改进建议列表(字符串列表)" +}} +""" + # 创建prompt模板 + system_template = """ +你是PR审查专家,汇总各文件的英文拼写和语法检查结果。 + +【核心任务】 +汇总所有存在语法问题的文件,包括: +- 拼写错误(随机字符串、单词拼写错误) +- 时态错误 +- 主谓不一致 +- 其他明显的语法错误 + +【输出要求】 +- has_text_changes: 如果有任何文本变更则为true,否则为false +- text_change_type: 描述变更类型(如"文本变更且有语法错误"、"文本变更但无语法错误"等) +- has_grammar_errors: 如果存在语法错误则为true,否则为false +- grammar_errors: 所有语法错误的列表(用中文描述) +- detailed_analysis: 简洁明了的分析说明(不超过200字,使用中文) +- suggestions: 改进建议列表(如有问题则提供建议,无问题则为空列表) + +{format_instructions} +""" + human_template = """ +各个文件的分析结果: +{file_analyses} + +总文件数: {total_files} +涉及文本变更的文件数: {text_changed_files} +""" + self.prompt = ChatPromptTemplate.from_messages([ + ("system", system_template.format(format_instructions=format_instructions)), + ("human", human_template) + ]) + self.chain = self.prompt | self.llm | self.output_parser + + def generate(self, file_analyses: List[FileTextAnalysis], + total_comment_timeout: int = TOTAL_COMMENT_TIMEOUT) -> Optional[PRAnalysisResult]: + """生成PR整体分析""" + try: + total_files = len(file_analyses) + + # 
只保留有语法问题的文件 + problematic_files = [f for f in file_analyses if f.grammar_issues] + + # 如果所有文件都无问题,直接返回 + if not problematic_files: + return PRAnalysisResult( + has_text_changes=True, + text_change_type="文本变更但无语法错误", + has_grammar_errors=False, + grammar_errors=[], + detailed_analysis="所有文件无问题", + suggestions=[] + ) + + # 构造分析信息 + file_analyses_info = [ + { + 'file_path': f.file_path, + 'grammar_issues': f.grammar_issues, + 'analysis_details': f.analysis_details + } + for f in problematic_files + ] + + # 使用线程池添加超时控制 + with ThreadPoolExecutor(max_workers=1) as executor: + invoke_args = { + "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2), + "total_files": total_files, + "text_changed_files": len(problematic_files) + } + + future = executor.submit(self.chain.invoke, invoke_args) + try: + result = future.result(timeout=total_comment_timeout) + except (FutureTimeoutError, TimeoutError): + logger.error(f"生成PR分析超时({total_comment_timeout}秒)") + future.cancel() + return None + + # 处理结果 + if isinstance(result, dict): + result = PRAnalysisResult(**result) + return result if isinstance(result, PRAnalysisResult) else None + + except Exception as e: + logger.error(f"生成PR分析时发生错误: {e}") + return None + +# ==================== 主处理类 ==================== + +class PRCommentAnalyzer: + """PR评论分析器""" + + def __init__(self, siliconflow_api_key: str = "", + siliconflow_api_base: str = "https://api.siliconflow.cn/v1", + model_name: str = None, base_url: str = None): + if model_name is None: + model_name = MODEL_NAME + + # 设置siliconflow API配置 + global SILICONFLOW_API_KEY, SILICONFLOW_API_BASE + if siliconflow_api_key: + SILICONFLOW_API_KEY = siliconflow_api_key + if siliconflow_api_base: + SILICONFLOW_API_BASE = siliconflow_api_base + + self.llm = LLMFactory.create_chat_llm(model_name) + self.file_analysis_chain = FileTextAnalysisChain(self.llm) + self.pr_analysis_chain = PRAnalysisChain(self.llm) + + def cleanup(self): + """清理资源,确保程序能正确退出""" + try: + # 清理 LLM 连接 + if hasattr(self.llm, 'client') and hasattr(self.llm.client, 'close'): + self.llm.client.close() + elif hasattr(self.llm, '_client') and hasattr(self.llm._client, 'close'): + self.llm._client.close() + + # 如果是 ChatOpenAI,尝试关闭底层的 HTTP 客户端 + if BACKEND_TYPE == "siliconflow" and hasattr(self.llm, 'client'): + try: + # 强制关闭 httpx 客户端 + if hasattr(self.llm.client, '_client'): + self.llm.client._client.close() + except Exception as e: + logger.debug(f"关闭 HTTP 客户端时发生错误: {e}") + + logger.info("资源清理完成") + except Exception as e: + logger.warning(f"清理资源时发生错误: {e}") + + def analyze_pr_diff(self, diff_content: str, max_workers: int = None) -> CommentResult: + if max_workers is None: + max_workers = PROCESSING_MAX_WORKERS + + # 早期检查:查看diff中是否包含docs/en路径下的文件变更 + if 'docs/en/' not in diff_content: + logger.info("diff内容中不包含docs/en路径下的文件变更,无需进行语法检查") + return CommentResult( + pr_analysis=PRAnalysisResult( + has_text_changes=False, + text_change_type="无docs/en路径下的文件变更", + has_grammar_errors=False, + grammar_errors=[], + detailed_analysis="本次改动不涉及docs/en路径下的文件,无需语法检查", + suggestions=[] + ), + file_analyses=[], + processed_files=0, + total_files=0 + ) + + # 过滤只保留docs/en路径下的文件 + logger.info("过滤diff内容,只保留docs/en路径下的文件...") + filtered_diff_content = DiffParser.filter_docs_en_files(diff_content) + + # 检查是否有需要处理的docs/en路径下的文件变更 + if not filtered_diff_content.strip(): + logger.info("没有需要处理的docs/en路径下的文件变更,无需进行语法检查") + return CommentResult( + pr_analysis=PRAnalysisResult( + has_text_changes=False, + text_change_type="无文本改动", + has_grammar_errors=False, + 
grammar_errors=[], + detailed_analysis="过滤后没有docs/en路径下的文件需要检查", + suggestions=[] + ), + file_analyses=[], + processed_files=0, + total_files=0 + ) + + logger.info("开始解析过滤后的PR diff...") + files = DiffParser.parse_git_diff(filtered_diff_content) + logger.info(f"解析到 {len(files)} 个docs/en路径下的文件改动") + # 预过滤:仅标点/空白改动的文件不视为英文改动,跳过后续LLM分析 + filtered_files = [] + skipped_punct_files = 0 + for f in files: + if DiffParser.is_punctuation_only_change(f.diff_content): + skipped_punct_files += 1 + logger.info(f"跳过仅标点/空白改动的文件: {f.file_path}") + continue + filtered_files.append(f) + if skipped_punct_files: + logger.info(f"共有 {skipped_punct_files} 个文件因仅标点/空白改动被忽略") + + # 检查是否有文件需要分析 + if not files: + logger.warning("未找到任何文件改动") + return CommentResult( + pr_analysis=None, + file_analyses=[], + processed_files=0, + total_files=0, + error='未找到任何文件改动' + ) + + # 如果所有文件都被过滤掉了(都是标点/空白改动) + if not filtered_files: + logger.info("所有文件都是标点/空白改动,无需进行语法检查") + return CommentResult( + pr_analysis=PRAnalysisResult( + has_text_changes=False, + text_change_type="无文本改动", + has_grammar_errors=False, + grammar_errors=[], + detailed_analysis="所有改动都是标点或空白改动,无需语法检查", + suggestions=[] + ), + file_analyses=[], + processed_files=0, + total_files=len(files) + ) + + logger.info(f"开始并行处理文件分析 (共{len(filtered_files)}个,并发数{max_workers})") + file_analyses = [] + + # 计算整体超时时间 + batches = (len(filtered_files) + max_workers - 1) // max_workers + overall_timeout = SINGLE_FILE_TIMEOUT * batches + 60 + logger.info(f"整体超时: {overall_timeout}秒") + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # 提交所有任务 + future_to_file = { + executor.submit(self.file_analysis_chain.analyze, file_info): file_info.file_path + for file_info in filtered_files + } + + completed_count = 0 + total_count = len(future_to_file) + + try: + for future in as_completed(future_to_file, timeout=overall_timeout): + file_path = future_to_file[future] + completed_count += 1 + + try: + analysis = future.result(timeout=SINGLE_FILE_TIMEOUT) + if analysis: + file_analyses.append(analysis) + logger.info(f"完成 {file_path} ({completed_count}/{total_count})") + else: + logger.warning(f"失败 {file_path} ({completed_count}/{total_count})") + except (FutureTimeoutError, TimeoutError): + logger.error(f"超时 {file_path} ({completed_count}/{total_count})") + future.cancel() + except Exception as e: + logger.error(f"异常 {file_path}: {e} ({completed_count}/{total_count})") + + except (FutureTimeoutError, TimeoutError): + logger.error(f"整体超时({overall_timeout}秒),已完成{completed_count}/{total_count}") + # 取消未完成的任务 + for future in future_to_file: + if not future.done(): + future.cancel() + + logger.info(f"成功生成 {len(file_analyses)} 个文件的文本分析") + logger.info("开始生成PR整体分析...") + pr_analysis = None + if file_analyses: + logger.info(f"基于 {len(file_analyses)} 个成功处理的文件生成PR分析...") + try: + pr_analysis = self.pr_analysis_chain.generate(file_analyses, TOTAL_COMMENT_TIMEOUT) + if pr_analysis: + logger.info("PR整体分析生成成功") + else: + logger.warning("PR整体分析生成失败") + except Exception as e: + logger.error(f"生成PR分析时发生未预期的错误: {e}") + else: + logger.warning("没有成功处理的文件,跳过PR分析生成") + + return CommentResult( + pr_analysis=pr_analysis, + file_analyses=file_analyses, + processed_files=len(file_analyses), + total_files=len(files) + ) + +# ==================== 主函数 ==================== + +def get_comment_analysis(sample_diff, siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1"): + + analyzer = PRCommentAnalyzer(siliconflow_api_key, siliconflow_api_base) + result = None + try: + result = 
analyzer.analyze_pr_diff(sample_diff) + finally: + # 确保在函数退出前清理资源 + analyzer.cleanup() + + if not result: + print("处理失败,无法获取结果") + return None + + if result.error: + print(f"错误: {result.error}") + + print("\n=== 单文件文本分析 ===") + # 只输出有语法问题的文件 + problematic_files = [f for f in result.file_analyses + if f.grammar_issues and len(f.grammar_issues) > 0] + if problematic_files: + for analysis in problematic_files: + print(f"文件: {analysis.file_path}") + print(f"语法问题: {analysis.grammar_issues}") + print(f"分析详情: {analysis.analysis_details}") + print("-" * 50) + else: + print("所有文件语法正确,无需关注") + + print("=== 处理结果 ===") + print(f"总文件数: {result.total_files}") + print(f"成功处理文件数: {result.processed_files}") + + if result.pr_analysis: + print("\n=== PR整体分析 ===") + pr = result.pr_analysis + print(f"涉及文本变更: {pr.has_text_changes}") + print(f"文本变更类型: {pr.text_change_type}") + print(f"存在语法错误: {pr.has_grammar_errors}") + print(f"语法错误列表: {pr.grammar_errors}") + print(f"详细分析: {pr.detailed_analysis}") + print(f"改进建议: {pr.suggestions}") + + + return result + +if __name__ == "__main__": + # 微服务接口逻辑: 传递进来的就是 sample_diff 的内容 + sample_diff = sys.argv[1] + result = get_comment_analysis(sample_diff) + print(result) diff --git a/ci/tools/comment/create_comment.py b/ci/tools/comment/create_comment.py index 481c7a2f..8c450419 100644 --- a/ci/tools/comment/create_comment.py +++ b/ci/tools/comment/create_comment.py @@ -22,7 +22,6 @@ class Org: comment_target_owner: str comment_target_repo: str auto_comment_enabled: bool = field(default=True) - confidence_threshold: float = field(default=0.7) text_check_enabled: bool = field(default=True) grammar_check_enabled: bool = field(default=True) @@ -142,94 +141,33 @@ def generate_comment_content(comment_result, pr_url: str, analysis_status: str = comment_body = "" # 根据分析状态添加不同的状态标识 - if analysis_status == "error": - comment_body += "### 分析状态:处理失败\n" - comment_body += "**分析过程中发生错误,无法生成详细反馈。请手动审查文本变更。**\n\n" - elif analysis_status == "low_confidence": - comment_body += "### 分析状态:置信度较低\n" - comment_body += "**当前分析置信度较低,结果仅供参考。建议进行人工审查。**\n\n" - elif analysis_status == "no_text_changes": + if analysis_status in ["no_text_changes", "no_grammar_errors"]: + # 统一的默认评论内容 comment_body += "### 分析状态:无文本问题\n" comment_body += "**AI分析结果显示本次PR未发现明显的文本变更或语法问题。无需改动。**\n\n" - elif analysis_status == "no_grammar_errors": - comment_body += "### 分析状态:文本质量良好\n" - comment_body += "**检测到文本变更,但未发现明显的语法错误,文本质量良好。无需改动。**\n\n" else: # success with issues comment_body += "### 分析状态:发现需要关注的问题\n" comment_body += "**AI分析发现了一些文本变更或语法问题,请查看下方详细信息。**\n\n" - # 如果有分析结果,添加详细信息 - if comment_result and not comment_result.error: - # 如果有PR整体分析 - if comment_result.pr_analysis: - pr_analysis = comment_result.pr_analysis - - # 添加整体评估摘要 - comment_body += "## 整体评估\n" - comment_body += f"- 涉及文本变更: {'是' if pr_analysis.has_text_changes else '否'}\n" - comment_body += f"- 文本变更类型: {pr_analysis.text_change_type}\n" - comment_body += f"- 存在语法错误: {'是' if pr_analysis.has_grammar_errors else '否'}\n\n" - - # 添加详细分析 - if pr_analysis.detailed_analysis: - comment_body += "## 详细分析\n" - comment_body += f"{pr_analysis.detailed_analysis}\n\n" - - # 添加语法错误列表 - if pr_analysis.grammar_errors: - comment_body += "## 语法问题\n" - for i, error in enumerate(pr_analysis.grammar_errors, 1): - comment_body += f"{i}. {error}\n" - comment_body += "\n" - - # 添加改进建议 - if pr_analysis.suggestions: - comment_body += "## 改进建议\n" - for i, suggestion in enumerate(pr_analysis.suggestions, 1): - comment_body += f"{i}. 
{suggestion}\n" - comment_body += "\n" - - # 添加文件级别的分析结果 + # 如果有分析结果,添加文件级别的分析结果(仅输出有语法问题的文件) + if comment_result and analysis_status not in ["no_text_changes", "no_grammar_errors"]: if comment_result.file_analyses: - # comment_body += "## 文件分析\n" - - # 统计有问题的文件 - files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues] - files_without_issues = [f for f in comment_result.file_analyses if not f.has_text_changes and not f.grammar_issues] + # 只统计有语法问题的文件 + files_with_issues = [f for f in comment_result.file_analyses + if f.grammar_issues and len(f.grammar_issues) > 0] if files_with_issues: comment_body += f"### 需要关注的文件 ({len(files_with_issues)} 个)\n" for i, file_analysis in enumerate(files_with_issues, 1): comment_body += f"\n**{i}. {file_analysis.file_path}**\n" - - if file_analysis.has_text_changes: - comment_body += f"- 文本变更: 检测到英文文本改动\n" - if file_analysis.text_lines: - comment_body += f"- 涉及行数: {len(file_analysis.text_lines)} 行\n" - - if file_analysis.grammar_issues: - comment_body += f"- 语法问题: 发现 {len(file_analysis.grammar_issues)} 个问题\n" - for j, issue in enumerate(file_analysis.grammar_issues, 1): - comment_body += f" {j}. {issue}\n" - - if file_analysis.analysis_details: - comment_body += f"- 分析详情: {file_analysis.analysis_details}\n" - - if files_without_issues: - comment_body += f"\n### 无问题的文件 ({len(files_without_issues)} 个)\n" - for file_analysis in files_without_issues: - comment_body += f"- {file_analysis.file_path}\n" - - # 添加处理统计 - # comment_body += f"\n### 处理统计\n" - # comment_body += f"- 总文件数: {comment_result.total_files}\n" - # comment_body += f"- 成功分析: {comment_result.processed_files}\n" - # comment_body += f"- 有文本变更: {len([f for f in comment_result.file_analyses if f.has_text_changes])}\n" - # comment_body += f"- 有语法问题: {len([f for f in comment_result.file_analyses if f.grammar_issues])}\n" + comment_body += f"- 语法问题: 发现 {len(file_analysis.grammar_issues)} 个问题\n" + for j, issue in enumerate(file_analysis.grammar_issues, 1): + comment_body += f" {j}. 
{issue}\n" # 添加免责声明 comment_body += "## 免责声明\n" comment_body += "本评论内容基于AI Agent技术自动生成,仅供参考。请开发者根据实际情况进行判断和修改。\n" + comment_body += "**注意:语法检查仅针对 docs/en/ 路径下的文件进行。**\n" return comment_body @@ -270,38 +208,53 @@ def create_comment_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, p logger.info(f"组织 {org_item.org_name} 未启用自动评论功能") continue - # 移除文件筛选逻辑,对所有PR平等处理 - logger.info("开始对PR进行全面文本分析(不限制文件类型和路径)") - # 获取diff内容 diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) if diff_content is None: logger.error("无法获取PR的diff内容") sys.exit(1) + # 早期检查:查看diff中是否包含docs/en路径下的文件变更 + if 'docs/en/' not in diff_content: + logger.info("diff内容中不包含docs/en路径下的文件变更,发布默认评论") + # 发布默认评论 + comment_content = generate_comment_content( + comment_result=None, + pr_url=pr_html_url, + analysis_status="no_text_changes" + ) + success = cli.add_pr_comment(pr_owner, pr_repo, pr_number, comment_content) + if success: + logger.info("默认评论发布成功(无docs/en路径变更)") + else: + logger.error("默认评论发布失败(无docs/en路径变更)") + continue + + logger.info("检测到docs/en路径下的文件变更,开始进行英文语法检查分析...") + # 调用AI Agent进行分析 logger.info("开始进行AI代码审查分析...") - comment_result = get_comment_analysis(diff_content, siliconflow_api_key, siliconflow_api_base) + comment_result = None + + try: + comment_result = get_comment_analysis(diff_content, siliconflow_api_key, siliconflow_api_base) + except Exception as e: + logger.error(f"AI分析过程发生异常: {e}") + logger.error("AI分析失败,跳过评论发布") + continue # 跳过本次评论,不发布任何内容 if not comment_result: - logger.error("AI分析失败,将发布错误状态评论") - # 创建一个错误结果对象,确保能发布评论 - from comment_agent import CommentResult - comment_result = CommentResult( - pr_analysis=None, - file_analyses=[], - processed_files=0, - total_files=0, - error="AI分析过程失败" - ) + logger.error("AI分析失败,未返回结果,跳过评论发布") + continue # 跳过本次评论,不发布任何内容 + + if comment_result.error: + logger.error(f"AI分析返回错误: {comment_result.error},跳过评论发布") + continue # 跳过本次评论,不发布任何内容 # 确定分析状态和评论内容 analysis_status = "success" - if comment_result.error: - analysis_status = "error" - logger.info("AI分析过程出错,将发布错误状态评论") - elif comment_result.pr_analysis: + if comment_result.pr_analysis: pr_analysis = comment_result.pr_analysis # 检查是否有文本变更或语法错误 @@ -318,14 +271,17 @@ def create_comment_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, p analysis_status = "success" logger.info("检测到需要关注的问题,将发布详细分析评论") else: - # 如果没有整体分析,检查是否有文件级别的问题 - files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues] + # 如果没有整体分析,检查是否有文件级别的语法问题 + files_with_issues = [ + f for f in comment_result.file_analyses + if f.grammar_issues and len(f.grammar_issues) > 0 + ] if files_with_issues: analysis_status = "success" - logger.info(f"检测到 {len(files_with_issues)} 个文件有文本问题,将发布文件级别问题评论") + logger.info(f"检测到 {len(files_with_issues)} 个文件有语法问题,将发布文件级别问题评论") else: analysis_status = "no_text_changes" - logger.info("未检测到文件级别问题,将发布无问题评论") + logger.info("未检测到文件级别语法问题,将发布无问题评论") # 总是生成和发布评论 comment_content = generate_comment_content( @@ -349,7 +305,9 @@ def main(): parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') parser.add_argument('--pr_number', type=str, required=True, help='the PR number') parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') - parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') + parser.add_argument('--siliconflow_api_base', type=str, + default="https://api.siliconflow.cn/v1", + help='the base URL of 
siliconflow') args = Args() parser.parse_args(args=sys.argv[1:], namespace=args) args.validate() @@ -365,7 +323,9 @@ def main(): pr_number = args.pr_number siliconflow_api_key = args.siliconflow_api_key siliconflow_api_base = args.siliconflow_api_base - create_comment_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) + create_comment_based_on_pr_diff_and_config( + conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base + ) if __name__ == '__main__': diff --git a/ci/tools/comment/create_comment.yaml b/ci/tools/comment/create_comment.yaml index 32ac269a..4c77c5ed 100644 --- a/ci/tools/comment/create_comment.yaml +++ b/ci/tools/comment/create_comment.yaml @@ -7,7 +7,7 @@ comment_agent: # Model Configuration model: - name: "Qwen/Qwen3-8B" + name: "Qwen/Qwen3-32B" temperature: 0.1 max_retry: 5 # For siliconflow backend -- Gitee