From 406f685650764e91c324384793483793cf75c509 Mon Sep 17 00:00:00 2001
From: petermouse666 <708975811@qq.com>
Date: Wed, 10 Sep 2025 08:52:44 +0000
Subject: [PATCH 1/8] update ci-bot for auto generating translation issue

---
 .../new_create_translation_issue.py           |   72 +-
 .../new_create_translation_issue.yaml         |   27 +
 ci/tools/translation/translation_agent.py     | 1279 +++++++++++++++++
 3 files changed, 1373 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 ci/tools/translation/new_create_translation_issue.py
 mode change 100644 => 100755 ci/tools/translation/new_create_translation_issue.yaml
 create mode 100755 ci/tools/translation/translation_agent.py

diff --git a/ci/tools/translation/new_create_translation_issue.py b/ci/tools/translation/new_create_translation_issue.py
old mode 100644
new mode 100755
index 5ca3cc0d2..0a61c7136
--- a/ci/tools/translation/new_create_translation_issue.py
+++ b/ci/tools/translation/new_create_translation_issue.py
@@ -6,6 +6,7 @@ import sys
 from dataclasses import dataclass, field
 from difflib import SequenceMatcher
 from typing import TypeVar, Generic
+from translation_agent import get_agent_summary
 import requests
 import yaml
@@ -39,15 +40,27 @@ class Org:
         self.issue_triggers = tmp_issue_triggers


+@dataclass
+class TranslationAgentConfig:
+    backend: dict = field(default_factory=dict)
+    model: dict = field(default_factory=dict)
+    processing: dict = field(default_factory=dict)
+    logging: dict = field(default_factory=dict)
+
+
 @dataclass
 class Config:
     orgs: list[dict | Org]
+    translation_agent: dict | TranslationAgentConfig = field(default_factory=dict)
 
     def __post_init__(self):
         tmp_orgs: list[Org] = []
         for item in self.orgs:
             tmp_orgs.append(Org(**item))
         self.orgs = tmp_orgs
+
+        if isinstance(self.translation_agent, dict) and self.translation_agent:
+            self.translation_agent = TranslationAgentConfig(**self.translation_agent)
 
 
 @dataclass
@@ -231,6 +244,8 @@ class Args:
     pr_owner: str
     pr_repo: str
     pr_number: int
+    siliconflow_api_key: str = ""
+    siliconflow_api_base: str = "https://api.siliconflow.cn/v1"
 
     def validate(self):
         valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number
@@ -249,14 +264,15 @@ def load_config_yaml(yaml_path):
 
 
 def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str,
-                                             pr_number: int):
+                                             pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str):
     pr__html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number)
     for org_item in conf.orgs:
         issue_title_pr_mark = "{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number)
         if org_item.org_name != pr_owner:
             continue
-        if org_item.auto_create_issue:
-            cli.check_only_marks_changed(pr_owner, pr_repo, pr_number, org_item.change_content_exclude)
+        # Legacy punctuation-only-change check, now deprecated
+        # if org_item.auto_create_issue:
+        #     cli.check_only_marks_changed(pr_owner, pr_repo, pr_number, org_item.change_content_exclude)
         file_count = 0
         diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number)
         if diff_content is None:
@@ -300,18 +316,60 @@ def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_
             need_create_issue_titles.append(need_create_issue[issue_item][1])
             need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0]
         if need_create_issue_titles:
+
+            need_create_issue_list, existed_issue_list = cli.check_issue_exists(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_titles)
+
             if not need_create_issue_list:
                 feedback_comment = "issue has already created, please go to check issue: {}".format(
                     existed_issue_list)
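+                # every requested title already has an open issue; surface them on the PR instead of filing duplicates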
                 logger.info("Warning: " + feedback_comment)
                 cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment)
+            # The diff is identical for every issue title, so generate the AI summary
+            # once per PR and reuse it for each issue created below.
+            issue_summary = get_agent_summary(diff_content, siliconflow_api_key, siliconflow_api_base) if need_create_issue_list else None
             for need_create_issue_item in need_create_issue_list:
+                issue_body = ""
+                if issue_summary and not issue_summary.error:
+                    issue_body += "## 📊 变更统计\n\n"
+                    issue_body += f"- **总文件数**: {issue_summary.total_files}\n"
+                    issue_body += f"- **成功处理文件数**: {issue_summary.processed_files}\n"
+                    if issue_summary.total_files != issue_summary.processed_files:
+                        # some files were skipped by the bot; flag them for manual review
+                        issue_body += f"- **未处理文件数**: {issue_summary.total_files - issue_summary.processed_files}\n"
+                        issue_body += "- **提醒:机器人未能及时自动生成所有改动的摘要,请注意人工审查!**\n"
+                    if issue_summary.total_summary:
+                        total = issue_summary.total_summary
+                        issue_body += f"- **总改动行数**: {total.total_lines_changed}\n"
+                        issue_body += f"- **改动类型**: {', '.join(total.change_type_list)}\n\n"
+                        issue_body += "## 🔍 整体变更摘要\n\n"
+                        issue_body += f"{total.overall_summary}\n\n"
+                        issue_body += "## ⚠️ 整体潜在影响\n\n"
+                        issue_body += f"{total.overall_potential_impact}\n\n"
+                    if issue_summary.file_summaries:
+                        issue_body += "## 📝 单文件变更详情\n\n"
+                        for summary in issue_summary.file_summaries:
+                            issue_body += f"### 📁 {summary.file_path}\n\n"
+                            issue_body += f"- **改动类型**: {summary.change_type}\n"
+                            issue_body += f"- **新增行数**: {summary.lines_added}\n"
+                            issue_body += f"- **删除行数**: {summary.lines_deleted}\n"
+                            issue_body += f"- **潜在影响**: {summary.potential_impact}\n"
+                            issue_body += f"- **详细摘要**: {summary.summary}\n\n"
+                            issue_body += "---\n\n"
+                else:
+                    issue_body += "## ⚠️ 翻译变更检测\n\n"
+                    issue_body += "检测到需要翻译的文件变更,但无法获取详细摘要信息。\n\n"
+                    issue_body += f"**变更文件数量**: {len(diff_files)}\n"
+                    issue_body += f"**相关PR**: {pr__html_url}\n\n"
+
+                issue_body += "## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成,仅供参考,请以实际更改为准。\n\n"
+                issue_body += "## 🔗 相关PR链接\n\n"
+                issue_body += f"- {pr__html_url}\n"
+
                 cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item,
                                  need_create_issue_template[need_create_issue_item],
-                                 "### Related PR link \n - {}".format(pr__html_url))
+                                 issue_body)
+
 
 
 def main():
@@ -320,6 +378,8 @@ def main():
     parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner')
     parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo')
     parser.add_argument('--pr_number', type=str, required=True, help='the PR number')
+    parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow')
+    parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow')
     args = Args()
     parser.parse_args(args=sys.argv[1:], namespace=args)
     args.validate()
@@ -333,7 +393,9 @@ def main():
     pr_owner = args.pr_owner
     pr_repo = args.pr_repo
     pr_number = args.pr_number
-    create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number)
+    siliconflow_api_key = args.siliconflow_api_key
+    siliconflow_api_base = args.siliconflow_api_base
+    create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base)
 
 
 if __name__ == '__main__':
diff --git a/ci/tools/translation/new_create_translation_issue.yaml b/ci/tools/translation/new_create_translation_issue.yaml
old mode 100644
new mode 100755
index a58ebcc6e..bc48ab7a2
--- a/ci/tools/translation/new_create_translation_issue.yaml
+++ b/ci/tools/translation/new_create_translation_issue.yaml
@@ -1,3 +1,30 @@
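+# The translation_agent block below is consumed by translation_agent.py via
+# load_config(); each key maps to a module constant there (backend.type ->
+# BACKEND_TYPE, model.name -> MODEL_NAME, processing.max_workers ->
+# PROCESSING_MAX_WORKERS, logging.level -> LOGGING_LEVEL).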
+# Translation Agent Configuration +translation_agent: + # Backend Configuration + backend: + type: "siliconflow" # Options: "ollama" or "siliconflow" + # siliconflow配置现在通过命令行参数传入 + ollama: + base_url: "http://localhost:11434" + + # Model Configuration + model: + name: "Qwen/Qwen3-32B" # Options: "llama3" "Qwen/Qwen3-8B" "THUDM/GLM-4-32B-0414" or others + temperature: 0.1 + max_retry: 5 # For siliconflow backend + max_retry_ollama: 1 # For ollama backend + + # Processing Configuration + processing: + max_workers: 8 # Number of parallel workers for file processing + single_file_timeout: 180 # Timeout for single file summary generation (seconds) + total_summary_timeout: 300 # Timeout for total summary generation (seconds) + + # Logging Configuration + logging: + level: "INFO" + +# Issue Creation Configuration orgs: - org_name: openeuler issue_of_owner: openeuler diff --git a/ci/tools/translation/translation_agent.py b/ci/tools/translation/translation_agent.py new file mode 100755 index 000000000..258826eb5 --- /dev/null +++ b/ci/tools/translation/translation_agent.py @@ -0,0 +1,1279 @@ +import json +import re +import logging +import urllib.parse +from typing import List, Dict, Any, Optional, Tuple, Literal +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError as FutureTimeoutError +from pathlib import Path +import tiktoken +import sys +import time +# LangChain imports +from langchain_core.prompts import ChatPromptTemplate, PromptTemplate +from langchain_core.runnables import RunnableLambda, RunnablePassthrough +from pydantic import BaseModel, Field, SecretStr +from langchain_community.llms import Ollama +from langchain_ollama import ChatOllama +from langchain.chains import TransformChain, SequentialChain +from langchain_core.output_parsers import JsonOutputParser +from langchain_openai import ChatOpenAI +import yaml + +# ==================== 配置加载 ==================== + +def load_config(config_file="new_create_translation_issue.yaml"): + """从YAML文件加载配置""" + try: + with open(config_file, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + return config.get('translation_agent', {}) + except FileNotFoundError: + print(f"配置文件 {config_file} 不存在") + raise + except yaml.YAMLError as e: + print(f"解析配置文件时发生错误: {e}") + raise + +# 加载配置 +_config = load_config() + +# ==================== 配置常量 ==================== + +BACKEND_TYPE = _config.get('backend', {}).get('type', 'siliconflow') +OLLAMA_BASE_URL = _config.get('backend', {}).get('ollama', {}).get('base_url', 'http://localhost:11434') +MODEL_NAME = _config.get('model', {}).get('name', 'Qwen/Qwen3-8B') +MODEL_TEMPERATURE = _config.get('model', {}).get('temperature', 0.1) +MODEL_MAX_RETRY = _config.get('model', {}).get('max_retry', 5) +MODEL_MAX_RETRY_OLLAMA = _config.get('model', {}).get('max_retry_ollama', 1) +PROCESSING_MAX_WORKERS = _config.get('processing', {}).get('max_workers', 8) +SINGLE_FILE_TIMEOUT = _config.get('processing', {}).get('single_file_timeout', 180) +TOTAL_SUMMARY_TIMEOUT = _config.get('processing', {}).get('total_summary_timeout', 300) +LOGGING_LEVEL = _config.get('logging', {}).get('level', 'INFO') +SILICONFLOW_API_KEY = '' +SILICONFLOW_API_BASE ='' + +# 配置日志 +logging.basicConfig(level=getattr(logging, LOGGING_LEVEL.upper())) +logger = logging.getLogger(__name__) + +# ==================== 数据模型定义 ==================== + +class SingleFileSummary(BaseModel): + """单个文件摘要的结构化输出""" + file_path: str = Field(description="文件路径", default="") + change_type: 
Literal["仅涉及标点符号的修改", "涉及到中英文文本内容的修改", "涉及到代码内容的修改", "涉及到其他内容的修改"] = Field(description="改动类型") + potential_impact: str = Field(description="改动对其他文件潜在的影响") + summary: str = Field(description="改动的详细摘要") + lines_added: int = Field(description="新增行数", default=0) + lines_deleted: int = Field(description="删除行数", default=0) + +class FileChangeInfo(BaseModel): + """文件改动信息""" + file_path: str = Field(description="文件路径") + change_type: Literal["仅涉及标点符号的修改", "涉及到中英文文本内容的修改", "涉及到代码内容的修改", "涉及到其他内容的修改"] = Field(description="改动类型") + lines_changed: int = Field(description="改动行数") + +class TotalSummary(BaseModel): + """总摘要的结构化输出""" + total_files_changed: int = Field(description="总共修改的文件数量", default=0) + total_lines_changed: int = Field(description="总共修改的行数", default=0) + overall_potential_impact: str = Field(description="整体改动对其他文件潜在的影响") + overall_summary: str = Field(description="整体改动的详细摘要") + change_type_list: List[str] = Field(description="所有文件包含的改动种类列表", default=[]) + file_changes: List[FileChangeInfo] = Field(description="每个修改文件的详细信息列表", default=[]) + +@dataclass +class DiffFileInfo: + """单个文件的diff信息""" + file_path: str + diff_content: str + lines_added: int + lines_deleted: int + +@dataclass +class ProcessingResult: + """处理结果""" + file_summaries: List[SingleFileSummary] + total_summary: Optional[TotalSummary] + processed_files: int + total_files: int + error: Optional[str] = None + +# ==================== Token 统计工具 ==================== + +class TokenCounter: + def __init__(self, model_name=MODEL_NAME): + self.model_name = model_name + self.prompt_tokens = 0 + self.completion_tokens = 0 + self.total_tokens = 0 + self.tokenizer = None + self._init_tokenizer() + + def _init_tokenizer(self): + """初始化tokenizer""" + try: + self.tokenizer = tiktoken.encoding_for_model(self.model_name) + except Exception: + try: + self.tokenizer = tiktoken.get_encoding("cl100k_base") + except Exception: + logger.warning("无法初始化tokenizer,将不会计算token数量") + + def _encode(self, text: str) -> List[int]: + """编码文本""" + if not isinstance(text, str): + return [] + if self.tokenizer is None: + # 如果没有tokenizer,使用简单的估算方法 + return [0] * (len(text) // 4) + try: + return self.tokenizer.encode(text) + except Exception as e: + logger.warning(f"编码文本时发生错误: {e}") + # 如果编码失败,使用简单的估算方法 + return [0] * (len(text) // 4) + + def _count_tokens(self, text: str) -> int: + """计算文本的token数量""" + return len(self._encode(text)) + + def count_prompt(self, prompt: str) -> int: + """计算prompt的token数量""" + tokens = self._count_tokens(prompt) + self.prompt_tokens += tokens + self.total_tokens += tokens + return tokens + + def count_completion(self, completion: str) -> int: + """计算completion的token数量""" + tokens = self._count_tokens(completion) + self.completion_tokens += tokens + self.total_tokens += tokens + return tokens + + def get_stats(self): + return { + "prompt_tokens": self.prompt_tokens, + "completion_tokens": self.completion_tokens, + "total_tokens": self.total_tokens + } + +# ==================== 工具函数 ==================== + +class DiffParser: + """Git Diff 解析器""" + + @staticmethod + def parse_git_diff(diff_content: str) -> List[DiffFileInfo]: + """ + 解析git diff内容,提取每个文件的改动信息 + + Args: + diff_content: git diff的原始内容 + + Returns: + 包含文件路径和对应diff内容的列表 + """ + + files = [] + current_file = None + current_diff = [] + + lines = diff_content.strip().split('\n') + + for line in lines: + # 匹配文件路径行 + if line.startswith('diff --git'): + # 保存前一个文件的信息 + if current_file and current_diff: + diff_info = DiffParser._create_diff_file_info(current_file, current_diff) + 
if diff_info: + files.append(diff_info) + + # 提取文件路径 - 改进的解析逻辑 + current_file = DiffParser._extract_file_path(line) + if current_file: + current_diff = [line] + else: + current_diff = [] + elif current_file: + current_diff.append(line) + + # 添加最后一个文件 + if current_file and current_diff: + diff_info = DiffParser._create_diff_file_info(current_file, current_diff) + if diff_info: + files.append(diff_info) + + return files + + @staticmethod + def _extract_file_path(diff_line: str) -> Optional[str]: + """ + 从git diff行中提取文件路径,支持包含汉字的文件名 + + Args: + diff_line: git diff的文件头行,格式如 "diff --git a/path/to/file b/path/to/file" + + Returns: + 提取出的文件路径,如果解析失败则返回None + """ + try: + # 方法1: 处理引号包围的路径(Git对特殊字符的处理) + # 格式: diff --git "a/path/to/file" "b/path/to/file" + quoted_pattern = r'diff --git "a/(.+?)" "b/(.+?)"' + quoted_match = re.match(quoted_pattern, diff_line) + + if quoted_match: + file_path_a = quoted_match.group(1) + file_path_b = quoted_match.group(2) + # 通常a和b路径相同,使用a路径(旧文件路径) + file_path = file_path_a + else: + # 方法2: 使用正则表达式匹配标准的git diff格式 + # 格式: diff --git a/path/to/file b/path/to/file + pattern = r'diff --git a/(.+?) b/(.+?)(?:\s|$)' + match = re.match(pattern, diff_line) + + if match: + file_path_a = match.group(1) + file_path_b = match.group(2) + # 通常a和b路径相同,使用a路径(旧文件路径) + file_path = file_path_a + else: + # 方法3: 如果正则匹配失败,尝试更简单的解析 + # 处理可能包含空格和特殊字符的文件名 + if ' a/' in diff_line and ' b/' in diff_line: + # 找到 a/ 和 b/ 的位置 + a_pos = diff_line.find(' a/') + b_pos = diff_line.find(' b/') + + if a_pos != -1 and b_pos != -1 and a_pos < b_pos: + # 提取a/和b/之间的路径 + a_start = a_pos + 3 # 跳过 ' a/' + file_path = diff_line[a_start:b_pos] + else: + return None + else: + # 方法4: 最后的备选方案,简单的字符串分割 + parts = diff_line.split() + if len(parts) >= 3: + a_path = parts[2] + if a_path.startswith('a/'): + file_path = a_path[2:] # 移除'a/'前缀 + else: + return None + else: + return None + + # 处理文件名编码 + return DiffParser._decode_file_path(file_path) + + except Exception as e: + logger.warning(f"解析文件路径时发生错误: {e}, diff行: {diff_line}") + return None + + @staticmethod + def _decode_file_path(file_path: str) -> str: + """ + 解码文件路径,处理各种编码情况 + + Args: + file_path: 原始文件路径 + + Returns: + 解码后的文件路径 + """ + try: + # 首先尝试URL解码,处理Git编码的文件名 + decoded_path = urllib.parse.unquote(file_path, encoding='utf-8') + + # 处理Git对特殊字符的引号包装 + if decoded_path.startswith('"') and decoded_path.endswith('"'): + decoded_path = decoded_path[1:-1] + # Git使用反斜杠转义,需要处理转义序列 + decoded_path = decoded_path.replace('\\"', '"') + decoded_path = decoded_path.replace('\\\\', '\\') + + # 无论是否有引号包装,都尝试处理八进制编码 + # 检查是否包含八进制转义序列 + if '\\' in decoded_path and re.search(r'\\[0-7]{3}', decoded_path): + decoded_path = DiffParser._decode_octal_sequences(decoded_path) + + return decoded_path + + except Exception as e: + logger.warning(f"解码文件路径时发生错误: {e}, 原始路径: {file_path}") + return file_path + + @staticmethod + def _decode_octal_sequences(text: str) -> str: + """ + 解码文本中的八进制转义序列 + + Args: + text: 包含八进制转义序列的文本 + + Returns: + 解码后的文本 + """ + try: + # 查找八进制转义序列模式:\xxx + pattern = r'\\([0-7]{3})' + + # 找到所有八进制序列 + matches = list(re.finditer(pattern, text)) + if not matches: + return text + + # 收集所有字节值 + result = "" + last_end = 0 + bytes_buffer = [] + + for i, match in enumerate(matches): + # 添加匹配前的文本 + if match.start() > last_end: + # 如果有缓冲的字节,先处理它们 + if bytes_buffer: + try: + decoded_bytes = bytes(bytes_buffer).decode('utf-8') + result += decoded_bytes + bytes_buffer = [] + except UnicodeDecodeError: + # 如果解码失败,保持原始形式 + for byte_val in bytes_buffer: + result += 
f"\\{oct(byte_val)[2:].zfill(3)}" + bytes_buffer = [] + + result += text[last_end:match.start()] + + # 处理当前八进制序列 + octal_str = match.group(1) + try: + byte_value = int(octal_str, 8) + bytes_buffer.append(byte_value) + except ValueError: + # 如果转换失败,添加原始字符串 + if bytes_buffer: + try: + decoded_bytes = bytes(bytes_buffer).decode('utf-8') + result += decoded_bytes + bytes_buffer = [] + except UnicodeDecodeError: + for byte_val in bytes_buffer: + result += f"\\{oct(byte_val)[2:].zfill(3)}" + bytes_buffer = [] + result += match.group(0) + + last_end = match.end() + + # 检查是否是最后一个匹配或下一个匹配不连续 + is_last = (i == len(matches) - 1) + is_next_non_consecutive = (not is_last and + matches[i + 1].start() != match.end()) + + if is_last or is_next_non_consecutive: + # 处理缓冲的字节 + if bytes_buffer: + try: + decoded_bytes = bytes(bytes_buffer).decode('utf-8') + result += decoded_bytes + except UnicodeDecodeError: + # 如果解码失败,保持原始形式 + for byte_val in bytes_buffer: + result += f"\\{oct(byte_val)[2:].zfill(3)}" + bytes_buffer = [] + + # 添加剩余的文本 + if last_end < len(text): + result += text[last_end:] + + return result + + except Exception as e: + logger.warning(f"解码八进制序列时发生错误: {e}, 原始文本: {text}") + return text + + @staticmethod + def _create_diff_file_info(file_path: str, diff_lines: List[str]) -> Optional[DiffFileInfo]: + """创建DiffFileInfo对象""" + diff_content = '\n'.join(diff_lines) + lines_added, lines_deleted = DiffParser._count_lines_changed(diff_content) + + return DiffFileInfo( + file_path=file_path, + diff_content=diff_content, + lines_added=lines_added, + lines_deleted=lines_deleted + ) + + @staticmethod + def _count_lines_changed(diff_content: str) -> Tuple[int, int]: + """统计git diff中改动的行数""" + lines_added, lines_deleted = 0, 0 + lines = diff_content.strip().split('\n') + + for line in lines: + # 统计新增行(以+开头,但不是+++) + if line.startswith('+') and not line.startswith('+++'): + lines_added += 1 + # 统计删除行(以-开头,但不是---) + elif line.startswith('-') and not line.startswith('---'): + lines_deleted += 1 + + return lines_added, lines_deleted + +# ==================== LangChain 组件 ==================== + +class LLMFactory: + """LLM工厂类""" + + @staticmethod + def create_chat_llm(model_name: str = None, base_url: str = None): + """创建LLM实例""" + if model_name is None: + model_name = MODEL_NAME + if base_url is None: + base_url = OLLAMA_BASE_URL + + if BACKEND_TYPE == "ollama": + return ChatOllama( + model=model_name, + base_url=base_url, + temperature=MODEL_TEMPERATURE + ) + elif BACKEND_TYPE == "siliconflow": + return ChatOpenAI( + model=model_name, + api_key=SecretStr(SILICONFLOW_API_KEY), + base_url=SILICONFLOW_API_BASE, + temperature=MODEL_TEMPERATURE + ) + else: + raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}") + + @staticmethod + def create_llm(model_name: str = None, base_url: str = None): + """创建LLM实例""" + if model_name is None: + model_name = MODEL_NAME + if base_url is None: + base_url = OLLAMA_BASE_URL + + if BACKEND_TYPE == "ollama": + return Ollama( + model=model_name, + base_url=base_url, + temperature=MODEL_TEMPERATURE + ) + elif BACKEND_TYPE == "siliconflow": + return ChatOpenAI( + model=model_name, + api_key=SecretStr(SILICONFLOW_API_KEY), + base_url=SILICONFLOW_API_BASE, + temperature=MODEL_TEMPERATURE + ) + else: + raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}") + +class PromptTemplates: + """提示模板集合""" + + @staticmethod + def get_single_file_prompt() -> ChatPromptTemplate: + """获取单文件分析提示模板""" + return ChatPromptTemplate.from_messages([ + ("system", f""" +你是一个专业的Git维护专家,擅长总结社区文档的改动,请分析以下git diff中单个文件的改动,并生成结构化的摘要。 + 
+请仔细分析这个文件的改动,并按照以下要求生成摘要: + +**务必注意:当你对单个文件的所有变更内容从头到尾进行过完整的分析之后,再生成你最终的结论!不要仅根据其中几行的增删改就给出你的结论!** + +1. 改动类型判断(必须选择以下四种之一,请严格按照示例进行判断): + + - "涉及到其他内容的修改":新增二进制文件、新增依赖库等其他内容 + - "仅涉及标点符号的修改":仅修改了标点符号的增减、删除、变动,几乎不影响理解 + - "涉及到代码内容的修改":修改了代码逻辑、函数定义、配置结构、命令行内容、脚本实现等 + - "涉及到中英文文本内容的修改":修改了文档内容、命令或代码注释、字符串等文本,需要对内容进行翻译或调整以使得所有语种的人都可以理解 + +**其中,你需要重点对后三种类型的修改进行区分。越靠后,修改类型判定的优先级越高。** +如果修改的内容仅仅为新增了二进制文件、新增了依赖库等其他内容,绝大部分情况都可以归类为"涉及到其他内容的修改"。 +如果修改的内容不涉及中文或英文字符且不涉及代码改动,绝大部分情况都可以归类为"仅涉及标点符号的修改",但一旦存在除了标点符号或文档格式以外的改动,则优先归为其他类别。 +如果修改的内容涉及代码逻辑、函数定义、配置结构、脚本实现等可能产生现实影响的变更,或者对环境部署命令行、内容配置进行了更改或调整,但不需要对内容进行翻译或调整以使得所有语种的人都可以理解,则归类为"涉及到代码内容的修改"。 +如果修改的内容涉及中文或英文字符,且需要对内容进行翻译或调整以使得所有语种的人都可以理解,可以归类为"涉及到中英文文本内容的修改"。 +一个区分"涉及到代码内容的修改"和"涉及到中英文文本内容的修改"的标准是:如果当前的改动属于某一语言,如果使用者不理解该语言,则必须要对改动进行翻译才能理解,则归类为"涉及到中英文文本内容的修改",否则归类为"涉及到代码内容的修改"。 + +下面我将提供几个判断示例供你参考: + +示例1 - 仅涉及标点符号的修改: +```diff +- 这是一个测试文档,用于演示功能。 ++ 这是一个测试文档,用于演示功能! +``` +分析:只变更了逗号为中文逗号,句号为感叹号,属于"仅涉及标点符号的修改" +或者文件中: +```diff +- 这个文档的功能有进一步补充的空间。 ++ 这个文档的功能有进一步补充的空间! +``` +分析:只涉及中文句号和感叹号的增删改,不涉及中文字符和英文字符的改动,且不涉及代码改动,属于"仅涉及标点符号的修改" + +示例2 - 涉及到代码内容的修改: +```diff +- function getUserInfo() ++ function getUserProfile() +``` +或者在文档的代码块中: +```diff +- ```python +- def hello(): +- print("hello") +- ``` ++ ```python ++ def greeting(): ++ print("hello world") ++ ``` +``` +或者在文档的命令行代码块中 +```diff +- pwd +- cat /etc/profile ++ sudo apt update ++ whoami ++ echo "hello" +``` +分析:修改了函数名、逻辑或文档文本中的代码块等,但是不涉及需要翻译的内容,属于"涉及到代码内容的修改" + +示例3 - 涉及到中英文文本内容的修改: +```diff +- // 这是一个注释说明 ++ // 这是一个更详细的注释说明 +``` +或者JSON中: +```diff +- "description": "用户管理模块" ++ "description": "用户账户管理模块" +``` +分析:修改了注释或文档文本内容,影响用户的阅读理解,需要对内容进行翻译或调整以使得所有语种的人都可以理解,属于"涉及到中英文文本内容的修改" + +示例4 - 涉及到其他内容的修改: +```diff ++ Binary file image.png added +``` +或者: +```diff ++ "dependencies": ++ "new-package": "^1.0.0" ++ +``` +分析:新增了二进制文件或依赖包等,属于"涉及到其他内容的修改" + +2. 潜在影响分析: + - 分析这个文件的改动可能对其他文件或整体系统造成的影响 + - 考虑依赖关系、接口变化、数据流等 + - 如果是配置文件的修改,考虑对系统配置的影响 + - 如果对其他文件无潜在影响,请说明无潜在影响及原因 + +3. 详细摘要: + - 提炼出摘要改动文件所属的板块,并解释板块作用 + - 结合文件名和改动细节,用详细的语言描述具体的改动内容,要求准确全面,且改动内容要做到具体 + - 突出重要的改动点和影响范围,包括修改内容主要针对的对象、文档的分类等 + - 结合文件名、改动类型、潜在影响分析,对摘要做进一步补充 + +4. 输出格式: + - 请用中文生成摘要 + - 要求改动类型、潜在影响、改动内容总结都包含在摘要中,不能存在空字段 + - 严格检查你的输出,对"新增"、"删除"、"修改"等字眼要严格检查,确保没有出现语义错误 + - 严格检查你的输出,确保没有出现语义错误,对于出现的数字、改动的具体内容务必保证描述完全吻合 + + """), + ("human", """ +文件路径: {file_path} + +Git Diff 内容: +{diff_content} + + """) + ]) + + @staticmethod + def get_total_summary_prompt() -> ChatPromptTemplate: + """获取总摘要生成提示模板""" + return ChatPromptTemplate.from_messages([ + ("system", """ +你是一个专业的Git维护专家,擅长总结社区文档的改动,请基于以下各个文件的改动摘要,生成整个git diff的总摘要。 + +请分析所有文件的改动,并生成一个总摘要,要求: + +1. 整体改动类型统计: + - 统计所有文件涉及到的改动类型,取并集 + - 四种改动类型说明: + * "仅涉及标点符号的修改":只修改了标点符号的增减、删除、变动 + * "涉及到中英文文本内容的修改":修改了文档内容、注释等文本,但未涉及代码逻辑 + * "涉及到代码内容的修改":修改了代码逻辑、函数定义、配置结构、命令行内容、脚本实现等 + * "涉及到其他内容的修改":新增二进制文件、新增依赖库等其他内容 + - 将所有出现的改动类型都列出,不做优先级选择 + +统计示例: + +示例1 - 单一类型: +文件A:仅涉及标点符号的修改 +文件B:仅涉及标点符号的修改 +→ 整体改动类型:["仅涉及标点符号的修改"] + +示例2 - 多种类型: +文件A:仅涉及标点符号的修改 +文件B:涉及到中英文文本内容的修改 +→ 整体改动类型:["仅涉及标点符号的修改", "涉及到中英文文本内容的修改"] + +示例3 - 复杂混合: +文件A:涉及到中英文文本内容的修改 +文件B:涉及到代码内容的修改 +文件C:涉及到其他内容的修改 +→ 整体改动类型:["涉及到中英文文本内容的修改", "涉及到代码内容的修改", "涉及到其他内容的修改"] + +2. 整体潜在影响分析: + - 逐个总结所有文件的改动内容,并进行详细的列举,尽量涵盖所有修改内容 + - 综合分析所有文件改动对系统的整体影响 + - 考虑文件间的依赖关系和系统架构影响 + - 评估改动的风险等级和影响范围 + - 如果对其他文件无潜在影响,请说明无潜在影响及原因 + +3. 整体摘要详细列举: + - 提炼出所有摘要改动文件所属的板块,并解释板块作用 + - 用详细的语言分条概括每个摘要文件的核心内容,需要具体到文件,这一部分要占到最大的篇幅,不要遗漏任何摘要文件的内容 + - 突出重要的改动点,包括修改内容主要针对的对象、文档的分类等 + - 注意:整体摘要需要总结所有文件的内容;整体摘要需要尽可能详细 + +4. 
输出格式: + - 请用中文生成摘要,整体摘要内容字段务必全面详细 + - 要求整体潜在影响、整体摘要都包含在摘要中,不能存在空字段 + - 整体摘要必须满足以下格式:"本次更改涉及到XXX等文件,这些文件分别属于社区中的XXX模块。涉及到XXX的修改,可能会对XXX造成影响。总的来说,这次更改主要是XXX。" + - 严格检查你的输出,对"新增"、"删除"、"修改"等字眼要严格检查,确保没有出现语义错误 + - 严格检查你的输出,确保没有出现语义错误,对于出现的数字、改动的具体内容务必保证描述完全吻合 + + + """), + ("human", """ +各个文件的改动摘要: +{file_changes} + +总文件数: {total_files} + """) + ]) + +class SingleFileAnalysisChain: + """单文件分析任务链""" + + def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter): + self.llm = llm + self.token_counter = token_counter + + # 创建输出解析器 + self.output_parser = JsonOutputParser(pydantic_object=SingleFileSummary) + + # 根据后端类型选择不同的链构建方式 + if BACKEND_TYPE == "ollama": + self.prompt = PromptTemplates.get_single_file_prompt() + self.chain = self.prompt | self.llm.with_structured_output(SingleFileSummary) + else: + # 为硅基流动平台添加输出格式说明 + format_instructions = """ +请以JSON格式输出,包含以下字段: +{{ + "change_type": "改动类型(必须是以下之一:仅涉及标点符号的修改、涉及到中英文文本内容的修改、涉及到代码内容的修改、涉及到其他内容的修改)", + "potential_impact": "改动对其他文件潜在的影响", + "summary": "改动的详细摘要" +}} +""" + # 创建新的prompt模板 + system_template = """ +你是一个专业的Git维护专家,擅长总结社区文档的改动,请分析以下git diff中单个文件的改动,并生成结构化的摘要。 + +请仔细分析这个文件的改动,并按照以下要求生成摘要: + +**务必注意:当你对单个文件的所有变更内容从头到尾进行过完整的分析之后,再生成你最终的结论!不要仅根据其中几行的增删改就给出你的结论!** + +1. 改动类型判断(必须选择以下四种之一,请严格按照示例进行判断): + + - "涉及到其他内容的修改":新增二进制文件、新增依赖库等其他内容 + - "仅涉及标点符号的修改":仅修改了标点符号的增减、删除、变动,几乎不影响理解 + - "涉及到代码内容的修改":修改了代码逻辑、函数定义、配置结构、命令行内容、脚本实现等 + - "涉及到中英文文本内容的修改":修改了文档内容、命令或代码注释、字符串等文本,需要对内容进行翻译或调整以使得所有语种的人都可以理解 + +**其中,你需要重点对后三种类型的修改进行区分。越靠后,修改类型判定的优先级越高。** +如果修改的内容仅仅为新增了二进制文件、新增了依赖库等其他内容,绝大部分情况都可以归类为"涉及到其他内容的修改"。 +如果修改的内容不涉及中文或英文字符且不涉及代码改动,绝大部分情况都可以归类为"仅涉及标点符号的修改",但一旦存在除了标点符号或文档格式以外的改动,则优先归为其他类别。 +如果修改的内容涉及代码逻辑、函数定义、配置结构、脚本实现等可能产生现实影响的变更,或者对环境部署命令行、内容配置进行了更改或调整,但不需要对内容进行翻译或调整以使得所有语种的人都可以理解,则归类为"涉及到代码内容的修改"。 +如果修改的内容涉及中文或英文字符,且需要对内容进行翻译或调整以使得所有语种的人都可以理解,可以归类为"涉及到中英文文本内容的修改"。 +一个区分"涉及到代码内容的修改"和"涉及到中英文文本内容的修改"的标准是:如果当前的改动属于某一语言,如果使用者不理解该语言,则必须要对改动进行翻译才能理解,则归类为"涉及到中英文文本内容的修改",否则归类为"涉及到代码内容的修改"。 + +下面我将提供几个判断示例供你参考: + +示例1 - 仅涉及标点符号的修改: +```diff +- 这是一个测试文档,用于演示功能。 ++ 这是一个测试文档,用于演示功能! +``` +分析:只变更了逗号为中文逗号,句号为感叹号,属于"仅涉及标点符号的修改" +或者文件中: +```diff +- 这个文档的功能有进一步补充的空间。 ++ 这个文档的功能有进一步补充的空间! +``` +分析:只涉及中文句号和感叹号的增删改,不涉及中文字符和英文字符的改动,且不涉及代码改动,属于"仅涉及标点符号的修改" + +示例2 - 涉及到代码内容的修改: +```diff +- function getUserInfo() ++ function getUserProfile() +``` +或者在文档的代码块中: +```diff +- ```python +- def hello(): +- print("hello") +- ``` ++ ```python ++ def greeting(): ++ print("hello world") ++ ``` +``` +或者在文档的命令行代码块中 +```diff +- pwd +- cat /etc/profile ++ sudo apt update ++ whoami ++ echo "hello" +``` +分析:修改了函数名、逻辑或文档文本中的代码块等,但是不涉及需要翻译的内容,属于"涉及到代码内容的修改" + +示例3 - 涉及到中英文文本内容的修改: +```diff +- // 这是一个注释说明 ++ // 这是一个更详细的注释说明 +``` +或者JSON中: +```diff +- "description": "用户管理模块" ++ "description": "用户账户管理模块" +``` +分析:修改了注释或文档文本内容,影响用户的阅读理解,需要对内容进行翻译或调整以使得所有语种的人都可以理解,属于"涉及到中英文文本内容的修改" + +示例4 - 涉及到其他内容的修改: +```diff ++ Binary file image.png added +``` +或者: +```diff ++ "dependencies": ++ "new-package": "^1.0.0" ++ +``` +分析:新增了二进制文件或依赖包等,属于"涉及到其他内容的修改" + +2. 潜在影响分析: + - 分析这个文件的改动可能对其他文件或整体系统造成的影响 + - 考虑依赖关系、接口变化、数据流等 + - 如果是配置文件的修改,考虑对系统配置的影响 + - 如果对其他文件无潜在影响,请说明无潜在影响及原因 + +3. 详细摘要: + - 提炼出摘要改动文件所属的板块,并解释板块作用 + - 结合文件名和改动细节,用详细的语言描述具体的改动内容,要求准确全面,且改动内容要做到具体 + - 突出重要的改动点和影响范围,包括修改内容主要针对的对象、文档的分类等 + - 结合文件名、改动类型、潜在影响分析,对摘要做进一步补充 + +4. 
输出格式: + - 请用中文生成摘要 + - 要求改动类型、潜在影响、改动内容总结都包含在摘要中,不能存在空字段 + - 严格检查你的输出,对"新增"、"删除"、"修改"等字眼要严格检查,确保没有出现语义错误 + - 严格检查你的输出,确保没有出现语义错误,对于出现的数字、改动的具体内容务必保证描述完全吻合 + +{format_instructions} +""" + human_template = """ +文件路径: {file_path} + +Git Diff 内容: +{diff_content} +""" + self.prompt = ChatPromptTemplate.from_messages([ + ("system", system_template.format(format_instructions=format_instructions)), + ("human", human_template) + ]) + self.chain = self.prompt | self.llm | self.output_parser + + def analyze(self, diff_file_info: DiffFileInfo) -> Optional[SingleFileSummary]: + """分析单个文件的改动""" + max_retry = MODEL_MAX_RETRY_OLLAMA if BACKEND_TYPE == "ollama" else MODEL_MAX_RETRY + for attempt in range(1, max_retry + 1): + # 如果不是第一次尝试,等待一段时间再重试,避免连续失败 + if attempt > 1: + delay = min(attempt * 2, 10) # 递增延迟,最多10秒 + logger.info(f"第{attempt}次尝试分析文件 {diff_file_info.file_path},等待{delay}秒...") + time.sleep(delay) + + try: + # 构造prompt字符串 + prompt_args = { + "file_path": diff_file_info.file_path, + "diff_content": diff_file_info.diff_content + } + try: + messages = self.prompt.format_messages(**prompt_args) + if messages and len(messages) > 0: + message = messages[0] + if hasattr(message, 'content') and message.content: + prompt_str = str(message.content) + if prompt_str: + self.token_counter.count_prompt(prompt_str) + except Exception as e: + logger.warning(f"格式化prompt时发生错误: {e}") + + # 直接调用,简化超时控制 + invoke_args = { + "file_path": diff_file_info.file_path, + "diff_content": diff_file_info.diff_content, + "lines_added": diff_file_info.lines_added, + "lines_deleted": diff_file_info.lines_deleted + } + if BACKEND_TYPE != "ollama": + invoke_args["response_format"] = {"type": "json_object"} + + result = self.chain.invoke(invoke_args) + # 验证结果有效性 + if isinstance(result, (dict, SingleFileSummary)): + if isinstance(result, dict): + result = SingleFileSummary(**result) + + # 检查结果完整性 + if result and hasattr(result, 'summary') and result.summary and result.change_type: + # 统计completion token + try: + completion_str = str(result.summary) + if completion_str: + self.token_counter.count_completion(completion_str) + except Exception as e: + logger.warning(f"计算completion tokens时发生错误: {e}") + + # 设置准确值 + result.file_path = diff_file_info.file_path + result.lines_added = diff_file_info.lines_added + result.lines_deleted = diff_file_info.lines_deleted + return result + + # 结果无效,记录并重试 + logger.warning(f"分析文件 {diff_file_info.file_path} 返回无效结果,第{attempt}次尝试") + if attempt < max_retry: + continue + except Exception as e: + err_str = str(e) + # 检查是否为HTTP错误(如404、5xx),常见关键字有status code、HTTP、response等 + is_http_error = False + for code in ["404", "500", "502", "503", "504"]: + if code in err_str: + is_http_error = True + break + if ("status code" in err_str or "HTTP" in err_str or "response" in err_str) and any(code in err_str for code in ["404", "500", "502", "503", "504"]): + is_http_error = True + if is_http_error: + logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e},第{attempt}次尝试,10秒后重试...") + if attempt < max_retry: + time.sleep(10) + continue + else: + logger.error(f"分析文件 {diff_file_info.file_path} 时发生错误: {e},第{attempt}次尝试") + # 其它异常直接进入下一次重试 + if attempt < max_retry: + logger.info(f"第{attempt}次尝试失败,准备重试...") + logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry}次均未获得结构化输出,放弃。") + return None + +class TotalSummaryChain: + """总摘要生成任务链""" + + def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter): + self.llm = llm + self.token_counter = token_counter + + # 创建输出解析器 + self.output_parser = 
JsonOutputParser(pydantic_object=TotalSummary) + + # 根据后端类型选择不同的链构建方式 + if BACKEND_TYPE == "ollama": + self.prompt = PromptTemplates.get_total_summary_prompt() + self.chain = self.prompt | self.llm.with_structured_output(TotalSummary) + else: + # 为硅基流动平台添加输出格式说明 + format_instructions = """ +请以JSON格式输出,包含以下字段: +{{ + "overall_potential_impact": "整体改动对其他文件潜在的影响", + "overall_summary": "整体改动的详细摘要" +}} +""" + # 创建新的prompt模板 + system_template = """ +你是一个专业的Git维护专家,擅长总结社区文档的改动,请基于以下各个文件的改动摘要,生成整个git diff的总摘要。 + +请分析所有文件的改动,并生成一个总摘要,要求: + +1. 整体改动类型统计: + - 统计所有文件涉及到的改动类型,取并集 + - 四种改动类型说明: + * "仅涉及标点符号的修改":只修改了标点符号的增减、删除、变动 + * "涉及到中英文文本内容的修改":修改了文档内容、注释等文本,但未涉及代码逻辑 + * "涉及到代码内容的修改":修改了代码逻辑、函数定义、配置结构、命令行内容、脚本实现等 + * "涉及到其他内容的修改":新增二进制文件、新增依赖库等其他内容 + - 将所有出现的改动类型都列出,不做优先级选择 + +统计示例: + +示例1 - 单一类型: +文件A:仅涉及标点符号的修改 +文件B:仅涉及标点符号的修改 +→ 整体改动类型:["仅涉及标点符号的修改"] + +示例2 - 多种类型: +文件A:仅涉及标点符号的修改 +文件B:涉及到中英文文本内容的修改 +→ 整体改动类型:["仅涉及标点符号的修改", "涉及到中英文文本内容的修改"] + +示例3 - 复杂混合: +文件A:涉及到中英文文本内容的修改 +文件B:涉及到代码内容的修改 +文件C:涉及到其他内容的修改 +→ 整体改动类型:["涉及到中英文文本内容的修改", "涉及到代码内容的修改", "涉及到其他内容的修改"] + +2. 整体潜在影响分析: + - 逐个总结所有文件的改动内容,并进行详细的列举,尽量涵盖所有修改内容 + - 综合分析所有文件改动对系统的整体影响 + - 考虑文件间的依赖关系和系统架构影响 + - 评估改动的风险等级和影响范围 + - 如果对其他文件无潜在影响,请说明无潜在影响及原因 + +3. 整体摘要详细列举: + - 提炼出所有摘要改动文件所属的板块,并解释板块作用 + - 用详细的语言分条概括每个摘要文件的核心内容,需要具体到文件,这一部分要占到最大的篇幅,不要遗漏任何摘要文件的内容 + - 突出重要的改动点,包括修改内容主要针对的对象、文档的分类等 + - 注意:整体摘要需要总结所有文件的内容;整体摘要需要尽可能详细 + +4. 输出格式: + - 请用中文生成摘要,整体摘要内容字段务必全面详细 + - 要求整体潜在影响、整体摘要都包含在摘要中,不能存在空字段 + - 整体摘要必须满足以下格式:"本次更改涉及到XXX等文件,这些文件分别属于社区中的XXX模块。涉及到XXX的修改,可能会对XXX造成影响。总的来说,这次更改主要是XXX。" + - 严格检查你的输出,对"新增"、"删除"、"修改"等字眼要严格检查,确保没有出现语义错误 + - 严格检查你的输出,确保没有出现语义错误,对于出现的数字、改动的具体内容务必保证描述完全吻合 + +{format_instructions} +""" + human_template = """ +各个文件的改动摘要: +{file_changes} + +总文件数: {total_files} +""" + self.prompt = ChatPromptTemplate.from_messages([ + ("system", system_template.format(format_instructions=format_instructions)), + ("human", human_template) + ]) + self.chain = self.prompt | self.llm | self.output_parser + + def generate(self, file_summaries: List[SingleFileSummary]) -> Optional[TotalSummary]: + """生成总摘要""" + try: + total_files = len(file_summaries) + total_lines = sum(s.lines_added + s.lines_deleted for s in file_summaries) + file_changes_info = [] + # 收集所有改动类型 + all_change_types = list(set(s.change_type for s in file_summaries)) + + for summary in file_summaries: + file_changes_info.append({ + 'file_path': summary.file_path, + 'change_type': summary.change_type, + 'potential_impact': summary.potential_impact, + 'summary': summary.summary + }) + + # 构造prompt字符串 + prompt_args = { + "file_changes": json.dumps(file_changes_info, ensure_ascii=False, indent=2), + "total_files": total_files + } + try: + messages = self.prompt.format_messages(**prompt_args) + if messages and len(messages) > 0: + message = messages[0] + if hasattr(message, 'content') and message.content: + prompt_str = str(message.content) + if prompt_str: + self.token_counter.count_prompt(prompt_str) + except Exception as e: + logger.warning(f"格式化prompt时发生错误: {e}") + + # 使用线程池执行器为总摘要生成添加超时控制 + timeout_executor = None + try: + timeout_executor = ThreadPoolExecutor(max_workers=1) + invoke_args = { + "file_changes": json.dumps(file_changes_info, ensure_ascii=False, indent=2), + "total_files": total_files, + "total_lines": total_lines + } + if BACKEND_TYPE != "ollama": + # 为 SiliconFlow 添加 response_format 参数 + invoke_args["response_format"] = {"type": "json_object"} + + # 提交任务并设置超时 + future = timeout_executor.submit(self.chain.invoke, invoke_args) + try: + result = 
future.result(timeout=TOTAL_SUMMARY_TIMEOUT) + except (FutureTimeoutError, TimeoutError) as e: + logger.error(f"生成总摘要超时({TOTAL_SUMMARY_TIMEOUT}秒),放弃生成总摘要: {type(e).__name__}") + try: + future.cancel() # 尝试取消超时的任务 + except Exception as cancel_e: + logger.warning(f"取消任务时发生错误: {cancel_e}") + return None + finally: + # 确保线程池被正确关闭 + if timeout_executor: + try: + timeout_executor.shutdown(wait=False) + except Exception as shutdown_e: + logger.warning(f"关闭总摘要线程池时发生错误: {shutdown_e}") + + # 处理结果 + if isinstance(result, (dict, TotalSummary)): + # 如果是dict(来自JsonOutputParser),转换为TotalSummary + if isinstance(result, dict): + result = TotalSummary(**result) + try: + if result and hasattr(result, 'overall_summary'): + summary = result.overall_summary + if summary: + completion_str = str(summary) + if completion_str: + self.token_counter.count_completion(completion_str) + except Exception as e: + logger.warning(f"计算completion tokens时发生错误: {e}") + return TotalSummary( + total_files_changed=total_files, + total_lines_changed=total_lines, + overall_potential_impact=result.overall_potential_impact, + overall_summary=result.overall_summary, + change_type_list=all_change_types, + file_changes=[ + FileChangeInfo( + file_path=summary.file_path, + change_type=summary.change_type, + lines_changed=summary.lines_added + summary.lines_deleted + ) + for summary in file_summaries + ] + ) + else: + logger.error(f"生成总摘要时返回类型错误: {type(result)}") + return None + except Exception as e: + logger.error(f"生成总摘要时发生错误: {e}") + return None + +# ==================== 主处理类 ==================== + +class GitDiffSummarizer: + """Git Diff 摘要生成器""" + + def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", model_name: str = None, base_url: str = None): + if model_name is None: + model_name = MODEL_NAME + if base_url is None: + base_url = OLLAMA_BASE_URL + + # 设置siliconflow API配置 + global SILICONFLOW_API_KEY, SILICONFLOW_API_BASE + if siliconflow_api_key: + SILICONFLOW_API_KEY = siliconflow_api_key + if siliconflow_api_base: + SILICONFLOW_API_BASE = siliconflow_api_base + + self.token_counter = TokenCounter(model_name) + self.llm = LLMFactory.create_chat_llm(model_name, base_url) + self.single_file_chain = SingleFileAnalysisChain(self.llm, self.token_counter) + self.total_summary_chain = TotalSummaryChain(self.llm, self.token_counter) + + def cleanup(self): + """清理资源,确保程序能正确退出""" + try: + # 清理 LLM 连接 + if hasattr(self.llm, 'client') and hasattr(self.llm.client, 'close'): + self.llm.client.close() + elif hasattr(self.llm, '_client') and hasattr(self.llm._client, 'close'): + self.llm._client.close() + + # 如果是 ChatOpenAI,尝试关闭底层的 HTTP 客户端 + if BACKEND_TYPE == "siliconflow" and hasattr(self.llm, 'client'): + try: + # 强制关闭 httpx 客户端 + if hasattr(self.llm.client, '_client'): + self.llm.client._client.close() + except Exception as e: + logger.debug(f"关闭 HTTP 客户端时发生错误: {e}") + + logger.info("资源清理完成") + except Exception as e: + logger.warning(f"清理资源时发生错误: {e}") + + def process_git_diff(self, diff_content: str, max_workers: int = None) -> ProcessingResult: + if max_workers is None: + max_workers = PROCESSING_MAX_WORKERS + + logger.info("开始解析git diff...") + files = DiffParser.parse_git_diff(diff_content) + logger.info(f"解析到 {len(files)} 个文件的改动") + if not files: + logger.warning("未找到任何文件改动") + return ProcessingResult( + file_summaries=[], + total_summary=None, + processed_files=0, + total_files=0, + error='未找到任何文件改动' + ) + logger.info("开始并行处理各个文件的改动...") + file_summaries = [] + # 使用更健壮的并发处理机制 + 
executor = None + try: + executor = ThreadPoolExecutor(max_workers=max_workers) + future_to_file = { + executor.submit(self.single_file_chain.analyze, file_info): file_info.file_path + for file_info in files + } + + # 设置更长的整体超时时间,避免与单个文件超时冲突 + overall_timeout = SINGLE_FILE_TIMEOUT * len(files) + 600 # 给每个文件的时间 + 额外缓冲 + + completed_count = 0 + total_count = len(future_to_file) + + try: + for future in as_completed(future_to_file, timeout=overall_timeout): + file_path = future_to_file[future] + completed_count += 1 + try: + summary = future.result(timeout=5) # 短暂缓冲时间,因为任务已经完成 + if summary: + file_summaries.append(summary) + logger.info(f"完成文件 {file_path} 的摘要生成 ({completed_count}/{total_count})") + else: + logger.warning(f"文件 {file_path} 的摘要生成失败 ({completed_count}/{total_count})") + except (FutureTimeoutError, TimeoutError) as e: + logger.error(f"文件 {file_path} 的摘要获取超时,跳过该文件: {type(e).__name__} ({completed_count}/{total_count})") + try: + future.cancel() + except Exception as cancel_e: + logger.warning(f"取消任务时发生错误: {cancel_e}") + except Exception as e: + logger.error(f"处理文件 {file_path} 时发生异常: {e} ({completed_count}/{total_count})") + except (FutureTimeoutError, TimeoutError) as overall_e: + logger.error(f"整体处理超时({overall_timeout}秒),已完成{completed_count}/{total_count}个文件") + # 取消所有未完成的任务 + for future in future_to_file: + if not future.done(): + try: + future.cancel() + except Exception as cancel_e: + logger.warning(f"取消未完成任务时发生错误: {cancel_e}") + finally: + # 确保线程池被正确关闭 + if executor: + try: + executor.shutdown(wait=True) + except Exception as shutdown_e: + logger.warning(f"关闭主线程池时发生错误: {shutdown_e}") + logger.info(f"成功生成 {len(file_summaries)} 个文件的摘要") + logger.info("开始生成总摘要...") + total_summary = None + if file_summaries: + logger.info(f"基于 {len(file_summaries)} 个成功处理的文件生成总摘要...") + try: + total_summary = self.total_summary_chain.generate(file_summaries) + if total_summary: + logger.info("总摘要生成成功") + else: + logger.warning("总摘要生成失败") + except Exception as e: + logger.error(f"生成总摘要时发生未预期的错误: {e}") + else: + logger.warning("没有成功处理的文件,跳过总摘要生成") + return ProcessingResult( + file_summaries=file_summaries, + total_summary=total_summary, + processed_files=len(file_summaries), + total_files=len(files) + ) + +# ==================== 主函数 ==================== + +def get_agent_summary(sample_diff, siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1"): + + summarizer = GitDiffSummarizer(siliconflow_api_key, siliconflow_api_base) + result = None + try: + result = summarizer.process_git_diff(sample_diff) + finally: + # 确保在函数退出前清理资源 + summarizer.cleanup() + + if not result: + print("处理失败,无法获取结果") + return None + + if result.error: + print(f"错误: {result.error}") + print("\n=== 单文件摘要 ===") + for summary in result.file_summaries: + print(f"文件: {summary.file_path}") + print(f"改动类型: {summary.change_type}") + print(f"新增行数: {summary.lines_added}") + print(f"删除行数: {summary.lines_deleted}") + print(f"潜在影响: {summary.potential_impact}") + print(f"摘要: {summary.summary}") + print("-" * 50) + print("=== 处理结果 ===") + print(f"总文件数: {result.total_files}") + print(f"成功处理文件数: {result.processed_files}") + if result.total_summary: + print("\n=== 总摘要 ===") + total = result.total_summary + print(f"总文件数: {total.total_files_changed}") + print(f"总改动行数: {total.total_lines_changed}") + print(f"改动类型列表: {total.change_type_list}") + print(f"整体潜在影响: {total.overall_potential_impact}") + print(f"整体摘要: {total.overall_summary}") + print("\n=== 文件改动列表 ===") + for file_change in total.file_changes: + print(f"- 
{file_change.file_path}: {file_change.change_type} ({file_change.lines_changed} 行)") + + # 输出token统计 + stats = summarizer.token_counter.get_stats() + print("\n=== Token消耗统计 ===") + print(f"Prompt tokens: {stats['prompt_tokens']}") + print(f"Completion tokens: {stats['completion_tokens']}") + print(f"Total tokens: {stats['total_tokens']}") + # exit() + return result + +if __name__ == "__main__": + # 微服务接口逻辑模拟: 传递进来的就是 sample_diff 的内容 + sample_diff = sys.argv[1] + result = get_agent_summary(sample_diff) + print(result) \ No newline at end of file -- Gitee From 7cdf3c857e07c1b9e807710e45a5974afeab8f83 Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Sat, 20 Sep 2025 20:32:48 +0800 Subject: [PATCH 2/8] update ci-bot for auto generating translation comment --- ci/tools/comment/comment_agent.py | 958 +++++++++++++++++++++++++++ ci/tools/comment/create_comment.py | 372 +++++++++++ ci/tools/comment/create_comment.yaml | 38 ++ 3 files changed, 1368 insertions(+) create mode 100644 ci/tools/comment/comment_agent.py create mode 100644 ci/tools/comment/create_comment.py create mode 100644 ci/tools/comment/create_comment.yaml diff --git a/ci/tools/comment/comment_agent.py b/ci/tools/comment/comment_agent.py new file mode 100644 index 000000000..25dbe385c --- /dev/null +++ b/ci/tools/comment/comment_agent.py @@ -0,0 +1,958 @@ +import json +import re +import logging +import urllib.parse +from typing import List, Dict, Any, Optional, Tuple, Literal +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError as FutureTimeoutError +from pathlib import Path +import sys +import time +# LangChain imports +from langchain_core.prompts import ChatPromptTemplate, PromptTemplate +from langchain_core.runnables import RunnableLambda, RunnablePassthrough +from pydantic import BaseModel, Field, SecretStr +from langchain_community.llms import Ollama +from langchain_ollama import ChatOllama +from langchain.chains import TransformChain, SequentialChain +from langchain_core.output_parsers import JsonOutputParser +from langchain_openai import ChatOpenAI +import yaml + +# ==================== 配置加载 ==================== + +def load_config(config_file="create_comment.yaml"): + """从YAML文件加载配置""" + try: + with open(config_file, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + return config.get('comment_agent', {}) + except FileNotFoundError: + print(f"配置文件 {config_file} 不存在") + raise + except yaml.YAMLError as e: + print(f"解析配置文件时发生错误: {e}") + raise + +# 加载配置 +_config = load_config() + +# ==================== 配置常量 ==================== + +BACKEND_TYPE = _config.get('backend', {}).get('type', 'siliconflow') +MODEL_NAME = _config.get('model', {}).get('name', 'Qwen/Qwen3-8B') +MODEL_TEMPERATURE = _config.get('model', {}).get('temperature', 0.1) +MODEL_MAX_RETRY = _config.get('model', {}).get('max_retry', 5) +PROCESSING_MAX_WORKERS = _config.get('processing', {}).get('max_workers', 8) +SINGLE_FILE_TIMEOUT = _config.get('processing', {}).get('single_file_timeout', 180) +TOTAL_COMMENT_TIMEOUT = _config.get('processing', {}).get('total_comment_timeout', 300) +LOGGING_LEVEL = _config.get('logging', {}).get('level', 'INFO') +SILICONFLOW_API_KEY = '' +SILICONFLOW_API_BASE = '' + +# 配置日志 +logging.basicConfig(level=getattr(logging, LOGGING_LEVEL.upper())) +logger = logging.getLogger(__name__) + +# ==================== 数据模型定义 ==================== + +class PRAnalysisResult(BaseModel): + """PR分析结果的结构化输出""" + has_text_changes: bool = 
Field(description="是否涉及英文文本改动", default=False) + text_change_type: Literal["无文本改动", "仅标点符号改动", "英文内容改动", "代码注释改动", "混合改动"] = Field(description="文本改动类型") + has_grammar_errors: bool = Field(description="是否存在语法语病错误", default=False) + grammar_errors: List[str] = Field(description="具体的语法语病错误列表", default=[]) + detailed_analysis: str = Field(description="详细分析说明") + suggestions: List[str] = Field(description="改进建议列表", default=[]) + +class FileTextAnalysis(BaseModel): + """单个文件的文本分析""" + file_path: str = Field(description="文件路径", default="") + has_text_changes: bool = Field(description="是否涉及英文文本改动", default=False) + text_lines: List[str] = Field(description="涉及文本改动的行", default=[]) + grammar_issues: List[str] = Field(description="语法问题列表", default=[]) + analysis_details: str = Field(description="分析详情") + +@dataclass +class DiffFileInfo: + """单个文件的diff信息""" + file_path: str + diff_content: str + lines_added: int + lines_deleted: int + +@dataclass +class CommentResult: + """评论生成结果""" + pr_analysis: Optional[PRAnalysisResult] + file_analyses: List[FileTextAnalysis] + processed_files: int + total_files: int + error: Optional[str] = None + +# ==================== Token 统计工具 ==================== + + +# ==================== 工具函数 ==================== + +class DiffParser: + """Git Diff 解析器""" + + @staticmethod + def parse_git_diff(diff_content: str) -> List[DiffFileInfo]: + """ + 解析git diff内容,提取每个文件的改动信息 + + Args: + diff_content: git diff的原始内容 + + Returns: + 包含文件路径和对应diff内容的列表 + """ + + files = [] + current_file = None + current_diff = [] + + lines = diff_content.strip().split('\n') + + for line in lines: + # 匹配文件路径行 + if line.startswith('diff --git'): + # 保存前一个文件的信息 + if current_file and current_diff: + diff_info = DiffParser._create_diff_file_info(current_file, current_diff) + if diff_info: + files.append(diff_info) + + # 提取文件路径 - 改进的解析逻辑 + current_file = DiffParser._extract_file_path(line) + if current_file: + current_diff = [line] + else: + current_diff = [] + elif current_file: + current_diff.append(line) + + # 添加最后一个文件 + if current_file and current_diff: + diff_info = DiffParser._create_diff_file_info(current_file, current_diff) + if diff_info: + files.append(diff_info) + + return files + + @staticmethod + def _extract_file_path(diff_line: str) -> Optional[str]: + """ + 从git diff行中提取文件路径,支持包含汉字的文件名 + + Args: + diff_line: git diff的文件头行,格式如 "diff --git a/path/to/file b/path/to/file" + + Returns: + 提取出的文件路径,如果解析失败则返回None + """ + try: + # 方法1: 处理引号包围的路径(Git对特殊字符的处理) + # 格式: diff --git "a/path/to/file" "b/path/to/file" + quoted_pattern = r'diff --git "a/(.+?)" "b/(.+?)"' + quoted_match = re.match(quoted_pattern, diff_line) + + if quoted_match: + file_path_a = quoted_match.group(1) + file_path_b = quoted_match.group(2) + # 通常a和b路径相同,使用a路径(旧文件路径) + file_path = file_path_a + else: + # 方法2: 使用正则表达式匹配标准的git diff格式 + # 格式: diff --git a/path/to/file b/path/to/file + pattern = r'diff --git a/(.+?) 
b/(.+?)(?:\s|$)'
+                match = re.match(pattern, diff_line)
+
+                if match:
+                    file_path_a = match.group(1)
+                    file_path_b = match.group(2)
+                    # the a and b paths are normally identical; use the a (old) path
+                    file_path = file_path_a
+                else:
+                    # Method 3: if the regex fails, fall back to simpler parsing
+                    # for file names that may contain spaces and special characters
+                    if ' a/' in diff_line and ' b/' in diff_line:
+                        # locate the ' a/' and ' b/' markers
+                        a_pos = diff_line.find(' a/')
+                        b_pos = diff_line.find(' b/')
+
+                        if a_pos != -1 and b_pos != -1 and a_pos < b_pos:
+                            # take the path between a/ and b/
+                            a_start = a_pos + 3  # skip ' a/'
+                            file_path = diff_line[a_start:b_pos]
+                        else:
+                            return None
+                    else:
+                        # Method 4: last resort, plain whitespace splitting
+                        parts = diff_line.split()
+                        if len(parts) >= 3:
+                            a_path = parts[2]
+                            if a_path.startswith('a/'):
+                                file_path = a_path[2:]  # strip the 'a/' prefix
+                            else:
+                                return None
+                        else:
+                            return None
+
+            # decode any encoded characters in the file name
+            return DiffParser._decode_file_path(file_path)
+
+        except Exception as e:
+            logger.warning(f"解析文件路径时发生错误: {e}, diff行: {diff_line}")
+            return None
+
+    @staticmethod
+    def _decode_file_path(file_path: str) -> str:
+        """
+        Decode a file path, handling the encodings Git may apply.
+
+        Args:
+            file_path: the raw file path
+
+        Returns:
+            the decoded file path
+        """
+        try:
+            # URL-decode first to handle Git's percent-encoded file names
+            decoded_path = urllib.parse.unquote(file_path, encoding='utf-8')
+
+            # unwrap the quotes Git puts around paths with special characters
+            if decoded_path.startswith('"') and decoded_path.endswith('"'):
+                decoded_path = decoded_path[1:-1]
+                # Git escapes with backslashes; undo the escape sequences
+                decoded_path = decoded_path.replace('\\"', '"')
+                decoded_path = decoded_path.replace('\\\\', '\\')
+
+            # quoted or not, try to decode octal escape sequences
+            # whenever the path contains a \ooo sequence
+            if '\\' in decoded_path and re.search(r'\\[0-7]{3}', decoded_path):
+                decoded_path = DiffParser._decode_octal_sequences(decoded_path)
+
+            return decoded_path
+
+        except Exception as e:
+            logger.warning(f"解码文件路径时发生错误: {e}, 原始路径: {file_path}")
+            return file_path
+
+    @staticmethod
+    def _decode_octal_sequences(text: str) -> str:
+        """
+        Decode octal escape sequences in the text.
+
+        Args:
+            text: text that may contain octal escape sequences
+
+        Returns:
+            the decoded text
+        """
+        try:
+            # octal escape sequence pattern: \xxx
+            pattern = r'\\([0-7]{3})'
+
+            # find all octal sequences
+            matches = list(re.finditer(pattern, text))
+            if not matches:
+                return text
+
+            # accumulate byte values so multi-byte UTF-8 runs decode together
+            result = ""
+            last_end = 0
+            bytes_buffer = []
+
+            for i, match in enumerate(matches):
+                # append the text before this match
+                if match.start() > last_end:
+                    # flush any buffered bytes first
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                            result += decoded_bytes
+                            bytes_buffer = []
+                        except UnicodeDecodeError:
+                            # decoding failed; keep the original escaped form
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                            bytes_buffer = []
+
+                    result += text[last_end:match.start()]
+
+                # handle the current octal sequence
+                octal_str = match.group(1)
+                try:
+                    byte_value = int(octal_str, 8)
+                    bytes_buffer.append(byte_value)
+                except ValueError:
+                    # conversion failed; flush the buffer and keep the raw match
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                            result += decoded_bytes
+                            bytes_buffer = []
+                        except UnicodeDecodeError:
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                            bytes_buffer = []
+                    result += match.group(0)
+
+                last_end = match.end()
+
+                # check whether this is the last match or the next one is not adjacent
+                is_last = (i == len(matches) - 1)
+                is_next_non_consecutive = (not is_last and
+                                           matches[i + 1].start() != match.end())
+
+                if is_last or is_next_non_consecutive:
+                    # flush the buffered bytes; the decoded run must be appended
+                    # to result here, otherwise those characters are silently lost
+                    if bytes_buffer:
+                        try:
+                            decoded_bytes = bytes(bytes_buffer).decode('utf-8')
+                            result += decoded_bytes
+                        except UnicodeDecodeError:
+                            # decoding failed; keep the original escaped form
+                            for byte_val in bytes_buffer:
+                                result += f"\\{oct(byte_val)[2:].zfill(3)}"
+                        bytes_buffer = []
+
+            # append the remaining text
+            if last_end < len(text):
+                result += text[last_end:]
+
+            return result
+
+        except Exception as e:
+            logger.warning(f"解码八进制序列时发生错误: {e}, 原始文本: 
{text}") + return text + + @staticmethod + def _create_diff_file_info(file_path: str, diff_lines: List[str]) -> Optional[DiffFileInfo]: + """创建DiffFileInfo对象""" + diff_content = '\n'.join(diff_lines) + lines_added, lines_deleted = DiffParser._count_lines_changed(diff_content) + + return DiffFileInfo( + file_path=file_path, + diff_content=diff_content, + lines_added=lines_added, + lines_deleted=lines_deleted + ) + + @staticmethod + def _count_lines_changed(diff_content: str) -> Tuple[int, int]: + """统计git diff中改动的行数""" + lines_added, lines_deleted = 0, 0 + lines = diff_content.strip().split('\n') + + for line in lines: + # 统计新增行(以+开头,但不是+++) + if line.startswith('+') and not line.startswith('+++'): + lines_added += 1 + # 统计删除行(以-开头,但不是---) + elif line.startswith('-') and not line.startswith('---'): + lines_deleted += 1 + + return lines_added, lines_deleted + +# ==================== LangChain 组件 ==================== + +class LLMFactory: + """LLM工厂类""" + + @staticmethod + def create_chat_llm(model_name: str = None, base_url: str = None): + """创建LLM实例""" + if model_name is None: + model_name = MODEL_NAME + + if BACKEND_TYPE == "siliconflow": + return ChatOpenAI( + model=model_name, + api_key=SecretStr(SILICONFLOW_API_KEY), + base_url=SILICONFLOW_API_BASE, + temperature=MODEL_TEMPERATURE + ) + else: + raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}") + +class PromptTemplates: + """提示模板集合""" + + @staticmethod + def get_file_text_analysis_prompt() -> ChatPromptTemplate: + """获取单文件文本分析提示模板""" + return ChatPromptTemplate.from_messages([ + ("system", f""" +你是一个专业的代码审查和语言专家,专注于分析Gitee文档仓库的翻译PR中的英文文本内容。每条PR都是人工生成的文档改动。请忽略中文、格式和代码的审计,专注于识别英文文本变更。 + +注意:请忽略中文、格式和代码的审计,专注于识别英文文本变更。如果文档的变更不涉及英文文本,你只需要输出“不涉及英文改动”即可,不需要额外输出任何分析结果。 +同时:对于专有名词,例如openEuler、GitHub等,你不能将其纳入英文文本变更的纠错范围内,而是应该自动识别专有名词。对于代码的相关变更,也不应该纳入分析内容范围。 + +你需要遵循**能不提修改意见就不提修改意见**的原则进行审查!!! + +请仔细分析这个文件的改动,并按照以下要求进行分析: + +**分析重点:** + +1. 英文文本变更识别: + - 检查是否涉及英文文本内容的改动 + - 区分代码逻辑变更和英文文本内容变更 + - 识别注释、文档字符串、用户显示文本等英文文本内容 + - 标识出具体的英文文本变更行 + +2. 语法错误检测: + - 检查英文文本的语法、拼写错误 + +**分析类型判断:** +- 如果改动不涉及任何英文文本内容,标记为"无英文文本改动" +- 如果涉及代码注释的英文文本变更,标记为"代码注释改动" +- 如果涉及文档、界面文本等英文内容变更,标记为"英文内容改动" + +**语法检查重点:** +- 英文:主谓一致、时态、拼写、标点、语序 + +**输出要求:** +- 如果存在英文文本变更但变更不存在语法问题,则直接输出“不存在语法问题”,不需要任何额外输出 +- 详细列出发现的语法错误(如果有) +- 不能超过100个汉字字符 + + """), + ("human", """ +文件路径: {file_path} + +Git Diff 内容: +{diff_content} + + """) + ]) + + @staticmethod + def get_pr_analysis_prompt() -> ChatPromptTemplate: + """获取整体PR分析提示模板""" + return ChatPromptTemplate.from_messages([ + ("system", """ +你是一个专业的PR审查专家,专门分析Gitee文档仓库的翻译PR中的英文文本变更和语法问题。每条PR都是人工生成的文档改动。 + +请分析所有文件的改动,并生成一个综合评估,要求: + +1. 整体文本变更评估: + - 统计涉及文本变更的文件数量 + - 分析文本变更的类型分布 + - 评估变更的重要性和影响范围 + - 如果文本变更不涉及英文,或涉及英文但使用正确不需要改动,则**直接忽略**,无需对其进行总结 + +2. 语法错误汇总: + - **仅汇总改动中的硬伤,如单词拼写错误、英语语法(时态语态)错误等** + - **对于一些可以优化但称不上错误的点,以最小化改动为原则,选择忽略** + - 提高报错阈值,忽略可优化翻译的点 + - 提供优先修复建议 + +3. 质量评估: + - 对整个PR的文本质量给出评分 + - 分析文本变更的一致性 + - 评估对用户体验的影响 + +4. 
改进建议: + - 提供具体的修改建议 + - 推荐最佳实践 + - 建议后续的质量控制措施 + +**输出格式要求:** +- 提供清晰的分析结论 +- 按优先级排列发现的问题 +- 给出可操作的改进建议 + + """), + ("human", """ +各个文件的分析结果: +{file_analyses} + +总文件数: {total_files} +涉及文本变更的文件数: {text_changed_files} + """) + ]) + +class FileTextAnalysisChain: + """单文件文本分析任务链""" + + def __init__(self, llm: ChatOpenAI): + self.llm = llm + + # 创建输出解析器 + self.output_parser = JsonOutputParser(pydantic_object=FileTextAnalysis) + + # 为硅基流动平台添加输出格式说明 + format_instructions = """ +请以JSON格式输出,包含以下字段: +{{ + "has_text_changes": "是否涉及英文文本改动(布尔值)", + "text_lines": "涉及文本改动的行(字符串列表)", + "grammar_issues": "语法问题列表(字符串列表)", + "analysis_details": "分析详情(字符串)" +}} +""" + # 创建新的prompt模板 + system_template = """ +你是一个专业的代码审查和语言专家,专注于分析Gitee文档仓库的翻译PR中的英文文本内容。每条PR都是人工生成的文档改动。 + +**核心原则:只关注必然存在明显错误的地方,其他文件都不需要关注!** + +**严格过滤条件:** +1. 如果文档的变更不涉及英文文本,直接标记为"无英文文本改动",无需任何分析 +2. 如果涉及英文文本但语法完全正确,直接标记为"语法正确,无需关注" +3. 如果仅涉及标点符号的微小调整,直接标记为"仅标点符号改动,无需关注" +4. 对于专有名词(如openEuler、GitHub等),自动识别并忽略,不纳入纠错范围 +5. 对于代码相关变更,不纳入分析内容范围 + +**只关注以下明显错误:** +- 明显的单词拼写错误(如:recieve -> receive) +- 严重的语法错误(如:主谓不一致、时态错误) +- 明显的标点符号错误(如:缺少句号、逗号使用错误) +- 明显的语序错误 + +**忽略以下情况:** +- 语法正确但可以优化的表达 +- 风格偏好问题 +- 轻微的标点符号调整 +- 术语选择的差异 +- 表达方式的个人偏好 + +**输出要求:** +- 如果不存在明显错误,直接输出"语法正确,无需关注" +- 只有发现明显错误时才详细列出 +- 不能超过100个汉字字符 +- 遵循"能不提修改意见就不提修改意见"的原则 + +{format_instructions} +""" + human_template = """ +文件路径: {file_path} + +Git Diff 内容: +{diff_content} +""" + self.prompt = ChatPromptTemplate.from_messages([ + ("system", system_template.format(format_instructions=format_instructions)), + ("human", human_template) + ]) + self.chain = self.prompt | self.llm | self.output_parser + + def analyze(self, diff_file_info: DiffFileInfo) -> Optional[FileTextAnalysis]: + """分析单个文件的文本变更""" + max_retry = MODEL_MAX_RETRY + for attempt in range(1, max_retry + 1): + # 如果不是第一次尝试,等待一段时间再重试,避免连续失败 + if attempt > 1: + delay = min(attempt * 2, 10) # 递增延迟,最多10秒 + logger.info(f"第{attempt}次尝试分析文件 {diff_file_info.file_path},等待{delay}秒...") + time.sleep(delay) + + try: + # 构造prompt字符串 + prompt_args = { + "file_path": diff_file_info.file_path, + "diff_content": diff_file_info.diff_content + } + + # 直接调用,简化超时控制 + invoke_args = { + "file_path": diff_file_info.file_path, + "diff_content": diff_file_info.diff_content + } + result = self.chain.invoke(invoke_args) + # 验证结果有效性 + if isinstance(result, (dict, FileTextAnalysis)): + if isinstance(result, dict): + result = FileTextAnalysis(**result) + + # 检查结果完整性 + if result and hasattr(result, 'analysis_details') and result.analysis_details: + + # 设置准确值 + result.file_path = diff_file_info.file_path + + # 检查是否只关注明显错误 + analysis_text = result.analysis_details.lower() + if any(phrase in analysis_text for phrase in [ + "语法正确,无需关注", + "无英文文本改动", + "仅标点符号改动,无需关注", + "不存在语法问题" + ]): + # 如果无问题,设置has_text_changes为False + result.has_text_changes = False + result.grammar_issues = [] + + return result + + # 结果无效,记录并重试 + logger.warning(f"分析文件 {diff_file_info.file_path} 返回无效结果,第{attempt}次尝试") + if attempt < max_retry: + continue + except Exception as e: + err_str = str(e) + # 检查是否为HTTP错误(如404、5xx),常见关键字有status code、HTTP、response等 + is_http_error = False + for code in ["404", "500", "502", "503", "504"]: + if code in err_str: + is_http_error = True + break + if ("status code" in err_str or "HTTP" in err_str or "response" in err_str) and any(code in err_str for code in ["404", "500", "502", "503", "504"]): + is_http_error = True + if is_http_error: + logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e},第{attempt}次尝试,10秒后重试...") + if attempt < max_retry: + 
time.sleep(10) + continue + else: + logger.error(f"分析文件 {diff_file_info.file_path} 时发生错误: {e},第{attempt}次尝试") + # 其它异常直接进入下一次重试 + if attempt < max_retry: + logger.info(f"第{attempt}次尝试失败,准备重试...") + logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry}次均未获得结构化输出,放弃。") + return None + +class PRAnalysisChain: + """PR整体分析任务链""" + + def __init__(self, llm: ChatOllama | ChatOpenAI): + self.llm = llm + + # 创建输出解析器 + self.output_parser = JsonOutputParser(pydantic_object=PRAnalysisResult) + + # 为硅基流动平台添加输出格式说明 + format_instructions = """ +请以JSON格式输出,包含以下字段: +{{ + "has_text_changes": "是否涉及英文文本改动(布尔值)", + "text_change_type": "文本改动类型(字符串)", + "has_grammar_errors": "是否存在语法语病错误(布尔值)", + "grammar_errors": "具体的语法语病错误列表(字符串列表)", + "detailed_analysis": "详细分析说明(字符串)", + "suggestions": "改进建议列表(字符串列表)" +}} +""" + # 创建新的prompt模板 + system_template = """ +你是一个专业的PR审查专家,专门分析Pull Request中的文本变更和语法问题。 + +**核心原则:只关注必然存在明显错误的地方,其他文件都不需要关注!** + +请基于各个文件的分析结果,生成整个PR的综合评估,要求: + +1. 严格过滤文件: + - 只统计存在明显错误的文件 + - 忽略"语法正确,无需关注"的文件 + - 忽略"无英文文本改动"的文件 + - 忽略"仅标点符号改动,无需关注"的文件 + +2. 只汇总明显错误: + - 仅汇总硬伤:明显的单词拼写错误、严重的语法错误 + - 忽略可优化但称不上错误的点 + - 忽略风格偏好问题 + - 忽略轻微的标点符号调整 + +3. 质量评估: + - 只对存在明显错误的文件进行质量评估 + - 如果所有文件都无问题,直接标记为"无问题文件" + +4. 改进建议: + - 只对存在明显错误的文件提供修改建议 + - 建议优先修复明显的拼写和语法错误 + +**输出格式要求:** +- 如果所有文件都无问题,直接输出"所有文件语法正确,无需关注" +- 只列出存在明显错误的文件 +- 按优先级排列发现的问题 +- 给出可操作的改进建议 + +{format_instructions} +""" + human_template = """ +各个文件的分析结果: +{file_analyses} + +总文件数: {total_files} +涉及文本变更的文件数: {text_changed_files} +""" + self.prompt = ChatPromptTemplate.from_messages([ + ("system", system_template.format(format_instructions=format_instructions)), + ("human", human_template) + ]) + self.chain = self.prompt | self.llm | self.output_parser + + def generate(self, file_analyses: List[FileTextAnalysis]) -> Optional[PRAnalysisResult]: + """生成PR整体分析""" + try: + total_files = len(file_analyses) + + # 过滤出只关注存在明显错误的文件 + problematic_files = [] + for analysis in file_analyses: + # 检查是否存在明显错误 + has_obvious_errors = ( + analysis.has_text_changes and + analysis.grammar_issues and + len(analysis.grammar_issues) > 0 and + analysis.analysis_details and + not any(phrase in analysis.analysis_details for phrase in [ + "语法正确,无需关注", + "无英文文本改动", + "仅标点符号改动,无需关注", + "不存在语法问题" + ]) + ) + + if has_obvious_errors: + problematic_files.append(analysis) + + # 如果所有文件都无问题,直接返回无问题结果 + if not problematic_files: + return PRAnalysisResult( + has_text_changes=False, + text_change_type="无文本改动", + has_grammar_errors=False, + grammar_errors=[], + detailed_analysis="所有文件语法正确,无需关注", + suggestions=[] + ) + + text_changed_files = len(problematic_files) + + file_analyses_info = [] + for analysis in problematic_files: + file_analyses_info.append({ + 'file_path': analysis.file_path, + 'has_text_changes': analysis.has_text_changes, + 'text_lines': analysis.text_lines, + 'grammar_issues': analysis.grammar_issues, + 'analysis_details': analysis.analysis_details + }) + + # 构造prompt字符串 + prompt_args = { + "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2), + "total_files": total_files, + "text_changed_files": text_changed_files + } + + # 使用线程池执行器为PR分析添加超时控制 + timeout_executor = None + try: + timeout_executor = ThreadPoolExecutor(max_workers=1) + invoke_args = { + "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2), + "total_files": total_files, + "text_changed_files": text_changed_files + } + result = self.chain.invoke(invoke_args) + # 验证结果有效性 + if isinstance(result, (dict, PRAnalysisResult)): + # 
如果是dict(来自JsonOutputParser),转换为PRAnalysisResult + if isinstance(result, dict): + result = PRAnalysisResult(**result) + return result + else: + logger.error(f"生成PR分析时返回类型错误: {type(result)}") + return None + except Exception as e: + logger.error(f"生成PR分析时发生错误: {e}") + return None + except Exception as e: + logger.error(f"生成PR分析时发生错误: {e}") + return None + +# ==================== 主处理类 ==================== + +class PRCommentAnalyzer: + """PR评论分析器""" + + def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", model_name: str = None, base_url: str = None): + if model_name is None: + model_name = MODEL_NAME + + # 设置siliconflow API配置 + global SILICONFLOW_API_KEY, SILICONFLOW_API_BASE + if siliconflow_api_key: + SILICONFLOW_API_KEY = siliconflow_api_key + if siliconflow_api_base: + SILICONFLOW_API_BASE = siliconflow_api_base + + self.llm = LLMFactory.create_chat_llm(model_name) + self.file_analysis_chain = FileTextAnalysisChain(self.llm) + self.pr_analysis_chain = PRAnalysisChain(self.llm) + + def cleanup(self): + """清理资源,确保程序能正确退出""" + try: + # 清理 LLM 连接 + if hasattr(self.llm, 'client') and hasattr(self.llm.client, 'close'): + self.llm.client.close() + elif hasattr(self.llm, '_client') and hasattr(self.llm._client, 'close'): + self.llm._client.close() + + # 如果是 ChatOpenAI,尝试关闭底层的 HTTP 客户端 + if BACKEND_TYPE == "siliconflow" and hasattr(self.llm, 'client'): + try: + # 强制关闭 httpx 客户端 + if hasattr(self.llm.client, '_client'): + self.llm.client._client.close() + except Exception as e: + logger.debug(f"关闭 HTTP 客户端时发生错误: {e}") + + logger.info("资源清理完成") + except Exception as e: + logger.warning(f"清理资源时发生错误: {e}") + + def analyze_pr_diff(self, diff_content: str, max_workers: int = None) -> CommentResult: + if max_workers is None: + max_workers = PROCESSING_MAX_WORKERS + + logger.info("开始解析PR diff...") + files = DiffParser.parse_git_diff(diff_content) + logger.info(f"解析到 {len(files)} 个文件的改动") + if not files: + logger.warning("未找到任何文件改动") + return CommentResult( + pr_analysis=None, + file_analyses=[], + processed_files=0, + total_files=0, + error='未找到任何文件改动' + ) + + logger.info("开始并行处理各个文件的文本分析...") + file_analyses = [] + # 使用更健壮的并发处理机制 + executor = None + try: + executor = ThreadPoolExecutor(max_workers=max_workers) + future_to_file = { + executor.submit(self.file_analysis_chain.analyze, file_info): file_info.file_path + for file_info in files + } + + # 设置更长的整体超时时间,避免与单个文件超时冲突 + overall_timeout = SINGLE_FILE_TIMEOUT * len(files) + 600 # 给每个文件的时间 + 额外缓冲 + + completed_count = 0 + total_count = len(future_to_file) + + try: + for future in as_completed(future_to_file, timeout=overall_timeout): + file_path = future_to_file[future] + completed_count += 1 + try: + analysis = future.result(timeout=5) # 短暂缓冲时间,因为任务已经完成 + if analysis: + file_analyses.append(analysis) + logger.info(f"完成文件 {file_path} 的文本分析 ({completed_count}/{total_count})") + else: + logger.warning(f"文件 {file_path} 的文本分析失败 ({completed_count}/{total_count})") + except (FutureTimeoutError, TimeoutError) as e: + logger.error(f"文件 {file_path} 的文本分析获取超时,跳过该文件: {type(e).__name__} ({completed_count}/{total_count})") + try: + future.cancel() + except Exception as cancel_e: + logger.warning(f"取消任务时发生错误: {cancel_e}") + except Exception as e: + logger.error(f"处理文件 {file_path} 时发生异常: {e} ({completed_count}/{total_count})") + except (FutureTimeoutError, TimeoutError) as overall_e: + logger.error(f"整体处理超时({overall_timeout}秒),已完成{completed_count}/{total_count}个文件") + # 取消所有未完成的任务 + for future in future_to_file: 
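                    # -- editor's note (annotation, not part of the recorded patch) --
                    # Future.cancel() only stops tasks that have not started running;
                    # it cannot interrupt a worker thread already blocked inside
                    # chain.invoke(). A cooperative variant would share a flag, e.g.:
                    #
                    #     stop_event = threading.Event()   # hypothetical shared flag
                    #     # ...checked between retries in FileTextAnalysisChain.analyze:
                    #     if stop_event.is_set():
                    #         return None
                    #
                    # with stop_event.set() called from this timeout handler.
                    # ----------------------------------------------------------------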
+ if not future.done(): + try: + future.cancel() + except Exception as cancel_e: + logger.warning(f"取消未完成任务时发生错误: {cancel_e}") + finally: + # 确保线程池被正确关闭 + if executor: + try: + executor.shutdown(wait=True) + except Exception as shutdown_e: + logger.warning(f"关闭主线程池时发生错误: {shutdown_e}") + + logger.info(f"成功生成 {len(file_analyses)} 个文件的文本分析") + logger.info("开始生成PR整体分析...") + pr_analysis = None + if file_analyses: + logger.info(f"基于 {len(file_analyses)} 个成功处理的文件生成PR分析...") + try: + pr_analysis = self.pr_analysis_chain.generate(file_analyses) + if pr_analysis: + logger.info("PR整体分析生成成功") + else: + logger.warning("PR整体分析生成失败") + except Exception as e: + logger.error(f"生成PR分析时发生未预期的错误: {e}") + else: + logger.warning("没有成功处理的文件,跳过PR分析生成") + + return CommentResult( + pr_analysis=pr_analysis, + file_analyses=file_analyses, + processed_files=len(file_analyses), + total_files=len(files) + ) + +# ==================== 主函数 ==================== + +def get_comment_analysis(sample_diff, siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1"): + + analyzer = PRCommentAnalyzer(siliconflow_api_key, siliconflow_api_base) + result = None + try: + result = analyzer.analyze_pr_diff(sample_diff) + finally: + # 确保在函数退出前清理资源 + analyzer.cleanup() + + if not result: + print("处理失败,无法获取结果") + return None + + if result.error: + print(f"错误: {result.error}") + + print("\n=== 单文件文本分析 ===") + problematic_files = [f for f in result.file_analyses if f.has_text_changes and f.grammar_issues] + if problematic_files: + for analysis in problematic_files: + print(f"文件: {analysis.file_path}") + print(f"涉及文本变更: {analysis.has_text_changes}") + print(f"文本变更行: {analysis.text_lines}") + print(f"语法问题: {analysis.grammar_issues}") + print(f"分析详情: {analysis.analysis_details}") + print("-" * 50) + else: + print("所有文件语法正确,无需关注") + + print("=== 处理结果 ===") + print(f"总文件数: {result.total_files}") + print(f"成功处理文件数: {result.processed_files}") + + if result.pr_analysis: + print("\n=== PR整体分析 ===") + pr = result.pr_analysis + print(f"涉及文本变更: {pr.has_text_changes}") + print(f"文本变更类型: {pr.text_change_type}") + print(f"存在语法错误: {pr.has_grammar_errors}") + print(f"语法错误列表: {pr.grammar_errors}") + print(f"详细分析: {pr.detailed_analysis}") + print(f"改进建议: {pr.suggestions}") + + + return result + +if __name__ == "__main__": + # 微服务接口逻辑: 传递进来的就是 sample_diff 的内容 + sample_diff = sys.argv[1] + result = get_comment_analysis(sample_diff) + print(result) diff --git a/ci/tools/comment/create_comment.py b/ci/tools/comment/create_comment.py new file mode 100644 index 000000000..481c7a2f5 --- /dev/null +++ b/ci/tools/comment/create_comment.py @@ -0,0 +1,372 @@ +import argparse +import json +import logging +import re +import sys +from dataclasses import dataclass, field +from difflib import SequenceMatcher +from typing import TypeVar, Generic +from comment_agent import get_comment_analysis + +import requests +import yaml + +logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s') +logger = logging.getLogger(__name__) + + +@dataclass +class Org: + org_name: str + comment_target_owner: str + comment_target_repo: str + auto_comment_enabled: bool = field(default=True) + confidence_threshold: float = field(default=0.7) + text_check_enabled: bool = field(default=True) + grammar_check_enabled: bool = field(default=True) + + +@dataclass +class CommentAgentConfig: + backend: dict = field(default_factory=dict) + model: dict = field(default_factory=dict) + processing: dict = 
field(default_factory=dict)
+    logging: dict = field(default_factory=dict)
+
+
+@dataclass
+class Config:
+    orgs: list[dict | Org]
+    comment_agent: dict | CommentAgentConfig = field(default_factory=dict)
+
+    def __post_init__(self):
+        tmp_orgs: list[Org] = []
+        for item in self.orgs:
+            tmp_orgs.append(Org(**item))
+        self.orgs = tmp_orgs
+
+        if isinstance(self.comment_agent, dict) and self.comment_agent:
+            self.comment_agent = CommentAgentConfig(**self.comment_agent)
+
+
+@dataclass
+class ReqArgs:
+    method: str
+    url: str
+    headers: dict[str, str]
+    params: dict[str, str] | None = field(default=None)
+    data: str | None = field(default=None)
+    timeout: int = field(default=180)
+
+
+T = TypeVar('T')
+content_type_is_text = "text/plain"
+content_type_is_json_dict = {}
+content_type_is_json_list = []
+
+
+def send_request(args: ReqArgs, t: T) -> T:
+    error_count = 0
+    while error_count < 3:
+        try:
+            resp = requests.request(**args.__dict__)
+            resp.raise_for_status()
+            if type(t) is dict or type(t) is list:
+                res_data: dict | list = resp.json()
+            else:
+                res_data: str = resp.text
+        except requests.exceptions.RequestException as e:
+            # e.response is None when no response was received (e.g. connection error)
+            if e.response is not None and e.response.status_code in [400, 401, 403, 404, 405]:
+                logger.error("[ERROR] client error {}".format(e))
+                break
+            logger.error("[ERROR] server error: {}".format(e))
+            error_count += 1
+        else:
+            logger.info("[OK] [{}], {}".format(args.method, args.url))
+            return res_data
+    return None
+
+
+class GiteeClient:
+    """
+    Gitee OpenAPI 客户端
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    def __init__(self, developer_token: str):
+        """
+        构造函数
+        :param developer_token: Gitee v5 token
+        """
+        self.headers["Authorization"] = "Bearer {}".format(developer_token)
+
+    def get_diff_content(self, owner: str, repo: str, number: int) -> str | None:
+        req_url = "https://gitee.com/{}/{}/pulls/{}.diff".format(owner, repo, number)
+        req_args = ReqArgs(method="GET", url=req_url, headers=self.headers)
+        result: str | None = send_request(req_args, "")
+        if result is None:
+            logger.error("can not get diff file from PR: {}".format(req_url))
+        return result
+
+    def add_pr_comment(self, owner, repo, number, body):
+        req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number)
+        req_body = {
+            "body": "### 🤖 AI审查反馈 \n {} ".format(body)
+        }
+        req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body))
+        result: dict | None = send_request(req_args, {})
+        return result is not None
+
+
+
+def get_diff_file_list(diff_content: str) -> list[str]:
+    diff_files_list = []
+    diff_files = [x.split(' ')[0][2:] for x in diff_content.split('diff --git ')[1:]]
+    for diff_file in diff_files:
+        if diff_file.endswith('\"'):
+            d = re.compile(r'/[\d\s\S]+')
+            diff_file = d.findall(diff_file)
+            diff_file = diff_file[0].replace('/', '', 1).replace('\"', '')
+            diff_files_list.append(diff_file)
+        else:
+            diff_files_list.append(diff_file)
+    return diff_files_list
+
+
+def generate_comment_content(comment_result, pr_url: str, analysis_status: str = "success") -> str:
+    """根据分析结果生成评论内容"""
+    comment_body = ""
+
+    # 根据分析状态添加不同的状态标识
+    if analysis_status == "error":
+        comment_body += "### 分析状态:处理失败\n"
+        comment_body += "**分析过程中发生错误,无法生成详细反馈。请手动审查文本变更。**\n\n"
+    elif analysis_status == "low_confidence":
+        comment_body += "### 分析状态:置信度较低\n"
+        comment_body += "**当前分析置信度较低,结果仅供参考。建议进行人工审查。**\n\n"
+    elif analysis_status == "no_text_changes":
+        comment_body += "### 分析状态:无文本问题\n"
+        comment_body += 
"**AI分析结果显示本次PR未发现明显的文本变更或语法问题。无需改动。**\n\n" + elif analysis_status == "no_grammar_errors": + comment_body += "### 分析状态:文本质量良好\n" + comment_body += "**检测到文本变更,但未发现明显的语法错误,文本质量良好。无需改动。**\n\n" + else: # success with issues + comment_body += "### 分析状态:发现需要关注的问题\n" + comment_body += "**AI分析发现了一些文本变更或语法问题,请查看下方详细信息。**\n\n" + + # 如果有分析结果,添加详细信息 + if comment_result and not comment_result.error: + # 如果有PR整体分析 + if comment_result.pr_analysis: + pr_analysis = comment_result.pr_analysis + + # 添加整体评估摘要 + comment_body += "## 整体评估\n" + comment_body += f"- 涉及文本变更: {'是' if pr_analysis.has_text_changes else '否'}\n" + comment_body += f"- 文本变更类型: {pr_analysis.text_change_type}\n" + comment_body += f"- 存在语法错误: {'是' if pr_analysis.has_grammar_errors else '否'}\n\n" + + # 添加详细分析 + if pr_analysis.detailed_analysis: + comment_body += "## 详细分析\n" + comment_body += f"{pr_analysis.detailed_analysis}\n\n" + + # 添加语法错误列表 + if pr_analysis.grammar_errors: + comment_body += "## 语法问题\n" + for i, error in enumerate(pr_analysis.grammar_errors, 1): + comment_body += f"{i}. {error}\n" + comment_body += "\n" + + # 添加改进建议 + if pr_analysis.suggestions: + comment_body += "## 改进建议\n" + for i, suggestion in enumerate(pr_analysis.suggestions, 1): + comment_body += f"{i}. {suggestion}\n" + comment_body += "\n" + + # 添加文件级别的分析结果 + if comment_result.file_analyses: + # comment_body += "## 文件分析\n" + + # 统计有问题的文件 + files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues] + files_without_issues = [f for f in comment_result.file_analyses if not f.has_text_changes and not f.grammar_issues] + + if files_with_issues: + comment_body += f"### 需要关注的文件 ({len(files_with_issues)} 个)\n" + for i, file_analysis in enumerate(files_with_issues, 1): + comment_body += f"\n**{i}. {file_analysis.file_path}**\n" + + if file_analysis.has_text_changes: + comment_body += f"- 文本变更: 检测到英文文本改动\n" + if file_analysis.text_lines: + comment_body += f"- 涉及行数: {len(file_analysis.text_lines)} 行\n" + + if file_analysis.grammar_issues: + comment_body += f"- 语法问题: 发现 {len(file_analysis.grammar_issues)} 个问题\n" + for j, issue in enumerate(file_analysis.grammar_issues, 1): + comment_body += f" {j}. 
{issue}\n" + + if file_analysis.analysis_details: + comment_body += f"- 分析详情: {file_analysis.analysis_details}\n" + + if files_without_issues: + comment_body += f"\n### 无问题的文件 ({len(files_without_issues)} 个)\n" + for file_analysis in files_without_issues: + comment_body += f"- {file_analysis.file_path}\n" + + # 添加处理统计 + # comment_body += f"\n### 处理统计\n" + # comment_body += f"- 总文件数: {comment_result.total_files}\n" + # comment_body += f"- 成功分析: {comment_result.processed_files}\n" + # comment_body += f"- 有文本变更: {len([f for f in comment_result.file_analyses if f.has_text_changes])}\n" + # comment_body += f"- 有语法问题: {len([f for f in comment_result.file_analyses if f.grammar_issues])}\n" + + # 添加免责声明 + comment_body += "## 免责声明\n" + comment_body += "本评论内容基于AI Agent技术自动生成,仅供参考。请开发者根据实际情况进行判断和修改。\n" + + return comment_body + + +class Args: + gitee_token: str + pr_owner: str + pr_repo: str + pr_number: int + siliconflow_api_key: str = "" + siliconflow_api_base: str = "https://api.siliconflow.cn/v1" + + def validate(self): + valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number + if not valid: + logger.error("Invalid Command Arguments") + sys.exit(1) + + +def load_config_yaml(yaml_path): + with open(yaml_path, "r", encoding="utf-8") as config_in: + data = yaml.safe_load(config_in) + + if data is None: + return None + return Config(**data) + + +def create_comment_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, + pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str): + pr_html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + + for org_item in conf.orgs: + if org_item.org_name != pr_owner: + continue + + if not org_item.auto_comment_enabled: + logger.info(f"组织 {org_item.org_name} 未启用自动评论功能") + continue + + # 移除文件筛选逻辑,对所有PR平等处理 + logger.info("开始对PR进行全面文本分析(不限制文件类型和路径)") + + # 获取diff内容 + diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) + if diff_content is None: + logger.error("无法获取PR的diff内容") + sys.exit(1) + + # 调用AI Agent进行分析 + logger.info("开始进行AI代码审查分析...") + comment_result = get_comment_analysis(diff_content, siliconflow_api_key, siliconflow_api_base) + + if not comment_result: + logger.error("AI分析失败,将发布错误状态评论") + # 创建一个错误结果对象,确保能发布评论 + from comment_agent import CommentResult + comment_result = CommentResult( + pr_analysis=None, + file_analyses=[], + processed_files=0, + total_files=0, + error="AI分析过程失败" + ) + + # 确定分析状态和评论内容 + analysis_status = "success" + + if comment_result.error: + analysis_status = "error" + logger.info("AI分析过程出错,将发布错误状态评论") + elif comment_result.pr_analysis: + pr_analysis = comment_result.pr_analysis + + # 检查是否有文本变更或语法错误 + if pr_analysis.has_text_changes and pr_analysis.has_grammar_errors: + analysis_status = "success" # 有问题,正常处理 + logger.info("检测到文本变更和语法错误,将发布问题报告评论") + elif pr_analysis.has_text_changes and not pr_analysis.has_grammar_errors: + analysis_status = "no_grammar_errors" + logger.info("检测到文本变更但无语法错误,将发布文本质量良好的评论") + elif not pr_analysis.has_text_changes: + analysis_status = "no_text_changes" + logger.info("未检测到文本变更,将发布无文本问题的评论") + else: + analysis_status = "success" + logger.info("检测到需要关注的问题,将发布详细分析评论") + else: + # 如果没有整体分析,检查是否有文件级别的问题 + files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues] + if files_with_issues: + analysis_status = "success" + logger.info(f"检测到 {len(files_with_issues)} 个文件有文本问题,将发布文件级别问题评论") + else: + analysis_status = "no_text_changes" + 
logger.info("未检测到文件级别问题,将发布无问题评论") + + # 总是生成和发布评论 + comment_content = generate_comment_content( + comment_result, + pr_html_url, + analysis_status + ) + + # 发布评论 + success = cli.add_pr_comment(pr_owner, pr_repo, pr_number, comment_content) + if success: + logger.info(f"AI代码审查评论发布成功 - 状态: {analysis_status}") + else: + logger.error(f"AI代码审查评论发布失败 - 状态: {analysis_status}") + + +def main(): + parser = argparse.ArgumentParser(description='Create AI-powered PR comment based on text analysis') + parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token') + parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner') + parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') + parser.add_argument('--pr_number', type=str, required=True, help='the PR number') + parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') + parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') + args = Args() + parser.parse_args(args=sys.argv[1:], namespace=args) + args.validate() + + exec_py = sys.argv[0] + config_yaml_path = exec_py[:-2] + 'yaml' + conf = load_config_yaml(config_yaml_path) + + cli = GiteeClient(args.gitee_token) + + pr_owner = args.pr_owner + pr_repo = args.pr_repo + pr_number = args.pr_number + siliconflow_api_key = args.siliconflow_api_key + siliconflow_api_base = args.siliconflow_api_base + create_comment_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) + + +if __name__ == '__main__': + main() diff --git a/ci/tools/comment/create_comment.yaml b/ci/tools/comment/create_comment.yaml new file mode 100644 index 000000000..32ac269ab --- /dev/null +++ b/ci/tools/comment/create_comment.yaml @@ -0,0 +1,38 @@ +# Comment Agent Configuration +comment_agent: + # Backend Configuration + backend: + type: "siliconflow" # Options: "ollama" or "siliconflow" + # siliconflow配置现在通过命令行参数传入 + + # Model Configuration + model: + name: "Qwen/Qwen3-8B" + temperature: 0.1 + max_retry: 5 # For siliconflow backend + + # Processing Configuration + processing: + max_workers: 8 # Number of parallel workers for file processing + single_file_timeout: 180 # Timeout for single file analysis (seconds) + total_comment_timeout: 300 # Timeout for total PR analysis (seconds) + + # Logging Configuration + logging: + level: "INFO" + +# PR Comment Configuration +orgs: + - org_name: openeuler + comment_target_owner: openeuler + comment_target_repo: docs + auto_comment_enabled: true + text_check_enabled: true # 是否启用文本变更检测 + grammar_check_enabled: true # 是否启用语法错误检测 + + - org_name: src-openeuler + comment_target_owner: openeuler + comment_target_repo: globalization + auto_comment_enabled: true + text_check_enabled: true + grammar_check_enabled: true \ No newline at end of file -- Gitee From b05188d8577a7191bf700ab07f925199ff3f62d0 Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Sat, 20 Sep 2025 20:39:00 +0800 Subject: [PATCH 3/8] update ci-bot for auto generating translation issue --- ci/tools/comment/comment_agent.py | 958 --------------------------- ci/tools/comment/create_comment.py | 372 ----------- ci/tools/comment/create_comment.yaml | 38 -- 3 files changed, 1368 deletions(-) delete mode 100644 ci/tools/comment/comment_agent.py delete mode 100644 ci/tools/comment/create_comment.py delete mode 100644 ci/tools/comment/create_comment.yaml diff --git 
a/ci/tools/comment/comment_agent.py b/ci/tools/comment/comment_agent.py deleted file mode 100644 index 25dbe385c..000000000 --- a/ci/tools/comment/comment_agent.py +++ /dev/null @@ -1,958 +0,0 @@ -import json -import re -import logging -import urllib.parse -from typing import List, Dict, Any, Optional, Tuple, Literal -from dataclasses import dataclass -from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError as FutureTimeoutError -from pathlib import Path -import sys -import time -# LangChain imports -from langchain_core.prompts import ChatPromptTemplate, PromptTemplate -from langchain_core.runnables import RunnableLambda, RunnablePassthrough -from pydantic import BaseModel, Field, SecretStr -from langchain_community.llms import Ollama -from langchain_ollama import ChatOllama -from langchain.chains import TransformChain, SequentialChain -from langchain_core.output_parsers import JsonOutputParser -from langchain_openai import ChatOpenAI -import yaml - -# ==================== 配置加载 ==================== - -def load_config(config_file="create_comment.yaml"): - """从YAML文件加载配置""" - try: - with open(config_file, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - return config.get('comment_agent', {}) - except FileNotFoundError: - print(f"配置文件 {config_file} 不存在") - raise - except yaml.YAMLError as e: - print(f"解析配置文件时发生错误: {e}") - raise - -# 加载配置 -_config = load_config() - -# ==================== 配置常量 ==================== - -BACKEND_TYPE = _config.get('backend', {}).get('type', 'siliconflow') -MODEL_NAME = _config.get('model', {}).get('name', 'Qwen/Qwen3-8B') -MODEL_TEMPERATURE = _config.get('model', {}).get('temperature', 0.1) -MODEL_MAX_RETRY = _config.get('model', {}).get('max_retry', 5) -PROCESSING_MAX_WORKERS = _config.get('processing', {}).get('max_workers', 8) -SINGLE_FILE_TIMEOUT = _config.get('processing', {}).get('single_file_timeout', 180) -TOTAL_COMMENT_TIMEOUT = _config.get('processing', {}).get('total_comment_timeout', 300) -LOGGING_LEVEL = _config.get('logging', {}).get('level', 'INFO') -SILICONFLOW_API_KEY = '' -SILICONFLOW_API_BASE = '' - -# 配置日志 -logging.basicConfig(level=getattr(logging, LOGGING_LEVEL.upper())) -logger = logging.getLogger(__name__) - -# ==================== 数据模型定义 ==================== - -class PRAnalysisResult(BaseModel): - """PR分析结果的结构化输出""" - has_text_changes: bool = Field(description="是否涉及英文文本改动", default=False) - text_change_type: Literal["无文本改动", "仅标点符号改动", "英文内容改动", "代码注释改动", "混合改动"] = Field(description="文本改动类型") - has_grammar_errors: bool = Field(description="是否存在语法语病错误", default=False) - grammar_errors: List[str] = Field(description="具体的语法语病错误列表", default=[]) - detailed_analysis: str = Field(description="详细分析说明") - suggestions: List[str] = Field(description="改进建议列表", default=[]) - -class FileTextAnalysis(BaseModel): - """单个文件的文本分析""" - file_path: str = Field(description="文件路径", default="") - has_text_changes: bool = Field(description="是否涉及英文文本改动", default=False) - text_lines: List[str] = Field(description="涉及文本改动的行", default=[]) - grammar_issues: List[str] = Field(description="语法问题列表", default=[]) - analysis_details: str = Field(description="分析详情") - -@dataclass -class DiffFileInfo: - """单个文件的diff信息""" - file_path: str - diff_content: str - lines_added: int - lines_deleted: int - -@dataclass -class CommentResult: - """评论生成结果""" - pr_analysis: Optional[PRAnalysisResult] - file_analyses: List[FileTextAnalysis] - processed_files: int - total_files: int - error: Optional[str] = None - -# ==================== Token 统计工具 
==================== - - -# ==================== 工具函数 ==================== - -class DiffParser: - """Git Diff 解析器""" - - @staticmethod - def parse_git_diff(diff_content: str) -> List[DiffFileInfo]: - """ - 解析git diff内容,提取每个文件的改动信息 - - Args: - diff_content: git diff的原始内容 - - Returns: - 包含文件路径和对应diff内容的列表 - """ - - files = [] - current_file = None - current_diff = [] - - lines = diff_content.strip().split('\n') - - for line in lines: - # 匹配文件路径行 - if line.startswith('diff --git'): - # 保存前一个文件的信息 - if current_file and current_diff: - diff_info = DiffParser._create_diff_file_info(current_file, current_diff) - if diff_info: - files.append(diff_info) - - # 提取文件路径 - 改进的解析逻辑 - current_file = DiffParser._extract_file_path(line) - if current_file: - current_diff = [line] - else: - current_diff = [] - elif current_file: - current_diff.append(line) - - # 添加最后一个文件 - if current_file and current_diff: - diff_info = DiffParser._create_diff_file_info(current_file, current_diff) - if diff_info: - files.append(diff_info) - - return files - - @staticmethod - def _extract_file_path(diff_line: str) -> Optional[str]: - """ - 从git diff行中提取文件路径,支持包含汉字的文件名 - - Args: - diff_line: git diff的文件头行,格式如 "diff --git a/path/to/file b/path/to/file" - - Returns: - 提取出的文件路径,如果解析失败则返回None - """ - try: - # 方法1: 处理引号包围的路径(Git对特殊字符的处理) - # 格式: diff --git "a/path/to/file" "b/path/to/file" - quoted_pattern = r'diff --git "a/(.+?)" "b/(.+?)"' - quoted_match = re.match(quoted_pattern, diff_line) - - if quoted_match: - file_path_a = quoted_match.group(1) - file_path_b = quoted_match.group(2) - # 通常a和b路径相同,使用a路径(旧文件路径) - file_path = file_path_a - else: - # 方法2: 使用正则表达式匹配标准的git diff格式 - # 格式: diff --git a/path/to/file b/path/to/file - pattern = r'diff --git a/(.+?) b/(.+?)(?:\s|$)' - match = re.match(pattern, diff_line) - - if match: - file_path_a = match.group(1) - file_path_b = match.group(2) - # 通常a和b路径相同,使用a路径(旧文件路径) - file_path = file_path_a - else: - # 方法3: 如果正则匹配失败,尝试更简单的解析 - # 处理可能包含空格和特殊字符的文件名 - if ' a/' in diff_line and ' b/' in diff_line: - # 找到 a/ 和 b/ 的位置 - a_pos = diff_line.find(' a/') - b_pos = diff_line.find(' b/') - - if a_pos != -1 and b_pos != -1 and a_pos < b_pos: - # 提取a/和b/之间的路径 - a_start = a_pos + 3 # 跳过 ' a/' - file_path = diff_line[a_start:b_pos] - else: - return None - else: - # 方法4: 最后的备选方案,简单的字符串分割 - parts = diff_line.split() - if len(parts) >= 3: - a_path = parts[2] - if a_path.startswith('a/'): - file_path = a_path[2:] # 移除'a/'前缀 - else: - return None - else: - return None - - # 处理文件名编码 - return DiffParser._decode_file_path(file_path) - - except Exception as e: - logger.warning(f"解析文件路径时发生错误: {e}, diff行: {diff_line}") - return None - - @staticmethod - def _decode_file_path(file_path: str) -> str: - """ - 解码文件路径,处理各种编码情况 - - Args: - file_path: 原始文件路径 - - Returns: - 解码后的文件路径 - """ - try: - # 首先尝试URL解码,处理Git编码的文件名 - decoded_path = urllib.parse.unquote(file_path, encoding='utf-8') - - # 处理Git对特殊字符的引号包装 - if decoded_path.startswith('"') and decoded_path.endswith('"'): - decoded_path = decoded_path[1:-1] - # Git使用反斜杠转义,需要处理转义序列 - decoded_path = decoded_path.replace('\\"', '"') - decoded_path = decoded_path.replace('\\\\', '\\') - - # 无论是否有引号包装,都尝试处理八进制编码 - # 检查是否包含八进制转义序列 - if '\\' in decoded_path and re.search(r'\\[0-7]{3}', decoded_path): - decoded_path = DiffParser._decode_octal_sequences(decoded_path) - - return decoded_path - - except Exception as e: - logger.warning(f"解码文件路径时发生错误: {e}, 原始路径: {file_path}") - return file_path - - @staticmethod - def _decode_octal_sequences(text: str) -> str: - """ - 
解码文本中的八进制转义序列 - - Args: - text: 包含八进制转义序列的文本 - - Returns: - 解码后的文本 - """ - try: - # 查找八进制转义序列模式:\xxx - pattern = r'\\([0-7]{3})' - - # 找到所有八进制序列 - matches = list(re.finditer(pattern, text)) - if not matches: - return text - - # 收集所有字节值 - result = "" - last_end = 0 - bytes_buffer = [] - - for i, match in enumerate(matches): - # 添加匹配前的文本 - if match.start() > last_end: - # 如果有缓冲的字节,先处理它们 - if bytes_buffer: - try: - decoded_bytes = bytes(bytes_buffer).decode('utf-8') - result += decoded_bytes - bytes_buffer = [] - except UnicodeDecodeError: - # 如果解码失败,保持原始形式 - for byte_val in bytes_buffer: - result += f"\\{oct(byte_val)[2:].zfill(3)}" - bytes_buffer = [] - - result += text[last_end:match.start()] - - # 处理当前八进制序列 - octal_str = match.group(1) - try: - byte_value = int(octal_str, 8) - bytes_buffer.append(byte_value) - except ValueError: - # 如果转换失败,添加原始字符串 - if bytes_buffer: - try: - decoded_bytes = bytes(bytes_buffer).decode('utf-8') - result += decoded_bytes - bytes_buffer = [] - except UnicodeDecodeError: - for byte_val in bytes_buffer: - result += f"\\{oct(byte_val)[2:].zfill(3)}" - bytes_buffer = [] - result += match.group(0) - - last_end = match.end() - - # 检查是否是最后一个匹配或下一个匹配不连续 - is_last = (i == len(matches) - 1) - is_next_non_consecutive = (not is_last and - matches[i + 1].start() != match.end()) - - if is_last or is_next_non_consecutive: - # 处理缓冲的字节 - if bytes_buffer: - try: - decoded_bytes = bytes(bytes_buffer).decode('utf-8') - except UnicodeDecodeError: - # 如果解码失败,保持原始形式 - for byte_val in bytes_buffer: - result += f"\\{oct(byte_val)[2:].zfill(3)}" - bytes_buffer = [] - - # 添加剩余的文本 - if last_end < len(text): - result += text[last_end:] - - return result - - except Exception as e: - logger.warning(f"解码八进制序列时发生错误: {e}, 原始文本: {text}") - return text - - @staticmethod - def _create_diff_file_info(file_path: str, diff_lines: List[str]) -> Optional[DiffFileInfo]: - """创建DiffFileInfo对象""" - diff_content = '\n'.join(diff_lines) - lines_added, lines_deleted = DiffParser._count_lines_changed(diff_content) - - return DiffFileInfo( - file_path=file_path, - diff_content=diff_content, - lines_added=lines_added, - lines_deleted=lines_deleted - ) - - @staticmethod - def _count_lines_changed(diff_content: str) -> Tuple[int, int]: - """统计git diff中改动的行数""" - lines_added, lines_deleted = 0, 0 - lines = diff_content.strip().split('\n') - - for line in lines: - # 统计新增行(以+开头,但不是+++) - if line.startswith('+') and not line.startswith('+++'): - lines_added += 1 - # 统计删除行(以-开头,但不是---) - elif line.startswith('-') and not line.startswith('---'): - lines_deleted += 1 - - return lines_added, lines_deleted - -# ==================== LangChain 组件 ==================== - -class LLMFactory: - """LLM工厂类""" - - @staticmethod - def create_chat_llm(model_name: str = None, base_url: str = None): - """创建LLM实例""" - if model_name is None: - model_name = MODEL_NAME - - if BACKEND_TYPE == "siliconflow": - return ChatOpenAI( - model=model_name, - api_key=SecretStr(SILICONFLOW_API_KEY), - base_url=SILICONFLOW_API_BASE, - temperature=MODEL_TEMPERATURE - ) - else: - raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}") - -class PromptTemplates: - """提示模板集合""" - - @staticmethod - def get_file_text_analysis_prompt() -> ChatPromptTemplate: - """获取单文件文本分析提示模板""" - return ChatPromptTemplate.from_messages([ - ("system", f""" -你是一个专业的代码审查和语言专家,专注于分析Gitee文档仓库的翻译PR中的英文文本内容。每条PR都是人工生成的文档改动。请忽略中文、格式和代码的审计,专注于识别英文文本变更。 - -注意:请忽略中文、格式和代码的审计,专注于识别英文文本变更。如果文档的变更不涉及英文文本,你只需要输出“不涉及英文改动”即可,不需要额外输出任何分析结果。 
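# -- editor's note (annotation, not part of the recorded patch) ---------------
# The fixed reply phrases demanded by this prompt are load-bearing: analyze()
# later substring-matches a sentinel list to clear has_text_changes, roughly:
#
#     SENTINELS = ("无英文文本改动", "语法正确,无需关注",
#                  "仅标点符号改动,无需关注", "不存在语法问题")
#     if any(s in result.analysis_details for s in SENTINELS):
#         result.has_text_changes = False
#
# The wording requested here, "不涉及英文改动", is not in that list (this
# PromptTemplates template appears unused; FileTextAnalysisChain builds its own
# prompt), so replies following this instruction would bypass that check.
# ------------------------------------------------------------------------------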
-同时:对于专有名词,例如openEuler、GitHub等,你不能将其纳入英文文本变更的纠错范围内,而是应该自动识别专有名词。对于代码的相关变更,也不应该纳入分析内容范围。 - -你需要遵循**能不提修改意见就不提修改意见**的原则进行审查!!! - -请仔细分析这个文件的改动,并按照以下要求进行分析: - -**分析重点:** - -1. 英文文本变更识别: - - 检查是否涉及英文文本内容的改动 - - 区分代码逻辑变更和英文文本内容变更 - - 识别注释、文档字符串、用户显示文本等英文文本内容 - - 标识出具体的英文文本变更行 - -2. 语法错误检测: - - 检查英文文本的语法、拼写错误 - -**分析类型判断:** -- 如果改动不涉及任何英文文本内容,标记为"无英文文本改动" -- 如果涉及代码注释的英文文本变更,标记为"代码注释改动" -- 如果涉及文档、界面文本等英文内容变更,标记为"英文内容改动" - -**语法检查重点:** -- 英文:主谓一致、时态、拼写、标点、语序 - -**输出要求:** -- 如果存在英文文本变更但变更不存在语法问题,则直接输出“不存在语法问题”,不需要任何额外输出 -- 详细列出发现的语法错误(如果有) -- 不能超过100个汉字字符 - - """), - ("human", """ -文件路径: {file_path} - -Git Diff 内容: -{diff_content} - - """) - ]) - - @staticmethod - def get_pr_analysis_prompt() -> ChatPromptTemplate: - """获取整体PR分析提示模板""" - return ChatPromptTemplate.from_messages([ - ("system", """ -你是一个专业的PR审查专家,专门分析Gitee文档仓库的翻译PR中的英文文本变更和语法问题。每条PR都是人工生成的文档改动。 - -请分析所有文件的改动,并生成一个综合评估,要求: - -1. 整体文本变更评估: - - 统计涉及文本变更的文件数量 - - 分析文本变更的类型分布 - - 评估变更的重要性和影响范围 - - 如果文本变更不涉及英文,或涉及英文但使用正确不需要改动,则**直接忽略**,无需对其进行总结 - -2. 语法错误汇总: - - **仅汇总改动中的硬伤,如单词拼写错误、英语语法(时态语态)错误等** - - **对于一些可以优化但称不上错误的点,以最小化改动为原则,选择忽略** - - 提高报错阈值,忽略可优化翻译的点 - - 提供优先修复建议 - -3. 质量评估: - - 对整个PR的文本质量给出评分 - - 分析文本变更的一致性 - - 评估对用户体验的影响 - -4. 改进建议: - - 提供具体的修改建议 - - 推荐最佳实践 - - 建议后续的质量控制措施 - -**输出格式要求:** -- 提供清晰的分析结论 -- 按优先级排列发现的问题 -- 给出可操作的改进建议 - - """), - ("human", """ -各个文件的分析结果: -{file_analyses} - -总文件数: {total_files} -涉及文本变更的文件数: {text_changed_files} - """) - ]) - -class FileTextAnalysisChain: - """单文件文本分析任务链""" - - def __init__(self, llm: ChatOpenAI): - self.llm = llm - - # 创建输出解析器 - self.output_parser = JsonOutputParser(pydantic_object=FileTextAnalysis) - - # 为硅基流动平台添加输出格式说明 - format_instructions = """ -请以JSON格式输出,包含以下字段: -{{ - "has_text_changes": "是否涉及英文文本改动(布尔值)", - "text_lines": "涉及文本改动的行(字符串列表)", - "grammar_issues": "语法问题列表(字符串列表)", - "analysis_details": "分析详情(字符串)" -}} -""" - # 创建新的prompt模板 - system_template = """ -你是一个专业的代码审查和语言专家,专注于分析Gitee文档仓库的翻译PR中的英文文本内容。每条PR都是人工生成的文档改动。 - -**核心原则:只关注必然存在明显错误的地方,其他文件都不需要关注!** - -**严格过滤条件:** -1. 如果文档的变更不涉及英文文本,直接标记为"无英文文本改动",无需任何分析 -2. 如果涉及英文文本但语法完全正确,直接标记为"语法正确,无需关注" -3. 如果仅涉及标点符号的微小调整,直接标记为"仅标点符号改动,无需关注" -4. 对于专有名词(如openEuler、GitHub等),自动识别并忽略,不纳入纠错范围 -5. 
对于代码相关变更,不纳入分析内容范围 - -**只关注以下明显错误:** -- 明显的单词拼写错误(如:recieve -> receive) -- 严重的语法错误(如:主谓不一致、时态错误) -- 明显的标点符号错误(如:缺少句号、逗号使用错误) -- 明显的语序错误 - -**忽略以下情况:** -- 语法正确但可以优化的表达 -- 风格偏好问题 -- 轻微的标点符号调整 -- 术语选择的差异 -- 表达方式的个人偏好 - -**输出要求:** -- 如果不存在明显错误,直接输出"语法正确,无需关注" -- 只有发现明显错误时才详细列出 -- 不能超过100个汉字字符 -- 遵循"能不提修改意见就不提修改意见"的原则 - -{format_instructions} -""" - human_template = """ -文件路径: {file_path} - -Git Diff 内容: -{diff_content} -""" - self.prompt = ChatPromptTemplate.from_messages([ - ("system", system_template.format(format_instructions=format_instructions)), - ("human", human_template) - ]) - self.chain = self.prompt | self.llm | self.output_parser - - def analyze(self, diff_file_info: DiffFileInfo) -> Optional[FileTextAnalysis]: - """分析单个文件的文本变更""" - max_retry = MODEL_MAX_RETRY - for attempt in range(1, max_retry + 1): - # 如果不是第一次尝试,等待一段时间再重试,避免连续失败 - if attempt > 1: - delay = min(attempt * 2, 10) # 递增延迟,最多10秒 - logger.info(f"第{attempt}次尝试分析文件 {diff_file_info.file_path},等待{delay}秒...") - time.sleep(delay) - - try: - # 构造prompt字符串 - prompt_args = { - "file_path": diff_file_info.file_path, - "diff_content": diff_file_info.diff_content - } - - # 直接调用,简化超时控制 - invoke_args = { - "file_path": diff_file_info.file_path, - "diff_content": diff_file_info.diff_content - } - result = self.chain.invoke(invoke_args) - # 验证结果有效性 - if isinstance(result, (dict, FileTextAnalysis)): - if isinstance(result, dict): - result = FileTextAnalysis(**result) - - # 检查结果完整性 - if result and hasattr(result, 'analysis_details') and result.analysis_details: - - # 设置准确值 - result.file_path = diff_file_info.file_path - - # 检查是否只关注明显错误 - analysis_text = result.analysis_details.lower() - if any(phrase in analysis_text for phrase in [ - "语法正确,无需关注", - "无英文文本改动", - "仅标点符号改动,无需关注", - "不存在语法问题" - ]): - # 如果无问题,设置has_text_changes为False - result.has_text_changes = False - result.grammar_issues = [] - - return result - - # 结果无效,记录并重试 - logger.warning(f"分析文件 {diff_file_info.file_path} 返回无效结果,第{attempt}次尝试") - if attempt < max_retry: - continue - except Exception as e: - err_str = str(e) - # 检查是否为HTTP错误(如404、5xx),常见关键字有status code、HTTP、response等 - is_http_error = False - for code in ["404", "500", "502", "503", "504"]: - if code in err_str: - is_http_error = True - break - if ("status code" in err_str or "HTTP" in err_str or "response" in err_str) and any(code in err_str for code in ["404", "500", "502", "503", "504"]): - is_http_error = True - if is_http_error: - logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e},第{attempt}次尝试,10秒后重试...") - if attempt < max_retry: - time.sleep(10) - continue - else: - logger.error(f"分析文件 {diff_file_info.file_path} 时发生错误: {e},第{attempt}次尝试") - # 其它异常直接进入下一次重试 - if attempt < max_retry: - logger.info(f"第{attempt}次尝试失败,准备重试...") - logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry}次均未获得结构化输出,放弃。") - return None - -class PRAnalysisChain: - """PR整体分析任务链""" - - def __init__(self, llm: ChatOllama | ChatOpenAI): - self.llm = llm - - # 创建输出解析器 - self.output_parser = JsonOutputParser(pydantic_object=PRAnalysisResult) - - # 为硅基流动平台添加输出格式说明 - format_instructions = """ -请以JSON格式输出,包含以下字段: -{{ - "has_text_changes": "是否涉及英文文本改动(布尔值)", - "text_change_type": "文本改动类型(字符串)", - "has_grammar_errors": "是否存在语法语病错误(布尔值)", - "grammar_errors": "具体的语法语病错误列表(字符串列表)", - "detailed_analysis": "详细分析说明(字符串)", - "suggestions": "改进建议列表(字符串列表)" -}} -""" - # 创建新的prompt模板 - system_template = """ -你是一个专业的PR审查专家,专门分析Pull Request中的文本变更和语法问题。 - -**核心原则:只关注必然存在明显错误的地方,其他文件都不需要关注!** - -请基于各个文件的分析结果,生成整个PR的综合评估,要求: - -1. 
严格过滤文件: - - 只统计存在明显错误的文件 - - 忽略"语法正确,无需关注"的文件 - - 忽略"无英文文本改动"的文件 - - 忽略"仅标点符号改动,无需关注"的文件 - -2. 只汇总明显错误: - - 仅汇总硬伤:明显的单词拼写错误、严重的语法错误 - - 忽略可优化但称不上错误的点 - - 忽略风格偏好问题 - - 忽略轻微的标点符号调整 - -3. 质量评估: - - 只对存在明显错误的文件进行质量评估 - - 如果所有文件都无问题,直接标记为"无问题文件" - -4. 改进建议: - - 只对存在明显错误的文件提供修改建议 - - 建议优先修复明显的拼写和语法错误 - -**输出格式要求:** -- 如果所有文件都无问题,直接输出"所有文件语法正确,无需关注" -- 只列出存在明显错误的文件 -- 按优先级排列发现的问题 -- 给出可操作的改进建议 - -{format_instructions} -""" - human_template = """ -各个文件的分析结果: -{file_analyses} - -总文件数: {total_files} -涉及文本变更的文件数: {text_changed_files} -""" - self.prompt = ChatPromptTemplate.from_messages([ - ("system", system_template.format(format_instructions=format_instructions)), - ("human", human_template) - ]) - self.chain = self.prompt | self.llm | self.output_parser - - def generate(self, file_analyses: List[FileTextAnalysis]) -> Optional[PRAnalysisResult]: - """生成PR整体分析""" - try: - total_files = len(file_analyses) - - # 过滤出只关注存在明显错误的文件 - problematic_files = [] - for analysis in file_analyses: - # 检查是否存在明显错误 - has_obvious_errors = ( - analysis.has_text_changes and - analysis.grammar_issues and - len(analysis.grammar_issues) > 0 and - analysis.analysis_details and - not any(phrase in analysis.analysis_details for phrase in [ - "语法正确,无需关注", - "无英文文本改动", - "仅标点符号改动,无需关注", - "不存在语法问题" - ]) - ) - - if has_obvious_errors: - problematic_files.append(analysis) - - # 如果所有文件都无问题,直接返回无问题结果 - if not problematic_files: - return PRAnalysisResult( - has_text_changes=False, - text_change_type="无文本改动", - has_grammar_errors=False, - grammar_errors=[], - detailed_analysis="所有文件语法正确,无需关注", - suggestions=[] - ) - - text_changed_files = len(problematic_files) - - file_analyses_info = [] - for analysis in problematic_files: - file_analyses_info.append({ - 'file_path': analysis.file_path, - 'has_text_changes': analysis.has_text_changes, - 'text_lines': analysis.text_lines, - 'grammar_issues': analysis.grammar_issues, - 'analysis_details': analysis.analysis_details - }) - - # 构造prompt字符串 - prompt_args = { - "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2), - "total_files": total_files, - "text_changed_files": text_changed_files - } - - # 使用线程池执行器为PR分析添加超时控制 - timeout_executor = None - try: - timeout_executor = ThreadPoolExecutor(max_workers=1) - invoke_args = { - "file_analyses": json.dumps(file_analyses_info, ensure_ascii=False, indent=2), - "total_files": total_files, - "text_changed_files": text_changed_files - } - result = self.chain.invoke(invoke_args) - # 验证结果有效性 - if isinstance(result, (dict, PRAnalysisResult)): - # 如果是dict(来自JsonOutputParser),转换为PRAnalysisResult - if isinstance(result, dict): - result = PRAnalysisResult(**result) - return result - else: - logger.error(f"生成PR分析时返回类型错误: {type(result)}") - return None - except Exception as e: - logger.error(f"生成PR分析时发生错误: {e}") - return None - except Exception as e: - logger.error(f"生成PR分析时发生错误: {e}") - return None - -# ==================== 主处理类 ==================== - -class PRCommentAnalyzer: - """PR评论分析器""" - - def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", model_name: str = None, base_url: str = None): - if model_name is None: - model_name = MODEL_NAME - - # 设置siliconflow API配置 - global SILICONFLOW_API_KEY, SILICONFLOW_API_BASE - if siliconflow_api_key: - SILICONFLOW_API_KEY = siliconflow_api_key - if siliconflow_api_base: - SILICONFLOW_API_BASE = siliconflow_api_base - - self.llm = LLMFactory.create_chat_llm(model_name) - self.file_analysis_chain = FileTextAnalysisChain(self.llm) - 
self.pr_analysis_chain = PRAnalysisChain(self.llm) - - def cleanup(self): - """清理资源,确保程序能正确退出""" - try: - # 清理 LLM 连接 - if hasattr(self.llm, 'client') and hasattr(self.llm.client, 'close'): - self.llm.client.close() - elif hasattr(self.llm, '_client') and hasattr(self.llm._client, 'close'): - self.llm._client.close() - - # 如果是 ChatOpenAI,尝试关闭底层的 HTTP 客户端 - if BACKEND_TYPE == "siliconflow" and hasattr(self.llm, 'client'): - try: - # 强制关闭 httpx 客户端 - if hasattr(self.llm.client, '_client'): - self.llm.client._client.close() - except Exception as e: - logger.debug(f"关闭 HTTP 客户端时发生错误: {e}") - - logger.info("资源清理完成") - except Exception as e: - logger.warning(f"清理资源时发生错误: {e}") - - def analyze_pr_diff(self, diff_content: str, max_workers: int = None) -> CommentResult: - if max_workers is None: - max_workers = PROCESSING_MAX_WORKERS - - logger.info("开始解析PR diff...") - files = DiffParser.parse_git_diff(diff_content) - logger.info(f"解析到 {len(files)} 个文件的改动") - if not files: - logger.warning("未找到任何文件改动") - return CommentResult( - pr_analysis=None, - file_analyses=[], - processed_files=0, - total_files=0, - error='未找到任何文件改动' - ) - - logger.info("开始并行处理各个文件的文本分析...") - file_analyses = [] - # 使用更健壮的并发处理机制 - executor = None - try: - executor = ThreadPoolExecutor(max_workers=max_workers) - future_to_file = { - executor.submit(self.file_analysis_chain.analyze, file_info): file_info.file_path - for file_info in files - } - - # 设置更长的整体超时时间,避免与单个文件超时冲突 - overall_timeout = SINGLE_FILE_TIMEOUT * len(files) + 600 # 给每个文件的时间 + 额外缓冲 - - completed_count = 0 - total_count = len(future_to_file) - - try: - for future in as_completed(future_to_file, timeout=overall_timeout): - file_path = future_to_file[future] - completed_count += 1 - try: - analysis = future.result(timeout=5) # 短暂缓冲时间,因为任务已经完成 - if analysis: - file_analyses.append(analysis) - logger.info(f"完成文件 {file_path} 的文本分析 ({completed_count}/{total_count})") - else: - logger.warning(f"文件 {file_path} 的文本分析失败 ({completed_count}/{total_count})") - except (FutureTimeoutError, TimeoutError) as e: - logger.error(f"文件 {file_path} 的文本分析获取超时,跳过该文件: {type(e).__name__} ({completed_count}/{total_count})") - try: - future.cancel() - except Exception as cancel_e: - logger.warning(f"取消任务时发生错误: {cancel_e}") - except Exception as e: - logger.error(f"处理文件 {file_path} 时发生异常: {e} ({completed_count}/{total_count})") - except (FutureTimeoutError, TimeoutError) as overall_e: - logger.error(f"整体处理超时({overall_timeout}秒),已完成{completed_count}/{total_count}个文件") - # 取消所有未完成的任务 - for future in future_to_file: - if not future.done(): - try: - future.cancel() - except Exception as cancel_e: - logger.warning(f"取消未完成任务时发生错误: {cancel_e}") - finally: - # 确保线程池被正确关闭 - if executor: - try: - executor.shutdown(wait=True) - except Exception as shutdown_e: - logger.warning(f"关闭主线程池时发生错误: {shutdown_e}") - - logger.info(f"成功生成 {len(file_analyses)} 个文件的文本分析") - logger.info("开始生成PR整体分析...") - pr_analysis = None - if file_analyses: - logger.info(f"基于 {len(file_analyses)} 个成功处理的文件生成PR分析...") - try: - pr_analysis = self.pr_analysis_chain.generate(file_analyses) - if pr_analysis: - logger.info("PR整体分析生成成功") - else: - logger.warning("PR整体分析生成失败") - except Exception as e: - logger.error(f"生成PR分析时发生未预期的错误: {e}") - else: - logger.warning("没有成功处理的文件,跳过PR分析生成") - - return CommentResult( - pr_analysis=pr_analysis, - file_analyses=file_analyses, - processed_files=len(file_analyses), - total_files=len(files) - ) - -# ==================== 主函数 ==================== - -def get_comment_analysis(sample_diff, 
siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1"): - - analyzer = PRCommentAnalyzer(siliconflow_api_key, siliconflow_api_base) - result = None - try: - result = analyzer.analyze_pr_diff(sample_diff) - finally: - # 确保在函数退出前清理资源 - analyzer.cleanup() - - if not result: - print("处理失败,无法获取结果") - return None - - if result.error: - print(f"错误: {result.error}") - - print("\n=== 单文件文本分析 ===") - problematic_files = [f for f in result.file_analyses if f.has_text_changes and f.grammar_issues] - if problematic_files: - for analysis in problematic_files: - print(f"文件: {analysis.file_path}") - print(f"涉及文本变更: {analysis.has_text_changes}") - print(f"文本变更行: {analysis.text_lines}") - print(f"语法问题: {analysis.grammar_issues}") - print(f"分析详情: {analysis.analysis_details}") - print("-" * 50) - else: - print("所有文件语法正确,无需关注") - - print("=== 处理结果 ===") - print(f"总文件数: {result.total_files}") - print(f"成功处理文件数: {result.processed_files}") - - if result.pr_analysis: - print("\n=== PR整体分析 ===") - pr = result.pr_analysis - print(f"涉及文本变更: {pr.has_text_changes}") - print(f"文本变更类型: {pr.text_change_type}") - print(f"存在语法错误: {pr.has_grammar_errors}") - print(f"语法错误列表: {pr.grammar_errors}") - print(f"详细分析: {pr.detailed_analysis}") - print(f"改进建议: {pr.suggestions}") - - - return result - -if __name__ == "__main__": - # 微服务接口逻辑: 传递进来的就是 sample_diff 的内容 - sample_diff = sys.argv[1] - result = get_comment_analysis(sample_diff) - print(result) diff --git a/ci/tools/comment/create_comment.py b/ci/tools/comment/create_comment.py deleted file mode 100644 index 481c7a2f5..000000000 --- a/ci/tools/comment/create_comment.py +++ /dev/null @@ -1,372 +0,0 @@ -import argparse -import json -import logging -import re -import sys -from dataclasses import dataclass, field -from difflib import SequenceMatcher -from typing import TypeVar, Generic -from comment_agent import get_comment_analysis - -import requests -import yaml - -logging.basicConfig(level=logging.INFO, stream=sys.stdout, - format='%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s') -logger = logging.getLogger(__name__) - - -@dataclass -class Org: - org_name: str - comment_target_owner: str - comment_target_repo: str - auto_comment_enabled: bool = field(default=True) - confidence_threshold: float = field(default=0.7) - text_check_enabled: bool = field(default=True) - grammar_check_enabled: bool = field(default=True) - - -@dataclass -class CommentAgentConfig: - backend: dict = field(default_factory=dict) - model: dict = field(default_factory=dict) - processing: dict = field(default_factory=dict) - logging: dict = field(default_factory=dict) - - -@dataclass -class Config: - orgs: list[dict | Org] - comment_agent: dict | CommentAgentConfig = field(default_factory=dict) - - def __post_init__(self): - tmp_orgs: list[Org] = [] - for item in self.orgs: - tmp_orgs.append(Org(**item)) - self.orgs = tmp_orgs - - if isinstance(self.comment_agent, dict) and self.comment_agent: - self.comment_agent = CommentAgentConfig(**self.comment_agent) - - -@dataclass -class ReqArgs: - method: str - url: str - headers: dict[str, str] - params: dict[str, str] | None = field(default=None) - data: str | None = field(default=None) - timeout: int = field(default=180) - - -T = TypeVar('T') -content_type_is_text = "text/plain" -content_type_is_json_dict = {} -content_type_is_json_list = [] - - -def send_request(args: ReqArgs, t: Generic[T]) -> T: - error_count = 0 - while error_count < 3: - try: - resp = requests.request(**args.__dict__) - resp.raise_for_status() 
- if type(t) is dict or type(t) is list: - res_data: dict | list = resp.json() - else: - res_data: str = resp.text - except requests.exceptions.RequestException as e: - if e.response.status_code in [400, 401, 403, 404, 405]: - logger.error("[ERROR] client error {}".format(e)) - break - logger.error("[ERROR] server error: {}".format(e)) - error_count += 1 - else: - logger.info("[OK] [{}], {}".format(args.method, args.url)) - return res_data - return None - - -class GiteeClient: - """ - Gitee OpenAPI 客户端 - """ - headers = { - "Content-Type": "application/json", - "Accept": "application/json", - } - - def __init__(self, developer_token: str): - """ - 构造函数 - :param developer_token: Gitee v5 token - """ - self.headers["Authorization"] = "Bearer {}".format(developer_token) - - def get_diff_content(self, owner: str, repo: str, number: int) -> str | None: - req_url = "https://gitee.com/{}/{}/pulls/{}.diff".format(owner, repo, number) - req_args = ReqArgs(method="GET", url=req_url, headers=self.headers) - result: str | None = send_request(req_args, "") - if result is None: - logger.error("can not get diff file from PR: {}".format(req_url)) - return result - - def add_pr_comment(self, owner, repo, number, body): - req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number) - req_body = { - "body": "### 🤖 AI审查反馈 \n {} ".format(body) - } - req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) - result: dict | None = send_request(req_args, {}) - return result is not None - - - -def get_diff_file_list(diff_content: str) -> list[str]: - diff_files_list = [] - diff_files = [x.split(' ')[0][2:] for x in diff_content.split('diff --git ')[1:]] - for diff_file in diff_files: - if diff_file.endswith('\"'): - d = re.compile(r'/[\d\s\S]+') - diff_file = d.findall(diff_file) - diff_file = diff_file[0].replace('/', '', 1).replace('\"', '') - diff_files_list.append(diff_file) - else: - diff_files_list.append(diff_file) - return diff_files_list - - -def generate_comment_content(comment_result, pr_url: str, analysis_status: str = "success") -> str: - """根据分析结果生成评论内容""" - comment_body = "" - - # 根据分析状态添加不同的状态标识 - if analysis_status == "error": - comment_body += "### 分析状态:处理失败\n" - comment_body += "**分析过程中发生错误,无法生成详细反馈。请手动审查文本变更。**\n\n" - elif analysis_status == "low_confidence": - comment_body += "### 分析状态:置信度较低\n" - comment_body += "**当前分析置信度较低,结果仅供参考。建议进行人工审查。**\n\n" - elif analysis_status == "no_text_changes": - comment_body += "### 分析状态:无文本问题\n" - comment_body += "**AI分析结果显示本次PR未发现明显的文本变更或语法问题。无需改动。**\n\n" - elif analysis_status == "no_grammar_errors": - comment_body += "### 分析状态:文本质量良好\n" - comment_body += "**检测到文本变更,但未发现明显的语法错误,文本质量良好。无需改动。**\n\n" - else: # success with issues - comment_body += "### 分析状态:发现需要关注的问题\n" - comment_body += "**AI分析发现了一些文本变更或语法问题,请查看下方详细信息。**\n\n" - - # 如果有分析结果,添加详细信息 - if comment_result and not comment_result.error: - # 如果有PR整体分析 - if comment_result.pr_analysis: - pr_analysis = comment_result.pr_analysis - - # 添加整体评估摘要 - comment_body += "## 整体评估\n" - comment_body += f"- 涉及文本变更: {'是' if pr_analysis.has_text_changes else '否'}\n" - comment_body += f"- 文本变更类型: {pr_analysis.text_change_type}\n" - comment_body += f"- 存在语法错误: {'是' if pr_analysis.has_grammar_errors else '否'}\n\n" - - # 添加详细分析 - if pr_analysis.detailed_analysis: - comment_body += "## 详细分析\n" - comment_body += f"{pr_analysis.detailed_analysis}\n\n" - - # 添加语法错误列表 - if pr_analysis.grammar_errors: - comment_body += "## 语法问题\n" - for i, error in 
enumerate(pr_analysis.grammar_errors, 1): - comment_body += f"{i}. {error}\n" - comment_body += "\n" - - # 添加改进建议 - if pr_analysis.suggestions: - comment_body += "## 改进建议\n" - for i, suggestion in enumerate(pr_analysis.suggestions, 1): - comment_body += f"{i}. {suggestion}\n" - comment_body += "\n" - - # 添加文件级别的分析结果 - if comment_result.file_analyses: - # comment_body += "## 文件分析\n" - - # 统计有问题的文件 - files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues] - files_without_issues = [f for f in comment_result.file_analyses if not f.has_text_changes and not f.grammar_issues] - - if files_with_issues: - comment_body += f"### 需要关注的文件 ({len(files_with_issues)} 个)\n" - for i, file_analysis in enumerate(files_with_issues, 1): - comment_body += f"\n**{i}. {file_analysis.file_path}**\n" - - if file_analysis.has_text_changes: - comment_body += f"- 文本变更: 检测到英文文本改动\n" - if file_analysis.text_lines: - comment_body += f"- 涉及行数: {len(file_analysis.text_lines)} 行\n" - - if file_analysis.grammar_issues: - comment_body += f"- 语法问题: 发现 {len(file_analysis.grammar_issues)} 个问题\n" - for j, issue in enumerate(file_analysis.grammar_issues, 1): - comment_body += f" {j}. {issue}\n" - - if file_analysis.analysis_details: - comment_body += f"- 分析详情: {file_analysis.analysis_details}\n" - - if files_without_issues: - comment_body += f"\n### 无问题的文件 ({len(files_without_issues)} 个)\n" - for file_analysis in files_without_issues: - comment_body += f"- {file_analysis.file_path}\n" - - # 添加处理统计 - # comment_body += f"\n### 处理统计\n" - # comment_body += f"- 总文件数: {comment_result.total_files}\n" - # comment_body += f"- 成功分析: {comment_result.processed_files}\n" - # comment_body += f"- 有文本变更: {len([f for f in comment_result.file_analyses if f.has_text_changes])}\n" - # comment_body += f"- 有语法问题: {len([f for f in comment_result.file_analyses if f.grammar_issues])}\n" - - # 添加免责声明 - comment_body += "## 免责声明\n" - comment_body += "本评论内容基于AI Agent技术自动生成,仅供参考。请开发者根据实际情况进行判断和修改。\n" - - return comment_body - - -class Args: - gitee_token: str - pr_owner: str - pr_repo: str - pr_number: int - siliconflow_api_key: str = "" - siliconflow_api_base: str = "https://api.siliconflow.cn/v1" - - def validate(self): - valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number - if not valid: - logger.error("Invalid Command Arguments") - sys.exit(1) - - -def load_config_yaml(yaml_path): - with open(yaml_path, "r", encoding="utf-8") as config_in: - data = yaml.safe_load(config_in) - - if data is None: - return None - return Config(**data) - - -def create_comment_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, - pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str): - pr_html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) - - for org_item in conf.orgs: - if org_item.org_name != pr_owner: - continue - - if not org_item.auto_comment_enabled: - logger.info(f"组织 {org_item.org_name} 未启用自动评论功能") - continue - - # 移除文件筛选逻辑,对所有PR平等处理 - logger.info("开始对PR进行全面文本分析(不限制文件类型和路径)") - - # 获取diff内容 - diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) - if diff_content is None: - logger.error("无法获取PR的diff内容") - sys.exit(1) - - # 调用AI Agent进行分析 - logger.info("开始进行AI代码审查分析...") - comment_result = get_comment_analysis(diff_content, siliconflow_api_key, siliconflow_api_base) - - if not comment_result: - logger.error("AI分析失败,将发布错误状态评论") - # 创建一个错误结果对象,确保能发布评论 - from comment_agent import CommentResult - 
comment_result = CommentResult( - pr_analysis=None, - file_analyses=[], - processed_files=0, - total_files=0, - error="AI分析过程失败" - ) - - # 确定分析状态和评论内容 - analysis_status = "success" - - if comment_result.error: - analysis_status = "error" - logger.info("AI分析过程出错,将发布错误状态评论") - elif comment_result.pr_analysis: - pr_analysis = comment_result.pr_analysis - - # 检查是否有文本变更或语法错误 - if pr_analysis.has_text_changes and pr_analysis.has_grammar_errors: - analysis_status = "success" # 有问题,正常处理 - logger.info("检测到文本变更和语法错误,将发布问题报告评论") - elif pr_analysis.has_text_changes and not pr_analysis.has_grammar_errors: - analysis_status = "no_grammar_errors" - logger.info("检测到文本变更但无语法错误,将发布文本质量良好的评论") - elif not pr_analysis.has_text_changes: - analysis_status = "no_text_changes" - logger.info("未检测到文本变更,将发布无文本问题的评论") - else: - analysis_status = "success" - logger.info("检测到需要关注的问题,将发布详细分析评论") - else: - # 如果没有整体分析,检查是否有文件级别的问题 - files_with_issues = [f for f in comment_result.file_analyses if f.has_text_changes or f.grammar_issues] - if files_with_issues: - analysis_status = "success" - logger.info(f"检测到 {len(files_with_issues)} 个文件有文本问题,将发布文件级别问题评论") - else: - analysis_status = "no_text_changes" - logger.info("未检测到文件级别问题,将发布无问题评论") - - # 总是生成和发布评论 - comment_content = generate_comment_content( - comment_result, - pr_html_url, - analysis_status - ) - - # 发布评论 - success = cli.add_pr_comment(pr_owner, pr_repo, pr_number, comment_content) - if success: - logger.info(f"AI代码审查评论发布成功 - 状态: {analysis_status}") - else: - logger.error(f"AI代码审查评论发布失败 - 状态: {analysis_status}") - - -def main(): - parser = argparse.ArgumentParser(description='Create AI-powered PR comment based on text analysis') - parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token') - parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner') - parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') - parser.add_argument('--pr_number', type=str, required=True, help='the PR number') - parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') - parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') - args = Args() - parser.parse_args(args=sys.argv[1:], namespace=args) - args.validate() - - exec_py = sys.argv[0] - config_yaml_path = exec_py[:-2] + 'yaml' - conf = load_config_yaml(config_yaml_path) - - cli = GiteeClient(args.gitee_token) - - pr_owner = args.pr_owner - pr_repo = args.pr_repo - pr_number = args.pr_number - siliconflow_api_key = args.siliconflow_api_key - siliconflow_api_base = args.siliconflow_api_base - create_comment_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) - - -if __name__ == '__main__': - main() diff --git a/ci/tools/comment/create_comment.yaml b/ci/tools/comment/create_comment.yaml deleted file mode 100644 index 32ac269ab..000000000 --- a/ci/tools/comment/create_comment.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Comment Agent Configuration -comment_agent: - # Backend Configuration - backend: - type: "siliconflow" # Options: "ollama" or "siliconflow" - # siliconflow配置现在通过命令行参数传入 - - # Model Configuration - model: - name: "Qwen/Qwen3-8B" - temperature: 0.1 - max_retry: 5 # For siliconflow backend - - # Processing Configuration - processing: - max_workers: 8 # Number of parallel workers for file processing - single_file_timeout: 180 # Timeout for single file analysis (seconds) - 
total_comment_timeout: 300 # Timeout for total PR analysis (seconds) - - # Logging Configuration - logging: - level: "INFO" - -# PR Comment Configuration -orgs: - - org_name: openeuler - comment_target_owner: openeuler - comment_target_repo: docs - auto_comment_enabled: true - text_check_enabled: true # 是否启用文本变更检测 - grammar_check_enabled: true # 是否启用语法错误检测 - - - org_name: src-openeuler - comment_target_owner: openeuler - comment_target_repo: globalization - auto_comment_enabled: true - text_check_enabled: true - grammar_check_enabled: true \ No newline at end of file -- Gitee From e609ffd74583e77b1920856f2d88aed58774cf07 Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Mon, 22 Sep 2025 14:50:45 +0800 Subject: [PATCH 4/8] update for merge --- .../new_create_translation_issue.py | 742 ++++++++---------- .../new_create_translation_issue.yaml | 69 +- .../new_create_translation_issue_AI.py | 402 ++++++++++ .../new_create_translation_issue_AI.yaml | 49 ++ 4 files changed, 812 insertions(+), 450 deletions(-) mode change 100755 => 100644 ci/tools/translation/new_create_translation_issue.py mode change 100755 => 100644 ci/tools/translation/new_create_translation_issue.yaml create mode 100755 ci/tools/translation/new_create_translation_issue_AI.py create mode 100755 ci/tools/translation/new_create_translation_issue_AI.yaml diff --git a/ci/tools/translation/new_create_translation_issue.py b/ci/tools/translation/new_create_translation_issue.py old mode 100755 new mode 100644 index 0a61c7136..f5e3f8f8a --- a/ci/tools/translation/new_create_translation_issue.py +++ b/ci/tools/translation/new_create_translation_issue.py @@ -1,402 +1,340 @@ -import argparse -import json -import logging -import re -import sys -from dataclasses import dataclass, field -from difflib import SequenceMatcher -from typing import TypeVar, Generic -from translation_agent import get_agent_summary - -import requests -import yaml - -logging.basicConfig(level=logging.INFO, stream=sys.stdout, - format='%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s') -logger = logging.getLogger(__name__) - - -@dataclass -class IssueTrigger: - trigger_pr_path: str - issue_title: str - issue_assignee: str - file_extension: list[str] = field(default_factory=list) - - -@dataclass -class Org: - org_name: str - issue_of_owner: str - issue_of_repo: str - auto_create_issue: bool - issue_triggers: list[dict | IssueTrigger] = field(default_factory=list) - change_content_exclude: list[str] = field(default_factory=list) - - def __post_init__(self): - tmp_issue_triggers: list[IssueTrigger] = [] - for item in self.issue_triggers: - tmp_issue_triggers.append(IssueTrigger(**item)) - self.issue_triggers = tmp_issue_triggers - - -@dataclass -class TranslationAgentConfig: - backend: dict = field(default_factory=dict) - model: dict = field(default_factory=dict) - processing: dict = field(default_factory=dict) - logging: dict = field(default_factory=dict) - - -@dataclass -class Config: - orgs: list[dict | Org] - translation_agent: dict | TranslationAgentConfig = field(default_factory=dict) - - def __post_init__(self): - tmp_orgs: list[Org] = [] - for item in self.orgs: - tmp_orgs.append(Org(**item)) - self.orgs = tmp_orgs - - if isinstance(self.translation_agent, dict) and self.translation_agent: - self.translation_agent = TranslationAgentConfig(**self.translation_agent) - - -@dataclass -class ReqArgs: - method: str - url: str - headers: dict[str, str] - params: dict[str, str] | None = field(default=None) - data: str | None = 
field(default=None) - timeout: int = field(default=180) - - -T = TypeVar('T') -content_type_is_text = "text/plain" -content_type_is_json_dict = {} -content_type_is_json_list = [] - - -def send_request(args: ReqArgs, t: Generic[T]) -> T: - error_count = 0 - while error_count < 3: - try: - resp = requests.request(**args.__dict__) - resp.raise_for_status() - if type(t) is dict or type(t) is list: - res_data: dict | list = resp.json() - else: - res_data: str = resp.text - except requests.exceptions.RequestException as e: - if e.response.status_code in [400, 401, 403, 404, 405]: - logger.error("[ERROR] client error {}".format(e)) - break - logger.error("[ERROR] server error: {}".format(e)) - error_count += 1 - else: - logger.info("[OK] [{}], {}".format(args.method, args.url)) - return res_data - return None - - -class GiteeClient: - """ - Gitee OpenAPI 客户端 - """ - headers = { - "Content-Type": "application/json", - "Accept": "application/json", - } - - def __init__(self, developer_token: str): - """ - 构造函数 - :param developer_token: Gitee v5 token - """ - self.headers["Authorization"] = "Bearer {}".format(developer_token) - - def get_diff_content(self, owner: str, repo: str, number: int) -> str | None: - req_url = "https://gitee.com/{}/{}/pulls/{}.diff".format(owner, repo, number) - req_args = ReqArgs(method="GET", url=req_url, headers=self.headers) - result: str | None = send_request(req_args, "") - if result is None: - logger.error("can not get diff file from PR: {}".format(req_url)) - return result - - def check_issue_exists(self, owner: str, repo: str, issue_titles: list[str]) -> tuple[list[str], list[str]]: - req_url = "https://gitee.com/api/v5/repos/{}/{}/issues".format(owner, repo) - page = 1 - existed_issues = [] - while page <= 200: - query = { - "per_page": 100, - "page": page, - "sort": "created", - "direction": "desc", - } - req_args = ReqArgs(method="GET", url=req_url, params=query, headers=self.headers) - result: list | None = send_request(req_args, []) - if result is None: - break - page += 1 - for item in result: - if not issue_titles: - return [], existed_issues - if issue_titles and item.get('title') in issue_titles: - issue_titles.remove(item.get('title')) - existed_issues.append(item.get('html_url')) - if len(result) < 100: - break - return issue_titles, existed_issues - - def create_issue(self, owner, repo, title, assignee, body): - req_url = "https://gitee.com/api/v5/repos/{}/issues".format(owner) - req_body = { - "repo": repo, - "title": title, - "issue_type": "翻译", - "body": body, - "assignee": assignee, - "push_events": False, - "tag_push_events": False, - "issues_events": False, - } - req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) - result: dict | None = send_request(req_args, {}) - return result is None - - def add_pr_comment(self, owner, repo, number, body): - req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number) - req_body = { - "body": "### Translation Feedback \n {} ".format(body) - } - req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) - result: dict | None = send_request(req_args, {}) - return result is None - - def check_only_marks_changed(self, owner, repo, number, check_list): - diff_content = self.get_diff_content(owner, repo, number) - deleted_strs, inserted_strs = get_diff_content_list(diff_content) - if is_only_marks_changed(deleted_strs, inserted_strs, check_list): - logger.warning('Only marks changed, skip the following 
steps') - sys.exit(1) - logger.info('Not just only marks changed, continue creating issue') - - -def get_diff_file_list(diff_content: str) -> list[str]: - diff_files_list = [] - diff_files = [x.split(' ')[0][2:] for x in diff_content.split('diff --git ')[1:]] - for diff_file in diff_files: - if diff_file.endswith('\"'): - d = re.compile(r'/[\d\s\S]+') - diff_file = d.findall(diff_file) - diff_file = diff_file[0].replace('/', '', 1).replace('\"', '') - diff_files_list.append(diff_file) - else: - diff_files_list.append(diff_file) - return diff_files_list - - -def get_diff_content_list(diff_content: str) -> tuple[str, str]: - pieces = diff_content.split('diff --git') - deleted_strs = '' - inserted_strs = '' - for piece in pieces: - start = False - for line in piece.splitlines(): - if line.startswith('@@'): - start = True - continue - if not start: - continue - if line.startswith('-'): - if len(line) == 1: - deleted_strs += '\n' - else: - deleted_strs += line[1:] - elif line.startswith('+'): - if len(line) == 1: - inserted_strs += '\n' - else: - inserted_strs += line[1:] - return deleted_strs, inserted_strs - - -def is_only_marks_changed(a, b, check_list): - s = SequenceMatcher(None, a, b) - for tag, i1, i2, j1, j2 in s.get_opcodes(): - if tag == 'equal': - continue - elif tag in ['delete', 'insert']: - return False - elif tag == 'replace': - deleted = ''.join(a[i1:i2]).strip() - inserted = ''.join(b[j1:j2]).strip() - if deleted not in check_list or inserted not in check_list: - return False - return True - - -class Args: - gitee_token: str - pr_owner: str - pr_repo: str - pr_number: int - siliconflow_api_key: str = "" - siliconflow_api_base: str = "https://api.siliconflow.cn/v1" - - def validate(self): - valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number - if not valid: - logger.error("Invalid Command Arguments") - sys.exit(1) - - -def load_config_yaml(yaml_path): - with open(yaml_path, "r", encoding="utf-8") as config_in: - data = yaml.safe_load(config_in) - - if data is None: - return None - return Config(**data) - - -def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, - pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str): - pr__html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) - for org_item in conf.orgs: - issue_title_pr_mark = "{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) - if org_item.org_name != pr_owner: - continue - # 旧标点符号判断逻辑,已弃用 - # if org_item.auto_create_issue: - # cli.check_only_marks_changed(pr_owner, pr_repo, pr_number, org_item.change_content_exclude) - file_count = 0 - diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) - if diff_content is None: - sys.exit(1) - diff_files = get_diff_file_list(diff_content) - zh_file = [] - en_file = [] - need_create_issue = {} - for trigger in org_item.issue_triggers: - for diff_file in diff_files: - if diff_file.startswith(trigger.trigger_pr_path) and diff_file.split('.')[-1] in trigger.file_extension: - logger.info("file {} has been changed".format(diff_file)) - file_count += 1 - if "/zh" in trigger.trigger_pr_path: - need_create_issue["zh"] = [trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] - zh_file.append(diff_file.replace("zh/", "")) - elif "/en" in trigger.trigger_pr_path: - need_create_issue["en"] = [trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] - en_file.append(diff_file.replace("en/", "")) - else: - 
logger.warning("not a range") - changed_same_files = False - for z in zh_file: - if z in en_file: - changed_same_files = True - else: - changed_same_files = False - if file_count == 0: - logger.warning( - "NOTE: https://gitee.com/{}/files change files out of translate range".format(issue_title_pr_mark)) - return - if changed_same_files: - logger.info("changed the same files in en and zh path, no need to create issue") - return - - need_create_issue_template = {} - need_create_issue_titles = [] - for issue_item in need_create_issue: - need_create_issue_titles.append(need_create_issue[issue_item][1]) - need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0] - if need_create_issue_titles: - - need_create_issue_list, existed_issue_list = cli.check_issue_exists(org_item.issue_of_owner, - org_item.issue_of_repo, - need_create_issue_titles) - - if not need_create_issue_list: - feedback_comment = "issue has already created, please go to check issue: {}".format( - existed_issue_list) - logger.info("Warning: " + feedback_comment) - cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) - for need_create_issue_item in need_create_issue_list: - - issue_summary = get_agent_summary(diff_content, siliconflow_api_key, siliconflow_api_base) - issue_body = "" - if issue_summary and not issue_summary.error: - issue_body += f"## 📊 变更统计\n\n" - issue_body += f"- **总文件数**: {issue_summary.total_files}\n" - issue_body += f"- **成功处理文件数**: {issue_summary.processed_files}\n" - if issue_summary.total_files != issue_summary.processed_files: - # 注意人工审查提醒 - issue_body += f"- **未处理文件数**: {issue_summary.total_files - issue_summary.processed_files}\n" - issue_body += f"- **提醒:机器人未能及时自动生成所有改动的摘要,请注意人工审查!**\n" - if issue_summary.total_summary: - total = issue_summary.total_summary - issue_body += f"- **总改动行数**: {total.total_lines_changed}\n" - issue_body += f"- **改动类型**: {', '.join(total.change_type_list)}\n\n" - issue_body += f"## 🔍 整体变更摘要\n\n" - issue_body += f"{total.overall_summary}\n\n" - issue_body += f"## ⚠️ 整体潜在影响\n\n" - issue_body += f"{total.overall_potential_impact}\n\n" - if issue_summary.file_summaries: - issue_body += f"## 📝 单文件变更详情\n\n" - for summary in issue_summary.file_summaries: - issue_body += f"### 📁 {summary.file_path}\n\n" - issue_body += f"- **改动类型**: {summary.change_type}\n" - issue_body += f"- **新增行数**: {summary.lines_added}\n" - issue_body += f"- **删除行数**: {summary.lines_deleted}\n" - issue_body += f"- **潜在影响**: {summary.potential_impact}\n" - issue_body += f"- **详细摘要**: {summary.summary}\n\n" - issue_body += "---\n\n" - else: - issue_body += f"## ⚠️ 翻译变更检测\n\n" - issue_body += f"检测到需要翻译的文件变更,但无法获取详细摘要信息。\n\n" - issue_body += f"**变更文件数量**: {len(diff_files)}\n" - issue_body += f"**相关PR**: {pr__html_url}\n\n" - - issue_body += f"## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成,仅供参考,请以实际更改为准。\n\n" - issue_body += f"## 🔗 相关PR链接\n\n" - issue_body += f"- {pr__html_url}\n" - - cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item, - need_create_issue_template[need_create_issue_item], - issue_body) - - - -def main(): - parser = argparse.ArgumentParser(description='Create Gitee Webhook based on the profile') - parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token') - parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner') - parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') - parser.add_argument('--pr_number', type=str, required=True, help='the PR 
number') - parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') - parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') - args = Args() - parser.parse_args(args=sys.argv[1:], namespace=args) - args.validate() - - exec_py = sys.argv[0] - config_yaml_path = exec_py[:-2] + 'yaml' - conf = load_config_yaml(config_yaml_path) - - cli = GiteeClient(args.gitee_token) - - pr_owner = args.pr_owner - pr_repo = args.pr_repo - pr_number = args.pr_number - siliconflow_api_key = args.siliconflow_api_key - siliconflow_api_base = args.siliconflow_api_base - create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) - - -if __name__ == '__main__': - main() +import argparse +import json +import logging +import re +import sys +from dataclasses import dataclass, field +from difflib import SequenceMatcher +from typing import TypeVar, Generic + +import requests +import yaml + +logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s') +logger = logging.getLogger(__name__) + + +@dataclass +class IssueTrigger: + trigger_pr_path: str + issue_title: str + issue_assignee: str + file_extension: list[str] = field(default_factory=list) + + +@dataclass +class Org: + org_name: str + issue_of_owner: str + issue_of_repo: str + auto_create_issue: bool + issue_triggers: list[dict | IssueTrigger] = field(default_factory=list) + change_content_exclude: list[str] = field(default_factory=list) + + def __post_init__(self): + tmp_issue_triggers: list[IssueTrigger] = [] + for item in self.issue_triggers: + tmp_issue_triggers.append(IssueTrigger(**item)) + self.issue_triggers = tmp_issue_triggers + + +@dataclass +class Config: + orgs: list[dict | Org] + + def __post_init__(self): + tmp_orgs: list[Org] = [] + for item in self.orgs: + tmp_orgs.append(Org(**item)) + self.orgs = tmp_orgs + + +@dataclass +class ReqArgs: + method: str + url: str + headers: dict[str, str] + params: dict[str, str] | None = field(default=None) + data: str | None = field(default=None) + timeout: int = field(default=180) + + +T = TypeVar('T') +content_type_is_text = "text/plain" +content_type_is_json_dict = {} +content_type_is_json_list = [] + + +def send_request(args: ReqArgs, t: Generic[T]) -> T: + error_count = 0 + while error_count < 3: + try: + resp = requests.request(**args.__dict__) + resp.raise_for_status() + if type(t) is dict or type(t) is list: + res_data: dict | list = resp.json() + else: + res_data: str = resp.text + except requests.exceptions.RequestException as e: + if e.response.status_code in [400, 401, 403, 404, 405]: + logger.error("[ERROR] client error {}".format(e)) + break + logger.error("[ERROR] server error: {}".format(e)) + error_count += 1 + else: + logger.info("[OK] [{}], {}".format(args.method, args.url)) + return res_data + return None + + +class GiteeClient: + """ + Gitee OpenAPI 客户端 + """ + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + + def __init__(self, developer_token: str): + """ + 构造函数 + :param developer_token: Gitee v5 token + """ + self.headers["Authorization"] = "Bearer {}".format(developer_token) + + def get_diff_content(self, owner: str, repo: str, number: int) -> str | None: + req_url = "https://gitee.com/{}/{}/pulls/{}.diff".format(owner, repo, number) + req_args = ReqArgs(method="GET", url=req_url, 
headers=self.headers) + result: str | None = send_request(req_args, "") + if result is None: + logger.error("can not get diff file from PR: {}".format(req_url)) + return result + + def check_issue_exists(self, owner: str, repo: str, issue_titles: list[str]) -> tuple[list[str], list[str]]: + req_url = "https://gitee.com/api/v5/repos/{}/{}/issues".format(owner, repo) + page = 1 + existed_issues = [] + while page <= 200: + query = { + "per_page": 100, + "page": page, + "sort": "created", + "direction": "desc", + } + req_args = ReqArgs(method="GET", url=req_url, params=query, headers=self.headers) + result: list | None = send_request(req_args, []) + if result is None: + break + page += 1 + for item in result: + if not issue_titles: + return [], existed_issues + if issue_titles and item.get('title') in issue_titles: + issue_titles.remove(item.get('title')) + existed_issues.append(item.get('html_url')) + if len(result) < 100: + break + return issue_titles, existed_issues + + def create_issue(self, owner, repo, title, assignee, body): + req_url = "https://gitee.com/api/v5/repos/{}/issues".format(owner) + req_body = { + "repo": repo, + "title": title, + "issue_type": "翻译", + "body": body, + "assignee": assignee, + "push_events": False, + "tag_push_events": False, + "issues_events": False, + } + req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) + result: dict | None = send_request(req_args, {}) + return result is None + + def add_pr_comment(self, owner, repo, number, body): + req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number) + req_body = { + "body": "### Translation Feedback \n {} ".format(body) + } + req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) + result: dict | None = send_request(req_args, {}) + return result is None + + def check_only_marks_changed(self, owner, repo, number, check_list): + diff_content = self.get_diff_content(owner, repo, number) + deleted_strs, inserted_strs = get_diff_content_list(diff_content) + if is_only_marks_changed(deleted_strs, inserted_strs, check_list): + logger.warning('Only marks changed, skip the following steps') + sys.exit(1) + logger.info('Not just only marks changed, continue creating issue') + + +def get_diff_file_list(diff_content: str) -> list[str]: + diff_files_list = [] + diff_files = [x.split(' ')[0][2:] for x in diff_content.split('diff --git ')[1:]] + for diff_file in diff_files: + if diff_file.endswith('\"'): + d = re.compile(r'/[\d\s\S]+') + diff_file = d.findall(diff_file) + diff_file = diff_file[0].replace('/', '', 1).replace('\"', '') + diff_files_list.append(diff_file) + else: + diff_files_list.append(diff_file) + return diff_files_list + + +def get_diff_content_list(diff_content: str) -> tuple[str, str]: + pieces = diff_content.split('diff --git') + deleted_strs = '' + inserted_strs = '' + for piece in pieces: + start = False + for line in piece.splitlines(): + if line.startswith('@@'): + start = True + continue + if not start: + continue + if line.startswith('-'): + if len(line) == 1: + deleted_strs += '\n' + else: + deleted_strs += line[1:] + elif line.startswith('+'): + if len(line) == 1: + inserted_strs += '\n' + else: + inserted_strs += line[1:] + return deleted_strs, inserted_strs + + +def is_only_marks_changed(a, b, check_list): + s = SequenceMatcher(None, a, b) + for tag, i1, i2, j1, j2 in s.get_opcodes(): + if tag == 'equal': + continue + elif tag in ['delete', 'insert']: + return False + elif tag 
== 'replace': + deleted = ''.join(a[i1:i2]).strip() + inserted = ''.join(b[j1:j2]).strip() + if deleted not in check_list or inserted not in check_list: + return False + return True + + +class Args: + gitee_token: str + pr_owner: str + pr_repo: str + pr_number: int + + def validate(self): + valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number + if not valid: + logger.error("Invalid Command Arguments") + sys.exit(1) + + +def load_config_yaml(yaml_path): + with open(yaml_path, "r", encoding="utf-8") as config_in: + data = yaml.safe_load(config_in) + + if data is None: + return None + return Config(**data) + + +def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, + pr_number: int): + pr__html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + for org_item in conf.orgs: + issue_title_pr_mark = "{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + if org_item.org_name != pr_owner: + continue + if org_item.auto_create_issue: + cli.check_only_marks_changed(pr_owner, pr_repo, pr_number, org_item.change_content_exclude) + file_count = 0 + diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) + if diff_content is None: + sys.exit(1) + diff_files = get_diff_file_list(diff_content) + zh_file = [] + en_file = [] + need_create_issue = {} + for trigger in org_item.issue_triggers: + for diff_file in diff_files: + if diff_file.startswith(trigger.trigger_pr_path) and diff_file.split('.')[-1] in trigger.file_extension: + logger.info("file {} has been changed".format(diff_file)) + file_count += 1 + if "/zh" in trigger.trigger_pr_path: + need_create_issue["zh"] = [trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + zh_file.append(diff_file.replace("zh/", "")) + elif "/en" in trigger.trigger_pr_path: + need_create_issue["en"] = [trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + en_file.append(diff_file.replace("en/", "")) + else: + logger.warning("not a range") + changed_same_files = False + for z in zh_file: + if z in en_file: + changed_same_files = True + else: + changed_same_files = False + if file_count == 0: + logger.warning( + "NOTE: https://gitee.com/{}/files change files out of translate range".format(issue_title_pr_mark)) + return + if changed_same_files: + logger.info("changed the same files in en and zh path, no need to create issue") + return + + need_create_issue_template = {} + need_create_issue_titles = [] + for issue_item in need_create_issue: + need_create_issue_titles.append(need_create_issue[issue_item][1]) + need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0] + if need_create_issue_titles: + need_create_issue_list, existed_issue_list = cli.check_issue_exists(org_item.issue_of_owner, + org_item.issue_of_repo, + need_create_issue_titles) + if not need_create_issue_list: + feedback_comment = "issue has already created, please go to check issue: {}".format( + existed_issue_list) + logger.info("Warning: " + feedback_comment) + cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) + for need_create_issue_item in need_create_issue_list: + cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item, + need_create_issue_template[need_create_issue_item], + "### Related PR link \n - {}".format(pr__html_url)) + + +def main(): + parser = argparse.ArgumentParser(description='Create Gitee Webhook based on the profile') + 
parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token') + parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner') + parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') + parser.add_argument('--pr_number', type=str, required=True, help='the PR number') + args = Args() + parser.parse_args(args=sys.argv[1:], namespace=args) + args.validate() + + exec_py = sys.argv[0] + config_yaml_path = exec_py[:-2] + 'yaml' + conf = load_config_yaml(config_yaml_path) + + cli = GiteeClient(args.gitee_token) + + pr_owner = args.pr_owner + pr_repo = args.pr_repo + pr_number = args.pr_number + create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number) + + +if __name__ == '__main__': + main() diff --git a/ci/tools/translation/new_create_translation_issue.yaml b/ci/tools/translation/new_create_translation_issue.yaml old mode 100755 new mode 100644 index bc48ab7a2..07e6c76d0 --- a/ci/tools/translation/new_create_translation_issue.yaml +++ b/ci/tools/translation/new_create_translation_issue.yaml @@ -1,49 +1,22 @@ -# Translation Agent Configuration -translation_agent: - # Backend Configuration - backend: - type: "siliconflow" # Options: "ollama" or "siliconflow" - # siliconflow配置现在通过命令行参数传入 - ollama: - base_url: "http://localhost:11434" - - # Model Configuration - model: - name: "Qwen/Qwen3-32B" # Options: "llama3" "Qwen/Qwen3-8B" "THUDM/GLM-4-32B-0414" or others - temperature: 0.1 - max_retry: 5 # For siliconflow backend - max_retry_ollama: 1 # For ollama backend - - # Processing Configuration - processing: - max_workers: 8 # Number of parallel workers for file processing - single_file_timeout: 180 # Timeout for single file summary generation (seconds) - total_summary_timeout: 300 # Timeout for total summary generation (seconds) - - # Logging Configuration - logging: - level: "INFO" - -# Issue Creation Configuration -orgs: - - org_name: openeuler - issue_of_owner: openeuler - issue_of_repo: globalization - auto_create_issue: true - issue_triggers: - - trigger_pr_path: 'docs/zh' - issue_title: "[Auto] This is an English translation issue for the PR" - issue_assignee: judithsq - file_extension: [ doc, md, json ] - change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] - - - org_name: src-openeuler - issue_of_owner: openeuler - issue_of_repo: globalization - auto_create_issue: true - issue_triggers: - - trigger_pr_path: 'docs/zh' - issue_title: "[Auto] This is an English translation issue for the PR" - issue_assignee: judithsq - file_extension: [ doc, md, json ] +orgs: + - org_name: openeuler + issue_of_owner: openeuler + issue_of_repo: globalization + auto_create_issue: true + issue_triggers: + - trigger_pr_path: 'docs/zh' + issue_title: "[Auto] This is an English translation issue for the PR" + issue_assignee: judithsq + file_extension: [ doc, md, json ] + change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] + + - org_name: src-openeuler + issue_of_owner: openeuler + issue_of_repo: globalization + auto_create_issue: true + issue_triggers: + - trigger_pr_path: 'docs/zh' + issue_title: "[Auto] This is an English translation issue for the PR" + issue_assignee: judithsq + file_extension: [ doc, md, json ] change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] \ No newline at end of file diff --git a/ci/tools/translation/new_create_translation_issue_AI.py b/ci/tools/translation/new_create_translation_issue_AI.py 
new file mode 100755 index 000000000..0a61c7136 --- /dev/null +++ b/ci/tools/translation/new_create_translation_issue_AI.py @@ -0,0 +1,402 @@ +import argparse +import json +import logging +import re +import sys +from dataclasses import dataclass, field +from difflib import SequenceMatcher +from typing import TypeVar, Generic +from translation_agent import get_agent_summary + +import requests +import yaml + +logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(asctime)s [%(levelname)s] %(module)s.%(lineno)d %(name)s:\t%(message)s') +logger = logging.getLogger(__name__) + + +@dataclass +class IssueTrigger: + trigger_pr_path: str + issue_title: str + issue_assignee: str + file_extension: list[str] = field(default_factory=list) + + +@dataclass +class Org: + org_name: str + issue_of_owner: str + issue_of_repo: str + auto_create_issue: bool + issue_triggers: list[dict | IssueTrigger] = field(default_factory=list) + change_content_exclude: list[str] = field(default_factory=list) + + def __post_init__(self): + tmp_issue_triggers: list[IssueTrigger] = [] + for item in self.issue_triggers: + tmp_issue_triggers.append(IssueTrigger(**item)) + self.issue_triggers = tmp_issue_triggers + + +@dataclass +class TranslationAgentConfig: + backend: dict = field(default_factory=dict) + model: dict = field(default_factory=dict) + processing: dict = field(default_factory=dict) + logging: dict = field(default_factory=dict) + + +@dataclass +class Config: + orgs: list[dict | Org] + translation_agent: dict | TranslationAgentConfig = field(default_factory=dict) + + def __post_init__(self): + tmp_orgs: list[Org] = [] + for item in self.orgs: + tmp_orgs.append(Org(**item)) + self.orgs = tmp_orgs + + if isinstance(self.translation_agent, dict) and self.translation_agent: + self.translation_agent = TranslationAgentConfig(**self.translation_agent) + + +@dataclass +class ReqArgs: + method: str + url: str + headers: dict[str, str] + params: dict[str, str] | None = field(default=None) + data: str | None = field(default=None) + timeout: int = field(default=180) + + +T = TypeVar('T') +content_type_is_text = "text/plain" +content_type_is_json_dict = {} +content_type_is_json_list = [] + + +def send_request(args: ReqArgs, t: Generic[T]) -> T: + error_count = 0 + while error_count < 3: + try: + resp = requests.request(**args.__dict__) + resp.raise_for_status() + if type(t) is dict or type(t) is list: + res_data: dict | list = resp.json() + else: + res_data: str = resp.text + except requests.exceptions.RequestException as e: + if e.response.status_code in [400, 401, 403, 404, 405]: + logger.error("[ERROR] client error {}".format(e)) + break + logger.error("[ERROR] server error: {}".format(e)) + error_count += 1 + else: + logger.info("[OK] [{}], {}".format(args.method, args.url)) + return res_data + return None + + +class GiteeClient: + """ + Gitee OpenAPI 客户端 + """ + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + + def __init__(self, developer_token: str): + """ + 构造函数 + :param developer_token: Gitee v5 token + """ + self.headers["Authorization"] = "Bearer {}".format(developer_token) + + def get_diff_content(self, owner: str, repo: str, number: int) -> str | None: + req_url = "https://gitee.com/{}/{}/pulls/{}.diff".format(owner, repo, number) + req_args = ReqArgs(method="GET", url=req_url, headers=self.headers) + result: str | None = send_request(req_args, "") + if result is None: + logger.error("can not get diff file from PR: {}".format(req_url)) + return result + + def 
check_issue_exists(self, owner: str, repo: str, issue_titles: list[str]) -> tuple[list[str], list[str]]: + req_url = "https://gitee.com/api/v5/repos/{}/{}/issues".format(owner, repo) + page = 1 + existed_issues = [] + while page <= 200: + query = { + "per_page": 100, + "page": page, + "sort": "created", + "direction": "desc", + } + req_args = ReqArgs(method="GET", url=req_url, params=query, headers=self.headers) + result: list | None = send_request(req_args, []) + if result is None: + break + page += 1 + for item in result: + if not issue_titles: + return [], existed_issues + if issue_titles and item.get('title') in issue_titles: + issue_titles.remove(item.get('title')) + existed_issues.append(item.get('html_url')) + if len(result) < 100: + break + return issue_titles, existed_issues + + def create_issue(self, owner, repo, title, assignee, body): + req_url = "https://gitee.com/api/v5/repos/{}/issues".format(owner) + req_body = { + "repo": repo, + "title": title, + "issue_type": "翻译", + "body": body, + "assignee": assignee, + "push_events": False, + "tag_push_events": False, + "issues_events": False, + } + req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) + result: dict | None = send_request(req_args, {}) + return result is None + + def add_pr_comment(self, owner, repo, number, body): + req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number) + req_body = { + "body": "### Translation Feedback \n {} ".format(body) + } + req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) + result: dict | None = send_request(req_args, {}) + return result is None + + def check_only_marks_changed(self, owner, repo, number, check_list): + diff_content = self.get_diff_content(owner, repo, number) + deleted_strs, inserted_strs = get_diff_content_list(diff_content) + if is_only_marks_changed(deleted_strs, inserted_strs, check_list): + logger.warning('Only marks changed, skip the following steps') + sys.exit(1) + logger.info('Not just only marks changed, continue creating issue') + + +def get_diff_file_list(diff_content: str) -> list[str]: + diff_files_list = [] + diff_files = [x.split(' ')[0][2:] for x in diff_content.split('diff --git ')[1:]] + for diff_file in diff_files: + if diff_file.endswith('\"'): + d = re.compile(r'/[\d\s\S]+') + diff_file = d.findall(diff_file) + diff_file = diff_file[0].replace('/', '', 1).replace('\"', '') + diff_files_list.append(diff_file) + else: + diff_files_list.append(diff_file) + return diff_files_list + + +def get_diff_content_list(diff_content: str) -> tuple[str, str]: + pieces = diff_content.split('diff --git') + deleted_strs = '' + inserted_strs = '' + for piece in pieces: + start = False + for line in piece.splitlines(): + if line.startswith('@@'): + start = True + continue + if not start: + continue + if line.startswith('-'): + if len(line) == 1: + deleted_strs += '\n' + else: + deleted_strs += line[1:] + elif line.startswith('+'): + if len(line) == 1: + inserted_strs += '\n' + else: + inserted_strs += line[1:] + return deleted_strs, inserted_strs + + +def is_only_marks_changed(a, b, check_list): + s = SequenceMatcher(None, a, b) + for tag, i1, i2, j1, j2 in s.get_opcodes(): + if tag == 'equal': + continue + elif tag in ['delete', 'insert']: + return False + elif tag == 'replace': + deleted = ''.join(a[i1:i2]).strip() + inserted = ''.join(b[j1:j2]).strip() + if deleted not in check_list or inserted not in check_list: + return False + return True + + 
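+# Illustrative usage sketch for is_only_marks_changed (assumption: the sample
+# check_list mirrors the change_content_exclude entries in the yaml config;
+# this demo helper is not called anywhere in the module's runtime flow):
+def _demo_is_only_marks_changed():
+    check_list = [',', ',', '.', '。']
+    # Punctuation-only rewrite: every 'replace' opcode maps a mark in
+    # check_list to another mark in check_list, so the result is True
+    # and the caller would skip issue creation.
+    assert is_only_marks_changed('你好,世界。', '你好,世界.', check_list)
+    # A genuine wording change produces a 'replace' outside check_list.
+    assert not is_only_marks_changed('hello world', 'hello there', check_list)
+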
+class Args: + gitee_token: str + pr_owner: str + pr_repo: str + pr_number: int + siliconflow_api_key: str = "" + siliconflow_api_base: str = "https://api.siliconflow.cn/v1" + + def validate(self): + valid = self.gitee_token and self.pr_owner and self.pr_repo and self.pr_number + if not valid: + logger.error("Invalid Command Arguments") + sys.exit(1) + + +def load_config_yaml(yaml_path): + with open(yaml_path, "r", encoding="utf-8") as config_in: + data = yaml.safe_load(config_in) + + if data is None: + return None + return Config(**data) + + +def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, + pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str): + pr__html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + for org_item in conf.orgs: + issue_title_pr_mark = "{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + if org_item.org_name != pr_owner: + continue + # 旧标点符号判断逻辑,已弃用 + # if org_item.auto_create_issue: + # cli.check_only_marks_changed(pr_owner, pr_repo, pr_number, org_item.change_content_exclude) + file_count = 0 + diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) + if diff_content is None: + sys.exit(1) + diff_files = get_diff_file_list(diff_content) + zh_file = [] + en_file = [] + need_create_issue = {} + for trigger in org_item.issue_triggers: + for diff_file in diff_files: + if diff_file.startswith(trigger.trigger_pr_path) and diff_file.split('.')[-1] in trigger.file_extension: + logger.info("file {} has been changed".format(diff_file)) + file_count += 1 + if "/zh" in trigger.trigger_pr_path: + need_create_issue["zh"] = [trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + zh_file.append(diff_file.replace("zh/", "")) + elif "/en" in trigger.trigger_pr_path: + need_create_issue["en"] = [trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + en_file.append(diff_file.replace("en/", "")) + else: + logger.warning("not a range") + changed_same_files = False + for z in zh_file: + if z in en_file: + changed_same_files = True + else: + changed_same_files = False + if file_count == 0: + logger.warning( + "NOTE: https://gitee.com/{}/files change files out of translate range".format(issue_title_pr_mark)) + return + if changed_same_files: + logger.info("changed the same files in en and zh path, no need to create issue") + return + + need_create_issue_template = {} + need_create_issue_titles = [] + for issue_item in need_create_issue: + need_create_issue_titles.append(need_create_issue[issue_item][1]) + need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0] + if need_create_issue_titles: + + need_create_issue_list, existed_issue_list = cli.check_issue_exists(org_item.issue_of_owner, + org_item.issue_of_repo, + need_create_issue_titles) + + if not need_create_issue_list: + feedback_comment = "issue has already created, please go to check issue: {}".format( + existed_issue_list) + logger.info("Warning: " + feedback_comment) + cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) + for need_create_issue_item in need_create_issue_list: + + issue_summary = get_agent_summary(diff_content, siliconflow_api_key, siliconflow_api_base) + issue_body = "" + if issue_summary and not issue_summary.error: + issue_body += f"## 📊 变更统计\n\n" + issue_body += f"- **总文件数**: {issue_summary.total_files}\n" + issue_body += f"- **成功处理文件数**: {issue_summary.processed_files}\n" + if 
issue_summary.total_files != issue_summary.processed_files: + # 注意人工审查提醒 + issue_body += f"- **未处理文件数**: {issue_summary.total_files - issue_summary.processed_files}\n" + issue_body += f"- **提醒:机器人未能及时自动生成所有改动的摘要,请注意人工审查!**\n" + if issue_summary.total_summary: + total = issue_summary.total_summary + issue_body += f"- **总改动行数**: {total.total_lines_changed}\n" + issue_body += f"- **改动类型**: {', '.join(total.change_type_list)}\n\n" + issue_body += f"## 🔍 整体变更摘要\n\n" + issue_body += f"{total.overall_summary}\n\n" + issue_body += f"## ⚠️ 整体潜在影响\n\n" + issue_body += f"{total.overall_potential_impact}\n\n" + if issue_summary.file_summaries: + issue_body += f"## 📝 单文件变更详情\n\n" + for summary in issue_summary.file_summaries: + issue_body += f"### 📁 {summary.file_path}\n\n" + issue_body += f"- **改动类型**: {summary.change_type}\n" + issue_body += f"- **新增行数**: {summary.lines_added}\n" + issue_body += f"- **删除行数**: {summary.lines_deleted}\n" + issue_body += f"- **潜在影响**: {summary.potential_impact}\n" + issue_body += f"- **详细摘要**: {summary.summary}\n\n" + issue_body += "---\n\n" + else: + issue_body += f"## ⚠️ 翻译变更检测\n\n" + issue_body += f"检测到需要翻译的文件变更,但无法获取详细摘要信息。\n\n" + issue_body += f"**变更文件数量**: {len(diff_files)}\n" + issue_body += f"**相关PR**: {pr__html_url}\n\n" + + issue_body += f"## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成,仅供参考,请以实际更改为准。\n\n" + issue_body += f"## 🔗 相关PR链接\n\n" + issue_body += f"- {pr__html_url}\n" + + cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item, + need_create_issue_template[need_create_issue_item], + issue_body) + + + +def main(): + parser = argparse.ArgumentParser(description='Create Gitee Webhook based on the profile') + parser.add_argument('--gitee_token', type=str, required=True, help='gitee v5 api token') + parser.add_argument('--pr_owner', type=str, required=True, help='the PR of owner') + parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') + parser.add_argument('--pr_number', type=str, required=True, help='the PR number') + parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') + parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') + args = Args() + parser.parse_args(args=sys.argv[1:], namespace=args) + args.validate() + + exec_py = sys.argv[0] + config_yaml_path = exec_py[:-2] + 'yaml' + conf = load_config_yaml(config_yaml_path) + + cli = GiteeClient(args.gitee_token) + + pr_owner = args.pr_owner + pr_repo = args.pr_repo + pr_number = args.pr_number + siliconflow_api_key = args.siliconflow_api_key + siliconflow_api_base = args.siliconflow_api_base + create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) + + +if __name__ == '__main__': + main() diff --git a/ci/tools/translation/new_create_translation_issue_AI.yaml b/ci/tools/translation/new_create_translation_issue_AI.yaml new file mode 100755 index 000000000..bc48ab7a2 --- /dev/null +++ b/ci/tools/translation/new_create_translation_issue_AI.yaml @@ -0,0 +1,49 @@ +# Translation Agent Configuration +translation_agent: + # Backend Configuration + backend: + type: "siliconflow" # Options: "ollama" or "siliconflow" + # siliconflow配置现在通过命令行参数传入 + ollama: + base_url: "http://localhost:11434" + + # Model Configuration + model: + name: "Qwen/Qwen3-32B" # Options: "llama3" "Qwen/Qwen3-8B" "THUDM/GLM-4-32B-0414" or others + temperature: 0.1 + max_retry: 5 # For siliconflow backend 
+ max_retry_ollama: 1 # For ollama backend + + # Processing Configuration + processing: + max_workers: 8 # Number of parallel workers for file processing + single_file_timeout: 180 # Timeout for single file summary generation (seconds) + total_summary_timeout: 300 # Timeout for total summary generation (seconds) + + # Logging Configuration + logging: + level: "INFO" + +# Issue Creation Configuration +orgs: + - org_name: openeuler + issue_of_owner: openeuler + issue_of_repo: globalization + auto_create_issue: true + issue_triggers: + - trigger_pr_path: 'docs/zh' + issue_title: "[Auto] This is an English translation issue for the PR" + issue_assignee: judithsq + file_extension: [ doc, md, json ] + change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] + + - org_name: src-openeuler + issue_of_owner: openeuler + issue_of_repo: globalization + auto_create_issue: true + issue_triggers: + - trigger_pr_path: 'docs/zh' + issue_title: "[Auto] This is an English translation issue for the PR" + issue_assignee: judithsq + file_extension: [ doc, md, json ] + change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] \ No newline at end of file -- Gitee From 8177646f2d47ce618c6ede807275ec200565555b Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Fri, 26 Sep 2025 01:54:24 +0800 Subject: [PATCH 5/8] update for review --- .../new_create_translation_issue_AI.py | 291 ++++++++++++------ ci/tools/translation/translation.yaml | 7 +- ci/tools/translation/translation_agent.py | 197 ++++++------ 3 files changed, 298 insertions(+), 197 deletions(-) diff --git a/ci/tools/translation/new_create_translation_issue_AI.py b/ci/tools/translation/new_create_translation_issue_AI.py index 0a61c7136..272b70711 100755 --- a/ci/tools/translation/new_create_translation_issue_AI.py +++ b/ci/tools/translation/new_create_translation_issue_AI.py @@ -1,3 +1,10 @@ +# ==================== 常量定义 ==================== + +# Issue类型常量 +ISSUE_TYPE_TRANSLATION = "翻译" + +# ==================== 数据模型定义 ==================== + import argparse import json import logging @@ -156,7 +163,7 @@ class GiteeClient: req_body = { "repo": repo, "title": title, - "issue_type": "翻译", + "issue_type": ISSUE_TYPE_TRANSLATION, "body": body, "assignee": assignee, "push_events": False, @@ -263,112 +270,192 @@ def load_config_yaml(yaml_path): return Config(**data) +def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger], issue_title_pr_mark: str) -> tuple[int, list[str], list[str], dict]: + """ + 分析diff文件,识别需要创建issue的文件 + 返回: (文件计数, 中文文件列表, 英文文件列表, 需要创建的issue字典) + """ + file_count = 0 + zh_file = [] + en_file = [] + need_create_issue = {} + + for trigger in issue_triggers: + for diff_file in diff_files: + if diff_file.startswith(trigger.trigger_pr_path) and diff_file.split('.')[-1] in trigger.file_extension: + logger.info("file {} has been changed".format(diff_file)) + file_count += 1 + if "/zh" in trigger.trigger_pr_path: + need_create_issue["zh"] = [trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + zh_file.append(diff_file.replace("zh/", "")) + elif "/en" in trigger.trigger_pr_path: + need_create_issue["en"] = [trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + en_file.append(diff_file.replace("en/", "")) + else: + logger.warning("not a range") + + return file_count, zh_file, en_file, need_create_issue + + +def check_same_files_changed(zh_file: list[str], en_file: list[str]) -> bool: + 
""" + 检查中英文路径下是否修改了相同的文件 + """ + for z in zh_file: + if z in en_file: + return True + return False + + +def prepare_issue_templates(need_create_issue: dict) -> tuple[dict, list[str]]: + """ + 准备issue模板和标题列表 + """ + need_create_issue_template = {} + need_create_issue_titles = [] + for issue_item in need_create_issue: + need_create_issue_titles.append(need_create_issue[issue_item][1]) + need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0] + return need_create_issue_template, need_create_issue_titles + + +def generate_issue_body(issue_summary, diff_files: list[str], pr_html_url: str) -> str: + """ + 生成issue的正文内容 + """ + issue_body = "" + if issue_summary and not issue_summary.error: + issue_body += f"## 📊 变更统计\n\n" + issue_body += f"- **总文件数**: {issue_summary.total_files}\n" + issue_body += f"- **成功处理文件数**: {issue_summary.processed_files}\n" + if issue_summary.total_files != issue_summary.processed_files: + # 注意人工审查提醒 + issue_body += f"- **未处理文件数**: {issue_summary.total_files - issue_summary.processed_files}\n" + issue_body += f"- **提醒:机器人未能及时自动生成所有改动的摘要,请注意人工审查!**\n" + if issue_summary.total_summary: + total = issue_summary.total_summary + issue_body += f"- **总改动行数**: {total.total_lines_changed}\n" + issue_body += f"- **改动类型**: {', '.join(total.change_type_list)}\n\n" + issue_body += f"## 🔍 整体变更摘要\n\n" + issue_body += f"{total.overall_summary}\n\n" + issue_body += f"## ⚠️ 整体潜在影响\n\n" + issue_body += f"{total.overall_potential_impact}\n\n" + if issue_summary.file_summaries: + issue_body += f"## 📝 单文件变更详情\n\n" + for summary in issue_summary.file_summaries: + issue_body += f"### 📁 {summary.file_path}\n\n" + issue_body += f"- **改动类型**: {summary.change_type}\n" + issue_body += f"- **新增行数**: {summary.lines_added}\n" + issue_body += f"- **删除行数**: {summary.lines_deleted}\n" + issue_body += f"- **潜在影响**: {summary.potential_impact}\n" + issue_body += f"- **详细摘要**: {summary.summary}\n\n" + issue_body += "---\n\n" + else: + issue_body += f"## ⚠️ 翻译变更检测\n\n" + issue_body += f"检测到需要翻译的文件变更,但无法获取详细摘要信息。\n\n" + issue_body += f"**变更文件数量**: {len(diff_files)}\n" + issue_body += f"**相关PR**: {pr_html_url}\n\n" + + issue_body += f"## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成,仅供参考,请以实际更改为准。\n\n" + issue_body += f"## 🔗 相关PR链接\n\n" + issue_body += f"- {pr_html_url}\n" + + return issue_body + + +def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: str, pr_number: int, + siliconflow_api_key: str, siliconflow_api_base: str, pr_html_url: str, issue_title_pr_mark: str, + translation_agent_config: TranslationAgentConfig = None): + """ + 处理单个组织配置项 + """ + # 获取diff内容 + diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) + if diff_content is None: + sys.exit(1) + + diff_files = get_diff_file_list(diff_content) + + # 分析diff文件 + file_count, zh_file, en_file, need_create_issue = analyze_diff_files( + diff_files, org_item.issue_triggers, issue_title_pr_mark) + + # 检查是否修改了相同文件 + changed_same_files = check_same_files_changed(zh_file, en_file) + + # 验证是否需要创建issue + if file_count == 0: + logger.warning( + "NOTE: https://gitee.com/{}/files change files out of translate range".format(issue_title_pr_mark)) + return + + if changed_same_files: + logger.info("changed the same files in en and zh path, no need to create issue") + return + + # 准备issue模板 + need_create_issue_template, need_create_issue_titles = prepare_issue_templates(need_create_issue) + + if not need_create_issue_titles: + return + + # 检查issue是否已存在 + need_create_issue_list, existed_issue_list = 
cli.check_issue_exists( + org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_titles) + + if not need_create_issue_list: + feedback_comment = "issue has already created, please go to check issue: {}".format(existed_issue_list) + logger.info("Warning: " + feedback_comment) + cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) + return + + # 创建issue + for need_create_issue_item in need_create_issue_list: + # 从配置中提取参数 + backend_config = translation_agent_config.backend if translation_agent_config else {} + model_config = translation_agent_config.model if translation_agent_config else {} + processing_config = translation_agent_config.processing if translation_agent_config else {} + + # 提取具体配置值 + backend_type = backend_config.get('type', 'siliconflow') + model_name = model_config.get('name', 'Qwen/Qwen3-8B') + temperature = model_config.get('temperature', 0.1) + max_workers = processing_config.get('max_workers', 8) + single_file_timeout = processing_config.get('single_file_timeout', 180) + total_summary_timeout = processing_config.get('total_summary_timeout', 300) + max_retry = model_config.get('max_retry', 5) + max_retry_ollama = model_config.get('max_retry_ollama', 1) + + issue_summary = get_agent_summary( + diff_content, siliconflow_api_key, siliconflow_api_base, + model_name=model_name, backend_type=backend_type, temperature=temperature, + max_workers=max_workers, single_file_timeout=single_file_timeout, + total_summary_timeout=total_summary_timeout, max_retry=max_retry, + max_retry_ollama=max_retry_ollama + ) + issue_body = generate_issue_body(issue_summary, diff_files, pr_html_url) + + cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item, + need_create_issue_template[need_create_issue_item], issue_body) + + def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str): - pr__html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + """ + 基于PR diff和配置创建issue的主函数 + """ + pr_html_url = "https://gitee.com/{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + issue_title_pr_mark = "{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) + for org_item in conf.orgs: - issue_title_pr_mark = "{}/{}/pulls/{}".format(pr_owner, pr_repo, pr_number) if org_item.org_name != pr_owner: continue - # 旧标点符号判断逻辑,已弃用 - # if org_item.auto_create_issue: - # cli.check_only_marks_changed(pr_owner, pr_repo, pr_number, org_item.change_content_exclude) - file_count = 0 - diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) - if diff_content is None: - sys.exit(1) - diff_files = get_diff_file_list(diff_content) - zh_file = [] - en_file = [] - need_create_issue = {} - for trigger in org_item.issue_triggers: - for diff_file in diff_files: - if diff_file.startswith(trigger.trigger_pr_path) and diff_file.split('.')[-1] in trigger.file_extension: - logger.info("file {} has been changed".format(diff_file)) - file_count += 1 - if "/zh" in trigger.trigger_pr_path: - need_create_issue["zh"] = [trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] - zh_file.append(diff_file.replace("zh/", "")) - elif "/en" in trigger.trigger_pr_path: - need_create_issue["en"] = [trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] - en_file.append(diff_file.replace("en/", "")) - else: - logger.warning("not a range") - changed_same_files = False - for z in zh_file: - 
if z in en_file: - changed_same_files = True - else: - changed_same_files = False - if file_count == 0: - logger.warning( - "NOTE: https://gitee.com/{}/files change files out of translate range".format(issue_title_pr_mark)) - return - if changed_same_files: - logger.info("changed the same files in en and zh path, no need to create issue") - return - - need_create_issue_template = {} - need_create_issue_titles = [] - for issue_item in need_create_issue: - need_create_issue_titles.append(need_create_issue[issue_item][1]) - need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0] - if need_create_issue_titles: - - need_create_issue_list, existed_issue_list = cli.check_issue_exists(org_item.issue_of_owner, - org_item.issue_of_repo, - need_create_issue_titles) - - if not need_create_issue_list: - feedback_comment = "issue has already created, please go to check issue: {}".format( - existed_issue_list) - logger.info("Warning: " + feedback_comment) - cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) - for need_create_issue_item in need_create_issue_list: - - issue_summary = get_agent_summary(diff_content, siliconflow_api_key, siliconflow_api_base) - issue_body = "" - if issue_summary and not issue_summary.error: - issue_body += f"## 📊 变更统计\n\n" - issue_body += f"- **总文件数**: {issue_summary.total_files}\n" - issue_body += f"- **成功处理文件数**: {issue_summary.processed_files}\n" - if issue_summary.total_files != issue_summary.processed_files: - # 注意人工审查提醒 - issue_body += f"- **未处理文件数**: {issue_summary.total_files - issue_summary.processed_files}\n" - issue_body += f"- **提醒:机器人未能及时自动生成所有改动的摘要,请注意人工审查!**\n" - if issue_summary.total_summary: - total = issue_summary.total_summary - issue_body += f"- **总改动行数**: {total.total_lines_changed}\n" - issue_body += f"- **改动类型**: {', '.join(total.change_type_list)}\n\n" - issue_body += f"## 🔍 整体变更摘要\n\n" - issue_body += f"{total.overall_summary}\n\n" - issue_body += f"## ⚠️ 整体潜在影响\n\n" - issue_body += f"{total.overall_potential_impact}\n\n" - if issue_summary.file_summaries: - issue_body += f"## 📝 单文件变更详情\n\n" - for summary in issue_summary.file_summaries: - issue_body += f"### 📁 {summary.file_path}\n\n" - issue_body += f"- **改动类型**: {summary.change_type}\n" - issue_body += f"- **新增行数**: {summary.lines_added}\n" - issue_body += f"- **删除行数**: {summary.lines_deleted}\n" - issue_body += f"- **潜在影响**: {summary.potential_impact}\n" - issue_body += f"- **详细摘要**: {summary.summary}\n\n" - issue_body += "---\n\n" - else: - issue_body += f"## ⚠️ 翻译变更检测\n\n" - issue_body += f"检测到需要翻译的文件变更,但无法获取详细摘要信息。\n\n" - issue_body += f"**变更文件数量**: {len(diff_files)}\n" - issue_body += f"**相关PR**: {pr__html_url}\n\n" - - issue_body += f"## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成,仅供参考,请以实际更改为准。\n\n" - issue_body += f"## 🔗 相关PR链接\n\n" - issue_body += f"- {pr__html_url}\n" - - cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item, - need_create_issue_template[need_create_issue_item], - issue_body) + + process_org_item(org_item, cli, pr_owner, pr_repo, pr_number, + siliconflow_api_key, siliconflow_api_base, pr_html_url, issue_title_pr_mark, + conf.translation_agent) diff --git a/ci/tools/translation/translation.yaml b/ci/tools/translation/translation.yaml index 2dfccd235..6842cfac9 100644 --- a/ci/tools/translation/translation.yaml +++ b/ci/tools/translation/translation.yaml @@ -8,14 +8,9 @@ repositories: assign_issue: - title: "[Auto] This is an English translation issue." 
- sign_to: judithsq -# - trigger_pr_path: 'docs/en' -# file_extension: [doc, md, json] -# assign_issue: -# - title: "[Auto] This is a Russian translation issue." -# - sign_to: judithsq exclude: - condition: only_marks_change - check_list: [',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、'] + check_list: [',', ',', '.', '。', ';', ';', ':', ':', '"', '"', '"', '、'] - owner: openeuler repo: website-v2 auto_create_issue: false diff --git a/ci/tools/translation/translation_agent.py b/ci/tools/translation/translation_agent.py index 258826eb5..60e4aac60 100755 --- a/ci/tools/translation/translation_agent.py +++ b/ci/tools/translation/translation_agent.py @@ -20,41 +20,26 @@ from langchain_core.output_parsers import JsonOutputParser from langchain_openai import ChatOpenAI import yaml -# ==================== 配置加载 ==================== - -def load_config(config_file="new_create_translation_issue.yaml"): - """从YAML文件加载配置""" - try: - with open(config_file, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - return config.get('translation_agent', {}) - except FileNotFoundError: - print(f"配置文件 {config_file} 不存在") - raise - except yaml.YAMLError as e: - print(f"解析配置文件时发生错误: {e}") - raise - -# 加载配置 -_config = load_config() - # ==================== 配置常量 ==================== -BACKEND_TYPE = _config.get('backend', {}).get('type', 'siliconflow') -OLLAMA_BASE_URL = _config.get('backend', {}).get('ollama', {}).get('base_url', 'http://localhost:11434') -MODEL_NAME = _config.get('model', {}).get('name', 'Qwen/Qwen3-8B') -MODEL_TEMPERATURE = _config.get('model', {}).get('temperature', 0.1) -MODEL_MAX_RETRY = _config.get('model', {}).get('max_retry', 5) -MODEL_MAX_RETRY_OLLAMA = _config.get('model', {}).get('max_retry_ollama', 1) -PROCESSING_MAX_WORKERS = _config.get('processing', {}).get('max_workers', 8) -SINGLE_FILE_TIMEOUT = _config.get('processing', {}).get('single_file_timeout', 180) -TOTAL_SUMMARY_TIMEOUT = _config.get('processing', {}).get('total_summary_timeout', 300) -LOGGING_LEVEL = _config.get('logging', {}).get('level', 'INFO') -SILICONFLOW_API_KEY = '' -SILICONFLOW_API_BASE ='' +# 后端类型常量 +BACKEND_TYPE_OLLAMA = "ollama" +BACKEND_TYPE_SILICONFLOW = "siliconflow" + +# 默认配置值 +DEFAULT_BACKEND_TYPE = BACKEND_TYPE_SILICONFLOW +DEFAULT_OLLAMA_BASE_URL = 'http://localhost:11434' +DEFAULT_MODEL_NAME = 'Qwen/Qwen3-8B' +DEFAULT_MODEL_TEMPERATURE = 0.1 +DEFAULT_MODEL_MAX_RETRY = 5 +DEFAULT_MODEL_MAX_RETRY_OLLAMA = 1 +DEFAULT_PROCESSING_MAX_WORKERS = 8 +DEFAULT_SINGLE_FILE_TIMEOUT = 180 +DEFAULT_TOTAL_SUMMARY_TIMEOUT = 300 +DEFAULT_LOGGING_LEVEL = 'INFO' # 配置日志 -logging.basicConfig(level=getattr(logging, LOGGING_LEVEL.upper())) +logging.basicConfig(level=getattr(logging, DEFAULT_LOGGING_LEVEL.upper())) logger = logging.getLogger(__name__) # ==================== 数据模型定义 ==================== @@ -103,7 +88,7 @@ class ProcessingResult: # ==================== Token 统计工具 ==================== class TokenCounter: - def __init__(self, model_name=MODEL_NAME): + def __init__(self, model_name=DEFAULT_MODEL_NAME): self.model_name = model_name self.prompt_tokens = 0 self.completion_tokens = 0 @@ -432,52 +417,62 @@ class LLMFactory: """LLM工厂类""" @staticmethod - def create_chat_llm(model_name: str = None, base_url: str = None): + def create_chat_llm(model_name: str = None, base_url: str = None, backend_type: str = None, + temperature: float = None, siliconflow_api_key: str = "", siliconflow_api_base: str = ""): """创建LLM实例""" if model_name is None: - model_name = MODEL_NAME + model_name = DEFAULT_MODEL_NAME if 
base_url is None: - base_url = OLLAMA_BASE_URL + base_url = DEFAULT_OLLAMA_BASE_URL + if backend_type is None: + backend_type = DEFAULT_BACKEND_TYPE + if temperature is None: + temperature = DEFAULT_MODEL_TEMPERATURE - if BACKEND_TYPE == "ollama": + if backend_type == BACKEND_TYPE_OLLAMA: return ChatOllama( model=model_name, base_url=base_url, - temperature=MODEL_TEMPERATURE + temperature=temperature ) - elif BACKEND_TYPE == "siliconflow": + elif backend_type == BACKEND_TYPE_SILICONFLOW: return ChatOpenAI( model=model_name, - api_key=SecretStr(SILICONFLOW_API_KEY), - base_url=SILICONFLOW_API_BASE, - temperature=MODEL_TEMPERATURE + api_key=SecretStr(siliconflow_api_key), + base_url=siliconflow_api_base, + temperature=temperature ) else: - raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}") + raise ValueError(f"不支持的后端类型: {backend_type}") @staticmethod - def create_llm(model_name: str = None, base_url: str = None): + def create_llm(model_name: str = None, base_url: str = None, backend_type: str = None, + temperature: float = None, siliconflow_api_key: str = "", siliconflow_api_base: str = ""): """创建LLM实例""" if model_name is None: - model_name = MODEL_NAME + model_name = DEFAULT_MODEL_NAME if base_url is None: - base_url = OLLAMA_BASE_URL + base_url = DEFAULT_OLLAMA_BASE_URL + if backend_type is None: + backend_type = DEFAULT_BACKEND_TYPE + if temperature is None: + temperature = DEFAULT_MODEL_TEMPERATURE - if BACKEND_TYPE == "ollama": + if backend_type == BACKEND_TYPE_OLLAMA: return Ollama( model=model_name, base_url=base_url, - temperature=MODEL_TEMPERATURE + temperature=temperature ) - elif BACKEND_TYPE == "siliconflow": + elif backend_type == BACKEND_TYPE_SILICONFLOW: return ChatOpenAI( model=model_name, - api_key=SecretStr(SILICONFLOW_API_KEY), - base_url=SILICONFLOW_API_BASE, - temperature=MODEL_TEMPERATURE + api_key=SecretStr(siliconflow_api_key), + base_url=siliconflow_api_base, + temperature=temperature ) else: - raise ValueError(f"不支持的后端类型: {BACKEND_TYPE}") + raise ValueError(f"不支持的后端类型: {backend_type}") class PromptTemplates: """提示模板集合""" @@ -669,15 +664,16 @@ Git Diff 内容: class SingleFileAnalysisChain: """单文件分析任务链""" - def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter): + def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter, backend_type: str = DEFAULT_BACKEND_TYPE): self.llm = llm self.token_counter = token_counter + self.backend_type = backend_type # 创建输出解析器 self.output_parser = JsonOutputParser(pydantic_object=SingleFileSummary) # 根据后端类型选择不同的链构建方式 - if BACKEND_TYPE == "ollama": + if backend_type == BACKEND_TYPE_OLLAMA: self.prompt = PromptTemplates.get_single_file_prompt() self.chain = self.prompt | self.llm.with_structured_output(SingleFileSummary) else: @@ -809,10 +805,11 @@ Git Diff 内容: ]) self.chain = self.prompt | self.llm | self.output_parser - def analyze(self, diff_file_info: DiffFileInfo) -> Optional[SingleFileSummary]: + def analyze(self, diff_file_info: DiffFileInfo, max_retry_ollama: int = DEFAULT_MODEL_MAX_RETRY_OLLAMA, + max_retry: int = DEFAULT_MODEL_MAX_RETRY) -> Optional[SingleFileSummary]: """分析单个文件的改动""" - max_retry = MODEL_MAX_RETRY_OLLAMA if BACKEND_TYPE == "ollama" else MODEL_MAX_RETRY - for attempt in range(1, max_retry + 1): + max_retry_count = max_retry_ollama if self.backend_type == BACKEND_TYPE_OLLAMA else max_retry + for attempt in range(1, max_retry_count + 1): # 如果不是第一次尝试,等待一段时间再重试,避免连续失败 if attempt > 1: delay = min(attempt * 2, 10) # 递增延迟,最多10秒 @@ -843,7 +840,7 @@ Git Diff 内容: "lines_added": 
diff_file_info.lines_added, "lines_deleted": diff_file_info.lines_deleted } - if BACKEND_TYPE != "ollama": + if self.backend_type != BACKEND_TYPE_OLLAMA: invoke_args["response_format"] = {"type": "json_object"} result = self.chain.invoke(invoke_args) @@ -870,7 +867,7 @@ Git Diff 内容: # 结果无效,记录并重试 logger.warning(f"分析文件 {diff_file_info.file_path} 返回无效结果,第{attempt}次尝试") - if attempt < max_retry: + if attempt < max_retry_count: continue except Exception as e: err_str = str(e) @@ -884,29 +881,30 @@ Git Diff 内容: is_http_error = True if is_http_error: logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e},第{attempt}次尝试,10秒后重试...") - if attempt < max_retry: + if attempt < max_retry_count: time.sleep(10) continue else: logger.error(f"分析文件 {diff_file_info.file_path} 时发生错误: {e},第{attempt}次尝试") # 其它异常直接进入下一次重试 - if attempt < max_retry: + if attempt < max_retry_count: logger.info(f"第{attempt}次尝试失败,准备重试...") - logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry}次均未获得结构化输出,放弃。") + logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry_count}次均未获得结构化输出,放弃。") return None class TotalSummaryChain: """总摘要生成任务链""" - def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter): + def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter, backend_type: str = DEFAULT_BACKEND_TYPE): self.llm = llm self.token_counter = token_counter + self.backend_type = backend_type # 创建输出解析器 self.output_parser = JsonOutputParser(pydantic_object=TotalSummary) # 根据后端类型选择不同的链构建方式 - if BACKEND_TYPE == "ollama": + if backend_type == BACKEND_TYPE_OLLAMA: self.prompt = PromptTemplates.get_total_summary_prompt() self.chain = self.prompt | self.llm.with_structured_output(TotalSummary) else: @@ -985,7 +983,7 @@ class TotalSummaryChain: ]) self.chain = self.prompt | self.llm | self.output_parser - def generate(self, file_summaries: List[SingleFileSummary]) -> Optional[TotalSummary]: + def generate(self, file_summaries: List[SingleFileSummary], total_summary_timeout: int = DEFAULT_TOTAL_SUMMARY_TIMEOUT) -> Optional[TotalSummary]: """生成总摘要""" try: total_files = len(file_summaries) @@ -1027,16 +1025,16 @@ class TotalSummaryChain: "total_files": total_files, "total_lines": total_lines } - if BACKEND_TYPE != "ollama": + if self.backend_type != BACKEND_TYPE_OLLAMA: # 为 SiliconFlow 添加 response_format 参数 invoke_args["response_format"] = {"type": "json_object"} # 提交任务并设置超时 future = timeout_executor.submit(self.chain.invoke, invoke_args) try: - result = future.result(timeout=TOTAL_SUMMARY_TIMEOUT) + result = future.result(timeout=total_summary_timeout) except (FutureTimeoutError, TimeoutError) as e: - logger.error(f"生成总摘要超时({TOTAL_SUMMARY_TIMEOUT}秒),放弃生成总摘要: {type(e).__name__}") + logger.error(f"生成总摘要超时({total_summary_timeout}秒),放弃生成总摘要: {type(e).__name__}") try: future.cancel() # 尝试取消超时的任务 except Exception as cancel_e: @@ -1091,23 +1089,40 @@ class TotalSummaryChain: class GitDiffSummarizer: """Git Diff 摘要生成器""" - def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", model_name: str = None, base_url: str = None): + def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", + model_name: str = None, base_url: str = None, backend_type: str = None, + temperature: float = None, max_workers: int = None, single_file_timeout: int = None, + total_summary_timeout: int = None, max_retry: int = None, max_retry_ollama: int = None): if model_name is None: - model_name = MODEL_NAME + model_name = 
DEFAULT_MODEL_NAME if base_url is None: - base_url = OLLAMA_BASE_URL - - # 设置siliconflow API配置 - global SILICONFLOW_API_KEY, SILICONFLOW_API_BASE - if siliconflow_api_key: - SILICONFLOW_API_KEY = siliconflow_api_key - if siliconflow_api_base: - SILICONFLOW_API_BASE = siliconflow_api_base + base_url = DEFAULT_OLLAMA_BASE_URL + if backend_type is None: + backend_type = DEFAULT_BACKEND_TYPE + if temperature is None: + temperature = DEFAULT_MODEL_TEMPERATURE + if max_workers is None: + max_workers = DEFAULT_PROCESSING_MAX_WORKERS + if single_file_timeout is None: + single_file_timeout = DEFAULT_SINGLE_FILE_TIMEOUT + if total_summary_timeout is None: + total_summary_timeout = DEFAULT_TOTAL_SUMMARY_TIMEOUT + if max_retry is None: + max_retry = DEFAULT_MODEL_MAX_RETRY + if max_retry_ollama is None: + max_retry_ollama = DEFAULT_MODEL_MAX_RETRY_OLLAMA + + self.backend_type = backend_type + self.max_workers = max_workers + self.single_file_timeout = single_file_timeout + self.total_summary_timeout = total_summary_timeout + self.max_retry = max_retry + self.max_retry_ollama = max_retry_ollama self.token_counter = TokenCounter(model_name) - self.llm = LLMFactory.create_chat_llm(model_name, base_url) - self.single_file_chain = SingleFileAnalysisChain(self.llm, self.token_counter) - self.total_summary_chain = TotalSummaryChain(self.llm, self.token_counter) + self.llm = LLMFactory.create_chat_llm(model_name, base_url, backend_type, temperature, siliconflow_api_key, siliconflow_api_base) + self.single_file_chain = SingleFileAnalysisChain(self.llm, self.token_counter, backend_type) + self.total_summary_chain = TotalSummaryChain(self.llm, self.token_counter, backend_type) def cleanup(self): """清理资源,确保程序能正确退出""" @@ -1119,7 +1134,7 @@ class GitDiffSummarizer: self.llm._client.close() # 如果是 ChatOpenAI,尝试关闭底层的 HTTP 客户端 - if BACKEND_TYPE == "siliconflow" and hasattr(self.llm, 'client'): + if self.backend_type == BACKEND_TYPE_SILICONFLOW and hasattr(self.llm, 'client'): try: # 强制关闭 httpx 客户端 if hasattr(self.llm.client, '_client'): @@ -1133,7 +1148,7 @@ class GitDiffSummarizer: def process_git_diff(self, diff_content: str, max_workers: int = None) -> ProcessingResult: if max_workers is None: - max_workers = PROCESSING_MAX_WORKERS + max_workers = self.max_workers logger.info("开始解析git diff...") files = DiffParser.parse_git_diff(diff_content) @@ -1154,12 +1169,12 @@ class GitDiffSummarizer: try: executor = ThreadPoolExecutor(max_workers=max_workers) future_to_file = { - executor.submit(self.single_file_chain.analyze, file_info): file_info.file_path + executor.submit(self.single_file_chain.analyze, file_info, self.max_retry_ollama, self.max_retry): file_info.file_path for file_info in files } # 设置更长的整体超时时间,避免与单个文件超时冲突 - overall_timeout = SINGLE_FILE_TIMEOUT * len(files) + 600 # 给每个文件的时间 + 额外缓冲 + overall_timeout = self.single_file_timeout * len(files) + 600 # 给每个文件的时间 + 额外缓冲 completed_count = 0 total_count = len(future_to_file) @@ -1205,7 +1220,7 @@ class GitDiffSummarizer: if file_summaries: logger.info(f"基于 {len(file_summaries)} 个成功处理的文件生成总摘要...") try: - total_summary = self.total_summary_chain.generate(file_summaries) + total_summary = self.total_summary_chain.generate(file_summaries, self.total_summary_timeout) if total_summary: logger.info("总摘要生成成功") else: @@ -1223,9 +1238,14 @@ class GitDiffSummarizer: # ==================== 主函数 ==================== -def get_agent_summary(sample_diff, siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1"): +def get_agent_summary(sample_diff, 
siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1", + model_name=None, base_url=None, backend_type=None, temperature=None, + max_workers=None, single_file_timeout=None, total_summary_timeout=None, + max_retry=None, max_retry_ollama=None): - summarizer = GitDiffSummarizer(siliconflow_api_key, siliconflow_api_base) + summarizer = GitDiffSummarizer(siliconflow_api_key, siliconflow_api_base, model_name, base_url, + backend_type, temperature, max_workers, single_file_timeout, + total_summary_timeout, max_retry, max_retry_ollama) result = None try: result = summarizer.process_git_diff(sample_diff) @@ -1269,7 +1289,6 @@ def get_agent_summary(sample_diff, siliconflow_api_key="", siliconflow_api_base= print(f"Prompt tokens: {stats['prompt_tokens']}") print(f"Completion tokens: {stats['completion_tokens']}") print(f"Total tokens: {stats['total_tokens']}") - # exit() return result if __name__ == "__main__": -- Gitee From 77745d3a03bef79d0f944b3a1b31e880dc78a05f Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Fri, 26 Sep 2025 02:03:38 +0800 Subject: [PATCH 6/8] update for review --- .../new_create_translation_issue_AI.py | 83 ++++++++++++------- .../new_create_translation_issue_AI.yaml | 6 +- ci/tools/translation/translation_agent.py | 62 +++++++++----- 3 files changed, 98 insertions(+), 53 deletions(-) diff --git a/ci/tools/translation/new_create_translation_issue_AI.py b/ci/tools/translation/new_create_translation_issue_AI.py index 272b70711..4e0a52e3b 100755 --- a/ci/tools/translation/new_create_translation_issue_AI.py +++ b/ci/tools/translation/new_create_translation_issue_AI.py @@ -132,7 +132,8 @@ class GiteeClient: logger.error("can not get diff file from PR: {}".format(req_url)) return result - def check_issue_exists(self, owner: str, repo: str, issue_titles: list[str]) -> tuple[list[str], list[str]]: + def check_issue_exists(self, owner: str, repo: str, + issue_titles: list[str]) -> tuple[list[str], list[str]]: req_url = "https://gitee.com/api/v5/repos/{}/{}/issues".format(owner, repo) page = 1 existed_issues = [] @@ -270,7 +271,8 @@ def load_config_yaml(yaml_path): return Config(**data) -def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger], issue_title_pr_mark: str) -> tuple[int, list[str], list[str], dict]: +def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger], + issue_title_pr_mark: str) -> tuple[int, list[str], list[str], dict]: """ 分析diff文件,识别需要创建issue的文件 返回: (文件计数, 中文文件列表, 英文文件列表, 需要创建的issue字典) @@ -282,16 +284,21 @@ def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger] for trigger in issue_triggers: for diff_file in diff_files: - if diff_file.startswith(trigger.trigger_pr_path) and diff_file.split('.')[-1] in trigger.file_extension: + if diff_file.startswith(trigger.trigger_pr_path) and \ + diff_file.split('.')[-1] in trigger.file_extension: logger.info("file {} has been changed".format(diff_file)) file_count += 1 if "/zh" in trigger.trigger_pr_path: - need_create_issue["zh"] = [trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + need_create_issue["zh"] = [ + trigger.issue_assignee, + "{}({}).".format(trigger.issue_title, issue_title_pr_mark) + ] zh_file.append(diff_file.replace("zh/", "")) elif "/en" in trigger.trigger_pr_path: - need_create_issue["en"] = [trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark)] + need_create_issue["en"] = [ + trigger.issue_assignee, + 
"{}({}).".format(trigger.issue_title, issue_title_pr_mark) + ] en_file.append(diff_file.replace("en/", "")) else: logger.warning("not a range") @@ -317,7 +324,8 @@ def prepare_issue_templates(need_create_issue: dict) -> tuple[dict, list[str]]: need_create_issue_titles = [] for issue_item in need_create_issue: need_create_issue_titles.append(need_create_issue[issue_item][1]) - need_create_issue_template[need_create_issue[issue_item][1]] = need_create_issue[issue_item][0] + need_create_issue_template[need_create_issue[issue_item][1]] = \ + need_create_issue[issue_item][0] return need_create_issue_template, need_create_issue_titles @@ -333,7 +341,8 @@ def generate_issue_body(issue_summary, diff_files: list[str], pr_html_url: str) if issue_summary.total_files != issue_summary.processed_files: # 注意人工审查提醒 issue_body += f"- **未处理文件数**: {issue_summary.total_files - issue_summary.processed_files}\n" - issue_body += f"- **提醒:机器人未能及时自动生成所有改动的摘要,请注意人工审查!**\n" + issue_body += f"- **提醒:机器人未能及时自动生成所有改动的摘要," \ + f"请注意人工审查!**\n" if issue_summary.total_summary: total = issue_summary.total_summary issue_body += f"- **总改动行数**: {total.total_lines_changed}\n" @@ -358,15 +367,17 @@ def generate_issue_body(issue_summary, diff_files: list[str], pr_html_url: str) issue_body += f"**变更文件数量**: {len(diff_files)}\n" issue_body += f"**相关PR**: {pr_html_url}\n\n" - issue_body += f"## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成,仅供参考,请以实际更改为准。\n\n" + issue_body += f"## ❗️ 本Issue的摘要内容基于AI Agent技术自动生成," \ + f"仅供参考,请以实际更改为准。\n\n" issue_body += f"## 🔗 相关PR链接\n\n" issue_body += f"- {pr_html_url}\n" return issue_body -def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: str, pr_number: int, - siliconflow_api_key: str, siliconflow_api_base: str, pr_html_url: str, issue_title_pr_mark: str, +def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: str, + pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str, + pr_html_url: str, issue_title_pr_mark: str, translation_agent_config: TranslationAgentConfig = None): """ 处理单个组织配置项 @@ -388,7 +399,8 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st # 验证是否需要创建issue if file_count == 0: logger.warning( - "NOTE: https://gitee.com/{}/files change files out of translate range".format(issue_title_pr_mark)) + "NOTE: https://gitee.com/{}/files change files out of translate range" + .format(issue_title_pr_mark)) return if changed_same_files: @@ -406,7 +418,8 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_titles) if not need_create_issue_list: - feedback_comment = "issue has already created, please go to check issue: {}".format(existed_issue_list) + feedback_comment = "issue has already created, please go to check issue: {}".format( + existed_issue_list) logger.info("Warning: " + feedback_comment) cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) return @@ -428,21 +441,30 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st max_retry = model_config.get('max_retry', 5) max_retry_ollama = model_config.get('max_retry_ollama', 1) - issue_summary = get_agent_summary( - diff_content, siliconflow_api_key, siliconflow_api_base, - model_name=model_name, backend_type=backend_type, temperature=temperature, - max_workers=max_workers, single_file_timeout=single_file_timeout, - total_summary_timeout=total_summary_timeout, max_retry=max_retry, - max_retry_ollama=max_retry_ollama - ) - 
issue_body = generate_issue_body(issue_summary, diff_files, pr_html_url) + try: + issue_summary = get_agent_summary( + diff_content, siliconflow_api_key, siliconflow_api_base, + model_name=model_name, backend_type=backend_type, temperature=temperature, + max_workers=max_workers, single_file_timeout=single_file_timeout, + total_summary_timeout=total_summary_timeout, max_retry=max_retry, + max_retry_ollama=max_retry_ollama + ) + issue_body = generate_issue_body(issue_summary, diff_files, pr_html_url) + except Exception as e: + logger.error(f"AI Agent调用失败: {e}") + logger.info("回退到传统方式创建issue") + # 使用传统方式的简单issue body格式 + issue_body = "### Related PR link \n - {}".format(pr_html_url) - cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_item, + cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, + need_create_issue_item, need_create_issue_template[need_create_issue_item], issue_body) -def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_owner: str, pr_repo: str, - pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str): +def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, + pr_owner: str, pr_repo: str, + pr_number: int, siliconflow_api_key: str, + siliconflow_api_base: str): """ 基于PR diff和配置创建issue的主函数 """ @@ -454,8 +476,8 @@ def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, pr_ continue process_org_item(org_item, cli, pr_owner, pr_repo, pr_number, - siliconflow_api_key, siliconflow_api_base, pr_html_url, issue_title_pr_mark, - conf.translation_agent) + siliconflow_api_key, siliconflow_api_base, pr_html_url, + issue_title_pr_mark, conf.translation_agent) @@ -466,7 +488,9 @@ def main(): parser.add_argument('--pr_repo', type=str, required=True, help='the PR of repo') parser.add_argument('--pr_number', type=str, required=True, help='the PR number') parser.add_argument('--siliconflow_api_key', type=str, default="", help='the API key of siliconflow') - parser.add_argument('--siliconflow_api_base', type=str, default="https://api.siliconflow.cn/v1", help='the base URL of siliconflow') + parser.add_argument('--siliconflow_api_base', type=str, + default="https://api.siliconflow.cn/v1", + help='the base URL of siliconflow') args = Args() parser.parse_args(args=sys.argv[1:], namespace=args) args.validate() @@ -482,7 +506,8 @@ def main(): pr_number = args.pr_number siliconflow_api_key = args.siliconflow_api_key siliconflow_api_base = args.siliconflow_api_base - create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, siliconflow_api_key, siliconflow_api_base) + create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, + siliconflow_api_key, siliconflow_api_base) if __name__ == '__main__': diff --git a/ci/tools/translation/new_create_translation_issue_AI.yaml b/ci/tools/translation/new_create_translation_issue_AI.yaml index bc48ab7a2..4455c5750 100755 --- a/ci/tools/translation/new_create_translation_issue_AI.yaml +++ b/ci/tools/translation/new_create_translation_issue_AI.yaml @@ -9,7 +9,7 @@ translation_agent: # Model Configuration model: - name: "Qwen/Qwen3-32B" # Options: "llama3" "Qwen/Qwen3-8B" "THUDM/GLM-4-32B-0414" or others + name: "Qwen/Qwen3-32B" # Options: "llama3" "Qwen/Qwen3-8B" "THUDM/GLM-4-32B-0414" or others temperature: 0.1 max_retry: 5 # For siliconflow backend max_retry_ollama: 1 # For ollama backend @@ -35,7 +35,7 @@ orgs: issue_title: "[Auto] This is an English translation issue for the PR" 
issue_assignee: judithsq file_extension: [ doc, md, json ] - change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] + change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '"', '"', '、' ] - org_name: src-openeuler issue_of_owner: openeuler @@ -46,4 +46,4 @@ orgs: issue_title: "[Auto] This is an English translation issue for the PR" issue_assignee: judithsq file_extension: [ doc, md, json ] - change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '“', '”', '、' ] \ No newline at end of file + change_content_exclude: [ ',', ',', '.', '。', ';', ';', ':', ':', '"', '"', '"', '、' ] \ No newline at end of file diff --git a/ci/tools/translation/translation_agent.py b/ci/tools/translation/translation_agent.py index 60e4aac60..dbe89469d 100755 --- a/ci/tools/translation/translation_agent.py +++ b/ci/tools/translation/translation_agent.py @@ -47,7 +47,8 @@ logger = logging.getLogger(__name__) class SingleFileSummary(BaseModel): """单个文件摘要的结构化输出""" file_path: str = Field(description="文件路径", default="") - change_type: Literal["仅涉及标点符号的修改", "涉及到中英文文本内容的修改", "涉及到代码内容的修改", "涉及到其他内容的修改"] = Field(description="改动类型") + change_type: Literal["仅涉及标点符号的修改", "涉及到中英文文本内容的修改", + "涉及到代码内容的修改", "涉及到其他内容的修改"] = Field(description="改动类型") potential_impact: str = Field(description="改动对其他文件潜在的影响") summary: str = Field(description="改动的详细摘要") lines_added: int = Field(description="新增行数", default=0) @@ -56,7 +57,8 @@ class SingleFileSummary(BaseModel): class FileChangeInfo(BaseModel): """文件改动信息""" file_path: str = Field(description="文件路径") - change_type: Literal["仅涉及标点符号的修改", "涉及到中英文文本内容的修改", "涉及到代码内容的修改", "涉及到其他内容的修改"] = Field(description="改动类型") + change_type: Literal["仅涉及标点符号的修改", "涉及到中英文文本内容的修改", + "涉及到代码内容的修改", "涉及到其他内容的修改"] = Field(description="改动类型") lines_changed: int = Field(description="改动行数") class TotalSummary(BaseModel): @@ -418,7 +420,8 @@ class LLMFactory: @staticmethod def create_chat_llm(model_name: str = None, base_url: str = None, backend_type: str = None, - temperature: float = None, siliconflow_api_key: str = "", siliconflow_api_base: str = ""): + temperature: float = None, siliconflow_api_key: str = "", + siliconflow_api_base: str = ""): """创建LLM实例""" if model_name is None: model_name = DEFAULT_MODEL_NAME @@ -447,7 +450,8 @@ class LLMFactory: @staticmethod def create_llm(model_name: str = None, base_url: str = None, backend_type: str = None, - temperature: float = None, siliconflow_api_key: str = "", siliconflow_api_base: str = ""): + temperature: float = None, siliconflow_api_key: str = "", + siliconflow_api_base: str = ""): """创建LLM实例""" if model_name is None: model_name = DEFAULT_MODEL_NAME @@ -664,7 +668,8 @@ Git Diff 内容: class SingleFileAnalysisChain: """单文件分析任务链""" - def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter, backend_type: str = DEFAULT_BACKEND_TYPE): + def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter, + backend_type: str = DEFAULT_BACKEND_TYPE): self.llm = llm self.token_counter = token_counter self.backend_type = backend_type @@ -805,7 +810,8 @@ Git Diff 内容: ]) self.chain = self.prompt | self.llm | self.output_parser - def analyze(self, diff_file_info: DiffFileInfo, max_retry_ollama: int = DEFAULT_MODEL_MAX_RETRY_OLLAMA, + def analyze(self, diff_file_info: DiffFileInfo, + max_retry_ollama: int = DEFAULT_MODEL_MAX_RETRY_OLLAMA, max_retry: int = DEFAULT_MODEL_MAX_RETRY) -> Optional[SingleFileSummary]: """分析单个文件的改动""" max_retry_count = max_retry_ollama if self.backend_type 
== BACKEND_TYPE_OLLAMA else max_retry @@ -877,10 +883,12 @@ Git Diff 内容: if code in err_str: is_http_error = True break - if ("status code" in err_str or "HTTP" in err_str or "response" in err_str) and any(code in err_str for code in ["404", "500", "502", "503", "504"]): + if ("status code" in err_str or "HTTP" in err_str or "response" in err_str) and \ + any(code in err_str for code in ["404", "500", "502", "503", "504"]): is_http_error = True if is_http_error: - logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e},第{attempt}次尝试,10秒后重试...") + logger.error(f"分析文件 {diff_file_info.file_path} 时发生HTTP错误: {e}," + f"第{attempt}次尝试,10秒后重试...") if attempt < max_retry_count: time.sleep(10) continue @@ -889,13 +897,15 @@ Git Diff 内容: # 其它异常直接进入下一次重试 if attempt < max_retry_count: logger.info(f"第{attempt}次尝试失败,准备重试...") - logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry_count}次均未获得结构化输出,放弃。") + logger.error(f"分析文件 {diff_file_info.file_path} 连续{max_retry_count}次均未获得结构化输出," + f"放弃。") return None class TotalSummaryChain: """总摘要生成任务链""" - def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter, backend_type: str = DEFAULT_BACKEND_TYPE): + def __init__(self, llm: ChatOllama | ChatOpenAI, token_counter: TokenCounter, + backend_type: str = DEFAULT_BACKEND_TYPE): self.llm = llm self.token_counter = token_counter self.backend_type = backend_type @@ -983,7 +993,8 @@ class TotalSummaryChain: ]) self.chain = self.prompt | self.llm | self.output_parser - def generate(self, file_summaries: List[SingleFileSummary], total_summary_timeout: int = DEFAULT_TOTAL_SUMMARY_TIMEOUT) -> Optional[TotalSummary]: + def generate(self, file_summaries: List[SingleFileSummary], + total_summary_timeout: int = DEFAULT_TOTAL_SUMMARY_TIMEOUT) -> Optional[TotalSummary]: """生成总摘要""" try: total_files = len(file_summaries) @@ -1034,7 +1045,8 @@ class TotalSummaryChain: try: result = future.result(timeout=total_summary_timeout) except (FutureTimeoutError, TimeoutError) as e: - logger.error(f"生成总摘要超时({total_summary_timeout}秒),放弃生成总摘要: {type(e).__name__}") + logger.error(f"生成总摘要超时({total_summary_timeout}秒),放弃生成总摘要: " + f"{type(e).__name__}") try: future.cancel() # 尝试取消超时的任务 except Exception as cancel_e: @@ -1089,10 +1101,12 @@ class TotalSummaryChain: class GitDiffSummarizer: """Git Diff 摘要生成器""" - def __init__(self, siliconflow_api_key: str = "", siliconflow_api_base: str = "https://api.siliconflow.cn/v1", + def __init__(self, siliconflow_api_key: str = "", + siliconflow_api_base: str = "https://api.siliconflow.cn/v1", model_name: str = None, base_url: str = None, backend_type: str = None, - temperature: float = None, max_workers: int = None, single_file_timeout: int = None, - total_summary_timeout: int = None, max_retry: int = None, max_retry_ollama: int = None): + temperature: float = None, max_workers: int = None, + single_file_timeout: int = None, total_summary_timeout: int = None, + max_retry: int = None, max_retry_ollama: int = None): if model_name is None: model_name = DEFAULT_MODEL_NAME if base_url is None: @@ -1120,7 +1134,8 @@ class GitDiffSummarizer: self.max_retry_ollama = max_retry_ollama self.token_counter = TokenCounter(model_name) - self.llm = LLMFactory.create_chat_llm(model_name, base_url, backend_type, temperature, siliconflow_api_key, siliconflow_api_base) + self.llm = LLMFactory.create_chat_llm(model_name, base_url, backend_type, temperature, + siliconflow_api_key, siliconflow_api_base) self.single_file_chain = SingleFileAnalysisChain(self.llm, self.token_counter, backend_type) 
self.total_summary_chain = TotalSummaryChain(self.llm, self.token_counter, backend_type) @@ -1187,11 +1202,14 @@ class GitDiffSummarizer: summary = future.result(timeout=5) # 短暂缓冲时间,因为任务已经完成 if summary: file_summaries.append(summary) - logger.info(f"完成文件 {file_path} 的摘要生成 ({completed_count}/{total_count})") + logger.info(f"完成文件 {file_path} 的摘要生成 " + f"({completed_count}/{total_count})") else: - logger.warning(f"文件 {file_path} 的摘要生成失败 ({completed_count}/{total_count})") + logger.warning(f"文件 {file_path} 的摘要生成失败 " + f"({completed_count}/{total_count})") except (FutureTimeoutError, TimeoutError) as e: - logger.error(f"文件 {file_path} 的摘要获取超时,跳过该文件: {type(e).__name__} ({completed_count}/{total_count})") + logger.error(f"文件 {file_path} 的摘要获取超时,跳过该文件: " + f"{type(e).__name__} ({completed_count}/{total_count})") try: future.cancel() except Exception as cancel_e: @@ -1199,7 +1217,8 @@ class GitDiffSummarizer: except Exception as e: logger.error(f"处理文件 {file_path} 时发生异常: {e} ({completed_count}/{total_count})") except (FutureTimeoutError, TimeoutError) as overall_e: - logger.error(f"整体处理超时({overall_timeout}秒),已完成{completed_count}/{total_count}个文件") + logger.error(f"整体处理超时({overall_timeout}秒)," + f"已完成{completed_count}/{total_count}个文件") # 取消所有未完成的任务 for future in future_to_file: if not future.done(): @@ -1238,7 +1257,8 @@ class GitDiffSummarizer: # ==================== 主函数 ==================== -def get_agent_summary(sample_diff, siliconflow_api_key="", siliconflow_api_base="https://api.siliconflow.cn/v1", +def get_agent_summary(sample_diff, siliconflow_api_key="", + siliconflow_api_base="https://api.siliconflow.cn/v1", model_name=None, base_url=None, backend_type=None, temperature=None, max_workers=None, single_file_timeout=None, total_summary_timeout=None, max_retry=None, max_retry_ollama=None): -- Gitee From 08dcc65f6b27cbbe9b2531bcb44891b65b6e12ab Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Fri, 26 Sep 2025 17:58:38 +0800 Subject: [PATCH 7/8] update for another review --- .../new_create_translation_issue_AI.py | 146 +++++++++++++++++- 1 file changed, 138 insertions(+), 8 deletions(-) diff --git a/ci/tools/translation/new_create_translation_issue_AI.py b/ci/tools/translation/new_create_translation_issue_AI.py index 4e0a52e3b..5d2fac0c6 100755 --- a/ci/tools/translation/new_create_translation_issue_AI.py +++ b/ci/tools/translation/new_create_translation_issue_AI.py @@ -173,7 +173,7 @@ class GiteeClient: } req_args = ReqArgs(method="POST", url=req_url, headers=self.headers, data=json.dumps(req_body)) result: dict | None = send_request(req_args, {}) - return result is None + return result is not None def add_pr_comment(self, owner, repo, number, body): req_url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(owner, repo, number) @@ -375,6 +375,78 @@ def generate_issue_body(issue_summary, diff_files: list[str], pr_html_url: str) return issue_body +def generate_issue_body_without_ai_summary(diff_files: list[str], pr_html_url: str) -> str: + """ + 生成不包含AI摘要的issue正文内容 + """ + issue_body = f"## ⚠️ 翻译变更检测\n\n" + issue_body += f"检测到需要翻译的文件变更,但本次变更不包含docs/zh路径下的文件,因此未生成AI摘要。\n\n" + issue_body += f"**变更文件数量**: {len(diff_files)}\n" + issue_body += f"**相关PR**: {pr_html_url}\n\n" + issue_body += f"## 📝 变更文件列表\n\n" + + # 只显示docs/zh路径下的文件 + docs_zh_files = [f for f in diff_files if f.startswith('docs/zh/')] + if docs_zh_files: + for file_path in docs_zh_files: + issue_body += f"- {file_path}\n" + else: + issue_body += f"本次变更未包含docs/zh路径下的文件。\n" + + issue_body += f"\n## 🔗 
相关PR链接\n\n" + issue_body += f"- {pr_html_url}\n" + + return issue_body + + +def filter_docs_zh_files(diff_content: str) -> str: + """ + 过滤diff内容,只保留docs/zh路径下的文件变更 + """ + if not diff_content: + return "" + + lines = diff_content.split('\n') + filtered_lines = [] + current_file_section = [] + in_docs_zh_file = False + + for line in lines: + if line.startswith('diff --git'): + # 处理前一个文件 + if in_docs_zh_file and current_file_section: + filtered_lines.extend(current_file_section) + + # 检查新文件是否在docs/zh路径下 + current_file_section = [line] + in_docs_zh_file = False + + # 提取文件路径 + if ' a/' in line and ' b/' in line: + # 找到 a/ 和 b/ 的位置 + a_pos = line.find(' a/') + b_pos = line.find(' b/') + + if a_pos != -1 and b_pos != -1 and a_pos < b_pos: + # 提取a/和b/之间的路径 + a_start = a_pos + 3 # 跳过 ' a/' + file_path = line[a_start:b_pos] + + # 检查是否在docs/zh路径下 + if file_path.startswith('docs/zh/'): + in_docs_zh_file = True + logger.info(f"包含docs/zh路径下的文件: {file_path}") + else: + # 继续当前文件的内容 + current_file_section.append(line) + + # 处理最后一个文件 + if in_docs_zh_file and current_file_section: + filtered_lines.extend(current_file_section) + + return '\n'.join(filtered_lines) + + def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: str, pr_number: int, siliconflow_api_key: str, siliconflow_api_base: str, pr_html_url: str, issue_title_pr_mark: str, @@ -387,6 +459,55 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st if diff_content is None: sys.exit(1) + # 过滤只保留docs/zh路径下的文件 + filtered_diff_content = filter_docs_zh_files(diff_content) + + # 检查是否有docs/zh路径下的文件变更 + if not filtered_diff_content.strip(): + logger.info("没有docs/zh路径下的文件变更,跳过AI摘要生成") + # 创建简单的issue,不包含AI摘要 + diff_files = get_diff_file_list(diff_content) + file_count, zh_file, en_file, need_create_issue = analyze_diff_files( + diff_files, org_item.issue_triggers, issue_title_pr_mark) + + if file_count == 0: + logger.warning( + "NOTE: https://gitee.com/{}/files change files out of translate range" + .format(issue_title_pr_mark)) + return + + if check_same_files_changed(zh_file, en_file): + logger.info("changed the same files in en and zh path, no need to create issue") + return + + need_create_issue_template, need_create_issue_titles = prepare_issue_templates(need_create_issue) + if not need_create_issue_titles: + return + + need_create_issue_list, existed_issue_list = cli.check_issue_exists( + org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_titles) + + if not need_create_issue_list: + feedback_comment = "所有相关的翻译issue已经存在,请检查: {}".format( + ", ".join(existed_issue_list)) + logger.info("Warning: " + feedback_comment) + cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) + return + + # 创建不包含AI摘要的简单issue + for need_create_issue_item in need_create_issue_list: + issue_body = generate_issue_body_without_ai_summary(diff_files, pr_html_url) + success = cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, + need_create_issue_item, + need_create_issue_template[need_create_issue_item], issue_body) + if success: + logger.info(f"成功创建issue: {need_create_issue_item}") + else: + logger.error(f"创建issue失败: {need_create_issue_item}") + error_comment = f"创建翻译issue失败: {need_create_issue_item},请手动创建" + cli.add_pr_comment(pr_owner, pr_repo, pr_number, error_comment) + return + diff_files = get_diff_file_list(diff_content) # 分析diff文件 @@ -418,8 +539,8 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st org_item.issue_of_owner, org_item.issue_of_repo, 
need_create_issue_titles) if not need_create_issue_list: - feedback_comment = "issue has already created, please go to check issue: {}".format( - existed_issue_list) + feedback_comment = "所有相关的翻译issue已经存在,请检查: {}".format( + ", ".join(existed_issue_list)) logger.info("Warning: " + feedback_comment) cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) return @@ -442,23 +563,32 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st max_retry_ollama = model_config.get('max_retry_ollama', 1) try: + # 使用过滤后的diff内容生成AI摘要 issue_summary = get_agent_summary( - diff_content, siliconflow_api_key, siliconflow_api_base, + filtered_diff_content, siliconflow_api_key, siliconflow_api_base, model_name=model_name, backend_type=backend_type, temperature=temperature, max_workers=max_workers, single_file_timeout=single_file_timeout, total_summary_timeout=total_summary_timeout, max_retry=max_retry, max_retry_ollama=max_retry_ollama ) issue_body = generate_issue_body(issue_summary, diff_files, pr_html_url) + logger.info("AI Agent成功生成issue内容") except Exception as e: logger.error(f"AI Agent调用失败: {e}") logger.info("回退到传统方式创建issue") # 使用传统方式的简单issue body格式 issue_body = "### Related PR link \n - {}".format(pr_html_url) - cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, - need_create_issue_item, - need_create_issue_template[need_create_issue_item], issue_body) + success = cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, + need_create_issue_item, + need_create_issue_template[need_create_issue_item], issue_body) + if success: + logger.info(f"成功创建issue: {need_create_issue_item}") + else: + logger.error(f"创建issue失败: {need_create_issue_item}") + # 添加PR评论说明创建失败 + error_comment = f"创建翻译issue失败: {need_create_issue_item},请手动创建" + cli.add_pr_comment(pr_owner, pr_repo, pr_number, error_comment) def create_issue_based_on_pr_diff_and_config(conf: Config, cli: GiteeClient, @@ -503,7 +633,7 @@ def main(): pr_owner = args.pr_owner pr_repo = args.pr_repo - pr_number = args.pr_number + pr_number = int(args.pr_number) siliconflow_api_key = args.siliconflow_api_key siliconflow_api_base = args.siliconflow_api_base create_issue_based_on_pr_diff_and_config(conf, cli, pr_owner, pr_repo, pr_number, -- Gitee From e56305221123c32f11fe022875fd6771e2a19da7 Mon Sep 17 00:00:00 2001 From: petermouse666 <708975811@qq.com> Date: Mon, 29 Sep 2025 10:44:05 +0800 Subject: [PATCH 8/8] update for review --- .../new_create_translation_issue_AI.py | 239 ++++++++---------- 1 file changed, 111 insertions(+), 128 deletions(-) diff --git a/ci/tools/translation/new_create_translation_issue_AI.py b/ci/tools/translation/new_create_translation_issue_AI.py index 5d2fac0c6..6ae647731 100755 --- a/ci/tools/translation/new_create_translation_issue_AI.py +++ b/ci/tools/translation/new_create_translation_issue_AI.py @@ -81,9 +81,6 @@ class ReqArgs: T = TypeVar('T') -content_type_is_text = "text/plain" -content_type_is_json_dict = {} -content_type_is_json_list = [] def send_request(args: ReqArgs, t: Generic[T]) -> T: @@ -186,11 +183,21 @@ class GiteeClient: def check_only_marks_changed(self, owner, repo, number, check_list): diff_content = self.get_diff_content(owner, repo, number) - deleted_strs, inserted_strs = get_diff_content_list(diff_content) + + # 检查docs/en路径下是否有对应的文件变更 + zh_files_in_en = check_zh_files_also_modified_in_en(diff_content) + + # 只检查docs/zh路径下的变更,过滤掉同时在en下修改的文件 + filtered_diff_content = filter_docs_zh_files(diff_content, zh_files_in_en) + if not filtered_diff_content.strip(): + 
logger.info('No docs/zh changes found, skip mark change check') + return + + deleted_strs, inserted_strs = get_diff_content_list(filtered_diff_content) if is_only_marks_changed(deleted_strs, inserted_strs, check_list): - logger.warning('Only marks changed, skip the following steps') + logger.warning('Only marks changed in docs/zh files, skip the following steps') sys.exit(1) - logger.info('Not just only marks changed, continue creating issue') + logger.info('Not just only marks changed in docs/zh files, continue creating issue') def get_diff_file_list(diff_content: str) -> list[str]: @@ -271,19 +278,22 @@ def load_config_yaml(yaml_path): return Config(**data) -def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger], - issue_title_pr_mark: str) -> tuple[int, list[str], list[str], dict]: +def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger], + issue_title_pr_mark: str) -> tuple[int, list[str], dict]: """ - 分析diff文件,识别需要创建issue的文件 - 返回: (文件计数, 中文文件列表, 英文文件列表, 需要创建的issue字典) + 分析diff文件,识别需要创建issue的文件(只处理docs/zh路径下的文件,不包括同时在docs/en下修改的文件) + 返回: (文件计数, 中文文件列表, 需要创建的issue字典) """ file_count = 0 zh_file = [] - en_file = [] need_create_issue = {} - + for trigger in issue_triggers: for diff_file in diff_files: + # 只处理docs/zh路径下的文件 + if not diff_file.startswith('docs/zh/'): + continue + if diff_file.startswith(trigger.trigger_pr_path) and \ diff_file.split('.')[-1] in trigger.file_extension: logger.info("file {} has been changed".format(diff_file)) @@ -293,27 +303,36 @@ def analyze_diff_files(diff_files: list[str], issue_triggers: list[IssueTrigger] trigger.issue_assignee, "{}({}).".format(trigger.issue_title, issue_title_pr_mark) ] - zh_file.append(diff_file.replace("zh/", "")) - elif "/en" in trigger.trigger_pr_path: - need_create_issue["en"] = [ - trigger.issue_assignee, - "{}({}).".format(trigger.issue_title, issue_title_pr_mark) - ] - en_file.append(diff_file.replace("en/", "")) - else: - logger.warning("not a range") - - return file_count, zh_file, en_file, need_create_issue + # 提取相对于docs/zh/的路径 + relative_path = diff_file.replace("docs/zh/", "") + zh_file.append(relative_path) + return file_count, zh_file, need_create_issue -def check_same_files_changed(zh_file: list[str], en_file: list[str]) -> bool: + +def check_zh_files_also_modified_in_en(diff_content: str) -> list[str]: """ - 检查中英文路径下是否修改了相同的文件 + 检查哪些docs/zh文件在docs/en下也有修改 + 返回:同时在docs/zh和docs/en下修改的文件列表(相对于docs/zh/的路径) """ - for z in zh_file: - if z in en_file: - return True - return False + if not diff_content: + return [] + + # 获取所有diff文件 + all_diff_files = get_diff_file_list(diff_content) + + # 获取docs/zh和docs/en下的文件 + zh_files = [f.replace("docs/zh/", "") for f in all_diff_files if f.startswith("docs/zh/")] + en_files = [f.replace("docs/en/", "") for f in all_diff_files if f.startswith("docs/en/")] + + # 找出同时在zh和en下修改的文件 + zh_files_in_en = [] + for zh_file in zh_files: + if zh_file in en_files: + zh_files_in_en.append(zh_file) + logger.info(f"文件 {zh_file} 在docs/zh和docs/en下都有修改,将跳过摘要生成") + + return zh_files_in_en def prepare_issue_templates(need_create_issue: dict) -> tuple[dict, list[str]]: @@ -366,84 +385,81 @@ def generate_issue_body(issue_summary, diff_files: list[str], pr_html_url: str) issue_body += f"检测到需要翻译的文件变更,但无法获取详细摘要信息。\n\n" issue_body += f"**变更文件数量**: {len(diff_files)}\n" issue_body += f"**相关PR**: {pr_html_url}\n\n" - + issue_body += f"## 📝 变更文件列表\n\n" + for file_path in diff_files: + issue_body += f"- {file_path}\n" + issue_body += f"\n" + issue_body += f"## ❗️ 
本Issue的摘要内容基于AI Agent技术自动生成," \ - f"仅供参考,请以实际更改为准。\n\n" + f"仅供参考,请以实际更改为准。\n\n" issue_body += f"## 🔗 相关PR链接\n\n" issue_body += f"- {pr_html_url}\n" return issue_body -def generate_issue_body_without_ai_summary(diff_files: list[str], pr_html_url: str) -> str: - """ - 生成不包含AI摘要的issue正文内容 - """ - issue_body = f"## ⚠️ 翻译变更检测\n\n" - issue_body += f"检测到需要翻译的文件变更,但本次变更不包含docs/zh路径下的文件,因此未生成AI摘要。\n\n" - issue_body += f"**变更文件数量**: {len(diff_files)}\n" - issue_body += f"**相关PR**: {pr_html_url}\n\n" - issue_body += f"## 📝 变更文件列表\n\n" - - # 只显示docs/zh路径下的文件 - docs_zh_files = [f for f in diff_files if f.startswith('docs/zh/')] - if docs_zh_files: - for file_path in docs_zh_files: - issue_body += f"- {file_path}\n" - else: - issue_body += f"本次变更未包含docs/zh路径下的文件。\n" - - issue_body += f"\n## 🔗 相关PR链接\n\n" - issue_body += f"- {pr_html_url}\n" - - return issue_body - - -def filter_docs_zh_files(diff_content: str) -> str: +def filter_docs_zh_files(diff_content: str, exclude_files: list[str] = None) -> str: """ 过滤diff内容,只保留docs/zh路径下的文件变更 + :param exclude_files: 需要排除的文件列表(相对于docs/zh/的路径) """ + if exclude_files is None: + exclude_files = [] + if not diff_content: return "" - + lines = diff_content.split('\n') filtered_lines = [] current_file_section = [] in_docs_zh_file = False - + current_file_path = "" + for line in lines: if line.startswith('diff --git'): # 处理前一个文件 if in_docs_zh_file and current_file_section: - filtered_lines.extend(current_file_section) - + # 检查当前文件是否需要排除 + relative_path = current_file_path.replace("docs/zh/", "") + if relative_path not in exclude_files: + filtered_lines.extend(current_file_section) + logger.info(f"包含docs/zh路径下的文件: {current_file_path}") + else: + logger.info(f"排除docs/zh路径下的文件(因为在en下也有修改): {current_file_path}") + # 检查新文件是否在docs/zh路径下 current_file_section = [line] in_docs_zh_file = False - + current_file_path = "" + # 提取文件路径 if ' a/' in line and ' b/' in line: # 找到 a/ 和 b/ 的位置 a_pos = line.find(' a/') b_pos = line.find(' b/') - + if a_pos != -1 and b_pos != -1 and a_pos < b_pos: # 提取a/和b/之间的路径 a_start = a_pos + 3 # 跳过 ' a/' - file_path = line[a_start:b_pos] - + current_file_path = line[a_start:b_pos] + # 检查是否在docs/zh路径下 - if file_path.startswith('docs/zh/'): + if current_file_path.startswith('docs/zh/'): in_docs_zh_file = True - logger.info(f"包含docs/zh路径下的文件: {file_path}") else: # 继续当前文件的内容 current_file_section.append(line) - + # 处理最后一个文件 if in_docs_zh_file and current_file_section: - filtered_lines.extend(current_file_section) - + # 检查当前文件是否需要排除 + relative_path = current_file_path.replace("docs/zh/", "") + if relative_path not in exclude_files: + filtered_lines.extend(current_file_section) + logger.info(f"包含docs/zh路径下的文件: {current_file_path}") + else: + logger.info(f"排除docs/zh路径下的文件(因为在en下也有修改): {current_file_path}") + return '\n'.join(filtered_lines) @@ -458,65 +474,36 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st diff_content = cli.get_diff_content(pr_owner, pr_repo, pr_number) if diff_content is None: sys.exit(1) - - # 过滤只保留docs/zh路径下的文件 - filtered_diff_content = filter_docs_zh_files(diff_content) - - # 检查是否有docs/zh路径下的文件变更 + + # 早期检查:查看diff中是否包含docs/zh路径下的文件变更 + if 'docs/zh/' not in diff_content: + logger.info("diff内容中不包含docs/zh路径下的文件变更,无需创建翻译issue") + return + + # 检查docs/en路径下是否有对应的文件变更 + zh_files_in_en = check_zh_files_also_modified_in_en(diff_content) + if zh_files_in_en: + logger.info(f"发现 {len(zh_files_in_en)} 个在docs/zh和docs/en下同时修改的文件:{zh_files_in_en}") + else: + logger.info("没有发现同时在docs/zh和docs/en下修改的文件") + + # 
过滤只保留docs/zh路径下的文件,排除同时在docs/en下修改的文件 + filtered_diff_content = filter_docs_zh_files(diff_content, zh_files_in_en) + + # 检查是否有需要处理的docs/zh路径下的文件变更 if not filtered_diff_content.strip(): - logger.info("没有docs/zh路径下的文件变更,跳过AI摘要生成") - # 创建简单的issue,不包含AI摘要 - diff_files = get_diff_file_list(diff_content) - file_count, zh_file, en_file, need_create_issue = analyze_diff_files( - diff_files, org_item.issue_triggers, issue_title_pr_mark) - - if file_count == 0: - logger.warning( - "NOTE: https://gitee.com/{}/files change files out of translate range" - .format(issue_title_pr_mark)) - return - - if check_same_files_changed(zh_file, en_file): - logger.info("changed the same files in en and zh path, no need to create issue") - return - - need_create_issue_template, need_create_issue_titles = prepare_issue_templates(need_create_issue) - if not need_create_issue_titles: - return - - need_create_issue_list, existed_issue_list = cli.check_issue_exists( - org_item.issue_of_owner, org_item.issue_of_repo, need_create_issue_titles) - - if not need_create_issue_list: - feedback_comment = "所有相关的翻译issue已经存在,请检查: {}".format( - ", ".join(existed_issue_list)) - logger.info("Warning: " + feedback_comment) - cli.add_pr_comment(pr_owner, pr_repo, pr_number, feedback_comment) - return - - # 创建不包含AI摘要的简单issue - for need_create_issue_item in need_create_issue_list: - issue_body = generate_issue_body_without_ai_summary(diff_files, pr_html_url) - success = cli.create_issue(org_item.issue_of_owner, org_item.issue_of_repo, - need_create_issue_item, - need_create_issue_template[need_create_issue_item], issue_body) - if success: - logger.info(f"成功创建issue: {need_create_issue_item}") - else: - logger.error(f"创建issue失败: {need_create_issue_item}") - error_comment = f"创建翻译issue失败: {need_create_issue_item},请手动创建" - cli.add_pr_comment(pr_owner, pr_repo, pr_number, error_comment) + logger.info("没有需要处理的docs/zh路径下的文件变更,无需创建翻译issue") return - - diff_files = get_diff_file_list(diff_content) - + + diff_files = get_diff_file_list(filtered_diff_content) + logger.info(f"解析出 {len(diff_files)} 个变更文件:{diff_files}") + # 分析diff文件 - file_count, zh_file, en_file, need_create_issue = analyze_diff_files( + file_count, zh_file, need_create_issue = analyze_diff_files( diff_files, org_item.issue_triggers, issue_title_pr_mark) - - # 检查是否修改了相同文件 - changed_same_files = check_same_files_changed(zh_file, en_file) - + + logger.info(f"分析完成:共找到 {file_count} 个需要处理的文件") + # 验证是否需要创建issue if file_count == 0: logger.warning( @@ -524,10 +511,6 @@ def process_org_item(org_item: Org, cli: GiteeClient, pr_owner: str, pr_repo: st .format(issue_title_pr_mark)) return - if changed_same_files: - logger.info("changed the same files in en and zh path, no need to create issue") - return - # 准备issue模板 need_create_issue_template, need_create_issue_titles = prepare_issue_templates(need_create_issue) -- Gitee
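The heart of patches 7 and 8 is pre-filtering the unified diff so that only docs/zh/ sections ever reach the summarizer. Below is a minimal standalone sketch of that step; it assumes the same `diff --git a/<path> b/<path>` header convention the patches parse, and the function and parameter names are illustrative, not part of the series:

# ==================== Sketch: diff filtering (illustrative, not part of the series) ====================
import re

DIFF_HEADER = re.compile(r'^diff --git a/(?P<path>.+?) b/')

def keep_docs_zh_sections(diff_content: str, exclude: list[str] | None = None) -> str:
    """Return only the docs/zh/ file sections of a unified diff.

    exclude holds paths relative to docs/zh/ that should be dropped,
    mirroring the zh_files_in_en list built in patch 8.
    """
    exclude = exclude or []
    kept, current, keep = [], [], False
    for line in (diff_content or "").splitlines():
        match = DIFF_HEADER.match(line)
        if match:
            if keep:
                kept.extend(current)          # flush the previous kept section
            path = match.group('path')
            keep = (path.startswith('docs/zh/')
                    and path[len('docs/zh/'):] not in exclude)
            current = [line]
        else:
            current.append(line)
    if keep:
        kept.extend(current)                  # flush the final section
    return '\n'.join(kept)

For example, keep_docs_zh_sections(diff, exclude=['README.md']) drops docs/zh/README.md even though it sits under docs/zh/. Compared with the index arithmetic in filter_docs_zh_files, a compiled regex keeps the header parsing in one place; paths that themselves contain " b/" are equally undefined behaviour in both versions.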
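Patch 8's check_only_marks_changed short-circuits when a change touches nothing but punctuation. is_only_marks_changed itself is defined elsewhere in new_create_translation_issue_AI.py and never shown in this series, so the following is only a plausible reading of its contract, assuming it compares the deleted and inserted fragments after stripping the marks configured in change_content_exclude:

# ==================== Sketch: punctuation-only check (assumed contract) ====================
def is_only_marks_changed_sketch(deleted_strs: list[str],
                                 inserted_strs: list[str],
                                 check_list: list[str]) -> bool:
    """True if removing every configured mark leaves old and new text identical."""
    def strip_marks(parts: list[str]) -> str:
        text = ''.join(parts)
        for mark in check_list:
            text = text.replace(mark, '')
        return text

    # e.g. is_only_marks_changed_sketch(['你好,'], ['你好,'], [',', ',']) -> True
    return strip_marks(deleted_strs) == strip_marks(inserted_strs)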
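Patch 1's process_git_diff, kept through the later revisions, fans the per-file analyses out over a ThreadPoolExecutor while tolerating individual failures and an overall deadline (the series computes its deadline as single_file_timeout * len(files) + 600). Reduced to the bare pattern, with illustrative names and Python 3.9+'s cancel_futures, it looks roughly like this:

# ==================== Sketch: bounded fan-out with an overall deadline (illustrative) ====================
import concurrent.futures as cf

def fan_out(items, worker, max_workers=8, overall_timeout=600):
    """Run worker over items in parallel; keep whatever finishes in time."""
    pool = cf.ThreadPoolExecutor(max_workers=max_workers)
    futures = {pool.submit(worker, item): item for item in items}
    results = []
    try:
        for future in cf.as_completed(futures, timeout=overall_timeout):
            try:
                results.append((futures[future], future.result()))
            except Exception:
                results.append((futures[future], None))  # one failure must not sink the batch
    except cf.TimeoutError:
        pass                                             # stragglers are abandoned, as in the patch
    finally:
        pool.shutdown(wait=False, cancel_futures=True)   # return immediately; never block on hung calls
    return results

The same future-with-timeout idea reappears in TotalSummaryChain.generate, where a single chain.invoke is submitted to an executor purely so it can be given a total_summary_timeout.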