From 57ed7447539d0897ae62d860107bc6d7d9728c80 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Tue, 1 Apr 2025 23:09:48 +0800 Subject: [PATCH 01/13] =?UTF-8?q?=E6=96=B0=E5=A2=9EDeepseek=E7=9A=84?= =?UTF-8?q?=E6=8E=A5=E5=85=A5=EF=BC=8C=E6=94=AF=E6=8C=81=E5=AE=98=E6=96=B9?= =?UTF-8?q?=E7=9A=84=E4=B8=A4=E7=A7=8D=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/func_deepseek.py | 108 ++++++++++++++++++++++++++++++++++++++++++ config.yaml.template | 6 +++ configuration.py | 3 +- constants.py | 3 +- robot.py | 7 ++- 5 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 base/func_deepseek.py diff --git a/base/func_deepseek.py b/base/func_deepseek.py new file mode 100644 index 0000000..35da459 --- /dev/null +++ b/base/func_deepseek.py @@ -0,0 +1,108 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +from datetime import datetime + +import httpx +from openai import APIConnectionError, APIError, AuthenticationError, OpenAI + + +class DeepSeek(): + def __init__(self, conf: dict) -> None: + key = conf.get("key") + api = conf.get("api", "https://api.deepseek.com") + proxy = conf.get("proxy") + prompt = conf.get("prompt") + self.model = conf.get("model", "deepseek-chat") + self.LOG = logging.getLogger("DeepSeek") + if proxy: + self.client = OpenAI(api_key=key, base_url=api, http_client=httpx.Client(proxy=proxy)) + else: + self.client = OpenAI(api_key=key, base_url=api) + self.conversation_list = {} + self.system_content_msg = {"role": "system", "content": prompt} + self.LOG.info(f"使用 DeepSeek 模型: {self.model}") + + def __repr__(self): + return 'DeepSeek' + + @staticmethod + def value_check(conf: dict) -> bool: + if conf: + if conf.get("key") and conf.get("prompt"): + return True + return False + + def get_answer(self, question: str, wxid: str) -> str: + if question == "#清除对话": + if wxid in self.conversation_list.keys(): + del self.conversation_list[wxid] + return "已清除上下文" + + self.updateMessage(wxid, question, "user") + + rsp = "" + try: + response = self.client.chat.completions.create( + model=self.model, + messages=self.conversation_list[wxid], + stream=False + ) + rsp = response.choices[0].message.content + self.updateMessage(wxid, rsp, "assistant") + except (APIConnectionError, APIError, AuthenticationError) as e1: + self.LOG.error(f"DeepSeek API 返回了错误:{str(e1)}") + except Exception as e0: + self.LOG.error(f"发生未知错误:{str(e0)}") + + return rsp + + def updateMessage(self, wxid: str, question: str, role: str) -> None: + now_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + + time_mk = "当需要回答时间时请直接参考回复:" + # 初始化聊天记录,组装系统信息 + if wxid not in self.conversation_list.keys(): + question_ = [ + self.system_content_msg, + {"role": "system", "content": "" + time_mk + now_time} + ] + self.conversation_list[wxid] = question_ + + # 当前问题 + content_question_ = {"role": role, "content": question} + self.conversation_list[wxid].append(content_question_) + + for cont in self.conversation_list[wxid]: + if cont["role"] != "system": + continue + if cont["content"].startswith(time_mk): + cont["content"] = time_mk + now_time + + # 只存储10条记录,超过滚动清除 + i = len(self.conversation_list[wxid]) + if i > 10: + print("滚动清除微信记录:" + wxid) + # 删除多余的记录,倒着删,且跳过第一个的系统消息 + del self.conversation_list[wxid][1] + + +if __name__ == "__main__": + from configuration import Config + config = Config().DEEPSEEK + if not config: + exit(0) + + chat = DeepSeek(config) + + while True: + q = input(">>> ") + try: + time_start = datetime.now() # 记录开始时间 + print(chat.get_answer(q, "wxid")) + time_end = datetime.now() # 记录结束时间 + + print(f"{round((time_end - time_start).total_seconds(), 2)}s") # 计算的时间差为程序的执行时间,单位为秒/s + except Exception as e: + print(e) diff --git a/config.yaml.template b/config.yaml.template index c26c7f7..938f5ba 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -104,3 +104,9 @@ bard: # -----bard配置这行不填----- zhipu: # -----zhipu配置这行不填----- api_key: #api key model: # 模型类型 + +deepseek: # -----deepseek配置这行不填----- + key: # 填写你的 DeepSeek API Key + api: https://api.deepseek.com # DeepSeek API 地址 + model: deepseek-chat # 可选: deepseek-chat (DeepSeek-V3), deepseek-reasoner (DeepSeek-R1) + prompt: 你是智能聊天机器人,你叫 DeepSeek 助手 # 根据需要对角色进行设定 diff --git a/configuration.py b/configuration.py index 0f8816d..cc85dc5 100644 --- a/configuration.py +++ b/configuration.py @@ -4,6 +4,7 @@ import logging.config import os import shutil +from typing import Dict, List import yaml @@ -40,5 +41,5 @@ class Config(object): self.CHATGLM = yconfig.get("chatglm", {}) self.BardAssistant = yconfig.get("bard", {}) self.ZhiPu = yconfig.get("zhipu", {}) - + self.DEEPSEEK = yconfig.get("deepseek", {}) self.SEND_RATE_LIMIT = yconfig.get("send_rate_limit", 0) diff --git a/constants.py b/constants.py index 17e9590..59fc9d0 100644 --- a/constants.py +++ b/constants.py @@ -11,13 +11,14 @@ class ChatType(IntEnum): BardAssistant = 5 # Google Bard ZhiPu = 6 # ZhiPu OLLAMA = 7 # Ollama + DEEPSEEK = 8 # DeepSeek @staticmethod def is_in_chat_types(chat_type: int) -> bool: if chat_type in [ChatType.TIGER_BOT.value, ChatType.CHATGPT.value, ChatType.XINGHUO_WEB.value, ChatType.CHATGLM.value, ChatType.BardAssistant.value, ChatType.ZhiPu.value, - ChatType.OLLAMA]: + ChatType.OLLAMA.value, ChatType.DEEPSEEK.value]: return True return False diff --git a/robot.py b/robot.py index df1fe42..2112380 100644 --- a/robot.py +++ b/robot.py @@ -14,6 +14,7 @@ from base.func_bard import BardAssistant from base.func_chatglm import ChatGLM from base.func_ollama import Ollama from base.func_chatgpt import ChatGPT +from base.func_deepseek import DeepSeek from base.func_chengyu import cy from base.func_weather import Weather from base.func_news import News @@ -53,6 +54,8 @@ class Robot(Job): self.chat = ZhiPu(self.config.ZhiPu) elif chat_type == ChatType.OLLAMA.value and Ollama.value_check(self.config.OLLAMA): self.chat = Ollama(self.config.OLLAMA) + elif chat_type == ChatType.DEEPSEEK.value and DeepSeek.value_check(self.config.DEEPSEEK): + self.chat = DeepSeek(self.config.DEEPSEEK) else: self.LOG.warning("未配置模型") self.chat = None @@ -71,6 +74,8 @@ class Robot(Job): self.chat = BardAssistant(self.config.BardAssistant) elif ZhiPu.value_check(self.config.ZhiPu): self.chat = ZhiPu(self.config.ZhiPu) + elif DeepSeek.value_check(self.config.DEEPSEEK): + self.chat = DeepSeek(self.config.DEEPSEEK) else: self.LOG.warning("未配置模型") self.chat = None @@ -216,7 +221,7 @@ class Robot(Job): # 清除超过1分钟的记录 self._msg_timestamps = [t for t in self._msg_timestamps if now - t < 60] if len(self._msg_timestamps) >= self.config.SEND_RATE_LIMIT: - self.LOG.warning("发送消息过快,已达到每分钟"+self.config.SEND_RATE_LIMIT+"条上限。") + self.LOG.warning(f"发送消息过快,已达到每分钟{self.config.SEND_RATE_LIMIT}条上限。") return self._msg_timestamps.append(now) -- Gitee From 619002d11b88e8535092e58f9a3d82b30b64f0f5 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 2 Apr 2025 00:17:57 +0800 Subject: [PATCH 02/13] =?UTF-8?q?=E5=AF=B9Deepseek=E6=8E=A5=E5=85=A5?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E4=BA=86=E4=BC=98=E5=8C=96=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E4=BA=86=E6=80=9D=E7=BB=B4=E9=93=BE=E7=9A=84=E6=98=BE?= =?UTF-8?q?=E7=A4=BA=EF=BC=8C=E6=A0=B9=E6=8D=AE=E7=AE=A1=E7=90=86=E5=91=98?= =?UTF-8?q?=E7=9A=84=E8=AE=BE=E7=BD=AE=E8=87=AA=E5=8A=A8=E5=88=A4=E6=96=AD?= =?UTF-8?q?=E6=98=AF=E5=90=A6=E5=BA=94=E8=AF=A5=E6=98=BE=E7=A4=BA=E6=80=9D?= =?UTF-8?q?=E7=BB=B4=E9=93=BE=EF=BC=8C=E5=A6=82=E6=9E=9C=E5=BC=80=E5=90=AF?= =?UTF-8?q?=E6=80=9D=E7=BB=B4=E9=93=BE=EF=BC=8C=E5=88=99=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E7=89=B9=E5=AE=9A=E5=AD=97=E7=AC=A6=E6=9D=A5=E5=81=9A=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E7=9A=84=E5=8C=BA=E5=88=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.MD | 9 +++ base/func_deepseek.py | 128 ++++++++++++++++++++++++++++-------------- config.yaml.template | 5 +- 3 files changed, 100 insertions(+), 42 deletions(-) diff --git a/README.MD b/README.MD index 60385fc..a034bcf 100644 --- a/README.MD +++ b/README.MD @@ -140,6 +140,15 @@ bard: # -----bard配置这行不填----- # 提示词尽可能用英文,bard对中文提示词的效果不是很理想,下方提示词为英语老师的示例,请按实际需要修改,默认设置的提示词为谷歌创造的AI大语言模型 # I want you to act as a spoken English teacher and improver. I will speak to you in English and you will reply to me in English to practice my spoken English. I want you to keep your reply neat, limiting the reply to 100 words. I want you to strictly correct my grammar mistakes, typos, and factual errors. I want you to ask me a question in your reply. Now let's start practicing, you could ask me a question first. Remember, I want you to strictly correct my grammar mistakes, typos, and factual errors. prompt: You am a large language model, trained by Google. + +deepseek: sk-xxxxxxxxxxxxxxx # -----deepseek配置这行不填----- + #思维链相关功能默认关闭,开启后会增加响应时间和消耗更多的token + key: # 填写你的 DeepSeek API Key + api: https://api.deepseek.com # DeepSeek API 地址 + model: deepseek-chat # 可选: deepseek-chat (DeepSeek-V3), deepseek-reasoner (DeepSeek-R1) + prompt: 你是智能聊天机器人,你叫 DeepSeek 助手 # 根据需要对角色进行设定 + enable_reasoning: false # 是否启用思维链功能,仅在使用 deepseek-reasoner 模型时有效 + show_reasoning: false # 是否在回复中显示思维过程,仅在启用思维链功能时有效 ``` ## HTTP diff --git a/base/func_deepseek.py b/base/func_deepseek.py index 35da459..5d8846d 100644 --- a/base/func_deepseek.py +++ b/base/func_deepseek.py @@ -16,13 +16,27 @@ class DeepSeek(): prompt = conf.get("prompt") self.model = conf.get("model", "deepseek-chat") self.LOG = logging.getLogger("DeepSeek") + + self.reasoning_supported = (self.model == "deepseek-reasoner") + + if conf.get("enable_reasoning", False) and not self.reasoning_supported: + self.LOG.warning("思维链功能只在使用 deepseek-reasoner 模型时可用,当前模型不支持此功能") + + self.enable_reasoning = conf.get("enable_reasoning", False) and self.reasoning_supported + self.show_reasoning = conf.get("show_reasoning", False) and self.enable_reasoning + if proxy: self.client = OpenAI(api_key=key, base_url=api, http_client=httpx.Client(proxy=proxy)) else: self.client = OpenAI(api_key=key, base_url=api) + self.conversation_list = {} + self.system_content_msg = {"role": "system", "content": prompt} - self.LOG.info(f"使用 DeepSeek 模型: {self.model}") + + reasoning_status = "开启" if self.enable_reasoning else "关闭" + reasoning_display = "显示" if self.show_reasoning else "隐藏" + self.LOG.info(f"使用 DeepSeek 模型: {self.model}, 思维链功能: {reasoning_status}({reasoning_display}), 模型支持思维链: {'是' if self.reasoning_supported else '否'}") def __repr__(self): return 'DeepSeek' @@ -39,53 +53,86 @@ class DeepSeek(): if wxid in self.conversation_list.keys(): del self.conversation_list[wxid] return "已清除上下文" + + if question.lower() in ["#开启思维链", "#enable reasoning"]: + if not self.reasoning_supported: + return "当前模型不支持思维链功能,请使用 deepseek-reasoner 模型" + self.enable_reasoning = True + self.show_reasoning = True + return "已开启思维链模式,将显示完整的推理过程" - self.updateMessage(wxid, question, "user") + if question.lower() in ["#关闭思维链", "#disable reasoning"]: + if not self.reasoning_supported: + return "当前模型不支持思维链功能,无需关闭" + self.enable_reasoning = False + self.show_reasoning = False + return "已关闭思维链模式" + + if question.lower() in ["#隐藏思维链", "#hide reasoning"]: + if not self.enable_reasoning: + return "思维链功能未开启,无法设置隐藏/显示" + self.show_reasoning = False + return "已设置隐藏思维链,但模型仍会进行深度思考" + + if question.lower() in ["#显示思维链", "#show reasoning"]: + if not self.enable_reasoning: + return "思维链功能未开启,无法设置隐藏/显示" + self.show_reasoning = True + return "已设置显示思维链" + + if wxid not in self.conversation_list: + self.conversation_list[wxid] = [] + if self.system_content_msg["content"]: + self.conversation_list[wxid].append(self.system_content_msg) + + self.conversation_list[wxid].append({"role": "user", "content": question}) - rsp = "" try: + clean_messages = [] + for msg in self.conversation_list[wxid]: + clean_msg = {"role": msg["role"], "content": msg["content"]} + clean_messages.append(clean_msg) + response = self.client.chat.completions.create( model=self.model, - messages=self.conversation_list[wxid], + messages=clean_messages, stream=False ) - rsp = response.choices[0].message.content - self.updateMessage(wxid, rsp, "assistant") + + if self.reasoning_supported and self.enable_reasoning: + # deepseek-reasoner模型返回的特殊字段: reasoning_content和content + # 单独处理思维链模式的响应 + reasoning_content = getattr(response.choices[0].message, "reasoning_content", None) + content = response.choices[0].message.content + + if self.show_reasoning and reasoning_content: + final_response = f"🤔思考过程:\n{reasoning_content}\n\n🎉最终答案:\n{content}" + #最好不要删除表情,因为微信内的信息没有办法做自定义显示,这里是为了做两个分隔,来区分思考过程和最终答案!💡 + else: + final_response = content + self.conversation_list[wxid].append({"role": "assistant", "content": content}) + else: + final_response = response.choices[0].message.content + self.conversation_list[wxid].append({"role": "assistant", "content": final_response}) + + # 控制对话长度,保留最近的历史记录 + # 系统消息(如果有) + 最近9轮对话(问答各算一轮) + max_history = 19 + if len(self.conversation_list[wxid]) > max_history: + has_system = self.conversation_list[wxid][0]["role"] == "system" + if has_system: + self.conversation_list[wxid] = [self.conversation_list[wxid][0]] + self.conversation_list[wxid][-(max_history-1):] + else: + self.conversation_list[wxid] = self.conversation_list[wxid][-max_history:] + + return final_response + except (APIConnectionError, APIError, AuthenticationError) as e1: self.LOG.error(f"DeepSeek API 返回了错误:{str(e1)}") + return f"DeepSeek API 返回了错误:{str(e1)}" except Exception as e0: self.LOG.error(f"发生未知错误:{str(e0)}") - - return rsp - - def updateMessage(self, wxid: str, question: str, role: str) -> None: - now_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - - time_mk = "当需要回答时间时请直接参考回复:" - # 初始化聊天记录,组装系统信息 - if wxid not in self.conversation_list.keys(): - question_ = [ - self.system_content_msg, - {"role": "system", "content": "" + time_mk + now_time} - ] - self.conversation_list[wxid] = question_ - - # 当前问题 - content_question_ = {"role": role, "content": question} - self.conversation_list[wxid].append(content_question_) - - for cont in self.conversation_list[wxid]: - if cont["role"] != "system": - continue - if cont["content"].startswith(time_mk): - cont["content"] = time_mk + now_time - - # 只存储10条记录,超过滚动清除 - i = len(self.conversation_list[wxid]) - if i > 10: - print("滚动清除微信记录:" + wxid) - # 删除多余的记录,倒着删,且跳过第一个的系统消息 - del self.conversation_list[wxid][1] + return "抱歉,处理您的请求时出现了错误" if __name__ == "__main__": @@ -99,10 +146,9 @@ if __name__ == "__main__": while True: q = input(">>> ") try: - time_start = datetime.now() # 记录开始时间 + time_start = datetime.now() print(chat.get_answer(q, "wxid")) - time_end = datetime.now() # 记录结束时间 - - print(f"{round((time_end - time_start).total_seconds(), 2)}s") # 计算的时间差为程序的执行时间,单位为秒/s + time_end = datetime.now() + print(f"{round((time_end - time_start).total_seconds(), 2)}s") except Exception as e: print(e) diff --git a/config.yaml.template b/config.yaml.template index 938f5ba..9938e21 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -106,7 +106,10 @@ zhipu: # -----zhipu配置这行不填----- model: # 模型类型 deepseek: # -----deepseek配置这行不填----- - key: # 填写你的 DeepSeek API Key + #思维链相关功能默认关闭,开启后会增加响应时间和消耗更多的token + key: # 填写你的 DeepSeek API Key API Key的格式为sk-xxxxxxxxxxxxxxx api: https://api.deepseek.com # DeepSeek API 地址 model: deepseek-chat # 可选: deepseek-chat (DeepSeek-V3), deepseek-reasoner (DeepSeek-R1) prompt: 你是智能聊天机器人,你叫 DeepSeek 助手 # 根据需要对角色进行设定 + enable_reasoning: false # 是否启用思维链功能,仅在使用 deepseek-reasoner 模型时有效 + show_reasoning: false # 是否在回复中显示思维过程,仅在启用思维链功能时有效 -- Gitee From 1c74fe3ef8d16b920fd59dac1863724cea1b02eb Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 2 Apr 2025 13:07:20 +0800 Subject: [PATCH 03/13] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=99=BA=E8=B0=B1?= =?UTF-8?q?=E6=B8=85=E8=A8=80=E6=96=87=E7=94=9F=E5=9B=BE=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E6=94=AF=E6=8C=81=E6=A0=B9=E6=8D=AE=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E7=94=9F=E6=88=90=E5=92=8C=E4=B8=8B=E8=BD=BD=E5=9B=BE=E5=83=8F?= =?UTF-8?q?=E5=B9=B6=E5=8F=91=E9=80=81=EF=BC=8C=E6=9B=B4=E6=94=B9=E7=BE=A4?= =?UTF-8?q?=E5=86=85@=E8=81=8A=E5=A4=A9=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E5=8A=A0=E5=85=A5=E8=A7=A6=E5=8F=91=E6=96=87=E7=94=9F=E5=9B=BE?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E3=80=82=E5=A6=82=E6=9E=9C=E7=AE=A1=E7=90=86?= =?UTF-8?q?=E5=91=98=E6=9C=AA=E5=BC=80=E5=90=AF=E6=96=87=E7=94=9F=E5=9B=BE?= =?UTF-8?q?=EF=BC=8C=E5=88=99=E9=BB=98=E8=AE=A4=E8=B0=83=E7=94=A8=E5=85=B6?= =?UTF-8?q?=E4=BB=96=E8=AF=AD=E8=A8=80=E6=A8=A1=E5=9E=8B=EF=BC=8C=E4=B9=9F?= =?UTF-8?q?=E5=8F=AF=E8=AE=BE=E7=BD=AE=E5=9B=9E=E5=A4=8D=E5=9B=BA=E5=AE=9A?= =?UTF-8?q?=E4=BF=A1=E6=81=AF=E6=9D=A5=E5=87=8F=E5=B0=91=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E7=9A=84=E8=B0=83=E7=94=A8=EF=BC=8C=E4=BC=98=E5=8C=96=E7=94=A8?= =?UTF-8?q?=E6=88=B7=E7=9A=84=E4=BD=BF=E7=94=A8=E4=BD=93=E9=AA=8C=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/func_cogview.py | 94 ++++++++++++++++++++++++++++++++++++++++++ config.yaml.template | 11 +++++ configuration.py | 2 + requirements.txt | 2 +- robot.py | 97 ++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 base/func_cogview.py diff --git a/base/func_cogview.py b/base/func_cogview.py new file mode 100644 index 0000000..2c149c5 --- /dev/null +++ b/base/func_cogview.py @@ -0,0 +1,94 @@ +import logging +import os +import requests +import tempfile +import time +from zhipuai import ZhipuAI + +class CogView(): + def __init__(self, conf: dict) -> None: + self.api_key = conf.get("api_key") + self.model = conf.get("model", "cogview-4-250304") # 默认使用最新模型 + self.quality = conf.get("quality", "standard") + self.size = conf.get("size", "1024x1024") + self.enable = conf.get("enable", True) + + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + default_img_dir = os.path.join(project_dir, "zhipuimg") + self.temp_dir = conf.get("temp_dir", default_img_dir) + + self.LOG = logging.getLogger("CogView") + + if self.api_key: + self.client = ZhipuAI(api_key=self.api_key) + self.LOG.info(f"CogView 初始化成功,模型:{self.model},质量:{self.quality},图片保存目录:{self.temp_dir}") + else: + self.LOG.warning("未配置智谱API密钥,图像生成功能无法使用") + self.client = None + + os.makedirs(self.temp_dir, exist_ok=True) + + @staticmethod + def value_check(conf: dict) -> bool: + if conf and conf.get("api_key") and conf.get("enable", True): + return True + return False + + def __repr__(self): + return 'CogView' + + def generate_image(self, prompt: str) -> str: + """ + 生成图像并返回图像URL + + Args: + prompt (str): 图像描述 + + Returns: + str: 生成的图像URL或错误信息 + """ + if not self.client or not self.enable: + return "图像生成功能未启用或API密钥未配置" + + try: + response = self.client.images.generations( + model=self.model, + prompt=prompt, + quality=self.quality, + size=self.size, + ) + + if response and response.data and len(response.data) > 0: + return response.data[0].url + else: + return "图像生成失败,未收到有效响应" + except Exception as e: + self.LOG.error(f"图像生成出错: {str(e)}") + return f"图像生成出错: {str(e)}" + + def download_image(self, image_url: str) -> str: + """ + 下载图片并返回本地文件路径 + + Args: + image_url (str): 图片URL + + Returns: + str: 本地图片文件路径,下载失败则返回None + """ + try: + response = requests.get(image_url, stream=True, timeout=30) + if response.status_code == 200: + file_path = os.path.join(self.temp_dir, f"cogview_{int(time.time())}.jpg") + with open(file_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + self.LOG.info(f"图片已下载到: {file_path}") + return file_path + else: + self.LOG.error(f"下载图片失败,状态码: {response.status_code}") + return None + except Exception as e: + self.LOG.error(f"下载图片过程出错: {str(e)}") + return None diff --git a/config.yaml.template b/config.yaml.template index 9938e21..34cd586 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -113,3 +113,14 @@ deepseek: # -----deepseek配置这行不填----- prompt: 你是智能聊天机器人,你叫 DeepSeek 助手 # 根据需要对角色进行设定 enable_reasoning: false # 是否启用思维链功能,仅在使用 deepseek-reasoner 模型时有效 show_reasoning: false # 是否在回复中显示思维过程,仅在启用思维链功能时有效 + +cogview: # -----智谱AI图像生成配置这行不填----- + #此API请参考 https://www.bigmodel.cn/dev/api/image-model/cogview + enable: False # 是否启用图像生成功能,默认关闭,将False替换为true则开启,此模型可和其他模型同时运行。 + api_key: # 智谱API密钥,请填入您的API Key + model: cogview-4-250304 # 模型编码,可选:cogview-4-250304、cogview-4、cogview-3-flash + quality: standard # 生成质量,可选:standard(快速)、hd(高清) + size: 1024x1024 # 图片尺寸,可自定义,需符合条件 + trigger_keyword: 画一张 # 触发图像生成的关键词 + temp_dir: # 临时文件存储目录,留空则默认使用项目目录下的zhipuimg文件夹,如果要更改,例如 D:/Pictures/temp 或 /home/user/temp + fallback_to_chat: true # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 diff --git a/configuration.py b/configuration.py index cc85dc5..4aefa2d 100644 --- a/configuration.py +++ b/configuration.py @@ -11,6 +11,7 @@ import yaml class Config(object): def __init__(self) -> None: + self.COGVIEW = {} self.reload() def _load_config(self) -> dict: @@ -42,4 +43,5 @@ class Config(object): self.BardAssistant = yconfig.get("bard", {}) self.ZhiPu = yconfig.get("zhipu", {}) self.DEEPSEEK = yconfig.get("deepseek", {}) + self.COGVIEW = yconfig.get("cogview", {}) self.SEND_RATE_LIMIT = yconfig.get("send_rate_limit", 0) diff --git a/requirements.txt b/requirements.txt index 589dd1c..938f547 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,5 +14,5 @@ jupyter_client zhdate ipykernel google-generativeai -zhipuai +zhipuai>=1.0.0 ollama diff --git a/robot.py b/robot.py index 2112380..5fe8bd3 100644 --- a/robot.py +++ b/robot.py @@ -6,7 +6,9 @@ import time import xml.etree.ElementTree as ET from queue import Empty from threading import Thread +import os from base.func_zhipu import ZhiPu +from base.func_cogview import CogView from wcferry import Wcf, WxMsg @@ -82,6 +84,16 @@ class Robot(Job): self.LOG.info(f"已选择: {self.chat}") + if hasattr(self.config, 'COGVIEW') and CogView.value_check(self.config.COGVIEW): + self.cogview = CogView(self.config.COGVIEW) + self.LOG.info("图像生成服务已初始化") + else: + self.cogview = None + if hasattr(self.config, 'COGVIEW'): + self.LOG.info("图像生成服务未启用或配置不正确") + else: + self.LOG.info("配置中未找到COGVIEW配置部分") + @staticmethod def value_check(args: dict) -> bool: if args: @@ -93,6 +105,46 @@ class Robot(Job): :param msg: 微信消息结构 :return: 处理状态,`True` 成功,`False` 失败 """ + trigger = self.config.COGVIEW.get('trigger_keyword', '画一张') if hasattr(self.config, 'COGVIEW') else '画一张' + content = re.sub(r"@.*?[\u2005|\s]", "", msg.content).replace(" ", "") + if content.startswith(trigger): + if self.cogview and hasattr(self.config, 'COGVIEW') and self.config.COGVIEW.get('enable', False): + prompt = content[len(trigger):].strip() + if prompt: + self.LOG.info(f"群聊中收到图像生成请求: {prompt}") + self.sendTextMsg("正在生成图像,请稍等...", msg.roomid, msg.sender) + image_url = self.cogview.generate_image(prompt) + + if image_url and image_url.startswith("http"): + try: + self.LOG.info(f"开始下载图片: {image_url}") + image_path = self.cogview.download_image(image_url) + + if image_path: + self.LOG.info(f"发送图片到群: {image_path}") + self.wcf.send_image(image_path, msg.roomid) + os.remove(image_path) # 发送后删除临时文件 + else: + self.LOG.warning(f"图片下载失败,发送URL链接作为备用: {image_url}") + self.sendTextMsg(f"图像已生成,但无法自动显示,点链接也能查看:\n{image_url}", msg.roomid, msg.sender) + except Exception as e: + self.LOG.error(f"发送图片过程出错: {str(e)}") + self.sendTextMsg(f"图像已生成,但发送过程出错,点链接也能查看:\n{image_url}", msg.roomid, msg.sender) + else: + self.LOG.error(f"图像生成失败: {image_url}") + self.sendTextMsg(f"图像生成失败: {image_url}", msg.roomid, msg.sender) + return True + else: + self.LOG.info("群聊中收到图像生成请求但功能未启用") + + fallback_to_chat = self.config.COGVIEW.get('fallback_to_chat', False) if hasattr(self.config, 'COGVIEW') else False + + if fallback_to_chat and self.chat: + self.LOG.info("将画图请求转发给聊天模型处理") + return self.toChitchat(msg) + else: + self.sendTextMsg("报一丝,图像生成功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", msg.roomid, msg.sender) + return True return self.toChitchat(msg) def toChengyu(self, msg: WxMsg) -> bool: @@ -172,18 +224,57 @@ class Robot(Job): elif msg.type == 10000: # 系统信息 self.sayHiToNewFriend(msg) - elif msg.type == 0x01: # 文本消息 - # 让配置加载更灵活,自己可以更新配置。也可以利用定时任务更新。 + elif msg.type == 0x01: if msg.from_self(): if msg.content == "^更新$": self.config.reload() self.LOG.info("已更新") else: + trigger = self.config.COGVIEW.get('trigger_keyword', '画一张') if hasattr(self.config, 'COGVIEW') else '画一张' + if msg.content.startswith(trigger): + if self.cogview and hasattr(self.config, 'COGVIEW') and self.config.COGVIEW.get('enable', False): + prompt = msg.content[len(trigger):].strip() + if prompt: + self.LOG.info(f"收到图像生成请求: {prompt}") + self.sendTextMsg("正在生成图像,请稍等...", msg.sender) + image_url = self.cogview.generate_image(prompt) + + if image_url and image_url.startswith("http"): + try: + self.LOG.info(f"开始下载图片: {image_url}") + image_path = self.cogview.download_image(image_url) + + if image_path: + self.LOG.info(f"发送图片: {image_path}") + self.wcf.send_image(image_path, msg.sender) + os.remove(image_path) # 发送后删除临时文件 + else: + self.LOG.warning(f"图片下载失败,发送URL链接作为备用: {image_url}") + self.sendTextMsg(f"图像已生成,但无法自动显示,点链接也能查看:\n{image_url}", msg.sender) + except Exception as e: + self.LOG.error(f"发送图片过程出错: {str(e)}") + self.sendTextMsg(f"图像已生成,但发送过程出错,点链接也能查看:\n{image_url}", msg.sender) + else: + self.LOG.error(f"图像生成失败: {image_url}") + self.sendTextMsg(f"图像生成失败: {image_url}", msg.sender) + return + else: + self.LOG.info("私聊中收到图像生成请求但功能未启用") + + fallback_to_chat = self.config.COGVIEW.get('fallback_to_chat', False) if hasattr(self.config, 'COGVIEW') else False + + if fallback_to_chat and self.chat: + self.LOG.info("将画图请求转发给聊天模型处理") + return self.toChitchat(msg) + else: + self.sendTextMsg("报一丝,图像生成功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", msg.sender) + return + self.toChitchat(msg) # 闲聊 def onMsg(self, msg: WxMsg) -> int: try: - self.LOG.info(msg) # 打印信息 + self.LOG.info(msg) self.processMsg(msg) except Exception as e: self.LOG.error(e) -- Gitee From fc58246e6b49bd490285139e7015b9aacb446b8b Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 2 Apr 2025 13:10:36 +0800 Subject: [PATCH 04/13] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=AD=E7=9A=84=E6=B3=A8=E9=87=8A=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=EF=BC=8C=E7=A1=AE=E4=BF=9DAPI=E5=8F=82=E8=80=83?= =?UTF-8?q?=E9=93=BE=E6=8E=A5=E5=89=8D=E6=9C=89=E7=A9=BA=E6=A0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.yaml.template b/config.yaml.template index 34cd586..eb2e80b 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -115,7 +115,7 @@ deepseek: # -----deepseek配置这行不填----- show_reasoning: false # 是否在回复中显示思维过程,仅在启用思维链功能时有效 cogview: # -----智谱AI图像生成配置这行不填----- - #此API请参考 https://www.bigmodel.cn/dev/api/image-model/cogview + # 此API请参考 https://www.bigmodel.cn/dev/api/image-model/cogview enable: False # 是否启用图像生成功能,默认关闭,将False替换为true则开启,此模型可和其他模型同时运行。 api_key: # 智谱API密钥,请填入您的API Key model: cogview-4-250304 # 模型编码,可选:cogview-4-250304、cogview-4、cogview-3-flash -- Gitee From 1e2be7f5b7d14757876ea86b5f5a3bbb8c32fb11 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 2 Apr 2025 18:49:50 +0800 Subject: [PATCH 05/13] =?UTF-8?q?=E5=A6=82=E6=9E=9C=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E5=9B=BD=E5=86=85=E6=9C=8D=E5=8A=A1=E5=99=A8=EF=BC=8C=E5=8F=AF?= =?UTF-8?q?=E8=83=BD=E4=BC=9A=E9=81=87=E5=88=B0=E7=BD=91=E7=BB=9C=E7=9A=84?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E9=97=AE=E9=A2=98=EF=BC=8C=E5=AE=89=E8=A3=85?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E5=8F=AF=E4=BD=BF=E7=94=A8=E9=95=9C=E5=83=8F?= =?UTF-8?q?=E6=BA=90=E6=9D=A5=E5=AE=89=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.MD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.MD b/README.MD index a034bcf..83302cb 100644 --- a/README.MD +++ b/README.MD @@ -31,6 +31,8 @@ git clone https://gitee.com/lch0821/WeChatRobot.git python -m pip install -U pip # 安装必要依赖 pip install -r requirements.txt +# 国内用户可能会因为网络问题出现安装失败,届时可使用镜像源来下载 +pip install -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -r requirements.txt # ChatGLM 还需要安装一个 kernel ipython kernel install --name chatglm3 --user ``` -- Gitee From ffa07d4992ac3794013385252c9c5daf08b79140 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 2 Apr 2025 19:01:13 +0800 Subject: [PATCH 06/13] =?UTF-8?q?4=E6=9C=882=E6=97=A5=E5=90=91=E6=99=BA?= =?UTF-8?q?=E8=B0=B1=E6=B8=85=E8=A8=80=E6=8F=90=E4=BA=A4=E4=BA=86=E7=94=9F?= =?UTF-8?q?=E6=88=90=E8=BF=9D=E8=A7=84=E5=9B=BE=E5=83=8F=E7=9A=84BUG?= =?UTF-8?q?=EF=BC=8C=E5=AE=98=E6=96=B9=E4=BF=AE=E5=A4=8D=E5=90=8E=EF=BC=8C?= =?UTF-8?q?=E5=B0=86=E9=A1=B9=E7=9B=AE=E5=86=85=E6=9B=B4=E6=96=B0=E9=80=82?= =?UTF-8?q?=E9=85=8D=E8=BF=94=E5=9B=9E=E7=9A=84500=E9=94=99=E8=AF=AF?= =?UTF-8?q?=EF=BC=8C=E5=90=91=E7=94=A8=E6=88=B7=E8=BF=94=E5=9B=9E=E8=83=BD?= =?UTF-8?q?=E5=A4=9F=E7=9C=8B=E6=87=82=E7=9A=84=E6=96=87=E5=AD=97=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/func_cogview.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/base/func_cogview.py b/base/func_cogview.py index 2c149c5..01cba46 100644 --- a/base/func_cogview.py +++ b/base/func_cogview.py @@ -63,8 +63,14 @@ class CogView(): else: return "图像生成失败,未收到有效响应" except Exception as e: - self.LOG.error(f"图像生成出错: {str(e)}") - return f"图像生成出错: {str(e)}" + error_str = str(e) + self.LOG.error(f"图像生成出错: {error_str}") + + if "Error code: 500" in error_str or "HTTP/1.1 500" in error_str or "code\":\"1234\"" in error_str: + self.LOG.warning(f"检测到违规内容请求: {prompt}") + return "很抱歉,您的请求可能包含违规内容,无法生成图像" + + return "图像生成失败,请调整您的描述后重试" def download_image(self, image_url: str) -> str: """ -- Gitee From d95762be3a0492c030edf2bc9c0e6acb48a882e6 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Thu, 3 Apr 2025 01:57:53 +0800 Subject: [PATCH 07/13] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E9=98=BF=E9=87=8C?= =?UTF-8?q?=E4=BA=91=E6=96=87=E7=94=9F=E5=9B=BE=E5=8A=9F=E8=83=BD=EF=BC=88?= =?UTF-8?q?=E6=9B=B4=E5=A5=BD=E7=9A=84=E6=A8=A1=E5=9E=8B=EF=BC=89=EF=BC=8C?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6=E4=BB=A5?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E9=98=BF=E9=87=8C=E4=BA=91=E5=9B=BE=E5=83=8F?= =?UTF-8?q?=E7=94=9F=E6=88=90=EF=BC=8C=E4=BC=98=E5=8C=96=E6=9C=BA=E5=99=A8?= =?UTF-8?q?=E4=BA=BA=E5=90=AF=E5=8A=A8=E6=B6=88=E6=81=AF=EF=BC=8C=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BD=BF=E7=94=A8=E8=AF=B4=E6=98=8E=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BE=9D=E8=B5=96=E9=A1=B9=E4=BB=A5=E5=8C=85?= =?UTF-8?q?=E5=90=ABdashscope=E5=BA=93=E3=80=82-=E9=87=8D=E6=96=B0?= =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=96=87=E4=BB=B6=E4=BB=A3=E7=A0=81=E4=BD=8D?= =?UTF-8?q?=E7=BD=AE=EF=BC=8C=E5=87=8F=E5=B0=91=E4=BB=A3=E7=A0=81=E5=A4=8D?= =?UTF-8?q?=E6=9D=82=E7=A8=8B=E5=BA=A6=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/func_aliyun_image.py | 108 +++++++++++++++++ config.yaml.template | 12 +- configuration.py | 2 + main.py | 10 +- requirements.txt | 1 + robot.py | 246 ++++++++++++++++++++++++++------------ 6 files changed, 302 insertions(+), 77 deletions(-) create mode 100644 base/func_aliyun_image.py diff --git a/base/func_aliyun_image.py b/base/func_aliyun_image.py new file mode 100644 index 0000000..b6d9f29 --- /dev/null +++ b/base/func_aliyun_image.py @@ -0,0 +1,108 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import os +import time +from http import HTTPStatus +from urllib.parse import urlparse, unquote +from pathlib import PurePosixPath + +import requests +from dashscope import ImageSynthesis +import dashscope + +class AliyunImage(): + """阿里文生图API调用 + """ + + @staticmethod + def value_check(args: dict) -> bool: + try: + return bool(args and args.get("api_key", "") and args.get("model", "")) + except Exception: + return False + + def __init__(self, config={}) -> None: + self.LOG = logging.getLogger("AliyunImage") + if not config: + raise Exception("缺少配置信息") + + self.api_key = config.get("api_key", "") + self.model = config.get("model", "wanx2.1-t2i-turbo") + self.size = config.get("size", "1024*1024") + self.enable = config.get("enable", True) + self.n = config.get("n", 1) + self.temp_dir = config.get("temp_dir", "./temp") + + # 确保临时目录存在 + if not os.path.exists(self.temp_dir): + os.makedirs(self.temp_dir) + + # 设置API密钥 + dashscope.api_key = self.api_key + + self.LOG.info("AliyunImage 已初始化") + + def generate_image(self, prompt: str) -> str: + """生成图像并返回图像URL + + Args: + prompt (str): 图像描述 + + Returns: + str: 生成的图像URL或错误信息 + """ + if not self.enable or not self.api_key: + return "阿里文生图功能未启用或API密钥未配置" + + try: + rsp = ImageSynthesis.call( + api_key=self.api_key, + model=self.model, + prompt=prompt, + n=self.n, + size=self.size + ) + + if rsp.status_code == HTTPStatus.OK and rsp.output and rsp.output.results: + return rsp.output.results[0].url + else: + self.LOG.error(f"图像生成失败: {rsp.code}, {rsp.message}") + return f"图像生成失败: {rsp.message}" + except Exception as e: + error_str = str(e) + self.LOG.error(f"图像生成出错: {error_str}") + + if "Error code: 500" in error_str or "HTTP/1.1 500" in error_str: + self.LOG.warning(f"检测到违规内容请求: {prompt}") + return "很抱歉,您的请求可能包含违规内容,无法生成图像" + + return "图像生成失败,请调整您的描述后重试" + + def download_image(self, image_url: str) -> str: + """ + 下载图片并返回本地文件路径 + + Args: + image_url (str): 图片URL + + Returns: + str: 本地图片文件路径,下载失败则返回None + """ + try: + response = requests.get(image_url, stream=True, timeout=30) + if response.status_code == 200: + file_path = os.path.join(self.temp_dir, f"aliyun_image_{int(time.time())}.jpg") + with open(file_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + self.LOG.info(f"图片已下载到: {file_path}") + return file_path + else: + self.LOG.error(f"下载图片失败,状态码: {response.status_code}") + return None + except Exception as e: + self.LOG.error(f"下载图片过程出错: {str(e)}") + return None diff --git a/config.yaml.template b/config.yaml.template index eb2e80b..42fef2f 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -121,6 +121,16 @@ cogview: # -----智谱AI图像生成配置这行不填----- model: cogview-4-250304 # 模型编码,可选:cogview-4-250304、cogview-4、cogview-3-flash quality: standard # 生成质量,可选:standard(快速)、hd(高清) size: 1024x1024 # 图片尺寸,可自定义,需符合条件 - trigger_keyword: 画一张 # 触发图像生成的关键词 + trigger_keyword: 牛智谱 # 触发图像生成的关键词 temp_dir: # 临时文件存储目录,留空则默认使用项目目录下的zhipuimg文件夹,如果要更改,例如 D:/Pictures/temp 或 /home/user/temp fallback_to_chat: true # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 + +aliyun_image: # -----如果要使用阿里云文生图,取消下面的注释并填写相关内容,模型到阿里云百炼找通义万相-文生图2.1-Turbo----- + enable: true # 是否启用阿里文生图功能,false为关闭,默认开启,如果未配置,则会将消息发送给聊天大模型 + api_key: sk-xxxxxxxxxxxxxxxxxxxxxxxx # 替换为你的DashScope API密钥 + model: wanx2.1-t2i-turbo # 模型名称,默认使用wanx2.1-t2i-turbo(快),wanx2.1-t2i-plus(中),wanx-v1(慢),会给用户不同的提示! + size: 1024*1024 # 图像尺寸,格式为宽*高 + n: 1 # 生成图像的数量 + temp_dir: ./temp # 临时文件存储路径 + trigger_keyword: 牛阿里 # 触发词,默认为"牛阿里" + fallback_to_chat: true # 当服务不可用时是否转发给聊天模型处理 \ No newline at end of file diff --git a/configuration.py b/configuration.py index 4aefa2d..135a1a8 100644 --- a/configuration.py +++ b/configuration.py @@ -12,6 +12,7 @@ import yaml class Config(object): def __init__(self) -> None: self.COGVIEW = {} + self.ALIYUN_IMAGE = {} self.reload() def _load_config(self) -> dict: @@ -44,4 +45,5 @@ class Config(object): self.ZhiPu = yconfig.get("zhipu", {}) self.DEEPSEEK = yconfig.get("deepseek", {}) self.COGVIEW = yconfig.get("cogview", {}) + self.ALIYUN_IMAGE = yconfig.get("aliyun_image", {}) self.SEND_RATE_LIMIT = yconfig.get("send_rate_limit", 0) diff --git a/main.py b/main.py index b24b38d..1870c00 100644 --- a/main.py +++ b/main.py @@ -24,7 +24,15 @@ def main(chat_type: int): robot.LOG.info(f"WeChatRobot【{__version__}】成功启动···") # 机器人启动发送测试消息 - robot.sendTextMsg("机器人启动成功!", "filehelper") + # 机器人启动发送测试消息 + robot.sendTextMsg("机器人启动成功!\n" + "🎨 绘画功能使用说明:\n" + "• 智谱绘画:牛智谱[描述]\n" + "• 阿里绘画:牛阿里[描述]\n" + "实例:\n" + "牛阿里 画一张家乡\n" + "@XX 牛阿里 画一张家乡\n" + "💬 聊天时直接发送消息即可", "filehelper") # 接收消息 # robot.enableRecvMsg() # 可能会丢消息? diff --git a/requirements.txt b/requirements.txt index 938f547..275fbe4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ ipykernel google-generativeai zhipuai>=1.0.0 ollama +dashscope \ No newline at end of file diff --git a/robot.py b/robot.py index 5fe8bd3..c63d879 100644 --- a/robot.py +++ b/robot.py @@ -7,8 +7,11 @@ import xml.etree.ElementTree as ET from queue import Empty from threading import Thread import os +import random +import shutil from base.func_zhipu import ZhiPu from base.func_cogview import CogView +from base.func_aliyun_image import AliyunImage from wcferry import Wcf, WxMsg @@ -84,15 +87,27 @@ class Robot(Job): self.LOG.info(f"已选择: {self.chat}") + # 初始化图像生成服务 if hasattr(self.config, 'COGVIEW') and CogView.value_check(self.config.COGVIEW): self.cogview = CogView(self.config.COGVIEW) - self.LOG.info("图像生成服务已初始化") + self.LOG.info("图像生成服务(CogView)已初始化") else: self.cogview = None if hasattr(self.config, 'COGVIEW'): - self.LOG.info("图像生成服务未启用或配置不正确") + self.LOG.info("图像生成服务(CogView)未启用或配置不正确") else: self.LOG.info("配置中未找到COGVIEW配置部分") + + # 初始化阿里文生图服务 + if hasattr(self.config, 'ALIYUN_IMAGE') and AliyunImage.value_check(self.config.ALIYUN_IMAGE): + self.aliyun_image = AliyunImage(self.config.ALIYUN_IMAGE) + self.LOG.info("阿里文生图服务已初始化") + else: + self.aliyun_image = None + if hasattr(self.config, 'ALIYUN_IMAGE'): + self.LOG.info("阿里文生图服务未启用或配置不正确") + else: + self.LOG.info("配置中未找到ALIYUN_IMAGE配置部分") @staticmethod def value_check(args: dict) -> bool: @@ -100,51 +115,155 @@ class Robot(Job): return all(value is not None for key, value in args.items() if key != 'proxy') return False + def handle_image_generation(self, service_type, prompt, receiver, at_user=None): + """处理图像生成请求的通用函数 + + :param service_type: 服务类型,'cogview'或'aliyun' + :param prompt: 图像生成提示词 + :param receiver: 接收者ID + :param at_user: 被@的用户ID,用于群聊 + :return: 处理状态,True成功,False失败 + """ + if service_type == 'cogview': + if not self.cogview or not hasattr(self.config, 'COGVIEW') or not self.config.COGVIEW.get('enable', False): + self.LOG.info(f"收到图像生成请求但功能未启用: {prompt}") + fallback_to_chat = self.config.COGVIEW.get('fallback_to_chat', False) if hasattr(self.config, 'COGVIEW') else False + if not fallback_to_chat: + self.sendTextMsg("报一丝,图像生成功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) + return True + return False + + service = self.cogview + wait_message = "正在生成图像,请稍等..." + + elif service_type == 'aliyun': + if not self.aliyun_image or not hasattr(self.config, 'ALIYUN_IMAGE') or not self.config.ALIYUN_IMAGE.get('enable', False): + self.LOG.info(f"收到阿里文生图请求但功能未启用: {prompt}") + fallback_to_chat = self.config.ALIYUN_IMAGE.get('fallback_to_chat', False) if hasattr(self.config, 'ALIYUN_IMAGE') else False + if not fallback_to_chat: + self.sendTextMsg("报一丝,阿里文生图功能没有开启,请联系管理员开启此功能。", receiver, at_user) + return True + return False + + service = self.aliyun_image + model_type = self.config.ALIYUN_IMAGE.get('model', '') + if model_type == 'wanx2.1-t2i-plus': + wait_message = "当前模型为阿里PLUS模型,生成速度较慢,请耐心等候..." + elif model_type == 'wanx-v1': + wait_message = "当前模型为阿里V1模型,生成速度非常慢,可能需要等待较长时间,请耐心等候..." + else: + wait_message = "正在生成图像,请稍等..." + else: + self.LOG.error(f"未知的图像生成服务类型: {service_type}") + return False + + self.LOG.info(f"收到图像生成请求 [{service_type}]: {prompt}") + self.sendTextMsg(wait_message, receiver, at_user) + + image_url = service.generate_image(prompt) + + if image_url and image_url.startswith("http"): + try: + self.LOG.info(f"开始下载图片: {image_url}") + image_path = service.download_image(image_url) + + if image_path: + # 创建一个临时副本,避免文件占用问题 + temp_dir = os.path.dirname(image_path) + file_ext = os.path.splitext(image_path)[1] + temp_copy = os.path.join( + temp_dir, + f"temp_{service_type}_{int(time.time())}_{random.randint(1000, 9999)}{file_ext}" + ) + + try: + # 创建文件副本 + shutil.copy2(image_path, temp_copy) + self.LOG.info(f"创建临时副本: {temp_copy}") + + # 发送临时副本 + self.LOG.info(f"发送图片到 {receiver}: {temp_copy}") + self.wcf.send_image(temp_copy, receiver) + + # 等待一小段时间确保微信API完成处理 + time.sleep(1.5) + + except Exception as e: + self.LOG.error(f"创建或发送临时副本失败: {str(e)}") + # 如果副本处理失败,尝试直接发送原图 + self.LOG.info(f"尝试直接发送原图: {image_path}") + self.wcf.send_image(image_path, receiver) + + # 安全删除文件 + self._safe_delete_file(image_path) + if os.path.exists(temp_copy): + self._safe_delete_file(temp_copy) + + else: + self.LOG.warning(f"图片下载失败,发送URL链接作为备用: {image_url}") + self.sendTextMsg(f"图像已生成,但无法自动显示,点链接也能查看:\n{image_url}", receiver, at_user) + except Exception as e: + self.LOG.error(f"发送图片过程出错: {str(e)}") + self.sendTextMsg(f"图像已生成,但发送过程出错,点链接也能查看:\n{image_url}", receiver, at_user) + else: + self.LOG.error(f"图像生成失败: {image_url}") + self.sendTextMsg(f"图像生成失败: {image_url}", receiver, at_user) + + return True + + def _safe_delete_file(self, file_path, max_retries=3, retry_delay=1.0): + """安全删除文件,带有重试机制 + + :param file_path: 要删除的文件路径 + :param max_retries: 最大重试次数 + :param retry_delay: 重试间隔(秒) + :return: 是否成功删除 + """ + if not os.path.exists(file_path): + return True + + for attempt in range(max_retries): + try: + os.remove(file_path) + self.LOG.info(f"成功删除文件: {file_path}") + return True + except Exception as e: + if attempt < max_retries - 1: + self.LOG.warning(f"删除文件 {file_path} 失败, 将在 {retry_delay} 秒后重试: {str(e)}") + time.sleep(retry_delay) + else: + self.LOG.error(f"无法删除文件 {file_path} 经过 {max_retries} 次尝试: {str(e)}") + + return False + def toAt(self, msg: WxMsg) -> bool: """处理被 @ 消息 :param msg: 微信消息结构 :return: 处理状态,`True` 成功,`False` 失败 """ - trigger = self.config.COGVIEW.get('trigger_keyword', '画一张') if hasattr(self.config, 'COGVIEW') else '画一张' + # CogView触发词 + cogview_trigger = self.config.COGVIEW.get('trigger_keyword', '牛智谱') if hasattr(self.config, 'COGVIEW') else '牛智谱' + # 阿里文生图触发词 + aliyun_trigger = self.config.ALIYUN_IMAGE.get('trigger_keyword', '牛阿里') if hasattr(self.config, 'ALIYUN_IMAGE') else '牛阿里' + content = re.sub(r"@.*?[\u2005|\s]", "", msg.content).replace(" ", "") - if content.startswith(trigger): - if self.cogview and hasattr(self.config, 'COGVIEW') and self.config.COGVIEW.get('enable', False): - prompt = content[len(trigger):].strip() - if prompt: - self.LOG.info(f"群聊中收到图像生成请求: {prompt}") - self.sendTextMsg("正在生成图像,请稍等...", msg.roomid, msg.sender) - image_url = self.cogview.generate_image(prompt) - - if image_url and image_url.startswith("http"): - try: - self.LOG.info(f"开始下载图片: {image_url}") - image_path = self.cogview.download_image(image_url) - - if image_path: - self.LOG.info(f"发送图片到群: {image_path}") - self.wcf.send_image(image_path, msg.roomid) - os.remove(image_path) # 发送后删除临时文件 - else: - self.LOG.warning(f"图片下载失败,发送URL链接作为备用: {image_url}") - self.sendTextMsg(f"图像已生成,但无法自动显示,点链接也能查看:\n{image_url}", msg.roomid, msg.sender) - except Exception as e: - self.LOG.error(f"发送图片过程出错: {str(e)}") - self.sendTextMsg(f"图像已生成,但发送过程出错,点链接也能查看:\n{image_url}", msg.roomid, msg.sender) - else: - self.LOG.error(f"图像生成失败: {image_url}") - self.sendTextMsg(f"图像生成失败: {image_url}", msg.roomid, msg.sender) + + # 阿里文生图处理 + if content.startswith(aliyun_trigger): + prompt = content[len(aliyun_trigger):].strip() + if prompt: + result = self.handle_image_generation('aliyun', prompt, msg.roomid, msg.sender) + if result: return True - else: - self.LOG.info("群聊中收到图像生成请求但功能未启用") - - fallback_to_chat = self.config.COGVIEW.get('fallback_to_chat', False) if hasattr(self.config, 'COGVIEW') else False - if fallback_to_chat and self.chat: - self.LOG.info("将画图请求转发给聊天模型处理") - return self.toChitchat(msg) - else: - self.sendTextMsg("报一丝,图像生成功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", msg.roomid, msg.sender) + # 原有CogView处理 + elif content.startswith(cogview_trigger): + prompt = content[len(cogview_trigger):].strip() + if prompt: + result = self.handle_image_generation('cogview', prompt, msg.roomid, msg.sender) + if result: return True + return self.toChitchat(msg) def toChengyu(self, msg: WxMsg) -> bool: @@ -230,44 +349,21 @@ class Robot(Job): self.config.reload() self.LOG.info("已更新") else: - trigger = self.config.COGVIEW.get('trigger_keyword', '画一张') if hasattr(self.config, 'COGVIEW') else '画一张' - if msg.content.startswith(trigger): - if self.cogview and hasattr(self.config, 'COGVIEW') and self.config.COGVIEW.get('enable', False): - prompt = msg.content[len(trigger):].strip() - if prompt: - self.LOG.info(f"收到图像生成请求: {prompt}") - self.sendTextMsg("正在生成图像,请稍等...", msg.sender) - image_url = self.cogview.generate_image(prompt) - - if image_url and image_url.startswith("http"): - try: - self.LOG.info(f"开始下载图片: {image_url}") - image_path = self.cogview.download_image(image_url) - - if image_path: - self.LOG.info(f"发送图片: {image_path}") - self.wcf.send_image(image_path, msg.sender) - os.remove(image_path) # 发送后删除临时文件 - else: - self.LOG.warning(f"图片下载失败,发送URL链接作为备用: {image_url}") - self.sendTextMsg(f"图像已生成,但无法自动显示,点链接也能查看:\n{image_url}", msg.sender) - except Exception as e: - self.LOG.error(f"发送图片过程出错: {str(e)}") - self.sendTextMsg(f"图像已生成,但发送过程出错,点链接也能查看:\n{image_url}", msg.sender) - else: - self.LOG.error(f"图像生成失败: {image_url}") - self.sendTextMsg(f"图像生成失败: {image_url}", msg.sender) + # 阿里文生图触发词处理 + aliyun_trigger = self.config.ALIYUN_IMAGE.get('trigger_keyword', '牛阿里') if hasattr(self.config, 'ALIYUN_IMAGE') else '牛阿里' + if msg.content.startswith(aliyun_trigger): + prompt = msg.content[len(aliyun_trigger):].strip() + if prompt: + result = self.handle_image_generation('aliyun', prompt, msg.sender) + if result: return - else: - self.LOG.info("私聊中收到图像生成请求但功能未启用") - - fallback_to_chat = self.config.COGVIEW.get('fallback_to_chat', False) if hasattr(self.config, 'COGVIEW') else False - - if fallback_to_chat and self.chat: - self.LOG.info("将画图请求转发给聊天模型处理") - return self.toChitchat(msg) - else: - self.sendTextMsg("报一丝,图像生成功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", msg.sender) + + cogview_trigger = self.config.COGVIEW.get('trigger_keyword', '牛智谱') if hasattr(self.config, 'COGVIEW') else '牛智谱' + if msg.content.startswith(cogview_trigger): + prompt = msg.content[len(cogview_trigger):].strip() + if prompt: + result = self.handle_image_generation('cogview', prompt, msg.sender) + if result: return self.toChitchat(msg) # 闲聊 -- Gitee From 933e830f91fa2c724c989f02d0f4813809500fd9 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Tue, 8 Apr 2025 17:08:32 +0800 Subject: [PATCH 08/13] =?UTF-8?q?=E9=87=8D=E6=9E=84=E5=9B=BE=E5=83=8F?= =?UTF-8?q?=E7=94=9F=E6=88=90=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E8=B0=B7=E6=AD=8CGemini=E5=9B=BE=E5=83=8F=E7=94=9F=E6=88=90?= =?UTF-8?q?=E6=94=AF=E6=8C=81=EF=BC=8C=E6=9B=B4=E6=96=B0=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=EF=BC=8C=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E7=BB=93=E6=9E=84=EF=BC=8C=E5=B0=86=E6=96=87=E7=94=9F=E5=9B=BE?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=94=BE=E5=85=A5=E5=8D=95=E7=8B=AC=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=A4=B9=EF=BC=8C=E5=B9=B6=E5=85=B6=E5=AE=9A=E4=B9=89?= =?UTF-8?q?=E4=B8=BA=E6=A0=87=E5=87=86Python=E5=8C=85=EF=BC=8C=E6=96=B9?= =?UTF-8?q?=E4=BE=BF=E8=B0=83=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml.template | 10 +- image/__init__.py | 13 ++ {base => image}/func_aliyun_image.py | 0 {base => image}/func_cogview.py | 0 image/func_gemini_image.py | 132 ++++++++++++++++++ ...77\347\224\250\350\257\264\346\230\216.MD" | 60 ++++++++ requirements.txt | 2 +- robot.py | 66 +++++++-- 8 files changed, 271 insertions(+), 12 deletions(-) create mode 100644 image/__init__.py rename {base => image}/func_aliyun_image.py (100%) rename {base => image}/func_cogview.py (100%) create mode 100644 image/func_gemini_image.py create mode 100644 "image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" diff --git a/config.yaml.template b/config.yaml.template index 42fef2f..8a4c891 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -133,4 +133,12 @@ aliyun_image: # -----如果要使用阿里云文生图,取消下面的注释 n: 1 # 生成图像的数量 temp_dir: ./temp # 临时文件存储路径 trigger_keyword: 牛阿里 # 触发词,默认为"牛阿里" - fallback_to_chat: true # 当服务不可用时是否转发给聊天模型处理 \ No newline at end of file + fallback_to_chat: true # 当服务不可用时是否转发给聊天模型处理 + +gemini_image: # -----谷歌AI画图配置这行不填----- + enable: true # 是否启用谷歌AI画图功能 + api_key: # 谷歌Gemini API密钥,必填 + model: gemini-2.0-flash-exp-image-generation # 模型名称,建议保持默认,只有这一个模型可以进行绘画 + temp_dir: ./geminiimg # 图片保存目录,可选 + trigger_keyword: 牛谷歌 # 触发词,默认为"牛谷歌" + fallback_to_chat: false # 未启用时是否回退到聊天模式 diff --git a/image/__init__.py b/image/__init__.py new file mode 100644 index 0000000..6849a9c --- /dev/null +++ b/image/__init__.py @@ -0,0 +1,13 @@ +"""图像生成功能模块 + +包含以下功能: +- CogView: 智谱AI文生图 +- AliyunImage: 阿里云文生图 +- GeminiImage: 谷歌Gemini文生图 +""" + +from .func_cogview import CogView +from .func_aliyun_image import AliyunImage +from .func_gemini_image import GeminiImage + +__all__ = ['CogView', 'AliyunImage', 'GeminiImage'] \ No newline at end of file diff --git a/base/func_aliyun_image.py b/image/func_aliyun_image.py similarity index 100% rename from base/func_aliyun_image.py rename to image/func_aliyun_image.py diff --git a/base/func_cogview.py b/image/func_cogview.py similarity index 100% rename from base/func_cogview.py rename to image/func_cogview.py diff --git a/image/func_gemini_image.py b/image/func_gemini_image.py new file mode 100644 index 0000000..8dabde0 --- /dev/null +++ b/image/func_gemini_image.py @@ -0,0 +1,132 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +import base64 +import logging +import os +import mimetypes +import time +import random +from google import genai +from google.genai import types + +class GeminiImage: + """谷歌AI画图API调用 + """ + + @staticmethod + def value_check(args: dict) -> bool: + try: + return bool(args and args.get("api_key", "")) + except Exception: + return False + + def __init__(self, config={}) -> None: + self.LOG = logging.getLogger("GeminiImage") + if not config: + raise Exception("缺少配置信息") + + self.api_key = config.get("api_key", "") + self.model = config.get("model", "gemini-2.0-flash-exp-image-generation") + self.enable = config.get("enable", True) + + # 确定临时目录 + project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + default_img_dir = os.path.join(project_dir, "geminiimg") + self.temp_dir = config.get("temp_dir", default_img_dir) + + # 确保临时目录存在 + if not os.path.exists(self.temp_dir): + os.makedirs(self.temp_dir) + + self.LOG.info("GeminiImage 已初始化") + + def generate_image(self, prompt: str) -> str: + """生成图像并返回图像文件路径或URL + + Args: + prompt (str): 图像描述 + + Returns: + str: 生成的图像路径或错误信息 + """ + if not self.enable or not self.api_key: + return "谷歌AI画图功能未启用或API密钥未配置" + + try: + # 初始化Google AI客户端 + client = genai.Client(api_key=self.api_key) + + # 配置生成请求 + contents = [ + types.Content( + role="user", + parts=[types.Part.from_text(text=prompt)], + ) + ] + + generate_content_config = types.GenerateContentConfig( + response_modalities=["image", "text"], + response_mime_type="text/plain", + ) + + # 生成图片 + response = client.models.generate_content( + model=self.model, + contents=contents, + config=generate_content_config, + ) + + # 处理响应 + if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts: + for part in response.candidates[0].content.parts: + if hasattr(part, 'inline_data') and part.inline_data: + # 保存图片到临时文件 + file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" + file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" + file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") + + with open(file_path, "wb") as f: + f.write(part.inline_data.data) + + self.LOG.info(f"图片已保存到: {file_path}") + return file_path + + return "图像生成失败,未收到有效响应" + + except Exception as e: + error_str = str(e) + self.LOG.error(f"图像生成出错: {error_str}") + + if "violated" in error_str.lower() or "policy" in error_str.lower() or "inappropriate" in error_str.lower(): + self.LOG.warning(f"检测到违规内容请求: {prompt}") + return "很抱歉,您的请求可能包含违规内容,无法生成图像" + + return "图像生成失败,请调整您的描述后重试" + + def download_image(self, image_path: str) -> str: + """ + 因为Gemini API直接返回图像数据,所以这里直接返回图像路径 + + Args: + image_path (str): 图片路径 + + Returns: + str: 本地图片文件路径 + """ + return image_path + +if __name__ == "__main__": + # 测试代码 + import sys + from configuration import Config + + config = Config().GEMINI_IMAGE if hasattr(Config(), 'GEMINI_IMAGE') else None + if not config: + print("未找到GEMINI_IMAGE配置") + sys.exit(1) + + gemini = GeminiImage(config) + prompt = "一只可爱的猫咪在阳光下玩耍" + image_path = gemini.generate_image(prompt) + print(f"生成图像路径: {image_path}") diff --git "a/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" "b/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" new file mode 100644 index 0000000..a8e7bc0 --- /dev/null +++ "b/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" @@ -0,0 +1,60 @@ +# Gemini 图像生成配置说明 + +在`config.yaml`中进行以下配置才可以调用: +```yaml +cogview: # -----智谱AI图像生成配置这行不填----- + # 此API请参考 https://www.bigmodel.cn/dev/api/image-model/cogview + enable: False # 是否启用图像生成功能,默认关闭,将False替换为true则开启,此模型可和其他模型同时运行。 + api_key: # 智谱API密钥,请填入您的API Key + model: cogview-4-250304 # 模型编码,可选:cogview-4-250304、cogview-4、cogview-3-flash + quality: standard # 生成质量,可选:standard(快速)、hd(高清) + size: 1024x1024 # 图片尺寸,可自定义,需符合条件 + trigger_keyword: 牛智谱 # 触发图像生成的关键词 + temp_dir: # 临时文件存储目录,留空则默认使用项目目录下的zhipuimg文件夹,如果要更改,例如 D:/Pictures/temp 或 /home/user/temp + fallback_to_chat: true # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 + +aliyun_image: # -----如果要使用阿里云文生图,取消下面的注释并填写相关内容,模型到阿里云百炼找通义万相-文生图2.1-Turbo----- + enable: true # 是否启用阿里文生图功能,false为关闭,默认开启,如果未配置,则会将消息发送给聊天大模型 + api_key: sk-xxxxxxxxxxxxxxxxxxxxxxxx # 替换为你的DashScope API密钥 + model: wanx2.1-t2i-turbo # 模型名称,默认使用wanx2.1-t2i-turbo(快),wanx2.1-t2i-plus(中),wanx-v1(慢),会给用户不同的提示! + size: 1024*1024 # 图像尺寸,格式为宽*高 + n: 1 # 生成图像的数量 + temp_dir: ./temp # 临时文件存储路径 + trigger_keyword: 牛阿里 # 触发词,默认为"牛阿里" + fallback_to_chat: true # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 + +gemini_image: # -----谷歌AI画图配置这行不填----- + api_key: your-api-key-here # 谷歌Gemini API密钥,必填 + model: gemini-2.0-flash-exp-image-generation # 模型名称,建议保持默认,只有这一个模型可以进行绘画 + enable: true # 是否启用谷歌AI画图功能 + temp_dir: ./geminiimg # 图片保存目录,可选 + trigger_keyword: 牛谷歌 # 触发词,默认为"牛谷歌" + fallback_to_chat: false # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 +``` + +## 如何获取API密钥 + +1. 访问 [Google AI Studio](https://aistudio.google.com/) +2. 创建一个账号或登录 +3. 访问 [API Keys](https://aistudio.google.com/app/apikeys) 页面 +4. 创建一个新的API密钥 +5. 复制API密钥并填入配置文件 + +## 使用方法 + +直接发送消息或在群聊中@机器人,使用触发词加提示词,例如: + +# 单人聊天的使用 +``` +牛智谱 一只可爱的猫咪在阳光下玩耍 +牛阿里 一只可爱的猫咪在阳光下玩耍 +牛谷歌 一只可爱的猫咪在阳光下玩耍 +``` +## 群组的使用方法 +``` +@ 牛图图 一只可爱的猫咪在阳光下玩耍 + +需要接入机器人的微信名称叫做牛图图 +``` + +生成的图片会自动发送到聊天窗口。 diff --git a/requirements.txt b/requirements.txt index 275fbe4..bc2b7d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ pillow jupyter_client zhdate ipykernel -google-generativeai +google-generativeai>=0.3.0 zhipuai>=1.0.0 ollama dashscope \ No newline at end of file diff --git a/robot.py b/robot.py index c63d879..13a7c1d 100644 --- a/robot.py +++ b/robot.py @@ -10,8 +10,7 @@ import os import random import shutil from base.func_zhipu import ZhiPu -from base.func_cogview import CogView -from base.func_aliyun_image import AliyunImage +from image import CogView, AliyunImage, GeminiImage from wcferry import Wcf, WxMsg @@ -108,6 +107,17 @@ class Robot(Job): self.LOG.info("阿里文生图服务未启用或配置不正确") else: self.LOG.info("配置中未找到ALIYUN_IMAGE配置部分") + + # 初始化谷歌AI画图服务 + if hasattr(self.config, 'GEMINI_IMAGE') and GeminiImage.value_check(self.config.GEMINI_IMAGE): + self.gemini_image = GeminiImage(self.config.GEMINI_IMAGE) + self.LOG.info("谷歌AI画图服务已初始化") + else: + self.gemini_image = None + if hasattr(self.config, 'GEMINI_IMAGE'): + self.LOG.info("谷歌AI画图服务未启用或配置不正确") + else: + self.LOG.info("配置中未找到GEMINI_IMAGE配置部分") @staticmethod def value_check(args: dict) -> bool: @@ -118,7 +128,7 @@ class Robot(Job): def handle_image_generation(self, service_type, prompt, receiver, at_user=None): """处理图像生成请求的通用函数 - :param service_type: 服务类型,'cogview'或'aliyun' + :param service_type: 服务类型,'cogview'/'aliyun'/'gemini' :param prompt: 图像生成提示词 :param receiver: 接收者ID :param at_user: 被@的用户ID,用于群聊 @@ -126,10 +136,10 @@ class Robot(Job): """ if service_type == 'cogview': if not self.cogview or not hasattr(self.config, 'COGVIEW') or not self.config.COGVIEW.get('enable', False): - self.LOG.info(f"收到图像生成请求但功能未启用: {prompt}") + self.LOG.info(f"收到智谱文生图请求但功能未启用: {prompt}") fallback_to_chat = self.config.COGVIEW.get('fallback_to_chat', False) if hasattr(self.config, 'COGVIEW') else False if not fallback_to_chat: - self.sendTextMsg("报一丝,图像生成功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) + self.sendTextMsg("报一丝,智谱文生图功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) return True return False @@ -141,7 +151,7 @@ class Robot(Job): self.LOG.info(f"收到阿里文生图请求但功能未启用: {prompt}") fallback_to_chat = self.config.ALIYUN_IMAGE.get('fallback_to_chat', False) if hasattr(self.config, 'ALIYUN_IMAGE') else False if not fallback_to_chat: - self.sendTextMsg("报一丝,阿里文生图功能没有开启,请联系管理员开启此功能。", receiver, at_user) + self.sendTextMsg("报一丝,阿里文生图功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) return True return False @@ -153,6 +163,18 @@ class Robot(Job): wait_message = "当前模型为阿里V1模型,生成速度非常慢,可能需要等待较长时间,请耐心等候..." else: wait_message = "正在生成图像,请稍等..." + + elif service_type == 'gemini': + if not self.gemini_image or not hasattr(self.config, 'GEMINI_IMAGE') or not self.config.GEMINI_IMAGE.get('enable', False): + self.LOG.info(f"收到谷歌AI画图请求但功能未启用: {prompt}") + fallback_to_chat = self.config.GEMINI_IMAGE.get('fallback_to_chat', False) if hasattr(self.config, 'GEMINI_IMAGE') else False + if not fallback_to_chat: + self.sendTextMsg("报一丝,谷歌文生图功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) + return True + return False + + service = self.gemini_image + wait_message = "正在通过谷歌AI生成图像,请稍等..." else: self.LOG.error(f"未知的图像生成服务类型: {service_type}") return False @@ -162,10 +184,14 @@ class Robot(Job): image_url = service.generate_image(prompt) - if image_url and image_url.startswith("http"): + if image_url and (image_url.startswith("http") or os.path.exists(image_url)): try: - self.LOG.info(f"开始下载图片: {image_url}") - image_path = service.download_image(image_url) + self.LOG.info(f"开始处理图片: {image_url}") + # 谷歌API直接返回本地文件路径,无需下载 + if service_type == 'gemini': + image_path = image_url + else: + image_path = service.download_image(image_url) if image_path: # 创建一个临时副本,避免文件占用问题 @@ -245,6 +271,8 @@ class Robot(Job): cogview_trigger = self.config.COGVIEW.get('trigger_keyword', '牛智谱') if hasattr(self.config, 'COGVIEW') else '牛智谱' # 阿里文生图触发词 aliyun_trigger = self.config.ALIYUN_IMAGE.get('trigger_keyword', '牛阿里') if hasattr(self.config, 'ALIYUN_IMAGE') else '牛阿里' + # 谷歌AI画图触发词 + gemini_trigger = self.config.GEMINI_IMAGE.get('trigger_keyword', '牛谷歌') if hasattr(self.config, 'GEMINI_IMAGE') else '牛谷歌' content = re.sub(r"@.*?[\u2005|\s]", "", msg.content).replace(" ", "") @@ -256,7 +284,7 @@ class Robot(Job): if result: return True - # 原有CogView处理 + # CogView处理 elif content.startswith(cogview_trigger): prompt = content[len(cogview_trigger):].strip() if prompt: @@ -264,6 +292,14 @@ class Robot(Job): if result: return True + # 谷歌AI画图处理 + elif content.startswith(gemini_trigger): + prompt = content[len(gemini_trigger):].strip() + if prompt: + result = self.handle_image_generation('gemini', prompt, msg.roomid, msg.sender) + if result: + return True + return self.toChitchat(msg) def toChengyu(self, msg: WxMsg) -> bool: @@ -358,6 +394,7 @@ class Robot(Job): if result: return + # CogView触发词处理 cogview_trigger = self.config.COGVIEW.get('trigger_keyword', '牛智谱') if hasattr(self.config, 'COGVIEW') else '牛智谱' if msg.content.startswith(cogview_trigger): prompt = msg.content[len(cogview_trigger):].strip() @@ -365,6 +402,15 @@ class Robot(Job): result = self.handle_image_generation('cogview', prompt, msg.sender) if result: return + + # 谷歌AI画图触发词处理 + gemini_trigger = self.config.GEMINI_IMAGE.get('trigger_keyword', '牛谷歌') if hasattr(self.config, 'GEMINI_IMAGE') else '牛谷歌' + if msg.content.startswith(gemini_trigger): + prompt = msg.content[len(gemini_trigger):].strip() + if prompt: + result = self.handle_image_generation('gemini', prompt, msg.sender) + if result: + return self.toChitchat(msg) # 闲聊 -- Gitee From f723abb5e488955618b9f8a3a35936db1e2ddcdf Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Tue, 8 Apr 2025 20:17:11 +0800 Subject: [PATCH 09/13] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=B0=B7=E6=AD=8CGemin?= =?UTF-8?q?i=E5=9B=BE=E5=83=8F=E7=94=9F=E6=88=90=E6=9C=8D=E5=8A=A1?= =?UTF-8?q?=E7=9A=84=E5=88=9D=E5=A7=8B=E5=8C=96=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E7=A1=AE=E4=BF=9D=E6=9C=8D=E5=8A=A1=E9=BB=98=E8=AE=A4=E5=90=AF?= =?UTF-8?q?=E7=94=A8=E5=B9=B6=E6=B7=BB=E5=8A=A0API=E5=AF=86=E9=92=A5?= =?UTF-8?q?=E9=AA=8C=E8=AF=81=EF=BC=8C=E6=94=B9=E8=BF=9B=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E5=A4=84=E7=90=86=E5=92=8C=E6=97=A5=E5=BF=97=E8=AE=B0=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- image/func_gemini_image.py | 66 ++++++++++++++++++--------- robot.py | 92 +++++++++++++++++++------------------- 2 files changed, 93 insertions(+), 65 deletions(-) diff --git a/image/func_gemini_image.py b/image/func_gemini_image.py index 8dabde0..b6ab520 100644 --- a/image/func_gemini_image.py +++ b/image/func_gemini_image.py @@ -7,8 +7,8 @@ import os import mimetypes import time import random -from google import genai -from google.genai import types +import google.generativeai as genai +from google.generativeai import types class GeminiImage: """谷歌AI画图API调用 @@ -17,18 +17,20 @@ class GeminiImage: @staticmethod def value_check(args: dict) -> bool: try: - return bool(args and args.get("api_key", "")) + # 修改检查逻辑,如果配置存在就返回True + return bool(args) except Exception: return False def __init__(self, config={}) -> None: self.LOG = logging.getLogger("GeminiImage") - if not config: - raise Exception("缺少配置信息") - - self.api_key = config.get("api_key", "") + + # 默认值 + self.enable = True + + # API密钥可以从环境变量获取或配置文件 + self.api_key = config.get("api_key", "") or os.environ.get("GEMINI_API_KEY", "") self.model = config.get("model", "gemini-2.0-flash-exp-image-generation") - self.enable = config.get("enable", True) # 确定临时目录 project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -38,8 +40,14 @@ class GeminiImage: # 确保临时目录存在 if not os.path.exists(self.temp_dir): os.makedirs(self.temp_dir) - - self.LOG.info("GeminiImage 已初始化") + self.LOG.info(f"创建Gemini图像临时目录: {self.temp_dir}") + + # 验证API密钥是否有效 + if not self.api_key: + self.LOG.warning("未配置谷歌Gemini API密钥,请在config.yaml中设置GEMINI_IMAGE.api_key或设置环境变量GEMINI_API_KEY") + # 虽然没有API密钥,但仍然保持服务启用,以便在handle_image_generation中显示友好错误消息 + else: + self.LOG.info("谷歌Gemini图像生成功能已初始化并默认开启") def generate_image(self, prompt: str) -> str: """生成图像并返回图像文件路径或URL @@ -70,16 +78,26 @@ class GeminiImage: response_mime_type="text/plain", ) - # 生成图片 - response = client.models.generate_content( + # 使用流式模式生成图片 + response_text = "" + image_path = None + + # 使用流式API获取响应 + for chunk in client.models.generate_content_stream( model=self.model, contents=contents, config=generate_content_config, - ) - - # 处理响应 - if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts: - for part in response.candidates[0].content.parts: + ): + # 处理文本部分 + if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts: + continue + + for part in chunk.candidates[0].content.parts: + # 处理文本 + if hasattr(part, 'text') and part.text: + response_text += part.text + + # 处理图像数据 if hasattr(part, 'inline_data') and part.inline_data: # 保存图片到临时文件 file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" @@ -90,9 +108,17 @@ class GeminiImage: f.write(part.inline_data.data) self.LOG.info(f"图片已保存到: {file_path}") - return file_path + image_path = file_path + + # 记录生成的文本响应 + if response_text: + self.LOG.info(f"模型生成的文本响应: {response_text}") - return "图像生成失败,未收到有效响应" + # 如果成功生成图像,返回路径 + if image_path: + return image_path + else: + return "图像生成失败,未收到有效响应" except Exception as e: error_str = str(e) @@ -102,7 +128,7 @@ class GeminiImage: self.LOG.warning(f"检测到违规内容请求: {prompt}") return "很抱歉,您的请求可能包含违规内容,无法生成图像" - return "图像生成失败,请调整您的描述后重试" + return f"图像生成失败,请调整您的描述后重试: {error_str}" def download_image(self, image_path: str) -> str: """ diff --git a/robot.py b/robot.py index 13a7c1d..4f36190 100644 --- a/robot.py +++ b/robot.py @@ -87,38 +87,40 @@ class Robot(Job): self.LOG.info(f"已选择: {self.chat}") # 初始化图像生成服务 - if hasattr(self.config, 'COGVIEW') and CogView.value_check(self.config.COGVIEW): - self.cogview = CogView(self.config.COGVIEW) - self.LOG.info("图像生成服务(CogView)已初始化") - else: - self.cogview = None - if hasattr(self.config, 'COGVIEW'): - self.LOG.info("图像生成服务(CogView)未启用或配置不正确") + self.cogview = None + self.aliyun_image = None + self.gemini_image = None + + # 优先初始化Gemini图像生成服务 - 确保默认启用 + try: + # 不管配置如何,都强制初始化Gemini服务 + if hasattr(self.config, 'GEMINI_IMAGE'): + self.gemini_image = GeminiImage(self.config.GEMINI_IMAGE) else: - self.LOG.info("配置中未找到COGVIEW配置部分") - - # 初始化阿里文生图服务 - if hasattr(self.config, 'ALIYUN_IMAGE') and AliyunImage.value_check(self.config.ALIYUN_IMAGE): - self.aliyun_image = AliyunImage(self.config.ALIYUN_IMAGE) - self.LOG.info("阿里文生图服务已初始化") - else: - self.aliyun_image = None - if hasattr(self.config, 'ALIYUN_IMAGE'): - self.LOG.info("阿里文生图服务未启用或配置不正确") + # 如果没有配置,使用空字典初始化,会使用默认值和环境变量 + self.gemini_image = GeminiImage({}) + + if self.gemini_image.enable: + self.LOG.info("谷歌Gemini图像生成功能已初始化并启用") else: - self.LOG.info("配置中未找到ALIYUN_IMAGE配置部分") + self.LOG.info("谷歌AI画图功能未启用,未配置API密钥") + except Exception as e: + self.LOG.error(f"初始化谷歌Gemini图像生成服务失败: {str(e)}") + + # 初始化CogView和AliyunImage服务 + if hasattr(self.config, 'COGVIEW') and self.config.COGVIEW.get('enable', False): + try: + self.cogview = CogView(self.config.COGVIEW) + self.LOG.info("智谱CogView文生图功能已初始化") + except Exception as e: + self.LOG.error(f"初始化智谱CogView文生图服务失败: {str(e)}") + if hasattr(self.config, 'ALIYUN_IMAGE') and self.config.ALIYUN_IMAGE.get('enable', False): + try: + self.aliyun_image = AliyunImage(self.config.ALIYUN_IMAGE) + self.LOG.info("阿里云文生图功能已初始化") + except Exception as e: + self.LOG.error(f"初始化阿里云文生图服务失败: {str(e)}") - # 初始化谷歌AI画图服务 - if hasattr(self.config, 'GEMINI_IMAGE') and GeminiImage.value_check(self.config.GEMINI_IMAGE): - self.gemini_image = GeminiImage(self.config.GEMINI_IMAGE) - self.LOG.info("谷歌AI画图服务已初始化") - else: - self.gemini_image = None - if hasattr(self.config, 'GEMINI_IMAGE'): - self.LOG.info("谷歌AI画图服务未启用或配置不正确") - else: - self.LOG.info("配置中未找到GEMINI_IMAGE配置部分") - @staticmethod def value_check(args: dict) -> bool: if args: @@ -127,7 +129,6 @@ class Robot(Job): def handle_image_generation(self, service_type, prompt, receiver, at_user=None): """处理图像生成请求的通用函数 - :param service_type: 服务类型,'cogview'/'aliyun'/'gemini' :param prompt: 图像生成提示词 :param receiver: 接收者ID @@ -142,10 +143,8 @@ class Robot(Job): self.sendTextMsg("报一丝,智谱文生图功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) return True return False - service = self.cogview wait_message = "正在生成图像,请稍等..." - elif service_type == 'aliyun': if not self.aliyun_image or not hasattr(self.config, 'ALIYUN_IMAGE') or not self.config.ALIYUN_IMAGE.get('enable', False): self.LOG.info(f"收到阿里文生图请求但功能未启用: {prompt}") @@ -154,7 +153,6 @@ class Robot(Job): self.sendTextMsg("报一丝,阿里文生图功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) return True return False - service = self.aliyun_image model_type = self.config.ALIYUN_IMAGE.get('model', '') if model_type == 'wanx2.1-t2i-plus': @@ -163,15 +161,18 @@ class Robot(Job): wait_message = "当前模型为阿里V1模型,生成速度非常慢,可能需要等待较长时间,请耐心等候..." else: wait_message = "正在生成图像,请稍等..." - elif service_type == 'gemini': - if not self.gemini_image or not hasattr(self.config, 'GEMINI_IMAGE') or not self.config.GEMINI_IMAGE.get('enable', False): - self.LOG.info(f"收到谷歌AI画图请求但功能未启用: {prompt}") - fallback_to_chat = self.config.GEMINI_IMAGE.get('fallback_to_chat', False) if hasattr(self.config, 'GEMINI_IMAGE') else False - if not fallback_to_chat: - self.sendTextMsg("报一丝,谷歌文生图功能没有开启,请联系管理员开启此功能。(可以贿赂他开启)", receiver, at_user) - return True - return False + if not self.gemini_image: + # 服务实例不存在的情况 + self.LOG.info(f"收到谷歌AI画图请求但服务未初始化: {prompt}") + self.sendTextMsg("谷歌文生图服务初始化失败,请联系管理员检查日志", receiver, at_user) + return True + + # 直接检查API密钥是否有效 + if not getattr(self.gemini_image, 'api_key', ''): + self.LOG.info(f"收到谷歌AI画图请求但API密钥未配置: {prompt}") + self.sendTextMsg("谷歌文生图功能需要配置API密钥,请联系管理员设置API密钥", receiver, at_user) + return True service = self.gemini_image wait_message = "正在通过谷歌AI生成图像,请稍等..." @@ -224,7 +225,7 @@ class Robot(Job): self._safe_delete_file(image_path) if os.path.exists(temp_copy): self._safe_delete_file(temp_copy) - + else: self.LOG.warning(f"图片下载失败,发送URL链接作为备用: {image_url}") self.sendTextMsg(f"图像已生成,但无法自动显示,点链接也能查看:\n{image_url}", receiver, at_user) @@ -296,9 +297,10 @@ class Robot(Job): elif content.startswith(gemini_trigger): prompt = content[len(gemini_trigger):].strip() if prompt: - result = self.handle_image_generation('gemini', prompt, msg.roomid, msg.sender) - if result: - return True + return self.handle_image_generation('gemini', prompt, msg.roomid or msg.sender, msg.sender if msg.roomid else None) + else: + self.sendTextMsg(f"请在{gemini_trigger}后面添加您想要生成的图像描述", msg.roomid or msg.sender, msg.sender if msg.roomid else None) + return True return self.toChitchat(msg) -- Gitee From c1abfcb0230811181c438936599aa9e664022193 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Tue, 8 Apr 2025 22:32:17 +0800 Subject: [PATCH 10/13] =?UTF-8?q?=E6=9B=B4=E6=96=B0Gemini=E5=9B=BE?= =?UTF-8?q?=E5=83=8F=E7=94=9F=E6=88=90=E9=85=8D=E7=BD=AE=EF=BC=8C=E7=A1=AE?= =?UTF-8?q?=E4=BF=9D=E9=BB=98=E8=AE=A4=E5=90=AF=E7=94=A8=E5=B9=B6=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0API=E5=AF=86=E9=92=A5=E9=AA=8C=E8=AF=81=EF=BC=8C?= =?UTF-8?q?=E6=94=B9=E8=BF=9B=E9=94=99=E8=AF=AF=E5=A4=84=E7=90=86=E5=92=8C?= =?UTF-8?q?=E6=97=A5=E5=BF=97=E8=AE=B0=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configuration.py | 2 + image/func_gemini_image.py | 92 +++++++++++++++----------------------- 2 files changed, 37 insertions(+), 57 deletions(-) diff --git a/configuration.py b/configuration.py index 135a1a8..368d138 100644 --- a/configuration.py +++ b/configuration.py @@ -13,6 +13,7 @@ class Config(object): def __init__(self) -> None: self.COGVIEW = {} self.ALIYUN_IMAGE = {} + self.GEMINI_IMAGE = {} # 确保GEMINI_IMAGE有默认值 self.reload() def _load_config(self) -> dict: @@ -46,4 +47,5 @@ class Config(object): self.DEEPSEEK = yconfig.get("deepseek", {}) self.COGVIEW = yconfig.get("cogview", {}) self.ALIYUN_IMAGE = yconfig.get("aliyun_image", {}) + self.GEMINI_IMAGE = yconfig.get("gemini_image", {}) self.SEND_RATE_LIMIT = yconfig.get("send_rate_limit", 0) diff --git a/image/func_gemini_image.py b/image/func_gemini_image.py index b6ab520..56ea056 100644 --- a/image/func_gemini_image.py +++ b/image/func_gemini_image.py @@ -8,7 +8,6 @@ import mimetypes import time import random import google.generativeai as genai -from google.generativeai import types class GeminiImage: """谷歌AI画图API调用 @@ -25,7 +24,7 @@ class GeminiImage: def __init__(self, config={}) -> None: self.LOG = logging.getLogger("GeminiImage") - # 默认值 + # 强制启用,忽略配置中的enable字段 self.enable = True # API密钥可以从环境变量获取或配置文件 @@ -45,9 +44,15 @@ class GeminiImage: # 验证API密钥是否有效 if not self.api_key: self.LOG.warning("未配置谷歌Gemini API密钥,请在config.yaml中设置GEMINI_IMAGE.api_key或设置环境变量GEMINI_API_KEY") - # 虽然没有API密钥,但仍然保持服务启用,以便在handle_image_generation中显示友好错误消息 + self.enable = False else: - self.LOG.info("谷歌Gemini图像生成功能已初始化并默认开启") + try: + # 配置API密钥 + genai.configure(api_key=self.api_key) + self.LOG.info("谷歌Gemini图像生成功能已初始化并默认开启") + except Exception as e: + self.LOG.error(f"初始化Gemini API失败: {str(e)}") + self.enable = False def generate_image(self, prompt: str) -> str: """生成图像并返回图像文件路径或URL @@ -62,63 +67,36 @@ class GeminiImage: return "谷歌AI画图功能未启用或API密钥未配置" try: - # 初始化Google AI客户端 - client = genai.Client(api_key=self.api_key) + # 修复API调用方式 + genai.configure(api_key=self.api_key) - # 配置生成请求 - contents = [ - types.Content( - role="user", - parts=[types.Part.from_text(text=prompt)], - ) - ] - - generate_content_config = types.GenerateContentConfig( - response_modalities=["image", "text"], - response_mime_type="text/plain", - ) + # 使用当前版本的API调用方式 + model = genai.GenerativeModel(self.model) - # 使用流式模式生成图片 - response_text = "" - image_path = None - - # 使用流式API获取响应 - for chunk in client.models.generate_content_stream( - model=self.model, - contents=contents, - config=generate_content_config, - ): - # 处理文本部分 - if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts: - continue - - for part in chunk.candidates[0].content.parts: - # 处理文本 - if hasattr(part, 'text') and part.text: - response_text += part.text - - # 处理图像数据 - if hasattr(part, 'inline_data') and part.inline_data: - # 保存图片到临时文件 - file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" - file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" - file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") - - with open(file_path, "wb") as f: - f.write(part.inline_data.data) - - self.LOG.info(f"图片已保存到: {file_path}") - image_path = file_path + # 配置生成请求 + response = model.generate_content(prompt) - # 记录生成的文本响应 - if response_text: - self.LOG.info(f"模型生成的文本响应: {response_text}") + # 处理响应和图像生成 + if hasattr(response, 'candidates') and response.candidates: + for candidate in response.candidates: + if hasattr(candidate, 'content') and candidate.content: + for part in candidate.content.parts: + if hasattr(part, 'inline_data') and part.inline_data: + # 保存图片到临时文件 + file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" + file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" + file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") + + with open(file_path, "wb") as f: + f.write(part.inline_data.data) + + self.LOG.info(f"图片已保存到: {file_path}") + return file_path + elif hasattr(response, 'text'): + self.LOG.info(f"模型返回了文本而非图像: {response.text[:100]}...") + return f"生成图像失败,模型返回了文本而非图像" - # 如果成功生成图像,返回路径 - if image_path: - return image_path - else: - return "图像生成失败,未收到有效响应" + return "图像生成失败,未收到有效响应" except Exception as e: error_str = str(e) -- Gitee From 4f8eba3f626643f0afe627bbb40abe50b70632a2 Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 9 Apr 2025 03:24:12 +0800 Subject: [PATCH 11/13] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=9B=BE=E5=83=8F?= =?UTF-8?q?=E7=94=9F=E6=88=90=E6=9C=8D=E5=8A=A1=E7=9A=84=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E8=AE=B0=E5=BD=95=EF=BC=8C=E7=A7=BB=E9=99=A4=E4=B8=8D=E5=BF=85?= =?UTF-8?q?=E8=A6=81=E7=9A=84=E5=88=9D=E5=A7=8B=E5=8C=96=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=EF=BC=8C=E6=9B=B4=E6=96=B0=E4=BE=9D=E8=B5=96=E9=A1=B9=E4=BB=A5?= =?UTF-8?q?=E5=8C=85=E5=90=ABgoogle-genai=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml.template | 1 + image/func_aliyun_image.py | 2 - image/func_cogview.py | 1 - image/func_gemini_image.py | 142 ++++++++++++++++++++++++++----------- requirements.txt | 3 +- robot.py | 4 +- 6 files changed, 103 insertions(+), 50 deletions(-) diff --git a/config.yaml.template b/config.yaml.template index 8a4c891..f2bb541 100644 --- a/config.yaml.template +++ b/config.yaml.template @@ -142,3 +142,4 @@ gemini_image: # -----谷歌AI画图配置这行不填----- temp_dir: ./geminiimg # 图片保存目录,可选 trigger_keyword: 牛谷歌 # 触发词,默认为"牛谷歌" fallback_to_chat: false # 未启用时是否回退到聊天模式 + proxy: http://127.0.0.1:7890 # 使用Clash代理,格式为:http://域名或者IP地址:端口号 diff --git a/image/func_aliyun_image.py b/image/func_aliyun_image.py index b6d9f29..2e5fc43 100644 --- a/image/func_aliyun_image.py +++ b/image/func_aliyun_image.py @@ -41,8 +41,6 @@ class AliyunImage(): # 设置API密钥 dashscope.api_key = self.api_key - - self.LOG.info("AliyunImage 已初始化") def generate_image(self, prompt: str) -> str: """生成图像并返回图像URL diff --git a/image/func_cogview.py b/image/func_cogview.py index 01cba46..a7a72b7 100644 --- a/image/func_cogview.py +++ b/image/func_cogview.py @@ -21,7 +21,6 @@ class CogView(): if self.api_key: self.client = ZhipuAI(api_key=self.api_key) - self.LOG.info(f"CogView 初始化成功,模型:{self.model},质量:{self.quality},图片保存目录:{self.temp_dir}") else: self.LOG.warning("未配置智谱API密钥,图像生成功能无法使用") self.client = None diff --git a/image/func_gemini_image.py b/image/func_gemini_image.py index 56ea056..6cfa19a 100644 --- a/image/func_gemini_image.py +++ b/image/func_gemini_image.py @@ -7,7 +7,12 @@ import os import mimetypes import time import random -import google.generativeai as genai +from io import BytesIO +from PIL import Image + +# 替换为官方推荐的 SDK +from google import genai +from google.genai import types class GeminiImage: """谷歌AI画图API调用 @@ -30,6 +35,8 @@ class GeminiImage: # API密钥可以从环境变量获取或配置文件 self.api_key = config.get("api_key", "") or os.environ.get("GEMINI_API_KEY", "") self.model = config.get("model", "gemini-2.0-flash-exp-image-generation") + # 读取代理设置 + self.proxy = config.get("proxy", "") # 确定临时目录 project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -47,9 +54,13 @@ class GeminiImage: self.enable = False else: try: - # 配置API密钥 - genai.configure(api_key=self.api_key) - self.LOG.info("谷歌Gemini图像生成功能已初始化并默认开启") + # 配置API密钥和代理 + if self.proxy: + os.environ["HTTP_PROXY"] = self.proxy + os.environ["HTTPS_PROXY"] = self.proxy + + # 使用新SDK的配置方式 + self.client = genai.Client(api_key=self.api_key) except Exception as e: self.LOG.error(f"初始化Gemini API失败: {str(e)}") self.enable = False @@ -67,46 +78,106 @@ class GeminiImage: return "谷歌AI画图功能未启用或API密钥未配置" try: - # 修复API调用方式 - genai.configure(api_key=self.api_key) + # 确保每次生成图像时都应用代理设置 + if self.proxy: + os.environ["HTTP_PROXY"] = self.proxy + os.environ["HTTPS_PROXY"] = self.proxy + + # 修改提示词格式,明确指示生成图像 + image_prompt = f"生成一张高质量的图片: {prompt}。请直接提供图像,不需要描述。" + + self.LOG.info(f"使用google-genai SDK发送请求: {image_prompt}") - # 使用当前版本的API调用方式 - model = genai.GenerativeModel(self.model) + # 使用新SDK的API调用方式 + response = self.client.models.generate_content( + model=self.model, + contents=image_prompt, + config=types.GenerateContentConfig( + response_modalities=['Text', 'Image'] + ) + ) - # 配置生成请求 - response = model.generate_content(prompt) + # 添加详细日志,记录响应结构 + self.LOG.info(f"收到API响应: {type(response)}") - # 处理响应和图像生成 + # 新SDK响应结构的处理 if hasattr(response, 'candidates') and response.candidates: + self.LOG.info(f"找到候选结果: {len(response.candidates)}个") for candidate in response.candidates: if hasattr(candidate, 'content') and candidate.content: + content = candidate.content + self.LOG.info(f"处理候选内容: {type(content)}") + + if hasattr(content, 'parts'): + parts = content.parts + self.LOG.info(f"内容包含 {len(parts)} 个部分") + + # 遍历所有部分寻找图片 + for part in parts: + # 检查是否为图像数据 + if hasattr(part, 'inline_data') and part.inline_data: + self.LOG.info(f"找到图像数据: {part.inline_data.mime_type}") + + # 保存图片到临时文件 + file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" + file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" + file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") + + with open(file_path, "wb") as f: + f.write(part.inline_data.data) + + self.LOG.info(f"图片已保存到: {file_path}") + return file_path + # 检查文本内容部分 + elif hasattr(part, 'text') and part.text: + self.LOG.info(f"包含文本部分: {part.text[:100]}...") + + # 检查是否返回了纯文本而不是图像 + text_content = None + try: + text_content = response.text + except (AttributeError, TypeError): + pass + + if not text_content and hasattr(response, 'candidates') and response.candidates: + for candidate in response.candidates: + if hasattr(candidate, 'content'): for part in candidate.content.parts: - if hasattr(part, 'inline_data') and part.inline_data: - # 保存图片到临时文件 - file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" - file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" - file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") - - with open(file_path, "wb") as f: - f.write(part.inline_data.data) - - self.LOG.info(f"图片已保存到: {file_path}") - return file_path - elif hasattr(response, 'text'): - self.LOG.info(f"模型返回了文本而非图像: {response.text[:100]}...") - return f"生成图像失败,模型返回了文本而非图像" + if hasattr(part, 'text') and part.text: + text_content = part.text + break - return "图像生成失败,未收到有效响应" + if text_content: + self.LOG.warning(f"模型仅返回了文本而非图像: {text_content[:100]}...") + return f"模型未能生成图像,仅返回了文本: {text_content[:100]}..." + + # 记录完整响应信息以便调试 + self.LOG.error(f"未知响应格式或未找到图像: {repr(response)[:500]}") + self.LOG.info("检查是否需要更新模型或调整提示词") + + return "图像生成失败,使用的模型可能不支持图像生成。请尝试使用'gemini-1.5-flash-latest'或其他支持图像生成的模型。" except Exception as e: error_str = str(e) self.LOG.error(f"图像生成出错: {error_str}") + # 添加额外错误上下文 + if "timeout" in error_str.lower() or "time" in error_str.lower(): + proxy_status = f"当前代理: {self.proxy or '未设置'}" + self.LOG.info(f"超时错误,{proxy_status}") + return f"图像生成超时,请检查网络或代理设置。{proxy_status}" + if "violated" in error_str.lower() or "policy" in error_str.lower() or "inappropriate" in error_str.lower(): self.LOG.warning(f"检测到违规内容请求: {prompt}") return "很抱歉,您的请求可能包含违规内容,无法生成图像" + + # 改进API相关错误信息 + if "config" in error_str.lower() or "modalities" in error_str.lower(): + return f"API调用错误: {error_str}。请尝试更新google-genai库: pip install -U google-genai" - return f"图像生成失败,请调整您的描述后重试: {error_str}" + import traceback + self.LOG.error(f"详细异常信息: {traceback.format_exc()}") + return f"图像生成失败: {error_str}" def download_image(self, image_path: str) -> str: """ @@ -118,19 +189,4 @@ class GeminiImage: Returns: str: 本地图片文件路径 """ - return image_path - -if __name__ == "__main__": - # 测试代码 - import sys - from configuration import Config - - config = Config().GEMINI_IMAGE if hasattr(Config(), 'GEMINI_IMAGE') else None - if not config: - print("未找到GEMINI_IMAGE配置") - sys.exit(1) - - gemini = GeminiImage(config) - prompt = "一只可爱的猫咪在阳光下玩耍" - image_path = gemini.generate_image(prompt) - print(f"生成图像路径: {image_path}") + return image_path \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bc2b7d2..37a28fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ ipykernel google-generativeai>=0.3.0 zhipuai>=1.0.0 ollama -dashscope \ No newline at end of file +dashscope +google-genai \ No newline at end of file diff --git a/robot.py b/robot.py index 4f36190..2fc841a 100644 --- a/robot.py +++ b/robot.py @@ -102,8 +102,6 @@ class Robot(Job): if self.gemini_image.enable: self.LOG.info("谷歌Gemini图像生成功能已初始化并启用") - else: - self.LOG.info("谷歌AI画图功能未启用,未配置API密钥") except Exception as e: self.LOG.error(f"初始化谷歌Gemini图像生成服务失败: {str(e)}") @@ -117,7 +115,7 @@ class Robot(Job): if hasattr(self.config, 'ALIYUN_IMAGE') and self.config.ALIYUN_IMAGE.get('enable', False): try: self.aliyun_image = AliyunImage(self.config.ALIYUN_IMAGE) - self.LOG.info("阿里云文生图功能已初始化") + self.LOG.info("阿里Aliyun功能已初始化") except Exception as e: self.LOG.error(f"初始化阿里云文生图服务失败: {str(e)}") -- Gitee From cf999c2963acf7c791090d8fcec4a30b6b89472e Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 9 Apr 2025 04:27:25 +0800 Subject: [PATCH 12/13] =?UTF-8?q?=E6=96=B0=E5=A2=9EGemini=E6=96=87?= =?UTF-8?q?=E7=94=9F=E5=9B=BE=E5=8A=9F=E8=83=BD=EF=BC=8C=E8=B0=83=E6=95=B4?= =?UTF-8?q?=E6=96=87=E7=94=9F=E5=9B=BE=E7=9B=B8=E5=85=B3=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E7=9A=84=E5=AD=98=E6=94=BE=E4=BD=8D=E7=BD=AE=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E4=B8=8D=E5=BF=85=E8=A6=81=E7=9A=84=E6=B3=A8=E9=87=8A?= =?UTF-8?q?=E3=80=81=E6=97=A5=E5=BF=97=E3=80=81=E5=92=8C=E5=86=97=E4=BD=99?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=EF=BC=8C=E5=87=8F=E5=B0=91=E5=90=AF=E5=8A=A8?= =?UTF-8?q?=E6=97=B6=E6=8E=A7=E5=88=B6=E5=8F=B0=E7=9A=84=E8=BE=93=E5=87=BA?= =?UTF-8?q?=E5=86=85=E5=AE=B9=EF=BC=8C=E4=BC=98=E5=8C=96=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E7=9A=84=E5=A4=84=E7=90=86=E3=80=82=E4=BF=AE=E6=94=B9=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=E6=96=87=E4=BB=B6=EF=BC=8C=E5=90=91=E5=BC=80=E5=8F=91?= =?UTF-8?q?=E8=80=85=E8=AF=B4=E6=98=8E=EF=BC=9A=E4=BD=BF=E7=94=A8=E8=A7=84?= =?UTF-8?q?=E5=88=99=E7=9A=84=E5=88=9B=E5=BB=BA=E6=96=B9=E5=BC=8F=E6=9D=A5?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E5=8A=9F=E8=83=BD=E6=96=B0=E5=A2=9E=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.MD | 9 + base/func_deepseek.py | 4 - image/func_aliyun_image.py | 2 + image/func_gemini_image.py | 177 +++++------------- ...77\347\224\250\350\257\264\346\230\216.MD" | 16 +- main.py | 6 +- robot.py | 27 +-- 7 files changed, 84 insertions(+), 157 deletions(-) diff --git a/README.MD b/README.MD index 83302cb..3da98b6 100644 --- a/README.MD +++ b/README.MD @@ -153,6 +153,15 @@ deepseek: sk-xxxxxxxxxxxxxxx # -----deepseek配置这行不填----- show_reasoning: false # 是否在回复中显示思维过程,仅在启用思维链功能时有效 ``` +## 至开发者 +``` +在接入图片生成的相关功能时,可将调用文件放入image文件夹内。 +在 image/__init__.py 文件内加入对应的模块儿,以便作为Python包来调用。 +在 configuration.py 内也要加入相关的代码,否则会初始化失败。 +文生图相关功能,全部默认开启,如果均未配置,全部转接至语言大模型。 +``` +[文生图功能的使用说明](./image/文生图功能的使用说明.MD) + ## HTTP 如需要使用 HTTP 接口,请参考: * [wcfrust](https://github.com/lich0821/wcf-client-rust)(基于 Rust),开箱即用:[快速开始](https://github.com/lich0821/wcf-client-rust?tab=readme-ov-file#%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B)。 diff --git a/base/func_deepseek.py b/base/func_deepseek.py index 5d8846d..d0b1d17 100644 --- a/base/func_deepseek.py +++ b/base/func_deepseek.py @@ -34,10 +34,6 @@ class DeepSeek(): self.system_content_msg = {"role": "system", "content": prompt} - reasoning_status = "开启" if self.enable_reasoning else "关闭" - reasoning_display = "显示" if self.show_reasoning else "隐藏" - self.LOG.info(f"使用 DeepSeek 模型: {self.model}, 思维链功能: {reasoning_status}({reasoning_display}), 模型支持思维链: {'是' if self.reasoning_supported else '否'}") - def __repr__(self): return 'DeepSeek' diff --git a/image/func_aliyun_image.py b/image/func_aliyun_image.py index 2e5fc43..a93d876 100644 --- a/image/func_aliyun_image.py +++ b/image/func_aliyun_image.py @@ -41,6 +41,8 @@ class AliyunImage(): # 设置API密钥 dashscope.api_key = self.api_key + + # 不要记录初始化日志 def generate_image(self, prompt: str) -> str: """生成图像并返回图像URL diff --git a/image/func_gemini_image.py b/image/func_gemini_image.py index 6cfa19a..708a2bd 100644 --- a/image/func_gemini_image.py +++ b/image/func_gemini_image.py @@ -1,16 +1,11 @@ #! /usr/bin/env python3 # -*- coding: utf-8 -*- -import base64 import logging import os import mimetypes import time import random -from io import BytesIO -from PIL import Image - -# 替换为官方推荐的 SDK from google import genai from google.genai import types @@ -18,77 +13,47 @@ class GeminiImage: """谷歌AI画图API调用 """ - @staticmethod - def value_check(args: dict) -> bool: - try: - # 修改检查逻辑,如果配置存在就返回True - return bool(args) - except Exception: - return False - def __init__(self, config={}) -> None: self.LOG = logging.getLogger("GeminiImage") - # 强制启用,忽略配置中的enable字段 - self.enable = True - - # API密钥可以从环境变量获取或配置文件 + self.enable = config.get("enable", True) self.api_key = config.get("api_key", "") or os.environ.get("GEMINI_API_KEY", "") self.model = config.get("model", "gemini-2.0-flash-exp-image-generation") - # 读取代理设置 self.proxy = config.get("proxy", "") - # 确定临时目录 project_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - default_img_dir = os.path.join(project_dir, "geminiimg") - self.temp_dir = config.get("temp_dir", default_img_dir) + self.temp_dir = config.get("temp_dir", os.path.join(project_dir, "geminiimg")) - # 确保临时目录存在 if not os.path.exists(self.temp_dir): os.makedirs(self.temp_dir) - self.LOG.info(f"创建Gemini图像临时目录: {self.temp_dir}") - # 验证API密钥是否有效 if not self.api_key: - self.LOG.warning("未配置谷歌Gemini API密钥,请在config.yaml中设置GEMINI_IMAGE.api_key或设置环境变量GEMINI_API_KEY") self.enable = False - else: - try: - # 配置API密钥和代理 - if self.proxy: - os.environ["HTTP_PROXY"] = self.proxy - os.environ["HTTPS_PROXY"] = self.proxy - - # 使用新SDK的配置方式 - self.client = genai.Client(api_key=self.api_key) - except Exception as e: - self.LOG.error(f"初始化Gemini API失败: {str(e)}") - self.enable = False + return + + try: + # 设置代理 + if self.proxy: + os.environ["HTTP_PROXY"] = self.proxy + os.environ["HTTPS_PROXY"] = self.proxy + + # 初始化客户端 + self.client = genai.Client(api_key=self.api_key) + except Exception: + self.enable = False def generate_image(self, prompt: str) -> str: - """生成图像并返回图像文件路径或URL - - Args: - prompt (str): 图像描述 - - Returns: - str: 生成的图像路径或错误信息 + """生成图像并返回图像文件路径 """ - if not self.enable or not self.api_key: - return "谷歌AI画图功能未启用或API密钥未配置" - try: - # 确保每次生成图像时都应用代理设置 + # 设置代理 if self.proxy: os.environ["HTTP_PROXY"] = self.proxy os.environ["HTTPS_PROXY"] = self.proxy - # 修改提示词格式,明确指示生成图像 image_prompt = f"生成一张高质量的图片: {prompt}。请直接提供图像,不需要描述。" - self.LOG.info(f"使用google-genai SDK发送请求: {image_prompt}") - - # 使用新SDK的API调用方式 + # 发送请求 response = self.client.models.generate_content( model=self.model, contents=image_prompt, @@ -97,96 +62,52 @@ class GeminiImage: ) ) - # 添加详细日志,记录响应结构 - self.LOG.info(f"收到API响应: {type(response)}") - - # 新SDK响应结构的处理 + # 处理响应 if hasattr(response, 'candidates') and response.candidates: - self.LOG.info(f"找到候选结果: {len(response.candidates)}个") for candidate in response.candidates: if hasattr(candidate, 'content') and candidate.content: - content = candidate.content - self.LOG.info(f"处理候选内容: {type(content)}") - - if hasattr(content, 'parts'): - parts = content.parts - self.LOG.info(f"内容包含 {len(parts)} 个部分") - - # 遍历所有部分寻找图片 - for part in parts: - # 检查是否为图像数据 - if hasattr(part, 'inline_data') and part.inline_data: - self.LOG.info(f"找到图像数据: {part.inline_data.mime_type}") - - # 保存图片到临时文件 - file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" - file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" - file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") - - with open(file_path, "wb") as f: - f.write(part.inline_data.data) - - self.LOG.info(f"图片已保存到: {file_path}") - return file_path - # 检查文本内容部分 - elif hasattr(part, 'text') and part.text: - self.LOG.info(f"包含文本部分: {part.text[:100]}...") + for part in candidate.content.parts: + if hasattr(part, 'inline_data') and part.inline_data: + # 保存图像 + file_name = f"gemini_image_{int(time.time())}_{random.randint(1000, 9999)}" + file_extension = mimetypes.guess_extension(part.inline_data.mime_type) or ".png" + file_path = os.path.join(self.temp_dir, f"{file_name}{file_extension}") + + with open(file_path, "wb") as f: + f.write(part.inline_data.data) + + return file_path - # 检查是否返回了纯文本而不是图像 - text_content = None + # 如果没有找到图像,尝试获取文本响应 try: text_content = response.text + if text_content: + return f"模型未能生成图像: {text_content[:100]}..." except (AttributeError, TypeError): pass - - if not text_content and hasattr(response, 'candidates') and response.candidates: - for candidate in response.candidates: - if hasattr(candidate, 'content'): - for part in candidate.content.parts: - if hasattr(part, 'text') and part.text: - text_content = part.text - break - - if text_content: - self.LOG.warning(f"模型仅返回了文本而非图像: {text_content[:100]}...") - return f"模型未能生成图像,仅返回了文本: {text_content[:100]}..." - # 记录完整响应信息以便调试 - self.LOG.error(f"未知响应格式或未找到图像: {repr(response)[:500]}") - self.LOG.info("检查是否需要更新模型或调整提示词") - - return "图像生成失败,使用的模型可能不支持图像生成。请尝试使用'gemini-1.5-flash-latest'或其他支持图像生成的模型。" + return "图像生成失败,可能需要更新模型或调整提示词" except Exception as e: error_str = str(e) self.LOG.error(f"图像生成出错: {error_str}") - # 添加额外错误上下文 - if "timeout" in error_str.lower() or "time" in error_str.lower(): - proxy_status = f"当前代理: {self.proxy or '未设置'}" - self.LOG.info(f"超时错误,{proxy_status}") - return f"图像生成超时,请检查网络或代理设置。{proxy_status}" - - if "violated" in error_str.lower() or "policy" in error_str.lower() or "inappropriate" in error_str.lower(): - self.LOG.warning(f"检测到违规内容请求: {prompt}") - return "很抱歉,您的请求可能包含违规内容,无法生成图像" + # 处理500错误 + if "500 INTERNAL" in error_str: + self.LOG.error("遇到谷歌服务器内部错误") + return "谷歌AI服务器临时故障,请稍后再试。这是谷歌服务器的问题,不是你的请求有误。" - # 改进API相关错误信息 - if "config" in error_str.lower() or "modalities" in error_str.lower(): - return f"API调用错误: {error_str}。请尝试更新google-genai库: pip install -U google-genai" + if "timeout" in error_str.lower(): + return "图像生成超时,请检查网络或代理设置" - import traceback - self.LOG.error(f"详细异常信息: {traceback.format_exc()}") - return f"图像生成失败: {error_str}" - - def download_image(self, image_path: str) -> str: - """ - 因为Gemini API直接返回图像数据,所以这里直接返回图像路径 - - Args: - image_path (str): 图片路径 + if "violated" in error_str.lower() or "policy" in error_str.lower(): + return "请求包含违规内容,无法生成图像" - Returns: - str: 本地图片文件路径 - """ - return image_path \ No newline at end of file + # 其他常见错误类型处理 + if "quota" in error_str.lower() or "rate" in error_str.lower(): + return "API使用配额已用尽或请求频率过高,请稍后再试" + + if "authentication" in error_str.lower() or "auth" in error_str.lower(): + return "API密钥验证失败,请联系管理员检查配置" + + return f"图像生成失败,错误原因: {error_str.split('.')[-1] if '.' in error_str else error_str}" \ No newline at end of file diff --git "a/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" "b/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" index a8e7bc0..dd563b2 100644 --- "a/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" +++ "b/image/\346\226\207\347\224\237\345\233\276\345\212\237\350\203\275\347\232\204\344\275\277\347\224\250\350\257\264\346\230\216.MD" @@ -1,6 +1,18 @@ -# Gemini 图像生成配置说明 +# 图像生成配置说明 +#### 文生图相关功能的加入,可在此说明文件内加入贡献者的GitHub链接,方便以后的更新,以及BUG的修改! + + + +智谱AI绘画:[JiQingzhe2004 (JiQingzhe)](https://github.com/JiQingzhe2004) + +阿里云AI绘画:[JiQingzhe2004 (JiQingzhe)](https://github.com/JiQingzhe2004) + +谷歌AI绘画:[JiQingzhe2004 (JiQingzhe)](https://github.com/JiQingzhe2004) + +------ 在`config.yaml`中进行以下配置才可以调用: + ```yaml cogview: # -----智谱AI图像生成配置这行不填----- # 此API请参考 https://www.bigmodel.cn/dev/api/image-model/cogview @@ -24,9 +36,9 @@ aliyun_image: # -----如果要使用阿里云文生图,取消下面的注释 fallback_to_chat: true # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 gemini_image: # -----谷歌AI画图配置这行不填----- + enable: true # 是否启用谷歌AI画图功能 api_key: your-api-key-here # 谷歌Gemini API密钥,必填 model: gemini-2.0-flash-exp-image-generation # 模型名称,建议保持默认,只有这一个模型可以进行绘画 - enable: true # 是否启用谷歌AI画图功能 temp_dir: ./geminiimg # 图片保存目录,可选 trigger_keyword: 牛谷歌 # 触发词,默认为"牛谷歌" fallback_to_chat: false # 当未启用绘画功能时:true=将请求发给聊天模型处理,false=回复固定的未启用提示信息 diff --git a/main.py b/main.py index 1870c00..a294d64 100644 --- a/main.py +++ b/main.py @@ -23,16 +23,16 @@ def main(chat_type: int): robot = Robot(config, wcf, chat_type) robot.LOG.info(f"WeChatRobot【{__version__}】成功启动···") - # 机器人启动发送测试消息 # 机器人启动发送测试消息 robot.sendTextMsg("机器人启动成功!\n" - "🎨 绘画功能使用说明:\n" + "绘画功能使用说明:\n" "• 智谱绘画:牛智谱[描述]\n" "• 阿里绘画:牛阿里[描述]\n" + "• 谷歌绘画:牛谷歌[描述]\n" "实例:\n" "牛阿里 画一张家乡\n" "@XX 牛阿里 画一张家乡\n" - "💬 聊天时直接发送消息即可", "filehelper") + "聊天时直接发送消息即可", "filehelper") # 接收消息 # robot.enableRecvMsg() # 可能会丢消息? diff --git a/robot.py b/robot.py index 2fc841a..ff4ddb8 100644 --- a/robot.py +++ b/robot.py @@ -91,19 +91,17 @@ class Robot(Job): self.aliyun_image = None self.gemini_image = None - # 优先初始化Gemini图像生成服务 - 确保默认启用 + # 初始化Gemini图像生成服务 try: - # 不管配置如何,都强制初始化Gemini服务 if hasattr(self.config, 'GEMINI_IMAGE'): self.gemini_image = GeminiImage(self.config.GEMINI_IMAGE) else: - # 如果没有配置,使用空字典初始化,会使用默认值和环境变量 self.gemini_image = GeminiImage({}) - if self.gemini_image.enable: - self.LOG.info("谷歌Gemini图像生成功能已初始化并启用") + if getattr(self.gemini_image, 'enable', False): + self.LOG.info("谷歌Gemini图像生成功能已启用") except Exception as e: - self.LOG.error(f"初始化谷歌Gemini图像生成服务失败: {str(e)}") + self.LOG.error(f"初始化谷歌Gemini图像生成服务失败: {e}") # 初始化CogView和AliyunImage服务 if hasattr(self.config, 'COGVIEW') and self.config.COGVIEW.get('enable', False): @@ -160,16 +158,8 @@ class Robot(Job): else: wait_message = "正在生成图像,请稍等..." elif service_type == 'gemini': - if not self.gemini_image: - # 服务实例不存在的情况 - self.LOG.info(f"收到谷歌AI画图请求但服务未初始化: {prompt}") - self.sendTextMsg("谷歌文生图服务初始化失败,请联系管理员检查日志", receiver, at_user) - return True - - # 直接检查API密钥是否有效 - if not getattr(self.gemini_image, 'api_key', ''): - self.LOG.info(f"收到谷歌AI画图请求但API密钥未配置: {prompt}") - self.sendTextMsg("谷歌文生图功能需要配置API密钥,请联系管理员设置API密钥", receiver, at_user) + if not self.gemini_image or not getattr(self.gemini_image, 'enable', False): + self.sendTextMsg("谷歌文生图服务未启用", receiver, at_user) return True service = self.gemini_image @@ -187,10 +177,7 @@ class Robot(Job): try: self.LOG.info(f"开始处理图片: {image_url}") # 谷歌API直接返回本地文件路径,无需下载 - if service_type == 'gemini': - image_path = image_url - else: - image_path = service.download_image(image_url) + image_path = image_url if service_type == 'gemini' else service.download_image(image_url) if image_path: # 创建一个临时副本,避免文件占用问题 -- Gitee From de7c6d19d9bf45f194cf957daaa9211079f6530e Mon Sep 17 00:00:00 2001 From: JiQingzhe2004 Date: Wed, 9 Apr 2025 10:39:01 +0800 Subject: [PATCH 13/13] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E7=BB=93=E6=9E=84=EF=BC=8C=E5=8E=BB=E6=8E=89=E9=87=8D=E5=A4=8D?= =?UTF-8?q?=E7=9A=84=E5=88=9D=E5=A7=8B=E5=8C=96=E9=BB=98=E8=AE=A4=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configuration.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/configuration.py b/configuration.py index 368d138..10bf629 100644 --- a/configuration.py +++ b/configuration.py @@ -11,9 +11,6 @@ import yaml class Config(object): def __init__(self) -> None: - self.COGVIEW = {} - self.ALIYUN_IMAGE = {} - self.GEMINI_IMAGE = {} # 确保GEMINI_IMAGE有默认值 self.reload() def _load_config(self) -> dict: -- Gitee