Ai
1 Star 0 Fork 0

codeMonkey/retrievalQA

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
ko_chatbot.py 7.16 KB
一键复制 编辑 原始数据 按行查看 历史
codeMonkey 提交于 2023-10-16 10:56 +08:00 . 初始化
# Embedding, text-splitting and retrieval helpers from the local echo_ai package.
# NOTE: the wildcard imports are relied upon to provide ChatOpenAI,
# SystemMessage, HumanMessage and List used below — verify against echo_ai.
from echo_ai.embeddings import *
import os
from langchain.document_loaders import WebBaseLoader
from echo_ai.splitters import *
from langchain.embeddings import OpenAIEmbeddings
from langchain import FAISS
from echo_ai.retrival import MyRetrival
from echo_ai.splitters import *  # NOTE: duplicate of the splitters import above
# SECURITY: real API keys are hard-coded below and committed to a public
# repository — they must be treated as leaked. Revoke them immediately and
# load credentials from the environment or a secrets manager instead of
# embedding them in source.
os.environ["OPENAI_API_KEY"] = "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"
os.environ["OPENAI_API_BASE"] = "https://api.openai-proxy.com/v1"
os.environ["SERPAPI_API_KEY"] = '8da25bc3675d83956013941474661366b365386475bde059dc6da49302312f47'
class KoChatBot:
    """Knowledge-base question-answering chatbot.

    Answers questions grounded in retrieved context from local FAISS
    knowledge bases, raw text, or a web page, using an OpenAI chat model.
    Responses are always in Chinese (enforced by the system prompt).
    """

    def __init__(self):
        # Retriever that searches one or more local FAISS knowledge bases.
        self.retrival = MyRetrival()
        # temperature=0 -> deterministic, fact-oriented answers; higher
        # values would make the model more creative but less precise.
        self.llm = ChatOpenAI(temperature=0)
        # Sentence splitter for mixed Chinese/English punctuation, keeping
        # one sentence of overlap between adjacent chunks for context.
        self.splitter = ChineseTextSplitter(
            ['.', '?', '!', ';', '。', '!', ';', '?', ';'],
            overlap_sentences=1,
        )
        self.embedding = HuggingFaceTextEmbedding(
            model_name='./model/embeddings/text2vec_large_chinese'
        )

    def init_ko_chatbot(self, db_dirs: List[str]):
        """Load the knowledge bases for the QA module.

        :param db_dirs: directories containing the persisted FAISS indexes.
        :return: self, to allow fluent chaining.
        """
        embeddings = [self.embedding]
        self.retrival.init_from_faiss_dbs(db_dirs, embeddings)
        return self

    def query2llm(self, query: str):
        """Chat with the LLM directly, without any retrieval context."""
        return self.llm.predict(query)

    def post_progress_data(self, docs_list: List[List]):
        """Flatten per-knowledge-base (doc, score) results and rank them.

        :param docs_list: one list of (document, score) pairs per KB.
        :return: [(page_content, score), ...] sorted by score, descending.
        """
        # Removed dead local `res = []` from the original implementation.
        docs = sorted(
            (d for ds in docs_list for d in ds),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return [(d[0].page_content, d[1]) for d in docs]

    @staticmethod
    def _build_prompt(context: str) -> str:
        """Build the grounded-answer system prompt around *context*.

        Shared by every retrieval-augmented query method; previously this
        prompt was copy-pasted four times.
        """
        return f"""
You are a helpful AI assistant.
The following are the relevant knowledge content fragments found from the knowledge base.
The relevance is sorted from high to low.
You can only answer according to the following content:
\n>>>\n{context}\n<<<\n
You need to carefully consider your answer to ensure that it is based on the context.
If the context does not mention the content or it is uncertain whether it is correct,
please answer "Current knowledge base cannot provide effective information."
You must use Chinese to respond.
"""

    def _answer_with_context(self, context: str, query: str) -> str:
        """Ask the LLM *query* constrained to *context*; return the answer text."""
        messages = [
            SystemMessage(content=self._build_prompt(context)),
            HumanMessage(content=query),
        ]
        return self.llm.predict_messages(messages).content

    def query2kb(self, query: str):
        """Retrieve relevant knowledge from the local KBs and answer.

        :param query: the user's question.
        :return: (answer_text, [(page_content, score), ...]) so callers can
                 show the supporting evidence alongside the answer.
        """
        docs = self.retrival.get_relevant_documents(query)
        docs = self.post_progress_data(docs)
        # join() instead of repeated `+=` (quadratic string concatenation).
        content = "".join(d[0] + '\n' for d in docs)
        return self._answer_with_context(content, query), docs

    def query2text(self, text: str, query: str):
        """Answer *query* using only the supplied *text* as context."""
        return self._answer_with_context(text, query)

    def query2longtext(self, text: str, query: str):
        """Answer *query* from *text*.

        NOTE: the original implementation was a byte-for-byte duplicate of
        query2text (no chunking of long input); kept as a delegating alias
        for backward compatibility. TODO: add real long-text handling.
        """
        return self.query2text(text, query)

    def query2url(self, url: str, query: str):
        """Fetch a web page, index it on the fly, and answer *query* from it.

        :param url: page to load (e.g. https://zhuanlan.zhihu.com/p/470784563).
        :param query: the user's question.
        :return: the LLM's answer text.
        """
        loader = WebBaseLoader(url)
        pages = loader.load()
        docs = self.splitter.split_documents(pages)
        # Build a throwaway FAISS index over the page chunks.
        db = FAISS.from_documents(docs, self.embedding)
        relevant_docs = db.similarity_search(query, k=10)
        text = "".join(d.page_content + '\n' for d in relevant_docs)
        return self._answer_with_context(text, query)
# Manual smoke test: answer a question grounded in a live web page.
# NOTE: requires network access and a valid OPENAI_API_KEY; not suitable
# for automated test runs.
if __name__ == '__main__':
    bot = KoChatBot()
    # text = "检索增强 LLM ( Retrieval Augmented LLM ),简单来说,就是给 LLM 提供外部数据库,对于用户问题 ( Query ),通过一些信息检索 ( Information Retrieval, IR ) 的技术,先从外部数据库中检索出和用户问题相关的信息,然后让 LLM 结合这些相关信息来生成结果。这种模式有时候也被称为 检索增强生成 ( Retrieval Augmented Generation, RAG )。下图是一个检索增强 LLM 的简单示意图。"
    # resp = bot.query2text(text,'介绍一下检索增强是什么')
    # print(resp)
    res = bot.query2url('https://zhuanlan.zhihu.com/p/470784563', '什么是检索增强')
    print(res)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/codemonkey9527/retrieval-qa.git
git@gitee.com:codemonkey9527/retrieval-qa.git
codemonkey9527
retrieval-qa
retrievalQA
master

搜索帮助