# (Removed: Gitee page boilerplate "代码拉取完成,页面将自动刷新" accidentally captured
#  into the file -- it is not part of the source code and broke Python parsing.)
import os
from typing import List

# Project-local star imports first (original order); the explicit langchain
# imports below then guarantee the names this module actually calls
# (ChatOpenAI, SystemMessage, HumanMessage) regardless of what * exports.
from echo_ai.embeddings import *
from echo_ai.splitters import *
from echo_ai.retrival import MyRetrival

from langchain import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import HumanMessage, SystemMessage

# SECURITY: real API keys were previously hard-coded on these lines and
# committed to the repository -- they must be treated as leaked and rotated.
# Read credentials from the process environment instead; setdefault keeps any
# value the operator has already exported.
os.environ.setdefault("OPENAI_API_KEY", "")
os.environ.setdefault("OPENAI_API_BASE", "https://api.openai-proxy.com/v1")
os.environ.setdefault("SERPAPI_API_KEY", "")
class KoChatBot:
    """Knowledge-base chat bot.

    Answers questions against local FAISS knowledge bases, raw text
    snippets, or the content of a web page, always constraining the LLM
    to the retrieved context via a shared system prompt.
    """

    def __init__(self):
        # Retriever that searches one or several local knowledge bases.
        self.retrival = MyRetrival()
        # temperature=0 -> most deterministic/factual answers
        # (higher values make the model more creative).
        self.llm = ChatOpenAI(temperature=0)
        # Sentence splitter for Chinese + Latin punctuation, keeping a
        # one-sentence overlap between adjacent chunks.
        self.splitter = ChineseTextSplitter(
            ['.', '?', '!', ';', '。', '!', ';', '?', ';'],
            overlap_sentences=1)
        # Local HuggingFace embedding model used to vectorize documents.
        self.embedding = HuggingFaceTextEmbedding(
            model_name='./model/embeddings/text2vec_large_chinese')

    def init_ko_chatbot(self, db_dirs: List[str]):
        """Initialize the knowledge bases for the KO question-answering module.

        :param db_dirs: directories containing persisted FAISS indexes.
        :return: self, allowing fluent chaining.
        """
        embeddings = [self.embedding]
        self.retrival.init_from_faiss_dbs(db_dirs, embeddings)
        return self

    def query2llm(self, query: str):
        """Send *query* straight to the LLM, with no retrieval context."""
        return self.llm.predict(query)

    def post_progress_data(self, docs_list: List[List]):
        """Flatten per-knowledge-base result lists and sort them by score.

        :param docs_list: one list per knowledge base, each item a
            (document, score) pair.
        :return: [(page_content, score), ...] sorted by score, highest first.

        NOTE(review): this assumes a higher score means "more relevant".
        If the score is a raw FAISS L2 distance, lower is better and the
        sort direction should be confirmed against MyRetrival's scoring.
        """
        flat = [pair for per_db in docs_list for pair in per_db]
        flat.sort(key=lambda pair: pair[1], reverse=True)
        return [(doc.page_content, score) for doc, score in flat]

    def _build_prompt(self, context: str) -> str:
        """Build the system prompt that restricts the LLM to *context*."""
        return f"""
You are a helpful AI assistant.
The following are the relevant knowledge content fragments found from the knowledge base.
The relevance is sorted from high to low.
You can only answer according to the following content:
\n>>>\n{context}\n<<<\n
You need to carefully consider your answer to ensure that it is based on the context.
If the context does not mention the content or it is uncertain whether it is correct,
please answer "Current knowledge base cannot provide effective information."
You must use Chinese to respond.
"""

    def _answer_with_context(self, context: str, query: str) -> str:
        """Ask the LLM *query* constrained to *context*; return the answer text."""
        messages = [
            SystemMessage(content=self._build_prompt(context)),
            HumanMessage(content=query),
        ]
        return self.llm.predict_messages(messages).content

    def query2kb(self, query: str):
        """Answer *query* from the local knowledge bases.

        :return: (answer_text, [(page_content, score), ...]) -- the answer
            plus the context fragments it was based on.
        """
        docs = self.post_progress_data(
            self.retrival.get_relevant_documents(query))
        content = ''.join(text + '\n' for text, _score in docs)
        return self._answer_with_context(content, query), docs

    def query2text(self, text: str, query: str):
        """Answer *query* using only the supplied *text* as context."""
        return self._answer_with_context(text, query)

    def query2longtext(self, text: str, query: str):
        """Answer *query* against a long text.

        TODO(review): currently identical to query2text -- the text is not
        chunked or retrieved first, so very long inputs may exceed the
        model's context window.
        """
        return self._answer_with_context(text, query)

    def query2url(self, url: str, query: str):
        """Answer *query* using the content of the web page at *url*.

        The page is fetched, split into sentence chunks, indexed in an
        in-memory FAISS store, and the 10 most similar chunks are used as
        the answering context.
        """
        # Reference: https://zhuanlan.zhihu.com/p/470784563
        pages = WebBaseLoader(url).load()
        chunks = self.splitter.split_documents(pages)
        store = FAISS.from_documents(chunks, self.embedding)
        relevant = store.similarity_search(query, k=10)
        context = ''.join(doc.page_content + '\n' for doc in relevant)
        return self._answer_with_context(context, query)
if __name__ == '__main__':
    # Example: answer a question from the content of a web page.
    bot = KoChatBot()
    answer = bot.query2url('https://zhuanlan.zhihu.com/p/470784563', '什么是检索增强')
    print(answer)
# (Removed: Gitee content-moderation notice boilerplate accidentally captured
#  into the file -- it is not part of the source code and broke Python parsing.)