Ai
1 Star 0 Fork 0

codeMonkey/retrievalQA

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
ko_chatbot.py 7.16 KB
一键复制 编辑 原始数据 按行查看 历史
codeMonkey 提交于 2023-10-16 10:56 +08:00 . 初始化
# Embedding, text-splitting and retrieval helpers from the local echo_ai package.
# NOTE: the wildcard imports are relied upon to provide ChatOpenAI,
# SystemMessage, HumanMessage and List used below — verify against echo_ai.
from echo_ai.embeddings import *
import os
from langchain.document_loaders import WebBaseLoader
from echo_ai.splitters import *
from langchain.embeddings import OpenAIEmbeddings
from langchain import FAISS
from echo_ai.retrival import MyRetrival
from echo_ai.splitters import *  # NOTE: duplicate of the splitters import above
# SECURITY: real API keys are hard-coded below and committed to a public
# repository — they must be treated as leaked. Revoke them immediately and
# load credentials from the environment or a secrets manager instead of
# embedding them in source.
os.environ["OPENAI_API_KEY"] = "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"
os.environ["OPENAI_API_BASE"] = "https://api.openai-proxy.com/v1"
os.environ["SERPAPI_API_KEY"] = '8da25bc3675d83956013941474661366b365386475bde059dc6da49302312f47'
class KoChatBot:
    """Knowledge-base question-answering chatbot.

    Answers questions grounded in retrieved context from local FAISS
    knowledge bases, raw text, or a web page, using an OpenAI chat model.
    Responses are always in Chinese (enforced by the system prompt).
    """

    def __init__(self):
        # Retriever that searches one or more local FAISS knowledge bases.
        self.retrival = MyRetrival()
        # temperature=0 -> deterministic, fact-oriented answers; higher
        # values would make the model more creative but less precise.
        self.llm = ChatOpenAI(temperature=0)
        # Sentence splitter for mixed Chinese/English punctuation, keeping
        # one sentence of overlap between adjacent chunks for context.
        self.splitter = ChineseTextSplitter(
            ['.', '?', '!', ';', '。', '!', ';', '?', ';'],
            overlap_sentences=1,
        )
        self.embedding = HuggingFaceTextEmbedding(
            model_name='./model/embeddings/text2vec_large_chinese'
        )

    def init_ko_chatbot(self, db_dirs: List[str]):
        """Load the knowledge bases for the QA module.

        :param db_dirs: directories containing the persisted FAISS indexes.
        :return: self, to allow fluent chaining.
        """
        embeddings = [self.embedding]
        self.retrival.init_from_faiss_dbs(db_dirs, embeddings)
        return self

    def query2llm(self, query: str):
        """Chat with the LLM directly, without any retrieval context."""
        return self.llm.predict(query)

    def post_progress_data(self, docs_list: List[List]):
        """Flatten per-knowledge-base (doc, score) results and rank them.

        :param docs_list: one list of (document, score) pairs per KB.
        :return: [(page_content, score), ...] sorted by score, descending.
        """
        # Removed dead local `res = []` from the original implementation.
        docs = sorted(
            (d for ds in docs_list for d in ds),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return [(d[0].page_content, d[1]) for d in docs]

    @staticmethod
    def _build_prompt(context: str) -> str:
        """Build the grounded-answer system prompt around *context*.

        Shared by every retrieval-augmented query method; previously this
        prompt was copy-pasted four times.
        """
        return f"""
You are a helpful AI assistant.
The following are the relevant knowledge content fragments found from the knowledge base.
The relevance is sorted from high to low.
You can only answer according to the following content:
\n>>>\n{context}\n<<<\n
You need to carefully consider your answer to ensure that it is based on the context.
If the context does not mention the content or it is uncertain whether it is correct,
please answer "Current knowledge base cannot provide effective information."
You must use Chinese to respond.
"""

    def _answer_with_context(self, context: str, query: str) -> str:
        """Ask the LLM *query* constrained to *context*; return the answer text."""
        messages = [
            SystemMessage(content=self._build_prompt(context)),
            HumanMessage(content=query),
        ]
        return self.llm.predict_messages(messages).content

    def query2kb(self, query: str):
        """Retrieve relevant knowledge from the local KBs and answer.

        :param query: the user's question.
        :return: (answer_text, [(page_content, score), ...]) so callers can
                 show the supporting evidence alongside the answer.
        """
        docs = self.retrival.get_relevant_documents(query)
        docs = self.post_progress_data(docs)
        # join() instead of repeated `+=` (quadratic string concatenation).
        content = "".join(d[0] + '\n' for d in docs)
        return self._answer_with_context(content, query), docs

    def query2text(self, text: str, query: str):
        """Answer *query* using only the supplied *text* as context."""
        return self._answer_with_context(text, query)

    def query2longtext(self, text: str, query: str):
        """Answer *query* from *text*.

        NOTE: the original implementation was a byte-for-byte duplicate of
        query2text (no chunking of long input); kept as a delegating alias
        for backward compatibility. TODO: add real long-text handling.
        """
        return self.query2text(text, query)

    def query2url(self, url: str, query: str):
        """Fetch a web page, index it on the fly, and answer *query* from it.

        :param url: page to load (e.g. https://zhuanlan.zhihu.com/p/470784563).
        :param query: the user's question.
        :return: the LLM's answer text.
        """
        loader = WebBaseLoader(url)
        pages = loader.load()
        docs = self.splitter.split_documents(pages)
        # Build a throwaway FAISS index over the page chunks.
        db = FAISS.from_documents(docs, self.embedding)
        relevant_docs = db.similarity_search(query, k=10)
        text = "".join(d.page_content + '\n' for d in relevant_docs)
        return self._answer_with_context(text, query)
# Manual smoke test: answer a question grounded in a live web page.
# NOTE: requires network access and a valid OPENAI_API_KEY; not suitable
# for automated test runs.
if __name__ == '__main__':
    bot = KoChatBot()
    # text = "检索增强 LLM ( Retrieval Augmented LLM ),简单来说,就是给 LLM 提供外部数据库,对于用户问题 ( Query ),通过一些信息检索 ( Information Retrieval, IR ) 的技术,先从外部数据库中检索出和用户问题相关的信息,然后让 LLM 结合这些相关信息来生成结果。这种模式有时候也被称为 检索增强生成 ( Retrieval Augmented Generation, RAG )。下图是一个检索增强 LLM 的简单示意图。"
    # resp = bot.query2text(text,'介绍一下检索增强是什么')
    # print(resp)
    res = bot.query2url('https://zhuanlan.zhihu.com/p/470784563', '什么是检索增强')
    print(res)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/codemonkey9527/retrieval-qa.git
git@gitee.com:codemonkey9527/retrieval-qa.git
codemonkey9527
retrieval-qa
retrievalQA
master

搜索帮助