import json
from typing import AsyncIterable
from uuid import UUID

from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_core.prompts.prompt import PromptTemplate
from logger import get_logger
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
from modules.chat.dto.chats import ChatQuestion

logger = get_logger(__name__)


class BigBrain(KnowledgeBrainQA):
"""
The BigBrain class integrates advanced conversational retrieval and language model chains
to provide comprehensive and context - aware responses to user queries .
2024-02-17 04:14:30 +03:00
2024-05-11 00:46:55 +03:00
It leverages a combination of document retrieval , question condensation , and document - based
question answering to generate responses that are informed by a wide range of knowledge sources .
2024-02-17 04:14:30 +03:00
"""
    def __init__(
        self,
        **kwargs,
    ):
"""
Initializes the BigBrain class with specific configurations .
Args :
* * kwargs : Arbitrary keyword arguments .
"""
        super().__init__(
            **kwargs,
        )

    def get_chain(self):
"""
Constructs and returns the conversational QA chain used by BigBrain .
Returns :
A ConversationalRetrievalChain instance .
"""
        system_template = """Combine these summaries in a way that makes sense and answer the user's question.
Use markdown or any other techniques to display the content in a nice and aerated way. Answer in the language of the question.
Here are user instructions on how to respond: {custom_personality}
______________________
{summaries}"""
        messages = [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
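
        # In LangChain's map_reduce QA chain, this combine prompt is the
        # "reduce" step: the per-document extracts produced by the question
        # prompt below are joined and injected as {summaries}.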

        ### Question prompt
        question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question.
Return any relevant text verbatim. Return the answer in the same language as the question. If the answer is not in the text, just say nothing in the same language as the question.
{context}
Question: {question}
Relevant text, if any, else say Nothing:"""
        QUESTION_PROMPT = PromptTemplate(
            template=question_prompt_template, input_variables=["context", "question"]
        )
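
        # The question prompt above is the "map" step: it runs once per
        # retrieved document to pull out only the passages relevant to the
        # question before the combine step merges them.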

        ### Condense Question Prompt
        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in exactly the same language as the original question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question in same language as question:"""
        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
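
        # The condense prompt above rewrites a follow-up question into a
        # standalone one using the chat history, so the retriever can work
        # without conversational context.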

        api_base = None
        if self.brain_settings.ollama_api_base_url and self.model.startswith("ollama"):
            api_base = self.brain_settings.ollama_api_base_url
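
        # ChatLiteLLM routes requests through LiteLLM; model names beginning
        # with "ollama" follow LiteLLM's "ollama/<model>" convention and are
        # sent to the local Ollama server configured above.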
        llm = ChatLiteLLM(
            temperature=0,
            model=self.model,
            api_base=api_base,
            max_tokens=self.max_tokens,
        )

        retriever_doc = self.knowledge_qa.get_retriever()
        question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
        doc_chain = load_qa_chain(
            llm,
            chain_type="map_reduce",
            question_prompt=QUESTION_PROMPT,
            combine_prompt=CHAT_COMBINE_PROMPT,
        )
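
        # Wire everything together: the question generator condenses the
        # follow-up, the retriever fetches documents for the standalone
        # question, and the map_reduce doc chain produces the final answer.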
        chain = ConversationalRetrievalChain(
            retriever=retriever_doc,
            question_generator=question_generator,
            combine_docs_chain=doc_chain,
        )
        return chain
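
    # Illustrative usage (a sketch, not part of the original source): the
    # chain returned above expects the same input keys that generate_stream
    # supplies, e.g.
    #
    #   chain = self.get_chain()
    #   result = await chain.ainvoke(
    #       {"question": "...", "chat_history": [], "custom_personality": None}
    #   )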

    async def generate_stream(
        self, chat_id: UUID, question: ChatQuestion, save_answer: bool = True
    ) -> AsyncIterable:
"""
Generates a stream of responses for a given question in real - time .
Args :
chat_id ( UUID ) : The unique identifier for the chat session .
question ( ChatQuestion ) : The question object containing the user ' s query.
save_answer ( bool ) : Flag indicating whether to save the answer to the chat history .
Returns :
An asynchronous iterable of response strings .
"""
        conversational_qa_chain = self.get_chain()
        transformed_history, streamed_chat_history = (
            self.initialize_streamed_chat_history(chat_id, question)
        )
        response_tokens = []

        async for chunk in conversational_qa_chain.astream(
            {
                "question": question.question,
                "chat_history": transformed_history,
" custom_personality " : (
self . prompt_to_use . content if self . prompt_to_use else None
) ,
            }
        ):
            if "answer" in chunk:
                # Accumulate tokens for the final saved answer while streaming
                # each incremental chunk to the client as an SSE frame.
                response_tokens.append(chunk["answer"])
                streamed_chat_history.assistant = chunk["answer"]
                yield f"data: {json.dumps(streamed_chat_history.dict())}"

        self.save_answer(question, response_tokens, streamed_chat_history, save_answer)
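

# Usage sketch (illustrative, not part of this module): the SSE frames yielded
# by generate_stream can be served directly from an ASGI endpoint. Here `brain`
# and `chat_question` are hypothetical, pre-built objects:
#
#   from fastapi.responses import StreamingResponse
#
#   return StreamingResponse(
#       brain.generate_stream(chat_id, chat_question),
#       media_type="text/event-stream",
#   )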