quivr/backend/llm/brainpicking.py


import asyncio
import json
from typing import AsyncIterable, Awaitable
from langchain.callbacks import AsyncIteratorCallbackHandler
# LangChain components used to build the question-generation and QA chains
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import LLM
from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
from logger import get_logger
from models.settings import BrainSettings # Importing settings related to the 'brain'
from pydantic import BaseModel # For data validation and settings management
from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history
from repository.chat.update_message_by_id import update_message_by_id
from supabase import Client, create_client  # For interacting with the Supabase database
from vectorstore.supabase import (
CustomSupabaseVectorStore,
) # Custom class for handling vector storage with Supabase

logger = get_logger(__name__)


class BrainPicking(BaseModel):
    """
    Main class for the Brain Picking functionality.
    It initializes a chat model and uses a ConversationalRetrievalChain to condense
    questions and retrieve answers from the vector store.
    """
    # Instantiate settings (OpenAI key, Supabase URL and service key)
settings = BrainSettings()
# Default class attributes
llm_name: str = "gpt-3.5-turbo"
temperature: float = 0.0
chat_id: str
max_tokens: int = 256
# Storage
supabase_client: Client = None
vector_store: CustomSupabaseVectorStore = None
# Language models
embeddings: OpenAIEmbeddings = None
question_llm: LLM = None
doc_llm: LLM = None
question_generator: LLMChain = None
doc_chain: LLMChain = None
qa: ConversationalRetrievalChain = None
# Streaming
callback: AsyncIteratorCallbackHandler = None
streaming: bool = False
class Config:
# Allowing arbitrary types for class validation
        arbitrary_types_allowed = True

    def __init__(
self,
model: str,
brain_id: str,
temperature: float,
chat_id: str,
max_tokens: int,
user_openai_api_key: str,
streaming: bool = False,
) -> "BrainPicking":
2023-06-20 09:56:17 +03:00
"""
Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
:param model: Language model name to be used.
2023-06-28 20:39:27 +03:00
:param user_brain_idid: The brain id to be used for CustomSupabaseVectorStore.
2023-06-20 09:56:17 +03:00
:return: BrainPicking instance
"""
super().__init__(
model=model,
brain_id=brain_id,
chat_id=chat_id,
max_tokens=max_tokens,
temperature=temperature,
user_openai_api_key=user_openai_api_key,
)
# If user provided an API key, update the settings
if user_openai_api_key is not None:
self.settings.openai_api_key = user_openai_api_key
self.temperature = temperature
self.embeddings = OpenAIEmbeddings(openai_api_key=self.settings.openai_api_key)
self.supabase_client = create_client(
self.settings.supabase_url, self.settings.supabase_service_key
)
self.llm_name = model
self.vector_store = CustomSupabaseVectorStore(
self.supabase_client,
self.embeddings,
table_name="vectors",
brain_id=brain_id,
)
self.question_llm = self._create_llm(
model_name=self.llm_name,
streaming=False,
)
self.question_generator = LLMChain(
llm=self.question_llm, prompt=CONDENSE_QUESTION_PROMPT
)
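        # The question generator condenses the incoming question and the chat
        # history into a standalone question before document retrieval.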
        if streaming:
            self.callback = AsyncIteratorCallbackHandler()
        self.doc_llm = self._create_llm(
            model_name=self.llm_name,
            streaming=streaming,
            callbacks=[self.callback] if streaming else None,
        )
        self.doc_chain = load_qa_chain(llm=self.doc_llm, chain_type="stuff")
        self.streaming = streaming
        self.chat_id = chat_id
        self.max_tokens = max_tokens

    def _create_llm(self, model_name, streaming=False, callbacks=None) -> LLM:
"""
Determine the language model to be used.
:param model_name: Language model name to be used.
:param private_model_args: Dictionary containing model_path, n_ctx and n_batch.
:param private: Boolean value to determine if private model is to be used.
:return: Language model instance
"""
        return ChatOpenAI(
            temperature=self.temperature,
            model_name=model_name,
            streaming=streaming,
            max_tokens=self.max_tokens,
            callbacks=callbacks,
        )

    def _get_qa(
self,
) -> ConversationalRetrievalChain:
"""
Retrieves a QA chain for the given chat message and API key.
:param chat_message: The chat message containing history.
:param user_openai_api_key: The OpenAI API key to be used.
:return: ConversationalRetrievalChain instance
"""
# Initialize and return a ConversationalRetrievalChain
qa = ConversationalRetrievalChain(
retriever=self.vector_store.as_retriever(),
question_generator=self.question_generator,
combine_docs_chain=self.doc_chain,
verbose=True,
)
        return qa

    def generate_answer(self, question: str) -> str:
"""
Generate an answer to a given question by interacting with the language model.
:param question: The question
:return: The generated answer.
"""
# Get the QA chain
qa = self._get_qa()
history = get_chat_history(self.chat_id)
# Format the chat history into a list of tuples (human, ai)
transformed_history = [(chat.user_message, chat.assistant) for chat in history]
# Generate the model response using the QA chain
model_response = qa({"question": question, "chat_history": transformed_history})
answer = model_response["answer"]
        return answer

    async def generate_stream(self, question: str) -> AsyncIterable:
"""
Generate a streaming answer to a given question by interacting with the language model.
:param question: The question
:return: An async iterable which generates the answer.
"""
# Get the QA chain
qa = self._get_qa()
history = get_chat_history(self.chat_id)
callback = self.callback
        # Format the chat history into a list of tuples (human, ai)
transformed_history = [(chat.user_message, chat.assistant) for chat in history]
# Initialize a list to hold the tokens
response_tokens = []
        # Wrap an awaitable with an event to signal when it is done or an exception is raised.
async def wrap_done(fn: Awaitable, event: asyncio.Event):
try:
await fn
except Exception as e:
logger.error(f"Caught exception: {e}")
finally:
event.set()
# Use the acall method to perform an async call to the QA chain
task = asyncio.create_task(
wrap_done(
qa.acall(
{
"question": question,
"chat_history": transformed_history,
}
),
callback.done,
)
)
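        # The QA call now runs in the background; the tokens it produces surface
        # through the callback and are consumed below while the chain executes.
        # Persist a placeholder chat history entry whose assistant message is
        # filled in as tokens stream.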
streamed_chat_history = update_chat_history(
chat_id=self.chat_id,
user_message=question,
assistant="",
)
        # Use the aiter method of the callback to stream the response with server-sent events
async for token in callback.aiter():
logger.info("Token: %s", token)
# Add the token to the response_tokens list
response_tokens.append(token)
streamed_chat_history.assistant = token
yield f"data: {json.dumps(streamed_chat_history.to_dict())}"
await task
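        # Awaiting the task guarantees the chain has fully finished before the
        # final message is persisted.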
# Join the tokens to create the assistant's response
assistant = "".join(response_tokens)
update_message_by_id(
message_id=streamed_chat_history.message_id,
user_message=question,
assistant=assistant,
)
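
# Example usage (a minimal, illustrative sketch; the id values and the FastAPI
# wiring are assumptions, not part of this module):
#
#     brain = BrainPicking(
#         model="gpt-3.5-turbo",
#         brain_id="<brain-id>",
#         temperature=0.0,
#         chat_id="<chat-id>",
#         max_tokens=256,
#         user_openai_api_key=None,
#         streaming=False,
#     )
#     answer = brain.generate_answer("What do my documents say about pricing?")
#
#     # With streaming=True, generate_stream yields SSE strings, e.g. in FastAPI:
#     # return StreamingResponse(
#     #     brain.generate_stream(question), media_type="text/event-stream"
#     # )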