diff --git a/.backend_env.example b/.backend_env.example
index 9881cea08..e0c66a81c 100644
--- a/.backend_env.example
+++ b/.backend_env.example
@@ -8,3 +8,9 @@ GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json
 GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id
 MAX_BRAIN_SIZE=52428800
 MAX_REQUESTS_NUMBER=200
+
+#Private LLM Variables
+PRIVATE=False
+MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
+MODEL_N_CTX=1000
+MODEL_N_BATCH=8
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1b80652eb..a0eb89fb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,6 @@ streamlit-demo/.streamlit/secrets.toml
 backend/pandoc-*
 **/.pandoc-*
 backend/application_default_credentials.json
+
+#local models
+backend/local_models/*
\ No newline at end of file
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 01c652f49..a4a7d322e 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11-buster
+FROM python:3.11-bullseye
 
 # Install GEOS library
 RUN apt-get update && apt-get install -y libgeos-dev
@@ -9,6 +9,17 @@ COPY ./requirements.txt /code/requirements.txt
 
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
 
+# Install additional dependencies
+RUN apt-get install -y liblzma-dev cmake
+
+# Build GPT4All from source (required for GPT4All langchain bindings)
+RUN cd /tmp && git clone --recurse-submodules https://github.com/nomic-ai/gpt4all && \
+    cd gpt4all/gpt4all-backend/ && \
+    mkdir build && cd build && \
+    cmake .. && cmake --build . --parallel && \
+    cd ../../gpt4all-bindings/python && \
+    pip3 install -e .
+
 COPY . /code/
 
-CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
\ No newline at end of file
+CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
diff --git a/backend/llm/brainpicking.py b/backend/llm/brainpicking.py
index f6a313410..d7c1e6422 100644
--- a/backend/llm/brainpicking.py
+++ b/backend/llm/brainpicking.py
@@ -1,33 +1,38 @@
 import os  # A module to interact with the OS
-from typing import Any, Dict, List  # For type hinting
+from typing import Any, Dict, List
+from models.settings import LLMSettings  # For type hinting
 
 # Importing various modules and classes from a custom library 'langchain' likely used for natural language processing
 from langchain.chains import ConversationalRetrievalChain, LLMChain
 from langchain.chains.question_answering import load_qa_chain
-from langchain.chains.router.llm_router import (LLMRouterChain,
-                                                RouterOutputParser)
-from langchain.chains.router.multi_prompt_prompt import \
-    MULTI_PROMPT_ROUTER_TEMPLATE
+from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
+from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
 from langchain.chat_models import ChatOpenAI, ChatVertexAI
 from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.llms import GPT4All
+from langchain.llms.base import LLM
 from langchain.memory import ConversationBufferMemory
 from langchain.vectorstores import SupabaseVectorStore
 from llm.prompt import LANGUAGE_PROMPT
 from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
-from models.chats import \
-    ChatMessage  # Importing a custom ChatMessage class for handling chat messages
-from models.settings import \
-    BrainSettings  # Importing settings related to the 'brain'
-from pydantic import (BaseModel,  # For data validation and settings management
-                      BaseSettings)
-from supabase import (Client,  # For interacting with Supabase database
-                      create_client)
-from vectorstore.supabase import \
-    CustomSupabaseVectorStore  # Custom class for handling vector storage with Supabase
+from models.chats import (
+    ChatMessage,
+)  # Importing a custom ChatMessage class for handling chat messages
+from models.settings import BrainSettings  # Importing settings related to the 'brain'
+from pydantic import BaseModel  # For data validation and settings management
+from pydantic import BaseSettings
+from supabase import Client  # For interacting with Supabase database
+from supabase import create_client
+from vectorstore.supabase import (
+    CustomSupabaseVectorStore,
+)  # Custom class for handling vector storage with Supabase
+from logger import get_logger
+logger = get_logger(__name__)
 
 
 class AnswerConversationBufferMemory(ConversationBufferMemory):
     """
@@ -35,11 +40,12 @@ class AnswerConversationBufferMemory(ConversationBufferMemory):
     It overrides the save_context method to save the response using the 'answer' key in the outputs.
     Reference to some issue comment is given in the docstring.
     """
-    
+
     def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
         # Overriding the save_context method of the parent class
         return super(AnswerConversationBufferMemory, self).save_context(
-            inputs, {'response': outputs['answer']})
+            inputs, {"response": outputs["answer"]}
+        )
 
 
 def get_chat_history(inputs) -> str:
@@ -59,21 +65,22 @@ class BrainPicking(BaseModel):
     Main class for the Brain Picking functionality.
     It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.
     """
-    
+
     # Default class attributes
     llm_name: str = "gpt-3.5-turbo"
     settings = BrainSettings()
+    llm_config = LLMSettings()
     embeddings: OpenAIEmbeddings = None
     supabase_client: Client = None
     vector_store: CustomSupabaseVectorStore = None
-    llm: ChatOpenAI = None
+    llm: LLM = None
     question_generator: LLMChain = None
     doc_chain: ConversationalRetrievalChain = None
-    
+
     class Config:
         # Allowing arbitrary types for class validation
         arbitrary_types_allowed = True
-    
+
     def init(self, model: str, user_id: str) -> "BrainPicking":
         """
         Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
@@ -82,15 +89,61 @@ class BrainPicking(BaseModel):
         :return: BrainPicking instance
         """
         self.embeddings = OpenAIEmbeddings(openai_api_key=self.settings.openai_api_key)
-        self.supabase_client = create_client(self.settings.supabase_url, self.settings.supabase_service_key)
+        self.supabase_client = create_client(
+            self.settings.supabase_url, self.settings.supabase_service_key
+        )
         self.vector_store = CustomSupabaseVectorStore(
-            self.supabase_client, self.embeddings, table_name="vectors", user_id=user_id)
-        self.llm = ChatOpenAI(temperature=0, model_name=model)
-        self.question_generator = LLMChain(llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT)
+            self.supabase_client,
+            self.embeddings,
+            table_name="vectors",
+            user_id=user_id,
+        )
+
+        self.llm = self._determine_llm(
+            private_model_args={
+                "model_path": self.llm_config.model_path,
+                "n_ctx": self.llm_config.model_n_ctx,
+                "n_batch": self.llm_config.model_n_batch,
+            },
+            private=self.llm_config.private,
+            model_name=self.llm_name,
+        )
+        self.question_generator = LLMChain(
+            llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT
+        )
         self.doc_chain = load_qa_chain(self.llm, chain_type="stuff")
         return self
-    
-    def _get_qa(self, chat_message: ChatMessage, user_openai_api_key) -> ConversationalRetrievalChain:
+
+    def _determine_llm(
+        self, private_model_args: dict, private: bool = False, model_name: str = None
+    ) -> LLM:
+        """
+        Determine the language model to be used.
+        :param model_name: Language model name to be used.
+        :param private_model_args: Dictionary containing model_path, n_ctx and n_batch.
+        :param private: Boolean value to determine if private model is to be used.
+        :return: Language model instance
+        """
+        if private:
+            model_path = private_model_args["model_path"]
+            model_n_ctx = private_model_args["n_ctx"]
+            model_n_batch = private_model_args["n_batch"]
+
+            logger.info("Using private model: %s", model_path)
+
+            return GPT4All(
+                model=model_path,
+                n_ctx=model_n_ctx,
+                n_batch=model_n_batch,
+                backend="gptj",
+                verbose=True,
+            )
+        else:
+            return ChatOpenAI(temperature=0, model_name=model_name)
+
+    def _get_qa(
+        self, chat_message: ChatMessage, user_openai_api_key
+    ) -> ConversationalRetrievalChain:
         """
         Retrieves a QA chain for the given chat message and API key.
         :param chat_message: The chat message containing history.
@@ -100,12 +153,15 @@ class BrainPicking(BaseModel):
         # If user provided an API key, update the settings
         if user_openai_api_key is not None and user_openai_api_key != "":
             self.settings.openai_api_key = user_openai_api_key
-        
+
         # Initialize and return a ConversationalRetrievalChain
         qa = ConversationalRetrievalChain(
-            retriever=self.vector_store.as_retriever(),
-            max_tokens_limit=chat_message.max_tokens, question_generator=self.question_generator,
-            combine_docs_chain=self.doc_chain, get_chat_history=get_chat_history)
+            retriever=self.vector_store.as_retriever(),
+            max_tokens_limit=chat_message.max_tokens,
+            question_generator=self.question_generator,
+            combine_docs_chain=self.doc_chain,
+            get_chat_history=get_chat_history,
+        )
         return qa
 
     def generate_answer(self, chat_message: ChatMessage, user_openai_api_key) -> str:
@@ -119,15 +175,17 @@ class BrainPicking(BaseModel):
 
         # Get the QA chain
         qa = self._get_qa(chat_message, user_openai_api_key)
-        
+
         # Transform the chat history into a list of tuples
         for i in range(0, len(chat_message.history) - 1, 2):
             user_message = chat_message.history[i][1]
             assistant_message = chat_message.history[i + 1][1]
             transformed_history.append((user_message, assistant_message))
-        
+
         # Generate the model response using the QA chain
-        model_response = qa({"question": chat_message.question, "chat_history": transformed_history})
-        answer = model_response['answer']
+        model_response = qa(
+            {"question": chat_message.question, "chat_history": transformed_history}
+        )
+        answer = model_response["answer"]
 
         return answer
diff --git a/backend/models/settings.py b/backend/models/settings.py
index b65297a3b..8f9192614 100644
--- a/backend/models/settings.py
+++ b/backend/models/settings.py
@@ -13,6 +13,11 @@ class BrainSettings(BaseSettings):
     supabase_url: str
     supabase_service_key: str
 
+class LLMSettings(BaseSettings):
+    private: bool
+    model_path: str
+    model_n_ctx: int
+    model_n_batch: int
 
 def common_dependencies() -> dict:
     settings = BrainSettings()
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 5b8ed2623..4a9817bdb 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -22,3 +22,4 @@ transformers==4.30.1
 asyncpg==0.27.0
 flake8==6.0.0
 flake8-black==0.3.6
+sentence_transformers>=2.0.0
diff --git a/backend/vectorstore/supabase.py b/backend/vectorstore/supabase.py
index 9f8c186af..0b2992cc9 100644
--- a/backend/vectorstore/supabase.py
+++ b/backend/vectorstore/supabase.py
@@ -1,15 +1,9 @@
 from typing import Any, List
 
-from langchain.chains import ConversationalRetrievalChain, LLMChain
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chat_models import ChatOpenAI, ChatVertexAI
-from langchain.client import arun_on_dataset
 from langchain.docstore.document import Document
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
-from langchain.prompts.prompt import PromptTemplate
 from langchain.vectorstores import SupabaseVectorStore
-from supabase import Client, create_client
+from supabase import Client
 
 
 class CustomSupabaseVectorStore(SupabaseVectorStore):
@@ -22,7 +16,6 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
     def similarity_search(
         self,
         query: str,
-        user_id: str = "none",
         table: str = "match_vectors",
         k: int = 6,
         threshold: float = 0.5,
diff --git a/docs/docs/backend/llm/_category_.json b/docs/docs/backend/llm/_category_.json
new file mode 100644
index 000000000..174c44918
--- /dev/null
+++ b/docs/docs/backend/llm/_category_.json
@@ -0,0 +1,8 @@
+{
"label": "LLM", + "position": 1, + "link": { + "type": "generated-index", + "description": "How does the LLM (Large Language Model Work)?" + } +} diff --git a/docs/docs/backend/llm/private-llm.md b/docs/docs/backend/llm/private-llm.md new file mode 100644 index 000000000..5283673ff --- /dev/null +++ b/docs/docs/backend/llm/private-llm.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +--- + +# Private LLM + +Quivr now has the capability to use a private LLM model powered by GPT4All (other open source models coming soon). + +This is simular to the functionality provided by the PrivateGPT project. + +This means that your data never leaves the server. The LLM is downloaded to the server and runs inference on your question locally. + +## How to use +Set the 'private' flag to True in the /backend/.env file. You can also set other model parameters in the .env file. + +Download the GPT4All model from [here]( + https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin) and place it in the /backend/local_models folder. + +## Future Plans +We are planning to add more models to the private LLM feature. We are also planning on using a local embedding model from Hugging Face to reduce our reliance on OpenAI's API. + +We will also be adding the ability to use a private LLM model from in the frontend and api. Currently it is only available if you self host the backend. +