feat: private llm (#360)
* feat: private llm
* Update backend/vectorstore/supabase.py
* Update backend/vectorstore/supabase.py
Parent: 3d11e3fb51
Commit: 83fde0aeea
@@ -8,3 +8,9 @@ GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json
 GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id
 MAX_BRAIN_SIZE=52428800
 MAX_REQUESTS_NUMBER=200
+
+#Private LLM Variables
+PRIVATE=False
+MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
+MODEL_N_CTX=1000
+MODEL_N_BATCH=8
.gitignore (vendored): 3 additions
@@ -51,3 +51,6 @@ streamlit-demo/.streamlit/secrets.toml
 backend/pandoc-*
 **/.pandoc-*
 backend/application_default_credentials.json
+
+#local models
+backend/local_models/*
@@ -1,4 +1,4 @@
-FROM python:3.11-buster
+FROM python:3.11-bullseye

 # Install GEOS library
 RUN apt-get update && apt-get install -y libgeos-dev
@@ -9,6 +9,17 @@ COPY ./requirements.txt /code/requirements.txt
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100

+# Install additional dependencies
+RUN apt-get install -y liblzma-dev cmake
+
+# Build GPT4All from source (required for GPT4All langchain bindings)
+RUN cd /tmp && git clone --recurse-submodules https://github.com/nomic-ai/gpt4all && \
+    cd gpt4all/gpt4all-backend/ && \
+    mkdir build && cd build && \
+    cmake .. && cmake --build . --parallel && \
+    cd ../../gpt4all-bindings/python && \
+    pip3 install -e .
+
 COPY . /code/

-CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
+CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
@@ -1,33 +1,38 @@
 import os  # A module to interact with the OS
-from typing import Any, Dict, List  # For type hinting
+from typing import Any, Dict, List
+from models.settings import LLMSettings  # For type hinting

 # Importing various modules and classes from a custom library 'langchain' likely used for natural language processing
 from langchain.chains import ConversationalRetrievalChain, LLMChain
 from langchain.chains.question_answering import load_qa_chain
-from langchain.chains.router.llm_router import (LLMRouterChain,
-                                                RouterOutputParser)
-from langchain.chains.router.multi_prompt_prompt import \
-    MULTI_PROMPT_ROUTER_TEMPLATE
+from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
+from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
 from langchain.chat_models import ChatOpenAI, ChatVertexAI
 from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.llms import GPT4All
+from langchain.llms.base import LLM
 from langchain.memory import ConversationBufferMemory
 from langchain.vectorstores import SupabaseVectorStore
 from llm.prompt import LANGUAGE_PROMPT
 from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
-from models.chats import \
-    ChatMessage  # Importing a custom ChatMessage class for handling chat messages
-from models.settings import \
-    BrainSettings  # Importing settings related to the 'brain'
-from pydantic import (BaseModel,  # For data validation and settings management
-                      BaseSettings)
-from supabase import (Client,  # For interacting with Supabase database
-                      create_client)
-from vectorstore.supabase import \
-    CustomSupabaseVectorStore  # Custom class for handling vector storage with Supabase
+from models.chats import (
+    ChatMessage,
+)  # Importing a custom ChatMessage class for handling chat messages
+from models.settings import BrainSettings  # Importing settings related to the 'brain'
+from pydantic import BaseModel  # For data validation and settings management
+from pydantic import BaseSettings
+from supabase import Client  # For interacting with Supabase database
+from supabase import create_client
+from vectorstore.supabase import (
+    CustomSupabaseVectorStore,
+)  # Custom class for handling vector storage with Supabase
+from logger import get_logger
+
+logger = get_logger(__name__)

 class AnswerConversationBufferMemory(ConversationBufferMemory):
     """
@@ -35,11 +40,12 @@ class AnswerConversationBufferMemory(ConversationBufferMemory):
     It overrides the save_context method to save the response using the 'answer' key in the outputs.
     Reference to some issue comment is given in the docstring.
     """

     def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
         # Overriding the save_context method of the parent class
         return super(AnswerConversationBufferMemory, self).save_context(
-            inputs, {'response': outputs['answer']})
+            inputs, {"response": outputs["answer"]}
+        )


 def get_chat_history(inputs) -> str:
@@ -59,21 +65,22 @@ class BrainPicking(BaseModel):
     Main class for the Brain Picking functionality.
     It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.
     """

     # Default class attributes
     llm_name: str = "gpt-3.5-turbo"
     settings = BrainSettings()
+    llm_config = LLMSettings()
     embeddings: OpenAIEmbeddings = None
     supabase_client: Client = None
     vector_store: CustomSupabaseVectorStore = None
-    llm: ChatOpenAI = None
+    llm: LLM = None
     question_generator: LLMChain = None
     doc_chain: ConversationalRetrievalChain = None

     class Config:
         # Allowing arbitrary types for class validation
         arbitrary_types_allowed = True

     def init(self, model: str, user_id: str) -> "BrainPicking":
         """
         Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
@@ -82,15 +89,61 @@ class BrainPicking(BaseModel):
         :return: BrainPicking instance
         """
         self.embeddings = OpenAIEmbeddings(openai_api_key=self.settings.openai_api_key)
-        self.supabase_client = create_client(self.settings.supabase_url, self.settings.supabase_service_key)
+        self.supabase_client = create_client(
+            self.settings.supabase_url, self.settings.supabase_service_key
+        )
         self.vector_store = CustomSupabaseVectorStore(
-            self.supabase_client, self.embeddings, table_name="vectors", user_id=user_id)
-        self.llm = ChatOpenAI(temperature=0, model_name=model)
-        self.question_generator = LLMChain(llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT)
+            self.supabase_client,
+            self.embeddings,
+            table_name="vectors",
+            user_id=user_id,
+        )
+
+        self.llm = self._determine_llm(
+            private_model_args={
+                "model_path": self.llm_config.model_path,
+                "n_ctx": self.llm_config.model_n_ctx,
+                "n_batch": self.llm_config.model_n_batch,
+            },
+            private=self.llm_config.private,
+            model_name=self.llm_name,
+        )
+        self.question_generator = LLMChain(
+            llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT
+        )
         self.doc_chain = load_qa_chain(self.llm, chain_type="stuff")
         return self

-    def _get_qa(self, chat_message: ChatMessage, user_openai_api_key) -> ConversationalRetrievalChain:
+    def _determine_llm(
+        self, private_model_args: dict, private: bool = False, model_name: str = None
+    ) -> LLM:
+        """
+        Determine the language model to be used.
+        :param model_name: Language model name to be used.
+        :param private_model_args: Dictionary containing model_path, n_ctx and n_batch.
+        :param private: Boolean value to determine if private model is to be used.
+        :return: Language model instance
+        """
+        if private:
+            model_path = private_model_args["model_path"]
+            model_n_ctx = private_model_args["n_ctx"]
+            model_n_batch = private_model_args["n_batch"]
+
+            logger.info("Using private model: %s", model_path)
+
+            return GPT4All(
+                model=model_path,
+                n_ctx=model_n_ctx,
+                n_batch=model_n_batch,
+                backend="gptj",
+                verbose=True,
+            )
+        else:
+            return ChatOpenAI(temperature=0, model_name=model_name)
+
+    def _get_qa(
+        self, chat_message: ChatMessage, user_openai_api_key
+    ) -> ConversationalRetrievalChain:
         """
         Retrieves a QA chain for the given chat message and API key.
         :param chat_message: The chat message containing history.
@@ -100,12 +153,15 @@ class BrainPicking(BaseModel):
         # If user provided an API key, update the settings
         if user_openai_api_key is not None and user_openai_api_key != "":
            self.settings.openai_api_key = user_openai_api_key

         # Initialize and return a ConversationalRetrievalChain
         qa = ConversationalRetrievalChain(
-            retriever=self.vector_store.as_retriever(),
-            max_tokens_limit=chat_message.max_tokens, question_generator=self.question_generator,
-            combine_docs_chain=self.doc_chain, get_chat_history=get_chat_history)
+            retriever=self.vector_store.as_retriever(),
+            max_tokens_limit=chat_message.max_tokens,
+            question_generator=self.question_generator,
+            combine_docs_chain=self.doc_chain,
+            get_chat_history=get_chat_history,
+        )
         return qa

     def generate_answer(self, chat_message: ChatMessage, user_openai_api_key) -> str:
@@ -119,15 +175,17 @@

         # Get the QA chain
         qa = self._get_qa(chat_message, user_openai_api_key)

         # Transform the chat history into a list of tuples
         for i in range(0, len(chat_message.history) - 1, 2):
             user_message = chat_message.history[i][1]
             assistant_message = chat_message.history[i + 1][1]
             transformed_history.append((user_message, assistant_message))

         # Generate the model response using the QA chain
-        model_response = qa({"question": chat_message.question, "chat_history": transformed_history})
-        answer = model_response['answer']
+        model_response = qa(
+            {"question": chat_message.question, "chat_history": transformed_history}
+        )
+        answer = model_response["answer"]

         return answer
@@ -13,6 +13,11 @@ class BrainSettings(BaseSettings):
     supabase_url: str
     supabase_service_key: str

+class LLMSettings(BaseSettings):
+    private: bool
+    model_path: str
+    model_n_ctx: int
+    model_n_batch: int
+
 def common_dependencies() -> dict:
     settings = BrainSettings()
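Since `LLMSettings` extends pydantic's `BaseSettings`, each field is populated from the matching environment variable (matched case-insensitively), which is how the `PRIVATE`, `MODEL_PATH`, `MODEL_N_CTX` and `MODEL_N_BATCH` entries in the .env template above reach the backend. A minimal sketch of that behaviour, assuming the variable names from the .env template:

```python
import os

from models.settings import LLMSettings  # import path as in this commit

# BaseSettings pulls each field from the environment (or a .env file),
# matching field names to variable names case-insensitively.
os.environ["PRIVATE"] = "True"
os.environ["MODEL_PATH"] = "./local_models/ggml-gpt4all-j-v1.3-groovy.bin"
os.environ["MODEL_N_CTX"] = "1000"
os.environ["MODEL_N_BATCH"] = "8"

llm_config = LLMSettings()
assert llm_config.private is True     # strings are coerced to the annotated types
assert llm_config.model_n_batch == 8
```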
@@ -22,3 +22,4 @@ transformers==4.30.1
 asyncpg==0.27.0
 flake8==6.0.0
 flake8-black==0.3.6
+sentence_transformers>=2.0.0
@@ -1,15 +1,9 @@
 from typing import Any, List

-from langchain.chains import ConversationalRetrievalChain, LLMChain
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chat_models import ChatOpenAI, ChatVertexAI
-from langchain.client import arun_on_dataset
 from langchain.docstore.document import Document
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
-from langchain.prompts.prompt import PromptTemplate
 from langchain.vectorstores import SupabaseVectorStore
-from supabase import Client, create_client
+from supabase import Client


 class CustomSupabaseVectorStore(SupabaseVectorStore):
@@ -22,7 +16,6 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
     def similarity_search(
         self,
         query: str,
         user_id: str = "none",
         table: str = "match_vectors",
         k: int = 6,
         threshold: float = 0.5,
docs/docs/backend/llm/_category_.json (new file): 8 additions
@@ -0,0 +1,8 @@
{
    "label": "LLM",
    "position": 1,
    "link": {
        "type": "generated-index",
        "description": "How does the LLM (Large Language Model) work?"
    }
}
docs/docs/backend/llm/private-llm.md (new file): 23 additions
@@ -0,0 +1,23 @@
---
sidebar_position: 1
---

# Private LLM

Quivr now has the capability to use a private LLM model powered by GPT4All (other open source models coming soon).

This is similar to the functionality provided by the PrivateGPT project.

This means that your data never leaves the server. The LLM is downloaded to the server and runs inference on your question locally.

## How to use
Set the 'private' flag to True in the /backend/.env file. You can also set other model parameters in the .env file.
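
For example, a minimal private configuration in /backend/.env, reusing the defaults this commit adds to the .env template, would be:

```
PRIVATE=True
MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
MODEL_N_CTX=1000
MODEL_N_BATCH=8
```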

Download the GPT4All model from [here](https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin) and place it in the /backend/local_models folder.
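
With `private` enabled, the backend loads the model through langchain's GPT4All wrapper (see `_determine_llm` in the backend diff above). A minimal standalone sketch of that private branch, assuming the model file sits at the default path:

```python
from langchain.llms import GPT4All

# Mirrors the private branch of _determine_llm in this commit.
llm = GPT4All(
    model="./local_models/ggml-gpt4all-j-v1.3-groovy.bin",  # MODEL_PATH
    n_ctx=1000,      # context window size (MODEL_N_CTX)
    n_batch=8,       # batch size for prompt processing (MODEL_N_BATCH)
    backend="gptj",  # ggml-gpt4all-j is a GPT-J style model
    verbose=True,
)

print(llm("What does Quivr do?"))  # inference runs locally; no data leaves the server
```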

## Future Plans
We are planning to add more models to the private LLM feature. We are also planning on using a local embedding model from Hugging Face to reduce our reliance on OpenAI's API.

We will also be adding the ability to use a private LLM model from the frontend and API. Currently it is only available if you self-host the backend.