feat: private llm (#360)

* feat: private llm

* Update backend/vectorstore/supabase.py

* Update backend/vectorstore/supabase.py
Matt 2023-06-22 09:45:35 +01:00 committed by GitHub
parent 3d11e3fb51
commit 83fde0aeea
9 changed files with 154 additions and 46 deletions


@@ -8,3 +8,9 @@ GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json
GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id
MAX_BRAIN_SIZE=52428800
MAX_REQUESTS_NUMBER=200
#Private LLM Variables
PRIVATE=False
MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
MODEL_N_CTX=1000
MODEL_N_BATCH=8

.gitignore vendored

@@ -51,3 +51,6 @@ streamlit-demo/.streamlit/secrets.toml
backend/pandoc-*
**/.pandoc-*
backend/application_default_credentials.json
#local models
backend/local_models/*


@@ -1,4 +1,4 @@
FROM python:3.11-buster
FROM python:3.11-bullseye
# Install GEOS library
RUN apt-get update && apt-get install -y libgeos-dev
@@ -9,6 +9,17 @@ COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100
# Install additional dependencies
RUN apt-get install -y liblzma-dev cmake
# Build GPT4All from source (required for GPT4All langchain bindings)
RUN cd /tmp && git clone --recurse-submodules https://github.com/nomic-ai/gpt4all && \
cd gpt4all/gpt4all-backend/ && \
mkdir build && cd build && \
cmake .. && cmake --build . --parallel && \
cd ../../gpt4all-bindings/python && \
pip3 install -e .
COPY . /code/
CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
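As a quick sanity check (a hypothetical snippet, not part of the diff), the bindings built above can be imported inside the container to confirm the compile step succeeded:

```python
# Hypothetical smoke test: run inside the built image to confirm the
# GPT4All bindings installed by `pip3 install -e .` import cleanly.
import gpt4all

print(gpt4all.__file__)
```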


@@ -1,33 +1,38 @@
import os # A module to interact with the OS
from typing import Any, Dict, List # For type hinting
from typing import Any, Dict, List
from models.settings import LLMSettings # For type hinting
# Modules and classes from the 'langchain' library, used to build the QA chain
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.router.llm_router import (LLMRouterChain,
RouterOutputParser)
from langchain.chains.router.multi_prompt_prompt import \
MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chat_models import ChatOpenAI, ChatVertexAI
from langchain.chat_models.anthropic import ChatAnthropic
from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI, VertexAI
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.llms.base import LLM
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import SupabaseVectorStore
from llm.prompt import LANGUAGE_PROMPT
from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
from models.chats import \
ChatMessage # Importing a custom ChatMessage class for handling chat messages
from models.settings import \
BrainSettings # Importing settings related to the 'brain'
from pydantic import (BaseModel, # For data validation and settings management
BaseSettings)
from supabase import (Client, # For interacting with Supabase database
create_client)
from vectorstore.supabase import \
CustomSupabaseVectorStore # Custom class for handling vector storage with Supabase
from models.chats import (
ChatMessage,
) # Importing a custom ChatMessage class for handling chat messages
from models.settings import BrainSettings # Importing settings related to the 'brain'
from pydantic import BaseModel # For data validation and settings management
from pydantic import BaseSettings
from supabase import Client # For interacting with Supabase database
from supabase import create_client
from vectorstore.supabase import (
CustomSupabaseVectorStore,
) # Custom class for handling vector storage with Supabase
from logger import get_logger
logger = get_logger(__name__)
class AnswerConversationBufferMemory(ConversationBufferMemory):
"""
@@ -35,11 +40,12 @@ class AnswerConversationBufferMemory(ConversationBufferMemory):
It overrides the save_context method to save the response using the 'answer' key in the outputs.
Reference to some issue comment is given in the docstring.
"""
def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
# Overriding the save_context method of the parent class
return super(AnswerConversationBufferMemory, self).save_context(
inputs, {'response': outputs['answer']})
inputs, {"response": outputs["answer"]}
)
def get_chat_history(inputs) -> str:
@@ -59,21 +65,22 @@ class BrainPicking(BaseModel):
Main class for the Brain Picking functionality.
It initializes a chat model and uses ConversationalRetrievalChain to generate questions and retrieve answers.
"""
# Default class attributes
llm_name: str = "gpt-3.5-turbo"
settings = BrainSettings()
llm_config = LLMSettings()
embeddings: OpenAIEmbeddings = None
supabase_client: Client = None
vector_store: CustomSupabaseVectorStore = None
llm: ChatOpenAI = None
llm: LLM = None
question_generator: LLMChain = None
doc_chain: ConversationalRetrievalChain = None
class Config:
# Allowing arbitrary types for class validation
arbitrary_types_allowed = True
def init(self, model: str, user_id: str) -> "BrainPicking":
"""
Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
@@ -82,15 +89,61 @@ class BrainPicking(BaseModel):
:return: BrainPicking instance
"""
self.embeddings = OpenAIEmbeddings(openai_api_key=self.settings.openai_api_key)
self.supabase_client = create_client(self.settings.supabase_url, self.settings.supabase_service_key)
self.supabase_client = create_client(
self.settings.supabase_url, self.settings.supabase_service_key
)
self.vector_store = CustomSupabaseVectorStore(
self.supabase_client, self.embeddings, table_name="vectors", user_id=user_id)
self.llm = ChatOpenAI(temperature=0, model_name=model)
self.question_generator = LLMChain(llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT)
self.supabase_client,
self.embeddings,
table_name="vectors",
user_id=user_id,
)
self.llm = self._determine_llm(
private_model_args={
"model_path": self.llm_config.model_path,
"n_ctx": self.llm_config.model_n_ctx,
"n_batch": self.llm_config.model_n_batch,
},
private=self.llm_config.private,
model_name=self.llm_name,
)
self.question_generator = LLMChain(
llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT
)
self.doc_chain = load_qa_chain(self.llm, chain_type="stuff")
return self
def _get_qa(self, chat_message: ChatMessage, user_openai_api_key) -> ConversationalRetrievalChain:
def _determine_llm(
self, private_model_args: dict, private: bool = False, model_name: str = None
) -> LLM:
"""
Determine the language model to be used.
:param model_name: Language model name to be used.
:param private_model_args: Dictionary containing model_path, n_ctx and n_batch.
:param private: Boolean value to determine if private model is to be used.
:return: Language model instance
"""
if private:
model_path = private_model_args["model_path"]
model_n_ctx = private_model_args["n_ctx"]
model_n_batch = private_model_args["n_batch"]
logger.info("Using private model: %s", model_path)
return GPT4All(
model=model_path,
n_ctx=model_n_ctx,
n_batch=model_n_batch,
backend="gptj",
verbose=True,
)
else:
return ChatOpenAI(temperature=0, model_name=model_name)
def _get_qa(
self, chat_message: ChatMessage, user_openai_api_key
) -> ConversationalRetrievalChain:
"""
Retrieves a QA chain for the given chat message and API key.
:param chat_message: The chat message containing history.
@@ -100,12 +153,15 @@ class BrainPicking(BaseModel):
# If user provided an API key, update the settings
if user_openai_api_key is not None and user_openai_api_key != "":
self.settings.openai_api_key = user_openai_api_key
# Initialize and return a ConversationalRetrievalChain
qa = ConversationalRetrievalChain(
retriever=self.vector_store.as_retriever(),
max_tokens_limit=chat_message.max_tokens, question_generator=self.question_generator,
combine_docs_chain=self.doc_chain, get_chat_history=get_chat_history)
retriever=self.vector_store.as_retriever(),
max_tokens_limit=chat_message.max_tokens,
question_generator=self.question_generator,
combine_docs_chain=self.doc_chain,
get_chat_history=get_chat_history,
)
return qa
def generate_answer(self, chat_message: ChatMessage, user_openai_api_key) -> str:
@@ -119,15 +175,17 @@ class BrainPicking(BaseModel):
# Get the QA chain
qa = self._get_qa(chat_message, user_openai_api_key)
# Transform the chat history into a list of tuples
for i in range(0, len(chat_message.history) - 1, 2):
user_message = chat_message.history[i][1]
assistant_message = chat_message.history[i + 1][1]
transformed_history.append((user_message, assistant_message))
# Generate the model response using the QA chain
model_response = qa({"question": chat_message.question, "chat_history": transformed_history})
answer = model_response['answer']
model_response = qa(
{"question": chat_message.question, "chat_history": transformed_history}
)
answer = model_response["answer"]
return answer
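For illustration (not part of the diff), here is a minimal sketch of what `_determine_llm` builds when `private` is enabled, using the default values from .env.example; the prompt string is made up:

```python
from langchain.llms import GPT4All

# Mirrors the private branch of _determine_llm with the .env.example defaults.
llm = GPT4All(
    model="./local_models/ggml-gpt4all-j-v1.3-groovy.bin",
    n_ctx=1000,
    n_batch=8,
    backend="gptj",
    verbose=True,
)
print(llm("Summarize what a vector store does."))
```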


@@ -13,6 +13,11 @@ class BrainSettings(BaseSettings):
supabase_url: str
supabase_service_key: str
class LLMSettings(BaseSettings):
private: bool
model_path: str
model_n_ctx: int
model_n_batch: int
def common_dependencies() -> dict:
settings = BrainSettings()
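An illustrative check (assumed, not in the diff): since `LLMSettings` extends pydantic's `BaseSettings`, its fields are populated case-insensitively from the `PRIVATE`, `MODEL_PATH`, `MODEL_N_CTX` and `MODEL_N_BATCH` variables added to the .env file:

```python
from models.settings import LLMSettings

# Assumes the four variables from .env.example are present in the environment.
llm_config = LLMSettings()
print(llm_config.private)     # False unless PRIVATE=True is set
print(llm_config.model_path)  # ./local_models/ggml-gpt4all-j-v1.3-groovy.bin
```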


@@ -22,3 +22,4 @@ transformers==4.30.1
asyncpg==0.27.0
flake8==6.0.0
flake8-black==0.3.6
sentence_transformers>=2.0.0

backend/vectorstore/supabase.py

@@ -1,15 +1,9 @@
from typing import Any, List
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI, ChatVertexAI
from langchain.client import arun_on_dataset
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI, VertexAI
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import SupabaseVectorStore
from supabase import Client, create_client
from supabase import Client
class CustomSupabaseVectorStore(SupabaseVectorStore):
@@ -22,7 +16,6 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
def similarity_search(
self,
query: str,
user_id: str = "none",
table: str = "match_vectors",
k: int = 6,
threshold: float = 0.5,

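For context, an assumed usage sketch (not in the diff) of how the store is scoped to one user, mirroring the construction in `BrainPicking.init`; the credentials and query are placeholders:

```python
from langchain.embeddings.openai import OpenAIEmbeddings
from supabase import create_client
from vectorstore.supabase import CustomSupabaseVectorStore

# Placeholder credentials; the real values come from BrainSettings.
supabase_client = create_client("https://xyz.supabase.co", "service-role-key")
embeddings = OpenAIEmbeddings(openai_api_key="sk-...")

store = CustomSupabaseVectorStore(
    supabase_client, embeddings, table_name="vectors", user_id="user-123"
)
docs = store.similarity_search("what does quivr do?", k=6)
```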

@@ -0,0 +1,8 @@
{
"label": "LLM",
"position": 1,
"link": {
"type": "generated-index",
"description": "How does the LLM (Large Language Model) work?"
}
}


@@ -0,0 +1,23 @@
---
sidebar_position: 1
---
# Private LLM
Quivr now has the capability to use a private LLM powered by GPT4All (other open-source models coming soon).
This is similar to the functionality provided by the PrivateGPT project.
This means that your data never leaves the server: the model is downloaded to the server and inference on your questions runs locally.
## How to use
Set the `PRIVATE` flag to True in the /backend/.env file, as shown below. You can also set other model parameters in the .env file.
Download the GPT4All model from [here](https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin) and place it in the /backend/local_models folder.
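For example, the relevant block in /backend/.env would look like this (values taken from .env.example):

```
PRIVATE=True
MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
MODEL_N_CTX=1000
MODEL_N_BATCH=8
```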
## Future Plans
We are planning to add more models to the private LLM feature. We also plan to use a local embedding model from Hugging Face to reduce our reliance on OpenAI's API.
We will also add the ability to use a private LLM from the frontend and the API. Currently, it is only available if you self-host the backend.
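As a rough sketch of that direction (an assumption, not shipped behaviour): the backend already imports `HuggingFaceEmbeddings` and pins `sentence_transformers`, which would allow local embeddings along these lines:

```python
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# The model name is HuggingFaceEmbeddings' default; it runs locally via
# sentence-transformers, with no OpenAI API call.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector = embeddings.embed_query("private llm")
print(len(vector))  # 384 dimensions for this model
```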