feat: private llm (#360)
* feat: private llm
* Update backend/vectorstore/supabase.py
* Update backend/vectorstore/supabase.py
Parent: 3d11e3fb51
Commit: 83fde0aeea
@@ -8,3 +8,9 @@ GOOGLE_APPLICATION_CREDENTIALS=/code/application_default_credentials.json
 GOOGLE_CLOUD_PROJECT=XXXXX to be changed with your GCP id
 MAX_BRAIN_SIZE=52428800
 MAX_REQUESTS_NUMBER=200
+
+#Private LLM Variables
+PRIVATE=False
+MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
+MODEL_N_CTX=1000
+MODEL_N_BATCH=8
.gitignore (vendored): 3 additions
@@ -51,3 +51,6 @@ streamlit-demo/.streamlit/secrets.toml
 backend/pandoc-*
 **/.pandoc-*
 backend/application_default_credentials.json
+
+#local models
+backend/local_models/*
@@ -1,4 +1,4 @@
-FROM python:3.11-buster
+FROM python:3.11-bullseye

 # Install GEOS library
 RUN apt-get update && apt-get install -y libgeos-dev
@@ -9,6 +9,17 @@ COPY ./requirements.txt /code/requirements.txt
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt --timeout 100

+# Install additional dependencies
+RUN apt-get install -y liblzma-dev cmake
+
+# Build GPT4All from source (required for GPT4All langchain bindings)
+RUN cd /tmp && git clone --recurse-submodules https://github.com/nomic-ai/gpt4all && \
+    cd gpt4all/gpt4all-backend/ && \
+    mkdir build && cd build && \
+    cmake .. && cmake --build . --parallel && \
+    cd ../../gpt4all-bindings/python && \
+    pip3 install -e .
+
 COPY . /code/

-CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
+CMD ["uvicorn", "main:app", "--reload", "--host", "0.0.0.0", "--port", "5050"]
@@ -1,33 +1,38 @@
 import os  # A module to interact with the OS
-from typing import Any, Dict, List  # For type hinting
+from typing import Any, Dict, List
+from models.settings import LLMSettings  # For type hinting

 # Importing various modules and classes from a custom library 'langchain' likely used for natural language processing
 from langchain.chains import ConversationalRetrievalChain, LLMChain
 from langchain.chains.question_answering import load_qa_chain
-from langchain.chains.router.llm_router import (LLMRouterChain,
-                                                RouterOutputParser)
-from langchain.chains.router.multi_prompt_prompt import \
-    MULTI_PROMPT_ROUTER_TEMPLATE
+from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
+from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
 from langchain.chat_models import ChatOpenAI, ChatVertexAI
 from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.llms import GPT4All
+from langchain.llms.base import LLM
 from langchain.memory import ConversationBufferMemory
 from langchain.vectorstores import SupabaseVectorStore
 from llm.prompt import LANGUAGE_PROMPT
 from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
-from models.chats import \
-    ChatMessage  # Importing a custom ChatMessage class for handling chat messages
-from models.settings import \
-    BrainSettings  # Importing settings related to the 'brain'
-from pydantic import (BaseModel,  # For data validation and settings management
-                      BaseSettings)
-from supabase import (Client,  # For interacting with Supabase database
-                      create_client)
-from vectorstore.supabase import \
-    CustomSupabaseVectorStore  # Custom class for handling vector storage with Supabase
+from models.chats import (
+    ChatMessage,
+)  # Importing a custom ChatMessage class for handling chat messages
+from models.settings import BrainSettings  # Importing settings related to the 'brain'
+from pydantic import BaseModel  # For data validation and settings management
+from pydantic import BaseSettings
+from supabase import Client  # For interacting with Supabase database
+from supabase import create_client
+from vectorstore.supabase import (
+    CustomSupabaseVectorStore,
+)  # Custom class for handling vector storage with Supabase
+from logger import get_logger
+
+logger = get_logger(__name__)

 class AnswerConversationBufferMemory(ConversationBufferMemory):
     """
@@ -35,11 +40,12 @@ class AnswerConversationBufferMemory(ConversationBufferMemory):
     It overrides the save_context method to save the response using the 'answer' key in the outputs.
     Reference to some issue comment is given in the docstring.
     """

     def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
         # Overriding the save_context method of the parent class
         return super(AnswerConversationBufferMemory, self).save_context(
-            inputs, {'response': outputs['answer']})
+            inputs, {"response": outputs["answer"]}
+        )


 def get_chat_history(inputs) -> str:
@@ -59,21 +65,22 @@ class BrainPicking(BaseModel):
     Main class for the Brain Picking functionality.
     It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.
     """

     # Default class attributes
     llm_name: str = "gpt-3.5-turbo"
     settings = BrainSettings()
+    llm_config = LLMSettings()
     embeddings: OpenAIEmbeddings = None
     supabase_client: Client = None
     vector_store: CustomSupabaseVectorStore = None
-    llm: ChatOpenAI = None
+    llm: LLM = None
     question_generator: LLMChain = None
     doc_chain: ConversationalRetrievalChain = None

     class Config:
         # Allowing arbitrary types for class validation
         arbitrary_types_allowed = True

     def init(self, model: str, user_id: str) -> "BrainPicking":
         """
         Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
@@ -82,15 +89,61 @@ class BrainPicking(BaseModel):
         :return: BrainPicking instance
         """
         self.embeddings = OpenAIEmbeddings(openai_api_key=self.settings.openai_api_key)
-        self.supabase_client = create_client(self.settings.supabase_url, self.settings.supabase_service_key)
+        self.supabase_client = create_client(
+            self.settings.supabase_url, self.settings.supabase_service_key
+        )
         self.vector_store = CustomSupabaseVectorStore(
-            self.supabase_client, self.embeddings, table_name="vectors", user_id=user_id)
-        self.llm = ChatOpenAI(temperature=0, model_name=model)
-        self.question_generator = LLMChain(llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT)
+            self.supabase_client,
+            self.embeddings,
+            table_name="vectors",
+            user_id=user_id,
+        )
+
+        self.llm = self._determine_llm(
+            private_model_args={
+                "model_path": self.llm_config.model_path,
+                "n_ctx": self.llm_config.model_n_ctx,
+                "n_batch": self.llm_config.model_n_batch,
+            },
+            private=self.llm_config.private,
+            model_name=self.llm_name,
+        )
+        self.question_generator = LLMChain(
+            llm=self.llm, prompt=CONDENSE_QUESTION_PROMPT
+        )
         self.doc_chain = load_qa_chain(self.llm, chain_type="stuff")
         return self

-    def _get_qa(self, chat_message: ChatMessage, user_openai_api_key) -> ConversationalRetrievalChain:
+    def _determine_llm(
+        self, private_model_args: dict, private: bool = False, model_name: str = None
+    ) -> LLM:
+        """
+        Determine the language model to be used.
+        :param model_name: Language model name to be used.
+        :param private_model_args: Dictionary containing model_path, n_ctx and n_batch.
+        :param private: Boolean value to determine if private model is to be used.
+        :return: Language model instance
+        """
+        if private:
+            model_path = private_model_args["model_path"]
+            model_n_ctx = private_model_args["n_ctx"]
+            model_n_batch = private_model_args["n_batch"]
+
+            logger.info("Using private model: %s", model_path)
+
+            return GPT4All(
+                model=model_path,
+                n_ctx=model_n_ctx,
+                n_batch=model_n_batch,
+                backend="gptj",
+                verbose=True,
+            )
+        else:
+            return ChatOpenAI(temperature=0, model_name=model_name)
+
+    def _get_qa(
+        self, chat_message: ChatMessage, user_openai_api_key
+    ) -> ConversationalRetrievalChain:
         """
         Retrieves a QA chain for the given chat message and API key.
         :param chat_message: The chat message containing history.
@@ -100,12 +153,15 @@ class BrainPicking(BaseModel):
         # If user provided an API key, update the settings
         if user_openai_api_key is not None and user_openai_api_key != "":
            self.settings.openai_api_key = user_openai_api_key

         # Initialize and return a ConversationalRetrievalChain
         qa = ConversationalRetrievalChain(
-            retriever=self.vector_store.as_retriever(),
-            max_tokens_limit=chat_message.max_tokens, question_generator=self.question_generator,
-            combine_docs_chain=self.doc_chain, get_chat_history=get_chat_history)
+            retriever=self.vector_store.as_retriever(),
+            max_tokens_limit=chat_message.max_tokens,
+            question_generator=self.question_generator,
+            combine_docs_chain=self.doc_chain,
+            get_chat_history=get_chat_history,
+        )
         return qa

     def generate_answer(self, chat_message: ChatMessage, user_openai_api_key) -> str:
@@ -119,15 +175,17 @@

         # Get the QA chain
         qa = self._get_qa(chat_message, user_openai_api_key)

         # Transform the chat history into a list of tuples
         for i in range(0, len(chat_message.history) - 1, 2):
             user_message = chat_message.history[i][1]
             assistant_message = chat_message.history[i + 1][1]
             transformed_history.append((user_message, assistant_message))

         # Generate the model response using the QA chain
-        model_response = qa({"question": chat_message.question, "chat_history": transformed_history})
-        answer = model_response['answer']
+        model_response = qa(
+            {"question": chat_message.question, "chat_history": transformed_history}
+        )
+        answer = model_response["answer"]

         return answer
@@ -13,6 +13,11 @@ class BrainSettings(BaseSettings):
     supabase_url: str
     supabase_service_key: str

+class LLMSettings(BaseSettings):
+    private: bool
+    model_path: str
+    model_n_ctx: int
+    model_n_batch: int
+
 def common_dependencies() -> dict:
     settings = BrainSettings()
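Since `LLMSettings` extends pydantic's `BaseSettings`, each field is populated from the matching environment variable (matched case-insensitively), which is how the `PRIVATE`, `MODEL_PATH`, `MODEL_N_CTX` and `MODEL_N_BATCH` entries in the .env template above reach the backend. A minimal sketch of that behaviour, assuming the variable names from the .env template:

```python
import os

from models.settings import LLMSettings  # import path as in this commit

# BaseSettings pulls each field from the environment (or a .env file),
# matching field names to variable names case-insensitively.
os.environ["PRIVATE"] = "True"
os.environ["MODEL_PATH"] = "./local_models/ggml-gpt4all-j-v1.3-groovy.bin"
os.environ["MODEL_N_CTX"] = "1000"
os.environ["MODEL_N_BATCH"] = "8"

llm_config = LLMSettings()
assert llm_config.private is True     # strings are coerced to the annotated types
assert llm_config.model_n_batch == 8
```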
@@ -22,3 +22,4 @@ transformers==4.30.1
 asyncpg==0.27.0
 flake8==6.0.0
 flake8-black==0.3.6
+sentence_transformers>=2.0.0
@@ -1,15 +1,9 @@
 from typing import Any, List

-from langchain.chains import ConversationalRetrievalChain, LLMChain
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chat_models import ChatOpenAI, ChatVertexAI
-from langchain.client import arun_on_dataset
 from langchain.docstore.document import Document
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.llms import OpenAI, VertexAI
-from langchain.prompts.prompt import PromptTemplate
 from langchain.vectorstores import SupabaseVectorStore
-from supabase import Client, create_client
+from supabase import Client


 class CustomSupabaseVectorStore(SupabaseVectorStore):
@@ -22,7 +16,6 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
     def similarity_search(
         self,
         query: str,
         user_id: str = "none",
         table: str = "match_vectors",
         k: int = 6,
         threshold: float = 0.5,
docs/docs/backend/llm/_category_.json (new file): 8 additions
@@ -0,0 +1,8 @@
{
    "label": "LLM",
    "position": 1,
    "link": {
        "type": "generated-index",
        "description": "How does the LLM (Large Language Model) work?"
    }
}
docs/docs/backend/llm/private-llm.md (new file): 23 additions
@@ -0,0 +1,23 @@
---
sidebar_position: 1
---

# Private LLM

Quivr now has the capability to use a private LLM model powered by GPT4All (other open source models coming soon).

This is similar to the functionality provided by the PrivateGPT project.

This means that your data never leaves the server. The LLM is downloaded to the server and runs inference on your question locally.

## How to use
Set the 'private' flag to True in the /backend/.env file. You can also set other model parameters in the .env file.
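
For example, a minimal private configuration in /backend/.env, reusing the defaults this commit adds to the .env template, would be:

```
PRIVATE=True
MODEL_PATH=./local_models/ggml-gpt4all-j-v1.3-groovy.bin
MODEL_N_CTX=1000
MODEL_N_BATCH=8
```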

Download the GPT4All model from [here](https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin) and place it in the /backend/local_models folder.
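
With `private` enabled, the backend loads the model through langchain's GPT4All wrapper (see `_determine_llm` in the backend diff above). A minimal standalone sketch of that private branch, assuming the model file sits at the default path:

```python
from langchain.llms import GPT4All

# Mirrors the private branch of _determine_llm in this commit.
llm = GPT4All(
    model="./local_models/ggml-gpt4all-j-v1.3-groovy.bin",  # MODEL_PATH
    n_ctx=1000,      # context window size (MODEL_N_CTX)
    n_batch=8,       # batch size for prompt processing (MODEL_N_BATCH)
    backend="gptj",  # ggml-gpt4all-j is a GPT-J style model
    verbose=True,
)

print(llm("What does Quivr do?"))  # inference runs locally; no data leaves the server
```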

## Future Plans
We are planning to add more models to the private LLM feature. We are also planning on using a local embedding model from Hugging Face to reduce our reliance on OpenAI's API.

We will also be adding the ability to use a private LLM model from the frontend and API. Currently it is only available if you self-host the backend.