feat(llm): removing all llms to prepare for genoss (#804)

* feat(routes): removed all except openai

* feat(deadcode): removed some dead code and the unused summarization feature

* feat(streaming): removed privateGPT from the streaming path

* chore(requirements): bumped the langchain version
Stan Girard authored 2023-07-31 16:01:34 +02:00, committed by GitHub
parent 1a499f494d
commit db40f3cccd
7 changed files with 19 additions and 477 deletions

View File

@@ -1,13 +1,9 @@
from .base import BaseBrainPicking
from .qa_base import QABaseBrainPicking
from .openai import OpenAIBrainPicking
from .openai_functions import OpenAIFunctionsBrainPicking
from .private_gpt4all import PrivateGPT4AllBrainPicking

__all__ = [
    "BaseBrainPicking",
    "QABaseBrainPicking",
    "OpenAIBrainPicking",
    "OpenAIFunctionsBrainPicking",
    "PrivateGPT4AllBrainPicking",
]
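For orientation, with the functions and private GPT4All pickers gone, the resulting llm/__init__.py should export only the OpenAI-backed classes. A minimal sketch of the post-change file, assuming the removed lines are the openai_functions and private_gpt4all ones named in the commit message:

# Sketch of llm/__init__.py after the removal (reconstructed, not copied from the diff)
from .base import BaseBrainPicking
from .qa_base import QABaseBrainPicking
from .openai import OpenAIBrainPicking

__all__ = [
    "BaseBrainPicking",
    "QABaseBrainPicking",
    "OpenAIBrainPicking",
]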

View File

@@ -1,227 +0,0 @@
from typing import Any, Dict, List, Optional

from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from llm.models.FunctionCall import FunctionCall
from llm.models.OpenAiAnswer import OpenAiAnswer
from logger import get_logger
from models.chat import ChatHistory
from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history
from supabase.client import Client, create_client
from vectorstore.supabase import CustomSupabaseVectorStore

from .base import BaseBrainPicking

logger = get_logger(__name__)


def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer:
    answer = model_response["choices"][0]["message"]
    content = answer["content"]
    function_call = None

    if answer.get("function_call", None) is not None:
        function_call = FunctionCall(
            answer["function_call"]["name"],
            answer["function_call"]["arguments"],
        )

    return OpenAiAnswer(
        content=content,
        function_call=function_call, # pyright: ignore reportPrivateUsage=none
    )


class OpenAIFunctionsBrainPicking(BaseBrainPicking):
    """
    Class for the OpenAI Brain Picking functionality using OpenAI Functions.
    It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.
    """

    # Default class attributes
    model: str = "gpt-3.5-turbo-0613"

    def __init__(
        self,
        model: str,
        chat_id: str,
        temperature: float,
        max_tokens: int,
        brain_id: str,
        user_openai_api_key: str,
        # TODO: add streaming
    ) -> "OpenAIFunctionsBrainPicking": # pyright: ignore reportPrivateUsage=none
        super().__init__(
            model=model,
            chat_id=chat_id,
            max_tokens=max_tokens,
            user_openai_api_key=user_openai_api_key,
            temperature=temperature,
            brain_id=str(brain_id),
            streaming=False,
        )

    @property
    def openai_client(self) -> ChatOpenAI:
        return ChatOpenAI(
            openai_api_key=self.openai_api_key
        ) # pyright: ignore reportPrivateUsage=none

    @property
    def embeddings(self) -> OpenAIEmbeddings:
        return OpenAIEmbeddings(
            openai_api_key=self.openai_api_key
        ) # pyright: ignore reportPrivateUsage=none

    @property
    def supabase_client(self) -> Client:
        return create_client(
            self.brain_settings.supabase_url, self.brain_settings.supabase_service_key
        )

    @property
    def vector_store(self) -> CustomSupabaseVectorStore:
        return CustomSupabaseVectorStore(
            self.supabase_client,
            self.embeddings,
            table_name="vectors",
            brain_id=self.brain_id,
        )

    def _get_model_response(
        self,
        messages: List[Dict[str, str]],
        functions: Optional[List[Dict[str, Any]]] = None,
    ) -> Any:
        """
        Retrieve a model response given messages and functions
        """
        logger.info("Getting model response")
        kwargs = {
            "messages": messages,
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }

        if functions:
            logger.info("Adding functions to model response")
            kwargs["functions"] = functions

        return self.openai_client.completion_with_retry(**kwargs)

    def _get_chat_history(self) -> List[Dict[str, str]]:
        """
        Retrieves the chat history in a formatted list
        """
        logger.info("Getting chat history")
        history = get_chat_history(self.chat_id)
        return [
            item
            for chat in history
            for item in [
                {"role": "user", "content": chat.user_message},
                {"role": "assistant", "content": chat.assistant},
            ]
        ]

    def _get_context(self, question: str) -> str:
        """
        Retrieve documents related to the question
        """
        logger.info("Getting context")
        return self.vector_store.similarity_search(
            query=question
        ) # pyright: ignore reportPrivateUsage=none

    def _construct_prompt(
        self, question: str, useContext: bool = False, useHistory: bool = False
    ) -> List[Dict[str, str]]:
        """
        Constructs a prompt given a question, and optionally include context and history
        """
        logger.info("Constructing prompt")
        system_messages = [
            {
                "role": "system",
                "content": """Your name is Quivr. You are an assistant that has access to a person's documents and that can answer questions about them.
A person will ask you a question and you will provide a helpful answer.
Write the answer in the same language as the question.
You have access to functions to help you answer the question.
If you don't know the answer, just say that you don't know but be helpful and explain why you can't answer""",
            }
        ]

        if useHistory:
            logger.info("Adding chat history to prompt")
            history = self._get_chat_history()
            system_messages.append(
                {"role": "system", "content": "Previous messages are already in chat."}
            )
            system_messages.extend(history)

        if useContext:
            logger.info("Adding chat context to prompt")
            chat_context = self._get_context(question)
            context_message = f"Here are the documents you have access to: {chat_context if chat_context else 'No document found'}"
            system_messages.append({"role": "user", "content": context_message})

        system_messages.append({"role": "user", "content": question})

        return system_messages

    def generate_answer(self, question: str) -> ChatHistory:
        """
        Main function to get an answer for the given question
        """
        logger.info("Getting answer")
        functions = [
            {
                "name": "get_history_and_context",
                "description": "Get the chat history between you and the user and also get the relevant documents to answer the question. Always use that unless a very simple question is asked that a 5 years old could answer or if the user says continue or something like that.",
                "parameters": {"type": "object", "properties": {}},
            },
        ]

        # First, try to get an answer using just the question
        response = self._get_model_response(
            messages=self._construct_prompt(question), functions=functions
        )
        formatted_response = format_answer(response)

        # If the model calls for history, try again with history included
        if (
            formatted_response.function_call
            and formatted_response.function_call.name == "get_history"
        ):
            logger.info("Model called for history")
            response = self._get_model_response(
                messages=self._construct_prompt(question, useHistory=True),
                functions=[],
            )
            formatted_response = format_answer(response)

        if (
            formatted_response.function_call
            and formatted_response.function_call.name == "get_history_and_context"
        ):
            logger.info("Model called for history and context")
            response = self._get_model_response(
                messages=self._construct_prompt(
                    question, useContext=True, useHistory=True
                ),
                functions=[],
            )
            formatted_response = format_answer(response)

        # Update chat history
        chat_history = update_chat_history(
            chat_id=self.chat_id,
            user_message=question,
            assistant=formatted_response.content or "",
        )
        return chat_history
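A quick sketch of how format_answer above consumes a chat-completion response; the payload below is invented for illustration and only mirrors the keys the function actually reads:

# Illustrative only: the minimal response shape that format_answer above knows how to read.
sample_response = {
    "choices": [
        {
            "message": {
                "content": None,
                "function_call": {
                    "name": "get_history_and_context",
                    "arguments": "{}",
                },
            }
        }
    ]
}

answer = format_answer(sample_response)  # uses format_answer defined above
if answer.function_call and answer.function_call.name == "get_history_and_context":
    # generate_answer would now re-prompt with useContext=True and useHistory=True
    print("model asked for history and context")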

View File

@@ -1,74 +0,0 @@
from typing import Optional

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import BaseLLM
from langchain.llms.gpt4all import GPT4All
from llm.qa_base import QABaseBrainPicking
from logger import get_logger

logger = get_logger(__name__)


class PrivateGPT4AllBrainPicking(QABaseBrainPicking):
    """
    This subclass of BrainPicking is used to specifically work with the private language model GPT4All.
    """

    # Define the default model path
    model_path: str = "./local_models/ggml-gpt4all-j-v1.3-groovy.bin"

    def __init__(
        self,
        chat_id: str,
        brain_id: str,
        user_openai_api_key: Optional[str],
        streaming: bool,
        model_path: str,
    ) -> None:
        """
        Initialize the PrivateBrainPicking class by calling the parent class's initializer.
        :param brain_id: The brain_id in the DB.
        :param chat_id: The id of the chat in the DB.
        :param streaming: Whether to enable streaming of the model
        :param model_path: The path to the model. If not provided, a default path is used.
        """
        super().__init__(
            model="gpt4all-j-1.3",
            brain_id=brain_id,
            chat_id=chat_id,
            user_openai_api_key=user_openai_api_key,
            streaming=streaming,
        )

        # Set the model path
        self.model_path = model_path

    # TODO: Use private embeddings model. This involves some restructuring of how we store the embeddings.
    @property
    def embeddings(self) -> OpenAIEmbeddings:
        return OpenAIEmbeddings(
            openai_api_key=self.openai_api_key
        ) # pyright: ignore reportPrivateUsage=none

    def _create_llm(
        self,
        model,
        streaming=False,
        callbacks=None,
    ) -> BaseLLM:
        """
        Override the _create_llm method to enforce the use of a private model.
        :param model: Language model name to be used.
        :param streaming: Whether to enable streaming of the model
        :param callbacks: Callbacks to be used for streaming
        :return: Language model instance
        """
        model_path = self.model_path

        logger.info("Using private model: %s", model)
        logger.info("Streaming is set to %s", streaming)

        return GPT4All(
            model=model_path,
        ) # pyright: ignore reportPrivateUsage=none
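For reference, the private path above boils down to pointing langchain's GPT4All wrapper at a local weights file. A minimal standalone sketch, assuming a GPT4All ggml model has been downloaded to the default path used above:

from langchain.llms.gpt4all import GPT4All

# Example weights path only, matching the default above; any local GPT4All ggml file would do.
llm = GPT4All(model="./local_models/ggml-gpt4all-j-v1.3-groovy.bin")
print(llm("Summarize Quivr in one sentence."))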

View File

@@ -1,107 +0,0 @@
import os

import guidance
import openai
from logger import get_logger

logger = get_logger(__name__)

openai_api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = openai_api_key
summary_llm = guidance.llms.OpenAI("gpt-3.5-turbo-0613", caching=False)


def llm_summerize(document):
    summary = guidance(
        """
{{#system~}}
You are a world best summarizer. \n
Condense the text, capturing essential points and core ideas. Include relevant \
examples, omit excess details, and ensure the summary's length matches the \
original's complexity.
{{/system~}}
{{#user~}}
Summarize the following text:
---
{{document}}
{{/user~}}
{{#assistant~}}
{{gen 'summarization' temperature=0.2 max_tokens=100}}
{{/assistant~}}
""",
        llm=summary_llm,
    ) # pyright: ignore reportPrivateUsage=none
    summary = summary(document=document)
    logger.info("Summarization: %s", summary)
    return summary["summarization"]


def llm_evaluate_summaries(question, summaries, model):
    if not model.startswith("gpt"):
        logger.info(f"Model {model} not supported. Using gpt-3.5-turbo instead.")
        model = "gpt-3.5-turbo-0613"
    logger.info(f"Evaluating summaries with {model}")
    evaluation_llm = guidance.llms.OpenAI(model, caching=False)
    evaluation = guidance(
        """
{{#system~}}
You are a world best evaluator. You evaluate the relevance of summaries based \
on user input question. Return evaluation in following csv format, csv headers \
are [summary_id,document_id,evaluation,reason].
Evaluator Task
- Evaluation should be a score number between 0 and 5.
- Reason should be a short sentence within 20 words explain why the evaluation.
---
Example
summary_id,document_id,evaluation,reason
1,4,3,"not mentioned about topic A"
2,2,4,"It is not relevant to the question"
{{/system~}}
{{#user~}}
Based on the question, do Evaluator Task for each summary.
---
Question: {{question}}
{{#each summaries}}
Summary
summary_id: {{this.id}}
document_id: {{this.document_id}}
evaluation: ""
reason: ""
Summary Content: {{this.content}}
File Name: {{this.metadata.file_name}}
{{/each}}
{{/user~}}
{{#assistant~}}
{{gen 'evaluation' temperature=0.2 stop='<|im_end|>'}}
{{/assistant~}}
""",
        llm=evaluation_llm,
    ) # pyright: ignore reportPrivateUsage=none
    result = evaluation(question=question, summaries=summaries)
    evaluations = {}
    for evaluation in result["evaluation"].split(
        "\n"
    ): # pyright: ignore reportPrivateUsage=none
        if evaluation == "" or not evaluation[0].isdigit():
            continue
        logger.info("Evaluation Row: %s", evaluation)
        summary_id, document_id, score, *reason = evaluation.split(",")
        if not score.isdigit():
            continue
        score = int(score)
        if score < 3 or score > 5:
            continue
        evaluations[summary_id] = {
            "evaluation": score,
            "reason": ",".join(reason),
            "summary_id": summary_id,
            "document_id": document_id,
        }
    return [
        e
        for e in sorted(
            evaluations.values(), key=lambda x: x["evaluation"], reverse=True
        )
    ]
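To clarify what the parsing loop above expects back from the evaluator, here is a self-contained sketch with an invented CSV answer; as in the loop above, only rows that start with a digit and carry a score between 3 and 5 survive:

# Illustrative only: the CSV text is made up, but the filtering mirrors the loop above.
raw_evaluation = (
    "summary_id,document_id,evaluation,reason\n"
    '1,4,4,"covers the question, minor gaps"\n'
    '2,2,1,"unrelated to the question"\n'
)

kept = {}
for row in raw_evaluation.split("\n"):
    if row == "" or not row[0].isdigit():
        continue  # header and blank lines are skipped
    summary_id, document_id, score, *reason = row.split(",")
    if not score.isdigit():
        continue
    score = int(score)
    if score < 3 or score > 5:
        continue  # only summaries scored 3-5 are kept
    kept[summary_id] = {
        "evaluation": score,
        "reason": ",".join(reason),
        "document_id": document_id,
    }

print(sorted(kept.values(), key=lambda x: x["evaluation"], reverse=True))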

View File

@@ -1,5 +1,5 @@
pymupdf==1.22.3
langchain==0.0.228
langchain==0.0.247
Markdown==3.4.3
openai==0.27.6
pdf2image==1.16.3

View File

@@ -8,8 +8,6 @@ from auth import AuthBearer, get_current_user
from fastapi import APIRouter, Depends, Query, Request
from fastapi.responses import StreamingResponse
from llm.openai import OpenAIBrainPicking
from llm.openai_functions import OpenAIFunctionsBrainPicking
from llm.private_gpt4all import PrivateGPT4AllBrainPicking
from models.brains import get_default_user_brain_or_create_new
from models.chat import Chat, ChatHistory
from models.chats import ChatQuestion
@@ -21,7 +19,6 @@ from repository.chat.get_chat_history import get_chat_history
from repository.chat.get_user_chats import get_user_chats
from repository.chat.update_chat import ChatUpdatableProperties, update_chat
from utils.constants import (
    openai_function_compatible_models,
    streaming_compatible_models,
)
@@ -193,34 +190,14 @@ async def create_question_handler(
    if not brain_id:
        brain_id = get_default_user_brain_or_create_new(current_user).id

    if llm_settings.private:
        gpt_answer_generator = PrivateGPT4AllBrainPicking(
            chat_id=str(chat_id),
            brain_id=str(brain_id),
            user_openai_api_key=current_user.user_openai_api_key,
            streaming=False,
            model_path=llm_settings.model_path,
        )
    elif chat_question.model in openai_function_compatible_models:
        gpt_answer_generator = OpenAIFunctionsBrainPicking(
            model=chat_question.model,
            chat_id=str(chat_id),
            temperature=chat_question.temperature,
            max_tokens=chat_question.max_tokens,
            brain_id=str(brain_id),
            user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
        )
    else:
        gpt_answer_generator = OpenAIBrainPicking(
            chat_id=str(chat_id),
            model=chat_question.model,
            max_tokens=chat_question.max_tokens,
            temperature=chat_question.temperature,
            brain_id=str(brain_id),
            user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
        )
    gpt_answer_generator = OpenAIBrainPicking(
        chat_id=str(chat_id),
        model=chat_question.model,
        max_tokens=chat_question.max_tokens,
        temperature=chat_question.temperature,
        brain_id=str(brain_id),
        user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
    )

    chat_answer = gpt_answer_generator.generate_answer( # pyright: ignore reportPrivateUsage=none
        chat_question.question
@@ -267,26 +244,16 @@ async def create_stream_question_handler(
    user_openai_api_key = request.headers.get("Openai-Api-Key")
    streaming = True
    check_user_limit(current_user)

    llm_settings = LLMSettings()

    if llm_settings.private:
        gpt_answer_generator = PrivateGPT4AllBrainPicking(
            chat_id=str(chat_id),
            brain_id=str(brain_id),
            user_openai_api_key=user_openai_api_key,
            streaming=streaming,
            model_path=llm_settings.model_path,
        )
    else:
        gpt_answer_generator = OpenAIBrainPicking(
            chat_id=str(chat_id),
            model=chat_question.model,
            max_tokens=chat_question.max_tokens,
            temperature=chat_question.temperature,
            brain_id=str(brain_id),
            user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
            streaming=streaming,
        )
    gpt_answer_generator = OpenAIBrainPicking(
        chat_id=str(chat_id),
        model=chat_question.model,
        max_tokens=chat_question.max_tokens,
        temperature=chat_question.temperature,
        brain_id=str(brain_id),
        user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
        streaming=streaming,
    )

    return StreamingResponse(
        gpt_answer_generator.generate_stream( # pyright: ignore reportPrivateUsage=none
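The streaming handler simply hands the picker's token generator to FastAPI's StreamingResponse. A minimal standalone sketch of that pattern; the route and generator below are placeholders, not Quivr's real endpoint:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


async def fake_token_stream():
    # Stand-in for gpt_answer_generator.generate_stream(...)
    for token in ["Hello", ", ", "world"]:
        yield token


@app.get("/demo/stream")  # hypothetical route for illustration
async def demo_stream():
    return StreamingResponse(fake_token_stream(), media_type="text/event-stream")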

View File

@@ -2,8 +2,6 @@ from concurrent.futures import ThreadPoolExecutor
from typing import List
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from llm.utils.summarization import llm_summerize
from logger import get_logger
from models.settings import BrainSettings, CommonsDep, common_dependencies
from pydantic import BaseModel
@@ -50,17 +48,6 @@ class Neurons(BaseModel):
        return summaries.data


def create_summary(commons: CommonsDep, document_id, content, metadata):
    logger.info(f"Summarizing document {content[:100]}")
    summary = llm_summerize(content)
    logger.info(f"Summary: {summary}")
    metadata["document_id"] = document_id
    summary_doc_with_metadata = Document(page_content=summary, metadata=metadata)

    sids = commons["summaries_vector_store"].add_documents([summary_doc_with_metadata])
    if sids and len(sids) > 0:
        commons["supabase"].table("summaries").update(
            {"document_id": document_id}
        ).match({"id": sids[0]}).execute()


def error_callback(exception):