feat(llm): removing all llms to prepare for genoss (#804)

* feat(routes): removed all except openai

* feat(deadcode): removed some dead code and the unused summarization feature

* feat(streaming): removed privateGPT from the streaming path

* chore(requirements): bumped the langchain version
Stan Girard authored 2023-07-31 16:01:34 +02:00, committed by GitHub
parent 1a499f494d
commit db40f3cccd
7 changed files with 19 additions and 477 deletions

View File

@@ -1,13 +1,9 @@
from .base import BaseBrainPicking
from .qa_base import QABaseBrainPicking
from .openai import OpenAIBrainPicking
from .openai_functions import OpenAIFunctionsBrainPicking
from .private_gpt4all import PrivateGPT4AllBrainPicking

__all__ = [
    "BaseBrainPicking",
    "QABaseBrainPicking",
    "OpenAIBrainPicking",
    "OpenAIFunctionsBrainPicking",
    "PrivateGPT4AllBrainPicking",
]
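For orientation, with the functions and private GPT4All pickers gone, the resulting llm/__init__.py should export only the OpenAI-backed classes. A minimal sketch of the post-change file, assuming the removed lines are the openai_functions and private_gpt4all ones named in the commit message:

# Sketch of llm/__init__.py after the removal (reconstructed, not copied from the diff)
from .base import BaseBrainPicking
from .qa_base import QABaseBrainPicking
from .openai import OpenAIBrainPicking

__all__ = [
    "BaseBrainPicking",
    "QABaseBrainPicking",
    "OpenAIBrainPicking",
]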

View File

@@ -1,227 +0,0 @@
from typing import Any, Dict, List, Optional

from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from llm.models.FunctionCall import FunctionCall
from llm.models.OpenAiAnswer import OpenAiAnswer
from logger import get_logger
from models.chat import ChatHistory
from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history
from supabase.client import Client, create_client
from vectorstore.supabase import CustomSupabaseVectorStore

from .base import BaseBrainPicking

logger = get_logger(__name__)


def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer:
    answer = model_response["choices"][0]["message"]
    content = answer["content"]
    function_call = None

    if answer.get("function_call", None) is not None:
        function_call = FunctionCall(
            answer["function_call"]["name"],
            answer["function_call"]["arguments"],
        )

    return OpenAiAnswer(
        content=content,
        function_call=function_call, # pyright: ignore reportPrivateUsage=none
    )


class OpenAIFunctionsBrainPicking(BaseBrainPicking):
    """
    Class for the OpenAI Brain Picking functionality using OpenAI Functions.
    It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.
    """

    # Default class attributes
    model: str = "gpt-3.5-turbo-0613"

    def __init__(
        self,
        model: str,
        chat_id: str,
        temperature: float,
        max_tokens: int,
        brain_id: str,
        user_openai_api_key: str,
        # TODO: add streaming
    ) -> "OpenAIFunctionsBrainPicking": # pyright: ignore reportPrivateUsage=none
        super().__init__(
            model=model,
            chat_id=chat_id,
            max_tokens=max_tokens,
            user_openai_api_key=user_openai_api_key,
            temperature=temperature,
            brain_id=str(brain_id),
            streaming=False,
        )

    @property
    def openai_client(self) -> ChatOpenAI:
        return ChatOpenAI(
            openai_api_key=self.openai_api_key
        ) # pyright: ignore reportPrivateUsage=none

    @property
    def embeddings(self) -> OpenAIEmbeddings:
        return OpenAIEmbeddings(
            openai_api_key=self.openai_api_key
        ) # pyright: ignore reportPrivateUsage=none

    @property
    def supabase_client(self) -> Client:
        return create_client(
            self.brain_settings.supabase_url, self.brain_settings.supabase_service_key
        )

    @property
    def vector_store(self) -> CustomSupabaseVectorStore:
        return CustomSupabaseVectorStore(
            self.supabase_client,
            self.embeddings,
            table_name="vectors",
            brain_id=self.brain_id,
        )

    def _get_model_response(
        self,
        messages: List[Dict[str, str]],
        functions: Optional[List[Dict[str, Any]]] = None,
    ) -> Any:
        """
        Retrieve a model response given messages and functions
        """
        logger.info("Getting model response")
        kwargs = {
            "messages": messages,
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }

        if functions:
            logger.info("Adding functions to model response")
            kwargs["functions"] = functions

        return self.openai_client.completion_with_retry(**kwargs)

    def _get_chat_history(self) -> List[Dict[str, str]]:
        """
        Retrieves the chat history in a formatted list
        """
        logger.info("Getting chat history")
        history = get_chat_history(self.chat_id)
        return [
            item
            for chat in history
            for item in [
                {"role": "user", "content": chat.user_message},
                {"role": "assistant", "content": chat.assistant},
            ]
        ]

    def _get_context(self, question: str) -> str:
        """
        Retrieve documents related to the question
        """
        logger.info("Getting context")
        return self.vector_store.similarity_search(
            query=question
        ) # pyright: ignore reportPrivateUsage=none

    def _construct_prompt(
        self, question: str, useContext: bool = False, useHistory: bool = False
    ) -> List[Dict[str, str]]:
        """
        Constructs a prompt given a question, and optionally include context and history
        """
        logger.info("Constructing prompt")
        system_messages = [
            {
                "role": "system",
                "content": """Your name is Quivr. You are an assistant that has access to a person's documents and that can answer questions about them.
A person will ask you a question and you will provide a helpful answer.
Write the answer in the same language as the question.
You have access to functions to help you answer the question.
If you don't know the answer, just say that you don't know but be helpful and explain why you can't answer""",
            }
        ]

        if useHistory:
            logger.info("Adding chat history to prompt")
            history = self._get_chat_history()
            system_messages.append(
                {"role": "system", "content": "Previous messages are already in chat."}
            )
            system_messages.extend(history)

        if useContext:
            logger.info("Adding chat context to prompt")
            chat_context = self._get_context(question)
            context_message = f"Here are the documents you have access to: {chat_context if chat_context else 'No document found'}"
            system_messages.append({"role": "user", "content": context_message})

        system_messages.append({"role": "user", "content": question})

        return system_messages

    def generate_answer(self, question: str) -> ChatHistory:
        """
        Main function to get an answer for the given question
        """
        logger.info("Getting answer")
        functions = [
            {
                "name": "get_history_and_context",
                "description": "Get the chat history between you and the user and also get the relevant documents to answer the question. Always use that unless a very simple question is asked that a 5 years old could answer or if the user says continue or something like that.",
                "parameters": {"type": "object", "properties": {}},
            },
        ]

        # First, try to get an answer using just the question
        response = self._get_model_response(
            messages=self._construct_prompt(question), functions=functions
        )
        formatted_response = format_answer(response)

        # If the model calls for history, try again with history included
        if (
            formatted_response.function_call
            and formatted_response.function_call.name == "get_history"
        ):
            logger.info("Model called for history")
            response = self._get_model_response(
                messages=self._construct_prompt(question, useHistory=True),
                functions=[],
            )
            formatted_response = format_answer(response)

        if (
            formatted_response.function_call
            and formatted_response.function_call.name == "get_history_and_context"
        ):
            logger.info("Model called for history and context")
            response = self._get_model_response(
                messages=self._construct_prompt(
                    question, useContext=True, useHistory=True
                ),
                functions=[],
            )
            formatted_response = format_answer(response)

        # Update chat history
        chat_history = update_chat_history(
            chat_id=self.chat_id,
            user_message=question,
            assistant=formatted_response.content or "",
        )
        return chat_history
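A quick sketch of how format_answer above consumes a chat-completion response; the payload below is invented for illustration and only mirrors the keys the function actually reads:

# Illustrative only: the minimal response shape that format_answer above knows how to read.
sample_response = {
    "choices": [
        {
            "message": {
                "content": None,
                "function_call": {
                    "name": "get_history_and_context",
                    "arguments": "{}",
                },
            }
        }
    ]
}

answer = format_answer(sample_response)  # uses format_answer defined above
if answer.function_call and answer.function_call.name == "get_history_and_context":
    # generate_answer would now re-prompt with useContext=True and useHistory=True
    print("model asked for history and context")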

View File

@@ -1,74 +0,0 @@
from typing import Optional

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import BaseLLM
from langchain.llms.gpt4all import GPT4All
from llm.qa_base import QABaseBrainPicking
from logger import get_logger

logger = get_logger(__name__)


class PrivateGPT4AllBrainPicking(QABaseBrainPicking):
    """
    This subclass of BrainPicking is used to specifically work with the private language model GPT4All.
    """

    # Define the default model path
    model_path: str = "./local_models/ggml-gpt4all-j-v1.3-groovy.bin"

    def __init__(
        self,
        chat_id: str,
        brain_id: str,
        user_openai_api_key: Optional[str],
        streaming: bool,
        model_path: str,
    ) -> None:
        """
        Initialize the PrivateBrainPicking class by calling the parent class's initializer.
        :param brain_id: The brain_id in the DB.
        :param chat_id: The id of the chat in the DB.
        :param streaming: Whether to enable streaming of the model
        :param model_path: The path to the model. If not provided, a default path is used.
        """
        super().__init__(
            model="gpt4all-j-1.3",
            brain_id=brain_id,
            chat_id=chat_id,
            user_openai_api_key=user_openai_api_key,
            streaming=streaming,
        )

        # Set the model path
        self.model_path = model_path

    # TODO: Use private embeddings model. This involves some restructuring of how we store the embeddings.
    @property
    def embeddings(self) -> OpenAIEmbeddings:
        return OpenAIEmbeddings(
            openai_api_key=self.openai_api_key
        ) # pyright: ignore reportPrivateUsage=none

    def _create_llm(
        self,
        model,
        streaming=False,
        callbacks=None,
    ) -> BaseLLM:
        """
        Override the _create_llm method to enforce the use of a private model.
        :param model: Language model name to be used.
        :param streaming: Whether to enable streaming of the model
        :param callbacks: Callbacks to be used for streaming
        :return: Language model instance
        """
        model_path = self.model_path

        logger.info("Using private model: %s", model)
        logger.info("Streaming is set to %s", streaming)

        return GPT4All(
            model=model_path,
        ) # pyright: ignore reportPrivateUsage=none
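For reference, the private path above boils down to pointing langchain's GPT4All wrapper at a local weights file. A minimal standalone sketch, assuming a GPT4All ggml model has been downloaded to the default path used above:

from langchain.llms.gpt4all import GPT4All

# Example weights path only, matching the default above; any local GPT4All ggml file would do.
llm = GPT4All(model="./local_models/ggml-gpt4all-j-v1.3-groovy.bin")
print(llm("Summarize Quivr in one sentence."))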

View File

@@ -1,107 +0,0 @@
import os

import guidance
import openai
from logger import get_logger

logger = get_logger(__name__)

openai_api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = openai_api_key
summary_llm = guidance.llms.OpenAI("gpt-3.5-turbo-0613", caching=False)


def llm_summerize(document):
    summary = guidance(
        """
{{#system~}}
You are a world best summarizer. \n
Condense the text, capturing essential points and core ideas. Include relevant \
examples, omit excess details, and ensure the summary's length matches the \
original's complexity.
{{/system~}}
{{#user~}}
Summarize the following text:
---
{{document}}
{{/user~}}
{{#assistant~}}
{{gen 'summarization' temperature=0.2 max_tokens=100}}
{{/assistant~}}
""",
        llm=summary_llm,
    ) # pyright: ignore reportPrivateUsage=none
    summary = summary(document=document)
    logger.info("Summarization: %s", summary)
    return summary["summarization"]


def llm_evaluate_summaries(question, summaries, model):
    if not model.startswith("gpt"):
        logger.info(f"Model {model} not supported. Using gpt-3.5-turbo instead.")
        model = "gpt-3.5-turbo-0613"
    logger.info(f"Evaluating summaries with {model}")
    evaluation_llm = guidance.llms.OpenAI(model, caching=False)
    evaluation = guidance(
        """
{{#system~}}
You are a world best evaluator. You evaluate the relevance of summaries based \
on user input question. Return evaluation in following csv format, csv headers \
are [summary_id,document_id,evaluation,reason].
Evaluator Task
- Evaluation should be a score number between 0 and 5.
- Reason should be a short sentence within 20 words explain why the evaluation.
---
Example
summary_id,document_id,evaluation,reason
1,4,3,"not mentioned about topic A"
2,2,4,"It is not relevant to the question"
{{/system~}}
{{#user~}}
Based on the question, do Evaluator Task for each summary.
---
Question: {{question}}
{{#each summaries}}
Summary
summary_id: {{this.id}}
document_id: {{this.document_id}}
evaluation: ""
reason: ""
Summary Content: {{this.content}}
File Name: {{this.metadata.file_name}}
{{/each}}
{{/user~}}
{{#assistant~}}
{{gen 'evaluation' temperature=0.2 stop='<|im_end|>'}}
{{/assistant~}}
""",
        llm=evaluation_llm,
    ) # pyright: ignore reportPrivateUsage=none
    result = evaluation(question=question, summaries=summaries)
    evaluations = {}
    for evaluation in result["evaluation"].split(
        "\n"
    ): # pyright: ignore reportPrivateUsage=none
        if evaluation == "" or not evaluation[0].isdigit():
            continue
        logger.info("Evaluation Row: %s", evaluation)
        summary_id, document_id, score, *reason = evaluation.split(",")
        if not score.isdigit():
            continue
        score = int(score)
        if score < 3 or score > 5:
            continue
        evaluations[summary_id] = {
            "evaluation": score,
            "reason": ",".join(reason),
            "summary_id": summary_id,
            "document_id": document_id,
        }
    return [
        e
        for e in sorted(
            evaluations.values(), key=lambda x: x["evaluation"], reverse=True
        )
    ]
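To clarify what the parsing loop above expects back from the evaluator, here is a self-contained sketch with an invented CSV answer; as in the loop above, only rows that start with a digit and carry a score between 3 and 5 survive:

# Illustrative only: the CSV text is made up, but the filtering mirrors the loop above.
raw_evaluation = (
    "summary_id,document_id,evaluation,reason\n"
    '1,4,4,"covers the question, minor gaps"\n'
    '2,2,1,"unrelated to the question"\n'
)

kept = {}
for row in raw_evaluation.split("\n"):
    if row == "" or not row[0].isdigit():
        continue  # header and blank lines are skipped
    summary_id, document_id, score, *reason = row.split(",")
    if not score.isdigit():
        continue
    score = int(score)
    if score < 3 or score > 5:
        continue  # only summaries scored 3-5 are kept
    kept[summary_id] = {
        "evaluation": score,
        "reason": ",".join(reason),
        "document_id": document_id,
    }

print(sorted(kept.values(), key=lambda x: x["evaluation"], reverse=True))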

View File

@@ -1,5 +1,5 @@
pymupdf==1.22.3
langchain==0.0.228
langchain==0.0.247
Markdown==3.4.3
openai==0.27.6
pdf2image==1.16.3

View File

@@ -8,8 +8,6 @@ from auth import AuthBearer, get_current_user
from fastapi import APIRouter, Depends, Query, Request
from fastapi.responses import StreamingResponse
from llm.openai import OpenAIBrainPicking
from llm.openai_functions import OpenAIFunctionsBrainPicking
from llm.private_gpt4all import PrivateGPT4AllBrainPicking
from models.brains import get_default_user_brain_or_create_new
from models.chat import Chat, ChatHistory
from models.chats import ChatQuestion
@@ -21,7 +19,6 @@ from repository.chat.get_chat_history import get_chat_history
from repository.chat.get_user_chats import get_user_chats
from repository.chat.update_chat import ChatUpdatableProperties, update_chat
from utils.constants import (
    openai_function_compatible_models,
    streaming_compatible_models,
)
@@ -193,34 +190,14 @@ async def create_question_handler(
    if not brain_id:
        brain_id = get_default_user_brain_or_create_new(current_user).id

    if llm_settings.private:
        gpt_answer_generator = PrivateGPT4AllBrainPicking(
            chat_id=str(chat_id),
            brain_id=str(brain_id),
            user_openai_api_key=current_user.user_openai_api_key,
            streaming=False,
            model_path=llm_settings.model_path,
        )
    elif chat_question.model in openai_function_compatible_models:
        gpt_answer_generator = OpenAIFunctionsBrainPicking(
            model=chat_question.model,
            chat_id=str(chat_id),
            temperature=chat_question.temperature,
            max_tokens=chat_question.max_tokens,
            brain_id=str(brain_id),
            user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
        )
    else:
        gpt_answer_generator = OpenAIBrainPicking(
            chat_id=str(chat_id),
            model=chat_question.model,
            max_tokens=chat_question.max_tokens,
            temperature=chat_question.temperature,
            brain_id=str(brain_id),
            user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
        )
    gpt_answer_generator = OpenAIBrainPicking(
        chat_id=str(chat_id),
        model=chat_question.model,
        max_tokens=chat_question.max_tokens,
        temperature=chat_question.temperature,
        brain_id=str(brain_id),
        user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
    )

    chat_answer = gpt_answer_generator.generate_answer( # pyright: ignore reportPrivateUsage=none
        chat_question.question
@@ -267,26 +244,16 @@ async def create_stream_question_handler(
    user_openai_api_key = request.headers.get("Openai-Api-Key")
    streaming = True
    check_user_limit(current_user)

    llm_settings = LLMSettings()

    if llm_settings.private:
        gpt_answer_generator = PrivateGPT4AllBrainPicking(
            chat_id=str(chat_id),
            brain_id=str(brain_id),
            user_openai_api_key=user_openai_api_key,
            streaming=streaming,
            model_path=llm_settings.model_path,
        )
    else:
        gpt_answer_generator = OpenAIBrainPicking(
            chat_id=str(chat_id),
            model=chat_question.model,
            max_tokens=chat_question.max_tokens,
            temperature=chat_question.temperature,
            brain_id=str(brain_id),
            user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
            streaming=streaming,
        )
    gpt_answer_generator = OpenAIBrainPicking(
        chat_id=str(chat_id),
        model=chat_question.model,
        max_tokens=chat_question.max_tokens,
        temperature=chat_question.temperature,
        brain_id=str(brain_id),
        user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
        streaming=streaming,
    )

    return StreamingResponse(
        gpt_answer_generator.generate_stream( # pyright: ignore reportPrivateUsage=none
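The streaming handler simply hands the picker's token generator to FastAPI's StreamingResponse. A minimal standalone sketch of that pattern; the route and generator below are placeholders, not Quivr's real endpoint:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


async def fake_token_stream():
    # Stand-in for gpt_answer_generator.generate_stream(...)
    for token in ["Hello", ", ", "world"]:
        yield token


@app.get("/demo/stream")  # hypothetical route for illustration
async def demo_stream():
    return StreamingResponse(fake_token_stream(), media_type="text/event-stream")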

View File

@@ -2,8 +2,6 @@ from concurrent.futures import ThreadPoolExecutor
from typing import List
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from llm.utils.summarization import llm_summerize
from logger import get_logger
from models.settings import BrainSettings, CommonsDep, common_dependencies
from pydantic import BaseModel
@@ -50,17 +48,6 @@ class Neurons(BaseModel):
        return summaries.data


def create_summary(commons: CommonsDep, document_id, content, metadata):
    logger.info(f"Summarizing document {content[:100]}")
    summary = llm_summerize(content)
    logger.info(f"Summary: {summary}")
    metadata["document_id"] = document_id
    summary_doc_with_metadata = Document(page_content=summary, metadata=metadata)

    sids = commons["summaries_vector_store"].add_documents([summary_doc_with_metadata])
    if sids and len(sids) > 0:
        commons["supabase"].table("summaries").update(
            {"document_id": document_id}
        ).match({"id": sids[0]}).execute()


def error_callback(exception):