feat: Update QuivrRAG and run_evaluation.py files (#2615)

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

---------

Co-authored-by: chloedia <chloedaems0@gmail.com>
Stan Girard 2024-05-28 16:47:31 +02:00 committed by GitHub
parent e944a621e7
commit ff4ecb86e5
12 changed files with 831 additions and 590 deletions

.gitignore vendored (1 change)

@@ -84,3 +84,4 @@ backend/application.log.*
backend/score.json
backend/modules/assistant/ito/utils/simple.pdf
backend/modules/sync/controller/credentials.json
backend/.env.test

Pipfile.lock generated (990 changes)

File diff suppressed because it is too large.

backend/modules/brain/integrations/Multi_Contract/Brain.py

@@ -0,0 +1,206 @@
import datetime
from operator import itemgetter
from typing import List
from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel as BaseModelV1
from langchain_core.pydantic_v1 import Field as FieldV1
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI
from logger import get_logger
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
logger = get_logger(__name__)
class cited_answer(BaseModelV1):
"""Answer the user question based only on the given sources, and cite the sources used."""
thoughts: str = FieldV1(
...,
description="""Description of the thought process, based only on the given sources.
Cite the text as much as possible and give the document name it appears in. In the format : 'Doc_name states : cited_text'. Be the most
procedural as possible.""",
)
answer: str = FieldV1(
...,
description="The answer to the user question, which is based only on the given sources.",
)
citations: List[int] = FieldV1(
...,
description="The integer IDs of the SPECIFIC sources which justify the answer.",
)
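# NOTE: 'thoughts' is declared twice in this class body; this second
# definition silently overrides the one above.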
thoughts: str = FieldV1(
...,
description="Explain shortly what you did to find the answer and what you used by citing the sources by their name.",
)
followup_questions: List[str] = FieldV1(
...,
description="Generate up to 3 follow-up questions that could be asked based on the answer given or context provided.",
)
# First step is to create the Rephrasing Prompt
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. Keep as much details as possible from previous messages. Keep entity names and all.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
# Next is the answering prompt
template_answer = """
Context:
{context}
User Question: {question}
Answer:
"""
today_date = datetime.datetime.now().strftime("%B %d, %Y")
system_message_template = (
f"Your name is Quivr. You're a helpful assistant. Today's date is {today_date}."
)
system_message_template += """
When answering use markdown neat.
Answer in a concise and clear manner.
Use the following pieces of context from files provided by the user to answer the users.
Answer in the same language as the user question.
If you don't know the answer with the context provided from the files, just say that you don't know, don't try to make up an answer.
Don't cite the source id in the answer objects, but you can use the source to answer the question.
You have access to the files to answer the user question (limited to first 20 files):
{files}
If not None, User instruction to follow to answer: {custom_instructions}
Don't cite the source id in the answer objects, but you can use the source to answer the question.
"""
ANSWER_PROMPT = ChatPromptTemplate.from_messages(
[
SystemMessagePromptTemplate.from_template(system_message_template),
HumanMessagePromptTemplate.from_template(template_answer),
]
)
# How we format documents
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(
template="Source: {index} \n {page_content}"
)
class MultiContractBrain(KnowledgeBrainQA):
"""
The MultiContractBrain class integrates advanced conversational retrieval and language model chains
to provide comprehensive and context-aware responses to user queries.
It leverages a combination of document retrieval, question condensation, and document-based
question answering to generate responses that are informed by a wide range of knowledge sources.
"""
def __init__(
self,
**kwargs,
):
"""
Initializes the MultiContractBrain class with specific configurations.
Args:
**kwargs: Arbitrary keyword arguments.
"""
super().__init__(
**kwargs,
)
def get_chain(self):
list_files_array = (
self.knowledge_qa.knowledge_service.get_all_knowledge_in_brain(
self.brain_id
)
) # pyright: ignore reportPrivateUsage=none
list_files_array = [file.file_name for file in list_files_array]
# Keep at most the first 20 files
if len(list_files_array) > 20:
list_files_array = list_files_array[:20]
list_files = "\n".join(list_files_array) if list_files_array else "None"
retriever_doc = self.knowledge_qa.get_retriever()
loaded_memory = RunnablePassthrough.assign(
chat_history=RunnableLambda(
lambda x: self.filter_history(x["chat_history"]),
),
question=lambda x: x["question"],
)
api_base = None
if self.brain_settings.ollama_api_base_url and self.model.startswith("ollama"):
api_base = self.brain_settings.ollama_api_base_url
standalone_question = {
"standalone_question": {
"question": lambda x: x["question"],
"chat_history": itemgetter("chat_history"),
}
| CONDENSE_QUESTION_PROMPT
| ChatLiteLLM(temperature=0, model=self.model, api_base=api_base)
| StrOutputParser(),
}
knowledge_qa = self.knowledge_qa
prompt_custom_user = knowledge_qa.prompt_to_use()
prompt_to_use = "None"
if prompt_custom_user:
prompt_to_use = prompt_custom_user.content
# Now we retrieve the documents
retrieved_documents = {
"docs": itemgetter("standalone_question") | retriever_doc,
"question": lambda x: x["standalone_question"],
"custom_instructions": lambda x: prompt_to_use,
}
final_inputs = {
"context": lambda x: self.knowledge_qa._combine_documents(x["docs"]),
"question": itemgetter("question"),
"custom_instructions": itemgetter("custom_instructions"),
"files": lambda x: list_files,
}
llm = ChatLiteLLM(
max_tokens=self.max_tokens,
model=self.model,
temperature=self.temperature,
api_base=api_base,
) # pyright: ignore reportPrivateUsage=none
if self.model_compatible_with_function_calling(self.model):
# And finally, we do the part that returns the answers
llm_function = ChatOpenAI(
max_tokens=self.max_tokens,
model=self.model,
temperature=self.temperature,
)
llm = llm_function.bind_tools(
[cited_answer],
tool_choice="cited_answer",
)
answer = {
"answer": final_inputs | ANSWER_PROMPT | llm,
"docs": itemgetter("docs"),
}
return loaded_memory | standalone_question | retrieved_documents | answer
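
For orientation, here is a minimal, hypothetical sketch of how a chain like the one returned by `get_chain()` is invoked and how its `cited_answer` tool call is decoded. The `brain` variable and the question are stand-ins, and the `additional_kwargs` access path is an assumption based on the OpenAI tool-calling format used by the pinned langchain versions; run_evaluation.py further down parses the response the same way.

```python
import json

# Hypothetical setup: `brain` is an already-initialized MultiContractBrain.
chain = brain.get_chain()

# The chain expects the user question plus any prior chat history.
response = chain.invoke(
    {"question": "When does the Contoso contract end?", "chat_history": []}
)

# With a function-calling model, the answer arrives as a `cited_answer`
# tool call on the returned AIMessage rather than as plain text.
tool_call = response["answer"].additional_kwargs["tool_calls"][0]
args = json.loads(tool_call["function"]["arguments"])

print(args["answer"])              # grounded answer text
print(args["citations"])           # integer IDs of the supporting sources
print(args["followup_questions"])  # up to 3 suggested follow-ups
```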

backend/modules/brain/knowledge_brain_qa.py

@@ -331,7 +331,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
async def generate_stream(
self, chat_id: UUID, question: ChatQuestion, save_answer: bool = True
) -> AsyncIterable:
conversational_qa_chain = self.knowledge_qa.get_chain()
conversational_qa_chain = (
self.get_chain() if self.get_chain() else self.knowledge_qa.get_chain()
)
transformed_history, streamed_chat_history = (
self.initialize_streamed_chat_history(chat_id, question)
)
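
The change above makes generate_stream prefer a chain supplied by the concrete brain class. Below is a self-contained sketch of the pattern, assuming the base get_chain() returns None unless a subclass (such as MultiContractBrain) overrides it; using `or` also avoids the double get_chain() call in the committed ternary.

```python
class KnowledgeBrainQABase:
    """Stand-in for KnowledgeBrainQA, reduced to the chain dispatch."""

    def get_chain(self):
        # Default brains define no custom chain; integrations override this.
        return None

    def resolve_chain(self, default_chain):
        # Prefer the subclass chain; otherwise use the default QuivrRAG chain.
        return self.get_chain() or default_chain


class MultiContractLike(KnowledgeBrainQABase):
    def get_chain(self):
        return "custom-lcel-chain"  # stand-in for the real LCEL chain


assert KnowledgeBrainQABase().resolve_chain("default-chain") == "default-chain"
assert MultiContractLike().resolve_chain("default-chain") == "custom-lcel-chain"
```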

backend/modules/brain/rags/quivr_rag.py

@@ -24,6 +24,7 @@ from models import BrainSettings # Importing settings related to the 'brain'
from models.settings import get_supabase_client
from modules.brain.service.brain_service import BrainService
from modules.chat.service.chat_service import ChatService
from modules.knowledge.repository.knowledges import Knowledges
from modules.prompt.service.get_prompt_to_use import get_prompt_to_use
from pydantic import BaseModel, ConfigDict
from pydantic_settings import BaseSettings
@@ -36,6 +37,12 @@ logger = get_logger(__name__)
class cited_answer(BaseModelV1):
"""Answer the user question based only on the given sources, and cite the sources used."""
thoughts: str = FieldV1(
...,
description="""Description of the thought process, based only on the given sources.
Cite the text as much as possible and give the document name it appears in. In the format : 'Doc_name states : cited_text'. Be the most
procedural as possible.""",
)
answer: str = FieldV1(
...,
description="The answer to the user question, which is based only on the given sources.",
@@ -47,7 +54,7 @@ class cited_answer(BaseModelV1):
thoughts: str = FieldV1(
...,
description="Explain shortly what you did to generate the answer. Explain any assumptions you made, and why you made them.",
description="Explain shortly what you did to find the answer and what you used by citing the sources by their name.",
)
followup_questions: List[str] = FieldV1(
...,
@@ -86,6 +93,10 @@ Answer in a concise and clear manner.
Use the following pieces of context from files provided by the user to answer the users.
Answer in the same language as the user question.
If you don't know the answer with the context provided from the files, just say that you don't know, don't try to make up an answer.
Don't cite the source id in the answer objects, but you can use the source to answer the question.
You have access to the files to answer the user question (limited to first 20 files):
{files}
If not None, User instruction to follow to answer: {custom_instructions}
Don't cite the source id in the answer objects, but you can use the source to answer the question.
"""
@@ -128,7 +139,6 @@ class QuivrRAG(BaseModel):
# Instantiate settings
brain_settings: BaseSettings = BrainSettings()
# Default class attributes
model: str = None # pyright: ignore reportPrivateUsage=none
temperature: float = 0.1
@@ -137,6 +147,7 @@ class QuivrRAG(BaseModel):
max_tokens: int = 2000 # Output length
max_input: int = 2000
streaming: bool = False
knowledge_service: Knowledges = None
@property
def embeddings(self):
@@ -205,6 +216,7 @@ class QuivrRAG(BaseModel):
self.brain_id = brain_id
self.chat_id = chat_id
self.streaming = streaming
self.knowledge_service = Knowledges()
def _create_supabase_client(self) -> Client:
return get_supabase_client()
@@ -235,7 +247,9 @@ class QuivrRAG(BaseModel):
api_base = None
if self.brain_settings.ollama_api_base_url and model.startswith("ollama"):
api_base = self.brain_settings.ollama_api_base_url
api_base = (
self.brain_settings.ollama_api_base_url # pyright: ignore reportPrivateUsage=none
)
return ChatLiteLLM(
temperature=temperature,
@@ -245,7 +259,7 @@ class QuivrRAG(BaseModel):
verbose=False,
callbacks=callbacks,
api_base=api_base,
)
) # pyright: ignore reportPrivateUsage=none
def _combine_documents(
self, docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
@@ -294,11 +308,23 @@ class QuivrRAG(BaseModel):
return chat_history
def get_chain(self):
list_files_array = self.knowledge_service.get_all_knowledge_in_brain(
self.brain_id
) # pyright: ignore reportPrivateUsage=none
list_files_array = [file.file_name for file in list_files_array]
# Keep at most the first 20 files
if len(list_files_array) > 20:
list_files_array = list_files_array[:20]
list_files = "\n".join(list_files_array) if list_files_array else "None"
compressor = None
if os.getenv("COHERE_API_KEY"):
compressor = CohereRerank(top_n=10)
compressor = CohereRerank(top_n=20)
else:
compressor = FlashrankRerank(model="ms-marco-TinyBERT-L-2-v2", top_n=10)
compressor = FlashrankRerank(model="ms-marco-TinyBERT-L-2-v2", top_n=20)
retriever_doc = self.get_retriever()
compression_retriever = ContextualCompressionRetriever(
@@ -342,6 +368,7 @@ class QuivrRAG(BaseModel):
"context": lambda x: self._combine_documents(x["docs"]),
"question": itemgetter("question"),
"custom_instructions": itemgetter("custom_instructions"),
"files": lambda x: list_files,
}
llm = ChatLiteLLM(
max_tokens=self.max_tokens,
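
For reference, a self-contained sketch of the retriever wiring this file builds, reflecting the change from top_n=10 to top_n=20: Cohere reranking when COHERE_API_KEY is set, a local FlashRank model otherwise. The import paths are assumptions based on the langchain packages pinned in requirements.txt below, and base_retriever stands in for self.get_retriever().

```python
import os

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_cohere import CohereRerank


def build_compression_retriever(base_retriever):
    """Rerank retrieved chunks and keep the 20 best (10 before this PR)."""
    if os.getenv("COHERE_API_KEY"):
        compressor = CohereRerank(top_n=20)
    else:
        # Local fallback model when no Cohere API key is configured.
        compressor = FlashrankRerank(model="ms-marco-TinyBERT-L-2-v2", top_n=20)

    return ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=base_retriever
    )
```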

backend/modules/chat/controller/chat/brainful_chat.py

@@ -1,8 +1,8 @@
from logger import get_logger
from modules.brain.api_brain_qa import APIBrainQA
from modules.brain.entity.brain_entity import BrainType, RoleEnum
from modules.brain.integrations.Big.Brain import BigBrain
from modules.brain.integrations.GPT4.Brain import GPT4Brain
from modules.brain.integrations.Multi_Contract.Brain import MultiContractBrain
from modules.brain.integrations.Notion.Brain import NotionBrain
from modules.brain.integrations.Proxy.Brain import ProxyBrain
from modules.brain.integrations.Self.Brain import SelfBrain
@@ -47,6 +47,7 @@ integration_list = {
"doc": KnowledgeBrainQA,
"proxy": ProxyBrain,
"self": SelfBrain,
"multi-contract": MultiContractBrain,
}
brain_service = BrainService()
@@ -82,23 +83,6 @@ class BrainfulChat(ChatInterface):
user_email=user_email,
)
if brain.brain_type == BrainType.API:
brain_definition = api_brain_definition_service.get_api_brain_definition(
brain.brain_id
)
return APIBrainQA(
chat_id=chat_id,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
prompt_id=prompt_id,
user_id=user_id,
raw=(brain_definition.raw if brain_definition else None),
jq_instructions=(
brain_definition.jq_instructions if brain_definition else None
),
user_email=user_email,
)
if brain.brain_type == BrainType.INTEGRATION:
integration_brain = integration_brain_description_service.get_integration_description_by_user_brain_id(
brain.brain_id, user_id
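
Registering "multi-contract" in integration_list is what lets BrainfulChat route chats to the new brain. A toy sketch of that lookup, with classes replaced by strings; the fallback to the default document brain is an assumption about the surrounding dispatch code.

```python
integration_list = {
    "doc": "KnowledgeBrainQA",
    "proxy": "ProxyBrain",
    "self": "SelfBrain",
    "multi-contract": "MultiContractBrain",
}


def resolve_brain_class(integration_name: str) -> str:
    # Unknown integration names fall back to the default document brain.
    return integration_list.get(integration_name.lower(), "KnowledgeBrainQA")


assert resolve_brain_class("Multi-Contract") == "MultiContractBrain"
```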


@@ -45,7 +45,9 @@ async def process_file(
parser = LlamaParse(
result_type="markdown", # "markdown" and "text" are available
parsing_instruction="Try to extract the tables and checkboxes. Transform tables to key = value. You can duplicates Keys if needed. For example: Productions Fonts = 300 productions Fonts Company Desktop License = Yes for Maximum of 60 Licensed Desktop users For example checkboxes should be: Premium Activated = Yes License Premier = No If a checkbox is present for a table with multiple options. Say Yes for the one activated and no for the one not activated",
parsing_instruction="Extract the tables and checkboxes. Transform tables to key = value. You can duplicates Keys if needed. For example: Productions Fonts = 300 productions Fonts Company Desktop License = Yes for Maximum of 60 Licensed Desktop users For example checkboxes should be: Premium Activated = Yes License Premier = No If a checkbox is present for a table with multiple options. Say Yes for the one activated and no for the one not activated.Format using headers.",
gpt4o_mode=True,
gpt4o_api_key=os.getenv("OPENAI_API_KEY"),
)
document_llama_parsed = parser.load_data(document_tmp.name)
@@ -79,7 +81,9 @@ async def process_file(
if file.documents is not None:
logger.info("Coming here?")
for doc in file.documents: # pyright: ignore reportPrivateUsage=none
for index, doc in enumerate(
file.documents, start=1
): # pyright: ignore reportPrivateUsage=none
new_metadata = metadata.copy()
logger.info(f"Processing document {doc}")
# Add filename at beginning of page content
@@ -95,6 +99,7 @@
)
new_metadata["chunk_size"] = len_chunk
new_metadata["index"] = index
doc_with_metadata = DocumentSerializable(
page_content=doc.page_content, metadata=new_metadata
)
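
The enumerate(..., start=1) change stamps every chunk with its 1-based position in the source file. A minimal illustration with plain dicts standing in for DocumentSerializable; the sorted retrieval that consumes this index appears in the vector-store change at the end of this PR.

```python
docs = ["chunk A", "chunk B", "chunk C"]  # stand-ins for file.documents

chunks = []
for index, page_content in enumerate(docs, start=1):
    metadata = {"file_name": "contract.pdf", "chunk_size": len(page_content)}
    metadata["index"] = index  # 1-based position of the chunk within its file
    chunks.append({"page_content": page_content, "metadata": metadata})

assert [c["metadata"]["index"] for c in chunks] == [1, 2, 3]
```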

backend/requirements.txt

@@ -4,9 +4,9 @@
aiohttp==3.9.5; python_version >= '3.8'
aiosignal==1.3.1; python_version >= '3.7'
amqp==5.2.0; python_version >= '3.6'
annotated-types==0.6.0; python_version >= '3.8'
annotated-types==0.7.0; python_version >= '3.8'
antlr4-python3-runtime==4.9.3
anyio==4.3.0; python_version >= '3.8'
anyio==4.4.0; python_version >= '3.8'
appdirs==1.4.4
astor==0.8.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
async-generator==1.10; python_version >= '3.5'
@@ -17,8 +17,8 @@ backoff==2.2.1; python_version >= '3.7' and python_version < '4.0'
beautifulsoup4==4.12.3; python_full_version >= '3.6.0'
billiard==4.2.0; python_version >= '3.7'
black==24.4.2; python_version >= '3.8'
boto3==1.34.107; python_version >= '3.8'
botocore==1.34.107; python_version >= '3.8'
boto3==1.34.113; python_version >= '3.8'
botocore==1.34.113; python_version >= '3.8'
cachetools==5.3.3; python_version >= '3.7'
celery[redis,sqs]==5.4.0; python_version >= '3.8'
certifi==2024.2.2; python_version >= '3.6'
@@ -29,7 +29,7 @@ click==8.1.7; python_version >= '3.7'
click-didyoumean==0.3.1; python_full_version >= '3.6.2'
click-plugins==1.1.1
click-repl==0.3.0; python_version >= '3.6'
cohere==5.5.0; python_version >= '3.8' and python_version < '4.0'
cohere==5.5.3; python_version >= '3.8' and python_version < '4.0'
coloredlogs==15.0.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
colorlog==6.8.2; python_version >= '3.6'
contourpy==1.2.1; python_version >= '3.9'
@@ -49,18 +49,18 @@ dirtyjson==1.0.8
diskcache==5.6.3; python_version >= '3'
distro==1.9.0; python_version >= '3.6'
dnspython==2.6.1; python_version >= '3.8'
docker==7.0.0; python_version >= '3.8'
docker==7.1.0; python_version >= '3.8'
docx2txt==0.8
duckdb==0.10.2; python_full_version >= '3.7.0'
duckduckgo-search==6.1.0; python_version >= '3.8'
duckdb==0.10.3; python_full_version >= '3.7.0'
duckduckgo-search==6.1.1; python_version >= '3.8'
ecdsa==0.19.0; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
effdet==0.4.1
email-validator==2.1.1; python_version >= '3.8'
emoji==2.11.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
emoji==2.12.1; python_version >= '3.7'
et-xmlfile==1.1.0; python_version >= '3.6'
faker==19.13.0; python_version >= '3.8'
fastapi==0.111.0; python_version >= '3.8'
fastapi-cli==0.0.3; python_version >= '3.8'
fastapi-cli==0.0.4; python_version >= '3.8'
fastavro==1.9.4; python_version >= '3.8'
feedfinder2==0.0.4
feedparser==6.0.11; python_version >= '3.6'
@@ -71,14 +71,14 @@ flake8-black==0.3.6; python_version >= '3.7'
flashrank==0.2.5; python_version >= '3.6'
flatbuffers==24.3.25
flower==2.0.1; python_version >= '3.7'
fonttools==4.51.0; python_version >= '3.8'
fpdf2==2.7.8; python_version >= '3.7'
fonttools==4.52.1; python_version >= '3.8'
fpdf2==2.7.9; python_version >= '3.7'
frozenlist==1.4.1; python_version >= '3.8'
fsspec[http]==2024.3.1; python_version >= '3.8'
gitdb==4.0.11; python_version >= '3.7'
gitpython==3.1.43; python_version >= '3.7'
google-api-core[grpc]==2.19.0; python_version >= '3.7'
google-api-python-client==2.129.0; python_version >= '3.7'
google-api-python-client==2.130.0; python_version >= '3.7'
google-auth==2.29.0; python_version >= '3.7'
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.0; python_version >= '3.6'
@@ -86,7 +86,7 @@ google-cloud-vision==3.7.2
googleapis-common-protos==1.63.0; python_version >= '3.7'
gotrue==2.4.2; python_version >= '3.8' and python_version < '4.0'
greenlet==3.0.3; python_version >= '3.7'
grpcio==1.63.0
grpcio==1.64.0
grpcio-status==1.62.2
h11==0.14.0; python_version >= '3.7'
html5lib==1.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
@@ -95,7 +95,7 @@ httplib2==0.22.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3
httptools==0.6.1
httpx==0.27.0; python_version >= '3.8'
httpx-sse==0.4.0; python_version >= '3.8'
huggingface-hub==0.23.0; python_full_version >= '3.8.0'
huggingface-hub==0.23.2; python_full_version >= '3.8.0'
humanfriendly==10.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
humanize==4.9.0; python_version >= '3.8'
idna==3.7; python_version >= '3.5'
@@ -112,31 +112,31 @@ jsonpath-python==1.0.6; python_version >= '3.6'
jsonpointer==2.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
kiwisolver==1.4.5; python_version >= '3.7'
kombu[sqs]==5.3.7; python_version >= '3.8'
langchain==0.1.20; python_version < '4.0' and python_full_version >= '3.8.1'
langchain==0.2.1; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-cohere==0.1.5; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-community==0.0.38; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-core==0.1.52; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-community==0.2.1; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-core==0.2.1; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-openai==0.1.7; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-text-splitters==0.0.2; python_version < '4.0' and python_full_version >= '3.8.1'
langchain-text-splitters==0.2.0; python_version < '4.0' and python_full_version >= '3.8.1'
langdetect==1.0.9
langfuse==2.32.0; python_version < '4.0' and python_full_version >= '3.8.1'
langgraph==0.0.49; python_version < '4.0' and python_full_version >= '3.9.0'
langsmith==0.1.59; python_version < '4.0' and python_full_version >= '3.8.1'
langfuse==2.33.0; python_version < '4.0' and python_full_version >= '3.8.1'
langgraph==0.0.55; python_version < '4.0' and python_full_version >= '3.9.0'
langsmith==0.1.63; python_version < '4.0' and python_full_version >= '3.8.1'
layoutparser[layoutmodels,tesseract]==0.3.4; python_version >= '3.6'
litellm==1.37.13; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'
litellm==1.38.10; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'
llama-cpp-python==0.2.67; python_version >= '3.8'
llama-index==0.10.37; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index==0.10.39; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-agent-openai==0.2.5; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-cli==0.1.12; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-core==0.10.36; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-embeddings-openai==0.1.9; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-core==0.10.39.post1; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-embeddings-openai==0.1.10; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-indices-managed-llama-cloud==0.1.6; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-legacy==0.9.48; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-llms-openai==0.1.19; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-llms-openai==0.1.21; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-multi-modal-llms-openai==0.1.6; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-program-openai==0.1.6; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-question-gen-openai==0.1.3; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-readers-file==0.1.22; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-readers-file==0.1.23; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-readers-llama-parse==0.1.4; python_version < '4.0' and python_full_version >= '3.8.1'
llama-parse==0.4.3; python_version < '4.0' and python_full_version >= '3.8.1'
llamaindex-py-client==0.1.19; python_version >= '3.8' and python_version < '4'
@@ -161,20 +161,20 @@ networkx==3.3
newspaper3k==0.2.8
nltk==3.8.1; python_version >= '3.7'
nodeenv==1.8.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
numpy==1.26.4; python_version >= '3.9'
numpy==1.26.4; python_version >= '3.10'
oauthlib==3.2.2; python_version >= '3.6'
olefile==0.47; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
omegaconf==2.3.0; python_version >= '3.6'
onnx==1.16.0
onnxruntime==1.17.3
openai==1.30.1; python_full_version >= '3.7.1'
onnx==1.16.1
onnxruntime==1.18.0
openai==1.30.3; python_full_version >= '3.7.1'
opencv-python==4.9.0.80; python_version >= '3.6'
openpyxl==3.1.2
ordered-set==4.1.0; python_version >= '3.7'
orjson==3.10.3; python_version >= '3.8'
packaging==23.2; python_version >= '3.7'
pandas==1.5.3; python_version >= '3.8'
pandasai==2.0.42; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8' and python_version >= '3.9'
pandasai==2.0.43; python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8' and python_version >= '3.9'
pathspec==0.12.1; python_version >= '3.8'
pdf2image==1.17.0
pdfminer.six==20231228
@@ -183,7 +183,7 @@ pikepdf==8.15.1
pillow==10.3.0; python_version >= '3.8'
pillow-heif==0.16.0
platformdirs==4.2.2; python_version >= '3.8'
playwright==1.43.0; python_version >= '3.8'
playwright==1.44.0; python_version >= '3.8'
pluggy==1.5.0; python_version >= '3.8'
portalocker==2.8.2; python_version >= '3.8'
postgrest==0.16.4; python_version >= '3.8' and python_version < '4.0'
@@ -217,10 +217,10 @@ pyparsing==3.1.2; python_version >= '3.1'
pypdf==4.2.0; python_version >= '3.6'
pypdfium2==4.30.0; python_version >= '3.6'
pyreqwest-impersonate==0.4.5; python_version >= '3.8'
pyright==1.1.363; python_version >= '3.7'
pyright==1.1.364; python_version >= '3.7'
pysbd==0.3.4; python_version >= '3'
pytesseract==0.3.10; python_version >= '3.7'
pytest==8.2.0; python_version >= '3.8'
pytest==8.2.1; python_version >= '3.8'
pytest-celery==1.0.0; python_version >= '3.8' and python_version < '4.0'
pytest-docker-tools==3.1.3; python_full_version >= '3.7.0' and python_full_version < '4.0.0'
pytest-dotenv==0.5.2
@@ -235,23 +235,23 @@ python-multipart==0.0.9; python_version >= '3.8'
python-pptx==0.6.23
pytz==2024.1
pyyaml==6.0.1; python_version >= '3.6'
ragas==0.1.7
rapidfuzz==3.9.0; python_version >= '3.8'
ragas==0.1.8
rapidfuzz==3.9.1; python_version >= '3.8'
realtime==1.0.4; python_version >= '3.8' and python_version < '4.0'
redis==5.0.4; python_version >= '3.7'
regex==2024.5.15; python_version >= '3.8'
requests==2.31.0; python_version >= '3.7'
requests-file==2.0.0
requests==2.32.2; python_version >= '3.8'
requests-file==2.1.0
requests-oauthlib==2.0.0; python_version >= '3.4'
resend==1.0.2; python_version >= '3.7'
resend==1.2.0; python_version >= '3.7'
retry==0.9.2
rich==13.7.1; python_full_version >= '3.7.0'
rsa==4.9; python_version >= '3.6' and python_version < '4'
s3transfer==0.10.1; python_version >= '3.8'
safetensors==0.4.3; python_version >= '3.7'
scipy==1.13.0; python_version >= '3.9'
sentry-sdk[fastapi]==2.2.0; python_version >= '3.6'
setuptools==69.5.1; python_version >= '3.8'
scipy==1.13.1; python_version >= '3.9'
sentry-sdk[fastapi]==2.3.1; python_version >= '3.6'
setuptools==70.0.0; python_version >= '3.8'
sgmllib3k==1.0.0
shellingham==1.5.4; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
@@ -263,7 +263,7 @@ starlette==0.37.2; python_version >= '3.8'
storage3==0.7.4; python_version >= '3.8' and python_version < '4.0'
strenum==0.4.15
striprtf==0.0.26
supabase==2.4.5; python_version >= '3.8' and python_version < '4.0'
supabase==2.4.6; python_version >= '3.8' and python_version < '4.0'
supafunc==0.4.5; python_version >= '3.8' and python_version < '4.0'
sympy==1.12; python_version >= '3.8'
tabulate==0.9.0; python_version >= '3.7'
@@ -274,28 +274,29 @@ timm==1.0.3; python_version >= '3.8'
tinysegmenter==0.3
tldextract==5.1.2; python_version >= '3.8'
tokenizers==0.19.1; python_version >= '3.7'
torch==2.3.0
torchvision==0.18.0
torch==2.3.0; python_full_version >= '3.8.0'
torchvision==0.18.0; python_version >= '3.8'
tornado==6.4; python_version >= '3.8'
tqdm==4.66.4; python_version >= '3.7'
transformers==4.40.2; python_full_version >= '3.8.0'
transformers==4.41.1; python_full_version >= '3.8.0'
typer==0.12.3; python_version >= '3.7'
types-requests==2.31.0.20240406; python_version >= '3.8'
typing-extensions==4.11.0; python_version >= '3.8'
types-requests==2.32.0.20240523; python_version >= '3.8'
typing-extensions==4.12.0; python_version >= '3.8'
typing-inspect==0.9.0
tzdata==2024.1; python_version >= '2'
ujson==5.10.0; python_version >= '3.8'
unidecode==1.3.8; python_version >= '3.5'
unstructured[all-docs]==0.13.7; python_version < '3.12' and python_full_version >= '3.9.0'
unstructured[all-docs]==0.14.2; python_version < '3.13' and python_full_version >= '3.9.0'
unstructured-client==0.22.0; python_version >= '3.8'
unstructured-inference==0.7.31
unstructured-inference==0.7.33
unstructured.pytesseract==0.3.12
uritemplate==4.1.1; python_version >= '3.6'
urllib3==2.2.1; python_version >= '3.8'
uuid6==2024.1.12; python_version >= '3.8'
uvicorn[standard]==0.29.0; python_version >= '3.8'
uvloop==0.19.0
vine==5.1.0; python_version >= '3.6'
watchdog==4.0.0; python_version >= '3.8'
watchdog==4.0.1; python_version >= '3.8'
watchfiles==0.21.0
wcwidth==0.2.13
webencodings==0.5.1
@@ -305,4 +306,4 @@ xlrd==2.0.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3
xlsxwriter==3.2.0; python_version >= '3.6'
xxhash==3.4.1; python_version >= '3.7'
yarl==1.9.4; python_version >= '3.7'
zipp==3.18.2; python_version >= '3.8'
zipp==3.19.0; python_version >= '3.8'

run_evaluation.py

@@ -1,5 +1,4 @@
import argparse
import json
import os
import sys
@@ -8,9 +7,10 @@ from dotenv import load_dotenv
# Add the current directory to the Python path
sys.path.append(os.getcwd())
# Load environment variables from .env file
load_dotenv(verbose=True, override=True)
load_dotenv(verbose=True, override=True, dotenv_path=".env.test")
import glob
import json
import uuid
import pandas as pd
@@ -62,6 +62,7 @@ def main(
max_input=context_size,
max_tokens=1000,
)
brain_chain = knowledge_qa.get_chain()
# run langchain RAG
@@ -79,7 +80,6 @@ def main(
score.to_json(output_folder + "/score.json", orient="records")
for metric in metrics:
print(f"{metric} scores: {score[metric]}")
print(f"{metric} mean score: {score[metric].mean()}")
print(f"{metric} median score: {score[metric].median()}")
# Cleanup if a new brain was created
@@ -142,6 +142,7 @@ def generate_replies(
contexts = []
test_questions = test_data.question.tolist()
test_groundtruths = test_data.ground_truth.tolist()
thoughts = []
for question in test_questions:
response = brain_chain.invoke({"question": question, "chat_history": []})
@@ -149,16 +150,15 @@
"function"
]["arguments"]
cited_answer_obj = json.loads(cited_answer_data)
print(f"Answer: {cited_answer_obj['answer']}")
answers.append(cited_answer_obj["answer"])
print(f"Context: {cited_answer_obj}")
print(response)
thoughts.append(cited_answer_obj["thoughts"])
contexts.append([context.page_content for context in response["docs"]])
return Dataset.from_dict(
{
"question": test_questions,
"answer": answers,
"thoughs" : thoughts,
"contexts": contexts,
"ground_truth": test_groundtruths,
}
@@ -181,9 +181,7 @@ if __name__ == "__main__":
parser.add_argument(
"--testset_path", type=str, required=True, help="Path to the testset JSON file"
)
parser.add_argument(
"--model", type=str, default="gpt-3.5-turbo-0125", help="Model to use"
)
parser.add_argument("--model", type=str, default="gpt-4o", help="Model to use")
parser.add_argument(
"--context_size", type=int, default=10000, help="Context size for the model"
)
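
For context, the script scores the generated dataset with ragas. A hedged sketch against the ragas 0.1.x API pinned in requirements.txt; the metric choice is illustrative, an OPENAI_API_KEY is required at runtime, and the mean/median printing mirrors main() above.

```python
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import answer_relevancy, context_precision, faithfulness

# One row per test question, shaped like the output of generate_replies().
dataset = Dataset.from_dict(
    {
        "question": ["When is the current contract with Contoso ending?"],
        "answer": ["The current contract ends on December 31, 2028."],
        "contexts": [["...retrieved chunk text..."]],
        "ground_truth": [
            "The current contract with Contoso is ending on December 31, 2028."
        ],
    }
)

metrics = [faithfulness, answer_relevancy, context_precision]
score = evaluate(dataset, metrics=metrics).to_pandas()

for metric in ["faithfulness", "answer_relevancy", "context_precision"]:
    print(f"{metric} mean score: {score[metric].mean()}")
    print(f"{metric} median score: {score[metric].median()}")
```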


@@ -2,12 +2,12 @@
{
"question": "When is the current contract with Contoso ending?",
"evolution_type": "simple",
"ground_truth": "12/31/2028"
"ground_truth": "The current contract with Contoso is ending on December 31, 2028."
},
{
"question": "What's the maximum Production Fonts Contoso have?",
"evolution_type": "simple",
"ground_truth": "Unlimited"
"ground_truth": "Contoso has an unlimited Production Fonts licensed under the Monotype Fonts License Agreement."
},
{
"question": "How many contracts we have with Contoso?",
@@ -17,22 +17,17 @@
{
"question": "Have we got any change in number of production fonts by addendum in previous contracts, before the current contract?",
"evolution_type": "simple",
"ground_truth": "Yes"
},
{
"question": "For Contoso, can you mention the following based on all the contracts available: Contract Number, contract Start Date, contract End date. The result should be in tabular format with these 3 columns.",
"evolution_type": "simple",
"ground_truth": ""
"ground_truth": "Yes, there have been changes in the number of production fonts in previous contracts. For example, in the Addendum No. 1 to the Design and Deploy License Agreement, the Permitted Usage per Term table"
},
{
"question": "What's the number of production fonts for previous contract before current contract with Contoso?",
"evolution_type": "simple",
"ground_truth": "140"
"ground_truth": "The number of production fonts for the contract with Contoso that preceded the current contract was 140."
},
{
"question": "What will be the next renewal date for Contoso?",
"evolution_type": "simple",
"ground_truth": "12/31/2028"
"ground_truth": "The next renewal date for Contoso will be December 31, 2028,"
},
{
"question": "How many Software License Order Form contract we ever had with Contoso?",
@@ -52,7 +47,7 @@
{
"question": "What's the Licensed Page Views in the latest contract with Contoso?",
"evolution_type": "simple",
"ground_truth": "Unlimited"
"ground_truth": "The latest contract with Contoso specifies that the Licensed Page Views are unlimited"
},
{
"question": "What's the Licensed Impressions (Digital Marketing Communications) in the latest contract with Contoso?",

backend/vectorstore/supabase.py

@@ -97,4 +97,12 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
if search.get("content")
]
return match_result
sorted_match_result_by_file_name_metadata = sorted(
match_result,
key=lambda x: (
x.metadata.get("file_name", ""),
x.metadata.get("index", float("inf")),
),
)
return sorted_match_result_by_file_name_metadata
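
A small worked example of the new ordering: matches group by file_name, sort by the chunk index added earlier in this PR, and documents without an index sink to the end of their file's group. Doc is a minimal stand-in for langchain's Document type.

```python
from dataclasses import dataclass, field


@dataclass
class Doc:  # minimal stand-in for langchain's Document
    page_content: str
    metadata: dict = field(default_factory=dict)


match_result = [
    Doc("B2", {"file_name": "b.pdf", "index": 2}),
    Doc("A-old", {"file_name": "a.pdf"}),  # no index: sorts last within a.pdf
    Doc("A1", {"file_name": "a.pdf", "index": 1}),
]

sorted_match_result = sorted(
    match_result,
    key=lambda x: (
        x.metadata.get("file_name", ""),
        x.metadata.get("index", float("inf")),
    ),
)

assert [d.page_content for d in sorted_match_result] == ["A1", "A-old", "B2"]
```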