feat: tests get default brain (#593)

* feat: tests get default brain * feat: chains docs * feat: brains docs * fix: remove brain_id creation from fe * fix: docs categories
2024-12-15 09:32:22 +03:00 · 2023-07-11 11:09:56 +01:00 · 2023-07-11 11:09:56 +01:00 · cf37666f02
commit cf37666f02
parent 72924b52bc
7 changed files with 139 additions and 24 deletions
--- a/backend/models/brains.py
+++ b/backend/models/brains.py
@ -2,11 +2,13 @@ import os
 from typing import Any, List, Optional
 from uuid import UUID

+from logger import get_logger
+from models.settings import CommonsDep, common_dependencies
+from models.users import User
 from pydantic import BaseModel
 from utils.vectors import get_unique_files_from_vector_ids

-from models.settings import CommonsDep, common_dependencies
-from models.users import User
+logger = get_logger(__name__)


 class Brain(BaseModel):
@ -16,7 +18,6 @@ class Brain(BaseModel):
    model: Optional[str] = "gpt-3.5-turbo-0613"
    temperature: Optional[float] = 0.0
    max_tokens: Optional[int] = 256
-    brain_size: Optional[float] = 0.0  # pyright: ignore reportPrivateUsage=none
    max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
    files: List[Any] = []
    _commons: Optional[CommonsDep] = None
@ -35,7 +36,6 @@ class Brain(BaseModel):
        self.get_unique_brain_files()
        current_brain_size = sum(float(doc["size"]) for doc in self.files)

-        print("current_brain_size", current_brain_size)
        return current_brain_size

    @property
@ -86,8 +86,6 @@ class Brain(BaseModel):
        return response.data

    def delete_brain(self, user_id):
-        print("user_id", user_id)
-        print("self.id", self.id)
        results = (
            self.commons["supabase"]
            .table("brains_users")
@ -96,7 +94,6 @@ class Brain(BaseModel):
            .execute()
        )
        if len(results.data) == 0:
-            print("You are not the owner of this brain.")
            return {"message": "You are not the owner of this brain."}
        else:
            results = (
@ -106,7 +103,6 @@ class Brain(BaseModel):
                .match({"brain_id": self.id})
                .execute()
            )
-            print("results", results)

            results = (
                self.commons["supabase"]
@ -115,7 +111,6 @@ class Brain(BaseModel):
                .match({"brain_id": self.id})
                .execute()
            )
-            print("results", results)

            results = (
                self.commons["supabase"]
@ -124,14 +119,12 @@ class Brain(BaseModel):
                .match({"brain_id": self.id})
                .execute()
            )
-            print("results", results)

    def create_brain(self):
        commons = common_dependencies()
        response = (
            commons["supabase"].table("brains").insert({"name": self.name}).execute()
        )
-        # set the brainId with response.data

        self.id = response.data[0]["brain_id"]
        return response.data
@ -206,13 +199,10 @@ class Brain(BaseModel):

        vector_ids = [item["vector_id"] for item in response.data]

-        print("vector_ids", vector_ids)
-
        if len(vector_ids) == 0:
            return []

        self.files = get_unique_files_from_vector_ids(vector_ids)
-        print("unique_files", self.files)

        return self.files

@ -258,19 +248,17 @@ def get_default_user_brain(user: User):
    commons = common_dependencies()
    response = (
        commons["supabase"]
-        .from_("brains_users")  # I'm assuming this is the correct table
+        .from_("brains_users")
        .select("brain_id")
        .filter("user_id", "eq", user.id)
-        .filter(
-            "default_brain", "eq", True
-        )  # Assuming 'default' is the correct column name
+        .filter("default_brain", "eq", True)
        .execute()
    )

-    print("Default brain response:", response.data)
+    logger.info("Default brain response:", response.data)
    default_brain_id = response.data[0]["brain_id"] if response.data else None

-    print(f"Default brain id: {default_brain_id}")
+    logger.info(f"Default brain id: {default_brain_id}")

    if default_brain_id:
        brain_response = (
--- a/backend/test_main.py
+++ b/backend/test_main.py
@ -45,8 +45,9 @@ def test_create_and_delete_api_key():
    assert verify_response.status_code == 200

    # Now, let's delete the API key
-    # Assuming the key_id is part of the api_key_info response. If not, adjust this.
+    assert "key_id" in api_key_info
    key_id = api_key_info["key_id"]
+
    delete_response = client.delete(
        f"/api-key/{key_id}", headers={"Authorization": f"Bearer {API_KEY}"}
    )
@ -71,8 +72,6 @@ def test_retrieve_default_brain():


 def test_create_brain():
-    # Generate a random UUID for brain_id
-    random_brain_id = str(uuid.uuid4())

    # Generate a random name for the brain
    random_brain_name = "".join(
@ -81,7 +80,6 @@ def test_create_brain():

    # Set up the request payload
    payload = {
-        "brain_id": random_brain_id,
        "name": random_brain_name,
        "status": "public",
        "model": "gpt-3.5-turbo-0613",
@ -153,6 +151,59 @@ def test_delete_all_brains():
        assert delete_response.status_code == 200


+def test_delete_all_brains_and_get_default_brain():
+    """Check that a default brain is provided after all brains are deleted.
+
+    Steps: create a brain, confirm the user has at least one brain, delete
+    every brain, confirm the list is empty, request ``/brains/default/``,
+    then confirm the user has exactly one brain named "Default brain".
+    """
+    headers = {"Authorization": "Bearer " + API_KEY}
+
+    # First create a new brain
+    test_create_brain()
+
+    # Now, retrieve all brains for the current user
+    response = client.get("/brains/", headers=headers)
+
+    # Assert that the response status code is 200 (HTTP OK)
+    assert response.status_code == 200
+    assert len(response.json()["brains"]) > 0
+
+    test_delete_all_brains()
+
+    # Now, retrieve all brains for the current user
+    response = client.get("/brains/", headers=headers)
+
+    # Assert that the response status code is 200 (HTTP OK)
+    assert response.status_code == 200
+    assert len(response.json()["brains"]) == 0
+
+    # Get the default brain, it should create one if it doesn't exist
+    response = client.get("/brains/default/", headers=headers)
+
+    # Assert that the response status code is 200 (HTTP OK)
+    assert response.status_code == 200
+    assert response.json()["name"] == "Default brain"
+
+    # Now, retrieve all brains for the current user
+    response = client.get("/brains/", headers=headers)
+    assert response.status_code == 200
+
+    # Assert that there is only one brain. The count must be taken on the
+    # "brains" list: the payload itself is a dict, so len(response_data)
+    # would count its top-level keys rather than the brains.
+    response_data = response.json()
+    brains = response_data["brains"]
+    assert len(brains) == 1
+    for brain in brains:
+        assert "id" in brain
+        assert "name" in brain
+
+    # Assert that the brain is the default brain
+    assert brains[0]["name"] == "Default brain"
+
+
 def test_get_all_chats():
    # Making a GET request to the /chat endpoint to retrieve all chats
    response = client.get(
--- a/docs/docs/backend/brains/_category_.json
+++ b/docs/docs/backend/brains/_category_.json
@ -0,0 +1,9 @@
+{
+  "label": "Brains",
+  "position": 3,
+  "link": {
+    "type": "generated-index",
+    "description": "What are brains?"
+  }
+}
+
--- a/docs/docs/backend/brains/brains.md
+++ b/docs/docs/backend/brains/brains.md
@ -0,0 +1,27 @@
+---
+sidebar_position: 1
+---
+
+# Introduction to Brains
+
+Quivr has a concept of "Brains". They are ring-fenced bodies of information that can be used to provide context to Large Language Models (LLMs) to answer questions on a particular topic.
+
+LLMs are trained on a large variety of data but to answer a question on a specific topic or to be used to make deductions around a specific topic, they need to be supplied with the context of that topic.
+
+Quivr uses brains as an intuitive way to provide that context.
+
+When a brain is selected in Quivr, the LLM will be provided with only the context of that brain. This allows users to build brains for specific topics and then use them to answer questions about that topic.
+
+In the future there will be the functionality to share brains with other users of Quivr.
+
+## How to use Brains
+
+To use a brain, open the brain menu by clicking the Brain icon in the header at the top right of the Quivr interface.
+
+You can create a new brain by clicking the "Create Brain" button. You will be prompted to enter a name for the brain. If you wish you can also just use the default brain for your account.
+
+To switch to a different brain, simply click on the brain name in the menu and select the brain you wish to use.
+
+If you have not chosen a brain, you can assume that any documents you upload will be added to the default brain.
+
+**Note: If you are having problems with the chat functionality, try selecting a brain from the menu. The default brain is not always selected automatically and you will need a brain selected to use the chat functionality.**
--- a/docs/docs/backend/chains/_category_.json
+++ b/docs/docs/backend/chains/_category_.json
@ -0,0 +1,9 @@
+{
+  "label": "Chains",
+  "position": 4,
+  "link": {
+    "type": "generated-index",
+    "description": "What are chains?"
+  }
+}
+
--- a/docs/docs/backend/chains/open_ai_functions_tree.jpg
+++ b/docs/docs/backend/chains/open_ai_functions_tree.jpg
--- a/docs/docs/backend/chains/qa.md
+++ b/docs/docs/backend/chains/qa.md
@ -0,0 +1,31 @@
+---
+sidebar_position: 1
+---
+
+# Introduction to Chains
+
+Quivr uses a framework called [Langchain](https://python.langchain.com/docs/get_started/introduction.html) for the majority of the interaction with the Large Language Models (LLMs).
+
+Langchain provides functionality to connect multiple components such as LLMs, document retrievers, and other components together to form a "chain" of components.
+
+They define a Chain very generically as a sequence of calls to components, which can include other chains. For example, a chain could be a sequence of calls to a document retriever, followed by a call to an LLM, followed by a call to a summarizer.
+
+## Conversational Retrieval Chains
+
+In Quivr we make use of the Conversational Retrieval Chain. These chains take in chat history and new questions and return an answer to the question. The algorithm for Conversational Retrieval Chains consists of three parts:
+
+1. Creating a standalone question: The chat history and new question are combined to create a standalone question. This is done to ensure that relevant context is included in the retrieval step without unnecessary information from the whole conversation.
+
+2. Retrieving relevant documents: The standalone question is passed to a retriever, which fetches relevant documents.
+
+3. Generating a final response: The retrieved documents are passed to a language model (LLM) along with either the new question or the original question and chat history. The LLM generates a final response based on this input.
+
+## OpenAI Functions
+
+Quivr also uses OpenAI Functions for the newer models. OpenAI Functions allow us to define our own version of a lightweight Conversational Retrieval Chain. In this case we ask the LLM if it can answer the question directly or if it needs either history or history and context. If it needs history and context, we pass the question and history to a retriever which performs a simple vector similarity search and then pass the retrieved documents to the LLM as context.
+
+Using this method we can get similar results to the Conversational Retrieval Chain, but with a much simpler implementation and less than half of the latency.
+
+See the diagram below for a visual representation:
+
+![OpenAI Functions](open_ai_functions_tree.jpg)