feat: tests get default brain (#593)

* feat: tests get default brain

* feat: chains docs

* feat: brains docs

* fix: remove brain_id creation from fe

* fix: docs categories
This commit is contained in:
Matt 2023-07-11 11:09:56 +01:00 committed by GitHub
parent 72924b52bc
commit cf37666f02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 139 additions and 24 deletions

View File

@ -2,11 +2,13 @@ import os
from typing import Any, List, Optional
from uuid import UUID
from logger import get_logger
from models.settings import CommonsDep, common_dependencies
from models.users import User
from pydantic import BaseModel
from utils.vectors import get_unique_files_from_vector_ids
from models.settings import CommonsDep, common_dependencies
from models.users import User
logger = get_logger(__name__)
class Brain(BaseModel):
@ -16,7 +18,6 @@ class Brain(BaseModel):
model: Optional[str] = "gpt-3.5-turbo-0613"
temperature: Optional[float] = 0.0
max_tokens: Optional[int] = 256
brain_size: Optional[float] = 0.0 # pyright: ignore reportPrivateUsage=none
max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
files: List[Any] = []
_commons: Optional[CommonsDep] = None
@ -35,7 +36,6 @@ class Brain(BaseModel):
self.get_unique_brain_files()
current_brain_size = sum(float(doc["size"]) for doc in self.files)
print("current_brain_size", current_brain_size)
return current_brain_size
@property
@ -86,8 +86,6 @@ class Brain(BaseModel):
return response.data
def delete_brain(self, user_id):
print("user_id", user_id)
print("self.id", self.id)
results = (
self.commons["supabase"]
.table("brains_users")
@ -96,7 +94,6 @@ class Brain(BaseModel):
.execute()
)
if len(results.data) == 0:
print("You are not the owner of this brain.")
return {"message": "You are not the owner of this brain."}
else:
results = (
@ -106,7 +103,6 @@ class Brain(BaseModel):
.match({"brain_id": self.id})
.execute()
)
print("results", results)
results = (
self.commons["supabase"]
@ -115,7 +111,6 @@ class Brain(BaseModel):
.match({"brain_id": self.id})
.execute()
)
print("results", results)
results = (
self.commons["supabase"]
@ -124,14 +119,12 @@ class Brain(BaseModel):
.match({"brain_id": self.id})
.execute()
)
print("results", results)
def create_brain(self):
    """Insert a row for this brain into the "brains" table.

    Only ``self.name`` is sent in the insert payload. The ``brain_id`` of the
    first row reported back by the insert is stored on ``self.id``.

    Returns:
        The ``data`` attribute of the insert response.
    """
    supabase = common_dependencies()["supabase"]
    insert_result = supabase.table("brains").insert({"name": self.name}).execute()

    # Keep the identifier reported by the insert on the instance.
    self.id = insert_result.data[0]["brain_id"]
    return insert_result.data
@ -206,13 +199,10 @@ class Brain(BaseModel):
vector_ids = [item["vector_id"] for item in response.data]
print("vector_ids", vector_ids)
if len(vector_ids) == 0:
return []
self.files = get_unique_files_from_vector_ids(vector_ids)
print("unique_files", self.files)
return self.files
@ -258,19 +248,17 @@ def get_default_user_brain(user: User):
commons = common_dependencies()
response = (
commons["supabase"]
.from_("brains_users") # I'm assuming this is the correct table
.from_("brains_users")
.select("brain_id")
.filter("user_id", "eq", user.id)
.filter(
"default_brain", "eq", True
) # Assuming 'default' is the correct column name
.filter("default_brain", "eq", True)
.execute()
)
print("Default brain response:", response.data)
logger.info("Default brain response:", response.data)
default_brain_id = response.data[0]["brain_id"] if response.data else None
print(f"Default brain id: {default_brain_id}")
logger.info(f"Default brain id: {default_brain_id}")
if default_brain_id:
brain_response = (

View File

@ -45,8 +45,9 @@ def test_create_and_delete_api_key():
assert verify_response.status_code == 200
# Now, let's delete the API key
# Assuming the key_id is part of the api_key_info response. If not, adjust this.
assert "key_id" in api_key_info
key_id = api_key_info["key_id"]
delete_response = client.delete(
f"/api-key/{key_id}", headers={"Authorization": f"Bearer {API_KEY}"}
)
@ -71,8 +72,6 @@ def test_retrieve_default_brain():
def test_create_brain():
# Generate a random UUID for brain_id
random_brain_id = str(uuid.uuid4())
# Generate a random name for the brain
random_brain_name = "".join(
@ -81,7 +80,6 @@ def test_create_brain():
# Set up the request payload
payload = {
"brain_id": random_brain_id,
"name": random_brain_name,
"status": "public",
"model": "gpt-3.5-turbo-0613",
@ -153,6 +151,59 @@ def test_delete_all_brains():
assert delete_response.status_code == 200
def test_delete_all_brains_and_get_default_brain():
    """Verify that a default brain is provided once all brains are deleted.

    Steps: create a brain and confirm the listing is non-empty, delete every
    brain and confirm the listing is empty, request ``/brains/default/`` and
    expect a brain named "Default brain", then confirm the listing contains
    exactly that one brain.
    """
    auth_headers = {"Authorization": "Bearer " + API_KEY}

    # First create a new brain
    test_create_brain()

    # Now, retrieve all brains for the current user
    response = client.get("/brains/", headers=auth_headers)

    # Assert that the response status code is 200 (HTTP OK)
    assert response.status_code == 200
    assert len(response.json()["brains"]) > 0

    test_delete_all_brains()

    # Now, retrieve all brains for the current user
    response = client.get("/brains/", headers=auth_headers)

    # Assert that the response status code is 200 (HTTP OK)
    assert response.status_code == 200
    assert len(response.json()["brains"]) == 0

    # Get the default brain, it should create one if it doesn't exist
    response = client.get("/brains/default/", headers=auth_headers)

    # Assert that the response status code is 200 (HTTP OK)
    assert response.status_code == 200
    assert response.json()["name"] == "Default brain"

    # Now, retrieve all brains for the current user
    response = client.get("/brains/", headers=auth_headers)
    assert response.status_code == 200

    # Assert that there is only one brain. The payload is a dict that wraps
    # the list under "brains", so count the list rather than the dict's keys
    # (the dict always has length 1 regardless of how many brains exist).
    response_data = response.json()
    assert len(response_data["brains"]) == 1

    for brain in response_data["brains"]:
        assert "id" in brain
        assert "name" in brain

    # Assert that the brain is the default brain
    assert response_data["brains"][0]["name"] == "Default brain"
def test_get_all_chats():
# Making a GET request to the /chat endpoint to retrieve all chats
response = client.get(

View File

@ -0,0 +1,9 @@
{
"label": "Brains",
"position": 3,
"link": {
"type": "generated-index",
"description": "What are brains?"
}
}

View File

@ -0,0 +1,27 @@
---
sidebar_position: 1
---
# Introduction to Brains
Quivr has a concept of "Brains". They are ring-fenced bodies of information that can be used to provide context to Large Language Models (LLMs) to answer questions on a particular topic.
LLMs are trained on a large variety of data but to answer a question on a specific topic or to be used to make deductions around a specific topic, they need to be supplied with the context of that topic.
Quivr uses brains as an intuitive way to provide that context.
When a brain is selected in Quivr, the LLM will be provided with only the context of that brain. This allows users to build brains for specific topics and then use them to answer questions about that topic.
In the future there will be the functionality to share brains with other users of Quivr.
## How to use Brains
To use a brain, open the menu using the Brain icon in the header at the top right of the Quivr interface.
You can create a new brain by clicking the "Create Brain" button. You will be prompted to enter a name for the brain. If you wish you can also just use the default brain for your account.
To switch to a different brain, simply click on the brain name in the menu and select the brain you wish to use.
If you have not chosen a brain, you can assume that any documents you upload will be added to the default brain.
**Note: If you are having problems with the chat functionality, try selecting a brain from the menu. The default brain is not always selected automatically and you will need a brain selected to use the chat functionality.**

View File

@ -0,0 +1,9 @@
{
"label": "Chains",
"position": 4,
"link": {
"type": "generated-index",
"description": "What are chains?"
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

View File

@ -0,0 +1,31 @@
---
sidebar_position: 1
---
# Introduction to Chains
Quivr uses a framework called [Langchain](https://python.langchain.com/docs/get_started/introduction.html) for the majority of the interaction with the Large Language Models (LLMs).
Langchain provides functionality to connect multiple components such as LLMs, document retrievers, and other components together to form a "chain" of components.
They define a Chain very generically as a sequence of calls to components, which can include other chains. For example, a chain could be a sequence of calls to a document retriever, followed by a call to an LLM, followed by a call to a summarizer.
## Conversational Retrieval Chains
In Quivr we make use of the Conversational Retrieval Chain. These chains take in chat history and new questions and return an answer to the question. The algorithm for Conversational Retrieval Chains consists of three parts:
1. Creating a standalone question: The chat history and new question are combined to create a standalone question. This is done to ensure that relevant context is included in the retrieval step without unnecessary information from the whole conversation.
2. Retrieving relevant documents: The standalone question is passed to a retriever, which fetches relevant documents.
3. Generating a final response: The retrieved documents are passed to a language model (LLM) along with either the new question or the original question and chat history. The LLM generates a final response based on this input.
## OpenAI Functions
Quivr also uses OpenAI Functions for the newer models. OpenAI Functions allow us to define our own version of a lightweight Conversational Retrieval Chain. In this case we ask the LLM if it can answer the question directly or if it needs either history or history and context. If it needs history and context, we pass the question and history to a retriever which performs a simple vector similarity search and then pass the retrieved documents to the LLM as context.
Using this method we can get similar results to the Conversational Retrieval Chain but with a much simpler implementation and less than half of the latency.
See the diagram below for a visual representation:
![OpenAI Functions](open_ai_functions_tree.jpg)