From cc39f9e3ba9e5fffd6541d5f85d71dac14ec905f Mon Sep 17 00:00:00 2001 From: Stan Girard Date: Sat, 27 Jan 2024 01:50:58 -0800 Subject: [PATCH] feat(14k): done (#2102) # Description Please include a summary of the changes and the related issue. Please also include relevant motivation and context. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate): --- .github/workflows/stale.yml | 14 +-- .github/workflows/vercel-docs.yml | 23 ---- .github/workflows/vitest.yml | 32 ------ .vscode/settings.json | 2 +- Pipfile | 5 +- Pipfile.lock | 81 ++++++++++++-- backend/llm/knowledge_brain_qa.py | 13 ++- backend/llm/rags/quivr_rag.py | 16 ++- backend/models/databases/entity.py | 13 +++ backend/models/databases/repository.py | 6 ++ .../models/databases/supabase/user_usage.py | 2 + .../modules/brain/service/brain_service.py | 72 +++++++++++++ .../chat/controller/chat/brainful_chat.py | 78 ++------------ .../chat/controller/chat/brainless_chat.py | 1 - .../modules/chat/controller/chat/interface.py | 1 - backend/modules/chat/controller/chat/utils.py | 11 ++ .../modules/chat/controller/chat_routes.py | 101 ++++++++++++------ backend/modules/chat/dto/chats.py | 1 + backend/packages/files/parsers/audio.py | 5 +- backend/packages/files/parsers/code_python.py | 3 +- backend/packages/files/parsers/common.py | 9 +- backend/packages/files/parsers/csv.py | 6 +- backend/packages/files/parsers/docx.py | 3 +- backend/packages/files/parsers/epub.py | 3 +- backend/packages/files/parsers/github.py | 1 + backend/packages/files/parsers/html.py | 3 +- backend/packages/files/parsers/markdown.py | 3 +- backend/packages/files/parsers/notebook.py | 3 +- backend/packages/files/parsers/odt.py | 3 +- backend/packages/files/parsers/pdf.py | 3 +- backend/packages/files/parsers/powerpoint.py | 3 +- backend/packages/files/parsers/telegram.py | 6 +- backend/packages/files/parsers/txt.py | 2 + backend/packages/files/parsers/xlsx.py | 6 +- backend/packages/files/processors.py | 1 + .../brain/get_question_context_from_brain.py | 9 ++ backend/vectorstore/supabase.py | 17 ++- 37 files changed, 349 insertions(+), 212 deletions(-) delete mode 100644 .github/workflows/vercel-docs.yml delete mode 100644 .github/workflows/vitest.yml create mode 100644 backend/models/databases/entity.py diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index e1be87da3..d6ca3dbd9 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,12 +1,12 @@ -name: 'Close stale issues and PRs' +name: "Close stale issues and PRs" on: schedule: - - cron: '0 */4 * * *' + - cron: "0 */4 * * *" permissions: - contents: write # only for delete-branch option - issues: write - pull-requests: write + contents: write # only for delete-branch option + issues: write + pull-requests: write jobs: stale: @@ -16,9 +16,9 @@ jobs: with: exempt-assignees: true exempt-draft-pr: true - days-before-stale: 30 + days-before-stale: 90 days-before-close: 5 operations-per-run: 400 exempt-milestones: true stale-issue-message: "Thanks for your contributions, we'll be closing this issue as it has gone stale. 
Feel free to reopen if you'd like to continue the discussion." - stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion." \ No newline at end of file + stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion." diff --git a/.github/workflows/vercel-docs.yml b/.github/workflows/vercel-docs.yml deleted file mode 100644 index b1692efe4..000000000 --- a/.github/workflows/vercel-docs.yml +++ /dev/null @@ -1,23 +0,0 @@ -# name: Deploy Docs Deployment -# env: -# VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }} -# VERCEL_PROJECT_ID: ${{ secrets.VERCEL_DOC_PROJECT_ID }} -# on: -# push: -# branches: [ "main" ] -# paths: -# - 'docs/**' -# jobs: -# Deploy-Preview: -# environment: production -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - name: Install Vercel CLI -# run: npm install --global vercel@latest -# - name: Pull Vercel Environment Information -# run: vercel pull --yes --environment=production --token=${{ secrets.VERCEL_TOKEN }} -# - name: Build Project Artifacts -# run: vercel build --prod --token=${{ secrets.VERCEL_TOKEN }} -# - name: Deploy Project Artifacts to Vercel -# run: vercel deploy --prebuilt --prod --token=${{ secrets.VERCEL_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/vitest.yml b/.github/workflows/vitest.yml deleted file mode 100644 index 61cefd78c..000000000 --- a/.github/workflows/vitest.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Vitest - -on: - push: - branches: [ main ] - paths: - - 'frontend/**' - pull_request: - branches: [ main ] - paths: - - 'frontend/**' - -jobs: - build: - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./frontend - strategy: - matrix: - node-version: [18] - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4 - with: - node-version: ${{ matrix.node-version }} - cache: 'yarn' - cache-dependency-path: frontend/yarn.lock - - run: yarn - - run: yarn run test-unit diff --git a/.vscode/settings.json b/.vscode/settings.json index 615de73dc..ca65cceaa 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,7 +12,7 @@ ], "editor.formatOnSave": true, "[python]": { - "editor.defaultFormatter": "esbenp.prettier-vscode", + "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, "editor.codeActionsOnSave": { "source.organizeImports": "explicit", diff --git a/Pipfile b/Pipfile index a7d1461fc..c6bb58bf5 100644 --- a/Pipfile +++ b/Pipfile @@ -23,7 +23,7 @@ python-jose = "==3.3.0" asyncpg = "==0.27.0" flake8 = "==6.0.0" flake8-black = "==0.3.6" -sentry-sdk = {extras = ["fastapi"], version = "==1.37.1"} +sentry-sdk = {extras = ["fastapi"] } pyright = "==1.1.316" resend = "==0.5.1" html5lib = "==1.1" @@ -34,7 +34,7 @@ redis = "==4.5.4" flower = "*" boto3 = "==1.33.7" botocore = "==1.33.7" -celery = {extras = ["sqs"], version = "*"} +celery = {extras = ["sqs"] } python-dotenv = "*" pytest-mock = "*" pytest-celery = "*" @@ -45,6 +45,7 @@ jq = "==1.6.0" pytest = "*" [dev-packages] +black = "*" [requires] python_version = "3.11" diff --git a/Pipfile.lock b/Pipfile.lock index d258bb783..4cb69de88 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": 
"3fd64a4e95ad0de8091ce39b705c9f87f941eb483f95f1c5d501f9bf473781be" + "sha256": "9ae12dd1d097d77ce3cb08c7b6b5e5fa8a96216a98df213860d0ea30bb22dcc5" }, "pipfile-spec": 6, "requires": { @@ -2008,10 +2008,10 @@ "fastapi" ], "hashes": [ - "sha256:7cd324dd2877fdc861f75cba4242bce23a58272a6fea581fcb218bb718bd9cc5", - "sha256:a249c7364827ee89daaa078bb8b56ece0b3d52d9130961bef2302b79bdf7fe70" + "sha256:24c83b0b41c887d33328a9166f5950dc37ad58f01c9f2fbff6b87a6f1094170c", + "sha256:acaf597b30258fc7663063b291aa99e58f3096e91fe1e6634f4b79f9c1943e8e" ], - "version": "==1.37.1" + "version": "==1.39.2" }, "setuptools": { "hashes": [ @@ -2398,7 +2398,7 @@ "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84", "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e" ], - "markers": "python_version >= '3.6'", + "markers": "python_version >= '3.10'", "version": "==2.0.7" }, "uvicorn": { @@ -2709,5 +2709,74 @@ "version": "==3.17.0" } }, - "develop": {} + "develop": { + "black": { + "hashes": [ + "sha256:0cd59d01bf3306ff7e3076dd7f4435fcd2fafe5506a6111cae1138fc7de52382", + "sha256:1e0fa70b8464055069864a4733901b31cbdbe1273f63a24d2fa9d726723d45ac", + "sha256:30fbf768cd4f4576598b1db0202413fafea9a227ef808d1a12230c643cefe9fc", + "sha256:39addf23f7070dbc0b5518cdb2018468ac249d7412a669b50ccca18427dba1f3", + "sha256:5134a6f6b683aa0a5592e3fd61dd3519d8acd953d93e2b8b76f9981245b65594", + "sha256:6a8977774929b5db90442729f131221e58cc5d8208023c6af9110f26f75b6b20", + "sha256:6cc5a6ba3e671cfea95a40030b16a98ee7dc2e22b6427a6f3389567ecf1b5262", + "sha256:780f13d03066a7daf1707ec723fdb36bd698ffa29d95a2e7ef33a8dd8fe43b5c", + "sha256:7fa8d9aaa22d846f8c0f7f07391148e5e346562e9b215794f9101a8339d8b6d8", + "sha256:827a7c0da520dd2f8e6d7d3595f4591aa62ccccce95b16c0e94bb4066374c4c2", + "sha256:82d9452aeabd51d1c8f0d52d4d18e82b9f010ecb30fd55867b5ff95904f427ff", + "sha256:94d5280d020dadfafc75d7cae899609ed38653d3f5e82e7ce58f75e76387ed3d", + "sha256:9aede09f72b2a466e673ee9fca96e4bccc36f463cac28a35ce741f0fd13aea8b", + "sha256:a15670c650668399c4b5eae32e222728185961d6ef6b568f62c1681d57b381ba", + "sha256:a5a0100b4bdb3744dd68412c3789f472d822dc058bb3857743342f8d7f93a5a7", + "sha256:aaf9aa85aaaa466bf969e7dd259547f4481b712fe7ee14befeecc152c403ee05", + "sha256:be305563ff4a2dea813f699daaffac60b977935f3264f66922b1936a5e492ee4", + "sha256:bf8dd261ee82df1abfb591f97e174345ab7375a55019cc93ad38993b9ff5c6ad", + "sha256:d74d4d0da276fbe3b95aa1f404182562c28a04402e4ece60cf373d0b902f33a0", + "sha256:e0e367759062dcabcd9a426d12450c6d61faf1704a352a49055a04c9f9ce8f5a", + "sha256:ec489cae76eac3f7573629955573c3a0e913641cafb9e3bfc87d8ce155ebdb29", + "sha256:f0dfbfbacfbf9cd1fac7a5ddd3e72510ffa93e841a69fcf4a6358feab1685382" + ], + "markers": "python_version >= '3.8'", + "version": "==24.1.0" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "mypy-extensions": { + "hashes": [ + "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", + "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.0" + }, + "packaging": { + "hashes": [ + "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", + "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2" + 
}, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, + "platformdirs": { + "hashes": [ + "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380", + "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420" + ], + "markers": "python_version >= '3.8'", + "version": "==4.1.0" + } + } } diff --git a/backend/llm/knowledge_brain_qa.py b/backend/llm/knowledge_brain_qa.py index 27adc9948..5e4b06a99 100644 --- a/backend/llm/knowledge_brain_qa.py +++ b/backend/llm/knowledge_brain_qa.py @@ -11,6 +11,7 @@ from llm.rags.rag_interface import RAGInterface from llm.utils.format_chat_history import format_chat_history from llm.utils.get_prompt_to_use import get_prompt_to_use from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id +from repository.files.generate_file_signed_url import generate_file_signed_url from logger import get_logger from models import BrainSettings from modules.brain.service.brain_service import BrainService @@ -61,6 +62,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): chat_id: str = None # pyright: ignore reportPrivateUsage=none brain_id: str # pyright: ignore reportPrivateUsage=none max_tokens: int = 2000 + max_input: int = 2000 streaming: bool = False knowledge_qa: Optional[RAGInterface] metadata: Optional[dict] = None @@ -76,6 +78,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): model: str, brain_id: str, chat_id: str, + max_tokens: int, streaming: bool = False, prompt_id: Optional[UUID] = None, metadata: Optional[dict] = None, @@ -97,6 +100,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): **kwargs, ) self.metadata = metadata + self.max_tokens = max_tokens @property def prompt_to_use(self): @@ -309,9 +313,14 @@ class KnowledgeBrainQA(BaseModel, QAInterface): if "url" in doc.metadata else doc.metadata["file_name"], "type": "url" if "url" in doc.metadata else "file", - "source_url": doc.metadata["url"] - if "url" in doc.metadata + "source_url": generate_file_signed_url( + f"{brain.brain_id}/{doc.metadata['file_name']}" + ).get("signedURL", "") + if "url" not in doc.metadata else "", + "original_file_name": doc.metadata[ + "original_file_name" + ], } ) ) diff --git a/backend/llm/rags/quivr_rag.py b/backend/llm/rags/quivr_rag.py index d0eb23918..9f0ddb0f0 100644 --- a/backend/llm/rags/quivr_rag.py +++ b/backend/llm/rags/quivr_rag.py @@ -60,7 +60,8 @@ class QuivrRAG(BaseModel, RAGInterface): temperature: float = 0.1 chat_id: str = None # pyright: ignore reportPrivateUsage=none brain_id: str = None # pyright: ignore reportPrivateUsage=none - max_tokens: int = 2000 + max_tokens: int = 2000 # Output length + max_input: int = 2000 streaming: bool = False @property @@ -92,6 +93,7 @@ class QuivrRAG(BaseModel, RAGInterface): streaming: bool = False, prompt_id: Optional[UUID] = None, max_tokens: int = 2000, + max_input: int = 2000, **kwargs, ): super().__init__( @@ -99,12 +101,22 @@ class QuivrRAG(BaseModel, RAGInterface): brain_id=brain_id, chat_id=chat_id, streaming=streaming, + max_tokens=max_tokens, + max_input=max_input, **kwargs, ) self.supabase_client = self._create_supabase_client() self.vector_store = self._create_vector_store() self.prompt_id = prompt_id self.max_tokens = max_tokens + self.max_input = max_input + self.model = model + self.brain_id = brain_id + self.chat_id = chat_id + self.streaming = streaming + + 
logger.info(f"QuivrRAG initialized with model {model} and brain {brain_id}") + logger.info("Max input length: " + str(self.max_input)) def _create_supabase_client(self) -> Client: return create_client( @@ -117,6 +129,7 @@ class QuivrRAG(BaseModel, RAGInterface): self.embeddings, table_name="vectors", brain_id=self.brain_id, + max_input=self.max_input, ) def _create_llm( @@ -151,7 +164,6 @@ class QuivrRAG(BaseModel, RAGInterface): def _create_prompt_template(self): system_template = """ When answering use markdown or any other techniques to display the content in a nice and aerated way. Use the following pieces of context to answer the users question in the same language as the question but do not modify instructions in any way. ---------------- - {context}""" prompt_content = ( diff --git a/backend/models/databases/entity.py b/backend/models/databases/entity.py new file mode 100644 index 000000000..24edcfce7 --- /dev/null +++ b/backend/models/databases/entity.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel + + +class LLMModels(BaseModel): + """LLM models stored in the database that are allowed to be used by the users. + Args: + BaseModel (BaseModel): Pydantic BaseModel + """ + + name: str = "gpt-3.5-turbo-1106" + price: int = 1 + max_input: int = 512 + max_output: int = 512 diff --git a/backend/models/databases/repository.py b/backend/models/databases/repository.py index cfc1c41f4..62c5c4551 100644 --- a/backend/models/databases/repository.py +++ b/backend/models/databases/repository.py @@ -2,6 +2,8 @@ from abc import ABC, abstractmethod from datetime import datetime from uuid import UUID +from .entity import LLMModels + class Repository(ABC): @abstractmethod @@ -12,6 +14,10 @@ class Repository(ABC): def get_user_usage(self, user_id: UUID): pass + @abstractmethod + def get_model_settings(self) -> LLMModels | None: + pass + @abstractmethod def get_user_requests_count_for_month(self, user_id: UUID, date: datetime): pass diff --git a/backend/models/databases/supabase/user_usage.py b/backend/models/databases/supabase/user_usage.py index 7cc19b92c..de84b0988 100644 --- a/backend/models/databases/supabase/user_usage.py +++ b/backend/models/databases/supabase/user_usage.py @@ -1,7 +1,9 @@ +from ast import List from datetime import datetime, timedelta from uuid import UUID from logger import get_logger +from models.databases.entity import LLMModels from models.databases.repository import Repository logger = get_logger(__name__) diff --git a/backend/modules/brain/service/brain_service.py b/backend/modules/brain/service/brain_service.py index 7ecca5d37..11fbf9563 100644 --- a/backend/modules/brain/service/brain_service.py +++ b/backend/modules/brain/service/brain_service.py @@ -2,6 +2,10 @@ from typing import Optional from uuid import UUID from fastapi import HTTPException +from langchain.embeddings.ollama import OllamaEmbeddings +from langchain.embeddings.openai import OpenAIEmbeddings +from vectorstore.supabase import CustomSupabaseVectorStore +from models.settings import BrainSettings, get_supabase_client from modules.brain.dto.inputs import BrainUpdatableProperties, CreateBrainProperties from modules.brain.entity.brain_entity import BrainEntity, BrainType, PublicBrain from modules.brain.repository import ( @@ -22,6 +26,10 @@ from modules.brain.service.api_brain_definition_service import ApiBrainDefinitio from modules.brain.service.utils.validate_brain import validate_api_brain from modules.knowledge.service.knowledge_service import KnowledgeService +from logger import get_logger + +logger = 
get_logger(__name__) + knowledge_service = KnowledgeService() # TODO: directly user api_brain_definition repository api_brain_definition_service = ApiBrainDefinitionService() @@ -44,6 +52,70 @@ class BrainService: def get_brain_by_id(self, brain_id: UUID): return self.brain_repository.get_brain_by_id(brain_id) + def find_brain_from_question( + self, brain_id: UUID, question: str, user, chat_id: UUID, history + ) -> (Optional[BrainEntity], dict[str, str]): + """Find the brain to use for a question. + + Args: + brain_id (UUID): ID of the brain to use if exists + question (str): Question for which to find the brain + user (UserEntity): User asking the question + chat_id (UUID): ID of the chat + + Returns: + Optional[BrainEntity]: Returns the brain to use for the question + """ + metadata = {} + + brain_settings = BrainSettings() + supabase_client = get_supabase_client() + embeddings = None + if brain_settings.ollama_api_base_url: + embeddings = OllamaEmbeddings( + base_url=brain_settings.ollama_api_base_url + ) # pyright: ignore reportPrivateUsage=none + else: + embeddings = OpenAIEmbeddings() + vector_store = CustomSupabaseVectorStore( + supabase_client, embeddings, table_name="vectors", user_id=user.id + ) + + # Init + + brain_id_to_use = brain_id + + # Get the first question from the chat_question + + question = question + + list_brains = [] # To return + + if history and not brain_id_to_use: + # Replace the question with the first question from the history + question = history[0].user_message + + if history and not brain_id: + brain_id_to_use = history[0].brain_id + + # Calculate the closest brains to the question + list_brains = vector_store.find_brain_closest_query(user.id, question) + + unique_list_brains = [] + seen_brain_ids = set() + + for brain in list_brains: + if brain["id"] not in seen_brain_ids: + unique_list_brains.append(brain) + seen_brain_ids.add(brain["id"]) + + metadata["close_brains"] = unique_list_brains[:5] + + if list_brains and not brain_id_to_use: + brain_id_to_use = list_brains[0]["id"] + + return brain_id_to_use, metadata + def create_brain( self, user_id: UUID, diff --git a/backend/modules/chat/controller/chat/brainful_chat.py b/backend/modules/chat/controller/chat/brainful_chat.py index 78b24383f..55f12cd86 100644 --- a/backend/modules/chat/controller/chat/brainful_chat.py +++ b/backend/modules/chat/controller/chat/brainful_chat.py @@ -1,10 +1,6 @@ -from langchain.embeddings.ollama import OllamaEmbeddings -from langchain.embeddings.openai import OpenAIEmbeddings from llm.api_brain_qa import APIBrainQA -from llm.composite_brain_qa import CompositeBrainQA from llm.knowledge_brain_qa import KnowledgeBrainQA from logger import get_logger -from models.settings import BrainSettings, get_supabase_client from modules.brain.entity.brain_entity import BrainType, RoleEnum from modules.brain.service.api_brain_definition_service import ApiBrainDefinitionService from modules.brain.service.brain_authorization_service import ( @@ -13,7 +9,6 @@ from modules.brain.service.brain_authorization_service import ( from modules.brain.service.brain_service import BrainService from modules.chat.controller.chat.interface import ChatInterface from modules.chat.service.chat_service import ChatService -from vectorstore.supabase import CustomSupabaseVectorStore chat_service = ChatService() api_brain_definition_service = ApiBrainDefinitionService() @@ -43,64 +38,17 @@ class BrainfulChat(ChatInterface): def get_answer_generator( self, - brain_id, + brain, chat_id, model, max_tokens, + max_input, 
temperature, streaming, prompt_id, user_id, - chat_question, + metadata, ): - metadata = {} - brain_settings = BrainSettings() - supabase_client = get_supabase_client() - embeddings = None - if brain_settings.ollama_api_base_url: - embeddings = OllamaEmbeddings( - base_url=brain_settings.ollama_api_base_url - ) # pyright: ignore reportPrivateUsage=none - else: - embeddings = OpenAIEmbeddings() - vector_store = CustomSupabaseVectorStore( - supabase_client, embeddings, table_name="vectors", user_id=user_id - ) - - # Init - - brain_id_to_use = brain_id - - # Get the first question from the chat_question - - question = chat_question.question - history = chat_service.get_chat_history(chat_id) - - list_brains = [] # To return - - if history and not brain_id_to_use: - # Replace the question with the first question from the history - question = history[0].user_message - - if history and not brain_id: - brain_id_to_use = history[0].brain_id - - # Calculate the closest brains to the question - list_brains = vector_store.find_brain_closest_query(user_id, question) - - metadata["close_brains"] = list_brains[:5] - - if list_brains and not brain_id_to_use: - brain_id_to_use = list_brains[0]["id"] - - # GENERIC - follow_up_questions = chat_service.get_follow_up_question(chat_id) - metadata["follow_up_questions"] = follow_up_questions - metadata["model"] = model - metadata["max_tokens"] = max_tokens - metadata["temperature"] = temperature - - brain = brain_service.get_brain_by_id(brain_id_to_use) if ( brain and brain.brain_type == BrainType.DOC @@ -110,33 +58,23 @@ class BrainfulChat(ChatInterface): chat_id=chat_id, model=model, max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, prompt_id=prompt_id, metadata=metadata, ) - if brain.brain_type == BrainType.COMPOSITE: - return CompositeBrainQA( - chat_id=chat_id, - model=model, - max_tokens=max_tokens, - temperature=temperature, - brain_id=str(brain.brain_id), - streaming=streaming, - prompt_id=prompt_id, - user_id=user_id, - metadata=metadata, - ) if brain.brain_type == BrainType.API: brain_definition = api_brain_definition_service.get_api_brain_definition( - brain_id_to_use + brain.brain_id ) return APIBrainQA( chat_id=chat_id, model=model, max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, @@ -144,5 +82,7 @@ class BrainfulChat(ChatInterface): user_id=user_id, metadata=metadata, raw=(brain_definition.raw if brain_definition else None), - jq_instructions=(brain_definition.jq_instructions if brain_definition else None), + jq_instructions=( + brain_definition.jq_instructions if brain_definition else None + ), ) diff --git a/backend/modules/chat/controller/chat/brainless_chat.py b/backend/modules/chat/controller/chat/brainless_chat.py index 559431ac4..c5dcec220 100644 --- a/backend/modules/chat/controller/chat/brainless_chat.py +++ b/backend/modules/chat/controller/chat/brainless_chat.py @@ -8,7 +8,6 @@ class BrainlessChat(ChatInterface): def get_answer_generator( self, - brain_id, chat_id, model, max_tokens, diff --git a/backend/modules/chat/controller/chat/interface.py b/backend/modules/chat/controller/chat/interface.py index 39479e7b8..73745df7f 100644 --- a/backend/modules/chat/controller/chat/interface.py +++ b/backend/modules/chat/controller/chat/interface.py @@ -9,7 +9,6 @@ class ChatInterface(ABC): @abstractmethod def get_answer_generator( self, - brain_id, chat_id, model, max_tokens, diff --git 
a/backend/modules/chat/controller/chat/utils.py b/backend/modules/chat/controller/chat/utils.py index 1105e8c83..3ea0e17db 100644 --- a/backend/modules/chat/controller/chat/utils.py +++ b/backend/modules/chat/controller/chat/utils.py @@ -25,6 +25,17 @@ class NullableUUID(UUID): def check_user_requests_limit(user: UserIdentity, model: str): + """Checks the user requests limit. + It checks the user requests limit and raises an exception if the user has reached the limit. + By default, the user has a limit of 100 requests per month. The limit can be increased by upgrading the plan. + + Args: + user (UserIdentity): User object + model (str): Model name for which the user is making the request + + Raises: + HTTPException: Raises a 429 error if the user has reached the limit. + """ userDailyUsage = UserUsage(id=user.id, email=user.email) userSettings = userDailyUsage.get_user_settings() diff --git a/backend/modules/chat/controller/chat_routes.py b/backend/modules/chat/controller/chat_routes.py index 264d912fa..e2ac7bc68 100644 --- a/backend/modules/chat/controller/chat_routes.py +++ b/backend/modules/chat/controller/chat_routes.py @@ -1,10 +1,10 @@ from typing import List, Optional from uuid import UUID -from venv import logger from fastapi import APIRouter, Depends, HTTPException, Query, Request from fastapi.responses import StreamingResponse from middlewares.auth import AuthBearer, get_current_user +from models.databases.entity import LLMModels from models.user_usage import UserUsage from modules.brain.service.brain_service import BrainService from modules.chat.controller.chat.brainful_chat import BrainfulChat @@ -21,6 +21,10 @@ from modules.chat.service.chat_service import ChatService from modules.notification.service.notification_service import NotificationService from modules.user.entity.user_identity import UserIdentity +from logger import get_logger + +logger = get_logger(__name__) + chat_router = APIRouter() notification_service = NotificationService() @@ -163,11 +167,12 @@ async def create_question_handler( model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore max_tokens=chat_question.max_tokens, temperature=chat_question.temperature, - brain_id=str(brain_id), streaming=False, prompt_id=chat_question.prompt_id, user_id=current_user.id, - chat_question=chat_question, + max_input=2000, + brain=brain_service.get_brain_by_id(brain_id), + metadata={}, ) chat_answer = gpt_answer_generator.generate_answer( @@ -201,49 +206,81 @@ async def create_stream_question_handler( chat_instance = BrainfulChat() chat_instance.validate_authorization(user_id=current_user.id, brain_id=brain_id) - user_daily_usage = UserUsage( + user_usage = UserUsage( id=current_user.id, email=current_user.email, ) - user_settings = user_daily_usage.get_user_settings() + # Get History + history = chat_service.get_chat_history(chat_id) - # Retrieve chat model (temperature, max_tokens, model) - if ( - not chat_question.model - or chat_question.temperature is None - or not chat_question.max_tokens - ): - fallback_model = "gpt-3.5-turbo-1106" - fallback_temperature = 0 - fallback_max_tokens = 256 - if brain_id: - brain = brain_service.get_brain_by_id(brain_id) - if brain: - fallback_model = brain.model or fallback_model - fallback_temperature = brain.temperature or fallback_temperature - fallback_max_tokens = brain.max_tokens or fallback_max_tokens + # Get user settings + user_settings = user_usage.get_user_settings() - chat_question.model = chat_question.model or fallback_model - 
chat_question.temperature = chat_question.temperature or fallback_temperature - chat_question.max_tokens = chat_question.max_tokens or fallback_max_tokens + # Get Model settings for the user + models_settings = user_usage.get_model_settings() + # Generic + brain_id_to_use, metadata_brain = brain_service.find_brain_from_question( + brain_id, chat_question.question, current_user, chat_id, history + ) + + # Add metadata_brain to metadata + metadata = {} + metadata = {**metadata, **metadata_brain} + follow_up_questions = chat_service.get_follow_up_question(chat_id) + metadata["follow_up_questions"] = follow_up_questions + + # Get the Brain settings + brain = brain_service.get_brain_by_id(brain_id_to_use) + + logger.info(f"Brain model: {brain.model}") + logger.info(f"Brain is : {str(brain)}") try: - logger.info(f"Streaming request for {chat_question.model}") - check_user_requests_limit(current_user, chat_question.model) - # TODO check if model is in the list of models available for the user + # Default model is gpt-3.5-turbo-1106 + model_to_use = LLMModels( + name="gpt-3.5-turbo-1106", price=1, max_input=512, max_output=512 + ) - is_model_ok = chat_question.model in user_settings.get("models", ["gpt-3.5-turbo-1106"]) # type: ignore + is_brain_model_available = any( + brain.model == model_dict.get("name") for model_dict in models_settings + ) + + is_user_allowed_model = brain.model in user_settings.get( + "models", ["gpt-3.5-turbo-1106"] + ) # Checks if the model is available in the list of models + + logger.info(f"Brain model: {brain.model}") + logger.info(f"User models: {user_settings.get('models', [])}") + logger.info(f"Model available: {is_brain_model_available}") + logger.info(f"User allowed model: {is_user_allowed_model}") + + if is_brain_model_available and is_user_allowed_model: + # Use the model from the brain + model_to_use.name = brain.model + for model_dict in models_settings: + if model_dict.get("name") == model_to_use.name: + logger.info(f"Using model {model_to_use.name}") + model_to_use.max_input = model_dict.get("max_input") + model_to_use.max_output = model_dict.get("max_output") + break + + metadata["model"] = model_to_use.name + metadata["max_tokens"] = model_to_use.max_output + metadata["max_input"] = model_to_use.max_input + + check_user_requests_limit(current_user, chat_question.model) gpt_answer_generator = chat_instance.get_answer_generator( chat_id=str(chat_id), - model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore - max_tokens=chat_question.max_tokens, - temperature=chat_question.temperature, # type: ignore + model=model_to_use.name, + max_tokens=model_to_use.max_output, + max_input=model_to_use.max_input, + temperature=0.1, streaming=True, prompt_id=chat_question.prompt_id, - brain_id=brain_id, user_id=current_user.id, - chat_question=chat_question, + metadata=metadata, + brain=brain, ) return StreamingResponse( diff --git a/backend/modules/chat/dto/chats.py b/backend/modules/chat/dto/chats.py index 5a76de77f..6a46abff1 100644 --- a/backend/modules/chat/dto/chats.py +++ b/backend/modules/chat/dto/chats.py @@ -32,6 +32,7 @@ class Sources(BaseModel): name: str source_url: str type: str + original_file_name: str class Config: json_encoders = { diff --git a/backend/packages/files/parsers/audio.py b/backend/packages/files/parsers/audio.py index 6f210f383..fd6d8d578 100644 --- a/backend/packages/files/parsers/audio.py +++ b/backend/packages/files/parsers/audio.py @@ -9,10 +9,7 @@ from models import File, get_documents_vector_store from 
packages.files.file import compute_sha1_from_content -async def process_audio( - file: File, - user, -): +async def process_audio(file: File, user, original_file_name): temp_filename = None file_sha = "" dateshort = time.strftime("%Y%m%d-%H%M%S") diff --git a/backend/packages/files/parsers/code_python.py b/backend/packages/files/parsers/code_python.py index 4806424f3..95dfcbd87 100644 --- a/backend/packages/files/parsers/code_python.py +++ b/backend/packages/files/parsers/code_python.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -async def process_python(file: File, brain_id): +async def process_python(file: File, brain_id, original_file_name): return await process_file( file=file, loader_class=PythonLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/common.py b/backend/packages/files/parsers/common.py index 681ec7d7d..759189088 100644 --- a/backend/packages/files/parsers/common.py +++ b/backend/packages/files/parsers/common.py @@ -2,7 +2,6 @@ import time from logger import get_logger from models import File -from models.settings import get_supabase_db from modules.brain.service.brain_vector_service import BrainVectorService from packages.embeddings.vectors import Neurons from repository.files.upload_file import DocumentSerializable @@ -10,12 +9,7 @@ from repository.files.upload_file import DocumentSerializable logger = get_logger(__name__) -async def process_file( - file: File, - loader_class, - brain_id, -): - database = get_supabase_db() +async def process_file(file: File, loader_class, brain_id, original_file_name): dateshort = time.strftime("%Y%m%d") neurons = Neurons() @@ -28,6 +22,7 @@ async def process_file( "chunk_size": file.chunk_size, "chunk_overlap": file.chunk_overlap, "date": dateshort, + "original_file_name": original_file_name or file.file_name, } docs = [] diff --git a/backend/packages/files/parsers/csv.py b/backend/packages/files/parsers/csv.py index 470515b99..7e539fff1 100644 --- a/backend/packages/files/parsers/csv.py +++ b/backend/packages/files/parsers/csv.py @@ -4,12 +4,10 @@ from models import File from .common import process_file -def process_csv( - file: File, - brain_id, -): +def process_csv(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=CSVLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/docx.py b/backend/packages/files/parsers/docx.py index 3b26c1a8f..2a98b0c50 100644 --- a/backend/packages/files/parsers/docx.py +++ b/backend/packages/files/parsers/docx.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_docx(file: File, brain_id): +def process_docx(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=Docx2txtLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/epub.py b/backend/packages/files/parsers/epub.py index 695212aae..49b2e4e6b 100644 --- a/backend/packages/files/parsers/epub.py +++ b/backend/packages/files/parsers/epub.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_epub(file: File, brain_id): +def process_epub(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredEPubLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/github.py b/backend/packages/files/parsers/github.py 
index 1c2071983..44d4ef577 100644 --- a/backend/packages/files/parsers/github.py +++ b/backend/packages/files/parsers/github.py @@ -52,6 +52,7 @@ async def process_github( "chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort, + "original_file_name": doc.metadata["original_file_name"], } doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata) diff --git a/backend/packages/files/parsers/html.py b/backend/packages/files/parsers/html.py index 3e247cc9c..620419f26 100644 --- a/backend/packages/files/parsers/html.py +++ b/backend/packages/files/parsers/html.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_html(file: File, brain_id): +def process_html(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredHTMLLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/markdown.py b/backend/packages/files/parsers/markdown.py index a10f5edbc..600da1e36 100644 --- a/backend/packages/files/parsers/markdown.py +++ b/backend/packages/files/parsers/markdown.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_markdown(file: File, brain_id): +def process_markdown(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredMarkdownLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/notebook.py b/backend/packages/files/parsers/notebook.py index a610b8f44..7cbb1db58 100644 --- a/backend/packages/files/parsers/notebook.py +++ b/backend/packages/files/parsers/notebook.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_ipnyb(file: File, brain_id): +def process_ipnyb(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=NotebookLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/odt.py b/backend/packages/files/parsers/odt.py index 5c57de39e..a13fdf254 100644 --- a/backend/packages/files/parsers/odt.py +++ b/backend/packages/files/parsers/odt.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_odt(file: File, brain_id): +def process_odt(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredPDFLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/pdf.py b/backend/packages/files/parsers/pdf.py index fbc9e771f..0b138a214 100644 --- a/backend/packages/files/parsers/pdf.py +++ b/backend/packages/files/parsers/pdf.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_pdf(file: File, brain_id): +def process_pdf(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredPDFLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/powerpoint.py b/backend/packages/files/parsers/powerpoint.py index 2f6c49a26..3c02d1456 100644 --- a/backend/packages/files/parsers/powerpoint.py +++ b/backend/packages/files/parsers/powerpoint.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_powerpoint(file: File, brain_id): +def process_powerpoint(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredPowerPointLoader, 
brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/telegram.py b/backend/packages/files/parsers/telegram.py index 071cc4c79..416a7b53d 100644 --- a/backend/packages/files/parsers/telegram.py +++ b/backend/packages/files/parsers/telegram.py @@ -4,12 +4,10 @@ from packages.files.loaders.telegram import TelegramChatFileLoader from .common import process_file -def process_telegram( - file: File, - brain_id, -): +def process_telegram(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=TelegramChatFileLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/txt.py b/backend/packages/files/parsers/txt.py index b38b9318c..3e31cd3be 100644 --- a/backend/packages/files/parsers/txt.py +++ b/backend/packages/files/parsers/txt.py @@ -7,9 +7,11 @@ from .common import process_file async def process_txt( file: File, brain_id, + original_file_name, ): return await process_file( file=file, loader_class=TextLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/xlsx.py b/backend/packages/files/parsers/xlsx.py index c3c5d8f64..e349bbd70 100644 --- a/backend/packages/files/parsers/xlsx.py +++ b/backend/packages/files/parsers/xlsx.py @@ -4,12 +4,10 @@ from models.files import File from .common import process_file -def process_xlsx( - file: File, - brain_id, -): +def process_xlsx(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredExcelLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/processors.py b/backend/packages/files/processors.py index 5a6a279f5..d7f903dd0 100644 --- a/backend/packages/files/processors.py +++ b/backend/packages/files/processors.py @@ -86,6 +86,7 @@ async def filter_file( result = await file_processors[file.file_extension]( file=file, brain_id=brain_id, + original_file_name=original_file_name, ) if result is None or result == 0: return create_response( diff --git a/backend/repository/brain/get_question_context_from_brain.py b/backend/repository/brain/get_question_context_from_brain.py index 9f6fde639..14eca2c96 100644 --- a/backend/repository/brain/get_question_context_from_brain.py +++ b/backend/repository/brain/get_question_context_from_brain.py @@ -20,6 +20,15 @@ class DocumentAnswer: def get_question_context_from_brain(brain_id: UUID, question: str) -> str: + """Finds the best brain to answer the question based on the question's meaning. 
+ + Args: + brain_id (UUID): Id of the brain to search in + question (str): Question to search for in the vector store + + Returns: + str: _descripton_ + """ # TODO: Move to AnswerGenerator service supabase_client = get_supabase_client() embeddings = get_embeddings() diff --git a/backend/vectorstore/supabase.py b/backend/vectorstore/supabase.py index 1e2e1e079..8c9e27add 100644 --- a/backend/vectorstore/supabase.py +++ b/backend/vectorstore/supabase.py @@ -15,6 +15,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): brain_id: str = "none" user_id: str = "none" number_docs: int = 35 + max_input: int = 2000 def __init__( self, @@ -24,11 +25,13 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): brain_id: str = "none", user_id: str = "none", number_docs: int = 35, + max_input: int = 2000, ): super().__init__(client, embedding, table_name) self.brain_id = brain_id self.user_id = user_id self.number_docs = number_docs + self.max_input = max_input def find_brain_closest_query( self, @@ -65,7 +68,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): def similarity_search( self, query: str, - k: int = 35, + k: int = 40, table: str = "match_vectors", threshold: float = 0.5, **kwargs: Any, @@ -98,5 +101,15 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): ] documents = [doc for doc, _ in match_result] + max_tokens_input = self.max_input + documents_to_return = [] - return documents + # Limits to max_tokens_input with metadata chunk_size + for doc in documents: + if doc.metadata["chunk_size"] <= max_tokens_input: + documents_to_return.append(doc) + max_tokens_input -= doc.metadata["chunk_size"] + else: + break + + return documents_to_return
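
The most self-contained technique in this patch is the new tail of `CustomSupabaseVectorStore.similarity_search`: matched documents are kept in relevance order only while their cumulative `chunk_size` metadata fits inside the model's `max_input` budget. Below is a minimal, standalone sketch of that idea — not the project's actual class, and the `Doc` type and `limit_docs_to_budget` helper are invented here for illustration; the real code works on LangChain `Document` objects returned by the Supabase `match_vectors` RPC.

```python
# Minimal sketch of the chunk-budget filtering added in this patch.
# Assumption: each retrieved document carries a "chunk_size" entry in its
# metadata, as the upload pipeline in this patch now records.

from dataclasses import dataclass, field


@dataclass
class Doc:
    page_content: str
    metadata: dict = field(default_factory=dict)


def limit_docs_to_budget(docs: list[Doc], max_input: int = 2000) -> list[Doc]:
    """Keep the highest-ranked docs whose chunk sizes fit within max_input."""
    remaining = max_input
    selected: list[Doc] = []
    for doc in docs:
        chunk_size = doc.metadata.get("chunk_size", 0)
        if chunk_size <= remaining:
            selected.append(doc)
            remaining -= chunk_size
        else:
            # Mirrors the patch: stop at the first document that no longer fits.
            break
    return selected


if __name__ == "__main__":
    docs = [
        Doc("chunk A", {"chunk_size": 900}),
        Doc("chunk B", {"chunk_size": 800}),
        Doc("chunk C", {"chunk_size": 700}),
    ]
    # With the default budget of 2000, only A and B are returned.
    print([d.page_content for d in limit_docs_to_budget(docs)])
```

The per-model budget itself comes from the new `LLMModels` settings (`max_input` / `max_output`) that `chat_routes` looks up before building the answer generator, so a model with a larger context window automatically receives more retrieved chunks.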