feat(14k): done (#2102)

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] Ideally, I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
Commit cc39f9e3ba (parent 3fcdd016e2), authored by Stan Girard on 2024-01-27 01:50:58 -08:00 and committed via GitHub.
37 changed files with 349 additions and 212 deletions

View File

@ -1,12 +1,12 @@
name: 'Close stale issues and PRs'
name: "Close stale issues and PRs"
on:
schedule:
- cron: '0 */4 * * *'
- cron: "0 */4 * * *"
permissions:
contents: write # only for delete-branch option
issues: write
pull-requests: write
jobs:
stale:
@ -16,9 +16,9 @@ jobs:
with:
exempt-assignees: true
exempt-draft-pr: true
days-before-stale: 30
days-before-stale: 90
days-before-close: 5
operations-per-run: 400
exempt-milestones: true
stale-issue-message: "Thanks for your contributions, we'll be closing this issue as it has gone stale. Feel free to reopen if you'd like to continue the discussion."
stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion."

View File

@ -1,23 +0,0 @@
# name: Deploy Docs Deployment
# env:
# VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
# VERCEL_PROJECT_ID: ${{ secrets.VERCEL_DOC_PROJECT_ID }}
# on:
# push:
# branches: [ "main" ]
# paths:
# - 'docs/**'
# jobs:
# Deploy-Preview:
# environment: production
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - name: Install Vercel CLI
# run: npm install --global vercel@latest
# - name: Pull Vercel Environment Information
# run: vercel pull --yes --environment=production --token=${{ secrets.VERCEL_TOKEN }}
# - name: Build Project Artifacts
# run: vercel build --prod --token=${{ secrets.VERCEL_TOKEN }}
# - name: Deploy Project Artifacts to Vercel
# run: vercel deploy --prebuilt --prod --token=${{ secrets.VERCEL_TOKEN }}

View File

@ -1,32 +0,0 @@
name: Vitest
on:
push:
branches: [ main ]
paths:
- 'frontend/**'
pull_request:
branches: [ main ]
paths:
- 'frontend/**'
jobs:
build:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./frontend
strategy:
matrix:
node-version: [18]
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4
with:
node-version: ${{ matrix.node-version }}
cache: 'yarn'
cache-dependency-path: frontend/yarn.lock
- run: yarn
- run: yarn run test-unit

View File

@ -12,7 +12,7 @@
],
"editor.formatOnSave": true,
"[python]": {
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit",

View File

@ -23,7 +23,7 @@ python-jose = "==3.3.0"
asyncpg = "==0.27.0"
flake8 = "==6.0.0"
flake8-black = "==0.3.6"
sentry-sdk = {extras = ["fastapi"], version = "==1.37.1"}
sentry-sdk = {extras = ["fastapi"] }
pyright = "==1.1.316"
resend = "==0.5.1"
html5lib = "==1.1"
@ -34,7 +34,7 @@ redis = "==4.5.4"
flower = "*"
boto3 = "==1.33.7"
botocore = "==1.33.7"
celery = {extras = ["sqs"], version = "*"}
celery = {extras = ["sqs"] }
python-dotenv = "*"
pytest-mock = "*"
pytest-celery = "*"
@ -45,6 +45,7 @@ jq = "==1.6.0"
pytest = "*"
[dev-packages]
black = "*"
[requires]
python_version = "3.11"

Pipfile.lock (generated, 81 changed lines)
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "3fd64a4e95ad0de8091ce39b705c9f87f941eb483f95f1c5d501f9bf473781be"
"sha256": "9ae12dd1d097d77ce3cb08c7b6b5e5fa8a96216a98df213860d0ea30bb22dcc5"
},
"pipfile-spec": 6,
"requires": {
@ -2008,10 +2008,10 @@
"fastapi"
],
"hashes": [
"sha256:7cd324dd2877fdc861f75cba4242bce23a58272a6fea581fcb218bb718bd9cc5",
"sha256:a249c7364827ee89daaa078bb8b56ece0b3d52d9130961bef2302b79bdf7fe70"
"sha256:24c83b0b41c887d33328a9166f5950dc37ad58f01c9f2fbff6b87a6f1094170c",
"sha256:acaf597b30258fc7663063b291aa99e58f3096e91fe1e6634f4b79f9c1943e8e"
],
"version": "==1.37.1"
"version": "==1.39.2"
},
"setuptools": {
"hashes": [
@ -2398,7 +2398,7 @@
"sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84",
"sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"
],
"markers": "python_version >= '3.6'",
"markers": "python_version >= '3.10'",
"version": "==2.0.7"
},
"uvicorn": {
@ -2709,5 +2709,74 @@
"version": "==3.17.0"
}
},
"develop": {}
"develop": {
"black": {
"hashes": [
"sha256:0cd59d01bf3306ff7e3076dd7f4435fcd2fafe5506a6111cae1138fc7de52382",
"sha256:1e0fa70b8464055069864a4733901b31cbdbe1273f63a24d2fa9d726723d45ac",
"sha256:30fbf768cd4f4576598b1db0202413fafea9a227ef808d1a12230c643cefe9fc",
"sha256:39addf23f7070dbc0b5518cdb2018468ac249d7412a669b50ccca18427dba1f3",
"sha256:5134a6f6b683aa0a5592e3fd61dd3519d8acd953d93e2b8b76f9981245b65594",
"sha256:6a8977774929b5db90442729f131221e58cc5d8208023c6af9110f26f75b6b20",
"sha256:6cc5a6ba3e671cfea95a40030b16a98ee7dc2e22b6427a6f3389567ecf1b5262",
"sha256:780f13d03066a7daf1707ec723fdb36bd698ffa29d95a2e7ef33a8dd8fe43b5c",
"sha256:7fa8d9aaa22d846f8c0f7f07391148e5e346562e9b215794f9101a8339d8b6d8",
"sha256:827a7c0da520dd2f8e6d7d3595f4591aa62ccccce95b16c0e94bb4066374c4c2",
"sha256:82d9452aeabd51d1c8f0d52d4d18e82b9f010ecb30fd55867b5ff95904f427ff",
"sha256:94d5280d020dadfafc75d7cae899609ed38653d3f5e82e7ce58f75e76387ed3d",
"sha256:9aede09f72b2a466e673ee9fca96e4bccc36f463cac28a35ce741f0fd13aea8b",
"sha256:a15670c650668399c4b5eae32e222728185961d6ef6b568f62c1681d57b381ba",
"sha256:a5a0100b4bdb3744dd68412c3789f472d822dc058bb3857743342f8d7f93a5a7",
"sha256:aaf9aa85aaaa466bf969e7dd259547f4481b712fe7ee14befeecc152c403ee05",
"sha256:be305563ff4a2dea813f699daaffac60b977935f3264f66922b1936a5e492ee4",
"sha256:bf8dd261ee82df1abfb591f97e174345ab7375a55019cc93ad38993b9ff5c6ad",
"sha256:d74d4d0da276fbe3b95aa1f404182562c28a04402e4ece60cf373d0b902f33a0",
"sha256:e0e367759062dcabcd9a426d12450c6d61faf1704a352a49055a04c9f9ce8f5a",
"sha256:ec489cae76eac3f7573629955573c3a0e913641cafb9e3bfc87d8ce155ebdb29",
"sha256:f0dfbfbacfbf9cd1fac7a5ddd3e72510ffa93e841a69fcf4a6358feab1685382"
],
"markers": "python_version >= '3.8'",
"version": "==24.1.0"
},
"click": {
"hashes": [
"sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28",
"sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"
],
"markers": "python_version >= '3.7'",
"version": "==8.1.7"
},
"mypy-extensions": {
"hashes": [
"sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d",
"sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.0"
},
"packaging": {
"hashes": [
"sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5",
"sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"
],
"markers": "python_version >= '3.7'",
"version": "==23.2"
},
"pathspec": {
"hashes": [
"sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08",
"sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"
],
"markers": "python_version >= '3.8'",
"version": "==0.12.1"
},
"platformdirs": {
"hashes": [
"sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380",
"sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420"
],
"markers": "python_version >= '3.8'",
"version": "==4.1.0"
}
}
}

View File

@ -11,6 +11,7 @@ from llm.rags.rag_interface import RAGInterface
from llm.utils.format_chat_history import format_chat_history
from llm.utils.get_prompt_to_use import get_prompt_to_use
from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id
from repository.files.generate_file_signed_url import generate_file_signed_url
from logger import get_logger
from models import BrainSettings
from modules.brain.service.brain_service import BrainService
@ -61,6 +62,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str # pyright: ignore reportPrivateUsage=none
max_tokens: int = 2000
max_input: int = 2000
streaming: bool = False
knowledge_qa: Optional[RAGInterface]
metadata: Optional[dict] = None
@ -76,6 +78,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
model: str,
brain_id: str,
chat_id: str,
max_tokens: int,
streaming: bool = False,
prompt_id: Optional[UUID] = None,
metadata: Optional[dict] = None,
@ -97,6 +100,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
**kwargs,
)
self.metadata = metadata
self.max_tokens = max_tokens
@property
def prompt_to_use(self):
@ -309,9 +313,14 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
if "url" in doc.metadata
else doc.metadata["file_name"],
"type": "url" if "url" in doc.metadata else "file",
"source_url": doc.metadata["url"]
if "url" in doc.metadata
"source_url": generate_file_signed_url(
f"{brain.brain_id}/{doc.metadata['file_name']}"
).get("signedURL", "")
if "url" not in doc.metadata
else "",
"original_file_name": doc.metadata[
"original_file_name"
],
}
)
)
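
Pieced together from the hunk above, each file-backed source now carries a signed URL and the original file name. Roughly, the per-document entry built by `KnowledgeBrainQA` looks like this (a readable reconstruction of the diff, not the verbatim final code):

```python
source_entry = {
    "name": doc.metadata["url"]
    if "url" in doc.metadata
    else doc.metadata["file_name"],
    "type": "url" if "url" in doc.metadata else "file",
    # File sources get a short-lived signed URL built from the brain id and file name;
    # URL sources keep an empty source_url as before.
    "source_url": generate_file_signed_url(
        f"{brain.brain_id}/{doc.metadata['file_name']}"
    ).get("signedURL", "")
    if "url" not in doc.metadata
    else "",
    "original_file_name": doc.metadata["original_file_name"],
}
```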

View File

@ -60,7 +60,8 @@ class QuivrRAG(BaseModel, RAGInterface):
temperature: float = 0.1
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str = None # pyright: ignore reportPrivateUsage=none
max_tokens: int = 2000
max_tokens: int = 2000 # Output length
max_input: int = 2000
streaming: bool = False
@property
@ -92,6 +93,7 @@ class QuivrRAG(BaseModel, RAGInterface):
streaming: bool = False,
prompt_id: Optional[UUID] = None,
max_tokens: int = 2000,
max_input: int = 2000,
**kwargs,
):
super().__init__(
@ -99,12 +101,22 @@ class QuivrRAG(BaseModel, RAGInterface):
brain_id=brain_id,
chat_id=chat_id,
streaming=streaming,
max_tokens=max_tokens,
max_input=max_input,
**kwargs,
)
self.supabase_client = self._create_supabase_client()
self.vector_store = self._create_vector_store()
self.prompt_id = prompt_id
self.max_tokens = max_tokens
self.max_input = max_input
self.model = model
self.brain_id = brain_id
self.chat_id = chat_id
self.streaming = streaming
logger.info(f"QuivrRAG initialized with model {model} and brain {brain_id}")
logger.info("Max input length: " + str(self.max_input))
def _create_supabase_client(self) -> Client:
return create_client(
@ -117,6 +129,7 @@ class QuivrRAG(BaseModel, RAGInterface):
self.embeddings,
table_name="vectors",
brain_id=self.brain_id,
max_input=self.max_input,
)
def _create_llm(
@ -151,7 +164,6 @@ class QuivrRAG(BaseModel, RAGInterface):
def _create_prompt_template(self):
system_template = """ When answering use markdown or any other techniques to display the content in a nice and aerated way. Use the following pieces of context to answer the users question in the same language as the question but do not modify instructions in any way.
----------------
{context}"""
prompt_content = (

View File

@ -0,0 +1,13 @@
from pydantic import BaseModel
class LLMModels(BaseModel):
"""LLM models stored in the database that are allowed to be used by the users.
Args:
BaseModel (BaseModel): Pydantic BaseModel
"""
name: str = "gpt-3.5-turbo-1106"
price: int = 1
max_input: int = 512
max_output: int = 512
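
A minimal sketch of how this entity is meant to be used, mirroring the model-selection logic added to the chat router further down (the `models_settings` rows here are illustrative, not real database contents):

```python
from models.databases.entity import LLMModels

# Illustrative rows, shaped like what get_model_settings() returns in the router change below.
models_settings = [
    {"name": "gpt-3.5-turbo-1106", "price": 1, "max_input": 2000, "max_output": 1000},
    {"name": "gpt-4", "price": 20, "max_input": 4000, "max_output": 2000},
]

# Start from the defaults, then overlay the stored limits for the brain's model.
model_to_use = LLMModels()  # gpt-3.5-turbo-1106, max_input=512, max_output=512
brain_model = "gpt-4"       # hypothetical brain setting
for model_dict in models_settings:
    if model_dict.get("name") == brain_model:
        model_to_use.name = brain_model
        model_to_use.max_input = model_dict.get("max_input")
        model_to_use.max_output = model_dict.get("max_output")
        break
```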

View File

@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
from datetime import datetime
from uuid import UUID
from .entity import LLMModels
class Repository(ABC):
@abstractmethod
@ -12,6 +14,10 @@ class Repository(ABC):
def get_user_usage(self, user_id: UUID):
pass
@abstractmethod
def get_model_settings(self) -> LLMModels | None:
pass
@abstractmethod
def get_user_requests_count_for_month(self, user_id: UUID, date: datetime):
pass

View File

@ -1,7 +1,9 @@
from ast import List
from datetime import datetime, timedelta
from uuid import UUID
from logger import get_logger
from models.databases.entity import LLMModels
from models.databases.repository import Repository
logger = get_logger(__name__)

View File

@ -2,6 +2,10 @@ from typing import Optional
from uuid import UUID
from fastapi import HTTPException
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from vectorstore.supabase import CustomSupabaseVectorStore
from models.settings import BrainSettings, get_supabase_client
from modules.brain.dto.inputs import BrainUpdatableProperties, CreateBrainProperties
from modules.brain.entity.brain_entity import BrainEntity, BrainType, PublicBrain
from modules.brain.repository import (
@ -22,6 +26,10 @@ from modules.brain.service.api_brain_definition_service import ApiBrainDefinitio
from modules.brain.service.utils.validate_brain import validate_api_brain
from modules.knowledge.service.knowledge_service import KnowledgeService
from logger import get_logger
logger = get_logger(__name__)
knowledge_service = KnowledgeService()
# TODO: directly use api_brain_definition repository
api_brain_definition_service = ApiBrainDefinitionService()
@ -44,6 +52,70 @@ class BrainService:
def get_brain_by_id(self, brain_id: UUID):
return self.brain_repository.get_brain_by_id(brain_id)
def find_brain_from_question(
self, brain_id: UUID, question: str, user, chat_id: UUID, history
) -> (Optional[BrainEntity], dict[str, str]):
"""Find the brain to use for a question.
Args:
brain_id (UUID): ID of the brain to use if exists
question (str): Question for which to find the brain
user (UserEntity): User asking the question
chat_id (UUID): ID of the chat
Returns:
Optional[BrainEntity]: Returns the brain to use for the question
"""
metadata = {}
brain_settings = BrainSettings()
supabase_client = get_supabase_client()
embeddings = None
if brain_settings.ollama_api_base_url:
embeddings = OllamaEmbeddings(
base_url=brain_settings.ollama_api_base_url
) # pyright: ignore reportPrivateUsage=none
else:
embeddings = OpenAIEmbeddings()
vector_store = CustomSupabaseVectorStore(
supabase_client, embeddings, table_name="vectors", user_id=user.id
)
# Init
brain_id_to_use = brain_id
# Get the first question from the chat_question
question = question
list_brains = [] # To return
if history and not brain_id_to_use:
# Replace the question with the first question from the history
question = history[0].user_message
if history and not brain_id:
brain_id_to_use = history[0].brain_id
# Calculate the closest brains to the question
list_brains = vector_store.find_brain_closest_query(user.id, question)
unique_list_brains = []
seen_brain_ids = set()
for brain in list_brains:
if brain["id"] not in seen_brain_ids:
unique_list_brains.append(brain)
seen_brain_ids.add(brain["id"])
metadata["close_brains"] = unique_list_brains[:5]
if list_brains and not brain_id_to_use:
brain_id_to_use = list_brains[0]["id"]
return brain_id_to_use, metadata
def create_brain(
self,
user_id: UUID,
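
Despite the `Optional[BrainEntity]` annotation, the method returns the id of the brain to use together with a metadata dict describing the closest brains. The new stream handler consumes it roughly like this (excerpted from the chat router change further down):

```python
brain_id_to_use, metadata_brain = brain_service.find_brain_from_question(
    brain_id, chat_question.question, current_user, chat_id, history
)

metadata = {**metadata, **metadata_brain}  # merges close_brains into the response metadata
brain = brain_service.get_brain_by_id(brain_id_to_use)
```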

View File

@ -1,10 +1,6 @@
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from llm.api_brain_qa import APIBrainQA
from llm.composite_brain_qa import CompositeBrainQA
from llm.knowledge_brain_qa import KnowledgeBrainQA
from logger import get_logger
from models.settings import BrainSettings, get_supabase_client
from modules.brain.entity.brain_entity import BrainType, RoleEnum
from modules.brain.service.api_brain_definition_service import ApiBrainDefinitionService
from modules.brain.service.brain_authorization_service import (
@ -13,7 +9,6 @@ from modules.brain.service.brain_authorization_service import (
from modules.brain.service.brain_service import BrainService
from modules.chat.controller.chat.interface import ChatInterface
from modules.chat.service.chat_service import ChatService
from vectorstore.supabase import CustomSupabaseVectorStore
chat_service = ChatService()
api_brain_definition_service = ApiBrainDefinitionService()
@ -43,64 +38,17 @@ class BrainfulChat(ChatInterface):
def get_answer_generator(
self,
brain_id,
brain,
chat_id,
model,
max_tokens,
max_input,
temperature,
streaming,
prompt_id,
user_id,
chat_question,
metadata,
):
metadata = {}
brain_settings = BrainSettings()
supabase_client = get_supabase_client()
embeddings = None
if brain_settings.ollama_api_base_url:
embeddings = OllamaEmbeddings(
base_url=brain_settings.ollama_api_base_url
) # pyright: ignore reportPrivateUsage=none
else:
embeddings = OpenAIEmbeddings()
vector_store = CustomSupabaseVectorStore(
supabase_client, embeddings, table_name="vectors", user_id=user_id
)
# Init
brain_id_to_use = brain_id
# Get the first question from the chat_question
question = chat_question.question
history = chat_service.get_chat_history(chat_id)
list_brains = [] # To return
if history and not brain_id_to_use:
# Replace the question with the first question from the history
question = history[0].user_message
if history and not brain_id:
brain_id_to_use = history[0].brain_id
# Calculate the closest brains to the question
list_brains = vector_store.find_brain_closest_query(user_id, question)
metadata["close_brains"] = list_brains[:5]
if list_brains and not brain_id_to_use:
brain_id_to_use = list_brains[0]["id"]
# GENERIC
follow_up_questions = chat_service.get_follow_up_question(chat_id)
metadata["follow_up_questions"] = follow_up_questions
metadata["model"] = model
metadata["max_tokens"] = max_tokens
metadata["temperature"] = temperature
brain = brain_service.get_brain_by_id(brain_id_to_use)
if (
brain
and brain.brain_type == BrainType.DOC
@ -110,33 +58,23 @@ class BrainfulChat(ChatInterface):
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
max_input=max_input,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
prompt_id=prompt_id,
metadata=metadata,
)
if brain.brain_type == BrainType.COMPOSITE:
return CompositeBrainQA(
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
prompt_id=prompt_id,
user_id=user_id,
metadata=metadata,
)
if brain.brain_type == BrainType.API:
brain_definition = api_brain_definition_service.get_api_brain_definition(
brain_id_to_use
brain.brain_id
)
return APIBrainQA(
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
max_input=max_input,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
@ -144,5 +82,7 @@ class BrainfulChat(ChatInterface):
user_id=user_id,
metadata=metadata,
raw=(brain_definition.raw if brain_definition else None),
jq_instructions=(brain_definition.jq_instructions if brain_definition else None),
jq_instructions=(
brain_definition.jq_instructions if brain_definition else None
),
)
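
With the route-level logic removed from `BrainfulChat`, callers now resolve the brain and the model limits themselves and pass them in. The stream route further down builds the generator like this (excerpt from that route, shown here only to illustrate the new call shape):

```python
gpt_answer_generator = chat_instance.get_answer_generator(
    chat_id=str(chat_id),
    model=model_to_use.name,
    max_tokens=model_to_use.max_output,
    max_input=model_to_use.max_input,
    temperature=0.1,
    streaming=True,
    prompt_id=chat_question.prompt_id,
    user_id=current_user.id,
    chat_question=chat_question,
    metadata=metadata,
    brain=brain,
)
```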

View File

@ -8,7 +8,6 @@ class BrainlessChat(ChatInterface):
def get_answer_generator(
self,
brain_id,
chat_id,
model,
max_tokens,

View File

@ -9,7 +9,6 @@ class ChatInterface(ABC):
@abstractmethod
def get_answer_generator(
self,
brain_id,
chat_id,
model,
max_tokens,

View File

@ -25,6 +25,17 @@ class NullableUUID(UUID):
def check_user_requests_limit(user: UserIdentity, model: str):
"""Checks the user requests limit.
It checks the user requests limit and raises an exception if the user has reached the limit.
By default, the user has a limit of 100 requests per month. The limit can be increased by upgrading the plan.
Args:
user (UserIdentity): User object
model (str): Model name for which the user is making the request
Raises:
HTTPException: Raises a 429 error if the user has reached the limit.
"""
userDailyUsage = UserUsage(id=user.id, email=user.email)
userSettings = userDailyUsage.get_user_settings()
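
For reference, a toy re-statement of the rule documented above (a behavioural sketch only, not the real helper; the detail message and the limit constant are assumptions based on the docstring):

```python
from fastapi import HTTPException

DEFAULT_MONTHLY_LIMIT = 100  # default quota mentioned in the docstring above


def check_requests_limit_sketch(
    monthly_request_count: int, limit: int = DEFAULT_MONTHLY_LIMIT
) -> None:
    """Raise a 429 once the user's monthly request count reaches their plan limit."""
    if monthly_request_count >= limit:
        raise HTTPException(
            status_code=429,
            detail="You have reached your monthly request limit.",  # wording assumed
        )


check_requests_limit_sketch(monthly_request_count=42)    # passes silently
# check_requests_limit_sketch(monthly_request_count=100) # would raise HTTP 429
```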

View File

@ -1,10 +1,10 @@
from typing import List, Optional
from uuid import UUID
from venv import logger
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import StreamingResponse
from middlewares.auth import AuthBearer, get_current_user
from models.databases.entity import LLMModels
from models.user_usage import UserUsage
from modules.brain.service.brain_service import BrainService
from modules.chat.controller.chat.brainful_chat import BrainfulChat
@ -21,6 +21,10 @@ from modules.chat.service.chat_service import ChatService
from modules.notification.service.notification_service import NotificationService
from modules.user.entity.user_identity import UserIdentity
from logger import get_logger
logger = get_logger(__name__)
chat_router = APIRouter()
notification_service = NotificationService()
@ -163,11 +167,12 @@ async def create_question_handler(
model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore
max_tokens=chat_question.max_tokens,
temperature=chat_question.temperature,
brain_id=str(brain_id),
streaming=False,
prompt_id=chat_question.prompt_id,
user_id=current_user.id,
chat_question=chat_question,
max_input=2000,
brain=brain_service.get_brain_by_id(brain_id),
metadata={},
)
chat_answer = gpt_answer_generator.generate_answer(
@ -201,49 +206,81 @@ async def create_stream_question_handler(
chat_instance = BrainfulChat()
chat_instance.validate_authorization(user_id=current_user.id, brain_id=brain_id)
user_daily_usage = UserUsage(
user_usage = UserUsage(
id=current_user.id,
email=current_user.email,
)
user_settings = user_daily_usage.get_user_settings()
# Get History
history = chat_service.get_chat_history(chat_id)
# Retrieve chat model (temperature, max_tokens, model)
if (
not chat_question.model
or chat_question.temperature is None
or not chat_question.max_tokens
):
fallback_model = "gpt-3.5-turbo-1106"
fallback_temperature = 0
fallback_max_tokens = 256
if brain_id:
brain = brain_service.get_brain_by_id(brain_id)
if brain:
fallback_model = brain.model or fallback_model
fallback_temperature = brain.temperature or fallback_temperature
fallback_max_tokens = brain.max_tokens or fallback_max_tokens
# Get user settings
user_settings = user_usage.get_user_settings()
chat_question.model = chat_question.model or fallback_model
chat_question.temperature = chat_question.temperature or fallback_temperature
chat_question.max_tokens = chat_question.max_tokens or fallback_max_tokens
# Get Model settings for the user
models_settings = user_usage.get_model_settings()
# Generic
brain_id_to_use, metadata_brain = brain_service.find_brain_from_question(
brain_id, chat_question.question, current_user, chat_id, history
)
# Add metadata_brain to metadata
metadata = {}
metadata = {**metadata, **metadata_brain}
follow_up_questions = chat_service.get_follow_up_question(chat_id)
metadata["follow_up_questions"] = follow_up_questions
# Get the Brain settings
brain = brain_service.get_brain_by_id(brain_id_to_use)
logger.info(f"Brain model: {brain.model}")
logger.info(f"Brain is : {str(brain)}")
try:
logger.info(f"Streaming request for {chat_question.model}")
check_user_requests_limit(current_user, chat_question.model)
# TODO check if model is in the list of models available for the user
# Default model is gpt-3.5-turbo-1106
model_to_use = LLMModels(
name="gpt-3.5-turbo-1106", price=1, max_input=512, max_output=512
)
is_model_ok = chat_question.model in user_settings.get("models", ["gpt-3.5-turbo-1106"]) # type: ignore
is_brain_model_available = any(
brain.model == model_dict.get("name") for model_dict in models_settings
)
is_user_allowed_model = brain.model in user_settings.get(
"models", ["gpt-3.5-turbo-1106"]
) # Checks if the model is available in the list of models
logger.info(f"Brain model: {brain.model}")
logger.info(f"User models: {user_settings.get('models', [])}")
logger.info(f"Model available: {is_brain_model_available}")
logger.info(f"User allowed model: {is_user_allowed_model}")
if is_brain_model_available and is_user_allowed_model:
# Use the model from the brain
model_to_use.name = brain.model
for model_dict in models_settings:
if model_dict.get("name") == model_to_use.name:
logger.info(f"Using model {model_to_use.name}")
model_to_use.max_input = model_dict.get("max_input")
model_to_use.max_output = model_dict.get("max_output")
break
metadata["model"] = model_to_use.name
metadata["max_tokens"] = model_to_use.max_output
metadata["max_input"] = model_to_use.max_input
check_user_requests_limit(current_user, chat_question.model)
gpt_answer_generator = chat_instance.get_answer_generator(
chat_id=str(chat_id),
model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore
max_tokens=chat_question.max_tokens,
temperature=chat_question.temperature, # type: ignore
model=model_to_use.name,
max_tokens=model_to_use.max_output,
max_input=model_to_use.max_input,
temperature=0.1,
streaming=True,
prompt_id=chat_question.prompt_id,
brain_id=brain_id,
user_id=current_user.id,
chat_question=chat_question,
metadata=metadata,
brain=brain,
)
return StreamingResponse(

View File

@ -32,6 +32,7 @@ class Sources(BaseModel):
name: str
source_url: str
type: str
original_file_name: str
class Config:
json_encoders = {

View File

@ -9,10 +9,7 @@ from models import File, get_documents_vector_store
from packages.files.file import compute_sha1_from_content
async def process_audio(
file: File,
user,
):
async def process_audio(file: File, user, original_file_name):
temp_filename = None
file_sha = ""
dateshort = time.strftime("%Y%m%d-%H%M%S")

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
async def process_python(file: File, brain_id):
async def process_python(file: File, brain_id, original_file_name):
return await process_file(
file=file,
loader_class=PythonLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -2,7 +2,6 @@ import time
from logger import get_logger
from models import File
from models.settings import get_supabase_db
from modules.brain.service.brain_vector_service import BrainVectorService
from packages.embeddings.vectors import Neurons
from repository.files.upload_file import DocumentSerializable
@ -10,12 +9,7 @@ from repository.files.upload_file import DocumentSerializable
logger = get_logger(__name__)
async def process_file(
file: File,
loader_class,
brain_id,
):
database = get_supabase_db()
async def process_file(file: File, loader_class, brain_id, original_file_name):
dateshort = time.strftime("%Y%m%d")
neurons = Neurons()
@ -28,6 +22,7 @@ async def process_file(
"chunk_size": file.chunk_size,
"chunk_overlap": file.chunk_overlap,
"date": dateshort,
"original_file_name": original_file_name or file.file_name,
}
docs = []

View File

@ -4,12 +4,10 @@ from models import File
from .common import process_file
def process_csv(
file: File,
brain_id,
):
def process_csv(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=CSVLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_docx(file: File, brain_id):
def process_docx(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=Docx2txtLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_epub(file: File, brain_id):
def process_epub(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredEPubLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -52,6 +52,7 @@ async def process_github(
"chunk_size": chunk_size,
"chunk_overlap": chunk_overlap,
"date": dateshort,
"original_file_name": doc.metadata["original_file_name"],
}
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_html(file: File, brain_id):
def process_html(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredHTMLLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_markdown(file: File, brain_id):
def process_markdown(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredMarkdownLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_ipnyb(file: File, brain_id):
def process_ipnyb(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=NotebookLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_odt(file: File, brain_id):
def process_odt(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredPDFLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_pdf(file: File, brain_id):
def process_pdf(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredPDFLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_powerpoint(file: File, brain_id):
def process_powerpoint(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredPowerPointLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,12 +4,10 @@ from packages.files.loaders.telegram import TelegramChatFileLoader
from .common import process_file
def process_telegram(
file: File,
brain_id,
):
def process_telegram(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=TelegramChatFileLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -7,9 +7,11 @@ from .common import process_file
async def process_txt(
file: File,
brain_id,
original_file_name,
):
return await process_file(
file=file,
loader_class=TextLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,12 +4,10 @@ from models.files import File
from .common import process_file
def process_xlsx(
file: File,
brain_id,
):
def process_xlsx(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredExcelLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -86,6 +86,7 @@ async def filter_file(
result = await file_processors[file.file_extension](
file=file,
brain_id=brain_id,
original_file_name=original_file_name,
)
if result is None or result == 0:
return create_response(

View File

@ -20,6 +20,15 @@ class DocumentAnswer:
def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
"""Finds the best brain to answer the question based on the question's meaning.
Args:
brain_id (UUID): Id of the brain to search in
question (str): Question to search for in the vector store
Returns:
str: _description_
"""
# TODO: Move to AnswerGenerator service
supabase_client = get_supabase_client()
embeddings = get_embeddings()

View File

@ -15,6 +15,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
brain_id: str = "none"
user_id: str = "none"
number_docs: int = 35
max_input: int = 2000
def __init__(
self,
@ -24,11 +25,13 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
brain_id: str = "none",
user_id: str = "none",
number_docs: int = 35,
max_input: int = 2000,
):
super().__init__(client, embedding, table_name)
self.brain_id = brain_id
self.user_id = user_id
self.number_docs = number_docs
self.max_input = max_input
def find_brain_closest_query(
self,
@ -65,7 +68,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
def similarity_search(
self,
query: str,
k: int = 35,
k: int = 40,
table: str = "match_vectors",
threshold: float = 0.5,
**kwargs: Any,
@ -98,5 +101,15 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
]
documents = [doc for doc, _ in match_result]
max_tokens_input = self.max_input
documents_to_return = []
return documents
# Limits to max_tokens_input with metadata chunk_size
for doc in documents:
if doc.metadata["chunk_size"] <= max_tokens_input:
documents_to_return.append(doc)
max_tokens_input -= doc.metadata["chunk_size"]
else:
break
return documents_to_return
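
The new tail of `similarity_search` turns `max_input` into a greedy token budget: matches are kept in similarity order while their `chunk_size` still fits, and iteration stops at the first document that would overflow. A standalone illustration of that behaviour (a sketch using a stub document type, not the class method itself):

```python
from dataclasses import dataclass, field


@dataclass
class Doc:
    """Stub standing in for a langchain Document that carries chunk_size metadata."""
    metadata: dict = field(default_factory=dict)


def trim_to_budget(documents: list[Doc], max_input: int = 2000) -> list[Doc]:
    kept, remaining = [], max_input
    for doc in documents:
        if doc.metadata["chunk_size"] <= remaining:
            kept.append(doc)
            remaining -= doc.metadata["chunk_size"]
        else:
            break  # same early stop as the method above
    return kept


docs = [Doc({"chunk_size": 900}), Doc({"chunk_size": 800}), Doc({"chunk_size": 500})]
print(len(trim_to_budget(docs)))  # 2: 900 + 800 fit the 2000-token budget, 500 does not
```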