From cc39f9e3ba9e5fffd6541d5f85d71dac14ec905f Mon Sep 17 00:00:00 2001 From: Stan Girard Date: Sat, 27 Jan 2024 01:50:58 -0800 Subject: [PATCH] feat(14k): done (#2102) # Description Please include a summary of the changes and the related issue. Please also include relevant motivation and context. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate): --- .github/workflows/stale.yml | 14 +-- .github/workflows/vercel-docs.yml | 23 ---- .github/workflows/vitest.yml | 32 ------ .vscode/settings.json | 2 +- Pipfile | 5 +- Pipfile.lock | 81 ++++++++++++-- backend/llm/knowledge_brain_qa.py | 13 ++- backend/llm/rags/quivr_rag.py | 16 ++- backend/models/databases/entity.py | 13 +++ backend/models/databases/repository.py | 6 ++ .../models/databases/supabase/user_usage.py | 2 + .../modules/brain/service/brain_service.py | 72 +++++++++++++ .../chat/controller/chat/brainful_chat.py | 78 ++------------ .../chat/controller/chat/brainless_chat.py | 1 - .../modules/chat/controller/chat/interface.py | 1 - backend/modules/chat/controller/chat/utils.py | 11 ++ .../modules/chat/controller/chat_routes.py | 101 ++++++++++++------ backend/modules/chat/dto/chats.py | 1 + backend/packages/files/parsers/audio.py | 5 +- backend/packages/files/parsers/code_python.py | 3 +- backend/packages/files/parsers/common.py | 9 +- backend/packages/files/parsers/csv.py | 6 +- backend/packages/files/parsers/docx.py | 3 +- backend/packages/files/parsers/epub.py | 3 +- backend/packages/files/parsers/github.py | 1 + backend/packages/files/parsers/html.py | 3 +- backend/packages/files/parsers/markdown.py | 3 +- backend/packages/files/parsers/notebook.py | 3 +- backend/packages/files/parsers/odt.py | 3 +- backend/packages/files/parsers/pdf.py | 3 +- backend/packages/files/parsers/powerpoint.py | 3 +- backend/packages/files/parsers/telegram.py | 6 +- backend/packages/files/parsers/txt.py | 2 + backend/packages/files/parsers/xlsx.py | 6 +- backend/packages/files/processors.py | 1 + .../brain/get_question_context_from_brain.py | 9 ++ backend/vectorstore/supabase.py | 17 ++- 37 files changed, 349 insertions(+), 212 deletions(-) delete mode 100644 .github/workflows/vercel-docs.yml delete mode 100644 .github/workflows/vitest.yml create mode 100644 backend/models/databases/entity.py diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index e1be87da3..d6ca3dbd9 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,12 +1,12 @@ -name: 'Close stale issues and PRs' +name: "Close stale issues and PRs" on: schedule: - - cron: '0 */4 * * *' + - cron: "0 */4 * * *" permissions: - contents: write # only for delete-branch option - issues: write - pull-requests: write + contents: write # only for delete-branch option + issues: write + pull-requests: write jobs: stale: @@ -16,9 +16,9 @@ jobs: with: exempt-assignees: true exempt-draft-pr: true - days-before-stale: 30 + days-before-stale: 90 days-before-close: 5 operations-per-run: 400 exempt-milestones: true stale-issue-message: "Thanks for your contributions, we'll be closing this issue as it has gone stale. 
Feel free to reopen if you'd like to continue the discussion." - stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion." \ No newline at end of file + stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion." diff --git a/.github/workflows/vercel-docs.yml b/.github/workflows/vercel-docs.yml deleted file mode 100644 index b1692efe4..000000000 --- a/.github/workflows/vercel-docs.yml +++ /dev/null @@ -1,23 +0,0 @@ -# name: Deploy Docs Deployment -# env: -# VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }} -# VERCEL_PROJECT_ID: ${{ secrets.VERCEL_DOC_PROJECT_ID }} -# on: -# push: -# branches: [ "main" ] -# paths: -# - 'docs/**' -# jobs: -# Deploy-Preview: -# environment: production -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - name: Install Vercel CLI -# run: npm install --global vercel@latest -# - name: Pull Vercel Environment Information -# run: vercel pull --yes --environment=production --token=${{ secrets.VERCEL_TOKEN }} -# - name: Build Project Artifacts -# run: vercel build --prod --token=${{ secrets.VERCEL_TOKEN }} -# - name: Deploy Project Artifacts to Vercel -# run: vercel deploy --prebuilt --prod --token=${{ secrets.VERCEL_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/vitest.yml b/.github/workflows/vitest.yml deleted file mode 100644 index 61cefd78c..000000000 --- a/.github/workflows/vitest.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Vitest - -on: - push: - branches: [ main ] - paths: - - 'frontend/**' - pull_request: - branches: [ main ] - paths: - - 'frontend/**' - -jobs: - build: - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./frontend - strategy: - matrix: - node-version: [18] - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4 - with: - node-version: ${{ matrix.node-version }} - cache: 'yarn' - cache-dependency-path: frontend/yarn.lock - - run: yarn - - run: yarn run test-unit diff --git a/.vscode/settings.json b/.vscode/settings.json index 615de73dc..ca65cceaa 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,7 +12,7 @@ ], "editor.formatOnSave": true, "[python]": { - "editor.defaultFormatter": "esbenp.prettier-vscode", + "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, "editor.codeActionsOnSave": { "source.organizeImports": "explicit", diff --git a/Pipfile b/Pipfile index a7d1461fc..c6bb58bf5 100644 --- a/Pipfile +++ b/Pipfile @@ -23,7 +23,7 @@ python-jose = "==3.3.0" asyncpg = "==0.27.0" flake8 = "==6.0.0" flake8-black = "==0.3.6" -sentry-sdk = {extras = ["fastapi"], version = "==1.37.1"} +sentry-sdk = {extras = ["fastapi"] } pyright = "==1.1.316" resend = "==0.5.1" html5lib = "==1.1" @@ -34,7 +34,7 @@ redis = "==4.5.4" flower = "*" boto3 = "==1.33.7" botocore = "==1.33.7" -celery = {extras = ["sqs"], version = "*"} +celery = {extras = ["sqs"] } python-dotenv = "*" pytest-mock = "*" pytest-celery = "*" @@ -45,6 +45,7 @@ jq = "==1.6.0" pytest = "*" [dev-packages] +black = "*" [requires] python_version = "3.11" diff --git a/Pipfile.lock b/Pipfile.lock index d258bb783..4cb69de88 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": 
"3fd64a4e95ad0de8091ce39b705c9f87f941eb483f95f1c5d501f9bf473781be" + "sha256": "9ae12dd1d097d77ce3cb08c7b6b5e5fa8a96216a98df213860d0ea30bb22dcc5" }, "pipfile-spec": 6, "requires": { @@ -2008,10 +2008,10 @@ "fastapi" ], "hashes": [ - "sha256:7cd324dd2877fdc861f75cba4242bce23a58272a6fea581fcb218bb718bd9cc5", - "sha256:a249c7364827ee89daaa078bb8b56ece0b3d52d9130961bef2302b79bdf7fe70" + "sha256:24c83b0b41c887d33328a9166f5950dc37ad58f01c9f2fbff6b87a6f1094170c", + "sha256:acaf597b30258fc7663063b291aa99e58f3096e91fe1e6634f4b79f9c1943e8e" ], - "version": "==1.37.1" + "version": "==1.39.2" }, "setuptools": { "hashes": [ @@ -2398,7 +2398,7 @@ "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84", "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e" ], - "markers": "python_version >= '3.6'", + "markers": "python_version >= '3.10'", "version": "==2.0.7" }, "uvicorn": { @@ -2709,5 +2709,74 @@ "version": "==3.17.0" } }, - "develop": {} + "develop": { + "black": { + "hashes": [ + "sha256:0cd59d01bf3306ff7e3076dd7f4435fcd2fafe5506a6111cae1138fc7de52382", + "sha256:1e0fa70b8464055069864a4733901b31cbdbe1273f63a24d2fa9d726723d45ac", + "sha256:30fbf768cd4f4576598b1db0202413fafea9a227ef808d1a12230c643cefe9fc", + "sha256:39addf23f7070dbc0b5518cdb2018468ac249d7412a669b50ccca18427dba1f3", + "sha256:5134a6f6b683aa0a5592e3fd61dd3519d8acd953d93e2b8b76f9981245b65594", + "sha256:6a8977774929b5db90442729f131221e58cc5d8208023c6af9110f26f75b6b20", + "sha256:6cc5a6ba3e671cfea95a40030b16a98ee7dc2e22b6427a6f3389567ecf1b5262", + "sha256:780f13d03066a7daf1707ec723fdb36bd698ffa29d95a2e7ef33a8dd8fe43b5c", + "sha256:7fa8d9aaa22d846f8c0f7f07391148e5e346562e9b215794f9101a8339d8b6d8", + "sha256:827a7c0da520dd2f8e6d7d3595f4591aa62ccccce95b16c0e94bb4066374c4c2", + "sha256:82d9452aeabd51d1c8f0d52d4d18e82b9f010ecb30fd55867b5ff95904f427ff", + "sha256:94d5280d020dadfafc75d7cae899609ed38653d3f5e82e7ce58f75e76387ed3d", + "sha256:9aede09f72b2a466e673ee9fca96e4bccc36f463cac28a35ce741f0fd13aea8b", + "sha256:a15670c650668399c4b5eae32e222728185961d6ef6b568f62c1681d57b381ba", + "sha256:a5a0100b4bdb3744dd68412c3789f472d822dc058bb3857743342f8d7f93a5a7", + "sha256:aaf9aa85aaaa466bf969e7dd259547f4481b712fe7ee14befeecc152c403ee05", + "sha256:be305563ff4a2dea813f699daaffac60b977935f3264f66922b1936a5e492ee4", + "sha256:bf8dd261ee82df1abfb591f97e174345ab7375a55019cc93ad38993b9ff5c6ad", + "sha256:d74d4d0da276fbe3b95aa1f404182562c28a04402e4ece60cf373d0b902f33a0", + "sha256:e0e367759062dcabcd9a426d12450c6d61faf1704a352a49055a04c9f9ce8f5a", + "sha256:ec489cae76eac3f7573629955573c3a0e913641cafb9e3bfc87d8ce155ebdb29", + "sha256:f0dfbfbacfbf9cd1fac7a5ddd3e72510ffa93e841a69fcf4a6358feab1685382" + ], + "markers": "python_version >= '3.8'", + "version": "==24.1.0" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "mypy-extensions": { + "hashes": [ + "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", + "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.0" + }, + "packaging": { + "hashes": [ + "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", + "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2" + 
}, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, + "platformdirs": { + "hashes": [ + "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380", + "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420" + ], + "markers": "python_version >= '3.8'", + "version": "==4.1.0" + } + } } diff --git a/backend/llm/knowledge_brain_qa.py b/backend/llm/knowledge_brain_qa.py index 27adc9948..5e4b06a99 100644 --- a/backend/llm/knowledge_brain_qa.py +++ b/backend/llm/knowledge_brain_qa.py @@ -11,6 +11,7 @@ from llm.rags.rag_interface import RAGInterface from llm.utils.format_chat_history import format_chat_history from llm.utils.get_prompt_to_use import get_prompt_to_use from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id +from repository.files.generate_file_signed_url import generate_file_signed_url from logger import get_logger from models import BrainSettings from modules.brain.service.brain_service import BrainService @@ -61,6 +62,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): chat_id: str = None # pyright: ignore reportPrivateUsage=none brain_id: str # pyright: ignore reportPrivateUsage=none max_tokens: int = 2000 + max_input: int = 2000 streaming: bool = False knowledge_qa: Optional[RAGInterface] metadata: Optional[dict] = None @@ -76,6 +78,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): model: str, brain_id: str, chat_id: str, + max_tokens: int, streaming: bool = False, prompt_id: Optional[UUID] = None, metadata: Optional[dict] = None, @@ -97,6 +100,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): **kwargs, ) self.metadata = metadata + self.max_tokens = max_tokens @property def prompt_to_use(self): @@ -309,9 +313,14 @@ class KnowledgeBrainQA(BaseModel, QAInterface): if "url" in doc.metadata else doc.metadata["file_name"], "type": "url" if "url" in doc.metadata else "file", - "source_url": doc.metadata["url"] - if "url" in doc.metadata + "source_url": generate_file_signed_url( + f"{brain.brain_id}/{doc.metadata['file_name']}" + ).get("signedURL", "") + if "url" not in doc.metadata else "", + "original_file_name": doc.metadata[ + "original_file_name" + ], } ) ) diff --git a/backend/llm/rags/quivr_rag.py b/backend/llm/rags/quivr_rag.py index d0eb23918..9f0ddb0f0 100644 --- a/backend/llm/rags/quivr_rag.py +++ b/backend/llm/rags/quivr_rag.py @@ -60,7 +60,8 @@ class QuivrRAG(BaseModel, RAGInterface): temperature: float = 0.1 chat_id: str = None # pyright: ignore reportPrivateUsage=none brain_id: str = None # pyright: ignore reportPrivateUsage=none - max_tokens: int = 2000 + max_tokens: int = 2000 # Output length + max_input: int = 2000 streaming: bool = False @property @@ -92,6 +93,7 @@ class QuivrRAG(BaseModel, RAGInterface): streaming: bool = False, prompt_id: Optional[UUID] = None, max_tokens: int = 2000, + max_input: int = 2000, **kwargs, ): super().__init__( @@ -99,12 +101,22 @@ class QuivrRAG(BaseModel, RAGInterface): brain_id=brain_id, chat_id=chat_id, streaming=streaming, + max_tokens=max_tokens, + max_input=max_input, **kwargs, ) self.supabase_client = self._create_supabase_client() self.vector_store = self._create_vector_store() self.prompt_id = prompt_id self.max_tokens = max_tokens + self.max_input = max_input + self.model = model + self.brain_id = brain_id + self.chat_id = chat_id + self.streaming = streaming + + 
logger.info(f"QuivrRAG initialized with model {model} and brain {brain_id}") + logger.info("Max input length: " + str(self.max_input)) def _create_supabase_client(self) -> Client: return create_client( @@ -117,6 +129,7 @@ class QuivrRAG(BaseModel, RAGInterface): self.embeddings, table_name="vectors", brain_id=self.brain_id, + max_input=self.max_input, ) def _create_llm( @@ -151,7 +164,6 @@ class QuivrRAG(BaseModel, RAGInterface): def _create_prompt_template(self): system_template = """ When answering use markdown or any other techniques to display the content in a nice and aerated way. Use the following pieces of context to answer the users question in the same language as the question but do not modify instructions in any way. ---------------- - {context}""" prompt_content = ( diff --git a/backend/models/databases/entity.py b/backend/models/databases/entity.py new file mode 100644 index 000000000..24edcfce7 --- /dev/null +++ b/backend/models/databases/entity.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel + + +class LLMModels(BaseModel): + """LLM models stored in the database that are allowed to be used by the users. + Args: + BaseModel (BaseModel): Pydantic BaseModel + """ + + name: str = "gpt-3.5-turbo-1106" + price: int = 1 + max_input: int = 512 + max_output: int = 512 diff --git a/backend/models/databases/repository.py b/backend/models/databases/repository.py index cfc1c41f4..62c5c4551 100644 --- a/backend/models/databases/repository.py +++ b/backend/models/databases/repository.py @@ -2,6 +2,8 @@ from abc import ABC, abstractmethod from datetime import datetime from uuid import UUID +from .entity import LLMModels + class Repository(ABC): @abstractmethod @@ -12,6 +14,10 @@ class Repository(ABC): def get_user_usage(self, user_id: UUID): pass + @abstractmethod + def get_model_settings(self) -> LLMModels | None: + pass + @abstractmethod def get_user_requests_count_for_month(self, user_id: UUID, date: datetime): pass diff --git a/backend/models/databases/supabase/user_usage.py b/backend/models/databases/supabase/user_usage.py index 7cc19b92c..de84b0988 100644 --- a/backend/models/databases/supabase/user_usage.py +++ b/backend/models/databases/supabase/user_usage.py @@ -1,7 +1,9 @@ +from ast import List from datetime import datetime, timedelta from uuid import UUID from logger import get_logger +from models.databases.entity import LLMModels from models.databases.repository import Repository logger = get_logger(__name__) diff --git a/backend/modules/brain/service/brain_service.py b/backend/modules/brain/service/brain_service.py index 7ecca5d37..11fbf9563 100644 --- a/backend/modules/brain/service/brain_service.py +++ b/backend/modules/brain/service/brain_service.py @@ -2,6 +2,10 @@ from typing import Optional from uuid import UUID from fastapi import HTTPException +from langchain.embeddings.ollama import OllamaEmbeddings +from langchain.embeddings.openai import OpenAIEmbeddings +from vectorstore.supabase import CustomSupabaseVectorStore +from models.settings import BrainSettings, get_supabase_client from modules.brain.dto.inputs import BrainUpdatableProperties, CreateBrainProperties from modules.brain.entity.brain_entity import BrainEntity, BrainType, PublicBrain from modules.brain.repository import ( @@ -22,6 +26,10 @@ from modules.brain.service.api_brain_definition_service import ApiBrainDefinitio from modules.brain.service.utils.validate_brain import validate_api_brain from modules.knowledge.service.knowledge_service import KnowledgeService +from logger import get_logger + +logger = 
get_logger(__name__) + knowledge_service = KnowledgeService() # TODO: directly user api_brain_definition repository api_brain_definition_service = ApiBrainDefinitionService() @@ -44,6 +52,70 @@ class BrainService: def get_brain_by_id(self, brain_id: UUID): return self.brain_repository.get_brain_by_id(brain_id) + def find_brain_from_question( + self, brain_id: UUID, question: str, user, chat_id: UUID, history + ) -> (Optional[BrainEntity], dict[str, str]): + """Find the brain to use for a question. + + Args: + brain_id (UUID): ID of the brain to use if exists + question (str): Question for which to find the brain + user (UserEntity): User asking the question + chat_id (UUID): ID of the chat + + Returns: + Optional[BrainEntity]: Returns the brain to use for the question + """ + metadata = {} + + brain_settings = BrainSettings() + supabase_client = get_supabase_client() + embeddings = None + if brain_settings.ollama_api_base_url: + embeddings = OllamaEmbeddings( + base_url=brain_settings.ollama_api_base_url + ) # pyright: ignore reportPrivateUsage=none + else: + embeddings = OpenAIEmbeddings() + vector_store = CustomSupabaseVectorStore( + supabase_client, embeddings, table_name="vectors", user_id=user.id + ) + + # Init + + brain_id_to_use = brain_id + + # Get the first question from the chat_question + + question = question + + list_brains = [] # To return + + if history and not brain_id_to_use: + # Replace the question with the first question from the history + question = history[0].user_message + + if history and not brain_id: + brain_id_to_use = history[0].brain_id + + # Calculate the closest brains to the question + list_brains = vector_store.find_brain_closest_query(user.id, question) + + unique_list_brains = [] + seen_brain_ids = set() + + for brain in list_brains: + if brain["id"] not in seen_brain_ids: + unique_list_brains.append(brain) + seen_brain_ids.add(brain["id"]) + + metadata["close_brains"] = unique_list_brains[:5] + + if list_brains and not brain_id_to_use: + brain_id_to_use = list_brains[0]["id"] + + return brain_id_to_use, metadata + def create_brain( self, user_id: UUID, diff --git a/backend/modules/chat/controller/chat/brainful_chat.py b/backend/modules/chat/controller/chat/brainful_chat.py index 78b24383f..55f12cd86 100644 --- a/backend/modules/chat/controller/chat/brainful_chat.py +++ b/backend/modules/chat/controller/chat/brainful_chat.py @@ -1,10 +1,6 @@ -from langchain.embeddings.ollama import OllamaEmbeddings -from langchain.embeddings.openai import OpenAIEmbeddings from llm.api_brain_qa import APIBrainQA -from llm.composite_brain_qa import CompositeBrainQA from llm.knowledge_brain_qa import KnowledgeBrainQA from logger import get_logger -from models.settings import BrainSettings, get_supabase_client from modules.brain.entity.brain_entity import BrainType, RoleEnum from modules.brain.service.api_brain_definition_service import ApiBrainDefinitionService from modules.brain.service.brain_authorization_service import ( @@ -13,7 +9,6 @@ from modules.brain.service.brain_authorization_service import ( from modules.brain.service.brain_service import BrainService from modules.chat.controller.chat.interface import ChatInterface from modules.chat.service.chat_service import ChatService -from vectorstore.supabase import CustomSupabaseVectorStore chat_service = ChatService() api_brain_definition_service = ApiBrainDefinitionService() @@ -43,64 +38,17 @@ class BrainfulChat(ChatInterface): def get_answer_generator( self, - brain_id, + brain, chat_id, model, max_tokens, + max_input, 
temperature, streaming, prompt_id, user_id, - chat_question, + metadata, ): - metadata = {} - brain_settings = BrainSettings() - supabase_client = get_supabase_client() - embeddings = None - if brain_settings.ollama_api_base_url: - embeddings = OllamaEmbeddings( - base_url=brain_settings.ollama_api_base_url - ) # pyright: ignore reportPrivateUsage=none - else: - embeddings = OpenAIEmbeddings() - vector_store = CustomSupabaseVectorStore( - supabase_client, embeddings, table_name="vectors", user_id=user_id - ) - - # Init - - brain_id_to_use = brain_id - - # Get the first question from the chat_question - - question = chat_question.question - history = chat_service.get_chat_history(chat_id) - - list_brains = [] # To return - - if history and not brain_id_to_use: - # Replace the question with the first question from the history - question = history[0].user_message - - if history and not brain_id: - brain_id_to_use = history[0].brain_id - - # Calculate the closest brains to the question - list_brains = vector_store.find_brain_closest_query(user_id, question) - - metadata["close_brains"] = list_brains[:5] - - if list_brains and not brain_id_to_use: - brain_id_to_use = list_brains[0]["id"] - - # GENERIC - follow_up_questions = chat_service.get_follow_up_question(chat_id) - metadata["follow_up_questions"] = follow_up_questions - metadata["model"] = model - metadata["max_tokens"] = max_tokens - metadata["temperature"] = temperature - - brain = brain_service.get_brain_by_id(brain_id_to_use) if ( brain and brain.brain_type == BrainType.DOC @@ -110,33 +58,23 @@ class BrainfulChat(ChatInterface): chat_id=chat_id, model=model, max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, prompt_id=prompt_id, metadata=metadata, ) - if brain.brain_type == BrainType.COMPOSITE: - return CompositeBrainQA( - chat_id=chat_id, - model=model, - max_tokens=max_tokens, - temperature=temperature, - brain_id=str(brain.brain_id), - streaming=streaming, - prompt_id=prompt_id, - user_id=user_id, - metadata=metadata, - ) if brain.brain_type == BrainType.API: brain_definition = api_brain_definition_service.get_api_brain_definition( - brain_id_to_use + brain.brain_id ) return APIBrainQA( chat_id=chat_id, model=model, max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, @@ -144,5 +82,7 @@ class BrainfulChat(ChatInterface): user_id=user_id, metadata=metadata, raw=(brain_definition.raw if brain_definition else None), - jq_instructions=(brain_definition.jq_instructions if brain_definition else None), + jq_instructions=( + brain_definition.jq_instructions if brain_definition else None + ), ) diff --git a/backend/modules/chat/controller/chat/brainless_chat.py b/backend/modules/chat/controller/chat/brainless_chat.py index 559431ac4..c5dcec220 100644 --- a/backend/modules/chat/controller/chat/brainless_chat.py +++ b/backend/modules/chat/controller/chat/brainless_chat.py @@ -8,7 +8,6 @@ class BrainlessChat(ChatInterface): def get_answer_generator( self, - brain_id, chat_id, model, max_tokens, diff --git a/backend/modules/chat/controller/chat/interface.py b/backend/modules/chat/controller/chat/interface.py index 39479e7b8..73745df7f 100644 --- a/backend/modules/chat/controller/chat/interface.py +++ b/backend/modules/chat/controller/chat/interface.py @@ -9,7 +9,6 @@ class ChatInterface(ABC): @abstractmethod def get_answer_generator( self, - brain_id, chat_id, model, max_tokens, diff --git 
a/backend/modules/chat/controller/chat/utils.py b/backend/modules/chat/controller/chat/utils.py index 1105e8c83..3ea0e17db 100644 --- a/backend/modules/chat/controller/chat/utils.py +++ b/backend/modules/chat/controller/chat/utils.py @@ -25,6 +25,17 @@ class NullableUUID(UUID): def check_user_requests_limit(user: UserIdentity, model: str): + """Checks the user requests limit. + It checks the user requests limit and raises an exception if the user has reached the limit. + By default, the user has a limit of 100 requests per month. The limit can be increased by upgrading the plan. + + Args: + user (UserIdentity): User object + model (str): Model name for which the user is making the request + + Raises: + HTTPException: Raises a 429 error if the user has reached the limit. + """ userDailyUsage = UserUsage(id=user.id, email=user.email) userSettings = userDailyUsage.get_user_settings() diff --git a/backend/modules/chat/controller/chat_routes.py b/backend/modules/chat/controller/chat_routes.py index 264d912fa..e2ac7bc68 100644 --- a/backend/modules/chat/controller/chat_routes.py +++ b/backend/modules/chat/controller/chat_routes.py @@ -1,10 +1,10 @@ from typing import List, Optional from uuid import UUID -from venv import logger from fastapi import APIRouter, Depends, HTTPException, Query, Request from fastapi.responses import StreamingResponse from middlewares.auth import AuthBearer, get_current_user +from models.databases.entity import LLMModels from models.user_usage import UserUsage from modules.brain.service.brain_service import BrainService from modules.chat.controller.chat.brainful_chat import BrainfulChat @@ -21,6 +21,10 @@ from modules.chat.service.chat_service import ChatService from modules.notification.service.notification_service import NotificationService from modules.user.entity.user_identity import UserIdentity +from logger import get_logger + +logger = get_logger(__name__) + chat_router = APIRouter() notification_service = NotificationService() @@ -163,11 +167,12 @@ async def create_question_handler( model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore max_tokens=chat_question.max_tokens, temperature=chat_question.temperature, - brain_id=str(brain_id), streaming=False, prompt_id=chat_question.prompt_id, user_id=current_user.id, - chat_question=chat_question, + max_input=2000, + brain=brain_service.get_brain_by_id(brain_id), + metadata={}, ) chat_answer = gpt_answer_generator.generate_answer( @@ -201,49 +206,81 @@ async def create_stream_question_handler( chat_instance = BrainfulChat() chat_instance.validate_authorization(user_id=current_user.id, brain_id=brain_id) - user_daily_usage = UserUsage( + user_usage = UserUsage( id=current_user.id, email=current_user.email, ) - user_settings = user_daily_usage.get_user_settings() + # Get History + history = chat_service.get_chat_history(chat_id) - # Retrieve chat model (temperature, max_tokens, model) - if ( - not chat_question.model - or chat_question.temperature is None - or not chat_question.max_tokens - ): - fallback_model = "gpt-3.5-turbo-1106" - fallback_temperature = 0 - fallback_max_tokens = 256 - if brain_id: - brain = brain_service.get_brain_by_id(brain_id) - if brain: - fallback_model = brain.model or fallback_model - fallback_temperature = brain.temperature or fallback_temperature - fallback_max_tokens = brain.max_tokens or fallback_max_tokens + # Get user settings + user_settings = user_usage.get_user_settings() - chat_question.model = chat_question.model or fallback_model - 
chat_question.temperature = chat_question.temperature or fallback_temperature - chat_question.max_tokens = chat_question.max_tokens or fallback_max_tokens + # Get Model settings for the user + models_settings = user_usage.get_model_settings() + # Generic + brain_id_to_use, metadata_brain = brain_service.find_brain_from_question( + brain_id, chat_question.question, current_user, chat_id, history + ) + + # Add metadata_brain to metadata + metadata = {} + metadata = {**metadata, **metadata_brain} + follow_up_questions = chat_service.get_follow_up_question(chat_id) + metadata["follow_up_questions"] = follow_up_questions + + # Get the Brain settings + brain = brain_service.get_brain_by_id(brain_id_to_use) + + logger.info(f"Brain model: {brain.model}") + logger.info(f"Brain is : {str(brain)}") try: - logger.info(f"Streaming request for {chat_question.model}") - check_user_requests_limit(current_user, chat_question.model) - # TODO check if model is in the list of models available for the user + # Default model is gpt-3.5-turbo-1106 + model_to_use = LLMModels( + name="gpt-3.5-turbo-1106", price=1, max_input=512, max_output=512 + ) - is_model_ok = chat_question.model in user_settings.get("models", ["gpt-3.5-turbo-1106"]) # type: ignore + is_brain_model_available = any( + brain.model == model_dict.get("name") for model_dict in models_settings + ) + + is_user_allowed_model = brain.model in user_settings.get( + "models", ["gpt-3.5-turbo-1106"] + ) # Checks if the model is available in the list of models + + logger.info(f"Brain model: {brain.model}") + logger.info(f"User models: {user_settings.get('models', [])}") + logger.info(f"Model available: {is_brain_model_available}") + logger.info(f"User allowed model: {is_user_allowed_model}") + + if is_brain_model_available and is_user_allowed_model: + # Use the model from the brain + model_to_use.name = brain.model + for model_dict in models_settings: + if model_dict.get("name") == model_to_use.name: + logger.info(f"Using model {model_to_use.name}") + model_to_use.max_input = model_dict.get("max_input") + model_to_use.max_output = model_dict.get("max_output") + break + + metadata["model"] = model_to_use.name + metadata["max_tokens"] = model_to_use.max_output + metadata["max_input"] = model_to_use.max_input + + check_user_requests_limit(current_user, chat_question.model) gpt_answer_generator = chat_instance.get_answer_generator( chat_id=str(chat_id), - model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore - max_tokens=chat_question.max_tokens, - temperature=chat_question.temperature, # type: ignore + model=model_to_use.name, + max_tokens=model_to_use.max_output, + max_input=model_to_use.max_input, + temperature=0.1, streaming=True, prompt_id=chat_question.prompt_id, - brain_id=brain_id, user_id=current_user.id, - chat_question=chat_question, + metadata=metadata, + brain=brain, ) return StreamingResponse( diff --git a/backend/modules/chat/dto/chats.py b/backend/modules/chat/dto/chats.py index 5a76de77f..6a46abff1 100644 --- a/backend/modules/chat/dto/chats.py +++ b/backend/modules/chat/dto/chats.py @@ -32,6 +32,7 @@ class Sources(BaseModel): name: str source_url: str type: str + original_file_name: str class Config: json_encoders = { diff --git a/backend/packages/files/parsers/audio.py b/backend/packages/files/parsers/audio.py index 6f210f383..fd6d8d578 100644 --- a/backend/packages/files/parsers/audio.py +++ b/backend/packages/files/parsers/audio.py @@ -9,10 +9,7 @@ from models import File, get_documents_vector_store from 
packages.files.file import compute_sha1_from_content -async def process_audio( - file: File, - user, -): +async def process_audio(file: File, user, original_file_name): temp_filename = None file_sha = "" dateshort = time.strftime("%Y%m%d-%H%M%S") diff --git a/backend/packages/files/parsers/code_python.py b/backend/packages/files/parsers/code_python.py index 4806424f3..95dfcbd87 100644 --- a/backend/packages/files/parsers/code_python.py +++ b/backend/packages/files/parsers/code_python.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -async def process_python(file: File, brain_id): +async def process_python(file: File, brain_id, original_file_name): return await process_file( file=file, loader_class=PythonLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/common.py b/backend/packages/files/parsers/common.py index 681ec7d7d..759189088 100644 --- a/backend/packages/files/parsers/common.py +++ b/backend/packages/files/parsers/common.py @@ -2,7 +2,6 @@ import time from logger import get_logger from models import File -from models.settings import get_supabase_db from modules.brain.service.brain_vector_service import BrainVectorService from packages.embeddings.vectors import Neurons from repository.files.upload_file import DocumentSerializable @@ -10,12 +9,7 @@ from repository.files.upload_file import DocumentSerializable logger = get_logger(__name__) -async def process_file( - file: File, - loader_class, - brain_id, -): - database = get_supabase_db() +async def process_file(file: File, loader_class, brain_id, original_file_name): dateshort = time.strftime("%Y%m%d") neurons = Neurons() @@ -28,6 +22,7 @@ async def process_file( "chunk_size": file.chunk_size, "chunk_overlap": file.chunk_overlap, "date": dateshort, + "original_file_name": original_file_name or file.file_name, } docs = [] diff --git a/backend/packages/files/parsers/csv.py b/backend/packages/files/parsers/csv.py index 470515b99..7e539fff1 100644 --- a/backend/packages/files/parsers/csv.py +++ b/backend/packages/files/parsers/csv.py @@ -4,12 +4,10 @@ from models import File from .common import process_file -def process_csv( - file: File, - brain_id, -): +def process_csv(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=CSVLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/docx.py b/backend/packages/files/parsers/docx.py index 3b26c1a8f..2a98b0c50 100644 --- a/backend/packages/files/parsers/docx.py +++ b/backend/packages/files/parsers/docx.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_docx(file: File, brain_id): +def process_docx(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=Docx2txtLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/epub.py b/backend/packages/files/parsers/epub.py index 695212aae..49b2e4e6b 100644 --- a/backend/packages/files/parsers/epub.py +++ b/backend/packages/files/parsers/epub.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_epub(file: File, brain_id): +def process_epub(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredEPubLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/github.py b/backend/packages/files/parsers/github.py 
index 1c2071983..44d4ef577 100644 --- a/backend/packages/files/parsers/github.py +++ b/backend/packages/files/parsers/github.py @@ -52,6 +52,7 @@ async def process_github( "chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort, + "original_file_name": doc.metadata["original_file_name"], } doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata) diff --git a/backend/packages/files/parsers/html.py b/backend/packages/files/parsers/html.py index 3e247cc9c..620419f26 100644 --- a/backend/packages/files/parsers/html.py +++ b/backend/packages/files/parsers/html.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_html(file: File, brain_id): +def process_html(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredHTMLLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/markdown.py b/backend/packages/files/parsers/markdown.py index a10f5edbc..600da1e36 100644 --- a/backend/packages/files/parsers/markdown.py +++ b/backend/packages/files/parsers/markdown.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_markdown(file: File, brain_id): +def process_markdown(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredMarkdownLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/notebook.py b/backend/packages/files/parsers/notebook.py index a610b8f44..7cbb1db58 100644 --- a/backend/packages/files/parsers/notebook.py +++ b/backend/packages/files/parsers/notebook.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_ipnyb(file: File, brain_id): +def process_ipnyb(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=NotebookLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/odt.py b/backend/packages/files/parsers/odt.py index 5c57de39e..a13fdf254 100644 --- a/backend/packages/files/parsers/odt.py +++ b/backend/packages/files/parsers/odt.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_odt(file: File, brain_id): +def process_odt(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredPDFLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/pdf.py b/backend/packages/files/parsers/pdf.py index fbc9e771f..0b138a214 100644 --- a/backend/packages/files/parsers/pdf.py +++ b/backend/packages/files/parsers/pdf.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_pdf(file: File, brain_id): +def process_pdf(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredPDFLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/powerpoint.py b/backend/packages/files/parsers/powerpoint.py index 2f6c49a26..3c02d1456 100644 --- a/backend/packages/files/parsers/powerpoint.py +++ b/backend/packages/files/parsers/powerpoint.py @@ -4,9 +4,10 @@ from models import File from .common import process_file -def process_powerpoint(file: File, brain_id): +def process_powerpoint(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredPowerPointLoader, 
brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/telegram.py b/backend/packages/files/parsers/telegram.py index 071cc4c79..416a7b53d 100644 --- a/backend/packages/files/parsers/telegram.py +++ b/backend/packages/files/parsers/telegram.py @@ -4,12 +4,10 @@ from packages.files.loaders.telegram import TelegramChatFileLoader from .common import process_file -def process_telegram( - file: File, - brain_id, -): +def process_telegram(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=TelegramChatFileLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/txt.py b/backend/packages/files/parsers/txt.py index b38b9318c..3e31cd3be 100644 --- a/backend/packages/files/parsers/txt.py +++ b/backend/packages/files/parsers/txt.py @@ -7,9 +7,11 @@ from .common import process_file async def process_txt( file: File, brain_id, + original_file_name, ): return await process_file( file=file, loader_class=TextLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/parsers/xlsx.py b/backend/packages/files/parsers/xlsx.py index c3c5d8f64..e349bbd70 100644 --- a/backend/packages/files/parsers/xlsx.py +++ b/backend/packages/files/parsers/xlsx.py @@ -4,12 +4,10 @@ from models.files import File from .common import process_file -def process_xlsx( - file: File, - brain_id, -): +def process_xlsx(file: File, brain_id, original_file_name): return process_file( file=file, loader_class=UnstructuredExcelLoader, brain_id=brain_id, + original_file_name=original_file_name, ) diff --git a/backend/packages/files/processors.py b/backend/packages/files/processors.py index 5a6a279f5..d7f903dd0 100644 --- a/backend/packages/files/processors.py +++ b/backend/packages/files/processors.py @@ -86,6 +86,7 @@ async def filter_file( result = await file_processors[file.file_extension]( file=file, brain_id=brain_id, + original_file_name=original_file_name, ) if result is None or result == 0: return create_response( diff --git a/backend/repository/brain/get_question_context_from_brain.py b/backend/repository/brain/get_question_context_from_brain.py index 9f6fde639..14eca2c96 100644 --- a/backend/repository/brain/get_question_context_from_brain.py +++ b/backend/repository/brain/get_question_context_from_brain.py @@ -20,6 +20,15 @@ class DocumentAnswer: def get_question_context_from_brain(brain_id: UUID, question: str) -> str: + """Finds the best brain to answer the question based on the question's meaning. 
+ + Args: + brain_id (UUID): Id of the brain to search in + question (str): Question to search for in the vector store + + Returns: + str: _descripton_ + """ # TODO: Move to AnswerGenerator service supabase_client = get_supabase_client() embeddings = get_embeddings() diff --git a/backend/vectorstore/supabase.py b/backend/vectorstore/supabase.py index 1e2e1e079..8c9e27add 100644 --- a/backend/vectorstore/supabase.py +++ b/backend/vectorstore/supabase.py @@ -15,6 +15,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): brain_id: str = "none" user_id: str = "none" number_docs: int = 35 + max_input: int = 2000 def __init__( self, @@ -24,11 +25,13 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): brain_id: str = "none", user_id: str = "none", number_docs: int = 35, + max_input: int = 2000, ): super().__init__(client, embedding, table_name) self.brain_id = brain_id self.user_id = user_id self.number_docs = number_docs + self.max_input = max_input def find_brain_closest_query( self, @@ -65,7 +68,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): def similarity_search( self, query: str, - k: int = 35, + k: int = 40, table: str = "match_vectors", threshold: float = 0.5, **kwargs: Any, @@ -98,5 +101,15 @@ class CustomSupabaseVectorStore(SupabaseVectorStore): ] documents = [doc for doc, _ in match_result] + max_tokens_input = self.max_input + documents_to_return = [] - return documents + # Limits to max_tokens_input with metadata chunk_size + for doc in documents: + if doc.metadata["chunk_size"] <= max_tokens_input: + documents_to_return.append(doc) + max_tokens_input -= doc.metadata["chunk_size"] + else: + break + + return documents_to_return
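
The most self-contained technique in this patch is the new tail of `CustomSupabaseVectorStore.similarity_search`: matched documents are kept in relevance order only while their cumulative `chunk_size` metadata fits inside the model's `max_input` budget. Below is a minimal, standalone sketch of that idea — not the project's actual class, and the `Doc` type and `limit_docs_to_budget` helper are invented here for illustration; the real code works on LangChain `Document` objects returned by the Supabase `match_vectors` RPC.

```python
# Minimal sketch of the chunk-budget filtering added in this patch.
# Assumption: each retrieved document carries a "chunk_size" entry in its
# metadata, as the upload pipeline in this patch now records.

from dataclasses import dataclass, field


@dataclass
class Doc:
    page_content: str
    metadata: dict = field(default_factory=dict)


def limit_docs_to_budget(docs: list[Doc], max_input: int = 2000) -> list[Doc]:
    """Keep the highest-ranked docs whose chunk sizes fit within max_input."""
    remaining = max_input
    selected: list[Doc] = []
    for doc in docs:
        chunk_size = doc.metadata.get("chunk_size", 0)
        if chunk_size <= remaining:
            selected.append(doc)
            remaining -= chunk_size
        else:
            # Mirrors the patch: stop at the first document that no longer fits.
            break
    return selected


if __name__ == "__main__":
    docs = [
        Doc("chunk A", {"chunk_size": 900}),
        Doc("chunk B", {"chunk_size": 800}),
        Doc("chunk C", {"chunk_size": 700}),
    ]
    # With the default budget of 2000, only A and B are returned.
    print([d.page_content for d in limit_docs_to_budget(docs)])
```

The per-model budget itself comes from the new `LLMModels` settings (`max_input` / `max_output`) that `chat_routes` looks up before building the answer generator, so a model with a larger context window automatically receives more retrieved chunks.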