feat(14k): done (#2102)

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] Ideally, I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
Commit cc39f9e3ba (parent 3fcdd016e2), authored by Stan Girard on 2024-01-27 01:50:58 -08:00 and committed via GitHub.
37 changed files with 349 additions and 212 deletions

View File

@ -1,12 +1,12 @@
name: 'Close stale issues and PRs'
name: "Close stale issues and PRs"
on:
schedule:
- cron: '0 */4 * * *'
- cron: "0 */4 * * *"
permissions:
contents: write # only for delete-branch option
issues: write
pull-requests: write
jobs:
stale:
@ -16,9 +16,9 @@ jobs:
with:
exempt-assignees: true
exempt-draft-pr: true
days-before-stale: 30
days-before-stale: 90
days-before-close: 5
operations-per-run: 400
exempt-milestones: true
stale-issue-message: "Thanks for your contributions, we'll be closing this issue as it has gone stale. Feel free to reopen if you'd like to continue the discussion."
stale-pr-message: "Thanks for your contributions, we'll be closing this PR as it has gone stale. Feel free to reopen if you'd like to continue the discussion."

View File

@ -1,23 +0,0 @@
# name: Deploy Docs Deployment
# env:
# VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
# VERCEL_PROJECT_ID: ${{ secrets.VERCEL_DOC_PROJECT_ID }}
# on:
# push:
# branches: [ "main" ]
# paths:
# - 'docs/**'
# jobs:
# Deploy-Preview:
# environment: production
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - name: Install Vercel CLI
# run: npm install --global vercel@latest
# - name: Pull Vercel Environment Information
# run: vercel pull --yes --environment=production --token=${{ secrets.VERCEL_TOKEN }}
# - name: Build Project Artifacts
# run: vercel build --prod --token=${{ secrets.VERCEL_TOKEN }}
# - name: Deploy Project Artifacts to Vercel
# run: vercel deploy --prebuilt --prod --token=${{ secrets.VERCEL_TOKEN }}

View File

@ -1,32 +0,0 @@
name: Vitest
on:
push:
branches: [ main ]
paths:
- 'frontend/**'
pull_request:
branches: [ main ]
paths:
- 'frontend/**'
jobs:
build:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./frontend
strategy:
matrix:
node-version: [18]
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4
with:
node-version: ${{ matrix.node-version }}
cache: 'yarn'
cache-dependency-path: frontend/yarn.lock
- run: yarn
- run: yarn run test-unit

View File

@ -12,7 +12,7 @@
],
"editor.formatOnSave": true,
"[python]": {
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit",

View File

@ -23,7 +23,7 @@ python-jose = "==3.3.0"
asyncpg = "==0.27.0"
flake8 = "==6.0.0"
flake8-black = "==0.3.6"
sentry-sdk = {extras = ["fastapi"], version = "==1.37.1"}
sentry-sdk = {extras = ["fastapi"] }
pyright = "==1.1.316"
resend = "==0.5.1"
html5lib = "==1.1"
@ -34,7 +34,7 @@ redis = "==4.5.4"
flower = "*"
boto3 = "==1.33.7"
botocore = "==1.33.7"
celery = {extras = ["sqs"], version = "*"}
celery = {extras = ["sqs"] }
python-dotenv = "*"
pytest-mock = "*"
pytest-celery = "*"
@ -45,6 +45,7 @@ jq = "==1.6.0"
pytest = "*"
[dev-packages]
black = "*"
[requires]
python_version = "3.11"

Pipfile.lock (generated, 81 changed lines)
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "3fd64a4e95ad0de8091ce39b705c9f87f941eb483f95f1c5d501f9bf473781be"
"sha256": "9ae12dd1d097d77ce3cb08c7b6b5e5fa8a96216a98df213860d0ea30bb22dcc5"
},
"pipfile-spec": 6,
"requires": {
@ -2008,10 +2008,10 @@
"fastapi"
],
"hashes": [
"sha256:7cd324dd2877fdc861f75cba4242bce23a58272a6fea581fcb218bb718bd9cc5",
"sha256:a249c7364827ee89daaa078bb8b56ece0b3d52d9130961bef2302b79bdf7fe70"
"sha256:24c83b0b41c887d33328a9166f5950dc37ad58f01c9f2fbff6b87a6f1094170c",
"sha256:acaf597b30258fc7663063b291aa99e58f3096e91fe1e6634f4b79f9c1943e8e"
],
"version": "==1.37.1"
"version": "==1.39.2"
},
"setuptools": {
"hashes": [
@ -2398,7 +2398,7 @@
"sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84",
"sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"
],
"markers": "python_version >= '3.6'",
"markers": "python_version >= '3.10'",
"version": "==2.0.7"
},
"uvicorn": {
@ -2709,5 +2709,74 @@
"version": "==3.17.0"
}
},
"develop": {}
"develop": {
"black": {
"hashes": [
"sha256:0cd59d01bf3306ff7e3076dd7f4435fcd2fafe5506a6111cae1138fc7de52382",
"sha256:1e0fa70b8464055069864a4733901b31cbdbe1273f63a24d2fa9d726723d45ac",
"sha256:30fbf768cd4f4576598b1db0202413fafea9a227ef808d1a12230c643cefe9fc",
"sha256:39addf23f7070dbc0b5518cdb2018468ac249d7412a669b50ccca18427dba1f3",
"sha256:5134a6f6b683aa0a5592e3fd61dd3519d8acd953d93e2b8b76f9981245b65594",
"sha256:6a8977774929b5db90442729f131221e58cc5d8208023c6af9110f26f75b6b20",
"sha256:6cc5a6ba3e671cfea95a40030b16a98ee7dc2e22b6427a6f3389567ecf1b5262",
"sha256:780f13d03066a7daf1707ec723fdb36bd698ffa29d95a2e7ef33a8dd8fe43b5c",
"sha256:7fa8d9aaa22d846f8c0f7f07391148e5e346562e9b215794f9101a8339d8b6d8",
"sha256:827a7c0da520dd2f8e6d7d3595f4591aa62ccccce95b16c0e94bb4066374c4c2",
"sha256:82d9452aeabd51d1c8f0d52d4d18e82b9f010ecb30fd55867b5ff95904f427ff",
"sha256:94d5280d020dadfafc75d7cae899609ed38653d3f5e82e7ce58f75e76387ed3d",
"sha256:9aede09f72b2a466e673ee9fca96e4bccc36f463cac28a35ce741f0fd13aea8b",
"sha256:a15670c650668399c4b5eae32e222728185961d6ef6b568f62c1681d57b381ba",
"sha256:a5a0100b4bdb3744dd68412c3789f472d822dc058bb3857743342f8d7f93a5a7",
"sha256:aaf9aa85aaaa466bf969e7dd259547f4481b712fe7ee14befeecc152c403ee05",
"sha256:be305563ff4a2dea813f699daaffac60b977935f3264f66922b1936a5e492ee4",
"sha256:bf8dd261ee82df1abfb591f97e174345ab7375a55019cc93ad38993b9ff5c6ad",
"sha256:d74d4d0da276fbe3b95aa1f404182562c28a04402e4ece60cf373d0b902f33a0",
"sha256:e0e367759062dcabcd9a426d12450c6d61faf1704a352a49055a04c9f9ce8f5a",
"sha256:ec489cae76eac3f7573629955573c3a0e913641cafb9e3bfc87d8ce155ebdb29",
"sha256:f0dfbfbacfbf9cd1fac7a5ddd3e72510ffa93e841a69fcf4a6358feab1685382"
],
"markers": "python_version >= '3.8'",
"version": "==24.1.0"
},
"click": {
"hashes": [
"sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28",
"sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"
],
"markers": "python_version >= '3.7'",
"version": "==8.1.7"
},
"mypy-extensions": {
"hashes": [
"sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d",
"sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.0"
},
"packaging": {
"hashes": [
"sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5",
"sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"
],
"markers": "python_version >= '3.7'",
"version": "==23.2"
},
"pathspec": {
"hashes": [
"sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08",
"sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"
],
"markers": "python_version >= '3.8'",
"version": "==0.12.1"
},
"platformdirs": {
"hashes": [
"sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380",
"sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420"
],
"markers": "python_version >= '3.8'",
"version": "==4.1.0"
}
}
}

View File

@ -11,6 +11,7 @@ from llm.rags.rag_interface import RAGInterface
from llm.utils.format_chat_history import format_chat_history
from llm.utils.get_prompt_to_use import get_prompt_to_use
from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id
from repository.files.generate_file_signed_url import generate_file_signed_url
from logger import get_logger
from models import BrainSettings
from modules.brain.service.brain_service import BrainService
@ -61,6 +62,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str # pyright: ignore reportPrivateUsage=none
max_tokens: int = 2000
max_input: int = 2000
streaming: bool = False
knowledge_qa: Optional[RAGInterface]
metadata: Optional[dict] = None
@ -76,6 +78,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
model: str,
brain_id: str,
chat_id: str,
max_tokens: int,
streaming: bool = False,
prompt_id: Optional[UUID] = None,
metadata: Optional[dict] = None,
@ -97,6 +100,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
**kwargs,
)
self.metadata = metadata
self.max_tokens = max_tokens
@property
def prompt_to_use(self):
@ -309,9 +313,14 @@ class KnowledgeBrainQA(BaseModel, QAInterface):
if "url" in doc.metadata
else doc.metadata["file_name"],
"type": "url" if "url" in doc.metadata else "file",
"source_url": doc.metadata["url"]
if "url" in doc.metadata
"source_url": generate_file_signed_url(
f"{brain.brain_id}/{doc.metadata['file_name']}"
).get("signedURL", "")
if "url" not in doc.metadata
else "",
"original_file_name": doc.metadata[
"original_file_name"
],
}
)
)
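
Pieced together from the hunk above, each file-backed source now carries a signed URL and the original file name. Roughly, the per-document entry built by `KnowledgeBrainQA` looks like this (a readable reconstruction of the diff, not the verbatim final code):

```python
source_entry = {
    "name": doc.metadata["url"]
    if "url" in doc.metadata
    else doc.metadata["file_name"],
    "type": "url" if "url" in doc.metadata else "file",
    # File sources get a short-lived signed URL built from the brain id and file name;
    # URL sources keep an empty source_url as before.
    "source_url": generate_file_signed_url(
        f"{brain.brain_id}/{doc.metadata['file_name']}"
    ).get("signedURL", "")
    if "url" not in doc.metadata
    else "",
    "original_file_name": doc.metadata["original_file_name"],
}
```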

View File

@ -60,7 +60,8 @@ class QuivrRAG(BaseModel, RAGInterface):
temperature: float = 0.1
chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str = None # pyright: ignore reportPrivateUsage=none
max_tokens: int = 2000
max_tokens: int = 2000 # Output length
max_input: int = 2000
streaming: bool = False
@property
@ -92,6 +93,7 @@ class QuivrRAG(BaseModel, RAGInterface):
streaming: bool = False,
prompt_id: Optional[UUID] = None,
max_tokens: int = 2000,
max_input: int = 2000,
**kwargs,
):
super().__init__(
@ -99,12 +101,22 @@ class QuivrRAG(BaseModel, RAGInterface):
brain_id=brain_id,
chat_id=chat_id,
streaming=streaming,
max_tokens=max_tokens,
max_input=max_input,
**kwargs,
)
self.supabase_client = self._create_supabase_client()
self.vector_store = self._create_vector_store()
self.prompt_id = prompt_id
self.max_tokens = max_tokens
self.max_input = max_input
self.model = model
self.brain_id = brain_id
self.chat_id = chat_id
self.streaming = streaming
logger.info(f"QuivrRAG initialized with model {model} and brain {brain_id}")
logger.info("Max input length: " + str(self.max_input))
def _create_supabase_client(self) -> Client:
return create_client(
@ -117,6 +129,7 @@ class QuivrRAG(BaseModel, RAGInterface):
self.embeddings,
table_name="vectors",
brain_id=self.brain_id,
max_input=self.max_input,
)
def _create_llm(
@ -151,7 +164,6 @@ class QuivrRAG(BaseModel, RAGInterface):
def _create_prompt_template(self):
system_template = """ When answering use markdown or any other techniques to display the content in a nice and aerated way. Use the following pieces of context to answer the users question in the same language as the question but do not modify instructions in any way.
----------------
{context}"""
prompt_content = (

View File

@ -0,0 +1,13 @@
from pydantic import BaseModel
class LLMModels(BaseModel):
"""LLM models stored in the database that are allowed to be used by the users.
Args:
BaseModel (BaseModel): Pydantic BaseModel
"""
name: str = "gpt-3.5-turbo-1106"
price: int = 1
max_input: int = 512
max_output: int = 512
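
A minimal sketch of how this entity is meant to be used, mirroring the model-selection logic added to the chat router further down (the `models_settings` rows here are illustrative, not real database contents):

```python
from models.databases.entity import LLMModels

# Illustrative rows, shaped like what get_model_settings() returns in the router change below.
models_settings = [
    {"name": "gpt-3.5-turbo-1106", "price": 1, "max_input": 2000, "max_output": 1000},
    {"name": "gpt-4", "price": 20, "max_input": 4000, "max_output": 2000},
]

# Start from the defaults, then overlay the stored limits for the brain's model.
model_to_use = LLMModels()  # gpt-3.5-turbo-1106, max_input=512, max_output=512
brain_model = "gpt-4"       # hypothetical brain setting
for model_dict in models_settings:
    if model_dict.get("name") == brain_model:
        model_to_use.name = brain_model
        model_to_use.max_input = model_dict.get("max_input")
        model_to_use.max_output = model_dict.get("max_output")
        break
```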

View File

@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
from datetime import datetime
from uuid import UUID
from .entity import LLMModels
class Repository(ABC):
@abstractmethod
@ -12,6 +14,10 @@ class Repository(ABC):
def get_user_usage(self, user_id: UUID):
pass
@abstractmethod
def get_model_settings(self) -> LLMModels | None:
pass
@abstractmethod
def get_user_requests_count_for_month(self, user_id: UUID, date: datetime):
pass

View File

@ -1,7 +1,9 @@
from ast import List
from datetime import datetime, timedelta
from uuid import UUID
from logger import get_logger
from models.databases.entity import LLMModels
from models.databases.repository import Repository
logger = get_logger(__name__)

View File

@ -2,6 +2,10 @@ from typing import Optional
from uuid import UUID
from fastapi import HTTPException
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from vectorstore.supabase import CustomSupabaseVectorStore
from models.settings import BrainSettings, get_supabase_client
from modules.brain.dto.inputs import BrainUpdatableProperties, CreateBrainProperties
from modules.brain.entity.brain_entity import BrainEntity, BrainType, PublicBrain
from modules.brain.repository import (
@ -22,6 +26,10 @@ from modules.brain.service.api_brain_definition_service import ApiBrainDefinitio
from modules.brain.service.utils.validate_brain import validate_api_brain
from modules.knowledge.service.knowledge_service import KnowledgeService
from logger import get_logger
logger = get_logger(__name__)
knowledge_service = KnowledgeService()
# TODO: directly use api_brain_definition repository
api_brain_definition_service = ApiBrainDefinitionService()
@ -44,6 +52,70 @@ class BrainService:
def get_brain_by_id(self, brain_id: UUID):
return self.brain_repository.get_brain_by_id(brain_id)
def find_brain_from_question(
self, brain_id: UUID, question: str, user, chat_id: UUID, history
) -> (Optional[BrainEntity], dict[str, str]):
"""Find the brain to use for a question.
Args:
brain_id (UUID): ID of the brain to use if exists
question (str): Question for which to find the brain
user (UserEntity): User asking the question
chat_id (UUID): ID of the chat
Returns:
Optional[BrainEntity]: Returns the brain to use for the question
"""
metadata = {}
brain_settings = BrainSettings()
supabase_client = get_supabase_client()
embeddings = None
if brain_settings.ollama_api_base_url:
embeddings = OllamaEmbeddings(
base_url=brain_settings.ollama_api_base_url
) # pyright: ignore reportPrivateUsage=none
else:
embeddings = OpenAIEmbeddings()
vector_store = CustomSupabaseVectorStore(
supabase_client, embeddings, table_name="vectors", user_id=user.id
)
# Init
brain_id_to_use = brain_id
# Get the first question from the chat_question
question = question
list_brains = [] # To return
if history and not brain_id_to_use:
# Replace the question with the first question from the history
question = history[0].user_message
if history and not brain_id:
brain_id_to_use = history[0].brain_id
# Calculate the closest brains to the question
list_brains = vector_store.find_brain_closest_query(user.id, question)
unique_list_brains = []
seen_brain_ids = set()
for brain in list_brains:
if brain["id"] not in seen_brain_ids:
unique_list_brains.append(brain)
seen_brain_ids.add(brain["id"])
metadata["close_brains"] = unique_list_brains[:5]
if list_brains and not brain_id_to_use:
brain_id_to_use = list_brains[0]["id"]
return brain_id_to_use, metadata
def create_brain(
self,
user_id: UUID,
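
Despite the `Optional[BrainEntity]` annotation, the method returns the id of the brain to use together with a metadata dict describing the closest brains. The new stream handler consumes it roughly like this (excerpted from the chat router change further down):

```python
brain_id_to_use, metadata_brain = brain_service.find_brain_from_question(
    brain_id, chat_question.question, current_user, chat_id, history
)

metadata = {**metadata, **metadata_brain}  # merges close_brains into the response metadata
brain = brain_service.get_brain_by_id(brain_id_to_use)
```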

View File

@ -1,10 +1,6 @@
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from llm.api_brain_qa import APIBrainQA
from llm.composite_brain_qa import CompositeBrainQA
from llm.knowledge_brain_qa import KnowledgeBrainQA
from logger import get_logger
from models.settings import BrainSettings, get_supabase_client
from modules.brain.entity.brain_entity import BrainType, RoleEnum
from modules.brain.service.api_brain_definition_service import ApiBrainDefinitionService
from modules.brain.service.brain_authorization_service import (
@ -13,7 +9,6 @@ from modules.brain.service.brain_authorization_service import (
from modules.brain.service.brain_service import BrainService
from modules.chat.controller.chat.interface import ChatInterface
from modules.chat.service.chat_service import ChatService
from vectorstore.supabase import CustomSupabaseVectorStore
chat_service = ChatService()
api_brain_definition_service = ApiBrainDefinitionService()
@ -43,64 +38,17 @@ class BrainfulChat(ChatInterface):
def get_answer_generator(
self,
brain_id,
brain,
chat_id,
model,
max_tokens,
max_input,
temperature,
streaming,
prompt_id,
user_id,
chat_question,
metadata,
):
metadata = {}
brain_settings = BrainSettings()
supabase_client = get_supabase_client()
embeddings = None
if brain_settings.ollama_api_base_url:
embeddings = OllamaEmbeddings(
base_url=brain_settings.ollama_api_base_url
) # pyright: ignore reportPrivateUsage=none
else:
embeddings = OpenAIEmbeddings()
vector_store = CustomSupabaseVectorStore(
supabase_client, embeddings, table_name="vectors", user_id=user_id
)
# Init
brain_id_to_use = brain_id
# Get the first question from the chat_question
question = chat_question.question
history = chat_service.get_chat_history(chat_id)
list_brains = [] # To return
if history and not brain_id_to_use:
# Replace the question with the first question from the history
question = history[0].user_message
if history and not brain_id:
brain_id_to_use = history[0].brain_id
# Calculate the closest brains to the question
list_brains = vector_store.find_brain_closest_query(user_id, question)
metadata["close_brains"] = list_brains[:5]
if list_brains and not brain_id_to_use:
brain_id_to_use = list_brains[0]["id"]
# GENERIC
follow_up_questions = chat_service.get_follow_up_question(chat_id)
metadata["follow_up_questions"] = follow_up_questions
metadata["model"] = model
metadata["max_tokens"] = max_tokens
metadata["temperature"] = temperature
brain = brain_service.get_brain_by_id(brain_id_to_use)
if (
brain
and brain.brain_type == BrainType.DOC
@ -110,33 +58,23 @@ class BrainfulChat(ChatInterface):
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
max_input=max_input,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
prompt_id=prompt_id,
metadata=metadata,
)
if brain.brain_type == BrainType.COMPOSITE:
return CompositeBrainQA(
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
prompt_id=prompt_id,
user_id=user_id,
metadata=metadata,
)
if brain.brain_type == BrainType.API:
brain_definition = api_brain_definition_service.get_api_brain_definition(
brain_id_to_use
brain.brain_id
)
return APIBrainQA(
chat_id=chat_id,
model=model,
max_tokens=max_tokens,
max_input=max_input,
temperature=temperature,
brain_id=str(brain.brain_id),
streaming=streaming,
@ -144,5 +82,7 @@ class BrainfulChat(ChatInterface):
user_id=user_id,
metadata=metadata,
raw=(brain_definition.raw if brain_definition else None),
jq_instructions=(brain_definition.jq_instructions if brain_definition else None),
jq_instructions=(
brain_definition.jq_instructions if brain_definition else None
),
)
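
With the route-level logic removed from `BrainfulChat`, callers now resolve the brain and the model limits themselves and pass them in. The stream route further down builds the generator like this (excerpt from that route, shown here only to illustrate the new call shape):

```python
gpt_answer_generator = chat_instance.get_answer_generator(
    chat_id=str(chat_id),
    model=model_to_use.name,
    max_tokens=model_to_use.max_output,
    max_input=model_to_use.max_input,
    temperature=0.1,
    streaming=True,
    prompt_id=chat_question.prompt_id,
    user_id=current_user.id,
    chat_question=chat_question,
    metadata=metadata,
    brain=brain,
)
```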

View File

@ -8,7 +8,6 @@ class BrainlessChat(ChatInterface):
def get_answer_generator(
self,
brain_id,
chat_id,
model,
max_tokens,

View File

@ -9,7 +9,6 @@ class ChatInterface(ABC):
@abstractmethod
def get_answer_generator(
self,
brain_id,
chat_id,
model,
max_tokens,

View File

@ -25,6 +25,17 @@ class NullableUUID(UUID):
def check_user_requests_limit(user: UserIdentity, model: str):
"""Checks the user requests limit.
It checks the user requests limit and raises an exception if the user has reached the limit.
By default, the user has a limit of 100 requests per month. The limit can be increased by upgrading the plan.
Args:
user (UserIdentity): User object
model (str): Model name for which the user is making the request
Raises:
HTTPException: Raises a 429 error if the user has reached the limit.
"""
userDailyUsage = UserUsage(id=user.id, email=user.email)
userSettings = userDailyUsage.get_user_settings()
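
For reference, a toy re-statement of the rule documented above (a behavioural sketch only, not the real helper; the detail message and the limit constant are assumptions based on the docstring):

```python
from fastapi import HTTPException

DEFAULT_MONTHLY_LIMIT = 100  # default quota mentioned in the docstring above


def check_requests_limit_sketch(
    monthly_request_count: int, limit: int = DEFAULT_MONTHLY_LIMIT
) -> None:
    """Raise a 429 once the user's monthly request count reaches their plan limit."""
    if monthly_request_count >= limit:
        raise HTTPException(
            status_code=429,
            detail="You have reached your monthly request limit.",  # wording assumed
        )


check_requests_limit_sketch(monthly_request_count=42)    # passes silently
# check_requests_limit_sketch(monthly_request_count=100) # would raise HTTP 429
```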

View File

@ -1,10 +1,10 @@
from typing import List, Optional
from uuid import UUID
from venv import logger
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import StreamingResponse
from middlewares.auth import AuthBearer, get_current_user
from models.databases.entity import LLMModels
from models.user_usage import UserUsage
from modules.brain.service.brain_service import BrainService
from modules.chat.controller.chat.brainful_chat import BrainfulChat
@ -21,6 +21,10 @@ from modules.chat.service.chat_service import ChatService
from modules.notification.service.notification_service import NotificationService
from modules.user.entity.user_identity import UserIdentity
from logger import get_logger
logger = get_logger(__name__)
chat_router = APIRouter()
notification_service = NotificationService()
@ -163,11 +167,12 @@ async def create_question_handler(
model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore
max_tokens=chat_question.max_tokens,
temperature=chat_question.temperature,
brain_id=str(brain_id),
streaming=False,
prompt_id=chat_question.prompt_id,
user_id=current_user.id,
chat_question=chat_question,
max_input=2000,
brain=brain_service.get_brain_by_id(brain_id),
metadata={},
)
chat_answer = gpt_answer_generator.generate_answer(
@ -201,49 +206,81 @@ async def create_stream_question_handler(
chat_instance = BrainfulChat()
chat_instance.validate_authorization(user_id=current_user.id, brain_id=brain_id)
user_daily_usage = UserUsage(
user_usage = UserUsage(
id=current_user.id,
email=current_user.email,
)
user_settings = user_daily_usage.get_user_settings()
# Get History
history = chat_service.get_chat_history(chat_id)
# Retrieve chat model (temperature, max_tokens, model)
if (
not chat_question.model
or chat_question.temperature is None
or not chat_question.max_tokens
):
fallback_model = "gpt-3.5-turbo-1106"
fallback_temperature = 0
fallback_max_tokens = 256
if brain_id:
brain = brain_service.get_brain_by_id(brain_id)
if brain:
fallback_model = brain.model or fallback_model
fallback_temperature = brain.temperature or fallback_temperature
fallback_max_tokens = brain.max_tokens or fallback_max_tokens
# Get user settings
user_settings = user_usage.get_user_settings()
chat_question.model = chat_question.model or fallback_model
chat_question.temperature = chat_question.temperature or fallback_temperature
chat_question.max_tokens = chat_question.max_tokens or fallback_max_tokens
# Get Model settings for the user
models_settings = user_usage.get_model_settings()
# Generic
brain_id_to_use, metadata_brain = brain_service.find_brain_from_question(
brain_id, chat_question.question, current_user, chat_id, history
)
# Add metadata_brain to metadata
metadata = {}
metadata = {**metadata, **metadata_brain}
follow_up_questions = chat_service.get_follow_up_question(chat_id)
metadata["follow_up_questions"] = follow_up_questions
# Get the Brain settings
brain = brain_service.get_brain_by_id(brain_id_to_use)
logger.info(f"Brain model: {brain.model}")
logger.info(f"Brain is : {str(brain)}")
try:
logger.info(f"Streaming request for {chat_question.model}")
check_user_requests_limit(current_user, chat_question.model)
# TODO check if model is in the list of models available for the user
# Default model is gpt-3.5-turbo-1106
model_to_use = LLMModels(
name="gpt-3.5-turbo-1106", price=1, max_input=512, max_output=512
)
is_model_ok = chat_question.model in user_settings.get("models", ["gpt-3.5-turbo-1106"]) # type: ignore
is_brain_model_available = any(
brain.model == model_dict.get("name") for model_dict in models_settings
)
is_user_allowed_model = brain.model in user_settings.get(
"models", ["gpt-3.5-turbo-1106"]
) # Checks if the model is available in the list of models
logger.info(f"Brain model: {brain.model}")
logger.info(f"User models: {user_settings.get('models', [])}")
logger.info(f"Model available: {is_brain_model_available}")
logger.info(f"User allowed model: {is_user_allowed_model}")
if is_brain_model_available and is_user_allowed_model:
# Use the model from the brain
model_to_use.name = brain.model
for model_dict in models_settings:
if model_dict.get("name") == model_to_use.name:
logger.info(f"Using model {model_to_use.name}")
model_to_use.max_input = model_dict.get("max_input")
model_to_use.max_output = model_dict.get("max_output")
break
metadata["model"] = model_to_use.name
metadata["max_tokens"] = model_to_use.max_output
metadata["max_input"] = model_to_use.max_input
check_user_requests_limit(current_user, chat_question.model)
gpt_answer_generator = chat_instance.get_answer_generator(
chat_id=str(chat_id),
model=chat_question.model if is_model_ok else "gpt-3.5-turbo-1106", # type: ignore
max_tokens=chat_question.max_tokens,
temperature=chat_question.temperature, # type: ignore
model=model_to_use.name,
max_tokens=model_to_use.max_output,
max_input=model_to_use.max_input,
temperature=0.1,
streaming=True,
prompt_id=chat_question.prompt_id,
brain_id=brain_id,
user_id=current_user.id,
chat_question=chat_question,
metadata=metadata,
brain=brain,
)
return StreamingResponse(

View File

@ -32,6 +32,7 @@ class Sources(BaseModel):
name: str
source_url: str
type: str
original_file_name: str
class Config:
json_encoders = {

View File

@ -9,10 +9,7 @@ from models import File, get_documents_vector_store
from packages.files.file import compute_sha1_from_content
async def process_audio(
file: File,
user,
):
async def process_audio(file: File, user, original_file_name):
temp_filename = None
file_sha = ""
dateshort = time.strftime("%Y%m%d-%H%M%S")

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
async def process_python(file: File, brain_id):
async def process_python(file: File, brain_id, original_file_name):
return await process_file(
file=file,
loader_class=PythonLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -2,7 +2,6 @@ import time
from logger import get_logger
from models import File
from models.settings import get_supabase_db
from modules.brain.service.brain_vector_service import BrainVectorService
from packages.embeddings.vectors import Neurons
from repository.files.upload_file import DocumentSerializable
@ -10,12 +9,7 @@ from repository.files.upload_file import DocumentSerializable
logger = get_logger(__name__)
async def process_file(
file: File,
loader_class,
brain_id,
):
database = get_supabase_db()
async def process_file(file: File, loader_class, brain_id, original_file_name):
dateshort = time.strftime("%Y%m%d")
neurons = Neurons()
@ -28,6 +22,7 @@ async def process_file(
"chunk_size": file.chunk_size,
"chunk_overlap": file.chunk_overlap,
"date": dateshort,
"original_file_name": original_file_name or file.file_name,
}
docs = []

View File

@ -4,12 +4,10 @@ from models import File
from .common import process_file
def process_csv(
file: File,
brain_id,
):
def process_csv(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=CSVLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_docx(file: File, brain_id):
def process_docx(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=Docx2txtLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_epub(file: File, brain_id):
def process_epub(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredEPubLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -52,6 +52,7 @@ async def process_github(
"chunk_size": chunk_size,
"chunk_overlap": chunk_overlap,
"date": dateshort,
"original_file_name": doc.metadata["original_file_name"],
}
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_html(file: File, brain_id):
def process_html(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredHTMLLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_markdown(file: File, brain_id):
def process_markdown(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredMarkdownLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_ipnyb(file: File, brain_id):
def process_ipnyb(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=NotebookLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_odt(file: File, brain_id):
def process_odt(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredPDFLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_pdf(file: File, brain_id):
def process_pdf(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredPDFLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,9 +4,10 @@ from models import File
from .common import process_file
def process_powerpoint(file: File, brain_id):
def process_powerpoint(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredPowerPointLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,12 +4,10 @@ from packages.files.loaders.telegram import TelegramChatFileLoader
from .common import process_file
def process_telegram(
file: File,
brain_id,
):
def process_telegram(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=TelegramChatFileLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -7,9 +7,11 @@ from .common import process_file
async def process_txt(
file: File,
brain_id,
original_file_name,
):
return await process_file(
file=file,
loader_class=TextLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -4,12 +4,10 @@ from models.files import File
from .common import process_file
def process_xlsx(
file: File,
brain_id,
):
def process_xlsx(file: File, brain_id, original_file_name):
return process_file(
file=file,
loader_class=UnstructuredExcelLoader,
brain_id=brain_id,
original_file_name=original_file_name,
)

View File

@ -86,6 +86,7 @@ async def filter_file(
result = await file_processors[file.file_extension](
file=file,
brain_id=brain_id,
original_file_name=original_file_name,
)
if result is None or result == 0:
return create_response(

View File

@ -20,6 +20,15 @@ class DocumentAnswer:
def get_question_context_from_brain(brain_id: UUID, question: str) -> str:
"""Finds the best brain to answer the question based on the question's meaning.
Args:
brain_id (UUID): Id of the brain to search in
question (str): Question to search for in the vector store
Returns:
str: _description_
"""
# TODO: Move to AnswerGenerator service
supabase_client = get_supabase_client()
embeddings = get_embeddings()

View File

@ -15,6 +15,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
brain_id: str = "none"
user_id: str = "none"
number_docs: int = 35
max_input: int = 2000
def __init__(
self,
@ -24,11 +25,13 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
brain_id: str = "none",
user_id: str = "none",
number_docs: int = 35,
max_input: int = 2000,
):
super().__init__(client, embedding, table_name)
self.brain_id = brain_id
self.user_id = user_id
self.number_docs = number_docs
self.max_input = max_input
def find_brain_closest_query(
self,
@ -65,7 +68,7 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
def similarity_search(
self,
query: str,
k: int = 35,
k: int = 40,
table: str = "match_vectors",
threshold: float = 0.5,
**kwargs: Any,
@ -98,5 +101,15 @@ class CustomSupabaseVectorStore(SupabaseVectorStore):
]
documents = [doc for doc, _ in match_result]
max_tokens_input = self.max_input
documents_to_return = []
return documents
# Limits to max_tokens_input with metadata chunk_size
for doc in documents:
if doc.metadata["chunk_size"] <= max_tokens_input:
documents_to_return.append(doc)
max_tokens_input -= doc.metadata["chunk_size"]
else:
break
return documents_to_return
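
The new tail of `similarity_search` turns `max_input` into a greedy token budget: matches are kept in similarity order while their `chunk_size` still fits, and iteration stops at the first document that would overflow. A standalone illustration of that behaviour (a sketch using a stub document type, not the class method itself):

```python
from dataclasses import dataclass, field


@dataclass
class Doc:
    """Stub standing in for a langchain Document that carries chunk_size metadata."""
    metadata: dict = field(default_factory=dict)


def trim_to_budget(documents: list[Doc], max_input: int = 2000) -> list[Doc]:
    kept, remaining = [], max_input
    for doc in documents:
        if doc.metadata["chunk_size"] <= remaining:
            kept.append(doc)
            remaining -= doc.metadata["chunk_size"]
        else:
            break  # same early stop as the method above
    return kept


docs = [Doc({"chunk_size": 900}), Doc({"chunk_size": 800}), Doc({"chunk_size": 500})]
print(len(trim_to_budget(docs)))  # 2: 900 + 800 fit the 2000-token budget, 500 does not
```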