diff --git a/backend/core/models/brains.py b/backend/core/models/brains.py
index 5b6c30ac6..099ec3005 100644
--- a/backend/core/models/brains.py
+++ b/backend/core/models/brains.py
@@ -2,7 +2,7 @@ from typing import Any, List, Optional
 from uuid import UUID
 
 from logger import get_logger
-from models.settings import BrainRateLimiting, common_dependencies, get_supabase_client
+from models.settings import BrainRateLimiting, get_supabase_client
 from models.users import User
 from pydantic import BaseModel
 from supabase.client import Client
@@ -46,7 +46,7 @@ class Brain(BaseModel):
 
     @classmethod
     def create(cls, *args, **kwargs):
-        commons = common_dependencies()
+        commons = {"supabase": get_supabase_client()}
         return cls(
             commons=commons, *args, **kwargs  # pyright: ignore reportPrivateUsage=none
         )  # pyright: ignore reportPrivateUsage=none
@@ -320,4 +320,3 @@ def get_default_user_brain_or_create_new(user: User) -> Brain:
         brain.create_brain_user(user.id, "Owner", True)
         return brain
     return brain
-    return brain
diff --git a/backend/core/models/files.py b/backend/core/models/files.py
index 891a92dbe..2f5706fc3 100644
--- a/backend/core/models/files.py
+++ b/backend/core/models/files.py
@@ -7,7 +7,7 @@ from fastapi import UploadFile
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from logger import get_logger
 from models.brains import Brain
-from models.settings import CommonsDep, get_supabase_client
+from models.settings import get_supabase_client
 from pydantic import BaseModel
 from utils.file import compute_sha1_from_file
 
@@ -26,7 +26,6 @@ class File(BaseModel):
     chunk_size: int = 500
     chunk_overlap: int = 0
     documents: Optional[Any] = None
-    _commons: Optional[CommonsDep] = None
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
diff --git a/backend/core/models/settings.py b/backend/core/models/settings.py
index c77a733cf..47d5a27dd 100644
--- a/backend/core/models/settings.py
+++ b/backend/core/models/settings.py
@@ -1,6 +1,5 @@
-from typing import Annotated, TypedDict
+from typing import TypedDict
 
-from fastapi import Depends
 from langchain.embeddings.openai import OpenAIEmbeddings
 from pydantic import BaseSettings
 from supabase.client import Client, create_client
@@ -42,27 +41,20 @@ def get_supabase_client() -> Client:
     return supabase_client
 
 
-def common_dependencies() -> CommonDependencies:
+def get_embeddings() -> OpenAIEmbeddings:
     settings = BrainSettings()  # pyright: ignore reportPrivateUsage=none
     embeddings = OpenAIEmbeddings(
         openai_api_key=settings.openai_api_key
     )  # pyright: ignore reportPrivateUsage=none
-    supabase_client: Client = create_client(
-        settings.supabase_url, settings.supabase_service_key
-    )
+
+    return embeddings
+
+
+def get_documents_vector_store() -> SupabaseVectorStore:
+    supabase_client = get_supabase_client()
+    embeddings = get_embeddings()
     documents_vector_store = SupabaseVectorStore(
         supabase_client, embeddings, table_name="vectors"
     )
-    summaries_vector_store = SupabaseVectorStore(
-        supabase_client, embeddings, table_name="summaries"
-    )
-    return {
-        "supabase": supabase_client,
-        "embeddings": embeddings,
-        "documents_vector_store": documents_vector_store,
-        "summaries_vector_store": summaries_vector_store,  # delete
-    }
-
-
-CommonsDep = Annotated[dict, Depends(common_dependencies)]
+    return documents_vector_store
 
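The settings.py hunk above is the heart of the refactor: the `common_dependencies()` grab-bag dict and its `CommonsDep` FastAPI annotation are replaced by narrow accessors that each build exactly one dependency (note that the unused `summaries_vector_store`, already flagged `# delete`, gets no replacement). A minimal sketch of how a call site composes them now — the helper name `embed_and_store` is illustrative, not part of the change:

# Hypothetical call site: ask for the one dependency you need instead of
# unpacking the old commons dict.
from models.settings import get_documents_vector_store


def embed_and_store(doc):
    # Each accessor constructs its dependency on demand; nothing is cached,
    # so this builds a fresh Supabase client and embeddings object per call.
    documents_vector_store = get_documents_vector_store()
    return documents_vector_store.add_documents([doc])
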
diff --git a/backend/core/parsers/audio.py b/backend/core/parsers/audio.py
index 452972cff..6b9ca5da4 100644
--- a/backend/core/parsers/audio.py
+++ b/backend/core/parsers/audio.py
@@ -6,12 +6,11 @@ import openai
 from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from models.files import File
-from models.settings import CommonsDep
+from models.settings import get_documents_vector_store
 from utils.file import compute_sha1_from_content
 
 
 async def process_audio(
-    commons: CommonsDep,  # pyright: ignore reportPrivateUsage=none
     file: File,
     enable_summarization: bool,
     user,
@@ -78,7 +77,8 @@ async def process_audio(
         for text in texts
     ]
 
-    commons.documents_vector_store.add_documents(  # pyright: ignore reportPrivateUsage=none
+    documents_vector_store = get_documents_vector_store()
+    documents_vector_store.add_documents(  # pyright: ignore reportPrivateUsage=none
         docs_with_metadata
     )
 
diff --git a/backend/core/parsers/common.py b/backend/core/parsers/common.py
index 164889e7d..db8843f5a 100644
--- a/backend/core/parsers/common.py
+++ b/backend/core/parsers/common.py
@@ -3,12 +3,10 @@ import time
 from langchain.schema import Document
 from models.brains import Brain
 from models.files import File
-from models.settings import CommonsDep
 from utils.vectors import Neurons
 
 
 async def process_file(
-    commons: CommonsDep,
     file: File,
     loader_class,
     enable_summarization,
@@ -31,7 +29,7 @@
         }
         doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
 
-        neurons = Neurons(commons=commons)
+        neurons = Neurons()
         created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
         # add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
 
diff --git a/backend/core/parsers/csv.py b/backend/core/parsers/csv.py
index 0744a8162..4df75b535 100644
--- a/backend/core/parsers/csv.py
+++ b/backend/core/parsers/csv.py
@@ -1,22 +1,19 @@
 from langchain.document_loaders import CSVLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
 def process_csv(
-    commons: CommonsDep,
     file: File,
     enable_summarization,
     brain_id,
     user_openai_api_key,
 ):
     return process_file(
-        commons,
-        file,
-        CSVLoader,
-        enable_summarization,
-        brain_id,
-        user_openai_api_key,
+        file=file,
+        loader_class=CSVLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
     )
diff --git a/backend/core/parsers/docx.py b/backend/core/parsers/docx.py
index 8cd2876a6..6094c9b7a 100644
--- a/backend/core/parsers/docx.py
+++ b/backend/core/parsers/docx.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders import Docx2txtLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_docx(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, Docx2txtLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_docx(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=Docx2txtLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/epub.py b/backend/core/parsers/epub.py
index d89d7b260..bc393ab3b 100644
--- a/backend/core/parsers/epub.py
+++ b/backend/core/parsers/epub.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders.epub import UnstructuredEPubLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_epub(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, UnstructuredEPubLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_epub(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=UnstructuredEPubLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/github.py b/backend/core/parsers/github.py
index 6ad2c4013..2b49c06b3 100644
--- a/backend/core/parsers/github.py
+++ b/backend/core/parsers/github.py
@@ -6,13 +6,11 @@ from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from models.brains import Brain
 from models.files import File
-from models.settings import CommonsDep
 from utils.file import compute_sha1_from_content
 from utils.vectors import Neurons
 
 
 async def process_github(
-    commons: CommonsDep,  # pyright: ignore reportPrivateUsage=none
     repo,
     enable_summarization,
     brain_id,
@@ -70,7 +68,7 @@
 
             if not file_exists:
                 print(f"Creating entry for file {file.file_sha1} in vectors...")
-                neurons = Neurons(commons=commons)
+                neurons = Neurons()
                 created_vector = neurons.create_vector(
                     doc_with_metadata, user_openai_api_key
                 )
diff --git a/backend/core/parsers/html.py b/backend/core/parsers/html.py
index 5326ea29a..06f2913bd 100644
--- a/backend/core/parsers/html.py
+++ b/backend/core/parsers/html.py
@@ -1,21 +1,22 @@
 import requests
 
 from langchain.document_loaders import UnstructuredHTMLLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
 def process_html(
-    commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
 ):
     return process_file(
-        commons,
-        file,
-        UnstructuredHTMLLoader,
-        enable_summarization,
-        brain_id,
-        user_openai_api_key,
+        file=file,
+        loader_class=UnstructuredHTMLLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
     )
 
diff --git a/backend/core/parsers/markdown.py b/backend/core/parsers/markdown.py
index a114b7668..ecb9f1fe9 100644
--- a/backend/core/parsers/markdown.py
+++ b/backend/core/parsers/markdown.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders import UnstructuredMarkdownLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_markdown(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, UnstructuredMarkdownLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_markdown(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=UnstructuredMarkdownLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/notebook.py b/backend/core/parsers/notebook.py
index cdc682858..74bd11153 100644
--- a/backend/core/parsers/notebook.py
+++ b/backend/core/parsers/notebook.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders import NotebookLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_ipnyb(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, NotebookLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_ipnyb(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=NotebookLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/odt.py b/backend/core/parsers/odt.py
index e2050588d..73a68f6d1 100644
--- a/backend/core/parsers/odt.py
+++ b/backend/core/parsers/odt.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders import PyMuPDFLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_odt(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, PyMuPDFLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_odt(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=PyMuPDFLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/pdf.py b/backend/core/parsers/pdf.py
index d50dffc0f..afb8612b2 100644
--- a/backend/core/parsers/pdf.py
+++ b/backend/core/parsers/pdf.py
@@ -1,10 +1,19 @@
 from langchain.document_loaders import PyMuPDFLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_pdf(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, PyMuPDFLoader, enable_summarization, brain_id, user_openai_api_key)
-
+def process_pdf(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=PyMuPDFLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/powerpoint.py b/backend/core/parsers/powerpoint.py
index 21de56c9b..a8579b00a 100644
--- a/backend/core/parsers/powerpoint.py
+++ b/backend/core/parsers/powerpoint.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders import UnstructuredPowerPointLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_powerpoint(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, UnstructuredPowerPointLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_powerpoint(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=UnstructuredPowerPointLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
diff --git a/backend/core/parsers/txt.py b/backend/core/parsers/txt.py
index 4bc461f71..da569bf1c 100644
--- a/backend/core/parsers/txt.py
+++ b/backend/core/parsers/txt.py
@@ -1,9 +1,19 @@
 from langchain.document_loaders import TextLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-async def process_txt(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return await process_file(commons, file, TextLoader, enable_summarization, brain_id,user_openai_api_key)
+async def process_txt(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return await process_file(
+        file=file,
+        loader_class=TextLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
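Every parser above now shares the same commons-free, keyword-only call shape, so invoking one looks identical regardless of file type. A rough sketch of a direct call — normally this goes through the `file_processors` dispatch table in `utils/processors.py` further below, and the helper name `ingest_pdf` is illustrative only:

from parsers.pdf import process_pdf


async def ingest_pdf(file, brain_id):
    # file is a models.files.File wrapping the upload; brain_id a brain UUID.
    return await process_pdf(
        file=file,
        enable_summarization=False,
        brain_id=brain_id,
        user_openai_api_key=None,  # None falls back to the server's default key
    )

With a dozen near-identical parsers, the switch from positional to keyword arguments is what keeps a future reordering of `process_file`'s parameters from silently shuffling `brain_id` into the wrong slot.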
diff --git a/backend/core/repository/brain_subscription/subscription_invitation_service.py b/backend/core/repository/brain_subscription/subscription_invitation_service.py
index 0920070b9..6743fc9a9 100644
--- a/backend/core/repository/brain_subscription/subscription_invitation_service.py
+++ b/backend/core/repository/brain_subscription/subscription_invitation_service.py
@@ -1,8 +1,6 @@
-from typing import Optional
-
 from logger import get_logger
 from models.brains_subscription_invitations import BrainSubscription
-from models.settings import CommonsDep, get_supabase_client
+from models.settings import get_supabase_client
 from utils.db_commands import (
     delete_data_in_table,
     insert_data_in_table,
@@ -14,7 +12,7 @@ logger = get_logger(__name__)
 
 
 class SubscriptionInvitationService:
-    def __init__(self, commons: Optional[CommonsDep] = None):
+    def __init__(self):
         self.supabase_client = get_supabase_client()
 
     def create_subscription_invitation(self, brain_subscription: BrainSubscription):
diff --git a/backend/core/routes/api_key_routes.py b/backend/core/routes/api_key_routes.py
index 59ec53054..16eaea303 100644
--- a/backend/core/routes/api_key_routes.py
+++ b/backend/core/routes/api_key_routes.py
@@ -7,7 +7,7 @@ from asyncpg.exceptions import UniqueViolationError
 from auth import AuthBearer, get_current_user
 from fastapi import APIRouter, Depends
 from logger import get_logger
-from models.settings import CommonsDep
+from models.settings import get_supabase_client
 from models.users import User
 from pydantic import BaseModel
 
@@ -33,9 +33,7 @@ api_key_router = APIRouter()
     dependencies=[Depends(AuthBearer())],
     tags=["API Key"],
 )
-async def create_api_key(
-    commons: CommonsDep, current_user: User = Depends(get_current_user)
-):
+async def create_api_key(current_user: User = Depends(get_current_user)):
     """
     Create new API key for the current user.
 
@@ -49,11 +47,12 @@
     new_key_id = uuid4()
     new_api_key = token_hex(16)
     api_key_inserted = False
+    supabase_client = get_supabase_client()
 
     while not api_key_inserted:
         try:
             # Attempt to insert new API key into database
-            commons["supabase"].table("api_keys").insert(
+            supabase_client.table("api_keys").insert(
                 [
                     {
                         "key_id": str(new_key_id),
@@ -83,9 +82,7 @@
 @api_key_router.delete(
     "/api-key/{key_id}", dependencies=[Depends(AuthBearer())], tags=["API Key"]
 )
-async def delete_api_key(
-    key_id: str, commons: CommonsDep, current_user: User = Depends(get_current_user)
-):
+async def delete_api_key(key_id: str, current_user: User = Depends(get_current_user)):
     """
     Delete (deactivate) an API key for the current user.
 
@@ -95,8 +92,8 @@
     as inactive in the database.
 
     """
-
-    commons["supabase"].table("api_keys").update(
+    supabase_client = get_supabase_client()
+    supabase_client.table("api_keys").update(
         {
             "is_active": False,
             "deleted_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
@@ -112,9 +109,7 @@
     dependencies=[Depends(AuthBearer())],
     tags=["API Key"],
 )
-async def get_api_keys(
-    commons: CommonsDep, current_user: User = Depends(get_current_user)
-):
+async def get_api_keys(current_user: User = Depends(get_current_user)):
     """
     Get all active API keys for the current user.
 
@@ -124,10 +119,9 @@ async def get_api_keys(
     This endpoint retrieves all the active API keys associated with the current user. It returns a list of API
     key objects containing the key ID and creation time for each API key.
     """
-
+    supabase_client = get_supabase_client()
     response = (
-        commons["supabase"]
-        .table("api_keys")
+        supabase_client.table("api_keys")
         .select("key_id, creation_time")
         .filter("user_id", "eq", current_user.id)
         .filter("is_active", "eq", True)
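Note what the route handlers trade away here: `commons` used to arrive through FastAPI's `Depends`, so tests could swap it via `app.dependency_overrides`. With `get_supabase_client()` called inline, the natural seam becomes the accessor itself. A hypothetical pytest sketch — none of this test code exists in the change:

# test_api_key_routes.py -- illustrative only
from unittest.mock import MagicMock


def test_get_api_keys_uses_client(monkeypatch):
    fake_client = MagicMock()
    # Patch the name where it is looked up (the route module),
    # not where it is defined (models.settings).
    monkeypatch.setattr(
        "routes.api_key_routes.get_supabase_client", lambda: fake_client
    )
    # ... call the endpoint via FastAPI's TestClient and assert on
    # fake_client.table("api_keys") interactions ...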
diff --git a/backend/core/routes/crawl_routes.py b/backend/core/routes/crawl_routes.py
index 2e093170f..2c55c62fa 100644
--- a/backend/core/routes/crawl_routes.py
+++ b/backend/core/routes/crawl_routes.py
@@ -8,7 +8,6 @@ from crawl.crawler import CrawlWebsite
 from fastapi import APIRouter, Depends, Query, Request, UploadFile
 from models.brains import Brain
 from models.files import File
-from models.settings import common_dependencies
 from models.users import User
 from parsers.github import process_github
 from utils.file import convert_bytes
@@ -32,8 +31,6 @@ async def crawl_endpoint(
     # [TODO] check if the user is the owner/editor of the brain
     brain = Brain(id=brain_id)
 
-    commons = common_dependencies()
-
     if request.headers.get("Openai-Api-Key"):
         brain.max_brain_size = os.getenv(
             "MAX_BRAIN_SIZE_WITH_KEY", 209715200
@@ -66,19 +63,17 @@ async def crawl_endpoint(
         file = File(file=uploadFile)
         # check remaining free space here !!
         message = await filter_file(
-            commons,
-            file,
-            enable_summarization,
-            brain.id,
+            file=file,
+            enable_summarization=enable_summarization,
+            brain_id=brain.id,
             openai_api_key=request.headers.get("Openai-Api-Key", None),
         )
         return message
     else:
         # check remaining free space here !!
         message = await process_github(
-            commons,
-            crawl_website.url,
-            "false",
-            brain_id,
+            repo=crawl_website.url,
+            enable_summarization="false",
+            brain_id=brain_id,
             user_openai_api_key=request.headers.get("Openai-Api-Key", None),
         )
diff --git a/backend/core/routes/upload_routes.py b/backend/core/routes/upload_routes.py
index 42d087126..9c2c3b701 100644
--- a/backend/core/routes/upload_routes.py
+++ b/backend/core/routes/upload_routes.py
@@ -5,16 +5,14 @@ from auth import AuthBearer, get_current_user
 from fastapi import APIRouter, Depends, Query, Request, UploadFile
 from models.brains import Brain
 from models.files import File
-from models.settings import common_dependencies
 from models.users import User
 from repository.user_identity.get_user_identity import get_user_identity
-from utils.file import convert_bytes, get_file_size
-from utils.processors import filter_file
-
 from routes.authorizations.brain_authorization import (
     RoleEnum,
     validate_brain_authorization,
 )
+from utils.file import convert_bytes, get_file_size
+from utils.processors import filter_file
 
 upload_router = APIRouter()
 
@@ -44,7 +42,6 @@ async def upload_file(
     )
 
     brain = Brain(id=brain_id)
-    commons = common_dependencies()
 
     if request.headers.get("Openai-Api-Key"):
         brain.max_brain_size = int(os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200))
@@ -70,9 +67,8 @@
         openai_api_key = get_user_identity(current_user.id).openai_api_key
 
     message = await filter_file(
-        commons,
-        file,
-        enable_summarization,
+        file=file,
+        enable_summarization=enable_summarization,
         brain_id=brain_id,
         openai_api_key=openai_api_key,
     )
diff --git a/backend/core/utils/processors.py b/backend/core/utils/processors.py
index 75e242ca7..ecb035ca4 100644
--- a/backend/core/utils/processors.py
+++ b/backend/core/utils/processors.py
@@ -1,6 +1,5 @@
 from models.brains import Brain
 from models.files import File
-from models.settings import CommonsDep
 from parsers.audio import process_audio
 from parsers.csv import process_csv
 from parsers.docx import process_docx
@@ -40,7 +39,6 @@ def create_response(message, type):
 
 
 async def filter_file(
-    commons: CommonsDep,
     file: File,
     enable_summarization: bool,
     brain_id,
@@ -72,7 +70,10 @@
     if file.file_extension in file_processors:
         try:
             await file_processors[file.file_extension](
-                commons, file, enable_summarization, brain_id, openai_api_key
+                file=file,
+                enable_summarization=enable_summarization,
+                brain_id=brain_id,
+                user_openai_api_key=openai_api_key,
             )
             return create_response(
                 f"✅ {file.file.filename} has been uploaded to brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
diff --git a/backend/core/utils/vectors.py b/backend/core/utils/vectors.py
index 7bad323d0..d52f7337e 100644
--- a/backend/core/utils/vectors.py
+++ b/backend/core/utils/vectors.py
@@ -3,25 +3,27 @@
 from typing import List
 
 from langchain.embeddings.openai import OpenAIEmbeddings
 from logger import get_logger
-from models.settings import BrainSettings, CommonsDep, get_supabase_client
+from models.settings import (
+    get_documents_vector_store,
+    get_embeddings,
+    get_supabase_client,
+)
 from pydantic import BaseModel
 
 logger = get_logger(__name__)
 
 
 class Neurons(BaseModel):
-    commons: CommonsDep
-    settings = BrainSettings()  # pyright: ignore reportPrivateUsage=none
-
     def create_vector(self, doc, user_openai_api_key=None):
+        documents_vector_store = get_documents_vector_store()
         logger.info("Creating vector for document")
         logger.info(f"Document: {doc}")
         if user_openai_api_key:
-            self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings(
+            documents_vector_store._embedding = OpenAIEmbeddings(
                 openai_api_key=user_openai_api_key
             )  # pyright: ignore reportPrivateUsage=none
         try:
-            sids = self.commons["documents_vector_store"].add_documents([doc])
+            sids = documents_vector_store.add_documents([doc])
             if sids and len(sids) > 0:
                 return sids
@@ -29,22 +31,20 @@
             logger.error(f"Error creating vector for document {e}")
 
     def create_embedding(self, content):
-        return self.commons["embeddings"].embed_query(content)
+        embeddings = get_embeddings()
+        return embeddings.embed_query(content)
 
     def similarity_search(self, query, table="match_summaries", top_k=5, threshold=0.5):
         query_embedding = self.create_embedding(query)
-        summaries = (
-            self.commons["supabase"]
-            .rpc(
-                table,
-                {
-                    "query_embedding": query_embedding,
-                    "match_count": top_k,
-                    "match_threshold": threshold,
-                },
-            )
-            .execute()
-        )
+        supabase_client = get_supabase_client()
+        summaries = supabase_client.rpc(
+            table,
+            {
+                "query_embedding": query_embedding,
+                "match_count": top_k,
+                "match_threshold": threshold,
+            },
+        ).execute()
         return summaries.data
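With the commons dict gone, `Neurons` carries no state of its own; each method pulls its collaborators on entry. A minimal usage sketch, with the document contents abbreviated:

from langchain.schema import Document

from utils.vectors import Neurons

neurons = Neurons()  # no commons wiring needed anymore
doc = Document(page_content="...", metadata={"file_sha1": "..."})
vector_ids = neurons.create_vector(doc)  # None key -> default embeddings
hits = neurons.similarity_search("what changed?", top_k=3)

One quirk worth knowing: `Neurons` still subclasses pydantic's `BaseModel` even though it now declares no fields, so `Neurons()` is effectively a plain object construction.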