diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 000000000..4d8e27c0c
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,7 @@
+{
+    "recommendations": [
+        "ms-pyright.pyright",
+        "dbaeumer.vscode-eslint",
+        "ms-python.vscode-pylance"
+    ]
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 4814e1966..ac993a6a2 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -12,7 +12,8 @@
         "editor.defaultFormatter": "ms-python.black-formatter",
         "editor.formatOnSave": true,
         "editor.codeActionsOnSave": {
-            "source.organizeImports": true
+            "source.organizeImports": true,
+            "source.fixAll": true
         }
     },
     "[typescriptreact]": {
@@ -41,4 +42,7 @@
         "**/.docusaurus/": true,
         "**/node_modules/": true,
     },
+    "python.linting.pycodestyleCategorySeverity.W": "Error",
+    "python.defaultInterpreterPath": "python3",
+    "python.linting.flake8CategorySeverity.W": "Error",
 }
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 7fe8f3c0e..bda740dcc 100644
--- a/Makefile
+++ b/Makefile
@@ -4,4 +4,11 @@ dev:
 	docker compose -f docker-compose.dev.yml up --build
 
 prod:
-	docker compose -f docker-compose.yml up --build
\ No newline at end of file
+	docker compose -f docker-compose.yml up --build
+
+test-type:
+	@if command -v python3 >/dev/null 2>&1; then \
+		python3 -m pyright; \
+	else \
+		python -m pyright; \
+	fi
diff --git a/backend/auth/auth_bearer.py b/backend/auth/auth_bearer.py
index 4f0c9d5bd..913cc8297 100644
--- a/backend/auth/auth_bearer.py
+++ b/backend/auth/auth_bearer.py
@@ -1,12 +1,13 @@
 import os
 from typing import Optional
 
-from auth.api_key_handler import get_user_from_api_key, verify_api_key
-from auth.jwt_token_handler import decode_access_token, verify_token
 from fastapi import Depends, HTTPException, Request
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from models.users import User
 
+from auth.api_key_handler import get_user_from_api_key, verify_api_key
+from auth.jwt_token_handler import decode_access_token, verify_token
+
 
 class AuthBearer(HTTPBearer):
     def __init__(self, auto_error: bool = True):
@@ -20,7 +21,7 @@ class AuthBearer(HTTPBearer):
             request
         )
         self.check_scheme(credentials)
-        token = credentials.credentials
+        token = credentials.credentials  # pyright: ignore reportPrivateUsage=none
         return await self.authenticate(
             token,
         )
@@ -52,7 +53,7 @@ class AuthBearer(HTTPBearer):
 
     def get_test_user(self) -> User:
         return User(
-            email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
+            email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # type: ignore
         )  # replace with test user information
diff --git a/backend/auth/jwt_token_handler.py b/backend/auth/jwt_token_handler.py
index 43b287500..60b85dc80 100644
--- a/backend/auth/jwt_token_handler.py
+++ b/backend/auth/jwt_token_handler.py
@@ -9,6 +9,9 @@ from models.users import User
 SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
 ALGORITHM = "HS256"
 
+if not SECRET_KEY:
+    raise ValueError("JWT_SECRET_KEY environment variable not set")
+
 
 def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
     to_encode = data.copy()
@@ -27,9 +30,12 @@ def decode_access_token(token: str) -> User:
             token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
         )
     except JWTError:
-        return None
+        return None  # pyright: ignore reportPrivateUsage=none
 
-    return User(email=payload.get("email"), id=payload.get("sub"))
+    return User(
+        email=payload.get("email"),
id=payload.get("sub"), # pyright: ignore reportPrivateUsage=none + ) def verify_token(token: str): diff --git a/backend/crawl/crawler.py b/backend/crawl/crawler.py index 0c2b24edc..9ec83f86d 100644 --- a/backend/crawl/crawler.py +++ b/backend/crawl/crawler.py @@ -4,7 +4,6 @@ import tempfile import unicodedata import requests -from langchain.document_loaders import GitLoader from pydantic import BaseModel @@ -29,7 +28,7 @@ class CrawlWebsite(BaseModel): file_name = slugify(self.url) + ".html" temp_file_path = os.path.join(tempfile.gettempdir(), file_name) with open(temp_file_path, "w") as temp_file: - temp_file.write(content) + temp_file.write(content) # pyright: ignore reportPrivateUsage=none # Process the file if content: diff --git a/backend/llm/base.py b/backend/llm/base.py index 24893779d..0bc0e255d 100644 --- a/backend/llm/base.py +++ b/backend/llm/base.py @@ -20,19 +20,21 @@ class BaseBrainPicking(BaseModel): """ # Instantiate settings - brain_settings = BrainSettings() + brain_settings = BrainSettings() # type: ignore other parameters are optional # Default class attributes - model: str = None + model: str = None # pyright: ignore reportPrivateUsage=none temperature: float = 0.0 - chat_id: str = None - brain_id: str = None + chat_id: str = None # pyright: ignore reportPrivateUsage=none + brain_id: str = None # pyright: ignore reportPrivateUsage=none max_tokens: int = 256 - user_openai_api_key: str = None + user_openai_api_key: str = None # pyright: ignore reportPrivateUsage=none streaming: bool = False - openai_api_key: str = None - callbacks: List[AsyncCallbackHandler] = None + openai_api_key: str = None # pyright: ignore reportPrivateUsage=none + callbacks: List[ + AsyncCallbackHandler + ] = None # pyright: ignore reportPrivateUsage=none def _determine_api_key(self, openai_api_key, user_openai_api_key): """If user provided an API key, use it.""" @@ -55,10 +57,12 @@ class BaseBrainPicking(BaseModel): def _determine_callback_array( self, streaming - ) -> List[AsyncIteratorCallbackHandler]: + ) -> List[AsyncIteratorCallbackHandler]: # pyright: ignore reportPrivateUsage=none """If streaming is set, set the AsyncIteratorCallbackHandler as the only callback.""" if streaming: - return [AsyncIteratorCallbackHandler] + return [ + AsyncIteratorCallbackHandler # pyright: ignore reportPrivateUsage=none + ] def __init__(self, **data): super().__init__(**data) @@ -66,8 +70,12 @@ class BaseBrainPicking(BaseModel): self.openai_api_key = self._determine_api_key( self.brain_settings.openai_api_key, self.user_openai_api_key ) - self.streaming = self._determine_streaming(self.model, self.streaming) - self.callbacks = self._determine_callback_array(self.streaming) + self.streaming = self._determine_streaming( + self.model, self.streaming + ) # pyright: ignore reportPrivateUsage=none + self.callbacks = self._determine_callback_array( + self.streaming + ) # pyright: ignore reportPrivateUsage=none class Config: """Configuration of the Pydantic Object""" diff --git a/backend/llm/models/OpenAiAnswer.py b/backend/llm/models/OpenAiAnswer.py index dc09850f4..9500df0a4 100644 --- a/backend/llm/models/OpenAiAnswer.py +++ b/backend/llm/models/OpenAiAnswer.py @@ -1,4 +1,5 @@ from typing import Optional + from .FunctionCall import FunctionCall @@ -6,7 +7,7 @@ class OpenAiAnswer: def __init__( self, content: Optional[str] = None, - function_call: FunctionCall = None, + function_call: FunctionCall = None, # pyright: ignore reportPrivateUsage=none ): self.content = content self.function_call = function_call diff 
--git a/backend/llm/openai.py b/backend/llm/openai.py index ab91fcbc2..214dce4fd 100644 --- a/backend/llm/openai.py +++ b/backend/llm/openai.py @@ -13,11 +13,12 @@ from repository.chat.format_chat_history import format_chat_history from repository.chat.get_chat_history import get_chat_history from repository.chat.update_chat_history import update_chat_history from repository.chat.update_message_by_id import update_message_by_id -from supabase import Client, create_client +from supabase.client import Client, create_client from vectorstore.supabase import ( CustomSupabaseVectorStore, -) # Custom class for handling vector storage with Supabase +) +# Custom class for handling vector storage with Supabase from .base import BaseBrainPicking from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT @@ -42,7 +43,7 @@ class OpenAIBrainPicking(BaseBrainPicking): max_tokens: int, user_openai_api_key: str, streaming: bool = False, - ) -> "OpenAIBrainPicking": + ) -> "OpenAIBrainPicking": # pyright: ignore reportPrivateUsage=none """ Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains. :return: OpenAIBrainPicking instance @@ -59,7 +60,9 @@ class OpenAIBrainPicking(BaseBrainPicking): @property def embeddings(self) -> OpenAIEmbeddings: - return OpenAIEmbeddings(openai_api_key=self.openai_api_key) + return OpenAIEmbeddings( + openai_api_key=self.openai_api_key + ) # pyright: ignore reportPrivateUsage=none @property def supabase_client(self) -> Client: @@ -92,14 +95,16 @@ class OpenAIBrainPicking(BaseBrainPicking): @property def doc_chain(self) -> LLMChain: - return load_qa_chain(llm=self.doc_llm, chain_type="stuff") + return load_qa_chain( + llm=self.doc_llm, chain_type="stuff" + ) # pyright: ignore reportPrivateUsage=none @property def qa(self) -> ConversationalRetrievalChain: return ConversationalRetrievalChain( retriever=self.vector_store.as_retriever(), question_generator=self.question_generator, - combine_docs_chain=self.doc_chain, + combine_docs_chain=self.doc_chain, # pyright: ignore reportPrivateUsage=none verbose=True, ) @@ -116,7 +121,7 @@ class OpenAIBrainPicking(BaseBrainPicking): model=model, streaming=streaming, callbacks=callbacks, - ) + ) # pyright: ignore reportPrivateUsage=none def _call_chain(self, chain, question, history): """ @@ -205,8 +210,10 @@ class OpenAIBrainPicking(BaseBrainPicking): task = asyncio.create_task( wrap_done( - self.qa._acall_chain(self.qa, question, transformed_history), - callback.done, + self.qa._acall_chain( # pyright: ignore reportPrivateUsage=none + self.qa, question, transformed_history + ), + callback.done, # pyright: ignore reportPrivateUsage=none ) ) @@ -217,7 +224,7 @@ class OpenAIBrainPicking(BaseBrainPicking): ) # Use the aiter method of the callback to stream the response with server-sent-events - async for token in callback.aiter(): + async for token in callback.aiter(): # pyright: ignore reportPrivateUsage=none logger.info("Token: %s", token) # Add the token to the response_tokens list diff --git a/backend/llm/openai_functions.py b/backend/llm/openai_functions.py index 2900b1c95..08001ed57 100644 --- a/backend/llm/openai_functions.py +++ b/backend/llm/openai_functions.py @@ -2,15 +2,16 @@ from typing import Any, Dict, List, Optional from langchain.chat_models import ChatOpenAI from langchain.embeddings.openai import OpenAIEmbeddings -from llm.models.FunctionCall import FunctionCall -from llm.models.OpenAiAnswer import OpenAiAnswer from logger import get_logger from models.chat import 
ChatHistory from repository.chat.get_chat_history import get_chat_history from repository.chat.update_chat_history import update_chat_history -from supabase import Client, create_client +from supabase.client import Client, create_client from vectorstore.supabase import CustomSupabaseVectorStore +from llm.models.FunctionCall import FunctionCall +from llm.models.OpenAiAnswer import OpenAiAnswer + from .base import BaseBrainPicking logger = get_logger(__name__) @@ -27,7 +28,10 @@ def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer: answer["function_call"]["arguments"], ) - return OpenAiAnswer(content=content, function_call=function_call) + return OpenAiAnswer( + content=content, + function_call=function_call, # pyright: ignore reportPrivateUsage=none + ) class OpenAIFunctionsBrainPicking(BaseBrainPicking): @@ -48,7 +52,7 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking): brain_id: str, user_openai_api_key: str, # TODO: add streaming - ) -> "OpenAIFunctionsBrainPicking": + ) -> "OpenAIFunctionsBrainPicking": # pyright: ignore reportPrivateUsage=none super().__init__( model=model, chat_id=chat_id, @@ -61,11 +65,15 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking): @property def openai_client(self) -> ChatOpenAI: - return ChatOpenAI(openai_api_key=self.openai_api_key) + return ChatOpenAI( + openai_api_key=self.openai_api_key + ) # pyright: ignore reportPrivateUsage=none @property def embeddings(self) -> OpenAIEmbeddings: - return OpenAIEmbeddings(openai_api_key=self.openai_api_key) + return OpenAIEmbeddings( + openai_api_key=self.openai_api_key + ) # pyright: ignore reportPrivateUsage=none @property def supabase_client(self) -> Client: @@ -125,7 +133,9 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking): """ logger.info("Getting context") - return self.vector_store.similarity_search(query=question) + return self.vector_store.similarity_search( + query=question + ) # pyright: ignore reportPrivateUsage=none def _construct_prompt( self, question: str, useContext: bool = False, useHistory: bool = False diff --git a/backend/llm/private_gpt4all.py b/backend/llm/private_gpt4all.py index 997026aff..655960612 100644 --- a/backend/llm/private_gpt4all.py +++ b/backend/llm/private_gpt4all.py @@ -21,7 +21,7 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking): chat_id: str, brain_id: str, streaming: bool, - ) -> "PrivateGPT4AllBrainPicking": + ) -> "PrivateGPT4AllBrainPicking": # pyright: ignore reportPrivateUsage=none """ Initialize the PrivateBrainPicking class by calling the parent class's initializer. :param brain_id: The brain_id in the DB. 
@@ -57,4 +57,4 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
             n_batch=model_n_batch,
             backend="gptj",
             verbose=True,
-        )
+        )  # pyright: ignore reportPrivateUsage=none
diff --git a/backend/llm/utils/summarization.py b/backend/llm/utils/summarization.py
index 1df98f5df..55c7b7b91 100644
--- a/backend/llm/utils/summarization.py
+++ b/backend/llm/utils/summarization.py
@@ -31,7 +31,7 @@ Summarize the following text:
 {{/assistant~}}
 """,
         llm=summary_llm,
-    )
+    )  # pyright: ignore reportPrivateUsage=none
 
     summary = summary(document=document)
     logger.info("Summarization: %s", summary)
@@ -78,10 +78,12 @@ Summary
 {{/assistant~}}
 """,
         llm=evaluation_llm,
-    )
+    )  # pyright: ignore reportPrivateUsage=none
 
     result = evaluation(question=question, summaries=summaries)
     evaluations = {}
-    for evaluation in result["evaluation"].split("\n"):
+    for evaluation in result["evaluation"].split(
+        "\n"
+    ):  # pyright: ignore reportPrivateUsage=none
         if evaluation == "" or not evaluation[0].isdigit():
             continue
         logger.info("Evaluation Row: %s", evaluation)
diff --git a/backend/models/brains.py b/backend/models/brains.py
index 8ad07f354..924a82013 100644
--- a/backend/models/brains.py
+++ b/backend/models/brains.py
@@ -2,11 +2,12 @@ import os
 from typing import Any, List, Optional
 from uuid import UUID
 
-from models.settings import CommonsDep, common_dependencies
-from models.users import User
 from pydantic import BaseModel
 from utils.vectors import get_unique_files_from_vector_ids
 
+from models.settings import CommonsDep, common_dependencies
+from models.users import User
+
 
 class Brain(BaseModel):
     id: Optional[UUID] = None
@@ -15,7 +16,7 @@ class Brain(BaseModel):
     model: Optional[str] = "gpt-3.5-turbo-0613"
     temperature: Optional[float] = 0.0
     max_tokens: Optional[int] = 256
-    brain_size: Optional[float] = 0.0
+    brain_size: Optional[float] = 0.0  # pyright: ignore reportPrivateUsage=none
     max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
     files: List[Any] = []
     _commons: Optional[CommonsDep] = None
@@ -27,7 +28,7 @@ class Brain(BaseModel):
     def commons(self) -> CommonsDep:
         if not self._commons:
             self.__class__._commons = common_dependencies()
-        return self._commons
+        return self._commons  # pyright: ignore reportPrivateUsage=none
 
     @property
     def brain_size(self):
@@ -39,12 +40,17 @@ class Brain(BaseModel):
 
     @property
     def remaining_brain_size(self):
-        return float(self.max_brain_size) - self.brain_size
+        return (
+            float(self.max_brain_size)  # pyright: ignore reportPrivateUsage=none
+            - self.brain_size  # pyright: ignore reportPrivateUsage=none
+        )
 
     @classmethod
     def create(cls, *args, **kwargs):
         commons = common_dependencies()
-        return cls(commons=commons, *args, **kwargs)
+        return cls(
+            commons=commons, *args, **kwargs  # pyright: ignore reportPrivateUsage=none
+        )  # pyright: ignore reportPrivateUsage=none
 
     def get_user_brains(self, user_id):
         response = (
diff --git a/backend/models/chat.py b/backend/models/chat.py
index 266945e92..1634b55dd 100644
--- a/backend/models/chat.py
+++ b/backend/models/chat.py
@@ -9,10 +9,18 @@ class Chat:
     chat_name: str
 
     def __init__(self, chat_dict: dict):
-        self.chat_id = chat_dict.get("chat_id")
-        self.user_id = chat_dict.get("user_id")
-        self.creation_time = chat_dict.get("creation_time")
-        self.chat_name = chat_dict.get("chat_name")
+        self.chat_id = chat_dict.get(
+            "chat_id"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.user_id = chat_dict.get(
+            "user_id"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.creation_time = chat_dict.get(
+            "creation_time"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.chat_name = chat_dict.get(
+            "chat_name"
+        )  # pyright: ignore reportPrivateUsage=none
 
 
 @dataclass
@@ -24,11 +32,21 @@ class ChatHistory:
     message_time: str
 
     def __init__(self, chat_dict: dict):
-        self.chat_id = chat_dict.get("chat_id")
-        self.message_id = chat_dict.get("message_id")
-        self.user_message = chat_dict.get("user_message")
-        self.assistant = chat_dict.get("assistant")
-        self.message_time = chat_dict.get("message_time")
+        self.chat_id = chat_dict.get(
+            "chat_id"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.message_id = chat_dict.get(
+            "message_id"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.user_message = chat_dict.get(
+            "user_message"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.assistant = chat_dict.get(
+            "assistant"
+        )  # pyright: ignore reportPrivateUsage=none
+        self.message_time = chat_dict.get(
+            "message_time"
+        )  # pyright: ignore reportPrivateUsage=none
 
     def to_dict(self):
         return asdict(self)
diff --git a/backend/models/files.py b/backend/models/files.py
index a7a925870..5772bea43 100644
--- a/backend/models/files.py
+++ b/backend/models/files.py
@@ -6,11 +6,12 @@ from uuid import UUID
 from fastapi import UploadFile
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from logger import get_logger
-from models.brains import Brain
-from models.settings import CommonsDep, common_dependencies
 from pydantic import BaseModel
 from utils.file import compute_sha1_from_file
 
+from models.brains import Brain
+from models.settings import CommonsDep, common_dependencies
+
 logger = get_logger(__name__)
 
 
@@ -18,9 +19,9 @@ class File(BaseModel):
     id: Optional[UUID] = None
     file: Optional[UploadFile]
     file_name: Optional[str] = ""
-    file_size: Optional[int] = ""
+    file_size: Optional[int] = ""  # pyright: ignore reportPrivateUsage=none
     file_sha1: Optional[str] = ""
-    vectors_ids: Optional[int] = []
+    vectors_ids: Optional[int] = []  # pyright: ignore reportPrivateUsage=none
     file_extension: Optional[str] = ""
     content: Optional[Any] = None
     chunk_size: int = 500
@@ -30,16 +31,25 @@ class File(BaseModel):
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
-
+
         if self.file:
             self.file_name = self.file.filename
-            self.file_size = self.file.file._file.tell()
-            self.file_extension = os.path.splitext(self.file.filename)[-1].lower()
-
+            self.file_size = (
+                self.file.file._file.tell()  # pyright: ignore reportPrivateUsage=none
+            )
+            self.file_extension = os.path.splitext(
+                self.file.filename  # pyright: ignore reportPrivateUsage=none
+            )[-1].lower()
+
     async def compute_file_sha1(self):
-        with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
-            await self.file.seek(0)
-            self.content = await self.file.read()
+        with tempfile.NamedTemporaryFile(
+            delete=False,
+            suffix=self.file.filename,  # pyright: ignore reportPrivateUsage=none
+        ) as tmp_file:
+            await self.file.seek(0)  # pyright: ignore reportPrivateUsage=none
+            self.content = (
+                await self.file.read()  # pyright: ignore reportPrivateUsage=none
+            )
             tmp_file.write(self.content)
             tmp_file.flush()
             self.file_sha1 = compute_sha1_from_file(tmp_file.name)
@@ -48,18 +58,21 @@ class File(BaseModel):
 
     def compute_documents(self, loader_class):
         logger.info(f"Computing documents from file {self.file_name}")
-
+
         documents = []
-        with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
-            tmp_file.write(self.content)
+        with tempfile.NamedTemporaryFile(
+            delete=False,
+            suffix=self.file.filename,  # pyright: ignore reportPrivateUsage=none
+        ) as tmp_file:
+            tmp_file.write(self.content)  # pyright: ignore reportPrivateUsage=none
             tmp_file.flush()
 
             loader = loader_class(tmp_file.name)
             documents = loader.load()
-
+
             print("documents", documents)
 
         os.remove(tmp_file.name)
-
+
         text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
             chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
         )
@@ -70,20 +83,21 @@ class File(BaseModel):
 
     def set_file_vectors_ids(self):
         """
-        Set the vectors_ids property with the ids of the vectors
+        Set the vectors_ids property with the ids of the vectors
        that are associated with the file in the vectors table
        """
-        commons = common_dependencies()
+        commons = common_dependencies()
         response = (
-            commons["supabase"].table("vectors")
+            commons["supabase"]
+            .table("vectors")
             .select("id")
             .filter("metadata->>file_sha1", "eq", self.file_sha1)
             .execute()
         )
         self.vectors_ids = response.data
         return
-
+
     def file_already_exists(self):
         """
         Check if file already exists in vectors table
@@ -92,20 +106,24 @@ class File(BaseModel):
 
         print("file_sha1", self.file_sha1)
         print("vectors_ids", self.vectors_ids)
-        print("len(vectors_ids)", len(self.vectors_ids))
+        print(
+            "len(vectors_ids)",
+            len(self.vectors_ids),  # pyright: ignore reportPrivateUsage=none
+        )
 
         # if the file does not exist in vectors then no need to go check in brains_vectors
-        if len(self.vectors_ids) == 0:
+        if len(self.vectors_ids) == 0:  # pyright: ignore reportPrivateUsage=none
             return False
-
+
         return True
-
+
     def file_already_exists_in_brain(self, brain_id):
-        commons = common_dependencies()
+        commons = common_dependencies()
         self.set_file_vectors_ids()
         # Check if file exists in that brain
         response = (
-            commons["supabase"].table("brains_vectors")
+            commons["supabase"]
+            .table("brains_vectors")
             .select("brain_id, vector_id")
             .filter("brain_id", "eq", brain_id)
             .filter("file_sha1", "eq", self.file_sha1)
             .execute()
         )
         print("response.data", response.data)
         if len(response.data) == 0:
             return False
-
+
         return True
-
+
     def file_is_empty(self):
-        return self.file.file._file.tell() < 1
-
+        return (
+            self.file.file._file.tell() < 1  # pyright: ignore reportPrivateUsage=none
+        )
+
     def link_file_to_brain(self, brain: Brain):
         self.set_file_vectors_ids()
-        for vector_id in self.vectors_ids:
-            brain.create_brain_vector(vector_id['id'], self.file_sha1)
+        for vector_id in self.vectors_ids:  # pyright: ignore reportPrivateUsage=none
+            brain.create_brain_vector(vector_id["id"], self.file_sha1)
         print(f"Successfully linked file {self.file_sha1} to brain {brain.id}")
diff --git a/backend/models/settings.py b/backend/models/settings.py
index 058c988bb..8ce45cb4d 100644
--- a/backend/models/settings.py
+++ b/backend/models/settings.py
@@ -3,7 +3,7 @@ from typing import Annotated
 from fastapi import Depends
 from langchain.embeddings.openai import OpenAIEmbeddings
 from pydantic import BaseSettings
-from supabase import Client, create_client
+from supabase.client import Client, create_client
 from vectorstore.supabase import SupabaseVectorStore
 
 
@@ -22,8 +22,10 @@ class LLMSettings(BaseSettings):
 
 
 def common_dependencies() -> dict:
-    settings = BrainSettings()
-    embeddings = OpenAIEmbeddings(openai_api_key=settings.openai_api_key)
+    settings = BrainSettings()  # pyright: ignore reportPrivateUsage=none
+    embeddings = OpenAIEmbeddings(
+        openai_api_key=settings.openai_api_key
+    )  # pyright: ignore reportPrivateUsage=none
     supabase_client: Client = create_client(
         settings.supabase_url, settings.supabase_service_key
     )
diff --git a/backend/parsers/audio.py b/backend/parsers/audio.py
index 78ba75816..452972cff 100644
--- a/backend/parsers/audio.py
+++ b/backend/parsers/audio.py
@@ -10,23 +10,33 @@ from models.settings import CommonsDep
 from utils.file import compute_sha1_from_content
 
 
-async def process_audio(commons: CommonsDep, file: File, enable_summarization: bool, user, user_openai_api_key):
-
+async def process_audio(
+    commons: CommonsDep,  # pyright: ignore reportPrivateUsage=none
+    file: File,
+    enable_summarization: bool,
+    user,
+    user_openai_api_key,
+):
     temp_filename = None
     file_sha = ""
     dateshort = time.strftime("%Y%m%d-%H%M%S")
     file_meta_name = f"audiotranscript_{dateshort}.txt"
 
     # use this for whisper
-    openai_api_key = os.environ.get("OPENAI_API_KEY")
+    # (the openai client reads OPENAI_API_KEY from the environment by default)
     if user_openai_api_key:
-        openai_api_key = user_openai_api_key
+        openai.api_key = user_openai_api_key
 
     try:
         upload_file = file.file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=upload_file.filename) as tmp_file:
-            await upload_file.seek(0)
-            content = await upload_file.read()
+        with tempfile.NamedTemporaryFile(
+            delete=False,
+            suffix=upload_file.filename,  # pyright: ignore reportPrivateUsage=none
+        ) as tmp_file:
+            await upload_file.seek(0)  # pyright: ignore reportPrivateUsage=none
+            content = (
+                await upload_file.read()  # pyright: ignore reportPrivateUsage=none
+            )
             tmp_file.write(content)
             tmp_file.flush()
             tmp_file.close()
@@ -36,21 +46,42 @@ async def process_audio(
 
         with open(tmp_file.name, "rb") as audio_file:
             transcript = openai.Audio.transcribe("whisper-1", audio_file)
 
-        file_sha = compute_sha1_from_content(transcript.text.encode("utf-8"))
-        file_size = len(transcript.text.encode("utf-8"))
+        file_sha = compute_sha1_from_content(
+            transcript.text.encode("utf-8")  # pyright: ignore reportPrivateUsage=none
+        )
+        file_size = len(
+            transcript.text.encode("utf-8")  # pyright: ignore reportPrivateUsage=none
+        )
 
         chunk_size = 500
         chunk_overlap = 0
 
         text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
-            chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-        texts = text_splitter.split_text(transcript.text.encode("utf-8"))
+            chunk_size=chunk_size, chunk_overlap=chunk_overlap
+        )
+        texts = text_splitter.split_text(
+            transcript.text.encode("utf-8")  # pyright: ignore reportPrivateUsage=none
+        )
 
-        docs_with_metadata = [Document(page_content=text, metadata={"file_sha1": file_sha, "file_size": file_size, "file_name": file_meta_name,
-                                                                    "chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort}) for text in texts]
+        docs_with_metadata = [
+            Document(
+                page_content=text,
+                metadata={
+                    "file_sha1": file_sha,
+                    "file_size": file_size,
+                    "file_name": file_meta_name,
+                    "chunk_size": chunk_size,
+                    "chunk_overlap": chunk_overlap,
+                    "date": dateshort,
+                },
+            )
+            for text in texts
+        ]
 
-        commons.documents_vector_store.add_documents(docs_with_metadata)
+        commons.documents_vector_store.add_documents(  # pyright: ignore reportPrivateUsage=none
+            docs_with_metadata
+        )
 
     finally:
         if temp_filename and os.path.exists(temp_filename):
-            os.remove(temp_filename)
+            os.remove(temp_filename)
diff --git a/backend/parsers/common.py b/backend/parsers/common.py
index a4180042b..164889e7d 100644
--- a/backend/parsers/common.py
+++ b/backend/parsers/common.py
@@ -19,7 +19,7 @@ async def process_file(
 
     file.compute_documents(loader_class)
 
-    for doc in file.documents:
+    for doc in file.documents:  # pyright: ignore reportPrivateUsage=none
         metadata = {
             "file_sha1": file.file_sha1,
"file_size": file.file_size, @@ -29,17 +29,15 @@ async def process_file( "date": dateshort, "summarization": "true" if enable_summarization else "false", } - doc_with_metadata = Document( - page_content=doc.page_content, metadata=metadata) - + doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata) + neurons = Neurons(commons=commons) created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key) # add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap}) - created_vector_id = created_vector[0] + created_vector_id = created_vector[0] # pyright: ignore reportPrivateUsage=none brain = Brain(id=brain_id) brain.create_brain_vector(created_vector_id, file.file_sha1) return - diff --git a/backend/parsers/github.py b/backend/parsers/github.py index 3ab119b29..5425879d7 100644 --- a/backend/parsers/github.py +++ b/backend/parsers/github.py @@ -11,7 +11,13 @@ from utils.file import compute_sha1_from_content from utils.vectors import Neurons -async def process_github(commons: CommonsDep, repo, enable_summarization, brain_id, user_openai_api_key): +async def process_github( + commons: CommonsDep, # pyright: ignore reportPrivateUsage=none + repo, + enable_summarization, + brain_id, + user_openai_api_key, +): random_dir_name = os.urandom(16).hex() dateshort = time.strftime("%Y%m%d") loader = GitLoader( @@ -24,41 +30,60 @@ async def process_github(commons: CommonsDep, repo, enable_summarization, brain_ chunk_size = 500 chunk_overlap = 0 text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( - chunk_size=chunk_size, chunk_overlap=chunk_overlap) - + chunk_size=chunk_size, chunk_overlap=chunk_overlap + ) + documents = text_splitter.split_documents(documents) print(documents[:1]) for doc in documents: - if doc.metadata["file_type"] in [".pyc",".png",".svg", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git", ".json"]: + if doc.metadata["file_type"] in [ + ".pyc", + ".png", + ".svg", + ".env", + ".lock", + ".gitignore", + ".gitmodules", + ".gitattributes", + ".gitkeep", + ".git", + ".json", + ]: continue metadata = { "file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")), - "file_size": len(doc.page_content)*8, + "file_size": len(doc.page_content) * 8, "file_name": doc.metadata["file_name"], "chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort, - "summarization": "true" if enable_summarization else "false" + "summarization": "true" if enable_summarization else "false", } - doc_with_metadata = Document( - page_content=doc.page_content, metadata=metadata) - - file = File(file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8"))) - + doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata) + + file = File( + file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8")) + ) + file_exists = file.file_already_exists() if not file_exists: print(f"Creating entry for file {file.file_sha1} in vectors...") - neurons = Neurons(commons=commons) - created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key) + neurons = Neurons(commons=commons) + created_vector = neurons.create_vector( + doc_with_metadata, user_openai_api_key + ) print("Created vector sids ", created_vector) print("Created vector for ", doc.metadata["file_name"]) file_exists_in_brain = file.file_already_exists_in_brain(brain_id) if not file_exists_in_brain: - 
file.add_file_to_brain(brain_id) + file.add_file_to_brain(brain_id) # pyright: ignore reportPrivateUsage=none brain = Brain(id=brain_id) file.link_file_to_brain(brain) - return {"message": f"✅ Github with {len(documents)} files has been uploaded.", "type": "success"} + return { + "message": f"✅ Github with {len(documents)} files has been uploaded.", + "type": "success", + } diff --git a/backend/pyrightconfig.json b/backend/pyrightconfig.json new file mode 100644 index 000000000..eeab53266 --- /dev/null +++ b/backend/pyrightconfig.json @@ -0,0 +1,5 @@ +{ + "exclude": [ + "supabase" + ] +} \ No newline at end of file diff --git a/backend/repository/chat/get_chat_history.py b/backend/repository/chat/get_chat_history.py index 4de83b4e5..e83d19fba 100644 --- a/backend/repository/chat/get_chat_history.py +++ b/backend/repository/chat/get_chat_history.py @@ -1,6 +1,7 @@ +from typing import List # For type hinting + from models.chat import ChatHistory from models.settings import common_dependencies -from typing import List # For type hinting def get_chat_history(chat_id: str) -> List[ChatHistory]: @@ -16,4 +17,7 @@ def get_chat_history(chat_id: str) -> List[ChatHistory]: if history is None: return [] else: - return [ChatHistory(message) for message in history] + return [ + ChatHistory(message) # pyright: ignore reportPrivateUsage=none + for message in history + ] diff --git a/backend/repository/chat/update_chat.py b/backend/repository/chat/update_chat.py index e866333d9..7360392e7 100644 --- a/backend/repository/chat/update_chat.py +++ b/backend/repository/chat/update_chat.py @@ -21,7 +21,7 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat: if not chat_id: logger.error("No chat_id provided") - return + return # pyright: ignore reportPrivateUsage=none updates = {} @@ -41,4 +41,4 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat: logger.info(f"Chat {chat_id} updated") else: logger.info(f"No updates to apply for chat {chat_id}") - return updated_chat + return updated_chat # pyright: ignore reportPrivateUsage=none diff --git a/backend/repository/chat/update_chat_history.py b/backend/repository/chat/update_chat_history.py index 5680bc53c..a34aa314d 100644 --- a/backend/repository/chat/update_chat_history.py +++ b/backend/repository/chat/update_chat_history.py @@ -23,4 +23,4 @@ def update_chat_history(chat_id: str, user_message: str, assistant: str) -> Chat raise HTTPException( status_code=500, detail="An exception occurred while updating chat history." 
         )
-    return ChatHistory(response[0])
+    return ChatHistory(response[0])  # pyright: ignore reportPrivateUsage=none
diff --git a/backend/repository/chat/update_message_by_id.py b/backend/repository/chat/update_message_by_id.py
index 724afd703..09853e39f 100644
--- a/backend/repository/chat/update_message_by_id.py
+++ b/backend/repository/chat/update_message_by_id.py
@@ -6,13 +6,15 @@ logger = get_logger(__name__)
 
 
 def update_message_by_id(
-    message_id: str, user_message: str = None, assistant: str = None
+    message_id: str,
+    user_message: str = None,  # pyright: ignore reportPrivateUsage=none
+    assistant: str = None,  # pyright: ignore reportPrivateUsage=none
 ) -> ChatHistory:
     commons = common_dependencies()
 
     if not message_id:
         logger.error("No message_id provided")
-        return
+        return  # pyright: ignore reportPrivateUsage=none
 
     updates = {}
 
@@ -35,4 +37,4 @@ def update_message_by_id(
         logger.info(f"Message {message_id} updated")
     else:
         logger.info(f"No updates to apply for message {message_id}")
-    return ChatHistory(updated_message)
+    return ChatHistory(updated_message)  # pyright: ignore reportPrivateUsage=none
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 82c25fa1a..e6464a947 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -22,4 +22,5 @@ asyncpg==0.27.0
 flake8==6.0.0
 flake8-black==0.3.6
 sentence_transformers>=2.0.0
-sentry-sdk==1.26.0
\ No newline at end of file
+sentry-sdk==1.26.0
+pyright==1.1.316
\ No newline at end of file
diff --git a/backend/routes/brain_routes.py b/backend/routes/brain_routes.py
index 1440c91e3..616e1af36 100644
--- a/backend/routes/brain_routes.py
+++ b/backend/routes/brain_routes.py
@@ -142,24 +142,27 @@ async def create_brain_endpoint(
     In the brains table & in the brains_users table and put the creator user as 'Owner'
     """
 
-    brain = Brain(name=brain.name)
+    brain = Brain(name=brain.name)  # pyright: ignore reportPrivateUsage=none
 
-    brain.create_brain()
+    brain.create_brain()  # pyright: ignore reportPrivateUsage=none
     default_brain = get_default_user_brain(current_user)
     if default_brain:
         logger.info(f"Default brain already exists for user {current_user.id}")
-        brain.create_brain_user(
+        brain.create_brain_user(  # pyright: ignore reportPrivateUsage=none
             user_id=current_user.id, rights="Owner", default_brain=False
         )
     else:
         logger.info(
             f"Default brain does not exist for user {current_user.id}. It will be created."
         )
-        brain.create_brain_user(
+        brain.create_brain_user(  # pyright: ignore reportPrivateUsage=none
             user_id=current_user.id, rights="Owner", default_brain=True
         )
 
-    return {"id": brain.id, "name": brain.name}
+    return {
+        "id": brain.id,  # pyright: ignore reportPrivateUsage=none
+        "name": brain.name,
+    }
 
 
 # update existing brain
@@ -182,10 +185,12 @@ async def update_brain_endpoint(
     brain = Brain(id=brain_id)
 
     # Add new file to brain , il file_sha1 already exists in brains_vectors -> out (not now)
-    if brain.file_sha1:
+    if brain.file_sha1:  # pyright: ignore reportPrivateUsage=none
         # add all the vector Ids to the brains_vectors with the given brain.brain_id
-        brain.update_brain_with_file(file_sha1=input_brain.file_sha1)
+        brain.update_brain_with_file(
+            file_sha1=input_brain.file_sha1  # pyright: ignore reportPrivateUsage=none
+        )
         print("brain:", brain)
 
-    brain.update_brain_fields(commons, brain)
+    brain.update_brain_fields(commons, brain)  # pyright: ignore reportPrivateUsage=none
     return {"message": f"Brain {brain_id} has been updated."}
diff --git a/backend/routes/chat_routes.py b/backend/routes/chat_routes.py
index 18b9392a3..af9b9edf5 100644
--- a/backend/routes/chat_routes.py
+++ b/backend/routes/chat_routes.py
@@ -78,8 +78,8 @@ def check_user_limit(
         user.increment_user_request_count(date)
         if int(user.requests_count) >= int(max_requests_number):
             raise HTTPException(
-                status_code=429,
-                detail="You have reached the maximum number of requests for today.",
+                status_code=429,  # pyright: ignore reportPrivateUsage=none
+                detail="You have reached the maximum number of requests for today.",  # pyright: ignore reportPrivateUsage=none
             )
     else:
         pass
@@ -97,7 +97,7 @@ async def get_chats(current_user: User = Depends(get_current_user)):
     This endpoint retrieves all the chats associated with the current authenticated user. It returns a list of chat objects
     containing the chat ID and chat name for each chat.
     """
-    chats = get_user_chats(current_user.id)
+    chats = get_user_chats(current_user.id)  # pyright: ignore reportPrivateUsage=none
     return {"chats": chats}
@@ -127,10 +127,11 @@ async def update_chat_metadata_handler(
     Update chat attributes
     """
 
-    chat = get_chat_by_id(chat_id)
+    chat = get_chat_by_id(chat_id)  # pyright: ignore reportPrivateUsage=none
     if current_user.id != chat.user_id:
         raise HTTPException(
-            status_code=403, detail="You should be the owner of the chat to update it."
+            status_code=403,  # pyright: ignore reportPrivateUsage=none
+            detail="You should be the owner of the chat to update it.",  # pyright: ignore reportPrivateUsage=none
         )
     return update_chat(chat_id=chat_id, chat_data=chat_data)
@@ -181,7 +182,7 @@ async def create_question_handler(
                 temperature=chat_question.temperature,
                 max_tokens=chat_question.max_tokens,
                 brain_id=str(brain_id),
-                user_openai_api_key=current_user.user_openai_api_key,
+                user_openai_api_key=current_user.user_openai_api_key,  # pyright: ignore reportPrivateUsage=none
             )
 
         else:
@@ -191,10 +192,12 @@ async def create_question_handler(
                 max_tokens=chat_question.max_tokens,
                 temperature=chat_question.temperature,
                 brain_id=str(brain_id),
-                user_openai_api_key=current_user.user_openai_api_key,
+                user_openai_api_key=current_user.user_openai_api_key,  # pyright: ignore reportPrivateUsage=none
             )
 
-        chat_answer = gpt_answer_generator.generate_answer(chat_question.question)
+        chat_answer = gpt_answer_generator.generate_answer(  # pyright: ignore reportPrivateUsage=none
+            chat_question.question
+        )
 
         return chat_answer
 
     except HTTPException as e:
@@ -217,7 +220,10 @@ async def create_stream_question_handler(
     if chat_question.model not in streaming_compatible_models:
         # Forward the request to the none streaming endpoint
         return await create_question_handler(
-            request, chat_question, chat_id, current_user
+            request,
+            chat_question,
+            chat_id,
+            current_user,  # pyright: ignore reportPrivateUsage=none
         )
 
     try:
@@ -238,12 +244,14 @@ async def create_stream_question_handler(
             max_tokens=chat_question.max_tokens,
             temperature=chat_question.temperature,
             brain_id=str(brain_id),
-            user_openai_api_key=user_openai_api_key,
+            user_openai_api_key=user_openai_api_key,  # pyright: ignore reportPrivateUsage=none
             streaming=True,
         )
 
         return StreamingResponse(
-            gpt_answer_generator.generate_stream(chat_question.question),
+            gpt_answer_generator.generate_stream(  # pyright: ignore reportPrivateUsage=none
+                chat_question.question
+            ),
             media_type="text/event-stream",
         )
@@ -259,4 +267,4 @@ async def get_chat_history_handler(
     chat_id: UUID,
 ) -> List[ChatHistory]:
     # TODO: RBAC with current_user
-    return get_chat_history(chat_id)
+    return get_chat_history(chat_id)  # pyright: ignore reportPrivateUsage=none
diff --git a/backend/routes/crawl_routes.py b/backend/routes/crawl_routes.py
index fd7124c44..2e093170f 100644
--- a/backend/routes/crawl_routes.py
+++ b/backend/routes/crawl_routes.py
@@ -35,7 +35,9 @@ async def crawl_endpoint(
     commons = common_dependencies()
 
     if request.headers.get("Openai-Api-Key"):
-        brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
+        brain.max_brain_size = os.getenv(
+            "MAX_BRAIN_SIZE_WITH_KEY", 209715200
+        )  # pyright: ignore reportPrivateUsage=none
 
     file_size = 1000000
     remaining_free_space = brain.remaining_brain_size
@@ -47,14 +49,20 @@ async def crawl_endpoint(
         }
     else:
         if not crawl_website.checkGithub():
-            file_path, file_name = crawl_website.process()
+            (
+                file_path,
+                file_name,
+            ) = crawl_website.process()  # pyright: ignore reportPrivateUsage=none
             # Create a SpooledTemporaryFile from the file_path
             spooled_file = SpooledTemporaryFile()
             with open(file_path, "rb") as f:
                 shutil.copyfileobj(f, spooled_file)
 
             # Pass the SpooledTemporaryFile to UploadFile
-            uploadFile = UploadFile(file=spooled_file, filename=file_name)
+            uploadFile = UploadFile(
+                file=spooled_file,  # pyright: ignore reportPrivateUsage=none
+                filename=file_name,
+            )
             file = File(file=uploadFile)
             # check remaining free space here !!
             message = await filter_file(
diff --git a/backend/routes/upload_routes.py b/backend/routes/upload_routes.py
index 487d6e3b9..ab75b555d 100644
--- a/backend/routes/upload_routes.py
+++ b/backend/routes/upload_routes.py
@@ -42,7 +42,9 @@ async def upload_file(
     commons = common_dependencies()
 
     if request.headers.get("Openai-Api-Key"):
-        brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
+        brain.max_brain_size = os.getenv(
+            "MAX_BRAIN_SIZE_WITH_KEY", 209715200
+        )  # pyright: ignore reportPrivateUsage=none
 
     remaining_free_space = brain.remaining_brain_size
     file_size = get_file_size(uploadFile)
diff --git a/backend/test_main.py b/backend/test_main.py
index 485df9389..7f8141c97 100644
--- a/backend/test_main.py
+++ b/backend/test_main.py
@@ -10,6 +10,9 @@ client = TestClient(app)
 
 API_KEY = os.getenv("CI_TEST_API_KEY")
 
+if not API_KEY:
+    raise ValueError("CI_TEST_API_KEY environment variable not set. Cannot run tests.")
+
 
 def test_read_main():
     response = client.get("/")
@@ -54,7 +57,8 @@ def test_create_and_delete_api_key():
 
 def test_retrieve_default_brain():
     # Making a GET request to the /brains/default/ endpoint
     response = client.get(
-        "/brains/default/", headers={"Authorization": "Bearer " + API_KEY}
+        "/brains/default/",
+        headers={"Authorization": "Bearer " + API_KEY},
     )
 
     # Assert that the response status code is 200 (HTTP OK)
@@ -88,7 +92,9 @@ def test_create_brain():
 
     # Making a POST request to the /brains/ endpoint
     response = client.post(
-        "/brains/", json=payload, headers={"Authorization": "Bearer " + API_KEY}
+        "/brains/",
+        json=payload,
+        headers={"Authorization": "Bearer " + API_KEY},
     )
 
     # Assert that the response status code is 200 (HTTP OK)
@@ -106,7 +112,10 @@ def test_create_brain():
 
 def test_retrieve_all_brains():
     # Making a GET request to the /brains/ endpoint to retrieve all brains for the current user
-    response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY})
+    response = client.get(
+        "/brains/",
+        headers={"Authorization": "Bearer " + API_KEY},
+    )
 
     # Assert that the response status code is 200 (HTTP OK)
     assert response.status_code == 200
@@ -120,7 +129,10 @@ def test_retrieve_all_brains():
 
 def test_delete_all_brains():
     # First, retrieve all brains for the current user
-    response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY})
+    response = client.get(
+        "/brains/",
+        headers={"Authorization": "Bearer " + API_KEY},
+    )
 
     # Assert that the response status code is 200 (HTTP OK)
     assert response.status_code == 200
@@ -133,7 +145,8 @@ def test_delete_all_brains():
 
         # Send a DELETE request to delete the specific brain
         delete_response = client.delete(
-            f"/brains/{brain_id}/", headers={"Authorization": "Bearer " + API_KEY}
+            f"/brains/{brain_id}/",
+            headers={"Authorization": "Bearer " + API_KEY},
         )
 
         # Assert that the DELETE response status code is 200 (HTTP OK)
@@ -142,7 +155,10 @@ def test_delete_all_brains():
 
 def test_get_all_chats():
     # Making a GET request to the /chat endpoint to retrieve all chats
-    response = client.get("/chat", headers={"Authorization": "Bearer " + API_KEY})
+    response = client.get(
+        "/chat",
+        headers={"Authorization": "Bearer " + API_KEY},
+    )
 
     # Assert that the response status code is 200 (HTTP OK)
     assert response.status_code == 200
diff --git a/backend/utils/file.py b/backend/utils/file.py
index b713cf9aa..61d31de7c 100644
--- a/backend/utils/file.py
+++ b/backend/utils/file.py
@@ -18,8 +18,10 @@ def convert_bytes(bytes, precision=2):
 
 def get_file_size(file: UploadFile):
     # move the cursor to the end of the file
-    file.file._file.seek(0, 2)
-    file_size = file.file._file.tell()  # Getting the size of the file
+    file.file._file.seek(0, 2)  # pyright: ignore reportPrivateUsage=none
+    file_size = (
+        file.file._file.tell()  # pyright: ignore reportPrivateUsage=none
+    )  # Getting the size of the file
     # move the cursor back to the beginning of the file
     file.file.seek(0)
diff --git a/backend/utils/processors.py b/backend/utils/processors.py
index 4646163db..75e242ca7 100644
--- a/backend/utils/processors.py
+++ b/backend/utils/processors.py
@@ -54,14 +54,19 @@ async def filter_file(
 
     if file_exists_in_brain:
         return create_response(
-            f"🤔 {file.file.filename} already exists in brain {brain_id}.", "warning"
+            f"🤔 {file.file.filename} already exists in brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
+            "warning",
         )
     elif file.file_is_empty():
-        return create_response(f"❌ {file.file.filename} is empty.", "error")
+        return create_response(
+            f"❌ {file.file.filename} is empty.",  # pyright: ignore reportPrivateUsage=none
+            "error",  # pyright: ignore reportPrivateUsage=none
+        )
     elif file_exists:
         file.link_file_to_brain(brain=Brain(id=brain_id))
         return create_response(
-            f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", "success"
+            f"✅ {file.file.filename} has been uploaded to brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
+            "success",
        )
 
     if file.file_extension in file_processors:
@@ -70,14 +75,18 @@ async def filter_file(
                 commons, file, enable_summarization, brain_id, openai_api_key
             )
             return create_response(
-                f"✅ {file.file.filename} has been uploaded to brain {brain_id}.",
+                f"✅ {file.file.filename} has been uploaded to brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none
                 "success",
             )
         except Exception as e:  # Add more specific exceptions as needed.
print(f"Error processing file: {e}") return create_response( - f"⚠️ An error occurred while processing {file.file.filename}.", "error" + f"⚠️ An error occurred while processing {file.file.filename}.", # pyright: ignore reportPrivateUsage=none + "error", ) - return create_response(f"❌ {file.file.filename} is not supported.", "error") + return create_response( + f"❌ {file.file.filename} is not supported.", # pyright: ignore reportPrivateUsage=none + "error", + ) diff --git a/backend/utils/vectors.py b/backend/utils/vectors.py index c1b4cee71..4ecf34f55 100644 --- a/backend/utils/vectors.py +++ b/backend/utils/vectors.py @@ -13,7 +13,7 @@ logger = get_logger(__name__) class Neurons(BaseModel): commons: CommonsDep - settings = BrainSettings() + settings = BrainSettings() # pyright: ignore reportPrivateUsage=none def create_vector(self, doc, user_openai_api_key=None): logger.info("Creating vector for document") @@ -21,7 +21,7 @@ class Neurons(BaseModel): if user_openai_api_key: self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings( openai_api_key=user_openai_api_key - ) + ) # pyright: ignore reportPrivateUsage=none try: sids = self.commons["documents_vector_store"].add_documents([doc]) if sids and len(sids) > 0: @@ -64,7 +64,7 @@ def create_summary(commons: CommonsDep, document_id, content, metadata): def error_callback(exception): - print('An exception occurred:', exception) + print("An exception occurred:", exception) def process_batch(batch_ids): @@ -106,14 +106,14 @@ def get_unique_files_from_vector_ids(vectors_ids: List[int]): with ThreadPoolExecutor() as executor: futures = [] for i in range(0, len(vectors_ids), BATCH_SIZE): - batch_ids = vectors_ids[i:i + BATCH_SIZE] + batch_ids = vectors_ids[i : i + BATCH_SIZE] future = executor.submit(process_batch, batch_ids) futures.append(future) # Retrieve the results vectors_responses = [future.result() for future in futures] - + documents = [item for sublist in vectors_responses for item in sublist] - print('document', documents) + print("document", documents) unique_files = [dict(t) for t in set(tuple(d.items()) for d in documents)] return unique_files diff --git a/backend/vectorstore/supabase.py b/backend/vectorstore/supabase.py index 3e2edc929..1018f5a67 100644 --- a/backend/vectorstore/supabase.py +++ b/backend/vectorstore/supabase.py @@ -3,8 +3,7 @@ from typing import Any, List from langchain.docstore.document import Document from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import SupabaseVectorStore - -from supabase import Client +from supabase.client import Client class CustomSupabaseVectorStore(SupabaseVectorStore):