Feat/static analysis (#582)

* feat: add static analysis

* chore: update Makefile add static analysis script

* chore: add vscode extensions recommendations
This commit is contained in:
Mamadou DICKO 2023-07-10 14:27:49 +02:00 committed by GitHub
parent f2a06dc6de
commit 9e9f531c99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 400 additions and 186 deletions

8
.vscode/extensions.json vendored Normal file
View File

@ -0,0 +1,8 @@
{
"recommendations": [
"ms-pyright.pyright",
"dbaeumer.vscode-eslint",
"ms-python.vscode-pylance",
"ms-pyright.pyright"
]
}

View File

@ -12,7 +12,8 @@
"editor.defaultFormatter": "ms-python.black-formatter", "editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true, "editor.formatOnSave": true,
"editor.codeActionsOnSave": { "editor.codeActionsOnSave": {
"source.organizeImports": true "source.organizeImports": true,
"source.fixAll": true
} }
}, },
"[typescriptreact]": { "[typescriptreact]": {
@ -41,4 +42,7 @@
"**/.docusaurus/": true, "**/.docusaurus/": true,
"**/node_modules/": true, "**/node_modules/": true,
}, },
"python.linting.pycodestyleCategorySeverity.W": "Error",
"python.defaultInterpreterPath": "python3",
"python.linting.flake8CategorySeverity.W": "Error",
} }

View File

@ -5,3 +5,10 @@ dev:
prod: prod:
docker compose -f docker-compose.yml up --build docker compose -f docker-compose.yml up --build
# Run pyright static type checks, preferring `python3` and falling back to
# `python` when `python3` is not on PATH.
# The probe uses the POSIX redirection `>/dev/null 2>&1`: make runs recipes
# with /bin/sh, where the bash-only `&>` is parsed as "run in background, then
# redirect an empty command", which makes the condition always succeed.
test-type:
@if command -v python3 >/dev/null 2>&1; then \
python3 -m pyright; \
else \
python -m pyright; \
fi

View File

@ -1,12 +1,13 @@
import os import os
from typing import Optional from typing import Optional
from auth.api_key_handler import get_user_from_api_key, verify_api_key
from auth.jwt_token_handler import decode_access_token, verify_token
from fastapi import Depends, HTTPException, Request from fastapi import Depends, HTTPException, Request
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from models.users import User from models.users import User
from auth.api_key_handler import get_user_from_api_key, verify_api_key
from auth.jwt_token_handler import decode_access_token, verify_token
class AuthBearer(HTTPBearer): class AuthBearer(HTTPBearer):
def __init__(self, auto_error: bool = True): def __init__(self, auto_error: bool = True):
@ -20,7 +21,7 @@ class AuthBearer(HTTPBearer):
request request
) )
self.check_scheme(credentials) self.check_scheme(credentials)
token = credentials.credentials token = credentials.credentials # pyright: ignore reportPrivateUsage=none
return await self.authenticate( return await self.authenticate(
token, token,
) )
@ -52,7 +53,7 @@ class AuthBearer(HTTPBearer):
def get_test_user(self) -> User: def get_test_user(self) -> User:
return User( return User(
email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" # type: ignore
) # replace with test user information ) # replace with test user information

View File

@ -9,6 +9,9 @@ from models.users import User
SECRET_KEY = os.environ.get("JWT_SECRET_KEY") SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ALGORITHM = "HS256" ALGORITHM = "HS256"
if not SECRET_KEY:
raise ValueError("JWT_SECRET_KEY environment variable not set")
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None): def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
to_encode = data.copy() to_encode = data.copy()
@ -27,9 +30,12 @@ def decode_access_token(token: str) -> User:
token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False} token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
) )
except JWTError: except JWTError:
return None return None # pyright: ignore reportPrivateUsage=none
return User(email=payload.get("email"), id=payload.get("sub")) return User(
email=payload.get("email"),
id=payload.get("sub"), # pyright: ignore reportPrivateUsage=none
)
def verify_token(token: str): def verify_token(token: str):

View File

@ -4,7 +4,6 @@ import tempfile
import unicodedata import unicodedata
import requests import requests
from langchain.document_loaders import GitLoader
from pydantic import BaseModel from pydantic import BaseModel
@ -29,7 +28,7 @@ class CrawlWebsite(BaseModel):
file_name = slugify(self.url) + ".html" file_name = slugify(self.url) + ".html"
temp_file_path = os.path.join(tempfile.gettempdir(), file_name) temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
with open(temp_file_path, "w") as temp_file: with open(temp_file_path, "w") as temp_file:
temp_file.write(content) temp_file.write(content) # pyright: ignore reportPrivateUsage=none
# Process the file # Process the file
if content: if content:

View File

@ -20,19 +20,21 @@ class BaseBrainPicking(BaseModel):
""" """
# Instantiate settings # Instantiate settings
brain_settings = BrainSettings() brain_settings = BrainSettings() # type: ignore other parameters are optional
# Default class attributes # Default class attributes
model: str = None model: str = None # pyright: ignore reportPrivateUsage=none
temperature: float = 0.0 temperature: float = 0.0
chat_id: str = None chat_id: str = None # pyright: ignore reportPrivateUsage=none
brain_id: str = None brain_id: str = None # pyright: ignore reportPrivateUsage=none
max_tokens: int = 256 max_tokens: int = 256
user_openai_api_key: str = None user_openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
streaming: bool = False streaming: bool = False
openai_api_key: str = None openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
callbacks: List[AsyncCallbackHandler] = None callbacks: List[
AsyncCallbackHandler
] = None # pyright: ignore reportPrivateUsage=none
def _determine_api_key(self, openai_api_key, user_openai_api_key): def _determine_api_key(self, openai_api_key, user_openai_api_key):
"""If user provided an API key, use it.""" """If user provided an API key, use it."""
@ -55,10 +57,12 @@ class BaseBrainPicking(BaseModel):
def _determine_callback_array( def _determine_callback_array(
self, streaming self, streaming
) -> List[AsyncIteratorCallbackHandler]: ) -> List[AsyncIteratorCallbackHandler]: # pyright: ignore reportPrivateUsage=none
"""If streaming is set, set the AsyncIteratorCallbackHandler as the only callback.""" """If streaming is set, set the AsyncIteratorCallbackHandler as the only callback."""
if streaming: if streaming:
return [AsyncIteratorCallbackHandler] return [
AsyncIteratorCallbackHandler # pyright: ignore reportPrivateUsage=none
]
def __init__(self, **data): def __init__(self, **data):
super().__init__(**data) super().__init__(**data)
@ -66,8 +70,12 @@ class BaseBrainPicking(BaseModel):
self.openai_api_key = self._determine_api_key( self.openai_api_key = self._determine_api_key(
self.brain_settings.openai_api_key, self.user_openai_api_key self.brain_settings.openai_api_key, self.user_openai_api_key
) )
self.streaming = self._determine_streaming(self.model, self.streaming) self.streaming = self._determine_streaming(
self.callbacks = self._determine_callback_array(self.streaming) self.model, self.streaming
) # pyright: ignore reportPrivateUsage=none
self.callbacks = self._determine_callback_array(
self.streaming
) # pyright: ignore reportPrivateUsage=none
class Config: class Config:
"""Configuration of the Pydantic Object""" """Configuration of the Pydantic Object"""

View File

@ -1,4 +1,5 @@
from typing import Optional from typing import Optional
from .FunctionCall import FunctionCall from .FunctionCall import FunctionCall
@ -6,7 +7,7 @@ class OpenAiAnswer:
def __init__( def __init__(
self, self,
content: Optional[str] = None, content: Optional[str] = None,
function_call: FunctionCall = None, function_call: FunctionCall = None, # pyright: ignore reportPrivateUsage=none
): ):
self.content = content self.content = content
self.function_call = function_call self.function_call = function_call

View File

@ -13,11 +13,12 @@ from repository.chat.format_chat_history import format_chat_history
from repository.chat.get_chat_history import get_chat_history from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history from repository.chat.update_chat_history import update_chat_history
from repository.chat.update_message_by_id import update_message_by_id from repository.chat.update_message_by_id import update_message_by_id
from supabase import Client, create_client from supabase.client import Client, create_client
from vectorstore.supabase import ( from vectorstore.supabase import (
CustomSupabaseVectorStore, CustomSupabaseVectorStore,
) # Custom class for handling vector storage with Supabase )
# Custom class for handling vector storage with Supabase
from .base import BaseBrainPicking from .base import BaseBrainPicking
from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
@ -42,7 +43,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
max_tokens: int, max_tokens: int,
user_openai_api_key: str, user_openai_api_key: str,
streaming: bool = False, streaming: bool = False,
) -> "OpenAIBrainPicking": ) -> "OpenAIBrainPicking": # pyright: ignore reportPrivateUsage=none
""" """
Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains. Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
:return: OpenAIBrainPicking instance :return: OpenAIBrainPicking instance
@ -59,7 +60,9 @@ class OpenAIBrainPicking(BaseBrainPicking):
@property @property
def embeddings(self) -> OpenAIEmbeddings: def embeddings(self) -> OpenAIEmbeddings:
return OpenAIEmbeddings(openai_api_key=self.openai_api_key) return OpenAIEmbeddings(
openai_api_key=self.openai_api_key
) # pyright: ignore reportPrivateUsage=none
@property @property
def supabase_client(self) -> Client: def supabase_client(self) -> Client:
@ -92,14 +95,16 @@ class OpenAIBrainPicking(BaseBrainPicking):
@property @property
def doc_chain(self) -> LLMChain: def doc_chain(self) -> LLMChain:
return load_qa_chain(llm=self.doc_llm, chain_type="stuff") return load_qa_chain(
llm=self.doc_llm, chain_type="stuff"
) # pyright: ignore reportPrivateUsage=none
@property @property
def qa(self) -> ConversationalRetrievalChain: def qa(self) -> ConversationalRetrievalChain:
return ConversationalRetrievalChain( return ConversationalRetrievalChain(
retriever=self.vector_store.as_retriever(), retriever=self.vector_store.as_retriever(),
question_generator=self.question_generator, question_generator=self.question_generator,
combine_docs_chain=self.doc_chain, combine_docs_chain=self.doc_chain, # pyright: ignore reportPrivateUsage=none
verbose=True, verbose=True,
) )
@ -116,7 +121,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
model=model, model=model,
streaming=streaming, streaming=streaming,
callbacks=callbacks, callbacks=callbacks,
) ) # pyright: ignore reportPrivateUsage=none
def _call_chain(self, chain, question, history): def _call_chain(self, chain, question, history):
""" """
@ -205,8 +210,10 @@ class OpenAIBrainPicking(BaseBrainPicking):
task = asyncio.create_task( task = asyncio.create_task(
wrap_done( wrap_done(
self.qa._acall_chain(self.qa, question, transformed_history), self.qa._acall_chain( # pyright: ignore reportPrivateUsage=none
callback.done, self.qa, question, transformed_history
),
callback.done, # pyright: ignore reportPrivateUsage=none
) )
) )
@ -217,7 +224,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
) )
# Use the aiter method of the callback to stream the response with server-sent-events # Use the aiter method of the callback to stream the response with server-sent-events
async for token in callback.aiter(): async for token in callback.aiter(): # pyright: ignore reportPrivateUsage=none
logger.info("Token: %s", token) logger.info("Token: %s", token)
# Add the token to the response_tokens list # Add the token to the response_tokens list

View File

@ -2,15 +2,16 @@ from typing import Any, Dict, List, Optional
from langchain.chat_models import ChatOpenAI from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings
from llm.models.FunctionCall import FunctionCall
from llm.models.OpenAiAnswer import OpenAiAnswer
from logger import get_logger from logger import get_logger
from models.chat import ChatHistory from models.chat import ChatHistory
from repository.chat.get_chat_history import get_chat_history from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history from repository.chat.update_chat_history import update_chat_history
from supabase import Client, create_client from supabase.client import Client, create_client
from vectorstore.supabase import CustomSupabaseVectorStore from vectorstore.supabase import CustomSupabaseVectorStore
from llm.models.FunctionCall import FunctionCall
from llm.models.OpenAiAnswer import OpenAiAnswer
from .base import BaseBrainPicking from .base import BaseBrainPicking
logger = get_logger(__name__) logger = get_logger(__name__)
@ -27,7 +28,10 @@ def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer:
answer["function_call"]["arguments"], answer["function_call"]["arguments"],
) )
return OpenAiAnswer(content=content, function_call=function_call) return OpenAiAnswer(
content=content,
function_call=function_call, # pyright: ignore reportPrivateUsage=none
)
class OpenAIFunctionsBrainPicking(BaseBrainPicking): class OpenAIFunctionsBrainPicking(BaseBrainPicking):
@ -48,7 +52,7 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
brain_id: str, brain_id: str,
user_openai_api_key: str, user_openai_api_key: str,
# TODO: add streaming # TODO: add streaming
) -> "OpenAIFunctionsBrainPicking": ) -> "OpenAIFunctionsBrainPicking": # pyright: ignore reportPrivateUsage=none
super().__init__( super().__init__(
model=model, model=model,
chat_id=chat_id, chat_id=chat_id,
@ -61,11 +65,15 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
@property @property
def openai_client(self) -> ChatOpenAI: def openai_client(self) -> ChatOpenAI:
return ChatOpenAI(openai_api_key=self.openai_api_key) return ChatOpenAI(
openai_api_key=self.openai_api_key
) # pyright: ignore reportPrivateUsage=none
@property @property
def embeddings(self) -> OpenAIEmbeddings: def embeddings(self) -> OpenAIEmbeddings:
return OpenAIEmbeddings(openai_api_key=self.openai_api_key) return OpenAIEmbeddings(
openai_api_key=self.openai_api_key
) # pyright: ignore reportPrivateUsage=none
@property @property
def supabase_client(self) -> Client: def supabase_client(self) -> Client:
@ -125,7 +133,9 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
""" """
logger.info("Getting context") logger.info("Getting context")
return self.vector_store.similarity_search(query=question) return self.vector_store.similarity_search(
query=question
) # pyright: ignore reportPrivateUsage=none
def _construct_prompt( def _construct_prompt(
self, question: str, useContext: bool = False, useHistory: bool = False self, question: str, useContext: bool = False, useHistory: bool = False

View File

@ -21,7 +21,7 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
chat_id: str, chat_id: str,
brain_id: str, brain_id: str,
streaming: bool, streaming: bool,
) -> "PrivateGPT4AllBrainPicking": ) -> "PrivateGPT4AllBrainPicking": # pyright: ignore reportPrivateUsage=none
""" """
Initialize the PrivateBrainPicking class by calling the parent class's initializer. Initialize the PrivateBrainPicking class by calling the parent class's initializer.
:param brain_id: The brain_id in the DB. :param brain_id: The brain_id in the DB.
@ -57,4 +57,4 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
n_batch=model_n_batch, n_batch=model_n_batch,
backend="gptj", backend="gptj",
verbose=True, verbose=True,
) ) # pyright: ignore reportPrivateUsage=none

View File

@ -31,7 +31,7 @@ Summarize the following text:
{{/assistant~}} {{/assistant~}}
""", """,
llm=summary_llm, llm=summary_llm,
) ) # pyright: ignore reportPrivateUsage=none
summary = summary(document=document) summary = summary(document=document)
logger.info("Summarization: %s", summary) logger.info("Summarization: %s", summary)
@ -78,10 +78,12 @@ Summary
{{/assistant~}} {{/assistant~}}
""", """,
llm=evaluation_llm, llm=evaluation_llm,
) ) # pyright: ignore reportPrivateUsage=none
result = evaluation(question=question, summaries=summaries) result = evaluation(question=question, summaries=summaries)
evaluations = {} evaluations = {}
for evaluation in result["evaluation"].split("\n"): for evaluation in result["evaluation"].split(
"\n"
): # pyright: ignore reportPrivateUsage=none
if evaluation == "" or not evaluation[0].isdigit(): if evaluation == "" or not evaluation[0].isdigit():
continue continue
logger.info("Evaluation Row: %s", evaluation) logger.info("Evaluation Row: %s", evaluation)

View File

@ -2,11 +2,12 @@ import os
from typing import Any, List, Optional from typing import Any, List, Optional
from uuid import UUID from uuid import UUID
from models.settings import CommonsDep, common_dependencies
from models.users import User
from pydantic import BaseModel from pydantic import BaseModel
from utils.vectors import get_unique_files_from_vector_ids from utils.vectors import get_unique_files_from_vector_ids
from models.settings import CommonsDep, common_dependencies
from models.users import User
class Brain(BaseModel): class Brain(BaseModel):
id: Optional[UUID] = None id: Optional[UUID] = None
@ -15,7 +16,7 @@ class Brain(BaseModel):
model: Optional[str] = "gpt-3.5-turbo-0613" model: Optional[str] = "gpt-3.5-turbo-0613"
temperature: Optional[float] = 0.0 temperature: Optional[float] = 0.0
max_tokens: Optional[int] = 256 max_tokens: Optional[int] = 256
brain_size: Optional[float] = 0.0 brain_size: Optional[float] = 0.0 # pyright: ignore reportPrivateUsage=none
max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0)) max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
files: List[Any] = [] files: List[Any] = []
_commons: Optional[CommonsDep] = None _commons: Optional[CommonsDep] = None
@ -27,7 +28,7 @@ class Brain(BaseModel):
def commons(self) -> CommonsDep: def commons(self) -> CommonsDep:
if not self._commons: if not self._commons:
self.__class__._commons = common_dependencies() self.__class__._commons = common_dependencies()
return self._commons return self._commons # pyright: ignore reportPrivateUsage=none
@property @property
def brain_size(self): def brain_size(self):
@ -39,12 +40,17 @@ class Brain(BaseModel):
@property @property
def remaining_brain_size(self): def remaining_brain_size(self):
return float(self.max_brain_size) - self.brain_size return (
float(self.max_brain_size) # pyright: ignore reportPrivateUsage=none
- self.brain_size # pyright: ignore reportPrivateUsage=none
)
@classmethod @classmethod
def create(cls, *args, **kwargs): def create(cls, *args, **kwargs):
commons = common_dependencies() commons = common_dependencies()
return cls(commons=commons, *args, **kwargs) return cls(
commons=commons, *args, **kwargs # pyright: ignore reportPrivateUsage=none
) # pyright: ignore reportPrivateUsage=none
def get_user_brains(self, user_id): def get_user_brains(self, user_id):
response = ( response = (

View File

@ -9,10 +9,18 @@ class Chat:
chat_name: str chat_name: str
def __init__(self, chat_dict: dict): def __init__(self, chat_dict: dict):
self.chat_id = chat_dict.get("chat_id") self.chat_id = chat_dict.get(
self.user_id = chat_dict.get("user_id") "chat_id"
self.creation_time = chat_dict.get("creation_time") ) # pyright: ignore reportPrivateUsage=none
self.chat_name = chat_dict.get("chat_name") self.user_id = chat_dict.get(
"user_id"
) # pyright: ignore reportPrivateUsage=none
self.creation_time = chat_dict.get(
"creation_time"
) # pyright: ignore reportPrivateUsage=none
self.chat_name = chat_dict.get(
"chat_name"
) # pyright: ignore reportPrivateUsage=none
@dataclass @dataclass
@ -24,11 +32,21 @@ class ChatHistory:
message_time: str message_time: str
def __init__(self, chat_dict: dict): def __init__(self, chat_dict: dict):
self.chat_id = chat_dict.get("chat_id") self.chat_id = chat_dict.get(
self.message_id = chat_dict.get("message_id") "chat_id"
self.user_message = chat_dict.get("user_message") ) # pyright: ignore reportPrivateUsage=none
self.assistant = chat_dict.get("assistant") self.message_id = chat_dict.get(
self.message_time = chat_dict.get("message_time") "message_id"
) # pyright: ignore reportPrivateUsage=none
self.user_message = chat_dict.get(
"user_message"
) # pyright: ignore reportPrivateUsage=none
self.assistant = chat_dict.get(
"assistant"
) # pyright: ignore reportPrivateUsage=none
self.message_time = chat_dict.get(
"message_time"
) # pyright: ignore reportPrivateUsage=none
def to_dict(self): def to_dict(self):
return asdict(self) return asdict(self)

View File

@ -6,11 +6,12 @@ from uuid import UUID
from fastapi import UploadFile from fastapi import UploadFile
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from logger import get_logger from logger import get_logger
from models.brains import Brain
from models.settings import CommonsDep, common_dependencies
from pydantic import BaseModel from pydantic import BaseModel
from utils.file import compute_sha1_from_file from utils.file import compute_sha1_from_file
from models.brains import Brain
from models.settings import CommonsDep, common_dependencies
logger = get_logger(__name__) logger = get_logger(__name__)
@ -18,9 +19,9 @@ class File(BaseModel):
id: Optional[UUID] = None id: Optional[UUID] = None
file: Optional[UploadFile] file: Optional[UploadFile]
file_name: Optional[str] = "" file_name: Optional[str] = ""
file_size: Optional[int] = "" file_size: Optional[int] = "" # pyright: ignore reportPrivateUsage=none
file_sha1: Optional[str] = "" file_sha1: Optional[str] = ""
vectors_ids: Optional[int] = [] vectors_ids: Optional[int] = [] # pyright: ignore reportPrivateUsage=none
file_extension: Optional[str] = "" file_extension: Optional[str] = ""
content: Optional[Any] = None content: Optional[Any] = None
chunk_size: int = 500 chunk_size: int = 500
@ -33,13 +34,22 @@ class File(BaseModel):
if self.file: if self.file:
self.file_name = self.file.filename self.file_name = self.file.filename
self.file_size = self.file.file._file.tell() self.file_size = (
self.file_extension = os.path.splitext(self.file.filename)[-1].lower() self.file.file._file.tell() # pyright: ignore reportPrivateUsage=none
)
self.file_extension = os.path.splitext(
self.file.filename # pyright: ignore reportPrivateUsage=none
)[-1].lower()
async def compute_file_sha1(self): async def compute_file_sha1(self):
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file: with tempfile.NamedTemporaryFile(
await self.file.seek(0) delete=False,
self.content = await self.file.read() suffix=self.file.filename, # pyright: ignore reportPrivateUsage=none
) as tmp_file:
await self.file.seek(0) # pyright: ignore reportPrivateUsage=none
self.content = (
await self.file.read() # pyright: ignore reportPrivateUsage=none
)
tmp_file.write(self.content) tmp_file.write(self.content)
tmp_file.flush() tmp_file.flush()
self.file_sha1 = compute_sha1_from_file(tmp_file.name) self.file_sha1 = compute_sha1_from_file(tmp_file.name)
@ -50,8 +60,11 @@ class File(BaseModel):
logger.info(f"Computing documents from file {self.file_name}") logger.info(f"Computing documents from file {self.file_name}")
documents = [] documents = []
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file: with tempfile.NamedTemporaryFile(
tmp_file.write(self.content) delete=False,
suffix=self.file.filename, # pyright: ignore reportPrivateUsage=none
) as tmp_file:
tmp_file.write(self.content) # pyright: ignore reportPrivateUsage=none
tmp_file.flush() tmp_file.flush()
loader = loader_class(tmp_file.name) loader = loader_class(tmp_file.name)
documents = loader.load() documents = loader.load()
@ -76,7 +89,8 @@ class File(BaseModel):
commons = common_dependencies() commons = common_dependencies()
response = ( response = (
commons["supabase"].table("vectors") commons["supabase"]
.table("vectors")
.select("id") .select("id")
.filter("metadata->>file_sha1", "eq", self.file_sha1) .filter("metadata->>file_sha1", "eq", self.file_sha1)
.execute() .execute()
@ -92,10 +106,13 @@ class File(BaseModel):
print("file_sha1", self.file_sha1) print("file_sha1", self.file_sha1)
print("vectors_ids", self.vectors_ids) print("vectors_ids", self.vectors_ids)
print("len(vectors_ids)", len(self.vectors_ids)) print(
"len(vectors_ids)",
len(self.vectors_ids), # pyright: ignore reportPrivateUsage=none
)
# if the file does not exist in vectors then no need to go check in brains_vectors # if the file does not exist in vectors then no need to go check in brains_vectors
if len(self.vectors_ids) == 0: if len(self.vectors_ids) == 0: # pyright: ignore reportPrivateUsage=none
return False return False
return True return True
@ -105,7 +122,8 @@ class File(BaseModel):
self.set_file_vectors_ids() self.set_file_vectors_ids()
# Check if file exists in that brain # Check if file exists in that brain
response = ( response = (
commons["supabase"].table("brains_vectors") commons["supabase"]
.table("brains_vectors")
.select("brain_id, vector_id") .select("brain_id, vector_id")
.filter("brain_id", "eq", brain_id) .filter("brain_id", "eq", brain_id)
.filter("file_sha1", "eq", self.file_sha1) .filter("file_sha1", "eq", self.file_sha1)
@ -118,11 +136,13 @@ class File(BaseModel):
return True return True
def file_is_empty(self): def file_is_empty(self):
return self.file.file._file.tell() < 1 return (
self.file.file._file.tell() < 1 # pyright: ignore reportPrivateUsage=none
)
def link_file_to_brain(self, brain: Brain): def link_file_to_brain(self, brain: Brain):
self.set_file_vectors_ids() self.set_file_vectors_ids()
for vector_id in self.vectors_ids: for vector_id in self.vectors_ids: # pyright: ignore reportPrivateUsage=none
brain.create_brain_vector(vector_id['id'], self.file_sha1) brain.create_brain_vector(vector_id["id"], self.file_sha1)
print(f"Successfully linked file {self.file_sha1} to brain {brain.id}") print(f"Successfully linked file {self.file_sha1} to brain {brain.id}")

View File

@ -3,7 +3,7 @@ from typing import Annotated
from fastapi import Depends from fastapi import Depends
from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings
from pydantic import BaseSettings from pydantic import BaseSettings
from supabase import Client, create_client from supabase.client import Client, create_client
from vectorstore.supabase import SupabaseVectorStore from vectorstore.supabase import SupabaseVectorStore
@ -22,8 +22,10 @@ class LLMSettings(BaseSettings):
def common_dependencies() -> dict: def common_dependencies() -> dict:
settings = BrainSettings() settings = BrainSettings() # pyright: ignore reportPrivateUsage=none
embeddings = OpenAIEmbeddings(openai_api_key=settings.openai_api_key) embeddings = OpenAIEmbeddings(
openai_api_key=settings.openai_api_key
) # pyright: ignore reportPrivateUsage=none
supabase_client: Client = create_client( supabase_client: Client = create_client(
settings.supabase_url, settings.supabase_service_key settings.supabase_url, settings.supabase_service_key
) )

View File

@ -10,23 +10,33 @@ from models.settings import CommonsDep
from utils.file import compute_sha1_from_content from utils.file import compute_sha1_from_content
async def process_audio(commons: CommonsDep, file: File, enable_summarization: bool, user, user_openai_api_key): async def process_audio(
commons: CommonsDep, # pyright: ignore reportPrivateUsage=none
file: File,
enable_summarization: bool,
user,
user_openai_api_key,
):
temp_filename = None temp_filename = None
file_sha = "" file_sha = ""
dateshort = time.strftime("%Y%m%d-%H%M%S") dateshort = time.strftime("%Y%m%d-%H%M%S")
file_meta_name = f"audiotranscript_{dateshort}.txt" file_meta_name = f"audiotranscript_{dateshort}.txt"
# use this for whisper # use this for whisper
openai_api_key = os.environ.get("OPENAI_API_KEY") os.environ.get("OPENAI_API_KEY")
if user_openai_api_key: if user_openai_api_key:
openai_api_key = user_openai_api_key pass
try: try:
upload_file = file.file upload_file = file.file
with tempfile.NamedTemporaryFile(delete=False, suffix=upload_file.filename) as tmp_file: with tempfile.NamedTemporaryFile(
await upload_file.seek(0) delete=False,
content = await upload_file.read() suffix=upload_file.filename, # pyright: ignore reportPrivateUsage=none
) as tmp_file:
await upload_file.seek(0) # pyright: ignore reportPrivateUsage=none
content = (
await upload_file.read() # pyright: ignore reportPrivateUsage=none
)
tmp_file.write(content) tmp_file.write(content)
tmp_file.flush() tmp_file.flush()
tmp_file.close() tmp_file.close()
@ -36,21 +46,42 @@ async def process_audio(commons: CommonsDep, file: File, enable_summarization: b
with open(tmp_file.name, "rb") as audio_file: with open(tmp_file.name, "rb") as audio_file:
transcript = openai.Audio.transcribe("whisper-1", audio_file) transcript = openai.Audio.transcribe("whisper-1", audio_file)
file_sha = compute_sha1_from_content(transcript.text.encode("utf-8")) file_sha = compute_sha1_from_content(
file_size = len(transcript.text.encode("utf-8")) transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
)
file_size = len(
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
)
chunk_size = 500 chunk_size = 500
chunk_overlap = 0 chunk_overlap = 0
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=chunk_size, chunk_overlap=chunk_overlap) chunk_size=chunk_size, chunk_overlap=chunk_overlap
texts = text_splitter.split_text(transcript.text.encode("utf-8")) )
texts = text_splitter.split_text(
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
)
docs_with_metadata = [Document(page_content=text, metadata={"file_sha1": file_sha, "file_size": file_size, "file_name": file_meta_name, docs_with_metadata = [
"chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort}) for text in texts] Document(
page_content=text,
metadata={
"file_sha1": file_sha,
"file_size": file_size,
"file_name": file_meta_name,
"chunk_size": chunk_size,
"chunk_overlap": chunk_overlap,
"date": dateshort,
},
)
for text in texts
]
commons.documents_vector_store.add_documents(docs_with_metadata) commons.documents_vector_store.add_documents( # pyright: ignore reportPrivateUsage=none
docs_with_metadata
)
finally: finally:
if temp_filename and os.path.exists(temp_filename): if temp_filename and os.path.exists(temp_filename):
os.remove(temp_filename) os.remove(temp_filename)

View File

@ -19,7 +19,7 @@ async def process_file(
file.compute_documents(loader_class) file.compute_documents(loader_class)
for doc in file.documents: for doc in file.documents: # pyright: ignore reportPrivateUsage=none
metadata = { metadata = {
"file_sha1": file.file_sha1, "file_sha1": file.file_sha1,
"file_size": file.file_size, "file_size": file.file_size,
@ -29,17 +29,15 @@ async def process_file(
"date": dateshort, "date": dateshort,
"summarization": "true" if enable_summarization else "false", "summarization": "true" if enable_summarization else "false",
} }
doc_with_metadata = Document( doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
page_content=doc.page_content, metadata=metadata)
neurons = Neurons(commons=commons) neurons = Neurons(commons=commons)
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key) created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
# add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap}) # add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
created_vector_id = created_vector[0] created_vector_id = created_vector[0] # pyright: ignore reportPrivateUsage=none
brain = Brain(id=brain_id) brain = Brain(id=brain_id)
brain.create_brain_vector(created_vector_id, file.file_sha1) brain.create_brain_vector(created_vector_id, file.file_sha1)
return return

View File

@ -11,7 +11,13 @@ from utils.file import compute_sha1_from_content
from utils.vectors import Neurons from utils.vectors import Neurons
async def process_github(commons: CommonsDep, repo, enable_summarization, brain_id, user_openai_api_key): async def process_github(
commons: CommonsDep, # pyright: ignore reportPrivateUsage=none
repo,
enable_summarization,
brain_id,
user_openai_api_key,
):
random_dir_name = os.urandom(16).hex() random_dir_name = os.urandom(16).hex()
dateshort = time.strftime("%Y%m%d") dateshort = time.strftime("%Y%m%d")
loader = GitLoader( loader = GitLoader(
@ -24,41 +30,60 @@ async def process_github(commons: CommonsDep, repo, enable_summarization, brain_
chunk_size = 500 chunk_size = 500
chunk_overlap = 0 chunk_overlap = 0
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=chunk_size, chunk_overlap=chunk_overlap) chunk_size=chunk_size, chunk_overlap=chunk_overlap
)
documents = text_splitter.split_documents(documents) documents = text_splitter.split_documents(documents)
print(documents[:1]) print(documents[:1])
for doc in documents: for doc in documents:
if doc.metadata["file_type"] in [".pyc",".png",".svg", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git", ".json"]: if doc.metadata["file_type"] in [
".pyc",
".png",
".svg",
".env",
".lock",
".gitignore",
".gitmodules",
".gitattributes",
".gitkeep",
".git",
".json",
]:
continue continue
metadata = { metadata = {
"file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")), "file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")),
"file_size": len(doc.page_content)*8, "file_size": len(doc.page_content) * 8,
"file_name": doc.metadata["file_name"], "file_name": doc.metadata["file_name"],
"chunk_size": chunk_size, "chunk_size": chunk_size,
"chunk_overlap": chunk_overlap, "chunk_overlap": chunk_overlap,
"date": dateshort, "date": dateshort,
"summarization": "true" if enable_summarization else "false" "summarization": "true" if enable_summarization else "false",
} }
doc_with_metadata = Document( doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
page_content=doc.page_content, metadata=metadata)
file = File(file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8"))) file = File(
file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8"))
)
file_exists = file.file_already_exists() file_exists = file.file_already_exists()
if not file_exists: if not file_exists:
print(f"Creating entry for file {file.file_sha1} in vectors...") print(f"Creating entry for file {file.file_sha1} in vectors...")
neurons = Neurons(commons=commons) neurons = Neurons(commons=commons)
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key) created_vector = neurons.create_vector(
doc_with_metadata, user_openai_api_key
)
print("Created vector sids ", created_vector) print("Created vector sids ", created_vector)
print("Created vector for ", doc.metadata["file_name"]) print("Created vector for ", doc.metadata["file_name"])
file_exists_in_brain = file.file_already_exists_in_brain(brain_id) file_exists_in_brain = file.file_already_exists_in_brain(brain_id)
if not file_exists_in_brain: if not file_exists_in_brain:
file.add_file_to_brain(brain_id) file.add_file_to_brain(brain_id) # pyright: ignore reportPrivateUsage=none
brain = Brain(id=brain_id) brain = Brain(id=brain_id)
file.link_file_to_brain(brain) file.link_file_to_brain(brain)
return {"message": f"✅ Github with {len(documents)} files has been uploaded.", "type": "success"} return {
"message": f"✅ Github with {len(documents)} files has been uploaded.",
"type": "success",
}

View File

@ -0,0 +1,5 @@
{
"exclude": [
"supabase"
]
}

View File

@ -1,6 +1,7 @@
from typing import List # For type hinting
from models.chat import ChatHistory from models.chat import ChatHistory
from models.settings import common_dependencies from models.settings import common_dependencies
from typing import List # For type hinting
def get_chat_history(chat_id: str) -> List[ChatHistory]: def get_chat_history(chat_id: str) -> List[ChatHistory]:
@ -16,4 +17,7 @@ def get_chat_history(chat_id: str) -> List[ChatHistory]:
if history is None: if history is None:
return [] return []
else: else:
return [ChatHistory(message) for message in history] return [
ChatHistory(message) # pyright: ignore reportPrivateUsage=none
for message in history
]

View File

@ -21,7 +21,7 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat:
if not chat_id: if not chat_id:
logger.error("No chat_id provided") logger.error("No chat_id provided")
return return # pyright: ignore reportPrivateUsage=none
updates = {} updates = {}
@ -41,4 +41,4 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat:
logger.info(f"Chat {chat_id} updated") logger.info(f"Chat {chat_id} updated")
else: else:
logger.info(f"No updates to apply for chat {chat_id}") logger.info(f"No updates to apply for chat {chat_id}")
return updated_chat return updated_chat # pyright: ignore reportPrivateUsage=none

View File

@ -23,4 +23,4 @@ def update_chat_history(chat_id: str, user_message: str, assistant: str) -> Chat
raise HTTPException( raise HTTPException(
status_code=500, detail="An exception occurred while updating chat history." status_code=500, detail="An exception occurred while updating chat history."
) )
return ChatHistory(response[0]) return ChatHistory(response[0]) # pyright: ignore reportPrivateUsage=none

View File

@ -6,13 +6,15 @@ logger = get_logger(__name__)
def update_message_by_id( def update_message_by_id(
message_id: str, user_message: str = None, assistant: str = None message_id: str,
user_message: str = None, # pyright: ignore reportPrivateUsage=none
assistant: str = None, # pyright: ignore reportPrivateUsage=none
) -> ChatHistory: ) -> ChatHistory:
commons = common_dependencies() commons = common_dependencies()
if not message_id: if not message_id:
logger.error("No message_id provided") logger.error("No message_id provided")
return return # pyright: ignore reportPrivateUsage=none
updates = {} updates = {}
@ -35,4 +37,4 @@ def update_message_by_id(
logger.info(f"Message {message_id} updated") logger.info(f"Message {message_id} updated")
else: else:
logger.info(f"No updates to apply for message {message_id}") logger.info(f"No updates to apply for message {message_id}")
return ChatHistory(updated_message) return ChatHistory(updated_message) # pyright: ignore reportPrivateUsage=none

View File

@ -23,3 +23,4 @@ flake8==6.0.0
flake8-black==0.3.6 flake8-black==0.3.6
sentence_transformers>=2.0.0 sentence_transformers>=2.0.0
sentry-sdk==1.26.0 sentry-sdk==1.26.0
pyright==1.1.316

View File

@ -142,24 +142,27 @@ async def create_brain_endpoint(
In the brains table & in the brains_users table and put the creator user as 'Owner' In the brains table & in the brains_users table and put the creator user as 'Owner'
""" """
brain = Brain(name=brain.name) brain = Brain(name=brain.name) # pyright: ignore reportPrivateUsage=none
brain.create_brain() brain.create_brain() # pyright: ignore reportPrivateUsage=none
default_brain = get_default_user_brain(current_user) default_brain = get_default_user_brain(current_user)
if default_brain: if default_brain:
logger.info(f"Default brain already exists for user {current_user.id}") logger.info(f"Default brain already exists for user {current_user.id}")
brain.create_brain_user( brain.create_brain_user( # pyright: ignore reportPrivateUsage=none
user_id=current_user.id, rights="Owner", default_brain=False user_id=current_user.id, rights="Owner", default_brain=False
) )
else: else:
logger.info( logger.info(
f"Default brain does not exist for user {current_user.id}. It will be created." f"Default brain does not exist for user {current_user.id}. It will be created."
) )
brain.create_brain_user( brain.create_brain_user( # pyright: ignore reportPrivateUsage=none
user_id=current_user.id, rights="Owner", default_brain=True user_id=current_user.id, rights="Owner", default_brain=True
) )
return {"id": brain.id, "name": brain.name} return {
"id": brain.id, # pyright: ignore reportPrivateUsage=none
"name": brain.name,
}
# update existing brain # update existing brain
@ -182,10 +185,12 @@ async def update_brain_endpoint(
brain = Brain(id=brain_id) brain = Brain(id=brain_id)
# Add new file to brain, if file_sha1 already exists in brains_vectors -> out (not now) # Add new file to brain, if file_sha1 already exists in brains_vectors -> out (not now)
if brain.file_sha1: if brain.file_sha1: # pyright: ignore reportPrivateUsage=none
# add all the vector Ids to the brains_vectors with the given brain.brain_id # add all the vector Ids to the brains_vectors with the given brain.brain_id
brain.update_brain_with_file(file_sha1=input_brain.file_sha1) brain.update_brain_with_file(
file_sha1=input_brain.file_sha1 # pyright: ignore reportPrivateUsage=none
)
print("brain:", brain) print("brain:", brain)
brain.update_brain_fields(commons, brain) brain.update_brain_fields(commons, brain) # pyright: ignore reportPrivateUsage=none
return {"message": f"Brain {brain_id} has been updated."} return {"message": f"Brain {brain_id} has been updated."}

View File

@ -78,8 +78,8 @@ def check_user_limit(
user.increment_user_request_count(date) user.increment_user_request_count(date)
if int(user.requests_count) >= int(max_requests_number): if int(user.requests_count) >= int(max_requests_number):
raise HTTPException( raise HTTPException(
status_code=429, status_code=429, # pyright: ignore reportPrivateUsage=none
detail="You have reached the maximum number of requests for today.", detail="You have reached the maximum number of requests for today.", # pyright: ignore reportPrivateUsage=none
) )
else: else:
pass pass
@ -97,7 +97,7 @@ async def get_chats(current_user: User = Depends(get_current_user)):
This endpoint retrieves all the chats associated with the current authenticated user. It returns a list of chat objects This endpoint retrieves all the chats associated with the current authenticated user. It returns a list of chat objects
containing the chat ID and chat name for each chat. containing the chat ID and chat name for each chat.
""" """
chats = get_user_chats(current_user.id) chats = get_user_chats(current_user.id) # pyright: ignore reportPrivateUsage=none
return {"chats": chats} return {"chats": chats}
@ -127,10 +127,11 @@ async def update_chat_metadata_handler(
Update chat attributes Update chat attributes
""" """
chat = get_chat_by_id(chat_id) chat = get_chat_by_id(chat_id) # pyright: ignore reportPrivateUsage=none
if current_user.id != chat.user_id: if current_user.id != chat.user_id:
raise HTTPException( raise HTTPException(
status_code=403, detail="You should be the owner of the chat to update it." status_code=403, # pyright: ignore reportPrivateUsage=none
detail="You should be the owner of the chat to update it.", # pyright: ignore reportPrivateUsage=none
) )
return update_chat(chat_id=chat_id, chat_data=chat_data) return update_chat(chat_id=chat_id, chat_data=chat_data)
@ -181,7 +182,7 @@ async def create_question_handler(
temperature=chat_question.temperature, temperature=chat_question.temperature,
max_tokens=chat_question.max_tokens, max_tokens=chat_question.max_tokens,
brain_id=str(brain_id), brain_id=str(brain_id),
user_openai_api_key=current_user.user_openai_api_key, user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
) )
else: else:
@ -191,10 +192,12 @@ async def create_question_handler(
max_tokens=chat_question.max_tokens, max_tokens=chat_question.max_tokens,
temperature=chat_question.temperature, temperature=chat_question.temperature,
brain_id=str(brain_id), brain_id=str(brain_id),
user_openai_api_key=current_user.user_openai_api_key, user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
) )
chat_answer = gpt_answer_generator.generate_answer(chat_question.question) chat_answer = gpt_answer_generator.generate_answer( # pyright: ignore reportPrivateUsage=none
chat_question.question
)
return chat_answer return chat_answer
except HTTPException as e: except HTTPException as e:
@ -217,7 +220,10 @@ async def create_stream_question_handler(
if chat_question.model not in streaming_compatible_models: if chat_question.model not in streaming_compatible_models:
# Forward the request to the non-streaming endpoint # Forward the request to the non-streaming endpoint
return await create_question_handler( return await create_question_handler(
request, chat_question, chat_id, current_user request,
chat_question,
chat_id,
current_user, # pyright: ignore reportPrivateUsage=none
) )
try: try:
@ -238,12 +244,14 @@ async def create_stream_question_handler(
max_tokens=chat_question.max_tokens, max_tokens=chat_question.max_tokens,
temperature=chat_question.temperature, temperature=chat_question.temperature,
brain_id=str(brain_id), brain_id=str(brain_id),
user_openai_api_key=user_openai_api_key, user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
streaming=True, streaming=True,
) )
return StreamingResponse( return StreamingResponse(
gpt_answer_generator.generate_stream(chat_question.question), gpt_answer_generator.generate_stream( # pyright: ignore reportPrivateUsage=none
chat_question.question
),
media_type="text/event-stream", media_type="text/event-stream",
) )
@ -259,4 +267,4 @@ async def get_chat_history_handler(
chat_id: UUID, chat_id: UUID,
) -> List[ChatHistory]: ) -> List[ChatHistory]:
# TODO: RBAC with current_user # TODO: RBAC with current_user
return get_chat_history(chat_id) return get_chat_history(chat_id) # pyright: ignore reportPrivateUsage=none

View File

@ -35,7 +35,9 @@ async def crawl_endpoint(
commons = common_dependencies() commons = common_dependencies()
if request.headers.get("Openai-Api-Key"): if request.headers.get("Openai-Api-Key"):
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200) brain.max_brain_size = os.getenv(
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
) # pyright: ignore reportPrivateUsage=none
file_size = 1000000 file_size = 1000000
remaining_free_space = brain.remaining_brain_size remaining_free_space = brain.remaining_brain_size
@ -47,14 +49,20 @@ async def crawl_endpoint(
} }
else: else:
if not crawl_website.checkGithub(): if not crawl_website.checkGithub():
file_path, file_name = crawl_website.process() (
file_path,
file_name,
) = crawl_website.process() # pyright: ignore reportPrivateUsage=none
# Create a SpooledTemporaryFile from the file_path # Create a SpooledTemporaryFile from the file_path
spooled_file = SpooledTemporaryFile() spooled_file = SpooledTemporaryFile()
with open(file_path, "rb") as f: with open(file_path, "rb") as f:
shutil.copyfileobj(f, spooled_file) shutil.copyfileobj(f, spooled_file)
# Pass the SpooledTemporaryFile to UploadFile # Pass the SpooledTemporaryFile to UploadFile
uploadFile = UploadFile(file=spooled_file, filename=file_name) uploadFile = UploadFile(
file=spooled_file, # pyright: ignore reportPrivateUsage=none
filename=file_name,
)
file = File(file=uploadFile) file = File(file=uploadFile)
# check remaining free space here !! # check remaining free space here !!
message = await filter_file( message = await filter_file(

View File

@ -42,7 +42,9 @@ async def upload_file(
commons = common_dependencies() commons = common_dependencies()
if request.headers.get("Openai-Api-Key"): if request.headers.get("Openai-Api-Key"):
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200) brain.max_brain_size = os.getenv(
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
) # pyright: ignore reportPrivateUsage=none
remaining_free_space = brain.remaining_brain_size remaining_free_space = brain.remaining_brain_size
file_size = get_file_size(uploadFile) file_size = get_file_size(uploadFile)

View File

@ -10,6 +10,9 @@ client = TestClient(app)
API_KEY = os.getenv("CI_TEST_API_KEY") API_KEY = os.getenv("CI_TEST_API_KEY")
if not API_KEY:
raise ValueError("CI_TEST_API_KEY environment variable not set. Cannot run tests.")
def test_read_main(): def test_read_main():
response = client.get("/") response = client.get("/")
@ -54,7 +57,8 @@ def test_create_and_delete_api_key():
def test_retrieve_default_brain(): def test_retrieve_default_brain():
# Making a GET request to the /brains/default/ endpoint # Making a GET request to the /brains/default/ endpoint
response = client.get( response = client.get(
"/brains/default/", headers={"Authorization": "Bearer " + API_KEY} "/brains/default/",
headers={"Authorization": "Bearer " + API_KEY},
) )
# Assert that the response status code is 200 (HTTP OK) # Assert that the response status code is 200 (HTTP OK)
@ -88,7 +92,9 @@ def test_create_brain():
# Making a POST request to the /brains/ endpoint # Making a POST request to the /brains/ endpoint
response = client.post( response = client.post(
"/brains/", json=payload, headers={"Authorization": "Bearer " + API_KEY} "/brains/",
json=payload,
headers={"Authorization": "Bearer " + API_KEY},
) )
# Assert that the response status code is 200 (HTTP OK) # Assert that the response status code is 200 (HTTP OK)
@ -106,7 +112,10 @@ def test_create_brain():
def test_retrieve_all_brains(): def test_retrieve_all_brains():
# Making a GET request to the /brains/ endpoint to retrieve all brains for the current user # Making a GET request to the /brains/ endpoint to retrieve all brains for the current user
response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY}) response = client.get(
"/brains/",
headers={"Authorization": "Bearer " + API_KEY},
)
# Assert that the response status code is 200 (HTTP OK) # Assert that the response status code is 200 (HTTP OK)
assert response.status_code == 200 assert response.status_code == 200
@ -120,7 +129,10 @@ def test_retrieve_all_brains():
def test_delete_all_brains(): def test_delete_all_brains():
# First, retrieve all brains for the current user # First, retrieve all brains for the current user
response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY}) response = client.get(
"/brains/",
headers={"Authorization": "Bearer " + API_KEY},
)
# Assert that the response status code is 200 (HTTP OK) # Assert that the response status code is 200 (HTTP OK)
assert response.status_code == 200 assert response.status_code == 200
@ -133,7 +145,8 @@ def test_delete_all_brains():
# Send a DELETE request to delete the specific brain # Send a DELETE request to delete the specific brain
delete_response = client.delete( delete_response = client.delete(
f"/brains/{brain_id}/", headers={"Authorization": "Bearer " + API_KEY} f"/brains/{brain_id}/",
headers={"Authorization": "Bearer " + API_KEY},
) )
# Assert that the DELETE response status code is 200 (HTTP OK) # Assert that the DELETE response status code is 200 (HTTP OK)
@ -142,7 +155,10 @@ def test_delete_all_brains():
def test_get_all_chats(): def test_get_all_chats():
# Making a GET request to the /chat endpoint to retrieve all chats # Making a GET request to the /chat endpoint to retrieve all chats
response = client.get("/chat", headers={"Authorization": "Bearer " + API_KEY}) response = client.get(
"/chat",
headers={"Authorization": "Bearer " + API_KEY},
)
# Assert that the response status code is 200 (HTTP OK) # Assert that the response status code is 200 (HTTP OK)
assert response.status_code == 200 assert response.status_code == 200

View File

@ -18,8 +18,10 @@ def convert_bytes(bytes, precision=2):
def get_file_size(file: UploadFile): def get_file_size(file: UploadFile):
# move the cursor to the end of the file # move the cursor to the end of the file
file.file._file.seek(0, 2) file.file._file.seek(0, 2) # pyright: ignore reportPrivateUsage=none
file_size = file.file._file.tell() # Getting the size of the file file_size = (
file.file._file.tell() # pyright: ignore reportPrivateUsage=none
) # Getting the size of the file
# move the cursor back to the beginning of the file # move the cursor back to the beginning of the file
file.file.seek(0) file.file.seek(0)

View File

@ -54,14 +54,19 @@ async def filter_file(
if file_exists_in_brain: if file_exists_in_brain:
return create_response( return create_response(
f"🤔 {file.file.filename} already exists in brain {brain_id}.", "warning" f"🤔 {file.file.filename} already exists in brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
"warning",
) )
elif file.file_is_empty(): elif file.file_is_empty():
return create_response(f"{file.file.filename} is empty.", "error") return create_response(
f"{file.file.filename} is empty.", # pyright: ignore reportPrivateUsage=none
"error", # pyright: ignore reportPrivateUsage=none
)
elif file_exists: elif file_exists:
file.link_file_to_brain(brain=Brain(id=brain_id)) file.link_file_to_brain(brain=Brain(id=brain_id))
return create_response( return create_response(
f"{file.file.filename} has been uploaded to brain {brain_id}.", "success" f"{file.file.filename} has been uploaded to brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
"success",
) )
if file.file_extension in file_processors: if file.file_extension in file_processors:
@ -70,14 +75,18 @@ async def filter_file(
commons, file, enable_summarization, brain_id, openai_api_key commons, file, enable_summarization, brain_id, openai_api_key
) )
return create_response( return create_response(
f"{file.file.filename} has been uploaded to brain {brain_id}.", f"{file.file.filename} has been uploaded to brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
"success", "success",
) )
except Exception as e: except Exception as e:
# Add more specific exceptions as needed. # Add more specific exceptions as needed.
print(f"Error processing file: {e}") print(f"Error processing file: {e}")
return create_response( return create_response(
f"⚠️ An error occurred while processing {file.file.filename}.", "error" f"⚠️ An error occurred while processing {file.file.filename}.", # pyright: ignore reportPrivateUsage=none
"error",
) )
return create_response(f"{file.file.filename} is not supported.", "error") return create_response(
f"{file.file.filename} is not supported.", # pyright: ignore reportPrivateUsage=none
"error",
)

View File

@ -13,7 +13,7 @@ logger = get_logger(__name__)
class Neurons(BaseModel): class Neurons(BaseModel):
commons: CommonsDep commons: CommonsDep
settings = BrainSettings() settings = BrainSettings() # pyright: ignore reportPrivateUsage=none
def create_vector(self, doc, user_openai_api_key=None): def create_vector(self, doc, user_openai_api_key=None):
logger.info("Creating vector for document") logger.info("Creating vector for document")
@ -21,7 +21,7 @@ class Neurons(BaseModel):
if user_openai_api_key: if user_openai_api_key:
self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings( self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings(
openai_api_key=user_openai_api_key openai_api_key=user_openai_api_key
) ) # pyright: ignore reportPrivateUsage=none
try: try:
sids = self.commons["documents_vector_store"].add_documents([doc]) sids = self.commons["documents_vector_store"].add_documents([doc])
if sids and len(sids) > 0: if sids and len(sids) > 0:
@ -64,7 +64,7 @@ def create_summary(commons: CommonsDep, document_id, content, metadata):
def error_callback(exception): def error_callback(exception):
print('An exception occurred:', exception) print("An exception occurred:", exception)
def process_batch(batch_ids): def process_batch(batch_ids):
@ -106,7 +106,7 @@ def get_unique_files_from_vector_ids(vectors_ids: List[int]):
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
futures = [] futures = []
for i in range(0, len(vectors_ids), BATCH_SIZE): for i in range(0, len(vectors_ids), BATCH_SIZE):
batch_ids = vectors_ids[i:i + BATCH_SIZE] batch_ids = vectors_ids[i : i + BATCH_SIZE]
future = executor.submit(process_batch, batch_ids) future = executor.submit(process_batch, batch_ids)
futures.append(future) futures.append(future)
@ -114,6 +114,6 @@ def get_unique_files_from_vector_ids(vectors_ids: List[int]):
vectors_responses = [future.result() for future in futures] vectors_responses = [future.result() for future in futures]
documents = [item for sublist in vectors_responses for item in sublist] documents = [item for sublist in vectors_responses for item in sublist]
print('document', documents) print("document", documents)
unique_files = [dict(t) for t in set(tuple(d.items()) for d in documents)] unique_files = [dict(t) for t in set(tuple(d.items()) for d in documents)]
return unique_files return unique_files

View File

@ -3,8 +3,7 @@ from typing import Any, List
from langchain.docstore.document import Document from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import SupabaseVectorStore from langchain.vectorstores import SupabaseVectorStore
from supabase.client import Client
from supabase import Client
class CustomSupabaseVectorStore(SupabaseVectorStore): class CustomSupabaseVectorStore(SupabaseVectorStore):