mirror of
https://github.com/StanGirard/quivr.git
synced 2024-12-23 11:23:00 +03:00
Feat/static analysis (#582)
* feat: add static analysis * chore: update Makefile add static analysis script * chore: add vscode extensions recommendations
This commit is contained in:
parent
f2a06dc6de
commit
9e9f531c99
8
.vscode/extensions.json
vendored
Normal file
8
.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"ms-pyright.pyright",
|
||||
"dbaeumer.vscode-eslint",
|
||||
"ms-python.vscode-pylance",
|
||||
"ms-pyright.pyright"
|
||||
]
|
||||
}
|
6
.vscode/settings.json
vendored
6
.vscode/settings.json
vendored
@ -12,7 +12,8 @@
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": true
|
||||
"source.organizeImports": true,
|
||||
"source.fixAll": true
|
||||
}
|
||||
},
|
||||
"[typescriptreact]": {
|
||||
@ -41,4 +42,7 @@
|
||||
"**/.docusaurus/": true,
|
||||
"**/node_modules/": true,
|
||||
},
|
||||
"python.linting.pycodestyleCategorySeverity.W": "Error",
|
||||
"python.defaultInterpreterPath": "python3",
|
||||
"python.linting.flake8CategorySeverity.W": "Error",
|
||||
}
|
9
Makefile
9
Makefile
@ -4,4 +4,11 @@ dev:
|
||||
docker compose -f docker-compose.dev.yml up --build
|
||||
|
||||
prod:
|
||||
docker compose -f docker-compose.yml up --build
|
||||
docker compose -f docker-compose.yml up --build
|
||||
|
||||
test-type:
|
||||
@if command -v python3 &>/dev/null; then \
|
||||
python3 -m pyright; \
|
||||
else \
|
||||
python -m pyright; \
|
||||
fi
|
||||
|
@ -1,12 +1,13 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from auth.api_key_handler import get_user_from_api_key, verify_api_key
|
||||
from auth.jwt_token_handler import decode_access_token, verify_token
|
||||
from fastapi import Depends, HTTPException, Request
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from models.users import User
|
||||
|
||||
from auth.api_key_handler import get_user_from_api_key, verify_api_key
|
||||
from auth.jwt_token_handler import decode_access_token, verify_token
|
||||
|
||||
|
||||
class AuthBearer(HTTPBearer):
|
||||
def __init__(self, auto_error: bool = True):
|
||||
@ -20,7 +21,7 @@ class AuthBearer(HTTPBearer):
|
||||
request
|
||||
)
|
||||
self.check_scheme(credentials)
|
||||
token = credentials.credentials
|
||||
token = credentials.credentials # pyright: ignore reportPrivateUsage=none
|
||||
return await self.authenticate(
|
||||
token,
|
||||
)
|
||||
@ -52,7 +53,7 @@ class AuthBearer(HTTPBearer):
|
||||
|
||||
def get_test_user(self) -> User:
|
||||
return User(
|
||||
email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
|
||||
email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" # type: ignore
|
||||
) # replace with test user information
|
||||
|
||||
|
||||
|
@ -9,6 +9,9 @@ from models.users import User
|
||||
SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
|
||||
ALGORITHM = "HS256"
|
||||
|
||||
if not SECRET_KEY:
|
||||
raise ValueError("JWT_SECRET_KEY environment variable not set")
|
||||
|
||||
|
||||
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
|
||||
to_encode = data.copy()
|
||||
@ -27,9 +30,12 @@ def decode_access_token(token: str) -> User:
|
||||
token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
|
||||
)
|
||||
except JWTError:
|
||||
return None
|
||||
return None # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
return User(email=payload.get("email"), id=payload.get("sub"))
|
||||
return User(
|
||||
email=payload.get("email"),
|
||||
id=payload.get("sub"), # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
|
||||
def verify_token(token: str):
|
||||
|
@ -4,7 +4,6 @@ import tempfile
|
||||
import unicodedata
|
||||
|
||||
import requests
|
||||
from langchain.document_loaders import GitLoader
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
@ -29,7 +28,7 @@ class CrawlWebsite(BaseModel):
|
||||
file_name = slugify(self.url) + ".html"
|
||||
temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
|
||||
with open(temp_file_path, "w") as temp_file:
|
||||
temp_file.write(content)
|
||||
temp_file.write(content) # pyright: ignore reportPrivateUsage=none
|
||||
# Process the file
|
||||
|
||||
if content:
|
||||
|
@ -20,19 +20,21 @@ class BaseBrainPicking(BaseModel):
|
||||
"""
|
||||
|
||||
# Instantiate settings
|
||||
brain_settings = BrainSettings()
|
||||
brain_settings = BrainSettings() # type: ignore other parameters are optional
|
||||
|
||||
# Default class attributes
|
||||
model: str = None
|
||||
model: str = None # pyright: ignore reportPrivateUsage=none
|
||||
temperature: float = 0.0
|
||||
chat_id: str = None
|
||||
brain_id: str = None
|
||||
chat_id: str = None # pyright: ignore reportPrivateUsage=none
|
||||
brain_id: str = None # pyright: ignore reportPrivateUsage=none
|
||||
max_tokens: int = 256
|
||||
user_openai_api_key: str = None
|
||||
user_openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
|
||||
streaming: bool = False
|
||||
|
||||
openai_api_key: str = None
|
||||
callbacks: List[AsyncCallbackHandler] = None
|
||||
openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
|
||||
callbacks: List[
|
||||
AsyncCallbackHandler
|
||||
] = None # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
def _determine_api_key(self, openai_api_key, user_openai_api_key):
|
||||
"""If user provided an API key, use it."""
|
||||
@ -55,10 +57,12 @@ class BaseBrainPicking(BaseModel):
|
||||
|
||||
def _determine_callback_array(
|
||||
self, streaming
|
||||
) -> List[AsyncIteratorCallbackHandler]:
|
||||
) -> List[AsyncIteratorCallbackHandler]: # pyright: ignore reportPrivateUsage=none
|
||||
"""If streaming is set, set the AsyncIteratorCallbackHandler as the only callback."""
|
||||
if streaming:
|
||||
return [AsyncIteratorCallbackHandler]
|
||||
return [
|
||||
AsyncIteratorCallbackHandler # pyright: ignore reportPrivateUsage=none
|
||||
]
|
||||
|
||||
def __init__(self, **data):
|
||||
super().__init__(**data)
|
||||
@ -66,8 +70,12 @@ class BaseBrainPicking(BaseModel):
|
||||
self.openai_api_key = self._determine_api_key(
|
||||
self.brain_settings.openai_api_key, self.user_openai_api_key
|
||||
)
|
||||
self.streaming = self._determine_streaming(self.model, self.streaming)
|
||||
self.callbacks = self._determine_callback_array(self.streaming)
|
||||
self.streaming = self._determine_streaming(
|
||||
self.model, self.streaming
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.callbacks = self._determine_callback_array(
|
||||
self.streaming
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
class Config:
|
||||
"""Configuration of the Pydantic Object"""
|
||||
|
@ -1,4 +1,5 @@
|
||||
from typing import Optional
|
||||
|
||||
from .FunctionCall import FunctionCall
|
||||
|
||||
|
||||
@ -6,7 +7,7 @@ class OpenAiAnswer:
|
||||
def __init__(
|
||||
self,
|
||||
content: Optional[str] = None,
|
||||
function_call: FunctionCall = None,
|
||||
function_call: FunctionCall = None, # pyright: ignore reportPrivateUsage=none
|
||||
):
|
||||
self.content = content
|
||||
self.function_call = function_call
|
||||
|
@ -13,11 +13,12 @@ from repository.chat.format_chat_history import format_chat_history
|
||||
from repository.chat.get_chat_history import get_chat_history
|
||||
from repository.chat.update_chat_history import update_chat_history
|
||||
from repository.chat.update_message_by_id import update_message_by_id
|
||||
from supabase import Client, create_client
|
||||
from supabase.client import Client, create_client
|
||||
from vectorstore.supabase import (
|
||||
CustomSupabaseVectorStore,
|
||||
) # Custom class for handling vector storage with Supabase
|
||||
)
|
||||
|
||||
# Custom class for handling vector storage with Supabase
|
||||
from .base import BaseBrainPicking
|
||||
from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
|
||||
|
||||
@ -42,7 +43,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
||||
max_tokens: int,
|
||||
user_openai_api_key: str,
|
||||
streaming: bool = False,
|
||||
) -> "OpenAIBrainPicking":
|
||||
) -> "OpenAIBrainPicking": # pyright: ignore reportPrivateUsage=none
|
||||
"""
|
||||
Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
|
||||
:return: OpenAIBrainPicking instance
|
||||
@ -59,7 +60,9 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
||||
|
||||
@property
|
||||
def embeddings(self) -> OpenAIEmbeddings:
|
||||
return OpenAIEmbeddings(openai_api_key=self.openai_api_key)
|
||||
return OpenAIEmbeddings(
|
||||
openai_api_key=self.openai_api_key
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
@property
|
||||
def supabase_client(self) -> Client:
|
||||
@ -92,14 +95,16 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
||||
|
||||
@property
|
||||
def doc_chain(self) -> LLMChain:
|
||||
return load_qa_chain(llm=self.doc_llm, chain_type="stuff")
|
||||
return load_qa_chain(
|
||||
llm=self.doc_llm, chain_type="stuff"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
@property
|
||||
def qa(self) -> ConversationalRetrievalChain:
|
||||
return ConversationalRetrievalChain(
|
||||
retriever=self.vector_store.as_retriever(),
|
||||
question_generator=self.question_generator,
|
||||
combine_docs_chain=self.doc_chain,
|
||||
combine_docs_chain=self.doc_chain, # pyright: ignore reportPrivateUsage=none
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
@ -116,7 +121,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
||||
model=model,
|
||||
streaming=streaming,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
def _call_chain(self, chain, question, history):
|
||||
"""
|
||||
@ -205,8 +210,10 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
||||
|
||||
task = asyncio.create_task(
|
||||
wrap_done(
|
||||
self.qa._acall_chain(self.qa, question, transformed_history),
|
||||
callback.done,
|
||||
self.qa._acall_chain( # pyright: ignore reportPrivateUsage=none
|
||||
self.qa, question, transformed_history
|
||||
),
|
||||
callback.done, # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
)
|
||||
|
||||
@ -217,7 +224,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
||||
)
|
||||
|
||||
# Use the aiter method of the callback to stream the response with server-sent-events
|
||||
async for token in callback.aiter():
|
||||
async for token in callback.aiter(): # pyright: ignore reportPrivateUsage=none
|
||||
logger.info("Token: %s", token)
|
||||
|
||||
# Add the token to the response_tokens list
|
||||
|
@ -2,15 +2,16 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from llm.models.FunctionCall import FunctionCall
|
||||
from llm.models.OpenAiAnswer import OpenAiAnswer
|
||||
from logger import get_logger
|
||||
from models.chat import ChatHistory
|
||||
from repository.chat.get_chat_history import get_chat_history
|
||||
from repository.chat.update_chat_history import update_chat_history
|
||||
from supabase import Client, create_client
|
||||
from supabase.client import Client, create_client
|
||||
from vectorstore.supabase import CustomSupabaseVectorStore
|
||||
|
||||
from llm.models.FunctionCall import FunctionCall
|
||||
from llm.models.OpenAiAnswer import OpenAiAnswer
|
||||
|
||||
from .base import BaseBrainPicking
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@ -27,7 +28,10 @@ def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer:
|
||||
answer["function_call"]["arguments"],
|
||||
)
|
||||
|
||||
return OpenAiAnswer(content=content, function_call=function_call)
|
||||
return OpenAiAnswer(
|
||||
content=content,
|
||||
function_call=function_call, # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
|
||||
class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
||||
@ -48,7 +52,7 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
||||
brain_id: str,
|
||||
user_openai_api_key: str,
|
||||
# TODO: add streaming
|
||||
) -> "OpenAIFunctionsBrainPicking":
|
||||
) -> "OpenAIFunctionsBrainPicking": # pyright: ignore reportPrivateUsage=none
|
||||
super().__init__(
|
||||
model=model,
|
||||
chat_id=chat_id,
|
||||
@ -61,11 +65,15 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
||||
|
||||
@property
|
||||
def openai_client(self) -> ChatOpenAI:
|
||||
return ChatOpenAI(openai_api_key=self.openai_api_key)
|
||||
return ChatOpenAI(
|
||||
openai_api_key=self.openai_api_key
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
@property
|
||||
def embeddings(self) -> OpenAIEmbeddings:
|
||||
return OpenAIEmbeddings(openai_api_key=self.openai_api_key)
|
||||
return OpenAIEmbeddings(
|
||||
openai_api_key=self.openai_api_key
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
@property
|
||||
def supabase_client(self) -> Client:
|
||||
@ -125,7 +133,9 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
||||
"""
|
||||
logger.info("Getting context")
|
||||
|
||||
return self.vector_store.similarity_search(query=question)
|
||||
return self.vector_store.similarity_search(
|
||||
query=question
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
def _construct_prompt(
|
||||
self, question: str, useContext: bool = False, useHistory: bool = False
|
||||
|
@ -21,7 +21,7 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
|
||||
chat_id: str,
|
||||
brain_id: str,
|
||||
streaming: bool,
|
||||
) -> "PrivateGPT4AllBrainPicking":
|
||||
) -> "PrivateGPT4AllBrainPicking": # pyright: ignore reportPrivateUsage=none
|
||||
"""
|
||||
Initialize the PrivateBrainPicking class by calling the parent class's initializer.
|
||||
:param brain_id: The brain_id in the DB.
|
||||
@ -57,4 +57,4 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
|
||||
n_batch=model_n_batch,
|
||||
backend="gptj",
|
||||
verbose=True,
|
||||
)
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
@ -31,7 +31,7 @@ Summarize the following text:
|
||||
{{/assistant~}}
|
||||
""",
|
||||
llm=summary_llm,
|
||||
)
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
summary = summary(document=document)
|
||||
logger.info("Summarization: %s", summary)
|
||||
@ -78,10 +78,12 @@ Summary
|
||||
{{/assistant~}}
|
||||
""",
|
||||
llm=evaluation_llm,
|
||||
)
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
result = evaluation(question=question, summaries=summaries)
|
||||
evaluations = {}
|
||||
for evaluation in result["evaluation"].split("\n"):
|
||||
for evaluation in result["evaluation"].split(
|
||||
"\n"
|
||||
): # pyright: ignore reportPrivateUsage=none
|
||||
if evaluation == "" or not evaluation[0].isdigit():
|
||||
continue
|
||||
logger.info("Evaluation Row: %s", evaluation)
|
||||
|
@ -2,11 +2,12 @@ import os
|
||||
from typing import Any, List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from models.settings import CommonsDep, common_dependencies
|
||||
from models.users import User
|
||||
from pydantic import BaseModel
|
||||
from utils.vectors import get_unique_files_from_vector_ids
|
||||
|
||||
from models.settings import CommonsDep, common_dependencies
|
||||
from models.users import User
|
||||
|
||||
|
||||
class Brain(BaseModel):
|
||||
id: Optional[UUID] = None
|
||||
@ -15,7 +16,7 @@ class Brain(BaseModel):
|
||||
model: Optional[str] = "gpt-3.5-turbo-0613"
|
||||
temperature: Optional[float] = 0.0
|
||||
max_tokens: Optional[int] = 256
|
||||
brain_size: Optional[float] = 0.0
|
||||
brain_size: Optional[float] = 0.0 # pyright: ignore reportPrivateUsage=none
|
||||
max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
|
||||
files: List[Any] = []
|
||||
_commons: Optional[CommonsDep] = None
|
||||
@ -27,7 +28,7 @@ class Brain(BaseModel):
|
||||
def commons(self) -> CommonsDep:
|
||||
if not self._commons:
|
||||
self.__class__._commons = common_dependencies()
|
||||
return self._commons
|
||||
return self._commons # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
@property
|
||||
def brain_size(self):
|
||||
@ -39,12 +40,17 @@ class Brain(BaseModel):
|
||||
|
||||
@property
|
||||
def remaining_brain_size(self):
|
||||
return float(self.max_brain_size) - self.brain_size
|
||||
return (
|
||||
float(self.max_brain_size) # pyright: ignore reportPrivateUsage=none
|
||||
- self.brain_size # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def create(cls, *args, **kwargs):
|
||||
commons = common_dependencies()
|
||||
return cls(commons=commons, *args, **kwargs)
|
||||
return cls(
|
||||
commons=commons, *args, **kwargs # pyright: ignore reportPrivateUsage=none
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
def get_user_brains(self, user_id):
|
||||
response = (
|
||||
|
@ -9,10 +9,18 @@ class Chat:
|
||||
chat_name: str
|
||||
|
||||
def __init__(self, chat_dict: dict):
|
||||
self.chat_id = chat_dict.get("chat_id")
|
||||
self.user_id = chat_dict.get("user_id")
|
||||
self.creation_time = chat_dict.get("creation_time")
|
||||
self.chat_name = chat_dict.get("chat_name")
|
||||
self.chat_id = chat_dict.get(
|
||||
"chat_id"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.user_id = chat_dict.get(
|
||||
"user_id"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.creation_time = chat_dict.get(
|
||||
"creation_time"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.chat_name = chat_dict.get(
|
||||
"chat_name"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -24,11 +32,21 @@ class ChatHistory:
|
||||
message_time: str
|
||||
|
||||
def __init__(self, chat_dict: dict):
|
||||
self.chat_id = chat_dict.get("chat_id")
|
||||
self.message_id = chat_dict.get("message_id")
|
||||
self.user_message = chat_dict.get("user_message")
|
||||
self.assistant = chat_dict.get("assistant")
|
||||
self.message_time = chat_dict.get("message_time")
|
||||
self.chat_id = chat_dict.get(
|
||||
"chat_id"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.message_id = chat_dict.get(
|
||||
"message_id"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.user_message = chat_dict.get(
|
||||
"user_message"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.assistant = chat_dict.get(
|
||||
"assistant"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
self.message_time = chat_dict.get(
|
||||
"message_time"
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
def to_dict(self):
|
||||
return asdict(self)
|
||||
|
@ -6,11 +6,12 @@ from uuid import UUID
|
||||
from fastapi import UploadFile
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from logger import get_logger
|
||||
from models.brains import Brain
|
||||
from models.settings import CommonsDep, common_dependencies
|
||||
from pydantic import BaseModel
|
||||
from utils.file import compute_sha1_from_file
|
||||
|
||||
from models.brains import Brain
|
||||
from models.settings import CommonsDep, common_dependencies
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@ -18,9 +19,9 @@ class File(BaseModel):
|
||||
id: Optional[UUID] = None
|
||||
file: Optional[UploadFile]
|
||||
file_name: Optional[str] = ""
|
||||
file_size: Optional[int] = ""
|
||||
file_size: Optional[int] = "" # pyright: ignore reportPrivateUsage=none
|
||||
file_sha1: Optional[str] = ""
|
||||
vectors_ids: Optional[int] = []
|
||||
vectors_ids: Optional[int] = [] # pyright: ignore reportPrivateUsage=none
|
||||
file_extension: Optional[str] = ""
|
||||
content: Optional[Any] = None
|
||||
chunk_size: int = 500
|
||||
@ -30,16 +31,25 @@ class File(BaseModel):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
|
||||
if self.file:
|
||||
self.file_name = self.file.filename
|
||||
self.file_size = self.file.file._file.tell()
|
||||
self.file_extension = os.path.splitext(self.file.filename)[-1].lower()
|
||||
|
||||
self.file_size = (
|
||||
self.file.file._file.tell() # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
self.file_extension = os.path.splitext(
|
||||
self.file.filename # pyright: ignore reportPrivateUsage=none
|
||||
)[-1].lower()
|
||||
|
||||
async def compute_file_sha1(self):
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
|
||||
await self.file.seek(0)
|
||||
self.content = await self.file.read()
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False,
|
||||
suffix=self.file.filename, # pyright: ignore reportPrivateUsage=none
|
||||
) as tmp_file:
|
||||
await self.file.seek(0) # pyright: ignore reportPrivateUsage=none
|
||||
self.content = (
|
||||
await self.file.read() # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
tmp_file.write(self.content)
|
||||
tmp_file.flush()
|
||||
self.file_sha1 = compute_sha1_from_file(tmp_file.name)
|
||||
@ -48,18 +58,21 @@ class File(BaseModel):
|
||||
|
||||
def compute_documents(self, loader_class):
|
||||
logger.info(f"Computing documents from file {self.file_name}")
|
||||
|
||||
|
||||
documents = []
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
|
||||
tmp_file.write(self.content)
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False,
|
||||
suffix=self.file.filename, # pyright: ignore reportPrivateUsage=none
|
||||
) as tmp_file:
|
||||
tmp_file.write(self.content) # pyright: ignore reportPrivateUsage=none
|
||||
tmp_file.flush()
|
||||
loader = loader_class(tmp_file.name)
|
||||
documents = loader.load()
|
||||
|
||||
|
||||
print("documents", documents)
|
||||
|
||||
os.remove(tmp_file.name)
|
||||
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
||||
chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
|
||||
)
|
||||
@ -70,20 +83,21 @@ class File(BaseModel):
|
||||
|
||||
def set_file_vectors_ids(self):
|
||||
"""
|
||||
Set the vectors_ids property with the ids of the vectors
|
||||
Set the vectors_ids property with the ids of the vectors
|
||||
that are associated with the file in the vectors table
|
||||
"""
|
||||
|
||||
commons = common_dependencies()
|
||||
commons = common_dependencies()
|
||||
response = (
|
||||
commons["supabase"].table("vectors")
|
||||
commons["supabase"]
|
||||
.table("vectors")
|
||||
.select("id")
|
||||
.filter("metadata->>file_sha1", "eq", self.file_sha1)
|
||||
.execute()
|
||||
)
|
||||
self.vectors_ids = response.data
|
||||
return
|
||||
|
||||
|
||||
def file_already_exists(self):
|
||||
"""
|
||||
Check if file already exists in vectors table
|
||||
@ -92,20 +106,24 @@ class File(BaseModel):
|
||||
|
||||
print("file_sha1", self.file_sha1)
|
||||
print("vectors_ids", self.vectors_ids)
|
||||
print("len(vectors_ids)", len(self.vectors_ids))
|
||||
print(
|
||||
"len(vectors_ids)",
|
||||
len(self.vectors_ids), # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
# if the file does not exist in vectors then no need to go check in brains_vectors
|
||||
if len(self.vectors_ids) == 0:
|
||||
if len(self.vectors_ids) == 0: # pyright: ignore reportPrivateUsage=none
|
||||
return False
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def file_already_exists_in_brain(self, brain_id):
|
||||
commons = common_dependencies()
|
||||
commons = common_dependencies()
|
||||
self.set_file_vectors_ids()
|
||||
# Check if file exists in that brain
|
||||
response = (
|
||||
commons["supabase"].table("brains_vectors")
|
||||
commons["supabase"]
|
||||
.table("brains_vectors")
|
||||
.select("brain_id, vector_id")
|
||||
.filter("brain_id", "eq", brain_id)
|
||||
.filter("file_sha1", "eq", self.file_sha1)
|
||||
@ -114,15 +132,17 @@ class File(BaseModel):
|
||||
print("response.data", response.data)
|
||||
if len(response.data) == 0:
|
||||
return False
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def file_is_empty(self):
|
||||
return self.file.file._file.tell() < 1
|
||||
|
||||
return (
|
||||
self.file.file._file.tell() < 1 # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
def link_file_to_brain(self, brain: Brain):
|
||||
self.set_file_vectors_ids()
|
||||
|
||||
for vector_id in self.vectors_ids:
|
||||
brain.create_brain_vector(vector_id['id'], self.file_sha1)
|
||||
for vector_id in self.vectors_ids: # pyright: ignore reportPrivateUsage=none
|
||||
brain.create_brain_vector(vector_id["id"], self.file_sha1)
|
||||
print(f"Successfully linked file {self.file_sha1} to brain {brain.id}")
|
||||
|
@ -3,7 +3,7 @@ from typing import Annotated
|
||||
from fastapi import Depends
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from pydantic import BaseSettings
|
||||
from supabase import Client, create_client
|
||||
from supabase.client import Client, create_client
|
||||
from vectorstore.supabase import SupabaseVectorStore
|
||||
|
||||
|
||||
@ -22,8 +22,10 @@ class LLMSettings(BaseSettings):
|
||||
|
||||
|
||||
def common_dependencies() -> dict:
|
||||
settings = BrainSettings()
|
||||
embeddings = OpenAIEmbeddings(openai_api_key=settings.openai_api_key)
|
||||
settings = BrainSettings() # pyright: ignore reportPrivateUsage=none
|
||||
embeddings = OpenAIEmbeddings(
|
||||
openai_api_key=settings.openai_api_key
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
supabase_client: Client = create_client(
|
||||
settings.supabase_url, settings.supabase_service_key
|
||||
)
|
||||
|
@ -10,23 +10,33 @@ from models.settings import CommonsDep
|
||||
from utils.file import compute_sha1_from_content
|
||||
|
||||
|
||||
async def process_audio(commons: CommonsDep, file: File, enable_summarization: bool, user, user_openai_api_key):
|
||||
|
||||
async def process_audio(
|
||||
commons: CommonsDep, # pyright: ignore reportPrivateUsage=none
|
||||
file: File,
|
||||
enable_summarization: bool,
|
||||
user,
|
||||
user_openai_api_key,
|
||||
):
|
||||
temp_filename = None
|
||||
file_sha = ""
|
||||
dateshort = time.strftime("%Y%m%d-%H%M%S")
|
||||
file_meta_name = f"audiotranscript_{dateshort}.txt"
|
||||
|
||||
# use this for whisper
|
||||
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
||||
os.environ.get("OPENAI_API_KEY")
|
||||
if user_openai_api_key:
|
||||
openai_api_key = user_openai_api_key
|
||||
pass
|
||||
|
||||
try:
|
||||
upload_file = file.file
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=upload_file.filename) as tmp_file:
|
||||
await upload_file.seek(0)
|
||||
content = await upload_file.read()
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False,
|
||||
suffix=upload_file.filename, # pyright: ignore reportPrivateUsage=none
|
||||
) as tmp_file:
|
||||
await upload_file.seek(0) # pyright: ignore reportPrivateUsage=none
|
||||
content = (
|
||||
await upload_file.read() # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
tmp_file.write(content)
|
||||
tmp_file.flush()
|
||||
tmp_file.close()
|
||||
@ -36,21 +46,42 @@ async def process_audio(commons: CommonsDep, file: File, enable_summarization: b
|
||||
with open(tmp_file.name, "rb") as audio_file:
|
||||
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
||||
|
||||
file_sha = compute_sha1_from_content(transcript.text.encode("utf-8"))
|
||||
file_size = len(transcript.text.encode("utf-8"))
|
||||
file_sha = compute_sha1_from_content(
|
||||
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
file_size = len(
|
||||
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
chunk_size = 500
|
||||
chunk_overlap = 0
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
||||
texts = text_splitter.split_text(transcript.text.encode("utf-8"))
|
||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
||||
)
|
||||
texts = text_splitter.split_text(
|
||||
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
docs_with_metadata = [Document(page_content=text, metadata={"file_sha1": file_sha, "file_size": file_size, "file_name": file_meta_name,
|
||||
"chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort}) for text in texts]
|
||||
docs_with_metadata = [
|
||||
Document(
|
||||
page_content=text,
|
||||
metadata={
|
||||
"file_sha1": file_sha,
|
||||
"file_size": file_size,
|
||||
"file_name": file_meta_name,
|
||||
"chunk_size": chunk_size,
|
||||
"chunk_overlap": chunk_overlap,
|
||||
"date": dateshort,
|
||||
},
|
||||
)
|
||||
for text in texts
|
||||
]
|
||||
|
||||
commons.documents_vector_store.add_documents(docs_with_metadata)
|
||||
commons.documents_vector_store.add_documents( # pyright: ignore reportPrivateUsage=none
|
||||
docs_with_metadata
|
||||
)
|
||||
|
||||
finally:
|
||||
if temp_filename and os.path.exists(temp_filename):
|
||||
os.remove(temp_filename)
|
||||
os.remove(temp_filename)
|
||||
|
@ -19,7 +19,7 @@ async def process_file(
|
||||
|
||||
file.compute_documents(loader_class)
|
||||
|
||||
for doc in file.documents:
|
||||
for doc in file.documents: # pyright: ignore reportPrivateUsage=none
|
||||
metadata = {
|
||||
"file_sha1": file.file_sha1,
|
||||
"file_size": file.file_size,
|
||||
@ -29,17 +29,15 @@ async def process_file(
|
||||
"date": dateshort,
|
||||
"summarization": "true" if enable_summarization else "false",
|
||||
}
|
||||
doc_with_metadata = Document(
|
||||
page_content=doc.page_content, metadata=metadata)
|
||||
|
||||
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
|
||||
|
||||
neurons = Neurons(commons=commons)
|
||||
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
|
||||
# add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
|
||||
|
||||
created_vector_id = created_vector[0]
|
||||
created_vector_id = created_vector[0] # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
brain = Brain(id=brain_id)
|
||||
brain.create_brain_vector(created_vector_id, file.file_sha1)
|
||||
|
||||
return
|
||||
|
||||
|
@ -11,7 +11,13 @@ from utils.file import compute_sha1_from_content
|
||||
from utils.vectors import Neurons
|
||||
|
||||
|
||||
async def process_github(commons: CommonsDep, repo, enable_summarization, brain_id, user_openai_api_key):
|
||||
async def process_github(
|
||||
commons: CommonsDep, # pyright: ignore reportPrivateUsage=none
|
||||
repo,
|
||||
enable_summarization,
|
||||
brain_id,
|
||||
user_openai_api_key,
|
||||
):
|
||||
random_dir_name = os.urandom(16).hex()
|
||||
dateshort = time.strftime("%Y%m%d")
|
||||
loader = GitLoader(
|
||||
@ -24,41 +30,60 @@ async def process_github(commons: CommonsDep, repo, enable_summarization, brain_
|
||||
chunk_size = 500
|
||||
chunk_overlap = 0
|
||||
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
||||
|
||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
||||
)
|
||||
|
||||
documents = text_splitter.split_documents(documents)
|
||||
print(documents[:1])
|
||||
|
||||
for doc in documents:
|
||||
if doc.metadata["file_type"] in [".pyc",".png",".svg", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git", ".json"]:
|
||||
if doc.metadata["file_type"] in [
|
||||
".pyc",
|
||||
".png",
|
||||
".svg",
|
||||
".env",
|
||||
".lock",
|
||||
".gitignore",
|
||||
".gitmodules",
|
||||
".gitattributes",
|
||||
".gitkeep",
|
||||
".git",
|
||||
".json",
|
||||
]:
|
||||
continue
|
||||
metadata = {
|
||||
"file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")),
|
||||
"file_size": len(doc.page_content)*8,
|
||||
"file_size": len(doc.page_content) * 8,
|
||||
"file_name": doc.metadata["file_name"],
|
||||
"chunk_size": chunk_size,
|
||||
"chunk_overlap": chunk_overlap,
|
||||
"date": dateshort,
|
||||
"summarization": "true" if enable_summarization else "false"
|
||||
"summarization": "true" if enable_summarization else "false",
|
||||
}
|
||||
doc_with_metadata = Document(
|
||||
page_content=doc.page_content, metadata=metadata)
|
||||
|
||||
file = File(file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8")))
|
||||
|
||||
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
|
||||
|
||||
file = File(
|
||||
file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8"))
|
||||
)
|
||||
|
||||
file_exists = file.file_already_exists()
|
||||
|
||||
if not file_exists:
|
||||
print(f"Creating entry for file {file.file_sha1} in vectors...")
|
||||
neurons = Neurons(commons=commons)
|
||||
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
|
||||
neurons = Neurons(commons=commons)
|
||||
created_vector = neurons.create_vector(
|
||||
doc_with_metadata, user_openai_api_key
|
||||
)
|
||||
print("Created vector sids ", created_vector)
|
||||
print("Created vector for ", doc.metadata["file_name"])
|
||||
|
||||
file_exists_in_brain = file.file_already_exists_in_brain(brain_id)
|
||||
|
||||
if not file_exists_in_brain:
|
||||
file.add_file_to_brain(brain_id)
|
||||
file.add_file_to_brain(brain_id) # pyright: ignore reportPrivateUsage=none
|
||||
brain = Brain(id=brain_id)
|
||||
file.link_file_to_brain(brain)
|
||||
return {"message": f"✅ Github with {len(documents)} files has been uploaded.", "type": "success"}
|
||||
return {
|
||||
"message": f"✅ Github with {len(documents)} files has been uploaded.",
|
||||
"type": "success",
|
||||
}
|
||||
|
5
backend/pyrightconfig.json
Normal file
5
backend/pyrightconfig.json
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"exclude": [
|
||||
"supabase"
|
||||
]
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
from typing import List # For type hinting
|
||||
|
||||
from models.chat import ChatHistory
|
||||
from models.settings import common_dependencies
|
||||
from typing import List # For type hinting
|
||||
|
||||
|
||||
def get_chat_history(chat_id: str) -> List[ChatHistory]:
|
||||
@ -16,4 +17,7 @@ def get_chat_history(chat_id: str) -> List[ChatHistory]:
|
||||
if history is None:
|
||||
return []
|
||||
else:
|
||||
return [ChatHistory(message) for message in history]
|
||||
return [
|
||||
ChatHistory(message) # pyright: ignore reportPrivateUsage=none
|
||||
for message in history
|
||||
]
|
||||
|
@ -21,7 +21,7 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat:
|
||||
|
||||
if not chat_id:
|
||||
logger.error("No chat_id provided")
|
||||
return
|
||||
return # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
updates = {}
|
||||
|
||||
@ -41,4 +41,4 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat:
|
||||
logger.info(f"Chat {chat_id} updated")
|
||||
else:
|
||||
logger.info(f"No updates to apply for chat {chat_id}")
|
||||
return updated_chat
|
||||
return updated_chat # pyright: ignore reportPrivateUsage=none
|
||||
|
@ -23,4 +23,4 @@ def update_chat_history(chat_id: str, user_message: str, assistant: str) -> Chat
|
||||
raise HTTPException(
|
||||
status_code=500, detail="An exception occurred while updating chat history."
|
||||
)
|
||||
return ChatHistory(response[0])
|
||||
return ChatHistory(response[0]) # pyright: ignore reportPrivateUsage=none
|
||||
|
@ -6,13 +6,15 @@ logger = get_logger(__name__)
|
||||
|
||||
|
||||
def update_message_by_id(
|
||||
message_id: str, user_message: str = None, assistant: str = None
|
||||
message_id: str,
|
||||
user_message: str = None, # pyright: ignore reportPrivateUsage=none
|
||||
assistant: str = None, # pyright: ignore reportPrivateUsage=none
|
||||
) -> ChatHistory:
|
||||
commons = common_dependencies()
|
||||
|
||||
if not message_id:
|
||||
logger.error("No message_id provided")
|
||||
return
|
||||
return # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
updates = {}
|
||||
|
||||
@ -35,4 +37,4 @@ def update_message_by_id(
|
||||
logger.info(f"Message {message_id} updated")
|
||||
else:
|
||||
logger.info(f"No updates to apply for message {message_id}")
|
||||
return ChatHistory(updated_message)
|
||||
return ChatHistory(updated_message) # pyright: ignore reportPrivateUsage=none
|
||||
|
@ -22,4 +22,5 @@ asyncpg==0.27.0
|
||||
flake8==6.0.0
|
||||
flake8-black==0.3.6
|
||||
sentence_transformers>=2.0.0
|
||||
sentry-sdk==1.26.0
|
||||
sentry-sdk==1.26.0
|
||||
pyright==1.1.316
|
@ -142,24 +142,27 @@ async def create_brain_endpoint(
|
||||
In the brains table & in the brains_users table and put the creator user as 'Owner'
|
||||
"""
|
||||
|
||||
brain = Brain(name=brain.name)
|
||||
brain = Brain(name=brain.name) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
brain.create_brain()
|
||||
brain.create_brain() # pyright: ignore reportPrivateUsage=none
|
||||
default_brain = get_default_user_brain(current_user)
|
||||
if default_brain:
|
||||
logger.info(f"Default brain already exists for user {current_user.id}")
|
||||
brain.create_brain_user(
|
||||
brain.create_brain_user( # pyright: ignore reportPrivateUsage=none
|
||||
user_id=current_user.id, rights="Owner", default_brain=False
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Default brain does not exist for user {current_user.id}. It will be created."
|
||||
)
|
||||
brain.create_brain_user(
|
||||
brain.create_brain_user( # pyright: ignore reportPrivateUsage=none
|
||||
user_id=current_user.id, rights="Owner", default_brain=True
|
||||
)
|
||||
|
||||
return {"id": brain.id, "name": brain.name}
|
||||
return {
|
||||
"id": brain.id, # pyright: ignore reportPrivateUsage=none
|
||||
"name": brain.name,
|
||||
}
|
||||
|
||||
|
||||
# update existing brain
|
||||
@ -182,10 +185,12 @@ async def update_brain_endpoint(
|
||||
brain = Brain(id=brain_id)
|
||||
|
||||
# Add new file to brain , il file_sha1 already exists in brains_vectors -> out (not now)
|
||||
if brain.file_sha1:
|
||||
if brain.file_sha1: # pyright: ignore reportPrivateUsage=none
|
||||
# add all the vector Ids to the brains_vectors with the given brain.brain_id
|
||||
brain.update_brain_with_file(file_sha1=input_brain.file_sha1)
|
||||
brain.update_brain_with_file(
|
||||
file_sha1=input_brain.file_sha1 # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
print("brain:", brain)
|
||||
|
||||
brain.update_brain_fields(commons, brain)
|
||||
brain.update_brain_fields(commons, brain) # pyright: ignore reportPrivateUsage=none
|
||||
return {"message": f"Brain {brain_id} has been updated."}
|
||||
|
@ -78,8 +78,8 @@ def check_user_limit(
|
||||
user.increment_user_request_count(date)
|
||||
if int(user.requests_count) >= int(max_requests_number):
|
||||
raise HTTPException(
|
||||
status_code=429,
|
||||
detail="You have reached the maximum number of requests for today.",
|
||||
status_code=429, # pyright: ignore reportPrivateUsage=none
|
||||
detail="You have reached the maximum number of requests for today.", # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
else:
|
||||
pass
|
||||
@ -97,7 +97,7 @@ async def get_chats(current_user: User = Depends(get_current_user)):
|
||||
This endpoint retrieves all the chats associated with the current authenticated user. It returns a list of chat objects
|
||||
containing the chat ID and chat name for each chat.
|
||||
"""
|
||||
chats = get_user_chats(current_user.id)
|
||||
chats = get_user_chats(current_user.id) # pyright: ignore reportPrivateUsage=none
|
||||
return {"chats": chats}
|
||||
|
||||
|
||||
@ -127,10 +127,11 @@ async def update_chat_metadata_handler(
|
||||
Update chat attributes
|
||||
"""
|
||||
|
||||
chat = get_chat_by_id(chat_id)
|
||||
chat = get_chat_by_id(chat_id) # pyright: ignore reportPrivateUsage=none
|
||||
if current_user.id != chat.user_id:
|
||||
raise HTTPException(
|
||||
status_code=403, detail="You should be the owner of the chat to update it."
|
||||
status_code=403, # pyright: ignore reportPrivateUsage=none
|
||||
detail="You should be the owner of the chat to update it.", # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
return update_chat(chat_id=chat_id, chat_data=chat_data)
|
||||
|
||||
@ -181,7 +182,7 @@ async def create_question_handler(
|
||||
temperature=chat_question.temperature,
|
||||
max_tokens=chat_question.max_tokens,
|
||||
brain_id=str(brain_id),
|
||||
user_openai_api_key=current_user.user_openai_api_key,
|
||||
user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
else:
|
||||
@ -191,10 +192,12 @@ async def create_question_handler(
|
||||
max_tokens=chat_question.max_tokens,
|
||||
temperature=chat_question.temperature,
|
||||
brain_id=str(brain_id),
|
||||
user_openai_api_key=current_user.user_openai_api_key,
|
||||
user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
chat_answer = gpt_answer_generator.generate_answer(chat_question.question)
|
||||
chat_answer = gpt_answer_generator.generate_answer( # pyright: ignore reportPrivateUsage=none
|
||||
chat_question.question
|
||||
)
|
||||
|
||||
return chat_answer
|
||||
except HTTPException as e:
|
||||
@ -217,7 +220,10 @@ async def create_stream_question_handler(
|
||||
if chat_question.model not in streaming_compatible_models:
|
||||
# Forward the request to the none streaming endpoint
|
||||
return await create_question_handler(
|
||||
request, chat_question, chat_id, current_user
|
||||
request,
|
||||
chat_question,
|
||||
chat_id,
|
||||
current_user, # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
|
||||
try:
|
||||
@ -238,12 +244,14 @@ async def create_stream_question_handler(
|
||||
max_tokens=chat_question.max_tokens,
|
||||
temperature=chat_question.temperature,
|
||||
brain_id=str(brain_id),
|
||||
user_openai_api_key=user_openai_api_key,
|
||||
user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
|
||||
streaming=True,
|
||||
)
|
||||
|
||||
return StreamingResponse(
|
||||
gpt_answer_generator.generate_stream(chat_question.question),
|
||||
gpt_answer_generator.generate_stream( # pyright: ignore reportPrivateUsage=none
|
||||
chat_question.question
|
||||
),
|
||||
media_type="text/event-stream",
|
||||
)
|
||||
|
||||
@ -259,4 +267,4 @@ async def get_chat_history_handler(
|
||||
chat_id: UUID,
|
||||
) -> List[ChatHistory]:
|
||||
# TODO: RBAC with current_user
|
||||
return get_chat_history(chat_id)
|
||||
return get_chat_history(chat_id) # pyright: ignore reportPrivateUsage=none
|
||||
|
@ -35,7 +35,9 @@ async def crawl_endpoint(
|
||||
commons = common_dependencies()
|
||||
|
||||
if request.headers.get("Openai-Api-Key"):
|
||||
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
|
||||
brain.max_brain_size = os.getenv(
|
||||
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
file_size = 1000000
|
||||
remaining_free_space = brain.remaining_brain_size
|
||||
@ -47,14 +49,20 @@ async def crawl_endpoint(
|
||||
}
|
||||
else:
|
||||
if not crawl_website.checkGithub():
|
||||
file_path, file_name = crawl_website.process()
|
||||
(
|
||||
file_path,
|
||||
file_name,
|
||||
) = crawl_website.process() # pyright: ignore reportPrivateUsage=none
|
||||
# Create a SpooledTemporaryFile from the file_path
|
||||
spooled_file = SpooledTemporaryFile()
|
||||
with open(file_path, "rb") as f:
|
||||
shutil.copyfileobj(f, spooled_file)
|
||||
|
||||
# Pass the SpooledTemporaryFile to UploadFile
|
||||
uploadFile = UploadFile(file=spooled_file, filename=file_name)
|
||||
uploadFile = UploadFile(
|
||||
file=spooled_file, # pyright: ignore reportPrivateUsage=none
|
||||
filename=file_name,
|
||||
)
|
||||
file = File(file=uploadFile)
|
||||
# check remaining free space here !!
|
||||
message = await filter_file(
|
||||
|
@ -42,7 +42,9 @@ async def upload_file(
|
||||
commons = common_dependencies()
|
||||
|
||||
if request.headers.get("Openai-Api-Key"):
|
||||
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
|
||||
brain.max_brain_size = os.getenv(
|
||||
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
remaining_free_space = brain.remaining_brain_size
|
||||
|
||||
file_size = get_file_size(uploadFile)
|
||||
|
@ -10,6 +10,9 @@ client = TestClient(app)
|
||||
|
||||
API_KEY = os.getenv("CI_TEST_API_KEY")
|
||||
|
||||
if not API_KEY:
|
||||
raise ValueError("CI_TEST_API_KEY environment variable not set. Cannot run tests.")
|
||||
|
||||
|
||||
def test_read_main():
|
||||
response = client.get("/")
|
||||
@ -54,7 +57,8 @@ def test_create_and_delete_api_key():
|
||||
def test_retrieve_default_brain():
|
||||
# Making a GET request to the /brains/default/ endpoint
|
||||
response = client.get(
|
||||
"/brains/default/", headers={"Authorization": "Bearer " + API_KEY}
|
||||
"/brains/default/",
|
||||
headers={"Authorization": "Bearer " + API_KEY},
|
||||
)
|
||||
|
||||
# Assert that the response status code is 200 (HTTP OK)
|
||||
@ -88,7 +92,9 @@ def test_create_brain():
|
||||
|
||||
# Making a POST request to the /brains/ endpoint
|
||||
response = client.post(
|
||||
"/brains/", json=payload, headers={"Authorization": "Bearer " + API_KEY}
|
||||
"/brains/",
|
||||
json=payload,
|
||||
headers={"Authorization": "Bearer " + API_KEY},
|
||||
)
|
||||
|
||||
# Assert that the response status code is 200 (HTTP OK)
|
||||
@ -106,7 +112,10 @@ def test_create_brain():
|
||||
|
||||
def test_retrieve_all_brains():
|
||||
# Making a GET request to the /brains/ endpoint to retrieve all brains for the current user
|
||||
response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY})
|
||||
response = client.get(
|
||||
"/brains/",
|
||||
headers={"Authorization": "Bearer " + API_KEY},
|
||||
)
|
||||
|
||||
# Assert that the response status code is 200 (HTTP OK)
|
||||
assert response.status_code == 200
|
||||
@ -120,7 +129,10 @@ def test_retrieve_all_brains():
|
||||
|
||||
def test_delete_all_brains():
|
||||
# First, retrieve all brains for the current user
|
||||
response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY})
|
||||
response = client.get(
|
||||
"/brains/",
|
||||
headers={"Authorization": "Bearer " + API_KEY},
|
||||
)
|
||||
|
||||
# Assert that the response status code is 200 (HTTP OK)
|
||||
assert response.status_code == 200
|
||||
@ -133,7 +145,8 @@ def test_delete_all_brains():
|
||||
|
||||
# Send a DELETE request to delete the specific brain
|
||||
delete_response = client.delete(
|
||||
f"/brains/{brain_id}/", headers={"Authorization": "Bearer " + API_KEY}
|
||||
f"/brains/{brain_id}/",
|
||||
headers={"Authorization": "Bearer " + API_KEY},
|
||||
)
|
||||
|
||||
# Assert that the DELETE response status code is 200 (HTTP OK)
|
||||
@ -142,7 +155,10 @@ def test_delete_all_brains():
|
||||
|
||||
def test_get_all_chats():
|
||||
# Making a GET request to the /chat endpoint to retrieve all chats
|
||||
response = client.get("/chat", headers={"Authorization": "Bearer " + API_KEY})
|
||||
response = client.get(
|
||||
"/chat",
|
||||
headers={"Authorization": "Bearer " + API_KEY},
|
||||
)
|
||||
|
||||
# Assert that the response status code is 200 (HTTP OK)
|
||||
assert response.status_code == 200
|
||||
|
@ -18,8 +18,10 @@ def convert_bytes(bytes, precision=2):
|
||||
|
||||
def get_file_size(file: UploadFile):
|
||||
# move the cursor to the end of the file
|
||||
file.file._file.seek(0, 2)
|
||||
file_size = file.file._file.tell() # Getting the size of the file
|
||||
file.file._file.seek(0, 2) # pyright: ignore reportPrivateUsage=none
|
||||
file_size = (
|
||||
file.file._file.tell() # pyright: ignore reportPrivateUsage=none
|
||||
) # Getting the size of the file
|
||||
# move the cursor back to the beginning of the file
|
||||
file.file.seek(0)
|
||||
|
||||
|
@ -54,14 +54,19 @@ async def filter_file(
|
||||
|
||||
if file_exists_in_brain:
|
||||
return create_response(
|
||||
f"🤔 {file.file.filename} already exists in brain {brain_id}.", "warning"
|
||||
f"🤔 {file.file.filename} already exists in brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
|
||||
"warning",
|
||||
)
|
||||
elif file.file_is_empty():
|
||||
return create_response(f"❌ {file.file.filename} is empty.", "error")
|
||||
return create_response(
|
||||
f"❌ {file.file.filename} is empty.", # pyright: ignore reportPrivateUsage=none
|
||||
"error", # pyright: ignore reportPrivateUsage=none
|
||||
)
|
||||
elif file_exists:
|
||||
file.link_file_to_brain(brain=Brain(id=brain_id))
|
||||
return create_response(
|
||||
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", "success"
|
||||
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
|
||||
"success",
|
||||
)
|
||||
|
||||
if file.file_extension in file_processors:
|
||||
@ -70,14 +75,18 @@ async def filter_file(
|
||||
commons, file, enable_summarization, brain_id, openai_api_key
|
||||
)
|
||||
return create_response(
|
||||
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.",
|
||||
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
|
||||
"success",
|
||||
)
|
||||
except Exception as e:
|
||||
# Add more specific exceptions as needed.
|
||||
print(f"Error processing file: {e}")
|
||||
return create_response(
|
||||
f"⚠️ An error occurred while processing {file.file.filename}.", "error"
|
||||
f"⚠️ An error occurred while processing {file.file.filename}.", # pyright: ignore reportPrivateUsage=none
|
||||
"error",
|
||||
)
|
||||
|
||||
return create_response(f"❌ {file.file.filename} is not supported.", "error")
|
||||
return create_response(
|
||||
f"❌ {file.file.filename} is not supported.", # pyright: ignore reportPrivateUsage=none
|
||||
"error",
|
||||
)
|
||||
|
@ -13,7 +13,7 @@ logger = get_logger(__name__)
|
||||
|
||||
class Neurons(BaseModel):
|
||||
commons: CommonsDep
|
||||
settings = BrainSettings()
|
||||
settings = BrainSettings() # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
def create_vector(self, doc, user_openai_api_key=None):
|
||||
logger.info("Creating vector for document")
|
||||
@ -21,7 +21,7 @@ class Neurons(BaseModel):
|
||||
if user_openai_api_key:
|
||||
self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings(
|
||||
openai_api_key=user_openai_api_key
|
||||
)
|
||||
) # pyright: ignore reportPrivateUsage=none
|
||||
try:
|
||||
sids = self.commons["documents_vector_store"].add_documents([doc])
|
||||
if sids and len(sids) > 0:
|
||||
@ -64,7 +64,7 @@ def create_summary(commons: CommonsDep, document_id, content, metadata):
|
||||
|
||||
|
||||
def error_callback(exception):
|
||||
print('An exception occurred:', exception)
|
||||
print("An exception occurred:", exception)
|
||||
|
||||
|
||||
def process_batch(batch_ids):
|
||||
@ -106,14 +106,14 @@ def get_unique_files_from_vector_ids(vectors_ids: List[int]):
|
||||
with ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for i in range(0, len(vectors_ids), BATCH_SIZE):
|
||||
batch_ids = vectors_ids[i:i + BATCH_SIZE]
|
||||
batch_ids = vectors_ids[i : i + BATCH_SIZE]
|
||||
future = executor.submit(process_batch, batch_ids)
|
||||
futures.append(future)
|
||||
|
||||
# Retrieve the results
|
||||
vectors_responses = [future.result() for future in futures]
|
||||
|
||||
|
||||
documents = [item for sublist in vectors_responses for item in sublist]
|
||||
print('document', documents)
|
||||
print("document", documents)
|
||||
unique_files = [dict(t) for t in set(tuple(d.items()) for d in documents)]
|
||||
return unique_files
|
||||
|
@ -3,8 +3,7 @@ from typing import Any, List
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.vectorstores import SupabaseVectorStore
|
||||
|
||||
from supabase import Client
|
||||
from supabase.client import Client
|
||||
|
||||
|
||||
class CustomSupabaseVectorStore(SupabaseVectorStore):
|
||||
|
Loading…
Reference in New Issue
Block a user