mirror of
https://github.com/StanGirard/quivr.git
synced 2024-12-23 11:23:00 +03:00
Feat/static analysis (#582)
* feat: add static analysis
* chore: update Makefile add static analysis script
* chore: add vscode extensions recommendations
This commit is contained in:
parent
f2a06dc6de
commit
9e9f531c99
8
.vscode/extensions.json
vendored
Normal file
8
.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"recommendations": [
|
||||||
|
"ms-pyright.pyright",
|
||||||
|
"dbaeumer.vscode-eslint",
|
||||||
|
"ms-python.vscode-pylance",
|
||||||
|
"ms-pyright.pyright"
|
||||||
|
]
|
||||||
|
}
|
6
.vscode/settings.json
vendored
6
.vscode/settings.json
vendored
@ -12,7 +12,8 @@
|
|||||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||||
"editor.formatOnSave": true,
|
"editor.formatOnSave": true,
|
||||||
"editor.codeActionsOnSave": {
|
"editor.codeActionsOnSave": {
|
||||||
"source.organizeImports": true
|
"source.organizeImports": true,
|
||||||
|
"source.fixAll": true
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"[typescriptreact]": {
|
"[typescriptreact]": {
|
||||||
@ -41,4 +42,7 @@
|
|||||||
"**/.docusaurus/": true,
|
"**/.docusaurus/": true,
|
||||||
"**/node_modules/": true,
|
"**/node_modules/": true,
|
||||||
},
|
},
|
||||||
|
"python.linting.pycodestyleCategorySeverity.W": "Error",
|
||||||
|
"python.defaultInterpreterPath": "python3",
|
||||||
|
"python.linting.flake8CategorySeverity.W": "Error",
|
||||||
}
|
}
|
9
Makefile
9
Makefile
@ -4,4 +4,11 @@ dev:
|
|||||||
docker compose -f docker-compose.dev.yml up --build
|
docker compose -f docker-compose.dev.yml up --build
|
||||||
|
|
||||||
prod:
|
prod:
|
||||||
docker compose -f docker-compose.yml up --build
|
docker compose -f docker-compose.yml up --build
|
||||||
|
|
||||||
|
test-type:
|
||||||
|
@if command -v python3 &>/dev/null; then \
|
||||||
|
python3 -m pyright; \
|
||||||
|
else \
|
||||||
|
python -m pyright; \
|
||||||
|
fi
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from auth.api_key_handler import get_user_from_api_key, verify_api_key
|
|
||||||
from auth.jwt_token_handler import decode_access_token, verify_token
|
|
||||||
from fastapi import Depends, HTTPException, Request
|
from fastapi import Depends, HTTPException, Request
|
||||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||||
from models.users import User
|
from models.users import User
|
||||||
|
|
||||||
|
from auth.api_key_handler import get_user_from_api_key, verify_api_key
|
||||||
|
from auth.jwt_token_handler import decode_access_token, verify_token
|
||||||
|
|
||||||
|
|
||||||
class AuthBearer(HTTPBearer):
|
class AuthBearer(HTTPBearer):
|
||||||
def __init__(self, auto_error: bool = True):
|
def __init__(self, auto_error: bool = True):
|
||||||
@ -20,7 +21,7 @@ class AuthBearer(HTTPBearer):
|
|||||||
request
|
request
|
||||||
)
|
)
|
||||||
self.check_scheme(credentials)
|
self.check_scheme(credentials)
|
||||||
token = credentials.credentials
|
token = credentials.credentials # pyright: ignore reportPrivateUsage=none
|
||||||
return await self.authenticate(
|
return await self.authenticate(
|
||||||
token,
|
token,
|
||||||
)
|
)
|
||||||
@ -52,7 +53,7 @@ class AuthBearer(HTTPBearer):
|
|||||||
|
|
||||||
def get_test_user(self) -> User:
|
def get_test_user(self) -> User:
|
||||||
return User(
|
return User(
|
||||||
email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
|
email="test@example.com", id="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" # type: ignore
|
||||||
) # replace with test user information
|
) # replace with test user information
|
||||||
|
|
||||||
|
|
||||||
|
@ -9,6 +9,9 @@ from models.users import User
|
|||||||
SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
|
SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
|
||||||
ALGORITHM = "HS256"
|
ALGORITHM = "HS256"
|
||||||
|
|
||||||
|
if not SECRET_KEY:
|
||||||
|
raise ValueError("JWT_SECRET_KEY environment variable not set")
|
||||||
|
|
||||||
|
|
||||||
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
|
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
|
||||||
to_encode = data.copy()
|
to_encode = data.copy()
|
||||||
@ -27,9 +30,12 @@ def decode_access_token(token: str) -> User:
|
|||||||
token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
|
token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
|
||||||
)
|
)
|
||||||
except JWTError:
|
except JWTError:
|
||||||
return None
|
return None # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
return User(email=payload.get("email"), id=payload.get("sub"))
|
return User(
|
||||||
|
email=payload.get("email"),
|
||||||
|
id=payload.get("sub"), # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def verify_token(token: str):
|
def verify_token(token: str):
|
||||||
|
@ -4,7 +4,6 @@ import tempfile
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from langchain.document_loaders import GitLoader
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
@ -29,7 +28,7 @@ class CrawlWebsite(BaseModel):
|
|||||||
file_name = slugify(self.url) + ".html"
|
file_name = slugify(self.url) + ".html"
|
||||||
temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
|
temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
|
||||||
with open(temp_file_path, "w") as temp_file:
|
with open(temp_file_path, "w") as temp_file:
|
||||||
temp_file.write(content)
|
temp_file.write(content) # pyright: ignore reportPrivateUsage=none
|
||||||
# Process the file
|
# Process the file
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
|
@ -20,19 +20,21 @@ class BaseBrainPicking(BaseModel):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Instantiate settings
|
# Instantiate settings
|
||||||
brain_settings = BrainSettings()
|
brain_settings = BrainSettings() # type: ignore other parameters are optional
|
||||||
|
|
||||||
# Default class attributes
|
# Default class attributes
|
||||||
model: str = None
|
model: str = None # pyright: ignore reportPrivateUsage=none
|
||||||
temperature: float = 0.0
|
temperature: float = 0.0
|
||||||
chat_id: str = None
|
chat_id: str = None # pyright: ignore reportPrivateUsage=none
|
||||||
brain_id: str = None
|
brain_id: str = None # pyright: ignore reportPrivateUsage=none
|
||||||
max_tokens: int = 256
|
max_tokens: int = 256
|
||||||
user_openai_api_key: str = None
|
user_openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
|
||||||
streaming: bool = False
|
streaming: bool = False
|
||||||
|
|
||||||
openai_api_key: str = None
|
openai_api_key: str = None # pyright: ignore reportPrivateUsage=none
|
||||||
callbacks: List[AsyncCallbackHandler] = None
|
callbacks: List[
|
||||||
|
AsyncCallbackHandler
|
||||||
|
] = None # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
def _determine_api_key(self, openai_api_key, user_openai_api_key):
|
def _determine_api_key(self, openai_api_key, user_openai_api_key):
|
||||||
"""If user provided an API key, use it."""
|
"""If user provided an API key, use it."""
|
||||||
@ -55,10 +57,12 @@ class BaseBrainPicking(BaseModel):
|
|||||||
|
|
||||||
def _determine_callback_array(
|
def _determine_callback_array(
|
||||||
self, streaming
|
self, streaming
|
||||||
) -> List[AsyncIteratorCallbackHandler]:
|
) -> List[AsyncIteratorCallbackHandler]: # pyright: ignore reportPrivateUsage=none
|
||||||
"""If streaming is set, set the AsyncIteratorCallbackHandler as the only callback."""
|
"""If streaming is set, set the AsyncIteratorCallbackHandler as the only callback."""
|
||||||
if streaming:
|
if streaming:
|
||||||
return [AsyncIteratorCallbackHandler]
|
return [
|
||||||
|
AsyncIteratorCallbackHandler # pyright: ignore reportPrivateUsage=none
|
||||||
|
]
|
||||||
|
|
||||||
def __init__(self, **data):
|
def __init__(self, **data):
|
||||||
super().__init__(**data)
|
super().__init__(**data)
|
||||||
@ -66,8 +70,12 @@ class BaseBrainPicking(BaseModel):
|
|||||||
self.openai_api_key = self._determine_api_key(
|
self.openai_api_key = self._determine_api_key(
|
||||||
self.brain_settings.openai_api_key, self.user_openai_api_key
|
self.brain_settings.openai_api_key, self.user_openai_api_key
|
||||||
)
|
)
|
||||||
self.streaming = self._determine_streaming(self.model, self.streaming)
|
self.streaming = self._determine_streaming(
|
||||||
self.callbacks = self._determine_callback_array(self.streaming)
|
self.model, self.streaming
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.callbacks = self._determine_callback_array(
|
||||||
|
self.streaming
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
"""Configuration of the Pydantic Object"""
|
"""Configuration of the Pydantic Object"""
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from .FunctionCall import FunctionCall
|
from .FunctionCall import FunctionCall
|
||||||
|
|
||||||
|
|
||||||
@ -6,7 +7,7 @@ class OpenAiAnswer:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
content: Optional[str] = None,
|
content: Optional[str] = None,
|
||||||
function_call: FunctionCall = None,
|
function_call: FunctionCall = None, # pyright: ignore reportPrivateUsage=none
|
||||||
):
|
):
|
||||||
self.content = content
|
self.content = content
|
||||||
self.function_call = function_call
|
self.function_call = function_call
|
||||||
|
@ -13,11 +13,12 @@ from repository.chat.format_chat_history import format_chat_history
|
|||||||
from repository.chat.get_chat_history import get_chat_history
|
from repository.chat.get_chat_history import get_chat_history
|
||||||
from repository.chat.update_chat_history import update_chat_history
|
from repository.chat.update_chat_history import update_chat_history
|
||||||
from repository.chat.update_message_by_id import update_message_by_id
|
from repository.chat.update_message_by_id import update_message_by_id
|
||||||
from supabase import Client, create_client
|
from supabase.client import Client, create_client
|
||||||
from vectorstore.supabase import (
|
from vectorstore.supabase import (
|
||||||
CustomSupabaseVectorStore,
|
CustomSupabaseVectorStore,
|
||||||
) # Custom class for handling vector storage with Supabase
|
)
|
||||||
|
|
||||||
|
# Custom class for handling vector storage with Supabase
|
||||||
from .base import BaseBrainPicking
|
from .base import BaseBrainPicking
|
||||||
from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
|
from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
|
||||||
|
|
||||||
@ -42,7 +43,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
|||||||
max_tokens: int,
|
max_tokens: int,
|
||||||
user_openai_api_key: str,
|
user_openai_api_key: str,
|
||||||
streaming: bool = False,
|
streaming: bool = False,
|
||||||
) -> "OpenAIBrainPicking":
|
) -> "OpenAIBrainPicking": # pyright: ignore reportPrivateUsage=none
|
||||||
"""
|
"""
|
||||||
Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
|
Initialize the BrainPicking class by setting embeddings, supabase client, vector store, language model and chains.
|
||||||
:return: OpenAIBrainPicking instance
|
:return: OpenAIBrainPicking instance
|
||||||
@ -59,7 +60,9 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def embeddings(self) -> OpenAIEmbeddings:
|
def embeddings(self) -> OpenAIEmbeddings:
|
||||||
return OpenAIEmbeddings(openai_api_key=self.openai_api_key)
|
return OpenAIEmbeddings(
|
||||||
|
openai_api_key=self.openai_api_key
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def supabase_client(self) -> Client:
|
def supabase_client(self) -> Client:
|
||||||
@ -92,14 +95,16 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def doc_chain(self) -> LLMChain:
|
def doc_chain(self) -> LLMChain:
|
||||||
return load_qa_chain(llm=self.doc_llm, chain_type="stuff")
|
return load_qa_chain(
|
||||||
|
llm=self.doc_llm, chain_type="stuff"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def qa(self) -> ConversationalRetrievalChain:
|
def qa(self) -> ConversationalRetrievalChain:
|
||||||
return ConversationalRetrievalChain(
|
return ConversationalRetrievalChain(
|
||||||
retriever=self.vector_store.as_retriever(),
|
retriever=self.vector_store.as_retriever(),
|
||||||
question_generator=self.question_generator,
|
question_generator=self.question_generator,
|
||||||
combine_docs_chain=self.doc_chain,
|
combine_docs_chain=self.doc_chain, # pyright: ignore reportPrivateUsage=none
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -116,7 +121,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
|||||||
model=model,
|
model=model,
|
||||||
streaming=streaming,
|
streaming=streaming,
|
||||||
callbacks=callbacks,
|
callbacks=callbacks,
|
||||||
)
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
def _call_chain(self, chain, question, history):
|
def _call_chain(self, chain, question, history):
|
||||||
"""
|
"""
|
||||||
@ -205,8 +210,10 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
|||||||
|
|
||||||
task = asyncio.create_task(
|
task = asyncio.create_task(
|
||||||
wrap_done(
|
wrap_done(
|
||||||
self.qa._acall_chain(self.qa, question, transformed_history),
|
self.qa._acall_chain( # pyright: ignore reportPrivateUsage=none
|
||||||
callback.done,
|
self.qa, question, transformed_history
|
||||||
|
),
|
||||||
|
callback.done, # pyright: ignore reportPrivateUsage=none
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -217,7 +224,7 @@ class OpenAIBrainPicking(BaseBrainPicking):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Use the aiter method of the callback to stream the response with server-sent-events
|
# Use the aiter method of the callback to stream the response with server-sent-events
|
||||||
async for token in callback.aiter():
|
async for token in callback.aiter(): # pyright: ignore reportPrivateUsage=none
|
||||||
logger.info("Token: %s", token)
|
logger.info("Token: %s", token)
|
||||||
|
|
||||||
# Add the token to the response_tokens list
|
# Add the token to the response_tokens list
|
||||||
|
@ -2,15 +2,16 @@ from typing import Any, Dict, List, Optional
|
|||||||
|
|
||||||
from langchain.chat_models import ChatOpenAI
|
from langchain.chat_models import ChatOpenAI
|
||||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||||
from llm.models.FunctionCall import FunctionCall
|
|
||||||
from llm.models.OpenAiAnswer import OpenAiAnswer
|
|
||||||
from logger import get_logger
|
from logger import get_logger
|
||||||
from models.chat import ChatHistory
|
from models.chat import ChatHistory
|
||||||
from repository.chat.get_chat_history import get_chat_history
|
from repository.chat.get_chat_history import get_chat_history
|
||||||
from repository.chat.update_chat_history import update_chat_history
|
from repository.chat.update_chat_history import update_chat_history
|
||||||
from supabase import Client, create_client
|
from supabase.client import Client, create_client
|
||||||
from vectorstore.supabase import CustomSupabaseVectorStore
|
from vectorstore.supabase import CustomSupabaseVectorStore
|
||||||
|
|
||||||
|
from llm.models.FunctionCall import FunctionCall
|
||||||
|
from llm.models.OpenAiAnswer import OpenAiAnswer
|
||||||
|
|
||||||
from .base import BaseBrainPicking
|
from .base import BaseBrainPicking
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
@ -27,7 +28,10 @@ def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer:
|
|||||||
answer["function_call"]["arguments"],
|
answer["function_call"]["arguments"],
|
||||||
)
|
)
|
||||||
|
|
||||||
return OpenAiAnswer(content=content, function_call=function_call)
|
return OpenAiAnswer(
|
||||||
|
content=content,
|
||||||
|
function_call=function_call, # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
||||||
@ -48,7 +52,7 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
|||||||
brain_id: str,
|
brain_id: str,
|
||||||
user_openai_api_key: str,
|
user_openai_api_key: str,
|
||||||
# TODO: add streaming
|
# TODO: add streaming
|
||||||
) -> "OpenAIFunctionsBrainPicking":
|
) -> "OpenAIFunctionsBrainPicking": # pyright: ignore reportPrivateUsage=none
|
||||||
super().__init__(
|
super().__init__(
|
||||||
model=model,
|
model=model,
|
||||||
chat_id=chat_id,
|
chat_id=chat_id,
|
||||||
@ -61,11 +65,15 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def openai_client(self) -> ChatOpenAI:
|
def openai_client(self) -> ChatOpenAI:
|
||||||
return ChatOpenAI(openai_api_key=self.openai_api_key)
|
return ChatOpenAI(
|
||||||
|
openai_api_key=self.openai_api_key
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def embeddings(self) -> OpenAIEmbeddings:
|
def embeddings(self) -> OpenAIEmbeddings:
|
||||||
return OpenAIEmbeddings(openai_api_key=self.openai_api_key)
|
return OpenAIEmbeddings(
|
||||||
|
openai_api_key=self.openai_api_key
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def supabase_client(self) -> Client:
|
def supabase_client(self) -> Client:
|
||||||
@ -125,7 +133,9 @@ class OpenAIFunctionsBrainPicking(BaseBrainPicking):
|
|||||||
"""
|
"""
|
||||||
logger.info("Getting context")
|
logger.info("Getting context")
|
||||||
|
|
||||||
return self.vector_store.similarity_search(query=question)
|
return self.vector_store.similarity_search(
|
||||||
|
query=question
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
def _construct_prompt(
|
def _construct_prompt(
|
||||||
self, question: str, useContext: bool = False, useHistory: bool = False
|
self, question: str, useContext: bool = False, useHistory: bool = False
|
||||||
|
@ -21,7 +21,7 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
|
|||||||
chat_id: str,
|
chat_id: str,
|
||||||
brain_id: str,
|
brain_id: str,
|
||||||
streaming: bool,
|
streaming: bool,
|
||||||
) -> "PrivateGPT4AllBrainPicking":
|
) -> "PrivateGPT4AllBrainPicking": # pyright: ignore reportPrivateUsage=none
|
||||||
"""
|
"""
|
||||||
Initialize the PrivateBrainPicking class by calling the parent class's initializer.
|
Initialize the PrivateBrainPicking class by calling the parent class's initializer.
|
||||||
:param brain_id: The brain_id in the DB.
|
:param brain_id: The brain_id in the DB.
|
||||||
@ -57,4 +57,4 @@ class PrivateGPT4AllBrainPicking(BaseBrainPicking):
|
|||||||
n_batch=model_n_batch,
|
n_batch=model_n_batch,
|
||||||
backend="gptj",
|
backend="gptj",
|
||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
@ -31,7 +31,7 @@ Summarize the following text:
|
|||||||
{{/assistant~}}
|
{{/assistant~}}
|
||||||
""",
|
""",
|
||||||
llm=summary_llm,
|
llm=summary_llm,
|
||||||
)
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
summary = summary(document=document)
|
summary = summary(document=document)
|
||||||
logger.info("Summarization: %s", summary)
|
logger.info("Summarization: %s", summary)
|
||||||
@ -78,10 +78,12 @@ Summary
|
|||||||
{{/assistant~}}
|
{{/assistant~}}
|
||||||
""",
|
""",
|
||||||
llm=evaluation_llm,
|
llm=evaluation_llm,
|
||||||
)
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
result = evaluation(question=question, summaries=summaries)
|
result = evaluation(question=question, summaries=summaries)
|
||||||
evaluations = {}
|
evaluations = {}
|
||||||
for evaluation in result["evaluation"].split("\n"):
|
for evaluation in result["evaluation"].split(
|
||||||
|
"\n"
|
||||||
|
): # pyright: ignore reportPrivateUsage=none
|
||||||
if evaluation == "" or not evaluation[0].isdigit():
|
if evaluation == "" or not evaluation[0].isdigit():
|
||||||
continue
|
continue
|
||||||
logger.info("Evaluation Row: %s", evaluation)
|
logger.info("Evaluation Row: %s", evaluation)
|
||||||
|
@ -2,11 +2,12 @@ import os
|
|||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from models.settings import CommonsDep, common_dependencies
|
|
||||||
from models.users import User
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from utils.vectors import get_unique_files_from_vector_ids
|
from utils.vectors import get_unique_files_from_vector_ids
|
||||||
|
|
||||||
|
from models.settings import CommonsDep, common_dependencies
|
||||||
|
from models.users import User
|
||||||
|
|
||||||
|
|
||||||
class Brain(BaseModel):
|
class Brain(BaseModel):
|
||||||
id: Optional[UUID] = None
|
id: Optional[UUID] = None
|
||||||
@ -15,7 +16,7 @@ class Brain(BaseModel):
|
|||||||
model: Optional[str] = "gpt-3.5-turbo-0613"
|
model: Optional[str] = "gpt-3.5-turbo-0613"
|
||||||
temperature: Optional[float] = 0.0
|
temperature: Optional[float] = 0.0
|
||||||
max_tokens: Optional[int] = 256
|
max_tokens: Optional[int] = 256
|
||||||
brain_size: Optional[float] = 0.0
|
brain_size: Optional[float] = 0.0 # pyright: ignore reportPrivateUsage=none
|
||||||
max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
|
max_brain_size: Optional[int] = int(os.getenv("MAX_BRAIN_SIZE", 0))
|
||||||
files: List[Any] = []
|
files: List[Any] = []
|
||||||
_commons: Optional[CommonsDep] = None
|
_commons: Optional[CommonsDep] = None
|
||||||
@ -27,7 +28,7 @@ class Brain(BaseModel):
|
|||||||
def commons(self) -> CommonsDep:
|
def commons(self) -> CommonsDep:
|
||||||
if not self._commons:
|
if not self._commons:
|
||||||
self.__class__._commons = common_dependencies()
|
self.__class__._commons = common_dependencies()
|
||||||
return self._commons
|
return self._commons # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def brain_size(self):
|
def brain_size(self):
|
||||||
@ -39,12 +40,17 @@ class Brain(BaseModel):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def remaining_brain_size(self):
|
def remaining_brain_size(self):
|
||||||
return float(self.max_brain_size) - self.brain_size
|
return (
|
||||||
|
float(self.max_brain_size) # pyright: ignore reportPrivateUsage=none
|
||||||
|
- self.brain_size # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, *args, **kwargs):
|
def create(cls, *args, **kwargs):
|
||||||
commons = common_dependencies()
|
commons = common_dependencies()
|
||||||
return cls(commons=commons, *args, **kwargs)
|
return cls(
|
||||||
|
commons=commons, *args, **kwargs # pyright: ignore reportPrivateUsage=none
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
def get_user_brains(self, user_id):
|
def get_user_brains(self, user_id):
|
||||||
response = (
|
response = (
|
||||||
|
@ -9,10 +9,18 @@ class Chat:
|
|||||||
chat_name: str
|
chat_name: str
|
||||||
|
|
||||||
def __init__(self, chat_dict: dict):
|
def __init__(self, chat_dict: dict):
|
||||||
self.chat_id = chat_dict.get("chat_id")
|
self.chat_id = chat_dict.get(
|
||||||
self.user_id = chat_dict.get("user_id")
|
"chat_id"
|
||||||
self.creation_time = chat_dict.get("creation_time")
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
self.chat_name = chat_dict.get("chat_name")
|
self.user_id = chat_dict.get(
|
||||||
|
"user_id"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.creation_time = chat_dict.get(
|
||||||
|
"creation_time"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.chat_name = chat_dict.get(
|
||||||
|
"chat_name"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -24,11 +32,21 @@ class ChatHistory:
|
|||||||
message_time: str
|
message_time: str
|
||||||
|
|
||||||
def __init__(self, chat_dict: dict):
|
def __init__(self, chat_dict: dict):
|
||||||
self.chat_id = chat_dict.get("chat_id")
|
self.chat_id = chat_dict.get(
|
||||||
self.message_id = chat_dict.get("message_id")
|
"chat_id"
|
||||||
self.user_message = chat_dict.get("user_message")
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
self.assistant = chat_dict.get("assistant")
|
self.message_id = chat_dict.get(
|
||||||
self.message_time = chat_dict.get("message_time")
|
"message_id"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.user_message = chat_dict.get(
|
||||||
|
"user_message"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.assistant = chat_dict.get(
|
||||||
|
"assistant"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.message_time = chat_dict.get(
|
||||||
|
"message_time"
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
return asdict(self)
|
return asdict(self)
|
||||||
|
@ -6,11 +6,12 @@ from uuid import UUID
|
|||||||
from fastapi import UploadFile
|
from fastapi import UploadFile
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from logger import get_logger
|
from logger import get_logger
|
||||||
from models.brains import Brain
|
|
||||||
from models.settings import CommonsDep, common_dependencies
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from utils.file import compute_sha1_from_file
|
from utils.file import compute_sha1_from_file
|
||||||
|
|
||||||
|
from models.brains import Brain
|
||||||
|
from models.settings import CommonsDep, common_dependencies
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -18,9 +19,9 @@ class File(BaseModel):
|
|||||||
id: Optional[UUID] = None
|
id: Optional[UUID] = None
|
||||||
file: Optional[UploadFile]
|
file: Optional[UploadFile]
|
||||||
file_name: Optional[str] = ""
|
file_name: Optional[str] = ""
|
||||||
file_size: Optional[int] = ""
|
file_size: Optional[int] = "" # pyright: ignore reportPrivateUsage=none
|
||||||
file_sha1: Optional[str] = ""
|
file_sha1: Optional[str] = ""
|
||||||
vectors_ids: Optional[int] = []
|
vectors_ids: Optional[int] = [] # pyright: ignore reportPrivateUsage=none
|
||||||
file_extension: Optional[str] = ""
|
file_extension: Optional[str] = ""
|
||||||
content: Optional[Any] = None
|
content: Optional[Any] = None
|
||||||
chunk_size: int = 500
|
chunk_size: int = 500
|
||||||
@ -30,16 +31,25 @@ class File(BaseModel):
|
|||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
if self.file:
|
if self.file:
|
||||||
self.file_name = self.file.filename
|
self.file_name = self.file.filename
|
||||||
self.file_size = self.file.file._file.tell()
|
self.file_size = (
|
||||||
self.file_extension = os.path.splitext(self.file.filename)[-1].lower()
|
self.file.file._file.tell() # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
self.file_extension = os.path.splitext(
|
||||||
|
self.file.filename # pyright: ignore reportPrivateUsage=none
|
||||||
|
)[-1].lower()
|
||||||
|
|
||||||
async def compute_file_sha1(self):
|
async def compute_file_sha1(self):
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
|
with tempfile.NamedTemporaryFile(
|
||||||
await self.file.seek(0)
|
delete=False,
|
||||||
self.content = await self.file.read()
|
suffix=self.file.filename, # pyright: ignore reportPrivateUsage=none
|
||||||
|
) as tmp_file:
|
||||||
|
await self.file.seek(0) # pyright: ignore reportPrivateUsage=none
|
||||||
|
self.content = (
|
||||||
|
await self.file.read() # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
tmp_file.write(self.content)
|
tmp_file.write(self.content)
|
||||||
tmp_file.flush()
|
tmp_file.flush()
|
||||||
self.file_sha1 = compute_sha1_from_file(tmp_file.name)
|
self.file_sha1 = compute_sha1_from_file(tmp_file.name)
|
||||||
@ -48,18 +58,21 @@ class File(BaseModel):
|
|||||||
|
|
||||||
def compute_documents(self, loader_class):
|
def compute_documents(self, loader_class):
|
||||||
logger.info(f"Computing documents from file {self.file_name}")
|
logger.info(f"Computing documents from file {self.file_name}")
|
||||||
|
|
||||||
documents = []
|
documents = []
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
|
with tempfile.NamedTemporaryFile(
|
||||||
tmp_file.write(self.content)
|
delete=False,
|
||||||
|
suffix=self.file.filename, # pyright: ignore reportPrivateUsage=none
|
||||||
|
) as tmp_file:
|
||||||
|
tmp_file.write(self.content) # pyright: ignore reportPrivateUsage=none
|
||||||
tmp_file.flush()
|
tmp_file.flush()
|
||||||
loader = loader_class(tmp_file.name)
|
loader = loader_class(tmp_file.name)
|
||||||
documents = loader.load()
|
documents = loader.load()
|
||||||
|
|
||||||
print("documents", documents)
|
print("documents", documents)
|
||||||
|
|
||||||
os.remove(tmp_file.name)
|
os.remove(tmp_file.name)
|
||||||
|
|
||||||
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
||||||
chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
|
chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
|
||||||
)
|
)
|
||||||
@ -70,20 +83,21 @@ class File(BaseModel):
|
|||||||
|
|
||||||
def set_file_vectors_ids(self):
|
def set_file_vectors_ids(self):
|
||||||
"""
|
"""
|
||||||
Set the vectors_ids property with the ids of the vectors
|
Set the vectors_ids property with the ids of the vectors
|
||||||
that are associated with the file in the vectors table
|
that are associated with the file in the vectors table
|
||||||
"""
|
"""
|
||||||
|
|
||||||
commons = common_dependencies()
|
commons = common_dependencies()
|
||||||
response = (
|
response = (
|
||||||
commons["supabase"].table("vectors")
|
commons["supabase"]
|
||||||
|
.table("vectors")
|
||||||
.select("id")
|
.select("id")
|
||||||
.filter("metadata->>file_sha1", "eq", self.file_sha1)
|
.filter("metadata->>file_sha1", "eq", self.file_sha1)
|
||||||
.execute()
|
.execute()
|
||||||
)
|
)
|
||||||
self.vectors_ids = response.data
|
self.vectors_ids = response.data
|
||||||
return
|
return
|
||||||
|
|
||||||
def file_already_exists(self):
|
def file_already_exists(self):
|
||||||
"""
|
"""
|
||||||
Check if file already exists in vectors table
|
Check if file already exists in vectors table
|
||||||
@ -92,20 +106,24 @@ class File(BaseModel):
|
|||||||
|
|
||||||
print("file_sha1", self.file_sha1)
|
print("file_sha1", self.file_sha1)
|
||||||
print("vectors_ids", self.vectors_ids)
|
print("vectors_ids", self.vectors_ids)
|
||||||
print("len(vectors_ids)", len(self.vectors_ids))
|
print(
|
||||||
|
"len(vectors_ids)",
|
||||||
|
len(self.vectors_ids), # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
# if the file does not exist in vectors then no need to go check in brains_vectors
|
# if the file does not exist in vectors then no need to go check in brains_vectors
|
||||||
if len(self.vectors_ids) == 0:
|
if len(self.vectors_ids) == 0: # pyright: ignore reportPrivateUsage=none
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def file_already_exists_in_brain(self, brain_id):
|
def file_already_exists_in_brain(self, brain_id):
|
||||||
commons = common_dependencies()
|
commons = common_dependencies()
|
||||||
self.set_file_vectors_ids()
|
self.set_file_vectors_ids()
|
||||||
# Check if file exists in that brain
|
# Check if file exists in that brain
|
||||||
response = (
|
response = (
|
||||||
commons["supabase"].table("brains_vectors")
|
commons["supabase"]
|
||||||
|
.table("brains_vectors")
|
||||||
.select("brain_id, vector_id")
|
.select("brain_id, vector_id")
|
||||||
.filter("brain_id", "eq", brain_id)
|
.filter("brain_id", "eq", brain_id)
|
||||||
.filter("file_sha1", "eq", self.file_sha1)
|
.filter("file_sha1", "eq", self.file_sha1)
|
||||||
@ -114,15 +132,17 @@ class File(BaseModel):
|
|||||||
print("response.data", response.data)
|
print("response.data", response.data)
|
||||||
if len(response.data) == 0:
|
if len(response.data) == 0:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def file_is_empty(self):
|
def file_is_empty(self):
|
||||||
return self.file.file._file.tell() < 1
|
return (
|
||||||
|
self.file.file._file.tell() < 1 # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
def link_file_to_brain(self, brain: Brain):
|
def link_file_to_brain(self, brain: Brain):
|
||||||
self.set_file_vectors_ids()
|
self.set_file_vectors_ids()
|
||||||
|
|
||||||
for vector_id in self.vectors_ids:
|
for vector_id in self.vectors_ids: # pyright: ignore reportPrivateUsage=none
|
||||||
brain.create_brain_vector(vector_id['id'], self.file_sha1)
|
brain.create_brain_vector(vector_id["id"], self.file_sha1)
|
||||||
print(f"Successfully linked file {self.file_sha1} to brain {brain.id}")
|
print(f"Successfully linked file {self.file_sha1} to brain {brain.id}")
|
||||||
|
@ -3,7 +3,7 @@ from typing import Annotated
|
|||||||
from fastapi import Depends
|
from fastapi import Depends
|
||||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||||
from pydantic import BaseSettings
|
from pydantic import BaseSettings
|
||||||
from supabase import Client, create_client
|
from supabase.client import Client, create_client
|
||||||
from vectorstore.supabase import SupabaseVectorStore
|
from vectorstore.supabase import SupabaseVectorStore
|
||||||
|
|
||||||
|
|
||||||
@ -22,8 +22,10 @@ class LLMSettings(BaseSettings):
|
|||||||
|
|
||||||
|
|
||||||
def common_dependencies() -> dict:
|
def common_dependencies() -> dict:
|
||||||
settings = BrainSettings()
|
settings = BrainSettings() # pyright: ignore reportPrivateUsage=none
|
||||||
embeddings = OpenAIEmbeddings(openai_api_key=settings.openai_api_key)
|
embeddings = OpenAIEmbeddings(
|
||||||
|
openai_api_key=settings.openai_api_key
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
supabase_client: Client = create_client(
|
supabase_client: Client = create_client(
|
||||||
settings.supabase_url, settings.supabase_service_key
|
settings.supabase_url, settings.supabase_service_key
|
||||||
)
|
)
|
||||||
|
@ -10,23 +10,33 @@ from models.settings import CommonsDep
|
|||||||
from utils.file import compute_sha1_from_content
|
from utils.file import compute_sha1_from_content
|
||||||
|
|
||||||
|
|
||||||
async def process_audio(commons: CommonsDep, file: File, enable_summarization: bool, user, user_openai_api_key):
|
async def process_audio(
|
||||||
|
commons: CommonsDep, # pyright: ignore reportPrivateUsage=none
|
||||||
|
file: File,
|
||||||
|
enable_summarization: bool,
|
||||||
|
user,
|
||||||
|
user_openai_api_key,
|
||||||
|
):
|
||||||
temp_filename = None
|
temp_filename = None
|
||||||
file_sha = ""
|
file_sha = ""
|
||||||
dateshort = time.strftime("%Y%m%d-%H%M%S")
|
dateshort = time.strftime("%Y%m%d-%H%M%S")
|
||||||
file_meta_name = f"audiotranscript_{dateshort}.txt"
|
file_meta_name = f"audiotranscript_{dateshort}.txt"
|
||||||
|
|
||||||
# use this for whisper
|
# use this for whisper
|
||||||
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
os.environ.get("OPENAI_API_KEY")
|
||||||
if user_openai_api_key:
|
if user_openai_api_key:
|
||||||
openai_api_key = user_openai_api_key
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
upload_file = file.file
|
upload_file = file.file
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=upload_file.filename) as tmp_file:
|
with tempfile.NamedTemporaryFile(
|
||||||
await upload_file.seek(0)
|
delete=False,
|
||||||
content = await upload_file.read()
|
suffix=upload_file.filename, # pyright: ignore reportPrivateUsage=none
|
||||||
|
) as tmp_file:
|
||||||
|
await upload_file.seek(0) # pyright: ignore reportPrivateUsage=none
|
||||||
|
content = (
|
||||||
|
await upload_file.read() # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
tmp_file.write(content)
|
tmp_file.write(content)
|
||||||
tmp_file.flush()
|
tmp_file.flush()
|
||||||
tmp_file.close()
|
tmp_file.close()
|
||||||
@ -36,21 +46,42 @@ async def process_audio(commons: CommonsDep, file: File, enable_summarization: b
|
|||||||
with open(tmp_file.name, "rb") as audio_file:
|
with open(tmp_file.name, "rb") as audio_file:
|
||||||
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
||||||
|
|
||||||
file_sha = compute_sha1_from_content(transcript.text.encode("utf-8"))
|
file_sha = compute_sha1_from_content(
|
||||||
file_size = len(transcript.text.encode("utf-8"))
|
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
file_size = len(
|
||||||
|
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
chunk_size = 500
|
chunk_size = 500
|
||||||
chunk_overlap = 0
|
chunk_overlap = 0
|
||||||
|
|
||||||
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
||||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
||||||
texts = text_splitter.split_text(transcript.text.encode("utf-8"))
|
)
|
||||||
|
texts = text_splitter.split_text(
|
||||||
|
transcript.text.encode("utf-8") # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
|
|
||||||
docs_with_metadata = [Document(page_content=text, metadata={"file_sha1": file_sha, "file_size": file_size, "file_name": file_meta_name,
|
docs_with_metadata = [
|
||||||
"chunk_size": chunk_size, "chunk_overlap": chunk_overlap, "date": dateshort}) for text in texts]
|
Document(
|
||||||
|
page_content=text,
|
||||||
|
metadata={
|
||||||
|
"file_sha1": file_sha,
|
||||||
|
"file_size": file_size,
|
||||||
|
"file_name": file_meta_name,
|
||||||
|
"chunk_size": chunk_size,
|
||||||
|
"chunk_overlap": chunk_overlap,
|
||||||
|
"date": dateshort,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for text in texts
|
||||||
|
]
|
||||||
|
|
||||||
commons.documents_vector_store.add_documents(docs_with_metadata)
|
commons.documents_vector_store.add_documents( # pyright: ignore reportPrivateUsage=none
|
||||||
|
docs_with_metadata
|
||||||
|
)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if temp_filename and os.path.exists(temp_filename):
|
if temp_filename and os.path.exists(temp_filename):
|
||||||
os.remove(temp_filename)
|
os.remove(temp_filename)
|
||||||
|
@ -19,7 +19,7 @@ async def process_file(
|
|||||||
|
|
||||||
file.compute_documents(loader_class)
|
file.compute_documents(loader_class)
|
||||||
|
|
||||||
for doc in file.documents:
|
for doc in file.documents: # pyright: ignore reportPrivateUsage=none
|
||||||
metadata = {
|
metadata = {
|
||||||
"file_sha1": file.file_sha1,
|
"file_sha1": file.file_sha1,
|
||||||
"file_size": file.file_size,
|
"file_size": file.file_size,
|
||||||
@ -29,17 +29,15 @@ async def process_file(
|
|||||||
"date": dateshort,
|
"date": dateshort,
|
||||||
"summarization": "true" if enable_summarization else "false",
|
"summarization": "true" if enable_summarization else "false",
|
||||||
}
|
}
|
||||||
doc_with_metadata = Document(
|
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
|
||||||
page_content=doc.page_content, metadata=metadata)
|
|
||||||
|
|
||||||
neurons = Neurons(commons=commons)
|
neurons = Neurons(commons=commons)
|
||||||
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
|
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
|
||||||
# add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
|
# add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
|
||||||
|
|
||||||
created_vector_id = created_vector[0]
|
created_vector_id = created_vector[0] # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
brain = Brain(id=brain_id)
|
brain = Brain(id=brain_id)
|
||||||
brain.create_brain_vector(created_vector_id, file.file_sha1)
|
brain.create_brain_vector(created_vector_id, file.file_sha1)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -11,7 +11,13 @@ from utils.file import compute_sha1_from_content
|
|||||||
from utils.vectors import Neurons
|
from utils.vectors import Neurons
|
||||||
|
|
||||||
|
|
||||||
async def process_github(commons: CommonsDep, repo, enable_summarization, brain_id, user_openai_api_key):
|
async def process_github(
|
||||||
|
commons: CommonsDep, # pyright: ignore reportPrivateUsage=none
|
||||||
|
repo,
|
||||||
|
enable_summarization,
|
||||||
|
brain_id,
|
||||||
|
user_openai_api_key,
|
||||||
|
):
|
||||||
random_dir_name = os.urandom(16).hex()
|
random_dir_name = os.urandom(16).hex()
|
||||||
dateshort = time.strftime("%Y%m%d")
|
dateshort = time.strftime("%Y%m%d")
|
||||||
loader = GitLoader(
|
loader = GitLoader(
|
||||||
@ -24,41 +30,60 @@ async def process_github(commons: CommonsDep, repo, enable_summarization, brain_
|
|||||||
chunk_size = 500
|
chunk_size = 500
|
||||||
chunk_overlap = 0
|
chunk_overlap = 0
|
||||||
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
||||||
chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
||||||
|
)
|
||||||
|
|
||||||
documents = text_splitter.split_documents(documents)
|
documents = text_splitter.split_documents(documents)
|
||||||
print(documents[:1])
|
print(documents[:1])
|
||||||
|
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
if doc.metadata["file_type"] in [".pyc",".png",".svg", ".env", ".lock", ".gitignore", ".gitmodules", ".gitattributes", ".gitkeep", ".git", ".json"]:
|
if doc.metadata["file_type"] in [
|
||||||
|
".pyc",
|
||||||
|
".png",
|
||||||
|
".svg",
|
||||||
|
".env",
|
||||||
|
".lock",
|
||||||
|
".gitignore",
|
||||||
|
".gitmodules",
|
||||||
|
".gitattributes",
|
||||||
|
".gitkeep",
|
||||||
|
".git",
|
||||||
|
".json",
|
||||||
|
]:
|
||||||
continue
|
continue
|
||||||
metadata = {
|
metadata = {
|
||||||
"file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")),
|
"file_sha1": compute_sha1_from_content(doc.page_content.encode("utf-8")),
|
||||||
"file_size": len(doc.page_content)*8,
|
"file_size": len(doc.page_content) * 8,
|
||||||
"file_name": doc.metadata["file_name"],
|
"file_name": doc.metadata["file_name"],
|
||||||
"chunk_size": chunk_size,
|
"chunk_size": chunk_size,
|
||||||
"chunk_overlap": chunk_overlap,
|
"chunk_overlap": chunk_overlap,
|
||||||
"date": dateshort,
|
"date": dateshort,
|
||||||
"summarization": "true" if enable_summarization else "false"
|
"summarization": "true" if enable_summarization else "false",
|
||||||
}
|
}
|
||||||
doc_with_metadata = Document(
|
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
|
||||||
page_content=doc.page_content, metadata=metadata)
|
|
||||||
|
file = File(
|
||||||
file = File(file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8")))
|
file_sha1=compute_sha1_from_content(doc.page_content.encode("utf-8"))
|
||||||
|
)
|
||||||
|
|
||||||
file_exists = file.file_already_exists()
|
file_exists = file.file_already_exists()
|
||||||
|
|
||||||
if not file_exists:
|
if not file_exists:
|
||||||
print(f"Creating entry for file {file.file_sha1} in vectors...")
|
print(f"Creating entry for file {file.file_sha1} in vectors...")
|
||||||
neurons = Neurons(commons=commons)
|
neurons = Neurons(commons=commons)
|
||||||
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
|
created_vector = neurons.create_vector(
|
||||||
|
doc_with_metadata, user_openai_api_key
|
||||||
|
)
|
||||||
print("Created vector sids ", created_vector)
|
print("Created vector sids ", created_vector)
|
||||||
print("Created vector for ", doc.metadata["file_name"])
|
print("Created vector for ", doc.metadata["file_name"])
|
||||||
|
|
||||||
file_exists_in_brain = file.file_already_exists_in_brain(brain_id)
|
file_exists_in_brain = file.file_already_exists_in_brain(brain_id)
|
||||||
|
|
||||||
if not file_exists_in_brain:
|
if not file_exists_in_brain:
|
||||||
file.add_file_to_brain(brain_id)
|
file.add_file_to_brain(brain_id) # pyright: ignore reportPrivateUsage=none
|
||||||
brain = Brain(id=brain_id)
|
brain = Brain(id=brain_id)
|
||||||
file.link_file_to_brain(brain)
|
file.link_file_to_brain(brain)
|
||||||
return {"message": f"✅ Github with {len(documents)} files has been uploaded.", "type": "success"}
|
return {
|
||||||
|
"message": f"✅ Github with {len(documents)} files has been uploaded.",
|
||||||
|
"type": "success",
|
||||||
|
}
|
||||||
|
5
backend/pyrightconfig.json
Normal file
5
backend/pyrightconfig.json
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"exclude": [
|
||||||
|
"supabase"
|
||||||
|
]
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
|
from typing import List # For type hinting
|
||||||
|
|
||||||
from models.chat import ChatHistory
|
from models.chat import ChatHistory
|
||||||
from models.settings import common_dependencies
|
from models.settings import common_dependencies
|
||||||
from typing import List # For type hinting
|
|
||||||
|
|
||||||
|
|
||||||
def get_chat_history(chat_id: str) -> List[ChatHistory]:
|
def get_chat_history(chat_id: str) -> List[ChatHistory]:
|
||||||
@ -16,4 +17,7 @@ def get_chat_history(chat_id: str) -> List[ChatHistory]:
|
|||||||
if history is None:
|
if history is None:
|
||||||
return []
|
return []
|
||||||
else:
|
else:
|
||||||
return [ChatHistory(message) for message in history]
|
return [
|
||||||
|
ChatHistory(message) # pyright: ignore reportPrivateUsage=none
|
||||||
|
for message in history
|
||||||
|
]
|
||||||
|
@ -21,7 +21,7 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat:
|
|||||||
|
|
||||||
if not chat_id:
|
if not chat_id:
|
||||||
logger.error("No chat_id provided")
|
logger.error("No chat_id provided")
|
||||||
return
|
return # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
updates = {}
|
updates = {}
|
||||||
|
|
||||||
@ -41,4 +41,4 @@ def update_chat(chat_id, chat_data: ChatUpdatableProperties) -> Chat:
|
|||||||
logger.info(f"Chat {chat_id} updated")
|
logger.info(f"Chat {chat_id} updated")
|
||||||
else:
|
else:
|
||||||
logger.info(f"No updates to apply for chat {chat_id}")
|
logger.info(f"No updates to apply for chat {chat_id}")
|
||||||
return updated_chat
|
return updated_chat # pyright: ignore reportPrivateUsage=none
|
||||||
|
@ -23,4 +23,4 @@ def update_chat_history(chat_id: str, user_message: str, assistant: str) -> Chat
|
|||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=500, detail="An exception occurred while updating chat history."
|
status_code=500, detail="An exception occurred while updating chat history."
|
||||||
)
|
)
|
||||||
return ChatHistory(response[0])
|
return ChatHistory(response[0]) # pyright: ignore reportPrivateUsage=none
|
||||||
|
@ -6,13 +6,15 @@ logger = get_logger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def update_message_by_id(
|
def update_message_by_id(
|
||||||
message_id: str, user_message: str = None, assistant: str = None
|
message_id: str,
|
||||||
|
user_message: str = None, # pyright: ignore reportPrivateUsage=none
|
||||||
|
assistant: str = None, # pyright: ignore reportPrivateUsage=none
|
||||||
) -> ChatHistory:
|
) -> ChatHistory:
|
||||||
commons = common_dependencies()
|
commons = common_dependencies()
|
||||||
|
|
||||||
if not message_id:
|
if not message_id:
|
||||||
logger.error("No message_id provided")
|
logger.error("No message_id provided")
|
||||||
return
|
return # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
updates = {}
|
updates = {}
|
||||||
|
|
||||||
@ -35,4 +37,4 @@ def update_message_by_id(
|
|||||||
logger.info(f"Message {message_id} updated")
|
logger.info(f"Message {message_id} updated")
|
||||||
else:
|
else:
|
||||||
logger.info(f"No updates to apply for message {message_id}")
|
logger.info(f"No updates to apply for message {message_id}")
|
||||||
return ChatHistory(updated_message)
|
return ChatHistory(updated_message) # pyright: ignore reportPrivateUsage=none
|
||||||
|
@ -22,4 +22,5 @@ asyncpg==0.27.0
|
|||||||
flake8==6.0.0
|
flake8==6.0.0
|
||||||
flake8-black==0.3.6
|
flake8-black==0.3.6
|
||||||
sentence_transformers>=2.0.0
|
sentence_transformers>=2.0.0
|
||||||
sentry-sdk==1.26.0
|
sentry-sdk==1.26.0
|
||||||
|
pyright==1.1.316
|
@ -142,24 +142,27 @@ async def create_brain_endpoint(
|
|||||||
In the brains table & in the brains_users table and put the creator user as 'Owner'
|
In the brains table & in the brains_users table and put the creator user as 'Owner'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
brain = Brain(name=brain.name)
|
brain = Brain(name=brain.name) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
brain.create_brain()
|
brain.create_brain() # pyright: ignore reportPrivateUsage=none
|
||||||
default_brain = get_default_user_brain(current_user)
|
default_brain = get_default_user_brain(current_user)
|
||||||
if default_brain:
|
if default_brain:
|
||||||
logger.info(f"Default brain already exists for user {current_user.id}")
|
logger.info(f"Default brain already exists for user {current_user.id}")
|
||||||
brain.create_brain_user(
|
brain.create_brain_user( # pyright: ignore reportPrivateUsage=none
|
||||||
user_id=current_user.id, rights="Owner", default_brain=False
|
user_id=current_user.id, rights="Owner", default_brain=False
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Default brain does not exist for user {current_user.id}. It will be created."
|
f"Default brain does not exist for user {current_user.id}. It will be created."
|
||||||
)
|
)
|
||||||
brain.create_brain_user(
|
brain.create_brain_user( # pyright: ignore reportPrivateUsage=none
|
||||||
user_id=current_user.id, rights="Owner", default_brain=True
|
user_id=current_user.id, rights="Owner", default_brain=True
|
||||||
)
|
)
|
||||||
|
|
||||||
return {"id": brain.id, "name": brain.name}
|
return {
|
||||||
|
"id": brain.id, # pyright: ignore reportPrivateUsage=none
|
||||||
|
"name": brain.name,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# update existing brain
|
# update existing brain
|
||||||
@ -182,10 +185,12 @@ async def update_brain_endpoint(
|
|||||||
brain = Brain(id=brain_id)
|
brain = Brain(id=brain_id)
|
||||||
|
|
||||||
# Add new file to brain , il file_sha1 already exists in brains_vectors -> out (not now)
|
# Add new file to brain , il file_sha1 already exists in brains_vectors -> out (not now)
|
||||||
if brain.file_sha1:
|
if brain.file_sha1: # pyright: ignore reportPrivateUsage=none
|
||||||
# add all the vector Ids to the brains_vectors with the given brain.brain_id
|
# add all the vector Ids to the brains_vectors with the given brain.brain_id
|
||||||
brain.update_brain_with_file(file_sha1=input_brain.file_sha1)
|
brain.update_brain_with_file(
|
||||||
|
file_sha1=input_brain.file_sha1 # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
print("brain:", brain)
|
print("brain:", brain)
|
||||||
|
|
||||||
brain.update_brain_fields(commons, brain)
|
brain.update_brain_fields(commons, brain) # pyright: ignore reportPrivateUsage=none
|
||||||
return {"message": f"Brain {brain_id} has been updated."}
|
return {"message": f"Brain {brain_id} has been updated."}
|
||||||
|
@ -78,8 +78,8 @@ def check_user_limit(
|
|||||||
user.increment_user_request_count(date)
|
user.increment_user_request_count(date)
|
||||||
if int(user.requests_count) >= int(max_requests_number):
|
if int(user.requests_count) >= int(max_requests_number):
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=429,
|
status_code=429, # pyright: ignore reportPrivateUsage=none
|
||||||
detail="You have reached the maximum number of requests for today.",
|
detail="You have reached the maximum number of requests for today.", # pyright: ignore reportPrivateUsage=none
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
@ -97,7 +97,7 @@ async def get_chats(current_user: User = Depends(get_current_user)):
|
|||||||
This endpoint retrieves all the chats associated with the current authenticated user. It returns a list of chat objects
|
This endpoint retrieves all the chats associated with the current authenticated user. It returns a list of chat objects
|
||||||
containing the chat ID and chat name for each chat.
|
containing the chat ID and chat name for each chat.
|
||||||
"""
|
"""
|
||||||
chats = get_user_chats(current_user.id)
|
chats = get_user_chats(current_user.id) # pyright: ignore reportPrivateUsage=none
|
||||||
return {"chats": chats}
|
return {"chats": chats}
|
||||||
|
|
||||||
|
|
||||||
@ -127,10 +127,11 @@ async def update_chat_metadata_handler(
|
|||||||
Update chat attributes
|
Update chat attributes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
chat = get_chat_by_id(chat_id)
|
chat = get_chat_by_id(chat_id) # pyright: ignore reportPrivateUsage=none
|
||||||
if current_user.id != chat.user_id:
|
if current_user.id != chat.user_id:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=403, detail="You should be the owner of the chat to update it."
|
status_code=403, # pyright: ignore reportPrivateUsage=none
|
||||||
|
detail="You should be the owner of the chat to update it.", # pyright: ignore reportPrivateUsage=none
|
||||||
)
|
)
|
||||||
return update_chat(chat_id=chat_id, chat_data=chat_data)
|
return update_chat(chat_id=chat_id, chat_data=chat_data)
|
||||||
|
|
||||||
@ -181,7 +182,7 @@ async def create_question_handler(
|
|||||||
temperature=chat_question.temperature,
|
temperature=chat_question.temperature,
|
||||||
max_tokens=chat_question.max_tokens,
|
max_tokens=chat_question.max_tokens,
|
||||||
brain_id=str(brain_id),
|
brain_id=str(brain_id),
|
||||||
user_openai_api_key=current_user.user_openai_api_key,
|
user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -191,10 +192,12 @@ async def create_question_handler(
|
|||||||
max_tokens=chat_question.max_tokens,
|
max_tokens=chat_question.max_tokens,
|
||||||
temperature=chat_question.temperature,
|
temperature=chat_question.temperature,
|
||||||
brain_id=str(brain_id),
|
brain_id=str(brain_id),
|
||||||
user_openai_api_key=current_user.user_openai_api_key,
|
user_openai_api_key=current_user.user_openai_api_key, # pyright: ignore reportPrivateUsage=none
|
||||||
)
|
)
|
||||||
|
|
||||||
chat_answer = gpt_answer_generator.generate_answer(chat_question.question)
|
chat_answer = gpt_answer_generator.generate_answer( # pyright: ignore reportPrivateUsage=none
|
||||||
|
chat_question.question
|
||||||
|
)
|
||||||
|
|
||||||
return chat_answer
|
return chat_answer
|
||||||
except HTTPException as e:
|
except HTTPException as e:
|
||||||
@ -217,7 +220,10 @@ async def create_stream_question_handler(
|
|||||||
if chat_question.model not in streaming_compatible_models:
|
if chat_question.model not in streaming_compatible_models:
|
||||||
# Forward the request to the none streaming endpoint
|
# Forward the request to the none streaming endpoint
|
||||||
return await create_question_handler(
|
return await create_question_handler(
|
||||||
request, chat_question, chat_id, current_user
|
request,
|
||||||
|
chat_question,
|
||||||
|
chat_id,
|
||||||
|
current_user, # pyright: ignore reportPrivateUsage=none
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -238,12 +244,14 @@ async def create_stream_question_handler(
|
|||||||
max_tokens=chat_question.max_tokens,
|
max_tokens=chat_question.max_tokens,
|
||||||
temperature=chat_question.temperature,
|
temperature=chat_question.temperature,
|
||||||
brain_id=str(brain_id),
|
brain_id=str(brain_id),
|
||||||
user_openai_api_key=user_openai_api_key,
|
user_openai_api_key=user_openai_api_key, # pyright: ignore reportPrivateUsage=none
|
||||||
streaming=True,
|
streaming=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
gpt_answer_generator.generate_stream(chat_question.question),
|
gpt_answer_generator.generate_stream( # pyright: ignore reportPrivateUsage=none
|
||||||
|
chat_question.question
|
||||||
|
),
|
||||||
media_type="text/event-stream",
|
media_type="text/event-stream",
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -259,4 +267,4 @@ async def get_chat_history_handler(
|
|||||||
chat_id: UUID,
|
chat_id: UUID,
|
||||||
) -> List[ChatHistory]:
|
) -> List[ChatHistory]:
|
||||||
# TODO: RBAC with current_user
|
# TODO: RBAC with current_user
|
||||||
return get_chat_history(chat_id)
|
return get_chat_history(chat_id) # pyright: ignore reportPrivateUsage=none
|
||||||
|
@ -35,7 +35,9 @@ async def crawl_endpoint(
|
|||||||
commons = common_dependencies()
|
commons = common_dependencies()
|
||||||
|
|
||||||
if request.headers.get("Openai-Api-Key"):
|
if request.headers.get("Openai-Api-Key"):
|
||||||
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
|
brain.max_brain_size = os.getenv(
|
||||||
|
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
file_size = 1000000
|
file_size = 1000000
|
||||||
remaining_free_space = brain.remaining_brain_size
|
remaining_free_space = brain.remaining_brain_size
|
||||||
@ -47,14 +49,20 @@ async def crawl_endpoint(
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
if not crawl_website.checkGithub():
|
if not crawl_website.checkGithub():
|
||||||
file_path, file_name = crawl_website.process()
|
(
|
||||||
|
file_path,
|
||||||
|
file_name,
|
||||||
|
) = crawl_website.process() # pyright: ignore reportPrivateUsage=none
|
||||||
# Create a SpooledTemporaryFile from the file_path
|
# Create a SpooledTemporaryFile from the file_path
|
||||||
spooled_file = SpooledTemporaryFile()
|
spooled_file = SpooledTemporaryFile()
|
||||||
with open(file_path, "rb") as f:
|
with open(file_path, "rb") as f:
|
||||||
shutil.copyfileobj(f, spooled_file)
|
shutil.copyfileobj(f, spooled_file)
|
||||||
|
|
||||||
# Pass the SpooledTemporaryFile to UploadFile
|
# Pass the SpooledTemporaryFile to UploadFile
|
||||||
uploadFile = UploadFile(file=spooled_file, filename=file_name)
|
uploadFile = UploadFile(
|
||||||
|
file=spooled_file, # pyright: ignore reportPrivateUsage=none
|
||||||
|
filename=file_name,
|
||||||
|
)
|
||||||
file = File(file=uploadFile)
|
file = File(file=uploadFile)
|
||||||
# check remaining free space here !!
|
# check remaining free space here !!
|
||||||
message = await filter_file(
|
message = await filter_file(
|
||||||
|
@ -42,7 +42,9 @@ async def upload_file(
|
|||||||
commons = common_dependencies()
|
commons = common_dependencies()
|
||||||
|
|
||||||
if request.headers.get("Openai-Api-Key"):
|
if request.headers.get("Openai-Api-Key"):
|
||||||
brain.max_brain_size = os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200)
|
brain.max_brain_size = os.getenv(
|
||||||
|
"MAX_BRAIN_SIZE_WITH_KEY", 209715200
|
||||||
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
remaining_free_space = brain.remaining_brain_size
|
remaining_free_space = brain.remaining_brain_size
|
||||||
|
|
||||||
file_size = get_file_size(uploadFile)
|
file_size = get_file_size(uploadFile)
|
||||||
|
@ -10,6 +10,9 @@ client = TestClient(app)
|
|||||||
|
|
||||||
API_KEY = os.getenv("CI_TEST_API_KEY")
|
API_KEY = os.getenv("CI_TEST_API_KEY")
|
||||||
|
|
||||||
|
if not API_KEY:
|
||||||
|
raise ValueError("CI_TEST_API_KEY environment variable not set. Cannot run tests.")
|
||||||
|
|
||||||
|
|
||||||
def test_read_main():
|
def test_read_main():
|
||||||
response = client.get("/")
|
response = client.get("/")
|
||||||
@ -54,7 +57,8 @@ def test_create_and_delete_api_key():
|
|||||||
def test_retrieve_default_brain():
|
def test_retrieve_default_brain():
|
||||||
# Making a GET request to the /brains/default/ endpoint
|
# Making a GET request to the /brains/default/ endpoint
|
||||||
response = client.get(
|
response = client.get(
|
||||||
"/brains/default/", headers={"Authorization": "Bearer " + API_KEY}
|
"/brains/default/",
|
||||||
|
headers={"Authorization": "Bearer " + API_KEY},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert that the response status code is 200 (HTTP OK)
|
# Assert that the response status code is 200 (HTTP OK)
|
||||||
@ -88,7 +92,9 @@ def test_create_brain():
|
|||||||
|
|
||||||
# Making a POST request to the /brains/ endpoint
|
# Making a POST request to the /brains/ endpoint
|
||||||
response = client.post(
|
response = client.post(
|
||||||
"/brains/", json=payload, headers={"Authorization": "Bearer " + API_KEY}
|
"/brains/",
|
||||||
|
json=payload,
|
||||||
|
headers={"Authorization": "Bearer " + API_KEY},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert that the response status code is 200 (HTTP OK)
|
# Assert that the response status code is 200 (HTTP OK)
|
||||||
@ -106,7 +112,10 @@ def test_create_brain():
|
|||||||
|
|
||||||
def test_retrieve_all_brains():
|
def test_retrieve_all_brains():
|
||||||
# Making a GET request to the /brains/ endpoint to retrieve all brains for the current user
|
# Making a GET request to the /brains/ endpoint to retrieve all brains for the current user
|
||||||
response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY})
|
response = client.get(
|
||||||
|
"/brains/",
|
||||||
|
headers={"Authorization": "Bearer " + API_KEY},
|
||||||
|
)
|
||||||
|
|
||||||
# Assert that the response status code is 200 (HTTP OK)
|
# Assert that the response status code is 200 (HTTP OK)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@ -120,7 +129,10 @@ def test_retrieve_all_brains():
|
|||||||
|
|
||||||
def test_delete_all_brains():
|
def test_delete_all_brains():
|
||||||
# First, retrieve all brains for the current user
|
# First, retrieve all brains for the current user
|
||||||
response = client.get("/brains/", headers={"Authorization": "Bearer " + API_KEY})
|
response = client.get(
|
||||||
|
"/brains/",
|
||||||
|
headers={"Authorization": "Bearer " + API_KEY},
|
||||||
|
)
|
||||||
|
|
||||||
# Assert that the response status code is 200 (HTTP OK)
|
# Assert that the response status code is 200 (HTTP OK)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@ -133,7 +145,8 @@ def test_delete_all_brains():
|
|||||||
|
|
||||||
# Send a DELETE request to delete the specific brain
|
# Send a DELETE request to delete the specific brain
|
||||||
delete_response = client.delete(
|
delete_response = client.delete(
|
||||||
f"/brains/{brain_id}/", headers={"Authorization": "Bearer " + API_KEY}
|
f"/brains/{brain_id}/",
|
||||||
|
headers={"Authorization": "Bearer " + API_KEY},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert that the DELETE response status code is 200 (HTTP OK)
|
# Assert that the DELETE response status code is 200 (HTTP OK)
|
||||||
@ -142,7 +155,10 @@ def test_delete_all_brains():
|
|||||||
|
|
||||||
def test_get_all_chats():
|
def test_get_all_chats():
|
||||||
# Making a GET request to the /chat endpoint to retrieve all chats
|
# Making a GET request to the /chat endpoint to retrieve all chats
|
||||||
response = client.get("/chat", headers={"Authorization": "Bearer " + API_KEY})
|
response = client.get(
|
||||||
|
"/chat",
|
||||||
|
headers={"Authorization": "Bearer " + API_KEY},
|
||||||
|
)
|
||||||
|
|
||||||
# Assert that the response status code is 200 (HTTP OK)
|
# Assert that the response status code is 200 (HTTP OK)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
|
@ -18,8 +18,10 @@ def convert_bytes(bytes, precision=2):
|
|||||||
|
|
||||||
def get_file_size(file: UploadFile):
|
def get_file_size(file: UploadFile):
|
||||||
# move the cursor to the end of the file
|
# move the cursor to the end of the file
|
||||||
file.file._file.seek(0, 2)
|
file.file._file.seek(0, 2) # pyright: ignore reportPrivateUsage=none
|
||||||
file_size = file.file._file.tell() # Getting the size of the file
|
file_size = (
|
||||||
|
file.file._file.tell() # pyright: ignore reportPrivateUsage=none
|
||||||
|
) # Getting the size of the file
|
||||||
# move the cursor back to the beginning of the file
|
# move the cursor back to the beginning of the file
|
||||||
file.file.seek(0)
|
file.file.seek(0)
|
||||||
|
|
||||||
|
@ -54,14 +54,19 @@ async def filter_file(
|
|||||||
|
|
||||||
if file_exists_in_brain:
|
if file_exists_in_brain:
|
||||||
return create_response(
|
return create_response(
|
||||||
f"🤔 {file.file.filename} already exists in brain {brain_id}.", "warning"
|
f"🤔 {file.file.filename} already exists in brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
|
||||||
|
"warning",
|
||||||
)
|
)
|
||||||
elif file.file_is_empty():
|
elif file.file_is_empty():
|
||||||
return create_response(f"❌ {file.file.filename} is empty.", "error")
|
return create_response(
|
||||||
|
f"❌ {file.file.filename} is empty.", # pyright: ignore reportPrivateUsage=none
|
||||||
|
"error", # pyright: ignore reportPrivateUsage=none
|
||||||
|
)
|
||||||
elif file_exists:
|
elif file_exists:
|
||||||
file.link_file_to_brain(brain=Brain(id=brain_id))
|
file.link_file_to_brain(brain=Brain(id=brain_id))
|
||||||
return create_response(
|
return create_response(
|
||||||
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", "success"
|
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
|
||||||
|
"success",
|
||||||
)
|
)
|
||||||
|
|
||||||
if file.file_extension in file_processors:
|
if file.file_extension in file_processors:
|
||||||
@ -70,14 +75,18 @@ async def filter_file(
|
|||||||
commons, file, enable_summarization, brain_id, openai_api_key
|
commons, file, enable_summarization, brain_id, openai_api_key
|
||||||
)
|
)
|
||||||
return create_response(
|
return create_response(
|
||||||
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.",
|
f"✅ {file.file.filename} has been uploaded to brain {brain_id}.", # pyright: ignore reportPrivateUsage=none
|
||||||
"success",
|
"success",
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Add more specific exceptions as needed.
|
# Add more specific exceptions as needed.
|
||||||
print(f"Error processing file: {e}")
|
print(f"Error processing file: {e}")
|
||||||
return create_response(
|
return create_response(
|
||||||
f"⚠️ An error occurred while processing {file.file.filename}.", "error"
|
f"⚠️ An error occurred while processing {file.file.filename}.", # pyright: ignore reportPrivateUsage=none
|
||||||
|
"error",
|
||||||
)
|
)
|
||||||
|
|
||||||
return create_response(f"❌ {file.file.filename} is not supported.", "error")
|
return create_response(
|
||||||
|
f"❌ {file.file.filename} is not supported.", # pyright: ignore reportPrivateUsage=none
|
||||||
|
"error",
|
||||||
|
)
|
||||||
|
@ -13,7 +13,7 @@ logger = get_logger(__name__)
|
|||||||
|
|
||||||
class Neurons(BaseModel):
|
class Neurons(BaseModel):
|
||||||
commons: CommonsDep
|
commons: CommonsDep
|
||||||
settings = BrainSettings()
|
settings = BrainSettings() # pyright: ignore reportPrivateUsage=none
|
||||||
|
|
||||||
def create_vector(self, doc, user_openai_api_key=None):
|
def create_vector(self, doc, user_openai_api_key=None):
|
||||||
logger.info("Creating vector for document")
|
logger.info("Creating vector for document")
|
||||||
@ -21,7 +21,7 @@ class Neurons(BaseModel):
|
|||||||
if user_openai_api_key:
|
if user_openai_api_key:
|
||||||
self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings(
|
self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings(
|
||||||
openai_api_key=user_openai_api_key
|
openai_api_key=user_openai_api_key
|
||||||
)
|
) # pyright: ignore reportPrivateUsage=none
|
||||||
try:
|
try:
|
||||||
sids = self.commons["documents_vector_store"].add_documents([doc])
|
sids = self.commons["documents_vector_store"].add_documents([doc])
|
||||||
if sids and len(sids) > 0:
|
if sids and len(sids) > 0:
|
||||||
@ -64,7 +64,7 @@ def create_summary(commons: CommonsDep, document_id, content, metadata):
|
|||||||
|
|
||||||
|
|
||||||
def error_callback(exception):
|
def error_callback(exception):
|
||||||
print('An exception occurred:', exception)
|
print("An exception occurred:", exception)
|
||||||
|
|
||||||
|
|
||||||
def process_batch(batch_ids):
|
def process_batch(batch_ids):
|
||||||
@ -106,14 +106,14 @@ def get_unique_files_from_vector_ids(vectors_ids: List[int]):
|
|||||||
with ThreadPoolExecutor() as executor:
|
with ThreadPoolExecutor() as executor:
|
||||||
futures = []
|
futures = []
|
||||||
for i in range(0, len(vectors_ids), BATCH_SIZE):
|
for i in range(0, len(vectors_ids), BATCH_SIZE):
|
||||||
batch_ids = vectors_ids[i:i + BATCH_SIZE]
|
batch_ids = vectors_ids[i : i + BATCH_SIZE]
|
||||||
future = executor.submit(process_batch, batch_ids)
|
future = executor.submit(process_batch, batch_ids)
|
||||||
futures.append(future)
|
futures.append(future)
|
||||||
|
|
||||||
# Retrieve the results
|
# Retrieve the results
|
||||||
vectors_responses = [future.result() for future in futures]
|
vectors_responses = [future.result() for future in futures]
|
||||||
|
|
||||||
documents = [item for sublist in vectors_responses for item in sublist]
|
documents = [item for sublist in vectors_responses for item in sublist]
|
||||||
print('document', documents)
|
print("document", documents)
|
||||||
unique_files = [dict(t) for t in set(tuple(d.items()) for d in documents)]
|
unique_files = [dict(t) for t in set(tuple(d.items()) for d in documents)]
|
||||||
return unique_files
|
return unique_files
|
||||||
|
@ -3,8 +3,7 @@ from typing import Any, List
|
|||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||||
from langchain.vectorstores import SupabaseVectorStore
|
from langchain.vectorstores import SupabaseVectorStore
|
||||||
|
from supabase.client import Client
|
||||||
from supabase import Client
|
|
||||||
|
|
||||||
|
|
||||||
class CustomSupabaseVectorStore(SupabaseVectorStore):
|
class CustomSupabaseVectorStore(SupabaseVectorStore):
|
||||||
|
Loading…
Reference in New Issue
Block a user