delete common_dependencies

Author: Chloe Mouret
Date: 2023-08-01 18:32:35 +02:00
Parent: ed734a66a9
Commit: 9fdc1b8c83
22 changed files with 181 additions and 134 deletions
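
In short, this commit dissolves the catch-all common_dependencies() factory (a dict bundling the Supabase client, embeddings, and vector stores, exposed to FastAPI as the CommonsDep dependency) into narrow getters, so each call site constructs only the resource it actually uses. A minimal sketch of the two styles, using stand-in constructors rather than the real Supabase/OpenAI clients:

class Client:  # stand-in for supabase.client.Client, purely for illustration
    pass


# Old style: one factory builds everything, whether the caller needs it or not.
def common_dependencies() -> dict:
    client = Client()
    embeddings = object()  # stand-in for OpenAIEmbeddings(...)
    return {"supabase": client, "embeddings": embeddings}


# New style: a focused getter per resource; call sites move from
# commons["supabase"] to get_supabase_client().
def get_supabase_client() -> Client:
    return Client()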

View File

@@ -2,7 +2,7 @@ from typing import Any, List, Optional
 from uuid import UUID
 
 from logger import get_logger
-from models.settings import BrainRateLimiting, common_dependencies, get_supabase_client
+from models.settings import BrainRateLimiting, get_supabase_client
 from models.users import User
 from pydantic import BaseModel
 from supabase.client import Client
@@ -46,7 +46,7 @@ class Brain(BaseModel):
 
     @classmethod
     def create(cls, *args, **kwargs):
-        commons = common_dependencies()
+        commons = {"supabase": get_supabase_client()}
         return cls(
             commons=commons, *args, **kwargs  # pyright: ignore reportPrivateUsage=none
         )  # pyright: ignore reportPrivateUsage=none
@@ -320,4 +320,3 @@ def get_default_user_brain_or_create_new(user: User) -> Brain:
         brain.create_brain_user(user.id, "Owner", True)
         return brain
     return brain
-    return brain

View File

@@ -7,7 +7,7 @@ from fastapi import UploadFile
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from logger import get_logger
 from models.brains import Brain
-from models.settings import CommonsDep, get_supabase_client
+from models.settings import get_supabase_client
 from pydantic import BaseModel
 from utils.file import compute_sha1_from_file
@@ -26,7 +26,6 @@ class File(BaseModel):
     chunk_size: int = 500
     chunk_overlap: int = 0
     documents: Optional[Any] = None
-    _commons: Optional[CommonsDep] = None
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

View File

@@ -1,6 +1,5 @@
-from typing import Annotated, TypedDict
+from typing import TypedDict
 
-from fastapi import Depends
 from langchain.embeddings.openai import OpenAIEmbeddings
 from pydantic import BaseSettings
 from supabase.client import Client, create_client
@@ -42,27 +41,20 @@ def get_supabase_client() -> Client:
     return supabase_client
 
 
-def common_dependencies() -> CommonDependencies:
+def get_embeddings() -> OpenAIEmbeddings:
     settings = BrainSettings()  # pyright: ignore reportPrivateUsage=none
     embeddings = OpenAIEmbeddings(
         openai_api_key=settings.openai_api_key
     )  # pyright: ignore reportPrivateUsage=none
-    supabase_client: Client = create_client(
-        settings.supabase_url, settings.supabase_service_key
-    )
+    return embeddings
+
+
+def get_documents_vector_store() -> SupabaseVectorStore:
+    supabase_client = get_supabase_client()
+    embeddings = get_embeddings()
     documents_vector_store = SupabaseVectorStore(
         supabase_client, embeddings, table_name="vectors"
     )
-    summaries_vector_store = SupabaseVectorStore(
-        supabase_client, embeddings, table_name="summaries"
-    )
-
-    return {
-        "supabase": supabase_client,
-        "embeddings": embeddings,
-        "documents_vector_store": documents_vector_store,
-        "summaries_vector_store": summaries_vector_store,  # delete
-    }
-
-
-CommonsDep = Annotated[dict, Depends(common_dependencies)]
+    return documents_vector_store
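
The new getters in models/settings.py compose: get_documents_vector_store is built from get_supabase_client and get_embeddings. A usage sketch, assuming BrainSettings is configured from the environment as elsewhere in the codebase:

from models.settings import (
    get_documents_vector_store,
    get_embeddings,
    get_supabase_client,
)

supabase_client = get_supabase_client()      # plain Supabase client
embeddings = get_embeddings()                # OpenAIEmbeddings from BrainSettings
vector_store = get_documents_vector_store()  # SupabaseVectorStore over "vectors"

Note that each call builds fresh client and embeddings objects; nothing is cached, which keeps the getters safe to call from any context at the cost of repeated construction.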

View File

@@ -6,12 +6,11 @@ import openai
 from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from models.files import File
-from models.settings import CommonsDep
+from models.settings import get_documents_vector_store
 from utils.file import compute_sha1_from_content
 
 
 async def process_audio(
-    commons: CommonsDep,  # pyright: ignore reportPrivateUsage=none
     file: File,
     enable_summarization: bool,
     user,
@@ -78,7 +77,8 @@ async def process_audio(
         for text in texts
     ]
 
-    commons.documents_vector_store.add_documents(  # pyright: ignore reportPrivateUsage=none
+    documents_vector_store = get_documents_vector_store()
+    documents_vector_store.add_documents(  # pyright: ignore reportPrivateUsage=none
         docs_with_metadata
     )

View File

@@ -3,12 +3,10 @@ import time
 from langchain.schema import Document
 from models.brains import Brain
 from models.files import File
-from models.settings import CommonsDep
 from utils.vectors import Neurons
 
 
 async def process_file(
-    commons: CommonsDep,
     file: File,
     loader_class,
     enable_summarization,
@@ -31,7 +29,7 @@ async def process_file(
         }
         doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
 
-        neurons = Neurons(commons=commons)
+        neurons = Neurons()
         created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
         # add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})

View File

@@ -1,22 +1,19 @@
 from langchain.document_loaders import CSVLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
 def process_csv(
-    commons: CommonsDep,
     file: File,
     enable_summarization,
     brain_id,
     user_openai_api_key,
 ):
     return process_file(
-        commons,
-        file,
-        CSVLoader,
-        enable_summarization,
-        brain_id,
-        user_openai_api_key,
+        file=file,
+        loader_class=CSVLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
     )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders import Docx2txtLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_docx(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, Docx2txtLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_docx(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=Docx2txtLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders.epub import UnstructuredEPubLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_epub(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, UnstructuredEPubLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_epub(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=UnstructuredEPubLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -6,13 +6,11 @@ from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from models.brains import Brain
 from models.files import File
-from models.settings import CommonsDep
 from utils.file import compute_sha1_from_content
 from utils.vectors import Neurons
 
 
 async def process_github(
-    commons: CommonsDep,  # pyright: ignore reportPrivateUsage=none
     repo,
     enable_summarization,
     brain_id,
@@ -70,7 +68,7 @@ async def process_github(
 
         if not file_exists:
             print(f"Creating entry for file {file.file_sha1} in vectors...")
-            neurons = Neurons(commons=commons)
+            neurons = Neurons()
             created_vector = neurons.create_vector(
                 doc_with_metadata, user_openai_api_key
             )

View File

@@ -1,21 +1,22 @@
 import requests
 from langchain.document_loaders import UnstructuredHTMLLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
 def process_html(
-    commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
 ):
     return process_file(
-        commons,
-        file,
-        UnstructuredHTMLLoader,
-        enable_summarization,
-        brain_id,
-        user_openai_api_key,
+        file=file,
+        loader_class=UnstructuredHTMLLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
     )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders import UnstructuredMarkdownLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_markdown(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, UnstructuredMarkdownLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_markdown(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=UnstructuredMarkdownLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders import NotebookLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_ipnyb(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, NotebookLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_ipnyb(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=NotebookLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders import PyMuPDFLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_odt(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, PyMuPDFLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_odt(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=PyMuPDFLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -1,10 +1,19 @@
 from langchain.document_loaders import PyMuPDFLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_pdf(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, PyMuPDFLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_pdf(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=PyMuPDFLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders import UnstructuredPowerPointLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-def process_powerpoint(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return process_file(commons, file, UnstructuredPowerPointLoader, enable_summarization, brain_id, user_openai_api_key)
+def process_powerpoint(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return process_file(
+        file=file,
+        loader_class=UnstructuredPowerPointLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )

View File

@@ -1,9 +1,19 @@
 from langchain.document_loaders import TextLoader
 from models.files import File
-from models.settings import CommonsDep
 
 from .common import process_file
 
 
-async def process_txt(commons: CommonsDep, file: File, enable_summarization, brain_id, user_openai_api_key):
-    return await process_file(commons, file, TextLoader, enable_summarization, brain_id,user_openai_api_key)
+async def process_txt(
+    file: File,
+    enable_summarization,
+    brain_id,
+    user_openai_api_key,
+):
+    return await process_file(
+        file=file,
+        loader_class=TextLoader,
+        enable_summarization=enable_summarization,
+        brain_id=brain_id,
+        user_openai_api_key=user_openai_api_key,
+    )
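
All the format-specific wrappers above now share one shape: commons is gone and every argument reaches process_file by keyword. The commit keeps explicit per-format functions; a hypothetical factory that would collapse the repetition (not part of the commit) could look like:

from parsers.common import process_file


def make_processor(loader_class):
    # Hypothetical helper equivalent to the repeated wrappers; shown only to
    # make the shared pattern explicit.
    def process(file, enable_summarization, brain_id, user_openai_api_key):
        return process_file(
            file=file,
            loader_class=loader_class,
            enable_summarization=enable_summarization,
            brain_id=brain_id,
            user_openai_api_key=user_openai_api_key,
        )

    return process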

View File

@@ -1,8 +1,6 @@
-from typing import Optional
-
 from logger import get_logger
 from models.brains_subscription_invitations import BrainSubscription
-from models.settings import CommonsDep, get_supabase_client
+from models.settings import get_supabase_client
 from utils.db_commands import (
     delete_data_in_table,
     insert_data_in_table,
@@ -14,7 +12,7 @@ logger = get_logger(__name__)
 
 
 class SubscriptionInvitationService:
-    def __init__(self, commons: Optional[CommonsDep] = None):
+    def __init__(self):
         self.supabase_client = get_supabase_client()
 
     def create_subscription_invitation(self, brain_subscription: BrainSubscription):

View File

@@ -7,7 +7,7 @@ from asyncpg.exceptions import UniqueViolationError
 from auth import AuthBearer, get_current_user
 from fastapi import APIRouter, Depends
 from logger import get_logger
-from models.settings import CommonsDep
+from models.settings import get_supabase_client
 from models.users import User
 from pydantic import BaseModel
@@ -33,9 +33,7 @@ api_key_router = APIRouter()
     dependencies=[Depends(AuthBearer())],
     tags=["API Key"],
 )
-async def create_api_key(
-    commons: CommonsDep, current_user: User = Depends(get_current_user)
-):
+async def create_api_key(current_user: User = Depends(get_current_user)):
     """
     Create new API key for the current user.
@@ -49,11 +47,12 @@ async def create_api_key(
     new_key_id = uuid4()
     new_api_key = token_hex(16)
     api_key_inserted = False
+    supabase_client = get_supabase_client()
 
     while not api_key_inserted:
         try:
             # Attempt to insert new API key into database
-            commons["supabase"].table("api_keys").insert(
+            supabase_client.table("api_keys").insert(
                 [
                     {
                         "key_id": str(new_key_id),
@@ -83,9 +82,7 @@ async def create_api_key(
 @api_key_router.delete(
     "/api-key/{key_id}", dependencies=[Depends(AuthBearer())], tags=["API Key"]
 )
-async def delete_api_key(
-    key_id: str, commons: CommonsDep, current_user: User = Depends(get_current_user)
-):
+async def delete_api_key(key_id: str, current_user: User = Depends(get_current_user)):
     """
     Delete (deactivate) an API key for the current user.
@@ -95,8 +92,8 @@ async def delete_api_key(
     as inactive in the database.
     """
-    commons["supabase"].table("api_keys").update(
+    supabase_client = get_supabase_client()
+    supabase_client.table("api_keys").update(
         {
             "is_active": False,
             "deleted_time": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
@@ -112,9 +109,7 @@ async def delete_api_key(
     dependencies=[Depends(AuthBearer())],
     tags=["API Key"],
 )
-async def get_api_keys(
-    commons: CommonsDep, current_user: User = Depends(get_current_user)
-):
+async def get_api_keys(current_user: User = Depends(get_current_user)):
     """
     Get all active API keys for the current user.
@@ -124,10 +119,9 @@ async def get_api_keys(
     This endpoint retrieves all the active API keys associated with the current user. It returns a list of API key objects
     containing the key ID and creation time for each API key.
     """
+    supabase_client = get_supabase_client()
     response = (
-        commons["supabase"]
-        .table("api_keys")
+        supabase_client.table("api_keys")
         .select("key_id, creation_time")
         .filter("user_id", "eq", current_user.id)
         .filter("is_active", "eq", True)
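
The route-level pattern repeats across this file: commons: CommonsDep leaves each signature, and the handler fetches the Supabase client when it runs. A reduced sketch of the new shape, with auth and the real table logic omitted:

from fastapi import APIRouter

from models.settings import get_supabase_client

api_key_router = APIRouter()


@api_key_router.get("/api-key")
async def get_api_keys():
    # The client is obtained inside the handler instead of through Depends.
    supabase_client = get_supabase_client()
    ...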

View File

@@ -8,7 +8,6 @@ from crawl.crawler import CrawlWebsite
 from fastapi import APIRouter, Depends, Query, Request, UploadFile
 from models.brains import Brain
 from models.files import File
-from models.settings import common_dependencies
 from models.users import User
 from parsers.github import process_github
 from utils.file import convert_bytes
@@ -32,8 +31,6 @@ async def crawl_endpoint(
     # [TODO] check if the user is the owner/editor of the brain
     brain = Brain(id=brain_id)
 
-    commons = common_dependencies()
-
     if request.headers.get("Openai-Api-Key"):
         brain.max_brain_size = os.getenv(
             "MAX_BRAIN_SIZE_WITH_KEY", 209715200
@@ -66,19 +63,17 @@ async def crawl_endpoint(
         file = File(file=uploadFile)
         # check remaining free space here !!
         message = await filter_file(
-            commons,
-            file,
-            enable_summarization,
-            brain.id,
+            file=file,
+            enable_summarization=enable_summarization,
+            brain_id=brain.id,
             openai_api_key=request.headers.get("Openai-Api-Key", None),
         )
         return message
     else:
         # check remaining free space here !!
         message = await process_github(
-            commons,
-            crawl_website.url,
-            "false",
-            brain_id,
+            repo=crawl_website.url,
+            enable_summarization="false",
+            brain_id=brain_id,
             user_openai_api_key=request.headers.get("Openai-Api-Key", None),
         )

View File

@@ -5,16 +5,14 @@ from auth import AuthBearer, get_current_user
 from fastapi import APIRouter, Depends, Query, Request, UploadFile
 from models.brains import Brain
 from models.files import File
-from models.settings import common_dependencies
 from models.users import User
 from repository.user_identity.get_user_identity import get_user_identity
-from utils.file import convert_bytes, get_file_size
-from utils.processors import filter_file
 from routes.authorizations.brain_authorization import (
     RoleEnum,
     validate_brain_authorization,
 )
+from utils.file import convert_bytes, get_file_size
+from utils.processors import filter_file
 
 upload_router = APIRouter()
@@ -44,7 +42,6 @@ async def upload_file(
     )
 
     brain = Brain(id=brain_id)
-    commons = common_dependencies()
 
     if request.headers.get("Openai-Api-Key"):
         brain.max_brain_size = int(os.getenv("MAX_BRAIN_SIZE_WITH_KEY", 209715200))
@@ -70,9 +67,8 @@ async def upload_file(
         openai_api_key = get_user_identity(current_user.id).openai_api_key
 
     message = await filter_file(
-        commons,
-        file,
-        enable_summarization,
+        file=file,
+        enable_summarization=enable_summarization,
         brain_id=brain_id,
         openai_api_key=openai_api_key,
     )

View File

@@ -1,6 +1,5 @@
 from models.brains import Brain
 from models.files import File
-from models.settings import CommonsDep
 from parsers.audio import process_audio
 from parsers.csv import process_csv
 from parsers.docx import process_docx
@@ -40,7 +39,6 @@ def create_response(message, type):
 
 
 async def filter_file(
-    commons: CommonsDep,
     file: File,
     enable_summarization: bool,
     brain_id,
@@ -72,7 +70,10 @@ async def filter_file(
     if file.file_extension in file_processors:
         try:
             await file_processors[file.file_extension](
-                commons, file, enable_summarization, brain_id, openai_api_key
+                file=file,
+                enable_summarization=enable_summarization,
+                brain_id=brain_id,
+                openai_api_key=openai_api_key,
             )
             return create_response(
                 f"{file.file.filename} has been uploaded to brain {brain_id}.",  # pyright: ignore reportPrivateUsage=none

View File

@@ -3,25 +3,27 @@ from typing import List
 from langchain.embeddings.openai import OpenAIEmbeddings
 from logger import get_logger
-from models.settings import BrainSettings, CommonsDep, get_supabase_client
+from models.settings import (
+    get_documents_vector_store,
+    get_embeddings,
+    get_supabase_client,
+)
 from pydantic import BaseModel
 
 logger = get_logger(__name__)
 
 
 class Neurons(BaseModel):
-    commons: CommonsDep
-    settings = BrainSettings()  # pyright: ignore reportPrivateUsage=none
-
     def create_vector(self, doc, user_openai_api_key=None):
+        documents_vector_store = get_documents_vector_store()
         logger.info("Creating vector for document")
         logger.info(f"Document: {doc}")
         if user_openai_api_key:
-            self.commons["documents_vector_store"]._embedding = OpenAIEmbeddings(
+            documents_vector_store._embedding = OpenAIEmbeddings(
                 openai_api_key=user_openai_api_key
             )  # pyright: ignore reportPrivateUsage=none
         try:
-            sids = self.commons["documents_vector_store"].add_documents([doc])
+            sids = documents_vector_store.add_documents([doc])
             if sids and len(sids) > 0:
                 return sids
@@ -29,22 +31,20 @@ class Neurons(BaseModel):
             logger.error(f"Error creating vector for document {e}")
 
     def create_embedding(self, content):
-        return self.commons["embeddings"].embed_query(content)
+        embeddings = get_embeddings()
+        return embeddings.embed_query(content)
 
     def similarity_search(self, query, table="match_summaries", top_k=5, threshold=0.5):
         query_embedding = self.create_embedding(query)
-        summaries = (
-            self.commons["supabase"]
-            .rpc(
-                table,
-                {
-                    "query_embedding": query_embedding,
-                    "match_count": top_k,
-                    "match_threshold": threshold,
-                },
-            )
-            .execute()
-        )
+        supabase_client = get_supabase_client()
+        summaries = supabase_client.rpc(
+            table,
+            {
+                "query_embedding": query_embedding,
+                "match_count": top_k,
+                "match_threshold": threshold,
+            },
+        ).execute()
 
         return summaries.data
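
With the commons field and the class-level BrainSettings gone, Neurons is now a stateless pydantic model whose methods fetch their dependencies per call. The resulting call pattern, sketched with the names from the diff (construction of doc_with_metadata omitted):

from utils.vectors import Neurons

neurons = Neurons()  # no commons=... argument anymore
# ids = neurons.create_vector(doc_with_metadata, user_openai_api_key)
# hits = neurons.similarity_search("some query", table="match_summaries", top_k=5)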