diff --git a/backend/parsers/audio.py b/backend/parsers/audio.py index fbfcc3ef8..08ebd94b3 100644 --- a/backend/parsers/audio.py +++ b/backend/parsers/audio.py @@ -32,7 +32,7 @@ from utils.vectors import documents_vector_store # return transcript # async def process_audio(upload_file: UploadFile, stats_db): -async def process_audio(upload_file: UploadFile, enable_summarization: bool, user): +async def process_audio(upload_file: UploadFile, enable_summarization: bool, user, user_openai_api_key): file_sha = "" dateshort = time.strftime("%Y%m%d-%H%M%S") @@ -40,6 +40,8 @@ async def process_audio(upload_file: UploadFile, enable_summarization: bool, use # uploaded file to file object openai_api_key = os.environ.get("OPENAI_API_KEY") + if user_openai_api_key: + openai_api_key = user_openai_api_key # Here, we're writing the uploaded file to a temporary file, so we can use it with your existing code. with tempfile.NamedTemporaryFile(delete=False, suffix=upload_file.filename) as tmp_file: diff --git a/backend/parsers/common.py b/backend/parsers/common.py index 637f5850c..01a77ee56 100644 --- a/backend/parsers/common.py +++ b/backend/parsers/common.py @@ -12,7 +12,7 @@ from utils.file import compute_sha1_from_content, compute_sha1_from_file from utils.vectors import create_summary, create_vector, documents_vector_store -async def process_file(file: UploadFile, loader_class, file_suffix, enable_summarization, user): +async def process_file(file: UploadFile, loader_class, file_suffix, enable_summarization, user, user_openai_api_key): documents = [] file_name = file.filename file_size = file.file._file.tell() # Getting the size of the file @@ -51,7 +51,7 @@ async def process_file(file: UploadFile, loader_class, file_suffix, enable_summa } doc_with_metadata = Document( page_content=doc.page_content, metadata=metadata) - create_vector(user.email, doc_with_metadata) + create_vector(user.email, doc_with_metadata, user_openai_api_key) # add_usage(stats_db, "embedding", "audio", 
metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap}) if enable_summarization and ids and len(ids) > 0: diff --git a/backend/parsers/csv.py b/backend/parsers/csv.py index 1589d8f93..1dbf3c4e2 100644 --- a/backend/parsers/csv.py +++ b/backend/parsers/csv.py @@ -4,5 +4,5 @@ from langchain.document_loaders.csv_loader import CSVLoader from .common import process_file -def process_csv(file: UploadFile, enable_summarization, user): - return process_file(file, CSVLoader, ".csv", enable_summarization, user) +def process_csv(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, CSVLoader, ".csv", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/docx.py b/backend/parsers/docx.py index 63825aa10..e73a2ddcc 100644 --- a/backend/parsers/docx.py +++ b/backend/parsers/docx.py @@ -4,5 +4,5 @@ from langchain.document_loaders import Docx2txtLoader from .common import process_file -def process_docx(file: UploadFile, enable_summarization, user): - return process_file(file, Docx2txtLoader, ".docx", enable_summarization, user) +def process_docx(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, Docx2txtLoader, ".docx", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/epub.py b/backend/parsers/epub.py index c1d6ff0a2..a596f22de 100644 --- a/backend/parsers/epub.py +++ b/backend/parsers/epub.py @@ -4,5 +4,5 @@ from langchain.document_loaders.epub import UnstructuredEPubLoader from .common import process_file -def process_epub(file: UploadFile, enable_summarization, user): - return process_file(file, UnstructuredEPubLoader, ".epub", enable_summarization, user) +def process_epub(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, UnstructuredEPubLoader, ".epub", enable_summarization, user, user_openai_api_key) diff --git 
a/backend/parsers/github.py b/backend/parsers/github.py index e41881fce..1757f0df8 100644 --- a/backend/parsers/github.py +++ b/backend/parsers/github.py @@ -12,7 +12,7 @@ from utils.vectors import create_summary, create_vector, documents_vector_store from .common import process_file -async def process_github(repo, enable_summarization, user, supabase): +async def process_github(repo, enable_summarization, user, supabase, user_openai_api_key): random_dir_name = os.urandom(16).hex() dateshort = time.strftime("%Y%m%d") loader = GitLoader( @@ -46,7 +46,7 @@ async def process_github(repo, enable_summarization, user, supabase): page_content=doc.page_content, metadata=metadata) exist = await file_already_exists_from_content(supabase, doc.page_content.encode("utf-8"), user) if not exist: - create_vector(user.email, doc_with_metadata) + create_vector(user.email, doc_with_metadata, user_openai_api_key) print("Created vector for ", doc.metadata["file_name"]) return {"message": f"✅ Github with {len(documents)} files has been uploaded.", "type": "success"} diff --git a/backend/parsers/html.py b/backend/parsers/html.py index d8193339b..19d1c5543 100644 --- a/backend/parsers/html.py +++ b/backend/parsers/html.py @@ -10,8 +10,8 @@ from langchain.document_loaders import UnstructuredHTMLLoader from .common import process_file -def process_html(file: UploadFile, enable_summarization, user): - return process_file(file, UnstructuredHTMLLoader, ".html", enable_summarization, user) +def process_html(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, UnstructuredHTMLLoader, ".html", enable_summarization, user, user_openai_api_key) def get_html(url): diff --git a/backend/parsers/markdown.py b/backend/parsers/markdown.py index 04ac567fb..feb9077ba 100644 --- a/backend/parsers/markdown.py +++ b/backend/parsers/markdown.py @@ -4,5 +4,5 @@ from langchain.document_loaders import UnstructuredMarkdownLoader from .common import process_file -def 
process_markdown(file: UploadFile, enable_summarization, user): - return process_file(file, UnstructuredMarkdownLoader, ".md", enable_summarization, user) +def process_markdown(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, UnstructuredMarkdownLoader, ".md", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/notebook.py b/backend/parsers/notebook.py index e95a378bb..3cb918353 100644 --- a/backend/parsers/notebook.py +++ b/backend/parsers/notebook.py @@ -4,5 +4,5 @@ from langchain.document_loaders import NotebookLoader from .common import process_file -def process_ipnyb(file: UploadFile, enable_summarization, user): - return process_file(file, NotebookLoader, "ipynb", enable_summarization, user) +def process_ipnyb(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, NotebookLoader, "ipynb", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/odt.py b/backend/parsers/odt.py index 90bda1a70..4e36d3eea 100644 --- a/backend/parsers/odt.py +++ b/backend/parsers/odt.py @@ -4,5 +4,5 @@ from langchain.document_loaders import UnstructuredODTLoader from .common import process_file -def process_odt(file: UploadFile, enable_summarization, user): - return process_file(file, UnstructuredODTLoader, ".odt", enable_summarization, user) +def process_odt(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, UnstructuredODTLoader, ".odt", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/pdf.py b/backend/parsers/pdf.py index c1614142e..ea8d204b4 100644 --- a/backend/parsers/pdf.py +++ b/backend/parsers/pdf.py @@ -4,5 +4,5 @@ from langchain.document_loaders import PyMuPDFLoader from .common import process_file -def process_pdf(file: UploadFile, enable_summarization, user): - return process_file(file, PyMuPDFLoader, ".pdf", enable_summarization, user) +def process_pdf(file: 
UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, PyMuPDFLoader, ".pdf", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/powerpoint.py b/backend/parsers/powerpoint.py index 8775d154a..e1617884c 100644 --- a/backend/parsers/powerpoint.py +++ b/backend/parsers/powerpoint.py @@ -4,5 +4,5 @@ from langchain.document_loaders import UnstructuredPowerPointLoader from .common import process_file -def process_powerpoint(file: UploadFile, enable_summarization, user): - return process_file(file, UnstructuredPowerPointLoader, ".pptx", enable_summarization, user) +def process_powerpoint(file: UploadFile, enable_summarization, user, user_openai_api_key): + return process_file(file, UnstructuredPowerPointLoader, ".pptx", enable_summarization, user, user_openai_api_key) diff --git a/backend/parsers/txt.py b/backend/parsers/txt.py index 2a5567c24..333030085 100644 --- a/backend/parsers/txt.py +++ b/backend/parsers/txt.py @@ -4,5 +4,5 @@ from langchain.document_loaders import TextLoader from .common import process_file -async def process_txt(file: UploadFile, enable_summarization, user): - return await process_file(file, TextLoader, ".txt", enable_summarization, user) +async def process_txt(file: UploadFile, enable_summarization, user, user_openai_api_key): + return await process_file(file, TextLoader, ".txt", enable_summarization, user, user_openai_api_key) diff --git a/backend/routes/upload_routes.py b/backend/routes/upload_routes.py index e06d0d5a9..4a5848f48 100644 --- a/backend/routes/upload_routes.py +++ b/backend/routes/upload_routes.py @@ -38,7 +38,7 @@ async def upload_file(request: Request,commons: CommonsDep, file: UploadFile, e if remaining_free_space - file_size < 0: message = {"message": f"❌ User's brain will exceed maximum capacity with this upload. 
Maximum file allowed is : {convert_bytes(remaining_free_space)}", "type": "error"} else: - message = await filter_file(file, enable_summarization, commons['supabase'], user) + message = await filter_file(file, enable_summarization, commons['supabase'], user, openai_api_key=request.headers.get('Openai-Api-Key', None)) return message diff --git a/backend/utils/processors.py b/backend/utils/processors.py index 28e9076cf..b054e4048 100644 --- a/backend/utils/processors.py +++ b/backend/utils/processors.py @@ -14,6 +14,7 @@ from parsers.odt import process_odt from parsers.pdf import process_pdf from parsers.powerpoint import process_powerpoint from parsers.txt import process_txt + from supabase import Client file_processors = { @@ -40,7 +41,7 @@ file_processors = { -async def filter_file(file: UploadFile, enable_summarization: bool, supabase_client: Client, user: User): +async def filter_file(file: UploadFile, enable_summarization: bool, supabase_client: Client, user: User, openai_api_key): if await file_already_exists(supabase_client, file, user): return {"message": f"🤔 {file.filename} already exists.", "type": "warning"} elif file.file._file.tell() < 1: @@ -48,7 +49,7 @@ async def filter_file(file: UploadFile, enable_summarization: bool, supabase_cli else: file_extension = os.path.splitext(file.filename)[-1].lower() # Convert file extension to lowercase if file_extension in file_processors: - await file_processors[file_extension](file, enable_summarization, user) + await file_processors[file_extension](file, enable_summarization, user, openai_api_key) return {"message": f"✅ {file.filename} has been uploaded.", "type": "success"} else: return {"message": f"❌ {file.filename} is not supported.", "type": "error"} diff --git a/backend/utils/vectors.py b/backend/utils/vectors.py index 2de6d0ca9..732ed1fc3 100644 --- a/backend/utils/vectors.py +++ b/backend/utils/vectors.py @@ -60,10 +60,13 @@ def create_summary(document_id, content, metadata): 
supabase_client.table("summaries").update( {"document_id": document_id}).match({"id": sids[0]}).execute() -def create_vector(user_id,doc): +def create_vector(user_id, doc, user_openai_api_key=None): logger.info(f"Creating vector for document") logger.info(f"Document: {doc}") + if user_openai_api_key: + documents_vector_store._embedding = OpenAIEmbeddings(openai_api_key=user_openai_api_key) try: + sids = documents_vector_store.add_documents( [doc]) if sids and len(sids) > 0: