feat(embeddings): now using user's api key

Stan Girard 2023-06-10 23:53:51 +02:00
parent 7b653e29a5
commit b24b86fa6e
16 changed files with 35 additions and 29 deletions

View File

@@ -32,7 +32,7 @@ from utils.vectors import documents_vector_store
 # return transcript
 # async def process_audio(upload_file: UploadFile, stats_db):
-async def process_audio(upload_file: UploadFile, enable_summarization: bool, user):
+async def process_audio(upload_file: UploadFile, enable_summarization: bool, user, user_openai_api_key):
     file_sha = ""
     dateshort = time.strftime("%Y%m%d-%H%M%S")
@@ -40,6 +40,8 @@ async def process_audio(upload_file: UploadFile, enable_summarization: bool, use
     # uploaded file to file object
     openai_api_key = os.environ.get("OPENAI_API_KEY")
+    if user_openai_api_key:
+        openai_api_key = user_openai_api_key
     # Here, we're writing the uploaded file to a temporary file, so we can use it with your existing code.
     with tempfile.NamedTemporaryFile(delete=False, suffix=upload_file.filename) as tmp_file:
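
Note: the two added lines above implement a simple fallback: the user-supplied key wins; otherwise the server-wide OPENAI_API_KEY environment variable is used. A minimal standalone sketch of that resolution logic (the helper name resolve_openai_api_key is hypothetical, not part of this commit):

import os

def resolve_openai_api_key(user_openai_api_key=None):
    # Prefer the key supplied by the user; otherwise fall back to the server-wide key.
    if user_openai_api_key:
        return user_openai_api_key
    return os.environ.get("OPENAI_API_KEY")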

View File

@@ -12,7 +12,7 @@ from utils.file import compute_sha1_from_content, compute_sha1_from_file
 from utils.vectors import create_summary, create_vector, documents_vector_store
-async def process_file(file: UploadFile, loader_class, file_suffix, enable_summarization, user):
+async def process_file(file: UploadFile, loader_class, file_suffix, enable_summarization, user, user_openai_api_key):
     documents = []
     file_name = file.filename
     file_size = file.file._file.tell() # Getting the size of the file
@@ -51,7 +51,7 @@ async def process_file(file: UploadFile, loader_class, file_suffix, enable_summa
         }
         doc_with_metadata = Document(
             page_content=doc.page_content, metadata=metadata)
-        create_vector(user.email, doc_with_metadata)
+        create_vector(user.email, doc_with_metadata, user_openai_api_key)
         # add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
     if enable_summarization and ids and len(ids) > 0:

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders.csv_loader import CSVLoader
 from .common import process_file
-def process_csv(file: UploadFile, enable_summarization, user):
-    return process_file(file, CSVLoader, ".csv", enable_summarization, user)
+def process_csv(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, CSVLoader, ".csv", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import Docx2txtLoader
 from .common import process_file
-def process_docx(file: UploadFile, enable_summarization, user):
-    return process_file(file, Docx2txtLoader, ".docx", enable_summarization, user)
+def process_docx(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, Docx2txtLoader, ".docx", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders.epub import UnstructuredEPubLoader
 from .common import process_file
-def process_epub(file: UploadFile, enable_summarization, user):
-    return process_file(file, UnstructuredEPubLoader, ".epub", enable_summarization, user)
+def process_epub(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, UnstructuredEPubLoader, ".epub", enable_summarization, user, user_openai_api_key)

View File

@@ -12,7 +12,7 @@ from utils.vectors import create_summary, create_vector, documents_vector_store
 from .common import process_file
-async def process_github(repo, enable_summarization, user, supabase):
+async def process_github(repo, enable_summarization, user, supabase, user_openai_api_key):
     random_dir_name = os.urandom(16).hex()
     dateshort = time.strftime("%Y%m%d")
     loader = GitLoader(
@@ -46,7 +46,7 @@ async def process_github(repo, enable_summarization, user, supabase):
             page_content=doc.page_content, metadata=metadata)
         exist = await file_already_exists_from_content(supabase, doc.page_content.encode("utf-8"), user)
         if not exist:
-            create_vector(user.email, doc_with_metadata)
+            create_vector(user.email, doc_with_metadata, user_openai_api_key)
             print("Created vector for ", doc.metadata["file_name"])
     return {"message": f"✅ Github with {len(documents)} files has been uploaded.", "type": "success"}

View File

@@ -10,8 +10,8 @@ from langchain.document_loaders import UnstructuredHTMLLoader
 from .common import process_file
-def process_html(file: UploadFile, enable_summarization, user):
-    return process_file(file, UnstructuredHTMLLoader, ".html", enable_summarization, user)
+def process_html(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, UnstructuredHTMLLoader, ".html", enable_summarization, user, user_openai_api_key)
 def get_html(url):

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import UnstructuredMarkdownLoader
 from .common import process_file
-def process_markdown(file: UploadFile, enable_summarization, user):
-    return process_file(file, UnstructuredMarkdownLoader, ".md", enable_summarization, user)
+def process_markdown(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, UnstructuredMarkdownLoader, ".md", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import NotebookLoader
 from .common import process_file
-def process_ipnyb(file: UploadFile, enable_summarization, user):
-    return process_file(file, NotebookLoader, "ipynb", enable_summarization, user)
+def process_ipnyb(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, NotebookLoader, "ipynb", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import UnstructuredODTLoader
 from .common import process_file
-def process_odt(file: UploadFile, enable_summarization, user):
-    return process_file(file, UnstructuredODTLoader, ".odt", enable_summarization, user)
+def process_odt(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, UnstructuredODTLoader, ".odt", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import PyMuPDFLoader
 from .common import process_file
-def process_pdf(file: UploadFile, enable_summarization, user):
-    return process_file(file, PyMuPDFLoader, ".pdf", enable_summarization, user)
+def process_pdf(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, PyMuPDFLoader, ".pdf", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import UnstructuredPowerPointLoader
 from .common import process_file
-def process_powerpoint(file: UploadFile, enable_summarization, user):
-    return process_file(file, UnstructuredPowerPointLoader, ".pptx", enable_summarization, user)
+def process_powerpoint(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return process_file(file, UnstructuredPowerPointLoader, ".pptx", enable_summarization, user, user_openai_api_key)

View File

@@ -4,5 +4,5 @@ from langchain.document_loaders import TextLoader
 from .common import process_file
-async def process_txt(file: UploadFile, enable_summarization, user):
-    return await process_file(file, TextLoader, ".txt", enable_summarization, user)
+async def process_txt(file: UploadFile, enable_summarization, user, user_openai_api_key):
+    return await process_file(file, TextLoader, ".txt", enable_summarization, user, user_openai_api_key)

View File

@@ -38,7 +38,7 @@ async def upload_file(request: Request,commons: CommonsDep, file: UploadFile, e
     if remaining_free_space - file_size < 0:
         message = {"message": f"❌ User's brain will exceed maximum capacity with this upload. Maximum file allowed is : {convert_bytes(remaining_free_space)}", "type": "error"}
     else:
-        message = await filter_file(file, enable_summarization, commons['supabase'], user)
+        message = await filter_file(file, enable_summarization, commons['supabase'], user, openai_api_key=request.headers.get('Openai-Api-Key', None))
     return message
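
Note: the route now forwards a per-request key taken from the Openai-Api-Key request header, defaulting to None when the header is absent. A hedged example of how a client might call this endpoint (the host, port, route, and token values are illustrative assumptions, not part of this commit):

import requests

response = requests.post(
    "http://localhost:5050/upload",  # assumed host and route
    headers={
        "Authorization": "Bearer <access-token>",  # placeholder auth token
        "Openai-Api-Key": "sk-<user-key>",  # header name taken from the diff above
    },
    files={"file": open("notes.pdf", "rb")},
)
print(response.json())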

View File

@@ -14,6 +14,7 @@ from parsers.odt import process_odt
 from parsers.pdf import process_pdf
 from parsers.powerpoint import process_powerpoint
 from parsers.txt import process_txt
+from supabase import Client

 file_processors = {
@@ -40,7 +41,7 @@ file_processors = {
-async def filter_file(file: UploadFile, enable_summarization: bool, supabase_client: Client, user: User):
+async def filter_file(file: UploadFile, enable_summarization: bool, supabase_client: Client, user: User, openai_api_key):
     if await file_already_exists(supabase_client, file, user):
         return {"message": f"🤔 {file.filename} already exists.", "type": "warning"}
     elif file.file._file.tell() < 1:
@@ -48,7 +49,7 @@ async def filter_file(file: UploadFile, enable_summarization: bool, supabase_cli
     else:
         file_extension = os.path.splitext(file.filename)[-1].lower() # Convert file extension to lowercase
         if file_extension in file_processors:
-            await file_processors[file_extension](file, enable_summarization, user)
+            await file_processors[file_extension](file, enable_summarization, user, openai_api_key)
             return {"message": f"{file.filename} has been uploaded.", "type": "success"}
         else:
             return {"message": f"{file.filename} is not supported.", "type": "error"}
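
Note: filter_file dispatches on the lowercased file extension through the file_processors mapping, so every processor must now accept the extra key argument. A simplified sketch of that mapping, reconstructed from the parser signatures in this commit (the actual dict in upload.py is truncated above, so treat this as an approximation):

file_processors = {
    ".csv": process_csv,
    ".docx": process_docx,
    ".epub": process_epub,
    ".html": process_html,
    ".md": process_markdown,
    ".ipynb": process_ipnyb,
    ".odt": process_odt,
    ".pdf": process_pdf,
    ".pptx": process_powerpoint,
    ".txt": process_txt,
}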

View File

@@ -60,10 +60,13 @@ def create_summary(document_id, content, metadata):
     supabase_client.table("summaries").update(
         {"document_id": document_id}).match({"id": sids[0]}).execute()

-def create_vector(user_id, doc):
+def create_vector(user_id, doc, user_openai_api_key=None):
     logger.info(f"Creating vector for document")
     logger.info(f"Document: {doc}")
+    if user_openai_api_key:
+        documents_vector_store._embedding = embeddings_request = OpenAIEmbeddings(openai_api_key=user_openai_api_key)
     try:
         sids = documents_vector_store.add_documents(
             [doc])
         if sids and len(sids) > 0:
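
Note: assigning to documents_vector_store._embedding swaps the embedding object on a module-level store shared across requests, which can race under concurrent uploads. A hedged sketch of a request-scoped alternative, assuming langchain's OpenAIEmbeddings and SupabaseVectorStore (the helper name and table name are assumptions, not part of this commit):

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import SupabaseVectorStore

def build_vector_store(supabase_client, user_openai_api_key=None):
    # Construct a per-request store instead of mutating the shared one.
    if user_openai_api_key:
        embeddings = OpenAIEmbeddings(openai_api_key=user_openai_api_key)
    else:
        embeddings = OpenAIEmbeddings()  # falls back to the OPENAI_API_KEY env var
    return SupabaseVectorStore(
        client=supabase_client,
        embedding=embeddings,
        table_name="vectors",  # assumed table name
    )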