mirror of
https://github.com/StanGirard/quivr.git
synced 2024-11-30 11:46:32 +03:00
feat(upload): changed to task (#1178)
This commit is contained in:
parent
223d3d9102
commit
980a704002
19
backend/celery_task.py
Normal file
19
backend/celery_task.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
from celery import shared_task
|
||||||
|
from models.brains import Brain
|
||||||
|
from repository.files.upload_file import DocumentSerializable
|
||||||
|
from utils.vectors import Neurons
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task
def create_embedding_for_document(
    brain_id, doc_with_metadata, user_openai_api_key, file_sha1
):
    """Celery task: embed one serialized document and attach the vector to a brain.

    Args:
        brain_id: Identifier of the brain that should own the new vector.
        doc_with_metadata: JSON string produced by ``DocumentSerializable.to_json``.
        user_openai_api_key: Per-user OpenAI key used for the embedding call.
        file_sha1: SHA-1 of the source file, stored alongside the brain/vector link.
    """
    # Rehydrate the document that was serialized for transport through the broker.
    document = DocumentSerializable.from_json(doc_with_metadata)

    # Compute the embedding for the document.
    embedding_result = Neurons().create_vector(document, user_openai_api_key)
    # add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})

    new_vector_id = embedding_result[0]  # pyright: ignore reportPrivateUsage=none

    # Link the freshly created vector to the target brain.
    Brain(id=brain_id).create_brain_vector(new_vector_id, file_sha1)
|
@ -1,8 +1,8 @@
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from langchain.schema import Document
|
from celery_task import create_embedding_for_document
|
||||||
from models import Brain, File
|
from models import File
|
||||||
from utils.vectors import Neurons
|
from repository.files.upload_file import DocumentSerializable
|
||||||
|
|
||||||
|
|
||||||
async def process_file(
|
async def process_file(
|
||||||
@ -26,15 +26,12 @@ async def process_file(
|
|||||||
"date": dateshort,
|
"date": dateshort,
|
||||||
"summarization": "true" if enable_summarization else "false",
|
"summarization": "true" if enable_summarization else "false",
|
||||||
}
|
}
|
||||||
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)
|
doc_with_metadata = DocumentSerializable(
|
||||||
|
page_content=doc.page_content, metadata=metadata
|
||||||
|
)
|
||||||
|
|
||||||
neurons = Neurons()
|
create_embedding_for_document.delay(
|
||||||
created_vector = neurons.create_vector(doc_with_metadata, user_openai_api_key)
|
brain_id, doc_with_metadata.to_json(), user_openai_api_key, file.file_sha1
|
||||||
# add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
|
)
|
||||||
|
|
||||||
created_vector_id = created_vector[0] # pyright: ignore reportPrivateUsage=none
|
|
||||||
|
|
||||||
brain = Brain(id=brain_id)
|
|
||||||
brain.create_brain_vector(created_vector_id, file.file_sha1)
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
|
import json
|
||||||
from multiprocessing import get_logger
|
from multiprocessing import get_logger
|
||||||
|
|
||||||
from httpx import Response
|
from httpx import Response
|
||||||
|
from langchain.pydantic_v1 import Field
|
||||||
|
from langchain.schema import Document
|
||||||
from models import get_supabase_client
|
from models import get_supabase_client
|
||||||
from supabase.client import Client
|
from supabase.client import Client
|
||||||
|
|
||||||
@ -19,3 +22,35 @@ def upload_file_storage(file, file_identifier: str) -> Response:
|
|||||||
logger.error(e)
|
logger.error(e)
|
||||||
print(e)
|
print(e)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentSerializable(Document):
    """Class for storing a piece of text and associated metadata."""

    # Raw text content of the document chunk.
    page_content: str
    # Arbitrary metadata attached to the chunk (source name, dates, flags, ...).
    metadata: dict = Field(default_factory=dict)

    @property
    def lc_serializable(self) -> bool:
        # Advertise the model as serializable to the LangChain machinery.
        return True

    def __repr__(self):
        # Preview only the first 50 characters so log lines stay short.
        return f"Document(page_content='{self.page_content[:50]}...', metadata={self.metadata})"

    def __str__(self):
        # Delegate to __repr__ so both representations stay in sync.
        return repr(self)

    def to_json(self) -> str:
        """Convert the Document object to a JSON string."""
        payload = {
            "page_content": self.page_content,
            "metadata": self.metadata,
        }
        return json.dumps(payload)

    @classmethod
    def from_json(cls, json_str: str):
        """Create a Document object from a JSON string."""
        payload = json.loads(json_str)
        return cls(
            page_content=payload["page_content"],
            metadata=payload["metadata"],
        )
|
||||||
|
Loading…
Reference in New Issue
Block a user