mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-15 17:43:03 +03:00
380cf82706
# Description # Testing backend ## Docker setup 1. Copy `.env.example` to `.env`. A new env variable was added: EMBEDDING_DIM 2. Apply the supabase migrations: ```sh supabase stop supabase db reset supabase start ``` 3. Start the backend containers: ``` make dev ``` ## Local setup You can also run the backend without Docker. 1. Install [`rye`](https://rye.astral.sh/guide/installation/). Choose the managed Python version and set the version to 3.11 2. Run the following: ``` cd quivr/backend rye sync ``` 3. Source the `.venv` virtual env: `source .venv/bin/activate` 4. Run the backend; make sure redis and the supabase API are running: ``` LOG_LEVEL=debug uvicorn quivr_api.main:app --log-level debug --reload --host 0.0.0.0 --port 5050 --workers 1 ``` Worker: ``` LOG_LEVEL=debug celery -A quivr_worker.celery_worker worker -l info -E --concurrency 1 ``` Notifier: ``` LOG_LEVEL=debug python worker/quivr_worker/celery_monitor.py ``` --------- Co-authored-by: chloedia <chloedaems0@gmail.com> Co-authored-by: aminediro <aminedirhoussi1@gmail.com> Co-authored-by: Antoine Dewez <44063631+Zewed@users.noreply.github.com> Co-authored-by: Chloé Daems <73901882+chloedia@users.noreply.github.com> Co-authored-by: Zewed <dewez.antoine2@gmail.com>
107 lines
3.0 KiB
Python
107 lines
3.0 KiB
Python
import hashlib
|
|
import time
|
|
from contextlib import contextmanager
|
|
from pathlib import Path
|
|
from tempfile import NamedTemporaryFile
|
|
from typing import Any
|
|
from uuid import UUID
|
|
|
|
from quivr_api.logger import get_logger
|
|
from quivr_core.files.file import FileExtension, QuivrFile
|
|
|
|
from quivr_worker.utils import get_tmp_name
|
|
|
|
logger = get_logger("celery_worker")
|
|
|
|
|
|
def compute_sha1(content: bytes) -> str:
    """Return the hexadecimal SHA-1 digest of ``content``.

    Args:
        content: Raw bytes to hash.

    Returns:
        The 40-character lowercase hex digest.
    """
    # This module stores the digest as a content fingerprint (``file_sha1``),
    # not as a security primitive. Flagging that explicitly keeps the call
    # usable on builds that restrict SHA-1 for security purposes.
    return hashlib.sha1(content, usedforsecurity=False).hexdigest()
|
|
|
|
|
|
@contextmanager
def build_file(
    file_data: bytes,
    knowledge_id: UUID,
    file_name: str,
    original_file_name: str | None = None,
):
    """Write ``file_data`` to a temporary file and yield a ``File`` describing it.

    The temporary file stays open for the duration of the ``with`` body and is
    closed when the context exits, whether normally or through an exception.
    ``NamedTemporaryFile`` is used with its default settings, so closing it
    also removes it from disk.

    Args:
        file_data: Full content of the file, already loaded in memory.
        knowledge_id: Identifier stored as the ``id`` of the yielded ``File``.
        file_name: Name passed to ``get_tmp_name`` to derive the temporary
            suffix, the base file name and the file extension.
        original_file_name: Optional original name; when missing or empty the
            base file name returned by ``get_tmp_name`` is used instead.

    Yields:
        A ``File`` whose ``tmp_file_path`` points at the temporary file.
    """
    # TODO(@aminediro) : Maybe use fsspec file to be agnostic to where files are stored :?
    # We are reading the whole file to memory, which doesn't scale
    tmp_name, base_file_name, file_extension = get_tmp_name(file_name)

    # The `with` block owns the temporary file's lifetime. If get_tmp_name or
    # NamedTemporaryFile itself raises, nothing is left to clean up and the
    # original exception propagates unchanged (a `finally: tmp_file.close()`
    # would instead fail with UnboundLocalError and hide it).
    with NamedTemporaryFile(
        suffix="_" + tmp_name,  # pyright: ignore reportPrivateUsage=none
    ) as tmp_file:
        tmp_file.write(file_data)
        # Flush so readers opening the path see the full content.
        tmp_file.flush()

        yield File(
            knowledge_id=knowledge_id,
            file_name=base_file_name,
            original_file_name=original_file_name or base_file_name,
            tmp_file_path=Path(tmp_file.name),
            file_size=len(file_data),
            file_extension=file_extension,
            file_sha1=compute_sha1(file_data),
        )
|
|
|
|
|
|
class File:
    """Lightweight record describing a file stored at a temporary path.

    Instances carry the identifier, names, size, extension and SHA-1 digest of
    a file, and can be converted to a ``QuivrFile`` with ``to_qfile``.
    """

    # Fixed attribute set: no per-instance __dict__ is created.
    __slots__ = (
        "id",
        "file_name",
        "tmp_file_path",
        "file_size",
        "file_extension",
        "file_sha1",
        "original_file_name",
    )

    def __init__(
        self,
        knowledge_id: UUID,
        file_name: str,
        tmp_file_path: Path,
        file_size: int,
        file_extension: str,
        file_sha1: str,
        original_file_name: str,
    ):
        """Store the file attributes.

        Args:
            knowledge_id: Identifier kept as ``self.id``.
            file_name: Name of the file.
            tmp_file_path: Path where the file content is stored.
            file_size: Size of the content in bytes.
            file_extension: Extension string, converted to ``FileExtension``.
            file_sha1: Hex SHA-1 digest of the content.
            original_file_name: Name the file originally had.
        """
        self.id = knowledge_id
        self.file_name = file_name
        self.tmp_file_path = tmp_file_path
        self.file_size = file_size
        self.file_sha1 = file_sha1
        # Conversion happens here, so FileExtension decides how an
        # unrecognised extension string is handled.
        self.file_extension = FileExtension(file_extension)
        self.original_file_name = original_file_name

    def is_empty(self) -> bool:
        """Return ``True`` when the recorded file size is below one byte."""
        return self.file_size < 1  # pyright: ignore reportPrivateUsage=none

    def to_qfile(
        self, brain_id: UUID, metadata: dict[str, Any] | None = None
    ) -> QuivrFile:
        """Build a ``QuivrFile`` from this record.

        Args:
            brain_id: Identifier of the brain the file is attached to.
            metadata: Optional extra metadata. Its entries are merged last,
                so they override the default ``date``, ``file_name``,
                ``original_file_name`` and ``knowledge_id`` keys.

        Returns:
            A ``QuivrFile`` pointing at ``tmp_file_path``.
        """
        # `None` replaces the former shared `{}` default (mutable default
        # argument); passing no metadata behaves exactly as before.
        extra_metadata = metadata or {}
        return QuivrFile(
            id=self.id,
            # NOTE(review): original_filename receives file_name, while
            # original_file_name only goes into metadata — confirm intended.
            original_filename=self.file_name,
            path=self.tmp_file_path,
            brain_id=brain_id,
            file_sha1=self.file_sha1,
            file_extension=self.file_extension,
            file_size=self.file_size,
            metadata={
                "date": time.strftime("%Y%m%d"),
                "file_name": self.file_name,
                "original_file_name": self.original_file_name,
                "knowledge_id": self.id,
                **extra_metadata,
            },
        )
|