mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-17 11:21:35 +03:00
f48dab4a7d
# Description Please include a summary of the changes and the related issue. Please also include relevant motivation and context. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate):
63 lines
2.2 KiB
Python
63 lines
2.2 KiB
Python
from typing import Any, List
|
|
from uuid import UUID
|
|
|
|
from logger import get_logger
|
|
from modules.brain.repository.brains_vectors import BrainsVectors
|
|
from modules.brain.repository.interfaces.brains_vectors_interface import (
|
|
BrainsVectorsInterface,
|
|
)
|
|
from modules.knowledge.repository.storage import Storage
|
|
from packages.embeddings.vectors import get_unique_files_from_vector_ids
|
|
|
|
# Module-level logger, obtained from the project's get_logger helper using this module's name.
logger = get_logger(__name__)
|
|
|
|
|
|
class BrainVectorService:
    """Service layer for the vectors and files attached to a single brain.

    Every operation is scoped to the brain id given at construction time and
    is delegated to a ``BrainsVectors`` repository.

    Attributes:
        repository: Repository used for all brain/vector persistence calls.
        id: Identifier of the brain this service operates on.
        files: Result of the most recent ``get_unique_brain_files`` call;
            an empty list until that method (or ``brain_size``) has run.
    """

    def __init__(self, brain_id: UUID):
        """Bind the service to ``brain_id`` and create its repository."""
        # Attributes are declared per instance so that no mutable list object
        # is shared between different services.
        self.repository: BrainsVectorsInterface = BrainsVectors()
        self.id: UUID = brain_id
        self.files: List[Any] = []

    def create_brain_vector(self, vector_id, file_sha1):
        """Attach ``vector_id`` (and the sha1 of its file) to this brain.

        Returns:
            Whatever the repository's ``create_brain_vector`` returns.
        """
        return self.repository.create_brain_vector(self.id, vector_id, file_sha1)  # type: ignore

    def update_brain_with_file(self, file_sha1: str) -> None:
        """Attach every vector recorded for ``file_sha1`` to this brain.

        Logs an info message and does nothing when the repository reports no
        vector ids (``None`` or an empty collection) for the file.
        """
        # not used
        vector_ids = self.repository.get_vector_ids_from_file_sha1(file_sha1)
        if not vector_ids:
            logger.info(f"No vector ids found for file {file_sha1}")
            return

        for vector_id in vector_ids:
            self.create_brain_vector(vector_id, file_sha1)

    def get_unique_brain_files(self) -> List[Any]:
        """
        Retrieve unique brain data (i.e. uploaded files and crawled websites).

        The result is also cached on ``self.files``.

        Returns:
            The value produced by ``get_unique_files_from_vector_ids`` for
            this brain's vector ids.
        """
        vector_ids = self.repository.get_brain_vector_ids(self.id)  # type: ignore
        self.files = get_unique_files_from_vector_ids(vector_ids)

        return self.files

    def delete_file_from_brain(self, file_name: str):
        """Remove ``file_name`` from storage and from this brain.

        The stored object is addressed as ``"<brain id>/<file_name>"``; it is
        removed first, then the repository deletes the brain's reference.

        Returns:
            Whatever the repository's ``delete_file_from_brain`` returns.
        """
        file_name_with_brain_id = f"{self.id}/{file_name}"
        storage = Storage()
        storage.remove_file(file_name_with_brain_id)
        return self.repository.delete_file_from_brain(self.id, file_name)  # type: ignore

    def delete_file_url_from_brain(self, file_name: str):
        """Remove ``file_name`` from this brain without touching storage.

        Only the repository deletion is performed.
        NOTE(review): presumably intended for URL-sourced entries that have no
        stored object - confirm against callers.

        Returns:
            Whatever the repository's ``delete_file_from_brain`` returns.
        """
        return self.repository.delete_file_from_brain(self.id, file_name)  # type: ignore

    @property
    def brain_size(self):
        """Sum of the ``"size"`` entries of this brain's unique files.

        Each access refreshes ``self.files`` through
        ``get_unique_brain_files`` before summing, so it triggers repository
        calls every time.
        """
        # TODO: change the calculation of the brain size, calculate the size stored for the embeddings + what's in the storage
        files = self.get_unique_brain_files()
        current_brain_size = sum(float(doc["size"]) for doc in files)

        return current_brain_size
|