mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-19 04:12:03 +03:00
105 lines
3.1 KiB
Python
105 lines
3.1 KiB
Python
|
from logger import get_logger
|
||
|
from models.settings import get_supabase_client
|
||
|
from modules.brain.repository.interfaces.brains_vectors_interface import (
|
||
|
BrainsVectorsInterface,
|
||
|
)
|
||
|
|
||
|
logger = get_logger(__name__)
|
||
|
|
||
|
|
||
|
class BrainsVectors(BrainsVectorsInterface):
|
||
|
def __init__(self):
|
||
|
supabase_client = get_supabase_client()
|
||
|
self.db = supabase_client
|
||
|
|
||
|
def create_brain_vector(self, brain_id, vector_id, file_sha1):
|
||
|
response = (
|
||
|
self.db.table("brains_vectors")
|
||
|
.insert(
|
||
|
{
|
||
|
"brain_id": str(brain_id),
|
||
|
"vector_id": str(vector_id),
|
||
|
"file_sha1": file_sha1,
|
||
|
}
|
||
|
)
|
||
|
.execute()
|
||
|
)
|
||
|
return response.data
|
||
|
|
||
|
def get_vector_ids_from_file_sha1(self, file_sha1: str):
|
||
|
# move to vectors class
|
||
|
vectorsResponse = (
|
||
|
self.db.table("vectors")
|
||
|
.select("id")
|
||
|
.filter("file_sha1", "eq", file_sha1)
|
||
|
.execute()
|
||
|
)
|
||
|
return vectorsResponse.data
|
||
|
|
||
|
def get_brain_vector_ids(self, brain_id):
|
||
|
"""
|
||
|
Retrieve unique brain data (i.e. uploaded files and crawled websites).
|
||
|
"""
|
||
|
|
||
|
response = (
|
||
|
self.db.from_("brains_vectors")
|
||
|
.select("vector_id")
|
||
|
.filter("brain_id", "eq", brain_id)
|
||
|
.execute()
|
||
|
)
|
||
|
|
||
|
vector_ids = [item["vector_id"] for item in response.data]
|
||
|
|
||
|
if len(vector_ids) == 0:
|
||
|
return []
|
||
|
|
||
|
return vector_ids
|
||
|
|
||
|
def delete_file_from_brain(self, brain_id, file_name: str):
|
||
|
# First, get the vector_ids associated with the file_name
|
||
|
file_vectors = (
|
||
|
self.db.table("vectors")
|
||
|
.select("id")
|
||
|
.filter("metadata->>file_name", "eq", file_name)
|
||
|
.execute()
|
||
|
)
|
||
|
|
||
|
file_vectors_ids = [item["id"] for item in file_vectors.data]
|
||
|
|
||
|
# remove current file vectors from brain vectors
|
||
|
self.db.table("brains_vectors").delete().filter(
|
||
|
"vector_id", "in", f"({','.join(map(str, file_vectors_ids))})"
|
||
|
).filter("brain_id", "eq", brain_id).execute()
|
||
|
|
||
|
vectors_used_by_another_brain = (
|
||
|
self.db.table("brains_vectors")
|
||
|
.select("vector_id")
|
||
|
.filter("vector_id", "in", f"({','.join(map(str, file_vectors_ids))})")
|
||
|
.filter("brain_id", "neq", brain_id)
|
||
|
.execute()
|
||
|
)
|
||
|
|
||
|
vectors_used_by_another_brain_ids = [
|
||
|
item["vector_id"] for item in vectors_used_by_another_brain.data
|
||
|
]
|
||
|
|
||
|
vectors_no_longer_used_ids = [
|
||
|
id for id in file_vectors_ids if id not in vectors_used_by_another_brain_ids
|
||
|
]
|
||
|
|
||
|
self.db.table("vectors").delete().filter(
|
||
|
"id", "in", f"({','.join(map(str, vectors_no_longer_used_ids))})"
|
||
|
).execute()
|
||
|
|
||
|
return {"message": f"File {file_name} in brain {brain_id} has been deleted."}
|
||
|
|
||
|
def delete_brain_vector(self, brain_id: str):
|
||
|
results = (
|
||
|
self.db.table("brains_vectors")
|
||
|
.delete()
|
||
|
.match({"brain_id": brain_id})
|
||
|
.execute()
|
||
|
)
|
||
|
|
||
|
return results
|