quivr/backend/modules/brain/repository/brains_vectors.py
Stan Girard aeaa16dc5f
feat(notion): added custom integration (#2268)
This pull request adds a custom integration feature and sync
functionality to the application. It includes the following changes:

- Added a new integration entity for custom integrations.

- Implemented the ability to load and poll the custom integration.

- Added a task to sync the custom integration with the user's brain.

- Updated the celery beat schedule to include the new task.

Please review and merge this pull request.
2024-02-27 21:30:25 -08:00

106 lines
3.2 KiB
Python

from logger import get_logger
from models.settings import get_supabase_client
from modules.brain.repository.interfaces.brains_vectors_interface import (
BrainsVectorsInterface,
)
logger = get_logger(__name__)
class BrainsVectors(BrainsVectorsInterface):
    """Repository for the ``brains_vectors`` table.

    Rows written by this class link a brain id to a vector id together with
    the SHA-1 of the originating file. Some methods also read from, or delete
    rows in, the ``vectors`` table.
    """

    def __init__(self):
        """Bind the repository to the client from ``get_supabase_client``."""
        self.db = get_supabase_client()

    @staticmethod
    def _in_filter(ids):
        """Format *ids* as the ``(a,b,c)`` list expected by the ``in`` filter."""
        return f"({','.join(map(str, ids))})"

    def create_brain_vector(self, brain_id, vector_id, file_sha1):
        """Insert one brain/vector link row and return the response data.

        Args:
            brain_id: Identifier of the brain; stored as ``str(brain_id)``.
            vector_id: Identifier of the vector; stored as ``str(vector_id)``.
            file_sha1: SHA-1 of the file, stored as given.

        Returns:
            The ``data`` attribute of the executed insert response.
        """
        row = {
            "brain_id": str(brain_id),
            "vector_id": str(vector_id),
            "file_sha1": file_sha1,
        }
        response = self.db.table("brains_vectors").insert(row).execute()
        return response.data

    def get_vector_ids_from_file_sha1(self, file_sha1: str):
        """Return the ``id`` rows of ``vectors`` whose ``file_sha1`` matches.

        Returns:
            The ``data`` attribute of the response, i.e. the selected rows
            (each carrying an ``id`` column), not a flat list of ids.
        """
        # TODO: move to vectors class
        vectors_response = (
            self.db.table("vectors")
            .select("id")
            .filter("file_sha1", "eq", file_sha1)
            .execute()
        )
        return vectors_response.data

    def get_brain_vector_ids(self, brain_id):
        """Return the ``vector_id`` values linked to *brain_id*.

        Returns:
            A list of vector ids; empty when the brain has no linked vectors.
        """
        response = (
            self.db.from_("brains_vectors")
            .select("vector_id")
            .filter("brain_id", "eq", brain_id)
            .execute()
        )
        return [item["vector_id"] for item in response.data]

    def delete_file_from_brain(self, brain_id, file_name: str):
        """Unlink a file's vectors from a brain and drop the unused vectors.

        Steps:
          1. Look up the vectors whose ``metadata->>file_name`` equals
             *file_name*.
          2. Delete the ``brains_vectors`` rows linking those vectors to
             *brain_id*.
          3. Delete from ``vectors`` every one of those vectors that is not
             linked to any other brain.

        Args:
            brain_id: Brain the file is being removed from.
            file_name: File name as stored in the vector metadata.

        Returns:
            A dict with a single ``"message"`` key confirming the deletion.
        """
        result = {
            "message": f"File {file_name} in brain {brain_id} has been deleted."
        }

        # TODO: filter by brain_id -- this lookup matches on file name only,
        # so it also returns vectors of same-named files in other brains.
        file_vectors = (
            self.db.table("vectors")
            .select("id")
            .filter("metadata->>file_name", "eq", file_name)
            .execute()
        )
        file_vector_ids = [item["id"] for item in file_vectors.data]

        # Nothing matched: skip the follow-up requests instead of sending
        # filters with an empty ``in ()`` list.
        if not file_vector_ids:
            return result

        ids_filter = self._in_filter(file_vector_ids)

        # Remove the links between this brain and the file's vectors.
        self.db.table("brains_vectors").delete().filter(
            "vector_id", "in", ids_filter
        ).filter("brain_id", "eq", brain_id).execute()

        # Vectors still referenced by some other brain must be kept.
        links_elsewhere = (
            self.db.table("brains_vectors")
            .select("vector_id")
            .filter("vector_id", "in", ids_filter)
            .filter("brain_id", "neq", brain_id)
            .execute()
        )
        ids_used_elsewhere = {item["vector_id"] for item in links_elsewhere.data}

        unused_vector_ids = [
            vector_id
            for vector_id in file_vector_ids
            if vector_id not in ids_used_elsewhere
        ]

        # Only issue the delete when there is something left to delete.
        if unused_vector_ids:
            self.db.table("vectors").delete().filter(
                "id", "in", self._in_filter(unused_vector_ids)
            ).execute()

        return result

    def delete_brain_vector(self, brain_id: str):
        """Delete every ``brains_vectors`` row belonging to *brain_id*.

        Returns:
            The executed delete response object itself (not its ``data``).
        """
        results = (
            self.db.table("brains_vectors")
            .delete()
            .match({"brain_id": brain_id})
            .execute()
        )
        return results