feat: improve delete knowledge performance (#1733)

Issue: https://github.com/StanGirard/quivr/issues/1724
This commit is contained in:
Mamadou DICKO 2023-11-27 16:47:13 +01:00 committed by GitHub
parent f1ddaca7e2
commit 10e94e5a91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 26 deletions

View File

@ -278,36 +278,39 @@ class Brain(Repository):
def delete_file_from_brain(self, brain_id, file_name: str):
    """Detach a file's vectors from a brain and delete the orphaned ones.

    Steps:
      1. Select the ids of all rows in ``vectors`` whose
         ``metadata->>file_name`` equals ``file_name``.
      2. Delete the ``brains_vectors`` rows linking those vectors to
         ``brain_id`` (one batched request).
      3. Select which of those vectors are still linked to a different
         brain (one batched request).
      4. Delete from ``vectors`` every vector that is no longer linked to
         any other brain (one batched request).

    Args:
        brain_id: Identifier of the brain the file is removed from.
        file_name: File name stored under the ``file_name`` metadata key.

    Returns:
        A dict with a single ``"message"`` key confirming the deletion.
    """

    def to_in_criteria(ids):
        # PostgREST expects the `in` operand as a parenthesised,
        # comma-separated list, e.g. `in.(1,2,3)`; the string form of a
        # Python list ("[1, 2, 3]") is not valid filter syntax.
        # NOTE(review): assumes ids contain no commas or parentheses
        # (e.g. integers or UUIDs) - confirm against the schema.
        return "(" + ",".join(str(vector_id) for vector_id in ids) + ")"

    result = {"message": f"File {file_name} in brain {brain_id} has been deleted."}

    # First, get the vector ids associated with the file_name.
    # NOTE(review): this lookup is not restricted to `brain_id`; vectors of
    # a same-named file in another brain are matched as well and are only
    # protected by the "used by another brain" check below.
    file_vectors = (
        self.db.table("vectors")
        .select("id")
        .filter("metadata->>file_name", "eq", file_name)
        .execute()
    )
    file_vectors_ids = [item["id"] for item in file_vectors.data]

    # Nothing to unlink or delete: avoid sending `in.()` filters.
    if not file_vectors_ids:
        return result

    file_vectors_criteria = to_in_criteria(file_vectors_ids)

    # Remove the current file's vectors from this brain.
    self.db.table("brains_vectors").delete().filter(
        "vector_id", "in", file_vectors_criteria
    ).filter("brain_id", "eq", brain_id).execute()

    # Find the vectors that are still associated with any other brain.
    vectors_used_by_another_brain = (
        self.db.table("brains_vectors")
        .select("vector_id")
        .filter("vector_id", "in", file_vectors_criteria)
        .filter("brain_id", "neq", brain_id)
        .execute()
    )
    # A set gives O(1) membership tests in the comprehension below.
    vectors_used_by_another_brain_ids = {
        item["vector_id"] for item in vectors_used_by_another_brain.data
    }

    vectors_no_longer_used_ids = [
        vector_id
        for vector_id in file_vectors_ids
        if vector_id not in vectors_used_by_another_brain_ids
    ]

    # Only vectors not referenced by any other brain are deleted for good.
    if vectors_no_longer_used_ids:
        self.db.table("vectors").delete().filter(
            "id", "in", to_in_criteria(vectors_no_longer_used_ids)
        ).execute()

    return result

View File

@ -1,6 +1,6 @@
from uuid import UUID
from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Depends, HTTPException, Query
from logger import get_logger
from middlewares.auth import AuthBearer, get_current_user
from models import Brain
@ -10,6 +10,7 @@ from repository.files.generate_file_signed_url import generate_file_signed_url
from repository.knowledge.get_all_knowledge import get_all_knowledge
from repository.knowledge.get_knowledge import get_knowledge
from repository.knowledge.remove_knowledge import remove_knowledge
from routes.authorizations.brain_authorization import (
RoleEnum,
has_brain_authorization,
@ -56,8 +57,6 @@ async def delete_endpoint(
Delete a specific knowledge from a brain.
"""
validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)
brain = Brain(id=brain_id)
knowledge = get_knowledge(knowledge_id)
@ -93,7 +92,10 @@ async def generate_signed_url_endpoint(
validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id)
if knowledge.file_name == None:
raise Exception(f"Knowledge {knowledge_id} has no file_name associated with it")
raise HTTPException(
status_code=404,
detail=f"Knowledge with id {knowledge_id} is not a file.",
)
file_path_in_storage = f"{knowledge.brain_id}/{knowledge.file_name}"