feat: improve delete knowledge performance (#1733)

Issue: https://github.com/StanGirard/quivr/issues/1724
This commit is contained in:
Mamadou DICKO 2023-11-27 16:47:13 +01:00 committed by GitHub
parent f1ddaca7e2
commit 10e94e5a91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 26 deletions

View File

@@ -278,36 +278,39 @@ class Brain(Repository):
def delete_file_from_brain(self, brain_id, file_name: str): def delete_file_from_brain(self, brain_id, file_name: str):
# First, get the vector_ids associated with the file_name # First, get the vector_ids associated with the file_name
vector_response = ( file_vectors = (
self.db.table("vectors") self.db.table("vectors")
.select("id") .select("id")
.filter("metadata->>file_name", "eq", file_name) .filter("metadata->>file_name", "eq", file_name)
.execute() .execute()
) )
vector_ids = [item["id"] for item in vector_response.data]
# For each vector_id, delete the corresponding entry from the 'brains_vectors' table file_vectors_ids = [item["id"] for item in file_vectors.data]
for vector_id in vector_ids:
self.db.table("brains_vectors").delete().filter(
"vector_id", "eq", vector_id
).filter("brain_id", "eq", brain_id).execute()
# Check if the vector is still associated with any other brains # remove current file vectors from brain vectors
associated_brains_response = ( self.db.table("brains_vectors").delete().filter(
self.db.table("brains_vectors") "vector_id", "in", file_vectors_ids
.select("brain_id") ).filter("brain_id", "eq", brain_id).execute()
.filter("vector_id", "eq", vector_id)
.execute()
)
associated_brains = [
item["brain_id"] for item in associated_brains_response.data
]
# If the vector is not associated with any other brains, delete it from 'vectors' table vectors_used_by_another_brain = (
if not associated_brains: self.db.table("brains_vectors")
self.db.table("vectors").delete().filter( .select("vector_id")
"id", "eq", vector_id .filter("vector_id", "in", file_vectors_ids)
).execute() .filter("brain_id", "neq", brain_id)
.execute()
)
vectors_used_by_another_brain_ids = [
item["vector_id"] for item in vectors_used_by_another_brain.data
]
vectors_no_longer_used_ids = [
id for id in file_vectors_ids if id not in vectors_used_by_another_brain_ids
]
self.db.table("vectors").delete().filter(
"id", "in", vectors_no_longer_used_ids
).execute()
return {"message": f"File {file_name} in brain {brain_id} has been deleted."} return {"message": f"File {file_name} in brain {brain_id} has been deleted."}

View File

@@ -1,6 +1,6 @@
from uuid import UUID from uuid import UUID
from fastapi import APIRouter, Depends, Query from fastapi import APIRouter, Depends, HTTPException, Query
from logger import get_logger from logger import get_logger
from middlewares.auth import AuthBearer, get_current_user from middlewares.auth import AuthBearer, get_current_user
from models import Brain from models import Brain
@@ -10,6 +10,7 @@ from repository.files.generate_file_signed_url import generate_file_signed_url
from repository.knowledge.get_all_knowledge import get_all_knowledge from repository.knowledge.get_all_knowledge import get_all_knowledge
from repository.knowledge.get_knowledge import get_knowledge from repository.knowledge.get_knowledge import get_knowledge
from repository.knowledge.remove_knowledge import remove_knowledge from repository.knowledge.remove_knowledge import remove_knowledge
from routes.authorizations.brain_authorization import ( from routes.authorizations.brain_authorization import (
RoleEnum, RoleEnum,
has_brain_authorization, has_brain_authorization,
@@ -56,8 +57,6 @@ async def delete_endpoint(
Delete a specific knowledge from a brain. Delete a specific knowledge from a brain.
""" """
validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)
brain = Brain(id=brain_id) brain = Brain(id=brain_id)
knowledge = get_knowledge(knowledge_id) knowledge = get_knowledge(knowledge_id)
@@ -93,7 +92,10 @@ async def generate_signed_url_endpoint(
validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id) validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id)
if knowledge.file_name == None: if knowledge.file_name == None:
raise Exception(f"Knowledge {knowledge_id} has no file_name associated with it") raise HTTPException(
status_code=404,
detail=f"Knowledge with id {knowledge_id} is not a file.",
)
file_path_in_storage = f"{knowledge.brain_id}/{knowledge.file_name}" file_path_in_storage = f"{knowledge.brain_id}/{knowledge.file_name}"