mirror of
https://github.com/StanGirard/quivr.git
synced 2024-09-11 21:17:36 +03:00
feat: improve delete knowledge performance (#1733)
Issue: https://github.com/StanGirard/quivr/issues/1724
This commit is contained in:
parent
f1ddaca7e2
commit
10e94e5a91
@ -278,36 +278,39 @@ class Brain(Repository):
|
|||||||
|
|
||||||
def delete_file_from_brain(self, brain_id, file_name: str):
    """Detach a file's vectors from a brain and delete the orphaned ones.

    The vectors belonging to ``file_name`` are looked up in the ``vectors``
    table, their links to ``brain_id`` are removed from ``brains_vectors``,
    and every vector that is not linked to any other brain is then deleted
    from ``vectors``. All work is done with batched ``in`` filters instead
    of one round-trip per vector.

    Args:
        brain_id: Identifier of the brain the file is removed from.
        file_name: Value matched against ``metadata->>file_name`` in the
            ``vectors`` table.

    Returns:
        A dict with a single ``"message"`` key confirming the deletion.
    """
    deletion_message = {
        "message": f"File {file_name} in brain {brain_id} has been deleted."
    }

    # First, get the vector_ids associated with the file_name
    file_vectors = (
        self.db.table("vectors")
        .select("id")
        .filter("metadata->>file_name", "eq", file_name)
        .execute()
    )
    file_vectors_ids = [item["id"] for item in file_vectors.data]

    # Nothing to unlink or delete: skip the remaining requests rather than
    # sending filters with an empty id list.
    if not file_vectors_ids:
        return deletion_message

    # The "in" operator takes a parenthesised, comma-separated list
    # ("(1,2,3)"); a raw Python list would be rendered as its repr.
    file_vectors_in = f"({','.join(map(str, file_vectors_ids))})"

    # remove current file vectors from brain vectors
    self.db.table("brains_vectors").delete().filter(
        "vector_id", "in", file_vectors_in
    ).filter("brain_id", "eq", brain_id).execute()

    vectors_used_by_another_brain = (
        self.db.table("brains_vectors")
        .select("vector_id")
        .filter("vector_id", "in", file_vectors_in)
        .filter("brain_id", "neq", brain_id)
        .execute()
    )
    # A set keeps the membership test below O(1) per vector.
    vectors_used_by_another_brain_ids = {
        item["vector_id"] for item in vectors_used_by_another_brain.data
    }

    vectors_no_longer_used_ids = [
        vector_id
        for vector_id in file_vectors_ids
        if vector_id not in vectors_used_by_another_brain_ids
    ]

    # Only vectors that no other brain references are physically deleted.
    if vectors_no_longer_used_ids:
        unused_vectors_in = f"({','.join(map(str, vectors_no_longer_used_ids))})"
        self.db.table("vectors").delete().filter(
            "id", "in", unused_vectors_in
        ).execute()

    return deletion_message
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Query
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
from logger import get_logger
|
from logger import get_logger
|
||||||
from middlewares.auth import AuthBearer, get_current_user
|
from middlewares.auth import AuthBearer, get_current_user
|
||||||
from models import Brain
|
from models import Brain
|
||||||
@ -10,6 +10,7 @@ from repository.files.generate_file_signed_url import generate_file_signed_url
|
|||||||
from repository.knowledge.get_all_knowledge import get_all_knowledge
|
from repository.knowledge.get_all_knowledge import get_all_knowledge
|
||||||
from repository.knowledge.get_knowledge import get_knowledge
|
from repository.knowledge.get_knowledge import get_knowledge
|
||||||
from repository.knowledge.remove_knowledge import remove_knowledge
|
from repository.knowledge.remove_knowledge import remove_knowledge
|
||||||
|
|
||||||
from routes.authorizations.brain_authorization import (
|
from routes.authorizations.brain_authorization import (
|
||||||
RoleEnum,
|
RoleEnum,
|
||||||
has_brain_authorization,
|
has_brain_authorization,
|
||||||
@ -56,8 +57,6 @@ async def delete_endpoint(
|
|||||||
Delete a specific knowledge from a brain.
|
Delete a specific knowledge from a brain.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)
|
|
||||||
|
|
||||||
brain = Brain(id=brain_id)
|
brain = Brain(id=brain_id)
|
||||||
|
|
||||||
knowledge = get_knowledge(knowledge_id)
|
knowledge = get_knowledge(knowledge_id)
|
||||||
@ -93,7 +92,10 @@ async def generate_signed_url_endpoint(
|
|||||||
validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id)
|
validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id)
|
||||||
|
|
||||||
if knowledge.file_name == None:
|
if knowledge.file_name == None:
|
||||||
raise Exception(f"Knowledge {knowledge_id} has no file_name associated with it")
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Knowledge with id {knowledge_id} is not a file.",
|
||||||
|
)
|
||||||
|
|
||||||
file_path_in_storage = f"{knowledge.brain_id}/{knowledge.file_name}"
|
file_path_in_storage = f"{knowledge.brain_id}/{knowledge.file_name}"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user