quivr/backend/models/databases/supabase/vectors.py
Stan Girard 4d41901106
feat(perf): increased perf embedding and search for files (#1182)
* feat(upload): changed to task

* feat(sha1): added column for better speed
2023-09-15 23:39:29 +02:00

87 lines
2.4 KiB
Python

from models.databases.repository import Repository
class Vector(Repository):
    """Query helpers for the ``vectors`` (and ``summaries``) tables.

    Every method builds a request on the injected Supabase client, executes
    it immediately, and returns whatever ``execute()`` returns; no
    post-processing of the response is done here.
    """

    # Aliased projection shared by the two "batch" lookups: the file name and
    # file size stored in the ``metadata`` JSON, exposed as ``name``/``size``.
    _BATCH_COLUMNS = "name:metadata->>file_name, size:metadata->>file_size"

    def __init__(self, supabase_client):
        """Store the Supabase client used by every query in this class."""
        self.db = supabase_client

    def get_vectors_by_file_name(self, file_name):
        """Fetch the vectors whose ``metadata->>file_name`` equals *file_name*.

        Each row carries the file name, size, extension and URL read from the
        ``metadata`` JSON, the ``content`` column, and the embedded
        ``brains_vectors(brain_id, vector_id)`` relation.

        Returns the response of ``execute()``.
        """
        query = self.db.table("vectors").select(
            "metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url",
            "content",
            "brains_vectors(brain_id,vector_id)",
        )
        return query.match({"metadata->>file_name": file_name}).execute()

    def get_vectors_by_file_sha1(self, file_sha1):
        """Fetch the ``id`` of every vector whose ``file_sha1`` column equals
        *file_sha1*.

        Returns the response of ``execute()``.
        """
        query = self.db.table("vectors").select("id")
        return query.filter("file_sha1", "eq", file_sha1).execute()

    def set_file_sha_from_metadata(self, file_sha1):
        """Copy *file_sha1* into the ``file_sha1`` column of matching vectors.

        Rows are selected by ``metadata->>file_sha1 == file_sha1``. The
        intent is to backfill rows whose dedicated column is still empty.

        NOTE(review): the query does not filter on the current value of the
        ``file_sha1`` column, so every row whose metadata matches is updated,
        not only those with an empty column.

        Returns the response of ``execute()``.
        """
        update = self.db.table("vectors").update({"file_sha1": file_sha1})
        return update.match({"metadata->>file_sha1": file_sha1}).execute()

    def similarity_search(self, query_embedding, table, top_k, threshold):
        """Invoke a remote procedure to run a similarity search.

        *table* is passed as the first argument of ``rpc()`` (presumably the
        name of a database function -- confirm against the callers).
        The procedure receives *query_embedding* as ``query_embedding``,
        *top_k* as ``match_count`` and *threshold* as ``match_threshold``.

        Returns the response of ``execute()``.
        """
        rpc_params = {
            "query_embedding": query_embedding,
            "match_count": top_k,
            "match_threshold": threshold,
        }
        return self.db.rpc(table, rpc_params).execute()

    def update_summary(self, document_id, summary_id):
        """Set ``document_id`` on the ``summaries`` row whose ``id`` equals
        *summary_id*.

        Returns the response of ``execute()``.
        """
        update = self.db.table("summaries").update({"document_id": document_id})
        return update.match({"id": summary_id}).execute()

    def get_vectors_by_batch(self, batch_id):
        """Fetch ``name``/``size`` for the vector whose ``id`` equals
        *batch_id*.

        The select is issued with ``count="exact"``.

        Returns the response of ``execute()``.
        """
        query = self.db.table("vectors").select(
            self._BATCH_COLUMNS,
            count="exact",
        )
        return query.eq("id", batch_id).execute()

    def get_vectors_in_batch(self, batch_ids):
        """Fetch ``name``/``size`` for every vector whose ``id`` is in
        *batch_ids*.

        The select is issued with ``count="exact"``.

        Returns the response of ``execute()``.
        """
        query = self.db.table("vectors").select(
            self._BATCH_COLUMNS,
            count="exact",
        )
        return query.in_("id", batch_ids).execute()