mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-15 17:43:03 +03:00
4d41901106
* feat(upload): changed to task * feat(sha1): added column for better speed
87 lines
2.4 KiB
Python
87 lines
2.4 KiB
Python
from models.databases.repository import Repository
|
|
|
|
|
|
class Vector(Repository):
    """Vector-related queries issued through a Supabase client.

    Every method builds a request on ``self.db`` and returns whatever the
    final ``.execute()`` call returns, without post-processing.
    """

    def __init__(self, supabase_client):
        """Store *supabase_client* on ``self.db`` for use by the query methods."""
        self.db = supabase_client

    def get_vectors_by_file_name(self, file_name):
        """Select rows of ``vectors`` whose ``metadata->>file_name`` is *file_name*.

        The selection asks for the file name/size/extension/url taken from
        ``metadata``, the ``content`` column and the embedded
        ``brains_vectors(brain_id,vector_id)`` relation.
        """
        columns = (
            "metadata->>file_name, metadata->>file_size, metadata->>file_extension, metadata->>file_url",
            "content",
            "brains_vectors(brain_id,vector_id)",
        )
        query = self.db.table("vectors").select(*columns)
        query = query.match({"metadata->>file_name": file_name})
        return query.execute()

    def get_vectors_by_file_sha1(self, file_sha1):
        """Select the ``id`` of rows of ``vectors`` whose ``file_sha1`` equals *file_sha1*."""
        query = self.db.table("vectors").select("id")
        query = query.filter("file_sha1", "eq", file_sha1)
        return query.execute()

    def set_file_sha_from_metadata(self, file_sha1):
        """Write *file_sha1* into the ``file_sha1`` column of matching ``vectors`` rows.

        A row matches when ``metadata->>file_sha1`` equals *file_sha1*. The
        purpose is to fill the dedicated column for rows that so far carry the
        hash only in their metadata; note that the request itself filters on
        the metadata value alone, not on the column being empty.
        """
        query = self.db.table("vectors").update({"file_sha1": file_sha1})
        query = query.match({"metadata->>file_sha1": file_sha1})
        return query.execute()

    def similarity_search(self, query_embedding, table, top_k, threshold):
        """Invoke the RPC named by *table* with the search parameters.

        The payload maps *query_embedding* to ``query_embedding``, *top_k* to
        ``match_count`` and *threshold* to ``match_threshold``.
        """
        params = {
            "query_embedding": query_embedding,
            "match_count": top_k,
            "match_threshold": threshold,
        }
        return self.db.rpc(table, params).execute()

    def update_summary(self, document_id, summary_id):
        """Set ``document_id`` on the ``summaries`` row whose ``id`` is *summary_id*."""
        query = self.db.table("summaries").update({"document_id": document_id})
        query = query.match({"id": summary_id})
        result = query.execute()
        return result

    def get_vectors_by_batch(self, batch_id):
        """Select file name and size (aliased ``name``/``size``) where ``id`` equals *batch_id*.

        The request is made with ``count="exact"``.
        """
        query = self.db.table("vectors").select(
            "name:metadata->>file_name, size:metadata->>file_size",
            count="exact",
        )
        query = query.eq("id", batch_id)
        return query.execute()

    def get_vectors_in_batch(self, batch_ids):
        """Select file name and size (aliased ``name``/``size``) where ``id`` is in *batch_ids*.

        The request is made with ``count="exact"``.
        """
        query = self.db.table("vectors").select(
            "name:metadata->>file_name, size:metadata->>file_size",
            count="exact",
        )
        query = query.in_("id", batch_ids)
        return query.execute()
|