mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-14 17:03:29 +03:00
feat(perf): increased perf embedding and search for files (#1182)
* feat(upload): changed to task
* feat(sha1): added column for better speed
This commit is contained in:
parent
cdf587cfde
commit
4d41901106
@ -1,5 +1,7 @@
|
||||
from celery import shared_task
|
||||
from models.brains import Brain
|
||||
from models.settings import get_supabase_db
|
||||
|
||||
from repository.files.upload_file import DocumentSerializable
|
||||
from utils.vectors import Neurons
|
||||
|
||||
@ -12,6 +14,9 @@ def create_embedding_for_document(
|
||||
doc = DocumentSerializable.from_json(doc_with_metadata)
|
||||
created_vector = neurons.create_vector(doc, user_openai_api_key)
|
||||
# add_usage(stats_db, "embedding", "audio", metadata={"file_name": file_meta_name,"file_type": ".txt", "chunk_size": chunk_size, "chunk_overlap": chunk_overlap})
|
||||
database = get_supabase_db()
|
||||
database.set_file_sha_from_metadata(file_sha1)
|
||||
|
||||
|
||||
created_vector_id = created_vector[0] # pyright: ignore reportPrivateUsage=none
|
||||
|
||||
|
@ -172,7 +172,7 @@ class Brain(Repository):
|
||||
vectorsResponse = (
|
||||
self.db.table("vectors")
|
||||
.select("id")
|
||||
.filter("metadata->>file_sha1", "eq", file_sha1)
|
||||
.filter("file_sha1", "eq", file_sha1)
|
||||
.execute()
|
||||
)
|
||||
return vectorsResponse.data
|
||||
|
@ -9,7 +9,7 @@ class File(Repository):
|
||||
response = (
|
||||
self.db.table("vectors")
|
||||
.select("id")
|
||||
.filter("metadata->>file_sha1", "eq", file_sha1)
|
||||
.filter("file_sha1", "eq", file_sha1)
|
||||
.execute()
|
||||
)
|
||||
return response.data
|
||||
|
@ -23,7 +23,18 @@ class Vector(Repository):
|
||||
response = (
|
||||
self.db.table("vectors")
|
||||
.select("id")
|
||||
.filter("metadata->>file_sha1", "eq", file_sha1)
|
||||
.filter("file_sha1", "eq", file_sha1)
|
||||
.execute()
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def set_file_sha_from_metadata(self, file_sha1):
|
||||
# Backfills the file_sha1 column for vectors whose metadata contains the corresponding file_sha1 (intended for rows where the file_sha1 column is still empty).
|
||||
response = (
|
||||
self.db.table("vectors")
|
||||
.update({"file_sha1": file_sha1})
|
||||
.match({"metadata->>file_sha1": file_sha1})
|
||||
.execute()
|
||||
)
|
||||
|
||||
|
21
scripts/202309157004032_add_sha1_column.sql
Normal file
21
scripts/202309157004032_add_sha1_column.sql
Normal file
@ -0,0 +1,21 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
-- Check if file_sha1 column does not exist
|
||||
IF NOT EXISTS(SELECT 1 FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'vectors' AND column_name = 'file_sha1') THEN
|
||||
-- Add the file_sha1 column
|
||||
ALTER TABLE public.vectors ADD COLUMN file_sha1 TEXT;
|
||||
|
||||
-- Populate file_sha1 using metadata JSONB column
|
||||
UPDATE public.vectors SET file_sha1 = metadata->>'file_sha1';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
|
||||
-- Update migrations table
|
||||
INSERT INTO migrations (name)
|
||||
SELECT '202309157004032_add_sha1_column'
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM migrations WHERE name = '202309157004032_add_sha1_column'
|
||||
);
|
||||
|
||||
COMMIT;
|
@ -24,6 +24,7 @@ CREATE EXTENSION IF NOT EXISTS vector;
|
||||
CREATE TABLE IF NOT EXISTS vectors (
|
||||
id UUID DEFAULT uuid_generate_v4() PRIMARY KEY,
|
||||
content TEXT,
|
||||
file_sha1 TEXT,
|
||||
metadata JSONB,
|
||||
embedding VECTOR(1536)
|
||||
);
|
||||
@ -248,9 +249,9 @@ CREATE POLICY "Access Quivr Storage 1jccrwz_2" ON storage.objects FOR UPDATE TO
|
||||
CREATE POLICY "Access Quivr Storage 1jccrwz_3" ON storage.objects FOR DELETE TO anon USING (bucket_id = 'quivr');
|
||||
|
||||
INSERT INTO migrations (name)
|
||||
SELECT '20230913110420_add_storage_bucket'
|
||||
SELECT '202309157004032_add_sha1_column'
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM migrations WHERE name = '20230913110420_add_storage_bucket'
|
||||
SELECT 1 FROM migrations WHERE name = '202309157004032_add_sha1_column'
|
||||
);
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user