diff --git a/README.md b/README.md index 80ca5a633..a074e3099 100644 --- a/README.md +++ b/README.md @@ -101,17 +101,9 @@ cp .frontend_env.example frontend/.env - **Step 4**: Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`) -[Migration Script 1](scripts/supabase_new_store_documents.sql) +[Creation Script 1](scripts/tables.sql) -[Migration Script 2](scripts/supabase_usage_table.sql) - -[Migration Script 3](scripts/supabase_vector_store_summary.sql) - -[Migrations Script 4](scripts/supabase_users_table.sql) - -[Migration Script 5](scripts/supabase_chats_table.sql) - -[Migration Script 6](supabase/migrations/20230606131110_add_uuid_user_id.sql) +> _If you come from an old version of Quivr, you can use the [migration script](scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_ - **Step 5**: Launch the app diff --git a/docs/docs/intro.md b/docs/docs/intro.md index a5b09110b..a6d14f80d 100644 --- a/docs/docs/intro.md +++ b/docs/docs/intro.md @@ -100,11 +100,9 @@ cp .frontend_env.example frontend/.env [DB Table Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_new_store_documents.sql) -[DB Table Creation Script 2](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_usage_table.sql) +[Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/tables.sql) -[DB Table Creation Script 3](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_vector_store_summary.sql) - -[DB Table Creation Script 4](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_users_table.sql) +> _If you come from an old version of Quivr, you can use the [migration script](https://github.com/StanGirard/quivr/tree/main/scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_ - **Step 5**: Launch the app diff --git a/scripts/20230606131110_add_uuid_user_id.sql b/scripts/20230606131110_add_uuid_user_id.sql new file mode 100644 index 000000000..e81ba700c --- /dev/null +++ b/scripts/20230606131110_add_uuid_user_id.sql @@ -0,0 +1,33 @@ +BEGIN; + +-- Function to check if column exists in a table +DO $$ +BEGIN + -- Check if email column doesn't exist, then add it + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'email') THEN + ALTER TABLE users ADD COLUMN email TEXT; + END IF; + + -- Copy user_id to email column only if user_id column exists + IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN + UPDATE users SET email = user_id; + END IF; + + -- Check if user_id column exists, then drop it + IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN + ALTER TABLE users DROP COLUMN user_id; + END IF; + + -- Check if new user_id column doesn't exist, then add it + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN + ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid(); + ALTER TABLE users ADD PRIMARY KEY (user_id); + END IF; + +EXCEPTION WHEN others THEN + -- Exception block to catch errors + RAISE NOTICE 'An error occurred during migration.'; +END; +$$ LANGUAGE plpgsql; + +COMMIT; diff --git a/scripts/supabase_chats_table.sql b/scripts/supabase_chats_table.sql deleted file mode 100644 index ad87d1bbf..000000000 --- a/scripts/supabase_chats_table.sql +++ /dev/null @@ -1,7 +0,0 @@ -create table if not exists chats( - chat_id uuid default uuid_generate_v4() primary key, - user_id uuid references users(user_id), - creation_time timestamp default current_timestamp, - history jsonb, - chat_name text -); \ No newline at end of file diff --git a/scripts/supabase_new_store_documents.sql b/scripts/supabase_new_store_documents.sql deleted file mode 100644 index 3a721c6b4..000000000 --- a/scripts/supabase_new_store_documents.sql +++ /dev/null @@ -1,40 +0,0 @@ -create extension if not exists vector; - --- Create a table to store your documents -create table if not exists vectors ( -id bigserial primary key, -user_id text, -- new column added here -content text, -- corresponds to Document.pageContent -metadata jsonb, -- corresponds to Document.metadata -embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed -); - -CREATE OR REPLACE FUNCTION match_vectors(query_embedding vector(1536), match_count int, p_user_id text) -- user_id changed to p_user_id here - RETURNS TABLE( - id bigint, - user_id text, -- new column added here - content text, - metadata jsonb, - -- we return matched vectors to enable maximal marginal relevance searches - embedding vector(1536), - similarity float) - LANGUAGE plpgsql - AS $$ - # variable_conflict use_column -BEGIN - RETURN query - SELECT - id, - user_id, -- new column added here - content, - metadata, - embedding, - 1 -(vectors.embedding <=> query_embedding) AS similarity - FROM - vectors - WHERE vectors.user_id = p_user_id -- filter changed here - ORDER BY - vectors.embedding <=> query_embedding - LIMIT match_count; -END; -$$; diff --git a/scripts/supabase_usage_table.sql b/scripts/supabase_usage_table.sql deleted file mode 100644 index 4573ba9c5..000000000 --- a/scripts/supabase_usage_table.sql +++ /dev/null @@ -1,12 +0,0 @@ -create table - stats ( - -- A column called "time" with data type "timestamp" - time timestamp, - -- A column called "details" with data type "text" - chat boolean, - embedding boolean, - details text, - metadata jsonb, - -- An "integer" primary key column called "id" that is generated always as identity - id integer primary key generated always as identity - ); \ No newline at end of file diff --git a/scripts/supabase_users_table.sql b/scripts/supabase_users_table.sql deleted file mode 100644 index bcca78325..000000000 --- a/scripts/supabase_users_table.sql +++ /dev/null @@ -1,6 +0,0 @@ -create table if not exists users( - user_id uuid, - email text, - date text, - requests_count int -); diff --git a/scripts/supabase_vector_store_summary.sql b/scripts/supabase_vector_store_summary.sql deleted file mode 100644 index 59eb5a4e5..000000000 --- a/scripts/supabase_vector_store_summary.sql +++ /dev/null @@ -1,38 +0,0 @@ --- Create a table to store your summaries -create table if not exists summaries ( - id bigserial primary key, - document_id bigint references vectors(id), - content text, -- corresponds to the summarized content - metadata jsonb, -- corresponds to Document.metadata - embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed -); - -CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float) - RETURNS TABLE( - id bigint, - document_id bigint, - content text, - metadata jsonb, - -- we return matched vectors to enable maximal marginal relevance searches - embedding vector(1536), - similarity float) - LANGUAGE plpgsql - AS $$ - # variable_conflict use_column -BEGIN - RETURN query - SELECT - id, - document_id, - content, - metadata, - embedding, - 1 -(summaries.embedding <=> query_embedding) AS similarity - FROM - summaries - WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold - ORDER BY - summaries.embedding <=> query_embedding - LIMIT match_count; -END; -$$; diff --git a/scripts/tables.sql b/scripts/tables.sql new file mode 100644 index 000000000..34dcfe1fd --- /dev/null +++ b/scripts/tables.sql @@ -0,0 +1,105 @@ +-- Create users table +CREATE TABLE IF NOT EXISTS users( + user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY, + email TEXT, + date TEXT, + requests_count INT +); + +-- Create chats table +CREATE TABLE IF NOT EXISTS chats( + chat_id UUID DEFAULT uuid_generate_v4() PRIMARY KEY, + user_id UUID REFERENCES users(user_id), + creation_time TIMESTAMP DEFAULT current_timestamp, + history JSONB, + chat_name TEXT +); + +-- Create vector extension +CREATE EXTENSION IF NOT EXISTS vector; + +-- Create vectors table +CREATE TABLE IF NOT EXISTS vectors ( + id BIGSERIAL PRIMARY KEY, + user_id TEXT, + content TEXT, + metadata JSONB, + embedding VECTOR(1536) +); + +-- Create function to match vectors +CREATE OR REPLACE FUNCTION match_vectors(query_embedding VECTOR(1536), match_count INT, p_user_id TEXT) +RETURNS TABLE( + id BIGINT, + user_id TEXT, + content TEXT, + metadata JSONB, + embedding VECTOR(1536), + similarity FLOAT +) LANGUAGE plpgsql AS $$ +#variable_conflict use_column +BEGIN + RETURN QUERY + SELECT + id, + user_id, + content, + metadata, + embedding, + 1 - (vectors.embedding <=> query_embedding) AS similarity + FROM + vectors + WHERE vectors.user_id = p_user_id + ORDER BY + vectors.embedding <=> query_embedding + LIMIT match_count; +END; +$$; + +-- Create stats table +CREATE TABLE stats ( + time TIMESTAMP, + chat BOOLEAN, + embedding BOOLEAN, + details TEXT, + metadata JSONB, + id INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY +); + +-- Create summaries table +CREATE TABLE IF NOT EXISTS summaries ( + id BIGSERIAL PRIMARY KEY, + document_id BIGINT REFERENCES vectors(id), + content TEXT, + metadata JSONB, + embedding VECTOR(1536) +); + +-- Create function to match summaries +CREATE OR REPLACE FUNCTION match_summaries(query_embedding VECTOR(1536), match_count INT, match_threshold FLOAT) +RETURNS TABLE( + id BIGINT, + document_id BIGINT, + content TEXT, + metadata JSONB, + embedding VECTOR(1536), + similarity FLOAT +) LANGUAGE plpgsql AS $$ +#variable_conflict use_column +BEGIN + RETURN QUERY + SELECT + id, + document_id, + content, + metadata, + embedding, + 1 - (summaries.embedding <=> query_embedding) AS similarity + FROM + summaries + WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold + ORDER BY + summaries.embedding <=> query_embedding + LIMIT match_count; +END; +$$; diff --git a/supabase/migrations/20230606131110_add_uuid_user_id.sql b/supabase/migrations/20230606131110_add_uuid_user_id.sql deleted file mode 100644 index 3e7245b1f..000000000 --- a/supabase/migrations/20230606131110_add_uuid_user_id.sql +++ /dev/null @@ -1,13 +0,0 @@ -BEGIN; - --- Create a new column for email and copy the current user_id to it -ALTER TABLE users ADD COLUMN email text; -UPDATE users SET email = user_id; - --- Drop the current user_id column -ALTER TABLE users DROP COLUMN user_id; - --- Create a new UUID user_id column and set it as the primary key -ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY; - -COMMIT;