feat(sql): merged all into one and made migration fault tolerant

2024-08-16 08:30:28 +03:00 · 2023-06-11 21:11:43 +02:00 · 2023-06-11 21:11:43 +02:00 · 537efc834d
commit 537efc834d
parent 583e4d6378
10 changed files with 142 additions and 130 deletions
--- a/README.md
+++ b/README.md
@ -101,17 +101,9 @@ cp .frontend_env.example frontend/.env

 - **Step 4**: Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`)

-[Migration Script 1](scripts/supabase_new_store_documents.sql)
+[Creation Script 1](scripts/tables.sql)

-[Migration Script 2](scripts/supabase_usage_table.sql)
-
-[Migration Script 3](scripts/supabase_vector_store_summary.sql)
-
-[Migrations Script 4](scripts/supabase_users_table.sql)
-
-[Migration Script 5](scripts/supabase_chats_table.sql)
-
-[Migration Script 6](supabase/migrations/20230606131110_add_uuid_user_id.sql)
+> _If you come from an old version of Quivr, you can use the [migration script](scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_

 - **Step 5**: Launch the app

--- a/docs/docs/intro.md
+++ b/docs/docs/intro.md
@ -100,11 +100,9 @@ cp .frontend_env.example frontend/.env

 [DB Table Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_new_store_documents.sql)

-[DB Table Creation Script 2](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_usage_table.sql)
+[Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/tables.sql)

-[DB Table Creation Script 3](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_vector_store_summary.sql)
-
-[DB Table Creation Script 4](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_users_table.sql)
+> _If you come from an old version of Quivr, you can use the [migration script](https://github.com/StanGirard/quivr/tree/main/scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_

 - **Step 5**: Launch the app

--- a/scripts/20230606131110_add_uuid_user_id.sql
+++ b/scripts/20230606131110_add_uuid_user_id.sql
@ -0,0 +1,33 @@
+BEGIN;
+
+-- Function to check if column exists in a table
+DO $$ 
+BEGIN
+    -- Check if email column doesn't exist, then add it
+    IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'email') THEN
+        ALTER TABLE users ADD COLUMN email TEXT;
+    END IF;
+    
+    -- Copy user_id to email column only if user_id column exists
+    IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
+        UPDATE users SET email = user_id;
+    END IF;
+    
+    -- Check if user_id column exists, then drop it
+    IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
+        ALTER TABLE users DROP COLUMN user_id;
+    END IF;
+
+    -- Check if new user_id column doesn't exist, then add it
+    IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
+        ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid();
+        ALTER TABLE users ADD PRIMARY KEY (user_id);
+    END IF;
+    
+EXCEPTION WHEN others THEN
+    -- Exception block to catch errors
+    RAISE NOTICE 'An error occurred during migration.';
+END;
+$$ LANGUAGE plpgsql;
+
+COMMIT;
--- a/scripts/supabase_chats_table.sql
+++ b/scripts/supabase_chats_table.sql
@ -1,7 +0,0 @@
-create table if not exists chats(
-    chat_id uuid default uuid_generate_v4() primary key,
-    user_id uuid references users(user_id),
-    creation_time timestamp default current_timestamp,
-    history jsonb,
-    chat_name text
-);
--- a/scripts/supabase_new_store_documents.sql
+++ b/scripts/supabase_new_store_documents.sql
@ -1,40 +0,0 @@
-create extension if not exists vector;
-
-- Create a table to store your documents
-create table if not exists vectors (
-id bigserial primary key,
-user_id text, -- new column added here
-content text, -- corresponds to Document.pageContent
-metadata jsonb, -- corresponds to Document.metadata
-embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
-);
-
-CREATE OR REPLACE FUNCTION match_vectors(query_embedding vector(1536), match_count int, p_user_id text) -- user_id changed to p_user_id here
-    RETURNS TABLE(
-        id bigint,
-        user_id text, -- new column added here
-        content text,
-        metadata jsonb,
-        -- we return matched vectors to enable maximal marginal relevance searches
-        embedding vector(1536),
-        similarity float)
-    LANGUAGE plpgsql
-    AS $$
-    # variable_conflict use_column
-BEGIN
-    RETURN query
-    SELECT
-        id,
-        user_id, -- new column added here
-        content,
-        metadata,
-        embedding,
-        1 -(vectors.embedding <=> query_embedding) AS similarity
-    FROM
-        vectors
-    WHERE vectors.user_id = p_user_id -- filter changed here
-    ORDER BY
-        vectors.embedding <=> query_embedding
-    LIMIT match_count;
-END;
-$$;
--- a/scripts/supabase_usage_table.sql
+++ b/scripts/supabase_usage_table.sql
@ -1,12 +0,0 @@
-create table
-  stats (
-    -- A column called "time" with data type "timestamp"
-    time timestamp,
-    -- A column called "details" with data type "text"
-    chat boolean,
-    embedding boolean,
-    details text,
-    metadata jsonb,
-    -- An "integer" primary key column called "id" that is generated always as identity
-    id integer primary key generated always as identity
-  );
--- a/scripts/supabase_users_table.sql
+++ b/scripts/supabase_users_table.sql
@ -1,6 +0,0 @@
-create table if not exists users(
-    user_id uuid,
-    email text,
-    date text,
-    requests_count int
-);
--- a/scripts/supabase_vector_store_summary.sql
+++ b/scripts/supabase_vector_store_summary.sql
@ -1,38 +0,0 @@
-- Create a table to store your summaries
-create table if not exists summaries (
-    id bigserial primary key,
-    document_id bigint references vectors(id),
-    content text, -- corresponds to the summarized content
-    metadata jsonb, -- corresponds to Document.metadata
-    embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
-);
-
-CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float)
-    RETURNS TABLE(
-        id bigint,
-        document_id bigint,
-        content text,
-        metadata jsonb,
-        -- we return matched vectors to enable maximal marginal relevance searches
-        embedding vector(1536),
-        similarity float)
-    LANGUAGE plpgsql
-    AS $$
-    # variable_conflict use_column
-BEGIN
-    RETURN query
-    SELECT
-        id,
-        document_id,
-        content,
-        metadata,
-        embedding,
-        1 -(summaries.embedding <=> query_embedding) AS similarity
-    FROM
-        summaries
-    WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
-    ORDER BY
-        summaries.embedding <=> query_embedding
-    LIMIT match_count;
-END;
-$$;
--- a/scripts/tables.sql
+++ b/scripts/tables.sql
@ -0,0 +1,105 @@
+-- Create users table
+CREATE TABLE IF NOT EXISTS users(
+    user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
+    email TEXT,
+    date TEXT,
+    requests_count INT
+);
+
+-- Create chats table
+CREATE TABLE IF NOT EXISTS chats(
+    chat_id UUID DEFAULT uuid_generate_v4() PRIMARY KEY,
+    user_id UUID REFERENCES users(user_id),
+    creation_time TIMESTAMP DEFAULT current_timestamp,
+    history JSONB,
+    chat_name TEXT
+);
+
+-- Create vector extension
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- Create vectors table
+CREATE TABLE IF NOT EXISTS vectors (
+    id BIGSERIAL PRIMARY KEY,
+    user_id TEXT,
+    content TEXT,
+    metadata JSONB,
+    embedding VECTOR(1536)
+);
+
+-- Create function to match vectors
+CREATE OR REPLACE FUNCTION match_vectors(query_embedding VECTOR(1536), match_count INT, p_user_id TEXT)
+RETURNS TABLE(
+    id BIGINT,
+    user_id TEXT,
+    content TEXT,
+    metadata JSONB,
+    embedding VECTOR(1536),
+    similarity FLOAT
+) LANGUAGE plpgsql AS $$
+#variable_conflict use_column
+BEGIN
+    RETURN QUERY
+    SELECT
+        id,
+        user_id,
+        content,
+        metadata,
+        embedding,
+        1 - (vectors.embedding <=> query_embedding) AS similarity
+    FROM
+        vectors
+    WHERE vectors.user_id = p_user_id
+    ORDER BY
+        vectors.embedding <=> query_embedding
+    LIMIT match_count;
+END;
+$$;
+
+-- Create stats table
+CREATE TABLE stats (
+    time TIMESTAMP,
+    chat BOOLEAN,
+    embedding BOOLEAN,
+    details TEXT,
+    metadata JSONB,
+    id INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY
+);
+
+-- Create summaries table
+CREATE TABLE IF NOT EXISTS summaries (
+    id BIGSERIAL PRIMARY KEY,
+    document_id BIGINT REFERENCES vectors(id),
+    content TEXT,
+    metadata JSONB,
+    embedding VECTOR(1536)
+);
+
+-- Create function to match summaries
+CREATE OR REPLACE FUNCTION match_summaries(query_embedding VECTOR(1536), match_count INT, match_threshold FLOAT)
+RETURNS TABLE(
+    id BIGINT,
+    document_id BIGINT,
+    content TEXT,
+    metadata JSONB,
+    embedding VECTOR(1536),
+    similarity FLOAT
+) LANGUAGE plpgsql AS $$
+#variable_conflict use_column
+BEGIN
+    RETURN QUERY
+    SELECT
+        id,
+        document_id,
+        content,
+        metadata,
+        embedding,
+        1 - (summaries.embedding <=> query_embedding) AS similarity
+    FROM
+        summaries
+    WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
+    ORDER BY
+        summaries.embedding <=> query_embedding
+    LIMIT match_count;
+END;
+$$;
--- a/supabase/migrations/20230606131110_add_uuid_user_id.sql
+++ b/supabase/migrations/20230606131110_add_uuid_user_id.sql
@ -1,13 +0,0 @@
-BEGIN;
-
-- Create a new column for email and copy the current user_id to it
-ALTER TABLE users ADD COLUMN email text;
-UPDATE users SET email = user_id;
-
-- Drop the current user_id column
-ALTER TABLE users DROP COLUMN user_id;
-
-- Create a new UUID user_id column and set it as the primary key
-ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY;
-
-COMMIT;