feat(sql): merged all into one and made migration fault tolerant

This commit is contained in:
Stan Girard 2023-06-11 21:11:43 +02:00
parent 583e4d6378
commit 537efc834d
10 changed files with 142 additions and 130 deletions

View File

@ -101,17 +101,9 @@ cp .frontend_env.example frontend/.env
- **Step 4**: Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`)
[Migration Script 1](scripts/supabase_new_store_documents.sql)
[Creation Script 1](scripts/tables.sql)
[Migration Script 2](scripts/supabase_usage_table.sql)
[Migration Script 3](scripts/supabase_vector_store_summary.sql)
[Migrations Script 4](scripts/supabase_users_table.sql)
[Migration Script 5](scripts/supabase_chats_table.sql)
[Migration Script 6](supabase/migrations/20230606131110_add_uuid_user_id.sql)
> _If you come from an old version of Quivr, you can use the [migration script](scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_
- **Step 5**: Launch the app

View File

@ -100,11 +100,9 @@ cp .frontend_env.example frontend/.env
[DB Table Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_new_store_documents.sql)
[DB Table Creation Script 2](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_usage_table.sql)
[Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/tables.sql)
[DB Table Creation Script 3](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_vector_store_summary.sql)
[DB Table Creation Script 4](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_users_table.sql)
> _If you come from an old version of Quivr, you can use the [migration script](https://github.com/StanGirard/quivr/tree/main/scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_
- **Step 5**: Launch the app

View File

@ -0,0 +1,33 @@
BEGIN;
-- Function to check if column exists in a table
DO $$
BEGIN
-- Check if email column doesn't exist, then add it
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'email') THEN
ALTER TABLE users ADD COLUMN email TEXT;
END IF;
-- Copy user_id to email column only if user_id column exists
IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
UPDATE users SET email = user_id;
END IF;
-- Check if user_id column exists, then drop it
IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
ALTER TABLE users DROP COLUMN user_id;
END IF;
-- Check if new user_id column doesn't exist, then add it
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid();
ALTER TABLE users ADD PRIMARY KEY (user_id);
END IF;
EXCEPTION WHEN others THEN
-- Exception block to catch errors
RAISE NOTICE 'An error occurred during migration.';
END;
$$ LANGUAGE plpgsql;
COMMIT;

View File

@ -1,7 +0,0 @@
create table if not exists chats(
chat_id uuid default uuid_generate_v4() primary key,
user_id uuid references users(user_id),
creation_time timestamp default current_timestamp,
history jsonb,
chat_name text
);

View File

@ -1,40 +0,0 @@
create extension if not exists vector;
-- Create a table to store your documents
create table if not exists vectors (
id bigserial primary key,
user_id text, -- new column added here
content text, -- corresponds to Document.pageContent
metadata jsonb, -- corresponds to Document.metadata
embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
);
CREATE OR REPLACE FUNCTION match_vectors(query_embedding vector(1536), match_count int, p_user_id text) -- user_id changed to p_user_id here
RETURNS TABLE(
id bigint,
user_id text, -- new column added here
content text,
metadata jsonb,
-- we return matched vectors to enable maximal marginal relevance searches
embedding vector(1536),
similarity float)
LANGUAGE plpgsql
AS $$
# variable_conflict use_column
BEGIN
RETURN query
SELECT
id,
user_id, -- new column added here
content,
metadata,
embedding,
1 -(vectors.embedding <=> query_embedding) AS similarity
FROM
vectors
WHERE vectors.user_id = p_user_id -- filter changed here
ORDER BY
vectors.embedding <=> query_embedding
LIMIT match_count;
END;
$$;

View File

@ -1,12 +0,0 @@
create table
stats (
-- A column called "time" with data type "timestamp"
time timestamp,
-- A column called "details" with data type "text"
chat boolean,
embedding boolean,
details text,
metadata jsonb,
-- An "integer" primary key column called "id" that is generated always as identity
id integer primary key generated always as identity
);

View File

@ -1,6 +0,0 @@
create table if not exists users(
user_id uuid,
email text,
date text,
requests_count int
);

View File

@ -1,38 +0,0 @@
-- Create a table to store your summaries
create table if not exists summaries (
id bigserial primary key,
document_id bigint references vectors(id),
content text, -- corresponds to the summarized content
metadata jsonb, -- corresponds to Document.metadata
embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
);
CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float)
RETURNS TABLE(
id bigint,
document_id bigint,
content text,
metadata jsonb,
-- we return matched vectors to enable maximal marginal relevance searches
embedding vector(1536),
similarity float)
LANGUAGE plpgsql
AS $$
# variable_conflict use_column
BEGIN
RETURN query
SELECT
id,
document_id,
content,
metadata,
embedding,
1 -(summaries.embedding <=> query_embedding) AS similarity
FROM
summaries
WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
ORDER BY
summaries.embedding <=> query_embedding
LIMIT match_count;
END;
$$;

105
scripts/tables.sql Normal file
View File

@ -0,0 +1,105 @@
-- Create users table
CREATE TABLE IF NOT EXISTS users(
user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
email TEXT,
date TEXT,
requests_count INT
);
-- Create chats table
CREATE TABLE IF NOT EXISTS chats(
chat_id UUID DEFAULT uuid_generate_v4() PRIMARY KEY,
user_id UUID REFERENCES users(user_id),
creation_time TIMESTAMP DEFAULT current_timestamp,
history JSONB,
chat_name TEXT
);
-- Create vector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Create vectors table
CREATE TABLE IF NOT EXISTS vectors (
id BIGSERIAL PRIMARY KEY,
user_id TEXT,
content TEXT,
metadata JSONB,
embedding VECTOR(1536)
);
-- Create function to match vectors
CREATE OR REPLACE FUNCTION match_vectors(query_embedding VECTOR(1536), match_count INT, p_user_id TEXT)
RETURNS TABLE(
id BIGINT,
user_id TEXT,
content TEXT,
metadata JSONB,
embedding VECTOR(1536),
similarity FLOAT
) LANGUAGE plpgsql AS $$
#variable_conflict use_column
BEGIN
RETURN QUERY
SELECT
id,
user_id,
content,
metadata,
embedding,
1 - (vectors.embedding <=> query_embedding) AS similarity
FROM
vectors
WHERE vectors.user_id = p_user_id
ORDER BY
vectors.embedding <=> query_embedding
LIMIT match_count;
END;
$$;
-- Create stats table
CREATE TABLE stats (
time TIMESTAMP,
chat BOOLEAN,
embedding BOOLEAN,
details TEXT,
metadata JSONB,
id INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY
);
-- Create summaries table
CREATE TABLE IF NOT EXISTS summaries (
id BIGSERIAL PRIMARY KEY,
document_id BIGINT REFERENCES vectors(id),
content TEXT,
metadata JSONB,
embedding VECTOR(1536)
);
-- Create function to match summaries
CREATE OR REPLACE FUNCTION match_summaries(query_embedding VECTOR(1536), match_count INT, match_threshold FLOAT)
RETURNS TABLE(
id BIGINT,
document_id BIGINT,
content TEXT,
metadata JSONB,
embedding VECTOR(1536),
similarity FLOAT
) LANGUAGE plpgsql AS $$
#variable_conflict use_column
BEGIN
RETURN QUERY
SELECT
id,
document_id,
content,
metadata,
embedding,
1 - (summaries.embedding <=> query_embedding) AS similarity
FROM
summaries
WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
ORDER BY
summaries.embedding <=> query_embedding
LIMIT match_count;
END;
$$;

View File

@ -1,13 +0,0 @@
BEGIN;
-- Create a new column for email and copy the current user_id to it
ALTER TABLE users ADD COLUMN email text;
UPDATE users SET email = user_id;
-- Drop the current user_id column
ALTER TABLE users DROP COLUMN user_id;
-- Create a new UUID user_id column and set it as the primary key
ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY;
COMMIT;