mirror of
https://github.com/StanGirard/quivr.git
synced 2024-08-16 08:30:28 +03:00
feat(sql): merged all into one and made migration fault tolerant
This commit is contained in:
parent
583e4d6378
commit
537efc834d
12
README.md
12
README.md
@ -101,17 +101,9 @@ cp .frontend_env.example frontend/.env
|
||||
|
||||
- **Step 4**: Run the following migration scripts on the Supabase database via the web interface (SQL Editor -> `New query`)
|
||||
|
||||
[Migration Script 1](scripts/supabase_new_store_documents.sql)
|
||||
[Creation Script 1](scripts/tables.sql)
|
||||
|
||||
[Migration Script 2](scripts/supabase_usage_table.sql)
|
||||
|
||||
[Migration Script 3](scripts/supabase_vector_store_summary.sql)
|
||||
|
||||
[Migrations Script 4](scripts/supabase_users_table.sql)
|
||||
|
||||
[Migration Script 5](scripts/supabase_chats_table.sql)
|
||||
|
||||
[Migration Script 6](supabase/migrations/20230606131110_add_uuid_user_id.sql)
|
||||
> _If you come from an old version of Quivr, you can use the [migration script](scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_
|
||||
|
||||
- **Step 5**: Launch the app
|
||||
|
||||
|
@ -100,11 +100,9 @@ cp .frontend_env.example frontend/.env
|
||||
|
||||
[DB Table Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_new_store_documents.sql)
|
||||
|
||||
[DB Table Creation Script 2](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_usage_table.sql)
|
||||
[Creation Script 1](https://github.com/StanGirard/quivr/tree/main/scripts/tables.sql)
|
||||
|
||||
[DB Table Creation Script 3](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_vector_store_summary.sql)
|
||||
|
||||
[DB Table Creation Script 4](https://github.com/StanGirard/quivr/tree/main/scripts/supabase_users_table.sql)
|
||||
> _If you come from an old version of Quivr, you can use the [migration script](https://github.com/StanGirard/quivr/tree/main/scripts/20230606131110_add_uuid_user_id.sql) to migrate your data to the new version_
|
||||
|
||||
- **Step 5**: Launch the app
|
||||
|
||||
|
33
scripts/20230606131110_add_uuid_user_id.sql
Normal file
33
scripts/20230606131110_add_uuid_user_id.sql
Normal file
@ -0,0 +1,33 @@
|
||||
BEGIN;
|
||||
|
||||
-- Function to check if column exists in a table
|
||||
DO $$
|
||||
BEGIN
|
||||
-- Check if email column doesn't exist, then add it
|
||||
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'email') THEN
|
||||
ALTER TABLE users ADD COLUMN email TEXT;
|
||||
END IF;
|
||||
|
||||
-- Copy user_id to email column only if user_id column exists
|
||||
IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
|
||||
UPDATE users SET email = user_id;
|
||||
END IF;
|
||||
|
||||
-- Check if user_id column exists, then drop it
|
||||
IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
|
||||
ALTER TABLE users DROP COLUMN user_id;
|
||||
END IF;
|
||||
|
||||
-- Check if new user_id column doesn't exist, then add it
|
||||
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'user_id') THEN
|
||||
ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid();
|
||||
ALTER TABLE users ADD PRIMARY KEY (user_id);
|
||||
END IF;
|
||||
|
||||
EXCEPTION WHEN others THEN
|
||||
-- Exception block to catch errors
|
||||
RAISE NOTICE 'An error occurred during migration.';
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
COMMIT;
|
@ -1,7 +0,0 @@
|
||||
create table if not exists chats(
|
||||
chat_id uuid default uuid_generate_v4() primary key,
|
||||
user_id uuid references users(user_id),
|
||||
creation_time timestamp default current_timestamp,
|
||||
history jsonb,
|
||||
chat_name text
|
||||
);
|
@ -1,40 +0,0 @@
|
||||
create extension if not exists vector;
|
||||
|
||||
-- Create a table to store your documents
|
||||
create table if not exists vectors (
|
||||
id bigserial primary key,
|
||||
user_id text, -- new column added here
|
||||
content text, -- corresponds to Document.pageContent
|
||||
metadata jsonb, -- corresponds to Document.metadata
|
||||
embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION match_vectors(query_embedding vector(1536), match_count int, p_user_id text) -- user_id changed to p_user_id here
|
||||
RETURNS TABLE(
|
||||
id bigint,
|
||||
user_id text, -- new column added here
|
||||
content text,
|
||||
metadata jsonb,
|
||||
-- we return matched vectors to enable maximal marginal relevance searches
|
||||
embedding vector(1536),
|
||||
similarity float)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
# variable_conflict use_column
|
||||
BEGIN
|
||||
RETURN query
|
||||
SELECT
|
||||
id,
|
||||
user_id, -- new column added here
|
||||
content,
|
||||
metadata,
|
||||
embedding,
|
||||
1 -(vectors.embedding <=> query_embedding) AS similarity
|
||||
FROM
|
||||
vectors
|
||||
WHERE vectors.user_id = p_user_id -- filter changed here
|
||||
ORDER BY
|
||||
vectors.embedding <=> query_embedding
|
||||
LIMIT match_count;
|
||||
END;
|
||||
$$;
|
@ -1,12 +0,0 @@
|
||||
create table
|
||||
stats (
|
||||
-- A column called "time" with data type "timestamp"
|
||||
time timestamp,
|
||||
-- A column called "details" with data type "text"
|
||||
chat boolean,
|
||||
embedding boolean,
|
||||
details text,
|
||||
metadata jsonb,
|
||||
-- An "integer" primary key column called "id" that is generated always as identity
|
||||
id integer primary key generated always as identity
|
||||
);
|
@ -1,6 +0,0 @@
|
||||
create table if not exists users(
|
||||
user_id uuid,
|
||||
email text,
|
||||
date text,
|
||||
requests_count int
|
||||
);
|
@ -1,38 +0,0 @@
|
||||
-- Create a table to store your summaries
|
||||
create table if not exists summaries (
|
||||
id bigserial primary key,
|
||||
document_id bigint references vectors(id),
|
||||
content text, -- corresponds to the summarized content
|
||||
metadata jsonb, -- corresponds to Document.metadata
|
||||
embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION match_summaries(query_embedding vector(1536), match_count int, match_threshold float)
|
||||
RETURNS TABLE(
|
||||
id bigint,
|
||||
document_id bigint,
|
||||
content text,
|
||||
metadata jsonb,
|
||||
-- we return matched vectors to enable maximal marginal relevance searches
|
||||
embedding vector(1536),
|
||||
similarity float)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
# variable_conflict use_column
|
||||
BEGIN
|
||||
RETURN query
|
||||
SELECT
|
||||
id,
|
||||
document_id,
|
||||
content,
|
||||
metadata,
|
||||
embedding,
|
||||
1 -(summaries.embedding <=> query_embedding) AS similarity
|
||||
FROM
|
||||
summaries
|
||||
WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
|
||||
ORDER BY
|
||||
summaries.embedding <=> query_embedding
|
||||
LIMIT match_count;
|
||||
END;
|
||||
$$;
|
105
scripts/tables.sql
Normal file
105
scripts/tables.sql
Normal file
@ -0,0 +1,105 @@
|
||||
-- Create users table
|
||||
CREATE TABLE IF NOT EXISTS users(
|
||||
user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
||||
email TEXT,
|
||||
date TEXT,
|
||||
requests_count INT
|
||||
);
|
||||
|
||||
-- Create chats table
|
||||
CREATE TABLE IF NOT EXISTS chats(
|
||||
chat_id UUID DEFAULT uuid_generate_v4() PRIMARY KEY,
|
||||
user_id UUID REFERENCES users(user_id),
|
||||
creation_time TIMESTAMP DEFAULT current_timestamp,
|
||||
history JSONB,
|
||||
chat_name TEXT
|
||||
);
|
||||
|
||||
-- Create vector extension
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- Create vectors table
|
||||
CREATE TABLE IF NOT EXISTS vectors (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
user_id TEXT,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
embedding VECTOR(1536)
|
||||
);
|
||||
|
||||
-- Create function to match vectors
|
||||
CREATE OR REPLACE FUNCTION match_vectors(query_embedding VECTOR(1536), match_count INT, p_user_id TEXT)
|
||||
RETURNS TABLE(
|
||||
id BIGINT,
|
||||
user_id TEXT,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
embedding VECTOR(1536),
|
||||
similarity FLOAT
|
||||
) LANGUAGE plpgsql AS $$
|
||||
#variable_conflict use_column
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
id,
|
||||
user_id,
|
||||
content,
|
||||
metadata,
|
||||
embedding,
|
||||
1 - (vectors.embedding <=> query_embedding) AS similarity
|
||||
FROM
|
||||
vectors
|
||||
WHERE vectors.user_id = p_user_id
|
||||
ORDER BY
|
||||
vectors.embedding <=> query_embedding
|
||||
LIMIT match_count;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Create stats table
|
||||
CREATE TABLE stats (
|
||||
time TIMESTAMP,
|
||||
chat BOOLEAN,
|
||||
embedding BOOLEAN,
|
||||
details TEXT,
|
||||
metadata JSONB,
|
||||
id INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY
|
||||
);
|
||||
|
||||
-- Create summaries table
|
||||
CREATE TABLE IF NOT EXISTS summaries (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
document_id BIGINT REFERENCES vectors(id),
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
embedding VECTOR(1536)
|
||||
);
|
||||
|
||||
-- Create function to match summaries
|
||||
CREATE OR REPLACE FUNCTION match_summaries(query_embedding VECTOR(1536), match_count INT, match_threshold FLOAT)
|
||||
RETURNS TABLE(
|
||||
id BIGINT,
|
||||
document_id BIGINT,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
embedding VECTOR(1536),
|
||||
similarity FLOAT
|
||||
) LANGUAGE plpgsql AS $$
|
||||
#variable_conflict use_column
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
id,
|
||||
document_id,
|
||||
content,
|
||||
metadata,
|
||||
embedding,
|
||||
1 - (summaries.embedding <=> query_embedding) AS similarity
|
||||
FROM
|
||||
summaries
|
||||
WHERE 1 - (summaries.embedding <=> query_embedding) > match_threshold
|
||||
ORDER BY
|
||||
summaries.embedding <=> query_embedding
|
||||
LIMIT match_count;
|
||||
END;
|
||||
$$;
|
@ -1,13 +0,0 @@
|
||||
BEGIN;
|
||||
|
||||
-- Create a new column for email and copy the current user_id to it
|
||||
ALTER TABLE users ADD COLUMN email text;
|
||||
UPDATE users SET email = user_id;
|
||||
|
||||
-- Drop the current user_id column
|
||||
ALTER TABLE users DROP COLUMN user_id;
|
||||
|
||||
-- Create a new UUID user_id column and set it as the primary key
|
||||
ALTER TABLE users ADD COLUMN user_id UUID DEFAULT gen_random_uuid() PRIMARY KEY;
|
||||
|
||||
COMMIT;
|
Loading…
Reference in New Issue
Block a user