quivr/backend/supabase/migrations/20240207071108_chunk.sql
AmineDiro ca93cb9062
refacto(backend): poetry package manager and chat route refactoring (#2684)
# Description
- Added package manager
- Added precommit checks
- Rewrote dependency injection of Services and Repositories
- Integrate async SQLAlchemy engine
- Migrate Chat repository to SQLModel
- Migrated ChatHistory repository to SQLModel
- User SQLModel
- Unit test methodology with db rollback
- Unit tests ChatRepository
- Test ChatService get_history
- Brain entity SQLModel
- Prompt SQLModel
- Rewrite chat/{chat_id}/question route
- updated docker files and docker compose in dev and production

Added `quivr_core` subpackages:
- Refactored KnowledgebrainQa
- Added Rag service to interface with non-rag dependencies

---------

Co-authored-by: aminediro <aminediro@github.com>
2024-06-26 00:58:55 -07:00

60 lines
2.3 KiB
PL/PgSQL

alter table "public"."brains_vectors" drop constraint "brains_vectors_vector_id_fkey";
drop function if exists "public"."match_vectors"(query_embedding vector, match_count integer, p_brain_id uuid);
CREATE INDEX vectors_metadata_idx ON public.vectors USING gin (metadata);
alter table "public"."brains_vectors" add constraint "brains_vectors_vector_id_fkey" FOREIGN KEY (vector_id) REFERENCES vectors(id) ON UPDATE CASCADE ON DELETE CASCADE not valid;
alter table "public"."brains_vectors" validate constraint "brains_vectors_vector_id_fkey";
set check_function_bodies = off;
-- match_vectors: return the vectors of one brain that are most similar to a
-- query embedding, stopping once a cumulative chunk-size budget is exhausted.
--
-- Parameters:
--   query_embedding  embedding compared against vectors.embedding
--   p_brain_id       only vectors linked to this brain via brains_vectors
--   max_chunk_sum    upper bound on the running sum of metadata->>'chunk_size'
--
-- Returns one row per selected vector, most similar first, where
--   similarity = 1 - (embedding <=> query_embedding).
--
-- Notes:
--   * The budget is consumed in descending similarity order, so the best
--     matches are kept and the weakest ones are cut off.
--   * sum() skips NULL chunk_size values; a row whose running total is still
--     NULL does not satisfy the budget predicate and is not returned.
--     NOTE(review): confirm every vector carries metadata.chunk_size.
CREATE OR REPLACE FUNCTION public.match_vectors(query_embedding vector, p_brain_id uuid, max_chunk_sum integer)
RETURNS TABLE(id uuid, brain_id uuid, content text, metadata jsonb, embedding vector, similarity double precision)
LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN QUERY
    -- Score every vector that belongs to the requested brain. Column aliases
    -- are prefixed so they cannot collide with the OUT columns of the
    -- RETURNS TABLE clause, which are visible as variables in this body.
    WITH scored_vectors AS (
        SELECT
            v.id AS vector_id,
            bv.brain_id AS vector_brain_id,
            v.content AS vector_content,
            v.metadata AS vector_metadata,
            v.embedding AS vector_embedding,
            1 - (v.embedding <=> query_embedding) AS calculated_similarity,
            (v.metadata->>'chunk_size')::integer AS chunk_size
        FROM vectors AS v
        INNER JOIN brains_vectors AS bv
            ON v.id = bv.vector_id
        WHERE bv.brain_id = p_brain_id
    ),
    -- Accumulate chunk sizes from the most similar vector downwards.
    -- An explicit ROWS frame plus the vector_id tie-breaker makes the running
    -- total advance one row at a time even when similarities are equal
    -- (the default RANGE frame would add all tied rows at once).
    -- NULLS LAST keeps rows without a similarity from being ranked first.
    budgeted_vectors AS (
        SELECT
            sv.vector_id,
            sv.vector_brain_id,
            sv.vector_content,
            sv.vector_metadata,
            sv.vector_embedding,
            sv.calculated_similarity,
            sum(sv.chunk_size) OVER (
                ORDER BY sv.calculated_similarity DESC NULLS LAST, sv.vector_id
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
            ) AS running_total
        FROM scored_vectors AS sv
    )
    SELECT
        bgv.vector_id AS id,
        bgv.vector_brain_id AS brain_id,
        bgv.vector_content AS content,
        bgv.vector_metadata AS metadata,
        bgv.vector_embedding AS embedding,
        bgv.calculated_similarity AS similarity
    FROM budgeted_vectors AS bgv
    WHERE bgv.running_total <= max_chunk_sum
    -- Return the best matches first, in the same order the budget was spent.
    ORDER BY bgv.calculated_similarity DESC NULLS LAST, bgv.vector_id;
END;
$function$
;