quivr/backend/supabase/migrations/20240320215813_fix_match_vector_function.sql

50 lines
1.8 KiB
PL/PgSQL

set check_function_bodies = off;

-- match_vectors: return the vectors linked to brain `p_brain_id`, most similar
-- to `query_embedding` first, for as long as the cumulative
-- metadata->>'chunk_size' of the rows taken so far stays within `max_chunk_sum`.
--
-- Parameters:
--   query_embedding  embedding compared against vectors.embedding
--   p_brain_id       brain whose vectors (joined through brains_vectors) are searched
--   max_chunk_sum    inclusive upper bound on the running sum of chunk_size
--
-- Returns one row per selected vector (id, brain_id, content, metadata,
-- embedding, similarity), where similarity = 1 - (embedding <=> query_embedding)
-- (`<=>` is pgvector's cosine-distance operator). Rows are ordered by
-- similarity descending, ties broken by vector id so the result is deterministic.
CREATE OR REPLACE FUNCTION public.match_vectors(query_embedding vector, p_brain_id uuid, max_chunk_sum integer)
RETURNS TABLE(id uuid, brain_id uuid, content text, metadata jsonb, embedding vector, similarity double precision)
LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN QUERY
    -- Internal columns carry a `vector_` / `calculated_` prefix because the
    -- RETURNS TABLE column names (id, brain_id, content, ...) are also PL/pgSQL
    -- variables inside this body; reusing them unqualified would be ambiguous.
    WITH ranked_vectors AS (
        -- Every vector of the requested brain with its similarity to the query.
        -- No ORDER BY here: ordering inside a CTE does not carry over to the
        -- outer query, so ranking is done in the window and the final SELECT.
        SELECT
            v.id AS vector_id,
            bv.brain_id AS vector_brain_id,
            v.content AS vector_content,
            v.metadata AS vector_metadata,
            v.embedding AS vector_embedding,
            1 - (v.embedding <=> query_embedding) AS calculated_similarity,
            (v.metadata->>'chunk_size')::integer AS chunk_size
        FROM vectors AS v
        INNER JOIN brains_vectors AS bv
            ON v.id = bv.vector_id
        WHERE bv.brain_id = p_brain_id
    ),
    filtered_vectors AS (
        -- Running total of chunk_size in best-match-first order.
        -- An explicit ROWS frame plus the vector_id tie-breaker makes the total
        -- advance one row at a time; the default RANGE frame would add every
        -- row with an equal similarity at once, so tied rows would be kept or
        -- dropped as a group. NULLS LAST keeps rows without a similarity from
        -- being ranked ahead of real matches (DESC sorts NULLs first by default).
        -- NOTE(review): sum() skips NULL chunk_size values, so a row whose
        -- metadata lacks 'chunk_size' adds nothing to the total, and leading
        -- rows with only NULL sizes get a NULL total and are filtered out
        -- below -- confirm this is the intended handling.
        SELECT
            rv.vector_id,
            rv.vector_brain_id,
            rv.vector_content,
            rv.vector_metadata,
            rv.vector_embedding,
            rv.calculated_similarity,
            rv.chunk_size,
            sum(rv.chunk_size) OVER (
                ORDER BY rv.calculated_similarity DESC NULLS LAST, rv.vector_id
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
            ) AS running_total
        FROM ranked_vectors AS rv
    )
    SELECT
        fv.vector_id AS id,
        fv.vector_brain_id AS brain_id,
        fv.vector_content AS content,
        fv.vector_metadata AS metadata,
        fv.vector_embedding AS embedding,
        fv.calculated_similarity AS similarity
    FROM filtered_vectors AS fv
    WHERE fv.running_total <= max_chunk_sum
    -- Same ordering as the window, so callers receive best matches first.
    ORDER BY fv.calculated_similarity DESC NULLS LAST, fv.vector_id;
END;
$function$
;