mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-16 18:52:12 +03:00
ca93cb9062
# Description - Added package manager - Added precommit checks - Rewrote dependency injection of Services and Repositories - Integrate async SQLAlchemy engine - Migrate Chat repository to SQLModel - Migrated ChatHistory repository to SQLModel - User SQLModel - Unit test methodology with db rollback - Unit tests ChatRepository - Test ChatService get_history - Brain entity SQL Model - Prompt SQLModel - Rewrite chat/{chat_id}/question route - Updated docker files and docker compose in dev and production Added `quivr_core` subpackages: - Refactored KnowledgebrainQa - Added Rag service to interface with non-rag dependencies --------- Co-authored-by: aminediro <aminediro@github.com>
51 lines
1.8 KiB
PL/PgSQL
51 lines
1.8 KiB
PL/PgSQL
-- Disable validation of routine bodies at CREATE FUNCTION time for this session.
-- NOTE(review): presumably set so the migration applies even when objects the
-- function body references are not yet resolvable -- confirm.
SET check_function_bodies TO off;
|
|
|
|
|
|
-- match_vectors: return the vectors attached to one brain that are most similar
-- to a query embedding, greedily filling a chunk-size budget.
--
-- Parameters:
--   query_embedding  embedding to compare every stored vector against.
--   p_brain_id       only vectors linked to this brain through brains_vectors
--                    are considered.
--   max_chunk_sum    upper bound on the cumulative metadata->>'chunk_size' of
--                    the returned rows.
--
-- Returns one row per selected vector (id, brain_id, content, metadata,
-- embedding, similarity), ordered from most to least similar.
-- similarity is computed as 1 - (embedding <=> query_embedding); `<=>` is the
-- cosine-distance operator if the `vector` type is pgvector's -- confirm.
--
-- Selection rule: rows are ranked by similarity (highest first) and a running
-- total of chunk_size is accumulated row by row; a row is returned while the
-- running total, including that row, is still <= max_chunk_sum.
CREATE OR REPLACE FUNCTION public.match_vectors(query_embedding vector, p_brain_id uuid, max_chunk_sum integer)
 RETURNS TABLE(id uuid, brain_id uuid, content text, metadata jsonb, embedding vector, similarity double precision)
 LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN QUERY
    -- Candidate vectors of the requested brain with their similarity score.
    -- Columns are aliased (vector_*) so they never collide with the OUT
    -- columns declared in RETURNS TABLE, which are visible as variables here.
    WITH ranked_vectors AS (
        SELECT
            v.id AS vector_id,
            bv.brain_id AS vector_brain_id,
            v.content AS vector_content,
            v.metadata AS vector_metadata,
            v.embedding AS vector_embedding,
            1 - (v.embedding <=> query_embedding) AS calculated_similarity,
            (v.metadata->>'chunk_size')::integer AS chunk_size
        FROM
            vectors v
        INNER JOIN
            brains_vectors bv ON v.id = bv.vector_id
        WHERE
            bv.brain_id = p_brain_id
        -- No ORDER BY here: ordering inside a CTE is not guaranteed to carry
        -- over; ranking is done by the window and the final ORDER BY below.
    ),
    -- Running total of chunk sizes in ranking order.
    filtered_vectors AS (
        SELECT
            rv.vector_id,
            rv.vector_brain_id,
            rv.vector_content,
            rv.vector_metadata,
            rv.vector_embedding,
            rv.calculated_similarity,
            rv.chunk_size,
            -- ROWS frame: accumulate strictly row by row. The default RANGE
            -- frame would give all rows tied on similarity the same total,
            -- keeping or dropping the whole tied group at once.
            -- NULLS LAST: a NULL similarity must not outrank real matches
            -- (DESC sorts NULLs first by default).
            -- vector_id makes the order of ties deterministic.
            -- COALESCE: a missing chunk_size counts as 0 so such rows are
            -- treated the same wherever they appear in the ranking.
            sum(COALESCE(rv.chunk_size, 0)) OVER (
                ORDER BY rv.calculated_similarity DESC NULLS LAST, rv.vector_id
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
            ) AS running_total
        FROM ranked_vectors rv
    )
    SELECT
        fv.vector_id AS id,
        fv.vector_brain_id AS brain_id,
        fv.vector_content AS content,
        fv.vector_metadata AS metadata,
        fv.vector_embedding AS embedding,
        fv.calculated_similarity AS similarity
    FROM filtered_vectors fv
    WHERE fv.running_total <= max_chunk_sum
    -- Return best matches first, in the same order the budget was applied.
    ORDER BY fv.calculated_similarity DESC NULLS LAST, fv.vector_id;
END;
$function$
;
|