from dataclasses import dataclass
from typing import Any, Iterable, List


@dataclass
class DocumentAnswer:
    """One source file that matched a question.

    Instances are built from the metadata of vector-store hits; the brain
    route returns them to the client under the ``"docs"`` key of
    ``POST /brains/{brain_id}/documents``.
    """

    file_name: str
    file_sha1: str
    file_size: int
    file_url: str = ""  # never populated by this module
    file_id: str = ""  # copied from metadata["id"] when present
    file_similarity: float = 0.0  # copied from metadata["similarity"] when present


def documents_to_answers(documents: Iterable[Any]) -> List[DocumentAnswer]:
    """Collapse vector-store hits into one ``DocumentAnswer`` per file.

    Args:
        documents: Objects exposing a ``metadata`` mapping that contains at
            least ``file_name``, ``file_sha1`` and ``file_size``.

    Returns:
        One answer per distinct ``file_sha1``, in input order. The first hit
        seen for a file supplies ``file_id`` and ``file_similarity``.

    Raises:
        KeyError: If a document lacks ``file_name``, ``file_sha1`` or
            ``file_size``; those fields have no sensible default.
    """
    seen_sha1s = set()  # O(1) membership instead of scanning a list per hit
    answers: List[DocumentAnswer] = []

    for document in documents:
        metadata = document.metadata
        file_sha1 = metadata["file_sha1"]
        if file_sha1 in seen_sha1s:
            continue
        seen_sha1s.add(file_sha1)

        answers.append(
            DocumentAnswer(
                file_name=metadata["file_name"],
                file_sha1=file_sha1,
                file_size=metadata["file_size"],
                # "id" and "similarity" are injected by
                # CustomSupabaseVectorStore; fall back to the dataclass
                # defaults so documents from any other source do not crash.
                file_id=metadata.get("id", ""),
                file_similarity=metadata.get("similarity", 0.0),
            )
        )

    return answers


def get_question_context_from_brain(
    brain_id: UUID,
    question: str,
    *,
    k: int = 20,
    threshold: float = 0.8,
) -> List[DocumentAnswer]:
    """Return the distinct files of a brain that match ``question``.

    Args:
        brain_id: Brain whose vectors are searched.
        question: Free-text question used for the similarity search.
        k: Maximum number of hits requested from the vector store.
        threshold: Similarity threshold forwarded to the vector store.

    Returns:
        One ``DocumentAnswer`` per matching file (de-duplicated on
        ``file_sha1``). Despite the historical function name this is a list
        of documents, not a concatenated context string.
    """
    # TODO: Move to AnswerGenerator service
    supabase_client = get_supabase_client()
    # NOTE(review): the statements from here up to ``table_name`` sit between
    # two diff hunks and were reconstructed from the visible imports and
    # keyword arguments -- confirm against the real file before applying.
    embeddings = get_embeddings()

    vector_store = CustomSupabaseVectorStore(
        supabase_client,
        embeddings,
        table_name="vectors",
        brain_id=str(brain_id),
    )
    documents = vector_store.similarity_search(question, k=k, threshold=threshold)

    return documents_to_answers(documents)
/**
 * One entry of the `docs` array returned by `POST /brains/{brainId}/documents`.
 * Field names mirror the backend `DocumentAnswer` dataclass.
 *
 * NOTE(review): if the file already imports a type for this payload, use that
 * type instead of this local declaration.
 */
export type BrainDocument = {
  file_name: string;
  file_sha1: string;
  file_size: number;
  file_url: string;
  file_id: string;
  file_similarity: number;
};

/**
 * Ask a brain which of its documents match a question.
 *
 * @param brainId - Id of the brain to query.
 * @param question - Free-text question sent in the request body.
 * @param axiosInstance - Axios instance used to perform the request.
 * @returns The `docs` array of the response body.
 */
export const getDocsFromQuestion = async (
  brainId: string,
  question: string,
  axiosInstance: AxiosInstance
): Promise<BrainDocument[]> => {
  // The explicit generic types `response.data`, so `.docs` is checked.
  const response = await axiosInstance.post<{ docs: BrainDocument[] }>(
    `/brains/${brainId}/documents`,
    { question }
  );

  return response.data.docs;
};