From fbddb70f817b37f59b4294169da840d8ae3a64f2 Mon Sep 17 00:00:00 2001 From: Stan Girard Date: Sun, 21 Apr 2024 14:50:44 -0700 Subject: [PATCH] fix(sources): Remove duplicate sources and add metadata to model response (#2462) This pull request fixes the issue of duplicate sources in the model response and adds metadata to the response. It removes duplicate sources with the same name and creates a list of unique sources. Additionally, it includes the generated URLs and sources in the metadata of the model response. --- backend/modules/brain/knowledge_brain_qa.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/backend/modules/brain/knowledge_brain_qa.py b/backend/modules/brain/knowledge_brain_qa.py index 2c9be2b5a..e0eae4d01 100644 --- a/backend/modules/brain/knowledge_brain_qa.py +++ b/backend/modules/brain/knowledge_brain_qa.py @@ -50,6 +50,11 @@ def generate_source(source_documents, brain_id): # Initialize a dictionary for storing generated URLs generated_urls = {} + # remove duplicate sources with same name and create a list of unique sources + source_documents = list( + {v.metadata["file_name"]: v for v in source_documents}.values() + ) + # Get source documents from the result, default to an empty list if not found # If source documents exist @@ -242,6 +247,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): transformed_history, streamed_chat_history = ( self.initialize_streamed_chat_history(chat_id, question) ) + metadata = self.metadata or {} model_response = conversational_qa_chain.invoke( { "question": question.question, @@ -252,6 +258,11 @@ class KnowledgeBrainQA(BaseModel, QAInterface): } ) + sources = model_response["docs"] or [] + if len(sources) > 0: + sources_list = generate_source(sources, self.brain_id) + metadata["sources"] = sources_list + answer = model_response["answer"].content if save_answer: @@ -280,6 +291,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): "brain_name": self.brain.name if self.brain else None, "message_id": new_chat.message_id, "brain_id": str(self.brain.brain_id) if self.brain else None, + "metadata": metadata, } ) @@ -295,6 +307,7 @@ class KnowledgeBrainQA(BaseModel, QAInterface): "brain_name": None, "message_id": None, "brain_id": str(self.brain.brain_id) if self.brain else None, + "metadata": metadata, } )