From 719c9971f7326e1859aab8120419f888ce8fffd3 Mon Sep 17 00:00:00 2001
From: Stan Girard <girard.stanislas@gmail.com>
Date: Tue, 19 Mar 2024 16:56:45 -0700
Subject: [PATCH] feat: mistral (#2365)

# Description

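Make the Big brain integration answer in the language of the question
and rename the Mistral large model identifier.

- Backend (`Big/Brain.py`): the system and question prompts now ask for
  answers in the language of the question; the imported LangChain
  `CONDENSE_QUESTION_PROMPT` is replaced by a local template that keeps
  the standalone question in the language of the original question; and
  `max_tokens` is now passed to `ChatLiteLLM`.
- Frontend: `mistral/mistral-large` is renamed to
  `mistral/mistral-large-latest` in `defineMaxTokens` and in the
  `openAiFreeModels` list.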

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
---
 .../modules/brain/integrations/Big/Brain.py   | 22 +++++++++++++++----
 frontend/lib/helpers/defineMaxTokens.ts       |  2 +-
 frontend/lib/types/BrainConfig.ts             |  2 +-
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/backend/modules/brain/integrations/Big/Brain.py b/backend/modules/brain/integrations/Big/Brain.py
index 45492d2dd..0a9bad12b 100644
--- a/backend/modules/brain/integrations/Big/Brain.py
+++ b/backend/modules/brain/integrations/Big/Brain.py
@@ -3,7 +3,6 @@ from typing import AsyncIterable
 from uuid import UUID
 
 from langchain.chains import ConversationalRetrievalChain, LLMChain
-from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
 from langchain.chains.question_answering import load_qa_chain
 from langchain_community.chat_models import ChatLiteLLM
 from langchain_core.prompts.chat import (
@@ -36,7 +35,7 @@ class BigBrain(KnowledgeBrainQA):
 
     def get_chain(self):
         system_template = """Combine these summaries in a way that makes sense and answer the user's question.
-        Use markdown or any other techniques to display the content in a nice and aerated way.
+        Use markdown or any other techniques to display the content in a nice and aerated way. Answer in the language of the question.
         Here are user instructions on how to respond: {custom_personality}
         ______________________
         {summaries}"""
@@ -48,7 +47,7 @@ class BigBrain(KnowledgeBrainQA):
 
         ### Question prompt
         question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. 
-        Return any relevant text verbatim.
+        Return any relevant text verbatim. Return the answer in the same language as the question. If the answer is not in the text, just say nothing in the same language as the question.
         {context}
         Question: {question}
         Relevant text, if any, else say Nothing:"""
@@ -56,11 +55,26 @@ class BigBrain(KnowledgeBrainQA):
             template=question_prompt_template, input_variables=["context", "question"]
         )
 
+        ### Condense Question Prompt
+
+        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in exactly the same language as the original question.
+
+        Chat History:
+        {chat_history}
+        Follow Up Input: {question}
+        Standalone question in same language as question:"""
+        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
         api_base = None
         if self.brain_settings.ollama_api_base_url and self.model.startswith("ollama"):
             api_base = self.brain_settings.ollama_api_base_url
 
-        llm = ChatLiteLLM(temperature=0, model=self.model, api_base=api_base)
+        llm = ChatLiteLLM(
+            temperature=0,
+            model=self.model,
+            api_base=api_base,
+            max_tokens=self.max_tokens,
+        )
 
         retriever_doc = self.knowledge_qa.get_retriever()
 
diff --git a/frontend/lib/helpers/defineMaxTokens.ts b/frontend/lib/helpers/defineMaxTokens.ts
index 989fa4f57..58c9b820f 100644
--- a/frontend/lib/helpers/defineMaxTokens.ts
+++ b/frontend/lib/helpers/defineMaxTokens.ts
@@ -19,7 +19,7 @@ export const defineMaxTokens = (
       return 1000;
     case "mistral/mistral-medium":
       return 2000;
-    case "mistral/mistral-large":
+    case "mistral/mistral-large-latest":
       return 2000;
     default:
       return 1000;
diff --git a/frontend/lib/types/BrainConfig.ts b/frontend/lib/types/BrainConfig.ts
index 0a89991f0..d0dc11528 100644
--- a/frontend/lib/types/BrainConfig.ts
+++ b/frontend/lib/types/BrainConfig.ts
@@ -45,7 +45,7 @@ export const openAiFreeModels = [
   "gpt-3.5-turbo-0125",
   "mistral/mistral-small",
   "mistral/mistral-medium",
-  "mistral/mistral-large",
+  "mistral/mistral-large-latest",
 ] as const;
 
 export const openAiPaidModels = [...openAiFreeModels, "gpt-4"] as const;