fix(qa_base): assign max_tokens to llm (#1179)

2024-12-24 03:41:56 +03:00 · 2023-09-15 12:51:53 -04:00 · 2023-09-15 12:51:53 -04:00 · cdf587cfde
commit cdf587cfde
parent 980a704002
2 changed files with 8 additions and 7 deletions
--- a/backend/llm/qa_base.py
+++ b/backend/llm/qa_base.py
@@ -94,7 +94,7 @@ class QABaseBrainPicking(BaseBrainPicking):
        )

    def _create_llm(
-        self, model, temperature=0, streaming=False, callbacks=None
+        self, model, temperature=0, streaming=False, callbacks=None, max_tokens=256
    ) -> BaseLLM:
        """
        Determine the language model to be used.
@@ -105,11 +105,12 @@ class QABaseBrainPicking(BaseBrainPicking):
        """
        return ChatLiteLLM(
            temperature=temperature,
+            max_tokens=max_tokens,
            model=model,
            streaming=streaming,
            verbose=False,
            callbacks=callbacks,
-            openai_api_key=self.openai_api_key,
+            openai_api_key=self.openai_api_key
        )  # pyright: ignore reportPrivateUsage=none

    def _create_prompt_template(self):
@@ -211,7 +212,7 @@ class QABaseBrainPicking(BaseBrainPicking):
        self.callbacks = [callback]

        answering_llm = self._create_llm(
-            model=self.model, streaming=True, callbacks=self.callbacks
+            model=self.model, streaming=True, callbacks=self.callbacks, max_tokens=self.max_tokens
        )

        # The Chain that generates the answer to the question
--- a/backend/routes/chat_routes.py
+++ b/backend/routes/chat_routes.py
@@ -298,10 +298,10 @@ async def create_stream_question_handler(
                else "gpt-3.5-turbo",  # type: ignore
                max_tokens=(brain_details or chat_question).max_tokens
                if current_user.openai_api_key
-                else 0,  # type: ignore
+                else 256,  # type: ignore
                temperature=(brain_details or chat_question).temperature
                if current_user.openai_api_key
-                else 256,  # type: ignore
+                else 0,  # type: ignore
                brain_id=str(brain_id),
                user_openai_api_key=current_user.openai_api_key,  # pyright: ignore reportPrivateUsage=none
                streaming=True,
@@ -314,10 +314,10 @@ async def create_stream_question_handler(
                else "gpt-3.5-turbo",
                temperature=chat_question.temperature
                if current_user.openai_api_key
-                else 256,
+                else 0,
                max_tokens=chat_question.max_tokens
                if current_user.openai_api_key
-                else 0,
+                else 256,
                user_openai_api_key=current_user.openai_api_key,  # pyright: ignore reportPrivateUsage=none
                chat_id=str(chat_id),
                streaming=True,