From cdf587cfde934cec373f1172527e46b9b1c45134 Mon Sep 17 00:00:00 2001
From: "Gustavo J. Maciel" <40073861+B0rrA@users.noreply.github.com>
Date: Fri, 15 Sep 2023 12:51:53 -0400
Subject: [PATCH] fix(qa_base): assign max_tokens to llm (#1179)

---
 backend/llm/qa_base.py        | 7 ++++---
 backend/routes/chat_routes.py | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/backend/llm/qa_base.py b/backend/llm/qa_base.py
index 117d9a25b..7669d3e0e 100644
--- a/backend/llm/qa_base.py
+++ b/backend/llm/qa_base.py
@@ -94,7 +94,7 @@ class QABaseBrainPicking(BaseBrainPicking):
         )
 
     def _create_llm(
-        self, model, temperature=0, streaming=False, callbacks=None
+        self, model, temperature=0, streaming=False, callbacks=None, max_tokens=256
     ) -> BaseLLM:
         """
         Determine the language model to be used.
@@ -105,11 +105,12 @@ class QABaseBrainPicking(BaseBrainPicking):
         """
         return ChatLiteLLM(
             temperature=temperature,
+            max_tokens=max_tokens,
             model=model,
             streaming=streaming,
             verbose=False,
             callbacks=callbacks,
-            openai_api_key=self.openai_api_key,
+            openai_api_key=self.openai_api_key
         )  # pyright: ignore reportPrivateUsage=none
 
     def _create_prompt_template(self):
@@ -211,7 +212,7 @@ class QABaseBrainPicking(BaseBrainPicking):
         self.callbacks = [callback]
 
         answering_llm = self._create_llm(
-            model=self.model, streaming=True, callbacks=self.callbacks
+            model=self.model, streaming=True, callbacks=self.callbacks, max_tokens=self.max_tokens
         )
 
         # The Chain that generates the answer to the question
diff --git a/backend/routes/chat_routes.py b/backend/routes/chat_routes.py
index 77572dbe0..8a2e128c0 100644
--- a/backend/routes/chat_routes.py
+++ b/backend/routes/chat_routes.py
@@ -298,10 +298,10 @@ async def create_stream_question_handler(
             else "gpt-3.5-turbo",  # type: ignore
             max_tokens=(brain_details or chat_question).max_tokens
             if current_user.openai_api_key
-            else 0,  # type: ignore
+            else 256,  # type: ignore
             temperature=(brain_details or chat_question).temperature
             if current_user.openai_api_key
-            else 256,  # type: ignore
+            else 0,  # type: ignore
             brain_id=str(brain_id),
             user_openai_api_key=current_user.openai_api_key,  # pyright: ignore reportPrivateUsage=none
             streaming=True,
@@ -314,10 +314,10 @@ async def create_stream_question_handler(
             else "gpt-3.5-turbo",
             temperature=chat_question.temperature
             if current_user.openai_api_key
-            else 256,
+            else 0,
             max_tokens=chat_question.max_tokens
             if current_user.openai_api_key
-            else 0,
+            else 256,
             user_openai_api_key=current_user.openai_api_key,  # pyright: ignore reportPrivateUsage=none
             chat_id=str(chat_id),
             streaming=True,
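
Reviewer note on the qa_base.py hunks: before this patch, _create_llm never
forwarded a token limit, so ChatLiteLLM always used its library default no
matter what the user configured. A minimal sketch of the fixed flow follows;
the BrainStub class and the direct call are illustrative only (the real class
is QABaseBrainPicking in backend/llm/qa_base.py), the import path is an
assumption, and the ChatLiteLLM keyword arguments mirror the diff.

    # Illustrative sketch, not repo code.
    from langchain.chat_models import ChatLiteLLM  # import path assumed

    class BrainStub:
        openai_api_key = "sk-..."  # placeholder, not a real key
        model = "gpt-3.5-turbo"
        max_tokens = 256           # user-configured limit

        def _create_llm(self, model, temperature=0, streaming=False,
                        callbacks=None, max_tokens=256):
            # max_tokens is now an explicit parameter with the same
            # default (256) the route handlers fall back to.
            return ChatLiteLLM(
                temperature=temperature,
                max_tokens=max_tokens,
                model=model,
                streaming=streaming,
                verbose=False,
                callbacks=callbacks,
                openai_api_key=self.openai_api_key,
            )

    brain = BrainStub()
    # The configured limit now actually reaches the model:
    llm = brain._create_llm(model=brain.model, streaming=True,
                            max_tokens=brain.max_tokens)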
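
Reviewer note on the chat_routes.py hunks: the fallback values for requests
without a user OpenAI key were swapped, yielding temperature=256 (invalid)
and max_tokens=0 (no output budget). A hypothetical helper, not in the repo,
just to make the corrected behavior concrete:

    # Hypothetical helper capturing the corrected fallback logic.
    def resolve_params(has_user_key: bool, temperature: float,
                       max_tokens: int) -> tuple[float, int]:
        if has_user_key:
            return temperature, max_tokens  # honor the request as-is
        return 0.0, 256                     # was (256, 0): values were swapped

    assert resolve_params(False, 0.7, 1024) == (0.0, 256)
    assert resolve_params(True, 0.7, 1024) == (0.7, 1024)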