From 874c21f7e49accd5412c83b76072ea39f952daeb Mon Sep 17 00:00:00 2001 From: Stan Girard Date: Mon, 19 Feb 2024 08:12:33 -0800 Subject: [PATCH] Revert "feat: Add pricing calculation method to GPT4Brain class and update user usage in chat controller" (#2215) Reverts QuivrHQ/quivr#2210 --- .../modules/brain/integrations/GPT4/Brain.py | 3 - backend/modules/brain/knowledge_brain_qa.py | 59 ++----------------- backend/modules/brain/qa_interface.py | 6 -- .../chat/controller/chat/brainful_chat.py | 16 +++-- .../chat/controller/chat/test_utils.py | 10 ++-- backend/modules/chat/controller/chat/utils.py | 51 ++++++++++------ .../modules/chat/controller/chat_routes.py | 36 +++++++++-- 7 files changed, 87 insertions(+), 94 deletions(-) diff --git a/backend/modules/brain/integrations/GPT4/Brain.py b/backend/modules/brain/integrations/GPT4/Brain.py index dea230129..505b4d4f9 100644 --- a/backend/modules/brain/integrations/GPT4/Brain.py +++ b/backend/modules/brain/integrations/GPT4/Brain.py @@ -24,9 +24,6 @@ class GPT4Brain(KnowledgeBrainQA): **kwargs, ) - def calculate_pricing(self): - return 3 - def get_chain(self): prompt = ChatPromptTemplate.from_messages( diff --git a/backend/modules/brain/knowledge_brain_qa.py b/backend/modules/brain/knowledge_brain_qa.py index 612b15e57..f710c0c1c 100644 --- a/backend/modules/brain/knowledge_brain_qa.py +++ b/backend/modules/brain/knowledge_brain_qa.py @@ -8,15 +8,10 @@ from llm.utils.get_prompt_to_use import get_prompt_to_use from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id from logger import get_logger from models import BrainSettings -from models.user_usage import UserUsage from modules.brain.qa_interface import QAInterface from modules.brain.rags.quivr_rag import QuivrRAG from modules.brain.rags.rag_interface import RAGInterface from modules.brain.service.brain_service import BrainService -from modules.chat.controller.chat.utils import ( - find_model_and_generate_metadata, - update_user_usage, -) from modules.chat.dto.chats import ChatQuestion, Sources from modules.chat.dto.inputs import CreateChatHistory from modules.chat.dto.outputs import GetChatHistoryOutput @@ -129,12 +124,8 @@ class KnowledgeBrainQA(BaseModel, QAInterface): max_input: int = 2000 streaming: bool = False knowledge_qa: Optional[RAGInterface] = None - user_id: str = None - user_email: str = None - user_usage: Optional[UserUsage] = None - user_settings: Optional[dict] = None - models_settings: Optional[List[dict]] = None metadata: Optional[dict] = None + user_id: str = None callbacks: List[AsyncIteratorCallbackHandler] = ( None # pyright: ignore reportPrivateUsage=none @@ -147,12 +138,11 @@ class KnowledgeBrainQA(BaseModel, QAInterface): model: str, brain_id: str, chat_id: str, + max_tokens: int, streaming: bool = False, prompt_id: Optional[UUID] = None, metadata: Optional[dict] = None, user_id: str = None, - user_email: str = None, - cost: int = 100, **kwargs, ): super().__init__( @@ -170,17 +160,9 @@ class KnowledgeBrainQA(BaseModel, QAInterface): streaming=streaming, **kwargs, ) + self.metadata = metadata + self.max_tokens = max_tokens self.user_id = user_id - self.user_email = user_email - self.user_usage = UserUsage( - id=user_id, - email=user_email, - ) - self.user_settings = self.user_usage.get_user_settings() - - # Get Model settings for the user - self.models_settings = self.user_usage.get_model_settings() - self.increase_usage_user() @property def prompt_to_use(self): @@ -197,39 +179,6 @@ class KnowledgeBrainQA(BaseModel, QAInterface): else: return None - def increase_usage_user(self): - # Raises an error if the user has consumed all of of his credits - - update_user_usage( - usage=self.user_usage, - user_settings=self.user_settings, - cost=self.calculate_pricing(), - ) - - def calculate_pricing(self): - - logger.info("Calculating pricing") - logger.info(f"Model: {self.model}") - logger.info(f"User settings: {self.user_settings}") - logger.info(f"Models settings: {self.models_settings}") - model_to_use = find_model_and_generate_metadata( - self.chat_id, - self.model, - self.user_settings, - self.models_settings, - ) - - self.model = model_to_use.name - self.max_input = model_to_use.max_input - self.max_tokens = model_to_use.max_output - user_choosen_model_price = 1000 - - for model_setting in self.models_settings: - if model_setting["name"] == self.model: - user_choosen_model_price = model_setting["price"] - - return user_choosen_model_price - def generate_answer( self, chat_id: UUID, question: ChatQuestion, save_answer: bool = True ) -> GetChatHistoryOutput: diff --git a/backend/modules/brain/qa_interface.py b/backend/modules/brain/qa_interface.py index 69b9f2c91..a4c18fcde 100644 --- a/backend/modules/brain/qa_interface.py +++ b/backend/modules/brain/qa_interface.py @@ -10,12 +10,6 @@ class QAInterface(ABC): This can be used to implement custom answer generation logic. """ - @abstractmethod - def calculate_pricing(self): - raise NotImplementedError( - "calculate_pricing is an abstract method and must be implemented" - ) - @abstractmethod def generate_answer( self, diff --git a/backend/modules/chat/controller/chat/brainful_chat.py b/backend/modules/chat/controller/chat/brainful_chat.py index a1984585c..58b536482 100644 --- a/backend/modules/chat/controller/chat/brainful_chat.py +++ b/backend/modules/chat/controller/chat/brainful_chat.py @@ -59,22 +59,26 @@ class BrainfulChat(ChatInterface): brain, chat_id, model, + max_tokens, + max_input, temperature, streaming, prompt_id, user_id, - user_email, + metadata, ): if brain and brain.brain_type == BrainType.DOC: return KnowledgeBrainQA( chat_id=chat_id, model=model, + max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, prompt_id=prompt_id, + metadata=metadata, user_id=user_id, - user_email=user_email, ) if brain.brain_type == BrainType.API: @@ -84,16 +88,18 @@ class BrainfulChat(ChatInterface): return APIBrainQA( chat_id=chat_id, model=model, + max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, prompt_id=prompt_id, user_id=user_id, + metadata=metadata, raw=(brain_definition.raw if brain_definition else None), jq_instructions=( brain_definition.jq_instructions if brain_definition else None ), - user_email=user_email, ) if brain.brain_type == BrainType.INTEGRATION: integration_brain = integration_brain_description_service.get_integration_description_by_user_brain_id( @@ -107,10 +113,12 @@ class BrainfulChat(ChatInterface): return integration_class( chat_id=chat_id, model=model, + max_tokens=max_tokens, + max_input=max_input, temperature=temperature, brain_id=str(brain.brain_id), streaming=streaming, prompt_id=prompt_id, + metadata=metadata, user_id=user_id, - user_email=user_email, ) diff --git a/backend/modules/chat/controller/chat/test_utils.py b/backend/modules/chat/controller/chat/test_utils.py index beb66cb50..abbf24e57 100644 --- a/backend/modules/chat/controller/chat/test_utils.py +++ b/backend/modules/chat/controller/chat/test_utils.py @@ -7,8 +7,8 @@ import pytest from fastapi import HTTPException from models.databases.entity import LLMModels from modules.chat.controller.chat.utils import ( + check_user_requests_limit, find_model_and_generate_metadata, - update_user_usage, ) @@ -76,7 +76,7 @@ def test_find_model_and_generate_metadata_user_not_allowed(mock_chat_service): @patch("modules.chat.controller.chat.utils.time") -def test_check_update_user_usage_within_limit(mock_time): +def test_check_user_requests_limit_within_limit(mock_time): mock_time.strftime.return_value = "20220101" usage = Mock() usage.get_user_monthly_usage.return_value = 50 @@ -84,13 +84,13 @@ def test_check_update_user_usage_within_limit(mock_time): models_settings = [{"name": "gpt-3.5-turbo", "price": 10}] model_name = "gpt-3.5-turbo" - update_user_usage(usage, user_settings, models_settings, model_name) + check_user_requests_limit(usage, user_settings, models_settings, model_name) usage.handle_increment_user_request_count.assert_called_once_with("20220101", 10) @patch("modules.chat.controller.chat.utils.time") -def test_update_user_usage_exceeds_limit(mock_time): +def test_check_user_requests_limit_exceeds_limit(mock_time): mock_time.strftime.return_value = "20220101" usage = Mock() usage.get_user_monthly_usage.return_value = 100 @@ -99,7 +99,7 @@ def test_update_user_usage_exceeds_limit(mock_time): model_name = "gpt-3.5-turbo" with pytest.raises(HTTPException) as exc_info: - update_user_usage(usage, user_settings, models_settings, model_name) + check_user_requests_limit(usage, user_settings, models_settings, model_name) assert exc_info.value.status_code == 429 assert ( diff --git a/backend/modules/chat/controller/chat/utils.py b/backend/modules/chat/controller/chat/utils.py index 7fe8eba81..a38f6950e 100644 --- a/backend/modules/chat/controller/chat/utils.py +++ b/backend/modules/chat/controller/chat/utils.py @@ -31,39 +31,44 @@ class NullableUUID(UUID): def find_model_and_generate_metadata( chat_id: UUID, - brain_model: str, + brain, user_settings, models_settings, + metadata_brain, ): - + # Add metadata_brain to metadata + metadata = {} + metadata = {**metadata, **metadata_brain} + follow_up_questions = chat_service.get_follow_up_question(chat_id) + metadata["follow_up_questions"] = follow_up_questions # Default model is gpt-3.5-turbo-0125 default_model = "gpt-3.5-turbo-0125" model_to_use = LLMModels( # TODO Implement default models in database name=default_model, price=1, max_input=4000, max_output=1000 ) - logger.debug("Brain model: %s", brain_model) + logger.info("Brain model: %s", brain.model) # If brain.model is None, set it to the default_model - if brain_model is None: - brain_model = default_model + if brain.model is None: + brain.model = default_model is_brain_model_available = any( - brain_model == model_dict.get("name") for model_dict in models_settings + brain.model == model_dict.get("name") for model_dict in models_settings ) - is_user_allowed_model = brain_model in user_settings.get( + is_user_allowed_model = brain.model in user_settings.get( "models", [default_model] ) # Checks if the model is available in the list of models - logger.debug(f"Brain model: {brain_model}") - logger.debug(f"User models: {user_settings.get('models', [])}") - logger.debug(f"Model available: {is_brain_model_available}") - logger.debug(f"User allowed model: {is_user_allowed_model}") + logger.info(f"Brain model: {brain.model}") + logger.info(f"User models: {user_settings.get('models', [])}") + logger.info(f"Model available: {is_brain_model_available}") + logger.info(f"User allowed model: {is_user_allowed_model}") if is_brain_model_available and is_user_allowed_model: # Use the model from the brain - model_to_use.name = brain_model + model_to_use.name = brain.model for model_dict in models_settings: if model_dict.get("name") == model_to_use.name: model_to_use.price = model_dict.get("price") @@ -71,12 +76,19 @@ def find_model_and_generate_metadata( model_to_use.max_output = model_dict.get("max_output") break + metadata["model"] = model_to_use.name + metadata["max_tokens"] = model_to_use.max_output + metadata["max_input"] = model_to_use.max_input + logger.info(f"Model to use: {model_to_use}") + logger.info(f"Metadata: {metadata}") - return model_to_use + return model_to_use, metadata -def update_user_usage(usage: UserUsage, user_settings, cost: int = 100): +def check_user_requests_limit( + usage: UserUsage, user_settings, models_settings, model_name: str +): """Checks the user requests limit. It checks the user requests limit and raises an exception if the user has reached the limit. By default, the user has a limit of 100 requests per month. The limit can be increased by upgrading the plan. @@ -93,13 +105,18 @@ def update_user_usage(usage: UserUsage, user_settings, cost: int = 100): date = time.strftime("%Y%m%d") monthly_chat_credit = user_settings.get("monthly_chat_credit", 100) - montly_usage = usage.get_user_monthly_usage(date) + daily_user_count = usage.get_user_monthly_usage(date) + user_choosen_model_price = 1000 - if int(montly_usage + cost) > int(monthly_chat_credit): + for model_setting in models_settings: + if model_setting["name"] == model_name: + user_choosen_model_price = model_setting["price"] + + if int(daily_user_count + user_choosen_model_price) > int(monthly_chat_credit): raise HTTPException( status_code=429, # pyright: ignore reportPrivateUsage=none detail=f"You have reached your monthly chat limit of {monthly_chat_credit} requests per months. Please upgrade your plan to increase your daily chat limit.", ) else: - usage.handle_increment_user_request_count(date, cost) + usage.handle_increment_user_request_count(date, user_choosen_model_price) pass diff --git a/backend/modules/chat/controller/chat_routes.py b/backend/modules/chat/controller/chat_routes.py index c4857b873..2b266cba8 100644 --- a/backend/modules/chat/controller/chat_routes.py +++ b/backend/modules/chat/controller/chat_routes.py @@ -11,6 +11,10 @@ from models.settings import BrainSettings, get_supabase_client from models.user_usage import UserUsage from modules.brain.service.brain_service import BrainService from modules.chat.controller.chat.brainful_chat import BrainfulChat +from modules.chat.controller.chat.utils import ( + check_user_requests_limit, + find_model_and_generate_metadata, +) from modules.chat.dto.chats import ChatItem, ChatQuestion from modules.chat.dto.inputs import ( ChatUpdatableProperties, @@ -72,6 +76,12 @@ def get_answer_generator( # Get History history = chat_service.get_chat_history(chat_id) + # Get user settings + user_settings = user_usage.get_user_settings() + + # Get Model settings for the user + models_settings = user_usage.get_model_settings() + # Generic brain, metadata_brain = brain_service.find_brain_from_question( brain_id, chat_question.question, current_user, chat_id, history, vector_store @@ -79,17 +89,35 @@ def get_answer_generator( logger.info(f"Brain: {brain}") - send_telemetry("question_asked", {"model_name": brain.model}) + model_to_use, metadata = find_model_and_generate_metadata( + chat_id, + brain, + user_settings, + models_settings, + metadata_brain, + ) + + # Raises an error if the user has consumed all of of his credits + check_user_requests_limit( + usage=user_usage, + user_settings=user_settings, + models_settings=models_settings, + model_name=model_to_use.name, + ) + + send_telemetry("question_asked", {"model_name": model_to_use.name}) gpt_answer_generator = chat_instance.get_answer_generator( - brain=brain, chat_id=str(chat_id), - model=brain.model, + model=model_to_use.name, + max_tokens=model_to_use.max_output, + max_input=model_to_use.max_input, temperature=0.1, streaming=True, prompt_id=chat_question.prompt_id, user_id=current_user.id, - user_email=current_user.email, + metadata=metadata, + brain=brain, ) return gpt_answer_generator