diff --git a/.env.example b/.env.example index aa780636e..ba88fdfa3 100644 --- a/.env.example +++ b/.env.example @@ -37,9 +37,6 @@ CELEBRY_BROKER_QUEUE_NAME=quivr-preview.fifo QUIVR_DOMAIN=http://localhost:3000/ #COHERE_API_KEY=CHANGE_ME - - - #RESEND RESEND_API_KEY= RESEND_EMAIL_ADDRESS=onboarding@resend.dev @@ -48,9 +45,9 @@ RESEND_CONTACT_SALES_TO= CRAWL_DEPTH=1 - PREMIUM_MAX_BRAIN_NUMBER=30 PREMIUM_MAX_BRAIN_SIZE=10000000 PREMIUM_DAILY_CHAT_CREDIT=100 - +# BRAVE SEARCH API KEY +#BRAVE_SEARCH_API_KEY=CHANGE_ME diff --git a/backend/modules/brain/integrations/GPT4/Brain.py b/backend/modules/brain/integrations/GPT4/Brain.py index eb4457932..962c85312 100644 --- a/backend/modules/brain/integrations/GPT4/Brain.py +++ b/backend/modules/brain/integrations/GPT4/Brain.py @@ -1,16 +1,9 @@ import json import operator -from typing import Annotated, AsyncIterable, List, Optional, Sequence, Type, TypedDict +from typing import Annotated, AsyncIterable, List, Sequence, TypedDict from uuid import UUID -from langchain.callbacks.manager import ( - AsyncCallbackManagerForToolRun, - CallbackManagerForToolRun, -) -from langchain.pydantic_v1 import BaseModel as BaseModelV1 -from langchain.pydantic_v1 import Field as FieldV1 from langchain.tools import BaseTool -from langchain_community.tools import DuckDuckGoSearchResults from langchain_core.messages import BaseMessage, ToolMessage from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.tools import BaseTool @@ -22,9 +15,7 @@ from modules.brain.knowledge_brain_qa import KnowledgeBrainQA from modules.chat.dto.chats import ChatQuestion from modules.chat.dto.outputs import GetChatHistoryOutput from modules.chat.service.chat_service import ChatService -from openai import OpenAI -from pydantic import BaseModel -from modules.tools import ImageGeneratorTool +from modules.tools import ImageGeneratorTool, WebSearchTool class AgentState(TypedDict): @@ -37,6 +28,7 @@ logger = get_logger(__name__) 
import asyncio
import os
from typing import Dict, Optional, Type

import requests
from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import BaseModel as BaseModelV1
from langchain.pydantic_v1 import Field as FieldV1
from langchain_core.tools import BaseTool
from logger import get_logger
from pydantic import BaseModel

logger = get_logger(__name__)

# Brave web-search endpoint and request settings shared by the sync and
# async code paths.
_BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
_RESULT_COUNT = 3
_REQUEST_TIMEOUT_SECONDS = 10


class WebSearchInput(BaseModelV1):
    """Argument schema for ``WebSearchTool``: a single free-text query."""

    query: str = FieldV1(..., title="query", description="search query to look up")


class WebSearchTool(BaseTool):
    """Tool that searches the web through the Brave Search API.

    The API key is read from the ``BRAVE_SEARCH_API_KEY`` environment
    variable when the tool is instantiated, not when the module is imported.

    Raises:
        ValueError: on instantiation, if ``BRAVE_SEARCH_API_KEY`` is not set.
    """

    name = "brave-web-search"
    description = "useful for when you need to search the web for something."
    # The schema is a pydantic-v1 model, so annotate it with the v1 base class.
    args_schema: Type[BaseModelV1] = WebSearchInput
    # Declared with an explicit annotation: an un-annotated attribute whose
    # value is None (variable unset at import time) cannot be turned into a
    # pydantic-v1 field and would break the class definition itself.
    api_key: Optional[str] = None

    def _check_environment_variable(self) -> bool:
        """Return True if the BRAVE_SEARCH_API_KEY environment variable is set."""
        return os.getenv("BRAVE_SEARCH_API_KEY") is not None

    def __init__(self):
        if not self._check_environment_variable():
            raise ValueError("BRAVE_SEARCH_API_KEY environment variable is not set")
        # Resolve the key now so that a value loaded after import is honoured.
        super().__init__(api_key=os.getenv("BRAVE_SEARCH_API_KEY"))

    def _search(self, query: str) -> str:
        """Query the Brave Search API and return the formatted results.

        Raises:
            requests.HTTPError: if the API answers with an error status.
            requests.RequestException: on connection problems or timeout.
        """
        headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
            "X-Subscription-Token": self.api_key,
        }
        response = requests.get(
            _BRAVE_SEARCH_URL,
            # Passing the query via ``params`` URL-encodes it; interpolating
            # it into the URL breaks on characters such as '&', '#' or '+'.
            params={"q": query, "count": _RESULT_COUNT},
            headers=headers,
            # Without a timeout a stalled connection would hang the caller.
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        # Surface HTTP failures (bad key, rate limit, ...) explicitly instead
        # of failing later while indexing into an error payload.
        response.raise_for_status()
        return self._parse_response(response.json())

    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Run the tool."""
        return self._search(query)

    async def _arun(
        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Run the tool asynchronously."""
        # ``requests`` is blocking; run it in the default executor so the
        # event loop stays free while the HTTP call is in flight.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._search, query)

    def _parse_response(self, response: Dict) -> str:
        """Convert a Brave Search JSON payload into newline-separated results.

        A payload without ``web``/``results`` yields an empty string, and a
        result lacking a title, description or url gets an empty string for
        that part.
        """
        results = (response.get("web") or {}).get("results") or []
        return "\n".join(
            self._format_result(
                result.get("title", ""),
                result.get("description", ""),
                result.get("url", ""),
            )
            for result in results
        )

    def _format_result(self, title: str, description: str, url: str) -> str:
        """Format one search hit as a bold title, its description and its URL."""
        return f"**{title}**\n{description}\n{url}"
CrawlWebsite(BaseModel): def process(self): # Extract and combine content recursively - visited_urls = set() loader = PlaywrightURLLoader( urls=[self.url], remove_selectors=["header", "footer"] ) diff --git a/docs/configuring/brave-search.mdx b/docs/configuring/brave-search.mdx new file mode 100644 index 000000000..3a2a61011 --- /dev/null +++ b/docs/configuring/brave-search.mdx @@ -0,0 +1,42 @@ +--- +title: Configuring Brave Search in Quivr +description: A guide on how to integrate Brave Search into Quivr by obtaining and configuring the BRAVE_SEARCH_API_KEY. +--- + +# Configuring Brave Search in Quivr + +Integrating Brave Search into Quivr enhances your search capabilities by leveraging the privacy-focused search engine. This guide will walk you through the process of obtaining a Brave Search API key and configuring it in Quivr. + +## Step 1: Create a Brave Search Account + +To use Brave Search's API, you first need to create an account with Brave Search. Follow these steps: + +1. Visit the [Brave Search API portal](https://api.search.brave.com/app/keys) and sign up for an account. +2. Follow the on-screen instructions to create your account. + +## Step 2: Obtain the API Key + +Once you have a Brave Search account, you can obtain an API key: + +1. Log in to your Brave Search account. +2. Navigate to the API section (this might be located in your account settings or developer settings). +3. Generate a new API key for your application. Note this key as you will need it for the next step. + +## Step 3: Configure the BRAVE_SEARCH_API_KEY in Quivr + +With your Brave Search API key in hand, you can now configure it in Quivr: + +1. Open your Quivr project's `.env` file. +2. Add the following line to the file: + ``` + BRAVE_SEARCH_API_KEY=your_brave_search_api_key_here + ``` + Replace `your_brave_search_api_key_here` with the actual API key you obtained from Brave Search. + +3. Save the `.env` file. + +## Conclusion + +You have successfully integrated Brave Search into Quivr. 
Your searches will now leverage the privacy-focused and efficient search capabilities of Brave Search. + +For any issues or further configuration options, refer to the Brave Search API documentation or contact Quivr support. diff --git a/docs/configuring/environment-variables.mdx b/docs/configuring/environment-variables.mdx index cbf8d809c..bb883f483 100644 --- a/docs/configuring/environment-variables.mdx +++ b/docs/configuring/environment-variables.mdx @@ -59,3 +59,17 @@ The frontend environment file is used to configure the frontend application. It - `NEXT_PUBLIC_STRIPE_MANAGE_PLAN_URL`: The URL for managing Stripe subscription plans. - `NEXT_PUBLIC_AUTH_MODES`: The authentication modes supported by the application, such as magic link, password-based authentication, and Google Single Sign-On (SSO). + +### Configuring BRAVE_SEARCH_API_KEY + +To use the Brave Search functionality within Quivr, you need to configure the `BRAVE_SEARCH_API_KEY` in your environment. Here's how to obtain and set up your API key: + +1. **Create a Brave Search Account**: Visit [Brave Search](https://api.search.brave.com/app/keys) and sign up for an account if you haven't already. +2. **Obtain API Key**: Once logged in, navigate to the developer settings or API section to generate a new API key for your application. +3. **Configure .env File**: Add the following line to your `.env` file in the root of the Quivr project: + ``` + BRAVE_SEARCH_API_KEY=your_brave_search_api_key_here + ``` + Replace `your_brave_search_api_key_here` with the actual API key you obtained from Brave Search. + +By configuring the `BRAVE_SEARCH_API_KEY`, you enable Quivr to perform web searches using Brave Search's capabilities directly from within the application. 
diff --git a/docs/mint.json b/docs/mint.json index 406399c06..66434efba 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -70,7 +70,8 @@ "configuring/environment-variables", "configuring/profiler", "configuring/telemetry", - "configuring/supabase-setup" + "configuring/supabase-setup", + "configuring/brave-search" ] }, "tech-design",