feat(tool): Add URLReaderTool (#2577)

This pull request adds the URLReaderTool to the list of tools in the
GPT4Brain class. The URLReaderTool allows for reading the content of a
URL.
This commit is contained in:
Stan Girard 2024-05-10 14:03:21 +02:00 committed by GitHub
parent 62f9b5bed2
commit 105a2b8ecc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 56 additions and 2 deletions

View File

@ -15,7 +15,7 @@ from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.outputs import GetChatHistoryOutput
from modules.chat.service.chat_service import ChatService
from modules.tools import ImageGeneratorTool, WebSearchTool
from modules.tools import ImageGeneratorTool, URLReaderTool, WebSearchTool
class AgentState(TypedDict):
@ -37,7 +37,7 @@ class GPT4Brain(KnowledgeBrainQA):
KnowledgeBrainQA (_type_): A brain that store the knowledge internaly
"""
tools: List[BaseTool] = [WebSearchTool(), ImageGeneratorTool()]
tools: List[BaseTool] = [WebSearchTool(), ImageGeneratorTool(), URLReaderTool()]
tool_executor: ToolExecutor = ToolExecutor(tools)
model_function: ChatOpenAI = None

View File

@ -1,3 +1,4 @@
from .image_generator import ImageGeneratorTool
from .web_search import WebSearchTool
from .url_reader import URLReaderTool

View File

@ -0,0 +1,53 @@
# Tool that loads a single URL in a browser and returns the combined text of the loaded page(s)
import os
from typing import Dict, Optional, Type
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import BaseModel as BaseModelV1
from langchain.pydantic_v1 import Field as FieldV1
from langchain_community.document_loaders import PlaywrightURLLoader
from langchain_core.tools import BaseTool
from logger import get_logger
from pydantic import BaseModel
# Module-level logger obtained from the project's `get_logger`, keyed by this
# module's name.
logger = get_logger(__name__)
# Argument schema handed to the URL reader tool: a single required `url` string.
# (Documented with comments rather than a docstring so the generated pydantic
# schema stays exactly as before.)
class URLReaderInput(BaseModelV1):
    url: str = FieldV1(
        ...,  # required: there is no default URL
        title="url",
        description="url to read",
    )
class URLReaderTool(BaseTool):
    """Tool that loads a URL with Playwright and returns its text content.

    The page is loaded through ``PlaywrightURLLoader`` with the ``header`` and
    ``footer`` selectors removed; the ``page_content`` of every returned
    document is concatenated into a single string.
    """

    name: str = "url-reader"
    description: str = "useful for when you need to read the content of a url."
    # URLReaderInput is a pydantic-v1 model, so annotate with the v1 base class
    # rather than the top-level ``pydantic.BaseModel``.
    args_schema: Type[BaseModelV1] = URLReaderInput
    # NOTE(review): this key is not read anywhere in this tool; it looks like a
    # leftover from the web-search tool and is kept only so existing attribute
    # access keeps working. It is annotated explicitly because pydantic v1 is
    # believed unable to infer a field type from an un-annotated ``None``
    # default (i.e. when the environment variable is unset) -- confirm and
    # consider removing the attribute altogether.
    api_key: Optional[str] = os.getenv("BRAVE_SEARCH_API_KEY")

    @staticmethod
    def _make_loader(url: str) -> PlaywrightURLLoader:
        """Build the Playwright loader for a single URL, dropping header/footer."""
        return PlaywrightURLLoader(urls=[url], remove_selectors=["header", "footer"])

    @staticmethod
    def _as_result(documents) -> Dict:
        """Join the ``page_content`` of all loaded documents into the result dict."""
        return {"content": "".join(doc.page_content for doc in documents)}

    def _run(
        self, url: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> Dict:
        """Run the tool synchronously.

        Args:
            url: Address of the page to read.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            ``{"content": <text>}`` where ``<text>`` is the concatenated page
            content (an empty string when the loader returns no documents).
        """
        return self._as_result(self._make_loader(url).load())

    async def _arun(
        self, url: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> Dict:
        """Run the tool asynchronously.

        Args:
            url: Address of the page to read.
            run_manager: Optional async callback manager; not used by this tool.

        Returns:
            The same ``{"content": <text>}`` mapping as ``_run``.
        """
        # The synchronous ``load()`` drives Playwright's sync API, which blocks
        # and cannot be used from inside a running event loop, so await the
        # loader's async variant instead.
        documents = await self._make_loader(url).aload()
        return self._as_result(documents)