mirror of
https://github.com/QuivrHQ/quivr.git
synced 2024-12-14 17:03:29 +03:00
feat: assistants (#2421)
…dio_transcript and crawler assistants # Description Please include a summary of the changes and the related issue. Please also include relevant motivation and context. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate):
This commit is contained in:
parent
1ffeb8f25d
commit
488949d408
@ -1,12 +1,12 @@
|
||||
from typing import List
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Query, UploadFile
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile
|
||||
from logger import get_logger
|
||||
from middlewares.auth import AuthBearer, get_current_user
|
||||
from modules.assistant.dto.inputs import InputAssistant
|
||||
from modules.assistant.dto.outputs import AssistantOutput
|
||||
from modules.assistant.ito.audio_transcript import AudioTranscriptAssistant
|
||||
from modules.assistant.ito.crawler import CrawlerAssistant
|
||||
from modules.assistant.ito.audio_transcript import audio_transcript_inputs
|
||||
from modules.assistant.ito.crawler import crawler_inputs
|
||||
from modules.assistant.ito.summary import SummaryAssistant, summary_inputs
|
||||
from modules.assistant.service.assistant import Assistant
|
||||
from modules.user.entity.user_identity import UserIdentity
|
||||
@ -28,55 +28,28 @@ async def list_assistants(
|
||||
"""
|
||||
|
||||
summary = summary_inputs()
|
||||
return [summary]
|
||||
crawler = crawler_inputs()
|
||||
audio_transcript = audio_transcript_inputs()
|
||||
return [summary, crawler, audio_transcript]
|
||||
|
||||
|
||||
@assistant_router.post(
|
||||
"/assistant/{ingestion_id}/process",
|
||||
"/assistant/process",
|
||||
dependencies=[Depends(AuthBearer())],
|
||||
tags=["Assistant"],
|
||||
)
|
||||
async def process_assistant(
|
||||
ingestion_id: UUID,
|
||||
file_1: UploadFile = File(None),
|
||||
input: InputAssistant,
|
||||
files: List[UploadFile] = None,
|
||||
current_user: UserIdentity = Depends(get_current_user),
|
||||
brain_id: UUID = Query(None, description="The ID of the brain"),
|
||||
send_file_email: bool = Query(False, description="Send the file by email"),
|
||||
url: str = Query(None, description="The URL to process"),
|
||||
):
|
||||
if ingestion_id is None:
|
||||
raise ValueError("Ingestion ID is required")
|
||||
|
||||
assistant = assistant_service.get_assistant_by_id(ingestion_id)
|
||||
|
||||
if assistant.name == "summary":
|
||||
summary = SummaryAssistant(
|
||||
uploadFile=file_1,
|
||||
current_user=current_user,
|
||||
brain_id=brain_id,
|
||||
send_file_email=send_file_email,
|
||||
url=url,
|
||||
if input.name == "summary":
|
||||
summary_assistant = SummaryAssistant(
|
||||
input=input, files=files, current_user=current_user
|
||||
)
|
||||
return await summary.process_assistant()
|
||||
|
||||
if assistant.name == "audio_transcript":
|
||||
audio_summary = AudioTranscriptAssistant(
|
||||
uploadFile=file_1,
|
||||
current_user=current_user,
|
||||
brain_id=brain_id,
|
||||
send_file_email=send_file_email,
|
||||
url=url,
|
||||
)
|
||||
return await audio_summary.process_assistant()
|
||||
|
||||
if assistant.name == "crawler":
|
||||
crawler = CrawlerAssistant(
|
||||
uploadFile=file_1,
|
||||
current_user=current_user,
|
||||
brain_id=brain_id,
|
||||
send_file_email=send_file_email,
|
||||
url=url,
|
||||
)
|
||||
return await crawler.process_assistant()
|
||||
|
||||
return {"message": "Not found"}
|
||||
try:
|
||||
summary_assistant.check_input()
|
||||
return await summary_assistant.process_assistant()
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
return {"message": "Assistant not found"}
|
||||
|
@ -1,57 +1,51 @@
|
||||
from typing import List
|
||||
import json
|
||||
from typing import List, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
|
||||
class InputFile(BaseModel):
|
||||
allowed_extensions: List[str]
|
||||
required: bool
|
||||
description: str
|
||||
class EmailInput(BaseModel):
|
||||
activated: bool
|
||||
|
||||
|
||||
class InputUrl(BaseModel):
|
||||
required: bool
|
||||
description: bool
|
||||
class BrainInput(BaseModel):
|
||||
activated: bool
|
||||
value: UUID
|
||||
|
||||
|
||||
class InputText(BaseModel):
|
||||
required: bool
|
||||
description: bool
|
||||
class FileInput(BaseModel):
|
||||
key: str
|
||||
value: str
|
||||
|
||||
|
||||
class UrlInput(BaseModel):
|
||||
key: str
|
||||
value: str
|
||||
|
||||
|
||||
class TextInput(BaseModel):
|
||||
key: str
|
||||
value: str
|
||||
|
||||
|
||||
class Inputs(BaseModel):
|
||||
files: List[InputFile]
|
||||
urls: List[InputUrl]
|
||||
texts: List[InputText]
|
||||
|
||||
|
||||
class OutputEmail(BaseModel):
|
||||
required: bool
|
||||
description: str
|
||||
type: str
|
||||
|
||||
|
||||
class OutputBrain(BaseModel):
|
||||
required: bool
|
||||
description: str
|
||||
type: UUID
|
||||
files: Optional[List[FileInput]] = None
|
||||
urls: Optional[List[UrlInput]] = None
|
||||
texts: Optional[List[TextInput]] = None
|
||||
|
||||
|
||||
class Outputs(BaseModel):
|
||||
emails: OutputEmail
|
||||
brains: OutputBrain
|
||||
email: Optional[EmailInput] = None
|
||||
brain: Optional[BrainInput] = None
|
||||
|
||||
|
||||
class Outputs(BaseModel):
|
||||
files: List[InputFile]
|
||||
urls: List[InputUrl]
|
||||
texts: List[InputText]
|
||||
|
||||
|
||||
class AssistantOutput(BaseModel):
|
||||
class InputAssistant(BaseModel):
|
||||
name: str
|
||||
input_description: str
|
||||
output_description: str
|
||||
inputs: Inputs
|
||||
outputs: Outputs
|
||||
|
||||
@model_validator(mode="before")
@classmethod
def to_py_dict(cls, data):
    """Decode a JSON-encoded payload before field validation runs.

    The validator runs in ``before`` mode, so it receives the raw input
    handed to the model. When that input is JSON text it is parsed into
    Python objects; anything else is returned untouched so the model can
    also be built from already-decoded data.

    Args:
        data: Raw input passed to the model (JSON text or decoded data).

    Returns:
        The decoded JSON value for ``str``/``bytes``/``bytearray`` input,
        otherwise ``data`` unchanged.

    Raises:
        json.JSONDecodeError: If ``data`` is text that is not valid JSON.
    """
    # json.loads only accepts str/bytes/bytearray and raises TypeError for
    # anything else, so non-text input must bypass the decoding step.
    if isinstance(data, (str, bytes, bytearray)):
        return json.loads(data)
    return data
|
||||
|
@ -51,3 +51,4 @@ class AssistantOutput(BaseModel):
|
||||
output_description: str
|
||||
inputs: Inputs
|
||||
outputs: Outputs
|
||||
icon_url: Optional[str] = None
|
||||
|
@ -2,6 +2,14 @@ import os
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
from logger import get_logger
|
||||
from modules.assistant.dto.outputs import (
|
||||
AssistantOutput,
|
||||
InputFile,
|
||||
Inputs,
|
||||
OutputBrain,
|
||||
OutputEmail,
|
||||
Outputs,
|
||||
)
|
||||
from modules.assistant.ito.ito import ITO
|
||||
from openai import OpenAI
|
||||
|
||||
@ -45,3 +53,35 @@ class AudioTranscriptAssistant(ITO):
|
||||
return await self.create_and_upload_processed_file(
|
||||
transcription, self.uploadFile.filename, "Audio Transcript"
|
||||
)
|
||||
|
||||
|
||||
def audio_transcript_inputs():
    """Build the descriptor advertising the Audio Transcript assistant.

    Returns:
        An ``AssistantOutput`` declaring one required audio file input
        (key ``audio_file``) and the brain / email output options.
    """
    # Inputs are assembled before outputs, matching the original evaluation order.
    declared_inputs = Inputs(
        files=[
            InputFile(
                key="audio_file",
                allowed_extensions=["mp3", "wav", "ogg", "m4a"],
                required=True,
                description="The audio file to transcribe",
            )
        ]
    )

    brain_target = OutputBrain(
        required=True,
        description="The brain to which to upload the document",
        type="uuid",
    )
    email_target = OutputEmail(
        required=True,
        description="Send the document by email",
        type="str",
    )
    declared_outputs = Outputs(brain=brain_target, email=email_target)

    return AssistantOutput(
        name="Audio Transcript",
        description="Transcribes an audio file",
        input_description="One audio file to transcribe",
        output_description="Transcription of the audio file",
        inputs=declared_inputs,
        outputs=declared_outputs,
    )
|
||||
|
@ -1,6 +1,14 @@
|
||||
from bs4 import BeautifulSoup as Soup
|
||||
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
||||
from logger import get_logger
|
||||
from modules.assistant.dto.outputs import (
|
||||
AssistantOutput,
|
||||
Inputs,
|
||||
InputUrl,
|
||||
OutputBrain,
|
||||
OutputEmail,
|
||||
Outputs,
|
||||
)
|
||||
from modules.assistant.ito.ito import ITO
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@ -31,3 +39,34 @@ class CrawlerAssistant(ITO):
|
||||
await self.create_and_upload_processed_file(
|
||||
docs.page_content, nice_url, "Crawler"
|
||||
)
|
||||
|
||||
|
||||
def crawler_inputs():
    """Build the descriptor advertising the Crawler assistant.

    Returns:
        An ``AssistantOutput`` declaring one required URL input (key
        ``url``) and the brain / email output options.
    """
    output = AssistantOutput(
        name="Crawler",
        description="Crawls a website and extracts the text from the pages",
        input_description="One URL to crawl",
        output_description="Text extracted from the pages",
        inputs=Inputs(
            urls=[
                InputUrl(
                    key="url",
                    required=True,
                    description="The URL to crawl",
                )
            ],
        ),
        outputs=Outputs(
            brain=OutputBrain(
                required=True,
                # Wording aligned with the audio transcript assistant's
                # descriptor ("to which to upload").
                description="The brain to which to upload the document",
                type="uuid",
            ),
            email=OutputEmail(
                required=True,
                description="Send the document by email",
                type="str",
            ),
        ),
    )
    return output
|
||||
|
@ -2,11 +2,11 @@ import random
|
||||
from abc import abstractmethod
|
||||
from io import BytesIO
|
||||
from tempfile import NamedTemporaryFile
|
||||
from uuid import UUID
|
||||
from typing import List
|
||||
|
||||
from fastapi import UploadFile
|
||||
from logger import get_logger
|
||||
from modules.assistant.dto.outputs import AssistantOutput
|
||||
from modules.assistant.dto.inputs import InputAssistant
|
||||
from modules.contact_support.controller.settings import ContactsSettings
|
||||
from modules.upload.controller.upload_routes import upload_file
|
||||
from modules.user.entity.user_identity import UserIdentity
|
||||
@ -17,36 +17,14 @@ logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ITO(BaseModel):
|
||||
uploadFile: UploadFile | None = None
|
||||
current_user: UserIdentity = None
|
||||
brain_id: UUID | None = None
|
||||
send_file_email: bool = False
|
||||
url: str | None = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
uploadFile: UploadFile,
|
||||
current_user: UserIdentity,
|
||||
brain_id: UUID,
|
||||
send_file_email: bool = False,
|
||||
url: str = None,
|
||||
):
|
||||
super().__init__(
|
||||
uploadFile=uploadFile,
|
||||
current_user=current_user,
|
||||
brain_id=brain_id,
|
||||
send_file_email=send_file_email,
|
||||
url=url,
|
||||
)
|
||||
input: InputAssistant
|
||||
files: List[UploadFile]
|
||||
current_user: UserIdentity
|
||||
|
||||
@abstractmethod
|
||||
async def process_assistant(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def assistant_inputs(self) -> AssistantOutput:
|
||||
pass
|
||||
|
||||
async def send_output_by_email(
|
||||
self, file: UploadFile, name: str, custom_message: str = None
|
||||
):
|
||||
@ -100,7 +78,7 @@ class ITO(BaseModel):
|
||||
headers={"content-type": "text/plain"},
|
||||
)
|
||||
|
||||
if self.send_file_email:
|
||||
if self.input.outputs.email.activated:
|
||||
await self.send_output_by_email(
|
||||
file_to_upload,
|
||||
new_filename,
|
||||
@ -109,11 +87,12 @@ class ITO(BaseModel):
|
||||
|
||||
# Reset to start of file before upload
|
||||
file_to_upload.file.seek(0)
|
||||
await upload_file(
|
||||
uploadFile=file_to_upload,
|
||||
brain_id=self.brain_id,
|
||||
current_user=self.current_user,
|
||||
chat_id=None,
|
||||
)
|
||||
if self.input.outputs.brain.activated:
|
||||
await upload_file(
|
||||
uploadFile=file_to_upload,
|
||||
brain_id=self.input.outputs.brain.value,
|
||||
current_user=self.current_user,
|
||||
chat_id=None,
|
||||
)
|
||||
|
||||
return {"message": f"{file_description} generated successfully"}
|
||||
|
@ -1,5 +1,7 @@
|
||||
import tempfile
|
||||
from typing import List
|
||||
|
||||
from fastapi import UploadFile
|
||||
from langchain.chains import (
|
||||
MapReduceDocumentsChain,
|
||||
ReduceDocumentsChain,
|
||||
@ -11,6 +13,7 @@ from langchain_community.document_loaders import UnstructuredPDFLoader
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
from logger import get_logger
|
||||
from modules.assistant.dto.inputs import InputAssistant
|
||||
from modules.assistant.dto.outputs import (
|
||||
AssistantOutput,
|
||||
InputFile,
|
||||
@ -20,6 +23,7 @@ from modules.assistant.dto.outputs import (
|
||||
Outputs,
|
||||
)
|
||||
from modules.assistant.ito.ito import ITO
|
||||
from modules.user.entity.user_identity import UserIdentity
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@ -28,19 +32,44 @@ class SummaryAssistant(ITO):
|
||||
|
||||
def __init__(
    self,
    input: InputAssistant,
    files: List[UploadFile] = None,
    current_user: UserIdentity = None,
    **kwargs,
):
    """Initialise the summary assistant from a parsed request.

    Args:
        input: The assistant request (declared inputs and outputs).
        files: Uploaded files; ``None`` is treated as "no files".
        current_user: The authenticated user issuing the request.
        **kwargs: Extra fields forwarded to the base model.
    """
    super().__init__(
        input=input,
        # The base model declares `files` as a list, so a missing upload is
        # normalised to an empty list here; check_input() then reports it
        # as "No file was uploaded" instead of construction failing first.
        files=files if files is not None else [],
        current_user=current_user,
        **kwargs,
    )
|
||||
|
||||
def check_input(self):
    """Validate the uploaded files and the declared inputs/outputs.

    Checks that exactly one file was uploaded, that the request declares
    exactly one file entry keyed ``doc_to_summarize`` with a non-empty
    value, and that at least one output (brain or email) is activated.

    Returns:
        True when every check passes.

    Raises:
        ValueError: Describing the first failed check.
    """
    if not self.files:
        raise ValueError("No file was uploaded")
    if len(self.files) > 1:
        raise ValueError("Only one file can be uploaded")

    declared_files = self.input.inputs.files
    if not declared_files:
        raise ValueError("No files key were given in the input")
    if len(declared_files) > 1:
        raise ValueError("Only one file can be uploaded")

    declared_file = declared_files[0]
    if declared_file.key != "doc_to_summarize":
        raise ValueError("The key of the file should be doc_to_summarize")
    if not declared_file.value:
        raise ValueError("No file was uploaded")

    # Either output may be absent (None) on the request; an absent output
    # counts as "not selected" rather than raising AttributeError.
    outputs = self.input.outputs
    brain_selected = outputs.brain is not None and outputs.brain.activated
    email_selected = outputs.email is not None and outputs.email.activated
    if not (brain_selected or email_selected):
        raise ValueError("No output was selected")

    return True
|
||||
|
||||
async def process_assistant(self):
|
||||
|
||||
# Copy the uploaded file into a temporary file so that its path can be passed to the loader
|
||||
tmp_file = tempfile.NamedTemporaryFile(delete=False)
|
||||
|
||||
# Write the file to the temporary file
|
||||
tmp_file.write(self.uploadFile.file.read())
|
||||
tmp_file.write(self.files[0].file.read())
|
||||
|
||||
# Now pass the path of the temporary file to the loader
|
||||
|
||||
@ -104,7 +133,7 @@ class SummaryAssistant(ITO):
|
||||
content = map_reduce_chain.run(split_docs)
|
||||
|
||||
return await self.create_and_upload_processed_file(
|
||||
content, self.uploadFile.filename, "Summary"
|
||||
content, self.files[0].filename, "Summary"
|
||||
)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user