feat: Add assistant module and remove ingestion module (#2420)

# Description

This PR turns the `ingestion` module into a new `assistant` module. The API now exposes `GET /assistants` and `POST /assistant/{ingestion_id}/process` instead of the old `/ingestion` routes, the `*Ingestion` ITO classes are renamed to `*Assistant`, new DTOs (`AssistantOutput`, `Inputs`, `Outputs`) describe each assistant's expected inputs and outputs, and a Supabase migration replaces the `ingestions` table with an `assistants` table. The old ingestion repository, entity, and knowledge DTO files are removed.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

Commit 1ffeb8f25d (parent 82e0dd9342), authored by Stan Girard on 2024-04-10 04:28:22 -07:00 and committed via GitHub.
26 changed files with 350 additions and 107 deletions

View File

@@ -6,24 +6,26 @@ if __name__ == "__main__":
from dotenv import load_dotenv # type: ignore
load_dotenv()
import sentry_sdk
import logging
import litellm
import sentry_sdk
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from logger import get_logger
from middlewares.cors import add_cors_middleware
from modules.analytics.controller.analytics_routes import analytics_router
from modules.api_key.controller import api_key_router
from modules.assistant.controller import assistant_router
from modules.brain.controller import brain_router
from modules.chat.controller import chat_router
from modules.contact_support.controller import contact_router
from modules.ingestion.controller import ingestion_router
from modules.knowledge.controller import knowledge_router
from modules.misc.controller import misc_router
from modules.notification.controller import notification_router
from modules.onboarding.controller import onboarding_router
from modules.prompt.controller import prompt_router
from modules.upload.controller import upload_router
from modules.analytics.controller.analytics_routes import analytics_router
from modules.user.controller import user_router
from packages.utils import handle_request_validation_error
from packages.utils.telemetry import maybe_send_telemetry
@@ -31,7 +33,6 @@ from routes.crawl_routes import crawl_router
from routes.subscription_routes import subscription_router
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
import logging
# Set the logging level for all loggers to INFO
logging.basicConfig(level=logging.INFO)
@@ -76,7 +77,7 @@ add_cors_middleware(app)
app.include_router(brain_router)
app.include_router(chat_router)
app.include_router(crawl_router)
app.include_router(ingestion_router)
app.include_router(assistant_router)
app.include_router(onboarding_router)
app.include_router(misc_router)
app.include_router(analytics_router)
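For reviewers, a quick way to confirm the router swap is to check the app's route table. A minimal sketch, assuming the FastAPI app is importable as `main.app` with the usual environment variables set (the test itself is not part of this PR):

```python
# Sketch: confirm /assistants is registered and the /ingestion routes are gone.
# Assumes `main.app` is importable and the backend's env vars are configured.
from fastapi.testclient import TestClient

from main import app

client = TestClient(app)
paths = {route.path for route in app.routes}

assert "/assistants" in paths
assert not any(path.startswith("/ingestion") for path in paths)

# Unauthenticated requests should be rejected by AuthBearer rather than 404.
assert client.get("/assistants").status_code in (401, 403)
```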

View File

@@ -0,0 +1 @@
from .assistant_routes import assistant_router

View File

@@ -4,39 +4,39 @@ from uuid import UUID
from fastapi import APIRouter, Depends, File, Query, UploadFile
from logger import get_logger
from middlewares.auth import AuthBearer, get_current_user
from modules.ingestion.entity.ingestion import IngestionEntity
from modules.ingestion.ito.audio_transcript import AudioTranscriptIngestion
from modules.ingestion.ito.crawler import CrawlerIngestion
from modules.ingestion.ito.summary import SummaryIngestion
from modules.ingestion.service.ingestion import Ingestion
from modules.assistant.dto.outputs import AssistantOutput
from modules.assistant.ito.audio_transcript import AudioTranscriptAssistant
from modules.assistant.ito.crawler import CrawlerAssistant
from modules.assistant.ito.summary import SummaryAssistant, summary_inputs
from modules.assistant.service.assistant import Assistant
from modules.user.entity.user_identity import UserIdentity
ingestion_router = APIRouter()
assistant_router = APIRouter()
logger = get_logger(__name__)
ingestion_service = Ingestion()
assistant_service = Assistant()
@ingestion_router.get(
"/ingestion", dependencies=[Depends(AuthBearer())], tags=["Ingestion"]
@assistant_router.get(
"/assistants", dependencies=[Depends(AuthBearer())], tags=["Assistant"]
)
async def list_ingestion(
async def list_assistants(
current_user: UserIdentity = Depends(get_current_user),
) -> List[IngestionEntity]:
) -> List[AssistantOutput]:
"""
Retrieve and list all the available assistants.
"""
ingestions = ingestion_service.get_all_ingestions()
return ingestions
summary = summary_inputs()
return [summary]
@ingestion_router.post(
"/ingestion/{ingestion_id}/process",
@assistant_router.post(
"/assistant/{ingestion_id}/process",
dependencies=[Depends(AuthBearer())],
tags=["Ingestion"],
tags=["Assistant"],
)
async def process_ingestion(
async def process_assistant(
ingestion_id: UUID,
file_1: UploadFile = File(None),
current_user: UserIdentity = Depends(get_current_user),
@@ -47,36 +47,36 @@ async def process_ingestion(
if ingestion_id is None:
raise ValueError("Ingestion ID is required")
ingestion = ingestion_service.get_ingestion_by_id(ingestion_id)
assistant = assistant_service.get_assistant_by_id(ingestion_id)
if ingestion.name == "summary":
summary = SummaryIngestion(
if assistant.name == "summary":
summary = SummaryAssistant(
uploadFile=file_1,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
)
return await summary.process_ingestion()
return await summary.process_assistant()
if ingestion.name == "audio_transcript":
audio_summary = AudioTranscriptIngestion(
if assistant.name == "audio_transcript":
audio_summary = AudioTranscriptAssistant(
uploadFile=file_1,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
)
return await audio_summary.process_ingestion()
return await audio_summary.process_assistant()
if ingestion.name == "crawler":
crawler = CrawlerIngestion(
if assistant.name == "crawler":
crawler = CrawlerAssistant(
uploadFile=file_1,
current_user=current_user,
brain_id=brain_id,
send_file_email=send_file_email,
url=url,
)
return await crawler.process_ingestion()
return await crawler.process_assistant()
return {"message": "Not found"}
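As a usage sketch of the renamed endpoints: the base URL, API key, and ids below are placeholders, the query parameters are inferred from the handler body (the full signature is outside this hunk), and the path parameter keeps the old `ingestion_id` name in this PR.

```python
# Sketch: exercising the renamed endpoints; URL, key and ids are placeholders.
import requests

API_URL = "http://localhost:5050"  # assumption: locally running backend
HEADERS = {"Authorization": "Bearer <api-key>"}

# List the declared assistants (currently only the summary definition).
print(requests.get(f"{API_URL}/assistants", headers=HEADERS).json())

# Run one assistant on an uploaded file. brain_id/send_file_email are inferred
# from the handler body above; the path parameter is still named ingestion_id.
with open("report.pdf", "rb") as pdf:
    response = requests.post(
        f"{API_URL}/assistant/<assistant-id>/process",
        headers=HEADERS,
        params={"brain_id": "<brain-uuid>", "send_file_email": True},
        files={"file_1": pdf},
    )
print(response.json())
```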

View File

@@ -0,0 +1,57 @@
from typing import List
from uuid import UUID

from pydantic import BaseModel


class InputFile(BaseModel):
    allowed_extensions: List[str]
    required: bool
    description: str


class InputUrl(BaseModel):
    required: bool
    description: bool


class InputText(BaseModel):
    required: bool
    description: bool


class Inputs(BaseModel):
    files: List[InputFile]
    urls: List[InputUrl]
    texts: List[InputText]


class OutputEmail(BaseModel):
    required: bool
    description: str
    type: str


class OutputBrain(BaseModel):
    required: bool
    description: str
    type: UUID


class Outputs(BaseModel):
    emails: OutputEmail
    brains: OutputBrain


class Outputs(BaseModel):
    files: List[InputFile]
    urls: List[InputUrl]
    texts: List[InputText]


class AssistantOutput(BaseModel):
    name: str
    input_description: str
    output_description: str
    inputs: Inputs
    outputs: Outputs

View File

@@ -0,0 +1,53 @@
from typing import List, Optional

from pydantic import BaseModel


class InputFile(BaseModel):
    key: str
    allowed_extensions: Optional[List[str]] = ["pdf"]
    required: Optional[bool] = True
    description: str


class InputUrl(BaseModel):
    key: str
    required: Optional[bool] = True
    description: str


class InputText(BaseModel):
    key: str
    required: Optional[bool] = True
    description: str


class Inputs(BaseModel):
    files: Optional[List[InputFile]] = None
    urls: Optional[List[InputUrl]] = None
    texts: Optional[List[InputText]] = None


class OutputEmail(BaseModel):
    required: Optional[bool] = True
    description: str
    type: str


class OutputBrain(BaseModel):
    required: Optional[bool] = True
    description: str
    type: str


class Outputs(BaseModel):
    email: Optional[OutputEmail] = None
    brain: Optional[OutputBrain] = None


class AssistantOutput(BaseModel):
    name: str
    input_description: str
    output_description: str
    inputs: Inputs
    outputs: Outputs
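Put together with the `summary_inputs()` helper added in summary.py further down, a client calling `GET /assistants` should receive roughly the following shape (a sketch derived from these models, not captured output):

```python
# Approximate payload for the summary assistant, derived from the models above.
expected_summary_assistant = {
    "name": "Summary",
    "input_description": "One document to summarize",
    "output_description": "A summary of the document",
    "inputs": {
        "files": [
            {
                "key": "doc_to_summarize",
                "allowed_extensions": ["pdf"],
                "required": True,
                "description": "The document to summarize",
            }
        ],
        "urls": None,
        "texts": None,
    },
    "outputs": {
        "email": {
            "required": True,
            "description": "Send the document by email",
            "type": "str",
        },
        "brain": {
            "required": True,
            "description": "The brain to which upload the document",
            "type": "uuid",
        },
    },
}
```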

View File

@@ -0,0 +1 @@
from .assistant import AssistantEntity

View File

@@ -3,7 +3,7 @@ from uuid import UUID
from pydantic import BaseModel
class IngestionEntity(BaseModel):
class AssistantEntity(BaseModel):
id: UUID
name: str
brain_id_required: bool

View File

@@ -2,13 +2,13 @@ import os
from tempfile import NamedTemporaryFile
from logger import get_logger
from modules.ingestion.ito.ito import ITO
from modules.assistant.ito.ito import ITO
from openai import OpenAI
logger = get_logger(__name__)
class AudioTranscriptIngestion(ITO):
class AudioTranscriptAssistant(ITO):
def __init__(
self,
@@ -18,7 +18,7 @@ class AudioTranscriptIngestion(ITO):
**kwargs,
)
async def process_ingestion(self):
async def process_assistant(self):
client = OpenAI()
logger.info(f"Processing audio file {self.uploadFile.filename}")
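Only the rename and the client setup are visible in this hunk; the transcription call itself sits outside the diff context. For orientation, a typical Whisper call with the v1 `openai` client looks like the sketch below (not necessarily the exact code in `AudioTranscriptAssistant`):

```python
# Sketch: Whisper transcription with the v1 openai client; the file name is a placeholder.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

with open("meeting.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )
print(transcript.text)
```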

View File

@@ -1,12 +1,12 @@
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from logger import get_logger
from modules.ingestion.ito.ito import ITO
from modules.assistant.ito.ito import ITO
logger = get_logger(__name__)
class CrawlerIngestion(ITO):
class CrawlerAssistant(ITO):
def __init__(
self,
@@ -16,7 +16,7 @@ class CrawlerIngestion(ITO):
**kwargs,
)
async def process_ingestion(self):
async def process_assistant(self):
url = self.url
loader = RecursiveUrlLoader(
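The crawler hunk is likewise mostly the rename; `RecursiveUrlLoader` is typically driven as in the sketch below, where the URL, depth, and extractor are illustrative rather than what `CrawlerAssistant` actually passes:

```python
# Sketch: typical RecursiveUrlLoader usage; parameters are illustrative.
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader

loader = RecursiveUrlLoader(
    url="https://example.com/docs",
    max_depth=2,
    extractor=lambda html: Soup(html, "html.parser").text,
)
documents = loader.load()
print(f"Crawled {len(documents)} pages")
```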

View File

@@ -6,6 +6,7 @@ from uuid import UUID
from fastapi import UploadFile
from logger import get_logger
from modules.assistant.dto.outputs import AssistantOutput
from modules.contact_support.controller.settings import ContactsSettings
from modules.upload.controller.upload_routes import upload_file
from modules.user.entity.user_identity import UserIdentity
@@ -39,7 +40,11 @@ class ITO(BaseModel):
)
@abstractmethod
async def process_ingestion(self):
async def process_assistant(self):
pass
@abstractmethod
def assistant_inputs(self) -> AssistantOutput:
pass
async def send_output_by_email(
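With these two abstract methods, adding another assistant comes down to subclassing `ITO`. The class below is a made-up illustration (its name and behaviour are hypothetical); it assumes `create_and_upload_processed_file` is available on the base class, as `SummaryAssistant` suggests.

```python
# Hypothetical assistant built on the ITO base class; illustrative only.
from modules.assistant.dto.outputs import AssistantOutput, Inputs, Outputs
from modules.assistant.ito.ito import ITO


class WordCountAssistant(ITO):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    async def process_assistant(self):
        # Count the words in the uploaded file and ship the result the same
        # way the other assistants do (assumes the helper exists on ITO).
        text = (await self.uploadFile.read()).decode("utf-8", errors="ignore")
        result = f"Word count: {len(text.split())}"
        return await self.create_and_upload_processed_file(
            result, self.uploadFile.filename, "WordCount"
        )

    def assistant_inputs(self) -> AssistantOutput:
        return AssistantOutput(
            name="WordCount",
            input_description="One text file",
            output_description="The number of words in the file",
            inputs=Inputs(),
            outputs=Outputs(),
        )
```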

View File

@@ -11,12 +11,20 @@ from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import CharacterTextSplitter
from logger import get_logger
from modules.ingestion.ito.ito import ITO
from modules.assistant.dto.outputs import (
AssistantOutput,
InputFile,
Inputs,
OutputBrain,
OutputEmail,
Outputs,
)
from modules.assistant.ito.ito import ITO
logger = get_logger(__name__)
class SummaryIngestion(ITO):
class SummaryAssistant(ITO):
def __init__(
self,
@@ -26,7 +34,7 @@ class SummaryIngestion(ITO):
**kwargs,
)
async def process_ingestion(self):
async def process_assistant(self):
# Create a temporary file with the uploaded file as a temporary file and then pass it to the loader
tmp_file = tempfile.NamedTemporaryFile(delete=False)
@@ -98,3 +106,35 @@ class SummaryIngestion(ITO):
        return await self.create_and_upload_processed_file(
            content, self.uploadFile.filename, "Summary"
        )


def summary_inputs():
    output = AssistantOutput(
        name="Summary",
        description="Summarize a set of documents",
        input_description="One document to summarize",
        output_description="A summary of the document",
        inputs=Inputs(
            files=[
                InputFile(
                    key="doc_to_summarize",
                    allowed_extensions=["pdf"],
                    required=True,
                    description="The document to summarize",
                )
            ]
        ),
        outputs=Outputs(
            brain=OutputBrain(
                required=True,
                description="The brain to which upload the document",
                type="uuid",
            ),
            email=OutputEmail(
                required=True,
                description="Send the document by email",
                type="str",
            ),
        ),
    )
    return output
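Only the new imports and the `summary_inputs()` declaration are visible here, not the summarisation itself. As a rough sketch of how these pieces usually fit together in a map-reduce summary (this shows the general technique, not necessarily `SummaryAssistant`'s implementation; the prompts, model, and chunk sizes are placeholders):

```python
# Sketch of a map-reduce summary over a PDF; prompts, model and sizes are placeholders.
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

docs = UnstructuredPDFLoader("document.pdf").load()
chunks = CharacterTextSplitter(chunk_size=2000, chunk_overlap=200).split_documents(docs)

map_prompt = PromptTemplate.from_template("Summarize this passage:\n\n{text}")
combine_prompt = PromptTemplate.from_template("Combine these partial summaries:\n\n{text}")

chain = load_summarize_chain(
    ChatOpenAI(model="gpt-3.5-turbo"),
    chain_type="map_reduce",
    map_prompt=map_prompt,
    combine_prompt=combine_prompt,
)
summary = chain.invoke({"input_documents": chunks})["output_text"]
print(summary)
```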

View File

@@ -0,0 +1 @@
from .assistant_interface import AssistantInterface

View File

@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from typing import List
from modules.ingestion.entity.ingestion import IngestionEntity
from modules.assistant.entity.assistant import AssistantEntity
class IngestionInterface(ABC):
class AssistantInterface(ABC):
@abstractmethod
def get_all_ingestions(self) -> List[IngestionEntity]:
def get_all_assistants(self) -> List[AssistantEntity]:
"""
Get all the assistants
Args:

View File

@@ -0,0 +1,30 @@
from models.settings import get_supabase_client
from modules.assistant.entity.assistant import AssistantEntity
from modules.assistant.repository.assistant_interface import AssistantInterface


class Assistant(AssistantInterface):
    def __init__(self):
        supabase_client = get_supabase_client()
        self.db = supabase_client

    def get_all_assistants(self):
        response = self.db.from_("assistants").select("*").execute()
        if response.data:
            return response.data
        return []

    def get_assistant_by_id(self, ingestion_id) -> AssistantEntity:
        response = (
            self.db.from_("assistants")
            .select("*")
            .filter("id", "eq", ingestion_id)
            .execute()
        )
        if response.data:
            return AssistantEntity(**response.data[0])
        return None
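A small usage sketch of the repository; note that `get_assistant_by_id` returns `None` instead of raising when no row matches, so callers need to handle that. The import path follows the one used in the routes file, and the UUID is a placeholder.

```python
# Sketch: using the assistant repository; the UUID is a placeholder.
from uuid import UUID

from modules.assistant.service.assistant import Assistant

service = Assistant()

for row in service.get_all_assistants():  # raw Supabase rows (dicts)
    print(row["name"], row["id"])

assistant = service.get_assistant_by_id(UUID("00000000-0000-0000-0000-000000000000"))
if assistant is None:
    print("unknown assistant id")  # the repository returns None, it does not raise
```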

View File

@@ -1 +0,0 @@
from .ingestion_routes import ingestion_router

View File

@@ -1,2 +0,0 @@
from .inputs import CreateKnowledgeProperties
from .outputs import DeleteKnowledgeResponse

View File

@@ -1,18 +0,0 @@
from typing import Optional
from uuid import UUID
from pydantic import BaseModel
class CreateKnowledgeProperties(BaseModel):
brain_id: UUID
file_name: Optional[str] = None
url: Optional[str] = None
extension: str = "txt"
integration: Optional[str] = None
integration_link: Optional[str] = None
def dict(self, *args, **kwargs):
knowledge_dict = super().dict(*args, **kwargs)
knowledge_dict["brain_id"] = str(knowledge_dict.get("brain_id"))
return knowledge_dict

View File

@@ -1,8 +0,0 @@
from uuid import UUID
from pydantic import BaseModel
class DeleteKnowledgeResponse(BaseModel):
status: str = "delete"
knowledge_id: UUID

View File

@@ -1 +0,0 @@
from .ingestion import IngestionEntity

View File

@@ -1 +0,0 @@
from .ingestion_interface import IngestionInterface

View File

@@ -1,30 +0,0 @@
from models.settings import get_supabase_client
from modules.ingestion.entity.ingestion import IngestionEntity
from modules.ingestion.repository.ingestion_interface import IngestionInterface
class Ingestion(IngestionInterface):
def __init__(self):
supabase_client = get_supabase_client()
self.db = supabase_client
def get_all_ingestions(self):
response = self.db.from_("ingestions").select("*").execute()
if response.data:
return response.data
return []
def get_ingestion_by_id(self, ingestion_id) -> IngestionEntity:
response = (
self.db.from_("ingestions")
.select("*")
.filter("id", "eq", ingestion_id)
.execute()
)
if response.data:
return IngestionEntity(**response.data[0])
return None

View File

@@ -0,0 +1,115 @@
drop policy "INGESTION" on "public"."ingestions";
revoke delete on table "public"."ingestions" from "anon";
revoke insert on table "public"."ingestions" from "anon";
revoke references on table "public"."ingestions" from "anon";
revoke select on table "public"."ingestions" from "anon";
revoke trigger on table "public"."ingestions" from "anon";
revoke truncate on table "public"."ingestions" from "anon";
revoke update on table "public"."ingestions" from "anon";
revoke delete on table "public"."ingestions" from "authenticated";
revoke insert on table "public"."ingestions" from "authenticated";
revoke references on table "public"."ingestions" from "authenticated";
revoke select on table "public"."ingestions" from "authenticated";
revoke trigger on table "public"."ingestions" from "authenticated";
revoke truncate on table "public"."ingestions" from "authenticated";
revoke update on table "public"."ingestions" from "authenticated";
revoke delete on table "public"."ingestions" from "service_role";
revoke insert on table "public"."ingestions" from "service_role";
revoke references on table "public"."ingestions" from "service_role";
revoke select on table "public"."ingestions" from "service_role";
revoke trigger on table "public"."ingestions" from "service_role";
revoke truncate on table "public"."ingestions" from "service_role";
revoke update on table "public"."ingestions" from "service_role";
alter table "public"."ingestions" drop constraint "ingestions_pkey";
drop index if exists "public"."ingestions_pkey";
drop table "public"."ingestions";
create table "public"."assistants" (
"name" text,
"id" uuid not null default gen_random_uuid(),
"brain_id_required" boolean not null default true,
"file_1_required" boolean not null default false,
"url_required" boolean default false
);
alter table "public"."assistants" enable row level security;
CREATE UNIQUE INDEX ingestions_pkey ON public.assistants USING btree (id);
alter table "public"."assistants" add constraint "ingestions_pkey" PRIMARY KEY using index "ingestions_pkey";
grant delete on table "public"."assistants" to "anon";
grant insert on table "public"."assistants" to "anon";
grant references on table "public"."assistants" to "anon";
grant select on table "public"."assistants" to "anon";
grant trigger on table "public"."assistants" to "anon";
grant truncate on table "public"."assistants" to "anon";
grant update on table "public"."assistants" to "anon";
grant delete on table "public"."assistants" to "authenticated";
grant insert on table "public"."assistants" to "authenticated";
grant references on table "public"."assistants" to "authenticated";
grant select on table "public"."assistants" to "authenticated";
grant trigger on table "public"."assistants" to "authenticated";
grant truncate on table "public"."assistants" to "authenticated";
grant update on table "public"."assistants" to "authenticated";
grant delete on table "public"."assistants" to "service_role";
grant insert on table "public"."assistants" to "service_role";
grant references on table "public"."assistants" to "service_role";
grant select on table "public"."assistants" to "service_role";
grant trigger on table "public"."assistants" to "service_role";
grant truncate on table "public"."assistants" to "service_role";
grant update on table "public"."assistants" to "service_role";
create policy "INGESTION"
on "public"."assistants"
as permissive
for all
to service_role;
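On the data side, no seed rows appear in this hunk, so the new table would typically be populated and read through the existing Supabase client as in the sketch below; the row values are illustrative, not part of the migration.

```python
# Sketch: seeding and reading public.assistants via supabase-py; values are illustrative.
from models.settings import get_supabase_client

supabase = get_supabase_client()

supabase.table("assistants").insert(
    {
        "name": "summary",
        "brain_id_required": True,
        "file_1_required": True,
        "url_required": False,
    }
).execute()

rows = supabase.table("assistants").select("*").execute().data
print(rows)
```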