quivr/backend/routes/crawl_routes.py
Stan Girard 1d33fbd3eb
feat(file-system): added queue and filesystem (#1159)
* feat(queue): added

* feat(crawling): added queue

* fix(crawler): fixed github

* feat(docker): simplified docker compose

* feat(celery): added worker

* feat(files): now uploaded

* feat(files): missing routes

* feat(delete): added

* feat(storage): added policy and migrations

* feat(sqs): implemented

* feat(redis): added queue name variable

* fix(task): updated

* style(env): removed unused env

* ci(tests): removed broken tests
2023-09-14 11:56:59 +02:00

77 lines
2.6 KiB
Python

from typing import Optional
from uuid import UUID
from auth import AuthBearer, get_current_user
from celery_worker import process_crawl_and_notify
from crawl.crawler import CrawlWebsite
from fastapi import APIRouter, Depends, Query, Request
from models import Brain, UserIdentity, UserUsage
from models.databases.supabase.notifications import CreateNotificationProperties
from models.notifications import NotificationsStatusEnum
from repository.notification.add_notification import add_notification
from utils.file import convert_bytes
# Router on which this module's crawl endpoints are registered.
crawl_router = APIRouter()
@crawl_router.get("/crawl/healthz", tags=["Health"])
async def healthz():
    """Return a static payload signalling that the crawl routes respond."""
    health_payload = {"status": "ok"}
    return health_payload
@crawl_router.post("/crawl", dependencies=[Depends(AuthBearer())], tags=["Crawl"])
async def crawl_endpoint(
    request: Request,
    crawl_website: CrawlWebsite,
    brain_id: UUID = Query(..., description="The ID of the brain"),
    chat_id: Optional[UUID] = Query(None, description="The ID of the chat"),
    enable_summarization: bool = False,
    current_user: UserIdentity = Depends(get_current_user),
):
    """
    Crawl a website and process the crawled data.

    The crawl itself is not performed in the request: it is handed to the
    ``process_crawl_and_notify`` Celery task via ``.delay``.

    Args:
        request: Incoming request; only the ``Openai-Api-Key`` header is read.
        crawl_website: Crawl description; its ``url`` is forwarded to the task.
        brain_id: ID of the brain the crawled content is destined for.
        chat_id: Optional chat ID. When given, a pending "CRAWL" notification
            is created for that chat and its ID is forwarded to the task.
        enable_summarization: Forwarded unchanged to the task.
        current_user: Authenticated user, used to look up usage settings.

    Returns:
        ``{"message": "Crawl processing has started."}`` once the task has
        been queued, or a ``{"message": ..., "type": "error"}`` dict when the
        size check fails.
    """
    # [TODO] check if the user is the owner/editor of the brain
    brain = Brain(id=brain_id)

    user_daily_usage = UserUsage(
        id=current_user.id,
        email=current_user.email,
        openai_api_key=current_user.openai_api_key,
    )
    user_settings = user_daily_usage.get_user_settings()

    # [TODO] rate limiting of user for crawl
    openai_api_key = request.headers.get("Openai-Api-Key", None)
    if openai_api_key:
        brain.max_brain_size = user_settings.get("max_brain_size", 1000000000)

    # The size of the crawled content is not known at this point, so a fixed
    # placeholder value is checked against the limit.
    file_size = 1000000
    # NOTE(review): this is the configured maximum brain size, not the space
    # actually left in the brain -- confirm whether current usage should be
    # subtracted here.
    remaining_free_space = user_settings.get("max_brain_size", 1000000000)

    if remaining_free_space - file_size < 0:
        return {
            "message": f"❌ UserIdentity's brain will exceed maximum capacity with this upload. Maximum file allowed is : {convert_bytes(remaining_free_space)}",
            "type": "error",
        }

    # A notification is only created when the crawl is tied to a chat.
    crawl_notification = None
    if chat_id:
        crawl_notification = add_notification(
            CreateNotificationProperties(
                action="CRAWL",
                chat_id=chat_id,
                status=NotificationsStatusEnum.Pending,
            )
        )

    # Without a chat there is no notification object; forward None instead of
    # dereferencing it, which previously raised AttributeError on every
    # request that omitted chat_id.
    # NOTE(review): assumes the task tolerates notification_id=None -- confirm
    # against process_crawl_and_notify.
    notification_id = crawl_notification.id if crawl_notification else None

    process_crawl_and_notify.delay(
        crawl_website_url=crawl_website.url,
        enable_summarization=enable_summarization,
        brain_id=brain_id,
        openai_api_key=openai_api_key,
        notification_id=notification_id,
    )

    return {"message": "Crawl processing has started."}