From 064557f480791272bf26a680818776e53ae90052 Mon Sep 17 00:00:00 2001 From: Stan Girard Date: Fri, 12 Jul 2024 10:51:27 +0200 Subject: [PATCH] feat: Add bulk_id field to CreateNotification and Notification models (#2850) This pull request adds a new field called `bulk_id` to the `CreateNotification` and `Notification` models. The `bulk_id` field is an optional UUID that can be used for bulk operations. --- Makefile | 6 +++++- backend/api/quivr_api/celery_worker.py | 12 ++++++++++++ .../api/quivr_api/modules/knowledge/dto/inputs.py | 8 ++++++++ .../modules/knowledge/entity/knowledge.py | 1 + .../knowledge/repository/knowledge_interface.py | 12 +++++++++++- .../modules/knowledge/repository/knowledges.py | 14 ++++++++++++++ .../modules/knowledge/service/knowledge_service.py | 10 +++++++++- .../quivr_api/modules/notification/dto/inputs.py | 1 + .../modules/notification/entity/notification.py | 1 + .../quivr_api/modules/sync/utils/googleutils.py | 7 +++++-- .../modules/sync/utils/sharepointutils.py | 5 ++++- backend/api/quivr_api/modules/sync/utils/upload.py | 6 +++++- .../modules/upload/controller/upload_routes.py | 7 +++++-- backend/api/quivr_api/routes/crawl_routes.py | 2 ++ .../20240711141940_notification_bulk.sql | 3 +++ .../20240711150140_knowledge-process-status.sql | 7 +++++++ 16 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 backend/supabase/migrations/20240711141940_notification_bulk.sql create mode 100644 backend/supabase/migrations/20240711150140_knowledge-process-status.sql diff --git a/Makefile b/Makefile index 892c67733..6cc077c27 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,14 @@ test: pytest backend/ -dev: +dev-build: DOCKER_BUILDKIT=1 docker compose -f docker-compose.dev.yml build backend-core DOCKER_BUILDKIT=1 docker compose -f docker-compose.dev.yml up --build +dev: + DOCKER_BUILDKIT=1 docker compose -f docker-compose.dev.yml up + + dev-saas: docker compose -f docker-compose-dev-saas-supabase.yml build backend-core docker compose -f docker-compose-dev-saas-supabase.yml up --build diff --git a/backend/api/quivr_api/celery_worker.py b/backend/api/quivr_api/celery_worker.py index 4005827bd..4a0bc5b3a 100644 --- a/backend/api/quivr_api/celery_worker.py +++ b/backend/api/quivr_api/celery_worker.py @@ -13,6 +13,8 @@ from quivr_api.models.settings import get_supabase_client, get_supabase_db from quivr_api.modules.brain.integrations.Notion.Notion_connector import NotionConnector from quivr_api.modules.brain.service.brain_service import BrainService from quivr_api.modules.brain.service.brain_vector_service import BrainVectorService +from quivr_api.modules.knowledge.dto.inputs import KnowledgeStatus +from quivr_api.modules.knowledge.service.knowledge_service import KnowledgeService from quivr_api.modules.notification.dto.inputs import NotificationUpdatableProperties from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum from quivr_api.modules.notification.service.notification_service import ( @@ -40,6 +42,7 @@ def process_file_and_notify( notification_id=None, integration=None, delete_file=False, + knowledge_id: UUID = None, ): try: supabase_client = get_supabase_client() @@ -61,6 +64,7 @@ def process_file_and_notify( file_extension=file_extension, ) brain_vector_service = BrainVectorService(brain_id) + knowledge_service = KnowledgeService() if delete_file: # TODO fix bug brain_vector_service.delete_file_from_brain( file_original_name, only_vectors=True @@ -80,6 +84,10 @@ def process_file_and_notify( description="Your file has been properly uploaded!", ), ) + if knowledge_id: + knowledge_service.update_status_knowledge( + knowledge_id, KnowledgeStatus.UPLOADED + ) brain_service.update_brain_last_update_time(brain_id) return True @@ -96,6 +104,10 @@ def process_file_and_notify( description=f"An error occurred while processing the file: {e}", ), ) + if knowledge_id: + knowledge_service.update_status_knowledge( + knowledge_id, KnowledgeStatus.ERROR + ) @celery.task(name="process_crawl_and_notify") diff --git a/backend/api/quivr_api/modules/knowledge/dto/inputs.py b/backend/api/quivr_api/modules/knowledge/dto/inputs.py index b6d008c35..fed96f867 100644 --- a/backend/api/quivr_api/modules/knowledge/dto/inputs.py +++ b/backend/api/quivr_api/modules/knowledge/dto/inputs.py @@ -1,9 +1,16 @@ +from enum import Enum from typing import Optional from uuid import UUID from pydantic import BaseModel +class KnowledgeStatus(str, Enum): + PROCESSING = "PROCESSING" + UPLOADED = "UPLOADED" + ERROR = "ERROR" + + class CreateKnowledgeProperties(BaseModel): brain_id: UUID file_name: Optional[str] = None @@ -11,6 +18,7 @@ class CreateKnowledgeProperties(BaseModel): extension: str = "txt" integration: Optional[str] = None integration_link: Optional[str] = None + status: KnowledgeStatus = KnowledgeStatus.PROCESSING def dict(self, *args, **kwargs): knowledge_dict = super().dict(*args, **kwargs) diff --git a/backend/api/quivr_api/modules/knowledge/entity/knowledge.py b/backend/api/quivr_api/modules/knowledge/entity/knowledge.py index 504162d0c..c30ba51fd 100644 --- a/backend/api/quivr_api/modules/knowledge/entity/knowledge.py +++ b/backend/api/quivr_api/modules/knowledge/entity/knowledge.py @@ -10,3 +10,4 @@ class Knowledge(BaseModel): file_name: Optional[str] = None url: Optional[str] = None extension: str = "txt" + status: str diff --git a/backend/api/quivr_api/modules/knowledge/repository/knowledge_interface.py b/backend/api/quivr_api/modules/knowledge/repository/knowledge_interface.py index 707d4224b..1045b1cf9 100644 --- a/backend/api/quivr_api/modules/knowledge/repository/knowledge_interface.py +++ b/backend/api/quivr_api/modules/knowledge/repository/knowledge_interface.py @@ -2,7 +2,10 @@ from abc import ABC, abstractmethod from typing import List from uuid import UUID -from quivr_api.modules.knowledge.dto.inputs import CreateKnowledgeProperties +from quivr_api.modules.knowledge.dto.inputs import ( + CreateKnowledgeProperties, + KnowledgeStatus, +) from quivr_api.modules.knowledge.dto.outputs import DeleteKnowledgeResponse from quivr_api.modules.knowledge.entity.knowledge import Knowledge @@ -56,3 +59,10 @@ class KnowledgeInterface(ABC): brain_id (UUID): The id of the brain """ pass + + @abstractmethod + def update_status_knowledge(self, knowledge_id: UUID, status: KnowledgeStatus): + """ + Update the status of a knowledge + """ + pass diff --git a/backend/api/quivr_api/modules/knowledge/repository/knowledges.py b/backend/api/quivr_api/modules/knowledge/repository/knowledges.py index 41196aff2..621acd6fa 100644 --- a/backend/api/quivr_api/modules/knowledge/repository/knowledges.py +++ b/backend/api/quivr_api/modules/knowledge/repository/knowledges.py @@ -2,6 +2,7 @@ from uuid import UUID from fastapi import HTTPException from quivr_api.models.settings import get_supabase_client +from quivr_api.modules.knowledge.dto.inputs import KnowledgeStatus from quivr_api.modules.knowledge.dto.outputs import DeleteKnowledgeResponse from quivr_api.modules.knowledge.entity.knowledge import Knowledge from quivr_api.modules.knowledge.repository.knowledge_interface import ( @@ -111,3 +112,16 @@ class KnowledgeRepository(KnowledgeInterface): self.db.from_("knowledge").delete().filter( "brain_id", "eq", str(brain_id) ).execute() + + def update_status_knowledge(self, knowledge_id: UUID, status: KnowledgeStatus): + """ + Update the status of a knowledge + """ + updated_knowledge = ( + self.db.from_("knowledge") + .update({"status": status}) + .filter("id", "eq", str(knowledge_id)) + .execute() + ).data + + return True diff --git a/backend/api/quivr_api/modules/knowledge/service/knowledge_service.py b/backend/api/quivr_api/modules/knowledge/service/knowledge_service.py index 43b78fe7c..c1991649e 100644 --- a/backend/api/quivr_api/modules/knowledge/service/knowledge_service.py +++ b/backend/api/quivr_api/modules/knowledge/service/knowledge_service.py @@ -1,7 +1,10 @@ from uuid import UUID from quivr_api.logger import get_logger -from quivr_api.modules.knowledge.dto.inputs import CreateKnowledgeProperties +from quivr_api.modules.knowledge.dto.inputs import ( + CreateKnowledgeProperties, + KnowledgeStatus, +) from quivr_api.modules.knowledge.entity.knowledge import Knowledge from quivr_api.modules.knowledge.repository.knowledge_interface import ( KnowledgeInterface, @@ -27,6 +30,11 @@ class KnowledgeService: return knowledges + def update_status_knowledge(self, knowledge_id: UUID, status: KnowledgeStatus): + knowledge = self.repository.update_status_knowledge(knowledge_id, status) + + return knowledge + def get_knowledge(self, knowledge_id: UUID) -> Knowledge: knowledge = self.repository.get_knowledge_by_id(knowledge_id) diff --git a/backend/api/quivr_api/modules/notification/dto/inputs.py b/backend/api/quivr_api/modules/notification/dto/inputs.py index 7aed0fa3c..830a3ca56 100644 --- a/backend/api/quivr_api/modules/notification/dto/inputs.py +++ b/backend/api/quivr_api/modules/notification/dto/inputs.py @@ -12,6 +12,7 @@ class CreateNotification(BaseModel): status: NotificationsStatusEnum title: str description: Optional[str] = None + bulk_id: Optional[UUID] = None def model_dump(self, *args, **kwargs): notification_dict = super().model_dump(*args, **kwargs) diff --git a/backend/api/quivr_api/modules/notification/entity/notification.py b/backend/api/quivr_api/modules/notification/entity/notification.py index 4a1378b8d..5a8e5166d 100644 --- a/backend/api/quivr_api/modules/notification/entity/notification.py +++ b/backend/api/quivr_api/modules/notification/entity/notification.py @@ -16,6 +16,7 @@ class NotificationsStatusEnum(str, Enum): class Notification(BaseModel): id: UUID user_id: UUID + bulk_id: Optional[UUID] = None status: NotificationsStatusEnum title: str description: Optional[str] diff --git a/backend/api/quivr_api/modules/sync/utils/googleutils.py b/backend/api/quivr_api/modules/sync/utils/googleutils.py index cc690112d..1b40ad7f4 100644 --- a/backend/api/quivr_api/modules/sync/utils/googleutils.py +++ b/backend/api/quivr_api/modules/sync/utils/googleutils.py @@ -1,3 +1,4 @@ +import uuid from datetime import datetime, timedelta, timezone from io import BytesIO @@ -61,10 +62,12 @@ class GoogleSyncUtils(BaseModel): service = build("drive", "v3", credentials=creds) downloaded_files = [] + + bulk_id = uuid.uuid4() for file in files: logger.info("🔥🔥🔥🔥: %s", file) try: - file_id = file["id"] + file_id = str(file["id"]) file_name = file["name"] mime_type = file["mime_type"] modified_time = file["last_modified"] @@ -140,7 +143,7 @@ class GoogleSyncUtils(BaseModel): supported = False if (existing_file and existing_file.supported) or not existing_file: supported = True - await upload_file(to_upload_file, brain_id, current_user) # type: ignore + await upload_file(to_upload_file, brain_id, current_user, bulk_id) # type: ignore if existing_file: # Update the existing file record diff --git a/backend/api/quivr_api/modules/sync/utils/sharepointutils.py b/backend/api/quivr_api/modules/sync/utils/sharepointutils.py index d5797e44e..ac6581366 100644 --- a/backend/api/quivr_api/modules/sync/utils/sharepointutils.py +++ b/backend/api/quivr_api/modules/sync/utils/sharepointutils.py @@ -1,4 +1,5 @@ import os +import uuid from datetime import datetime, timedelta, timezone from io import BytesIO @@ -79,6 +80,8 @@ class AzureSyncUtils(BaseModel): headers = self.get_headers(token_data) downloaded_files = [] + # Generate random UUID + bulk_id = uuid.uuid4() for file in files: try: file_id = file["id"] @@ -139,7 +142,7 @@ class AzureSyncUtils(BaseModel): supported = False if (existing_file and existing_file.supported) or not existing_file: supported = True - await upload_file(to_upload_file, brain_id, current_user) + await upload_file(to_upload_file, brain_id, current_user, bulk_id) if existing_file: # Update the existing file record diff --git a/backend/api/quivr_api/modules/sync/utils/upload.py b/backend/api/quivr_api/modules/sync/utils/upload.py index 3bb4aa288..72a9502a6 100644 --- a/backend/api/quivr_api/modules/sync/utils/upload.py +++ b/backend/api/quivr_api/modules/sync/utils/upload.py @@ -1,4 +1,5 @@ import os +from typing import Optional from uuid import UUID from fastapi import HTTPException, UploadFile @@ -30,6 +31,7 @@ async def upload_file( upload_file: UploadFile, brain_id: UUID, current_user: str, + bulk_id: Optional[UUID] = None, ): validate_brain_authorization( brain_id, current_user, [RoleEnum.Editor, RoleEnum.Owner] @@ -40,8 +42,9 @@ async def upload_file( upload_notification = notification_service.add_notification( CreateNotification( user_id=current_user, + bulk_id=bulk_id, status=NotificationsStatusEnum.INFO, - title=f"Processing File {upload_file.filename}", + title=f"{upload_file.filename}", ) ) @@ -96,5 +99,6 @@ async def upload_file( file_original_name=upload_file.filename, brain_id=brain_id, notification_id=upload_notification.id, + knowledge_id=added_knowledge.id, ) return {"message": "File processing has started."} diff --git a/backend/api/quivr_api/modules/upload/controller/upload_routes.py b/backend/api/quivr_api/modules/upload/controller/upload_routes.py index dc49ccc91..1f7b999a4 100644 --- a/backend/api/quivr_api/modules/upload/controller/upload_routes.py +++ b/backend/api/quivr_api/modules/upload/controller/upload_routes.py @@ -41,6 +41,7 @@ async def healthz(): @upload_router.post("/upload", dependencies=[Depends(AuthBearer())], tags=["Upload"]) async def upload_file( uploadFile: UploadFile, + bulk_id: Optional[UUID] = Query(None, description="The ID of the bulk upload"), brain_id: UUID = Query(..., description="The ID of the brain"), chat_id: Optional[UUID] = Query(None, description="The ID of the chat"), current_user: UserIdentity = Depends(get_current_user), @@ -57,8 +58,9 @@ async def upload_file( upload_notification = notification_service.add_notification( CreateNotification( user_id=current_user.id, + bulk_id=bulk_id, status=NotificationsStatusEnum.INFO, - title=f"Processing File {uploadFile.filename}", + title=f"{uploadFile.filename}", ) ) @@ -106,12 +108,13 @@ async def upload_file( )[-1].lower(), ) - knowledge_service.add_knowledge(knowledge_to_add) + knowledge = knowledge_service.add_knowledge(knowledge_to_add) process_file_and_notify.delay( file_name=filename_with_brain_id, file_original_name=uploadFile.filename, brain_id=brain_id, notification_id=upload_notification.id, + knowledge_id=knowledge.id, ) return {"message": "File processing has started."} diff --git a/backend/api/quivr_api/routes/crawl_routes.py b/backend/api/quivr_api/routes/crawl_routes.py index 728a2f8e6..8a6b09506 100644 --- a/backend/api/quivr_api/routes/crawl_routes.py +++ b/backend/api/quivr_api/routes/crawl_routes.py @@ -37,6 +37,7 @@ async def healthz(): async def crawl_endpoint( request: Request, crawl_website: CrawlWebsite, + bulk_id: Optional[UUID] = Query(None, description="The ID of the bulk upload"), brain_id: UUID = Query(..., description="The ID of the brain"), chat_id: Optional[UUID] = Query(None, description="The ID of the chat"), current_user: UserIdentity = Depends(get_current_user), @@ -67,6 +68,7 @@ async def crawl_endpoint( upload_notification = notification_service.add_notification( CreateNotification( user_id=current_user.id, + bulk_id=bulk_id, status=NotificationsStatusEnum.INFO, title=f"Processing Crawl {crawl_website.url}", ) diff --git a/backend/supabase/migrations/20240711141940_notification_bulk.sql b/backend/supabase/migrations/20240711141940_notification_bulk.sql new file mode 100644 index 000000000..905106ab1 --- /dev/null +++ b/backend/supabase/migrations/20240711141940_notification_bulk.sql @@ -0,0 +1,3 @@ +alter table "public"."notifications" add column "bulk_id" uuid; + + diff --git a/backend/supabase/migrations/20240711150140_knowledge-process-status.sql b/backend/supabase/migrations/20240711150140_knowledge-process-status.sql new file mode 100644 index 000000000..36c318e09 --- /dev/null +++ b/backend/supabase/migrations/20240711150140_knowledge-process-status.sql @@ -0,0 +1,7 @@ +alter table "public"."knowledge" add column "status" text not null default 'UPLOADED'::text; + +CREATE UNIQUE INDEX knowledge_pkey ON public.knowledge USING btree (id); + +alter table "public"."knowledge" add constraint "knowledge_pkey" PRIMARY KEY using index "knowledge_pkey"; + +