fix: Refactor & update Dropbox refresh (#2875)

This pull request refactors the cloud sync utilities and fixes the Dropbox
token refresh. It includes the following changes:

- Created a `BaseSync` abstract class that gathers the provider-specific operations for each cloud
- Created a `SyncUtils` class that takes in a `BaseSync` and applies the common sync pipeline
- Fixed the token refresh method for Dropbox

Please review and merge this pull request to enable Dropbox sync
functionality in the application.
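
For reviewers, a minimal sketch of the composition pattern these changes introduce. It is simplified from the diff below: `SyncFile` and the storage/notification plumbing are omitted, and the `run` method here is an illustrative stand-in for the real `sync`/`_upload_files` flow, not the actual API.

```python
from abc import ABC, abstractmethod
from io import BytesIO
from typing import Dict, List


class BaseSync(ABC):
    """Provider-specific operations; one concrete subclass per cloud."""

    name: str

    @abstractmethod
    def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
        """Refresh expired tokens and return the updated credentials."""

    @abstractmethod
    def get_files(
        self, credentials: Dict, folder_id: str | None = None, recursive: bool = False
    ) -> List:
        """List the files in the provider's drive."""

    @abstractmethod
    def download_file(self, credentials: Dict, file) -> BytesIO:
        """Fetch one file's contents."""


class SyncUtils:
    """Provider-agnostic pipeline that receives a BaseSync by composition."""

    def __init__(self, sync_cloud: BaseSync):
        self.sync_cloud = sync_cloud

    def run(self, credentials: Dict, folder_id: str | None = None) -> List[str]:
        # Refresh up front so every later API call uses valid tokens.
        credentials = self.sync_cloud.check_and_refresh_access_token(credentials)
        downloaded: List[str] = []
        for file in self.sync_cloud.get_files(credentials, folder_id):
            self.sync_cloud.download_file(credentials, file)
            downloaded.append(file.name)  # the real pipeline also uploads and notifies
        return downloaded


# Usage: one pipeline, three providers, e.g.
# dropbox_sync_utils = SyncUtils(sync_cloud=DropboxSync())
```

The refresh fix itself comes down to constructing `dropbox.Dropbox` with the refresh token, app key, app secret, and token expiration (see `DropboxSync.link_dropbox` in the diff) instead of the access token alone, so the SDK's `check_and_refresh_access_token()` can actually renew an expired token.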

---------

Co-authored-by: Stan Girard <stan@quivr.app>
Co-authored-by: Amine Dirhoussi <aminediro@quivr.app>
Chloé Daems 2024-07-19 09:47:11 +02:00 committed by GitHub
parent 96cc5b5b5a
commit 3b68855a83
12 changed files with 748 additions and 1356 deletions

View File

@@ -28,7 +28,6 @@ from quivr_api.routes.crawl_routes import crawl_router
from quivr_api.routes.subscription_routes import subscription_router
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from starlette.middleware.sessions import SessionMiddleware
load_dotenv()
@@ -71,7 +70,6 @@ if sentry_dsn:
app = FastAPI()
add_cors_middleware(app)
app.add_middleware(SessionMiddleware, secret_key=str(os.getenv("SESSION_SECRET_KEY")))
app.include_router(brain_router)
app.include_router(chat_router)

View File

@@ -51,7 +51,7 @@ def authorize_dropbox(
auth_flow = DropboxOAuth2Flow(
DROPBOX_APP_KEY,
redirect_uri=BASE_REDIRECT_URI,
session=request.session,
session={},
csrf_token_session_key="csrf-token",
consumer_secret=DROPBOX_APP_SECRET,
token_access_type="offline",
@@ -88,10 +88,11 @@ def oauth2callback_dropbox(request: Request):
state = request.query_params.get("state")
if not state:
raise HTTPException(status_code=400, detail="Invalid state parameter")
request.session["csrf-token"] = state.split("|")[0] if "|" in state else ""
session = {}
session["csrf-token"] = state.split("|")[0] if "|" in state else ""
logger.debug("Keys in session : %s", request.session.keys())
logger.debug("Value in session : %s", request.session.values())
logger.debug("Keys in session : %s", session.keys())
logger.debug("Value in session : %s", session.values())
state = state.split("|")[1] if "|" in state else state # type: ignore
state_dict = {"state": state}
@@ -117,7 +118,7 @@ def oauth2callback_dropbox(request: Request):
auth_flow = DropboxOAuth2Flow(
DROPBOX_APP_KEY,
redirect_uri=BASE_REDIRECT_URI,
session=request.session,
session=session,
csrf_token_session_key="csrf-token",
consumer_secret=DROPBOX_APP_SECRET,
token_access_type="offline",
@@ -139,7 +140,6 @@ def oauth2callback_dropbox(request: Request):
"access_token": oauth_result.access_token,
"refresh_token": oauth_result.refresh_token,
"account_id": account_id,
"email": user_email,
"expires_in": str(oauth_result.expires_at),
}

View File

@@ -8,10 +8,10 @@ from quivr_api.modules.notification.service.notification_service import (
)
from quivr_api.modules.sync.dto.inputs import SyncsUserInput, SyncUserUpdateInput
from quivr_api.modules.sync.repository.sync_interfaces import SyncUserInterface
from quivr_api.modules.sync.utils.list_files import (
get_google_drive_files,
list_azure_files,
list_dropbox_files,
from quivr_api.modules.sync.utils.sync import (
AzureDriveSync,
DropboxSync,
GoogleDriveSync,
)
notification_service = NotificationService()
@@ -205,20 +205,19 @@ class SyncUser(SyncUserInterface):
provider = sync_user["provider"].lower()
if provider == "google":
logger.info("Getting files for Google sync")
return {
"files": get_google_drive_files(sync_user["credentials"], folder_id)
}
sync = GoogleDriveSync()
return {"files": sync.get_files(sync_user["credentials"], folder_id)}
elif provider == "azure":
logger.info("Getting files for Azure sync")
sync = AzureDriveSync()
return {
"files": list_azure_files(
sync_user["credentials"], folder_id, recursive
)
"files": sync.get_files(sync_user["credentials"], folder_id, recursive)
}
elif provider == "dropbox":
logger.info("Getting files for Drop Box sync")
sync = DropboxSync()
return {
"files": list_dropbox_files(
"files": sync.get_files(
sync_user["credentials"], folder_id if folder_id else "", recursive
)
}

View File

@@ -5,9 +5,12 @@ from quivr_api.logger import get_logger
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.dropboxutils import DropboxSyncUtils
from quivr_api.modules.sync.utils.googleutils import GoogleSyncUtils
from quivr_api.modules.sync.utils.sharepointutils import AzureSyncUtils
from quivr_api.modules.sync.utils.sync import (
AzureDriveSync,
DropboxSync,
GoogleDriveSync,
)
from quivr_api.modules.sync.utils.syncutils import SyncUtils
logger = get_logger(__name__)
@@ -24,25 +27,28 @@ async def _process_sync_active():
sync_files_repo_service = SyncFiles()
storage = Storage()
google_sync_utils = GoogleSyncUtils(
google_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=GoogleDriveSync(),
)
azure_sync_utils = AzureSyncUtils(
azure_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=AzureDriveSync(),
)
dropbox_sync_utils = DropboxSyncUtils(
dropbox_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=DropboxSync(),
)
active = await sync_active_service.get_syncs_active_in_interval()

View File

@@ -1,383 +0,0 @@
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
from typing import List
from fastapi import UploadFile
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import (
CreateNotification,
NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
NotificationService,
)
from quivr_api.modules.sync.dto.inputs import (
SyncFileInput,
SyncFileUpdateInput,
SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
get_google_drive_files,
get_google_drive_files_by_id,
)
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists
notification_service = NotificationService()
logger = get_logger(__name__)
class GoogleSyncUtils(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
sync_user_service: SyncUserService
sync_active_service: SyncService
sync_files_repo: SyncFiles
storage: Storage
async def _upload_files(
self,
credentials: dict,
files: List[SyncFile],
current_user: str,
brain_id: str,
sync_active_id: int,
):
"""
Download files from Google Drive.
Args:
credentials (dict): The credentials for accessing Google Drive.
files (list): The list of file metadata to download.
Returns:
dict: A dictionary containing the status of the download or an error message.
"""
logger.info("Downloading Google Drive files with metadata: %s", files)
creds = Credentials.from_authorized_user_info(credentials)
if creds.expired and creds.refresh_token:
creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
# Updating the credentials in the database
service = build("drive", "v3", credentials=creds)
downloaded_files = []
bulk_id = uuid.uuid4()
for file in files:
upload_notification = notification_service.add_notification(
CreateNotification(
user_id=current_user,
bulk_id=bulk_id,
status=NotificationsStatusEnum.INFO,
title=file.name,
category="sync",
brain_id=str(brain_id),
)
)
file.notification_id = str(upload_notification.id)
for file in files:
logger.info("🔥🔥🔥🔥: %s", file)
try:
file_id = file.id
file_name = file.name
mime_type = file.mime_type
modified_time = file.last_modified
file_url = file.web_view_link
# Convert Google Docs files to appropriate formats before downloading
if mime_type == "application/vnd.google-apps.document":
logger.debug(
"Converting Google Docs file with file_id: %s to DOCX.",
file_id,
)
request = service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)
file_name += ".docx"
elif mime_type == "application/vnd.google-apps.spreadsheet":
logger.debug(
"Converting Google Sheets file with file_id: %s to XLSX.",
file_id,
)
request = service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
file_name += ".xlsx"
elif mime_type == "application/vnd.google-apps.presentation":
logger.debug(
"Converting Google Slides file with file_id: %s to PPTX.",
file_id,
)
request = service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
)
file_name += ".pptx"
### Elif pdf, txt, md, csv, docx, xlsx, pptx, doc
elif file_name.split(".")[-1] in [
"pdf",
"txt",
"md",
"csv",
"docx",
"xlsx",
"pptx",
"doc",
]:
request = service.files().get_media(fileId=file_id)
else:
logger.warning(
"Skipping unsupported file type: %s for file_id: %s",
mime_type,
file_id,
)
continue
file_data = request.execute()
# Check if the file already exists in the storage
if check_file_exists(brain_id, file_name):
logger.debug("🔥 File already exists in the storage: %s", file_name)
self.storage.remove_file(brain_id + "/" + file_name)
BrainsVectors().delete_file_from_brain(brain_id, file_name)
to_upload_file = UploadFile(
file=BytesIO(file_data),
filename=file_name,
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file_name), None
)
supported = False
if (existing_file and existing_file.supported) or not existing_file:
supported = True
await upload_file(
to_upload_file,
brain_id,
current_user,
bulk_id,
"Google Drive",
file.web_view_link,
notification_id=file.notification_id,
) # type: ignore
if existing_file:
# Update the existing file record
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
last_modified=modified_time,
supported=supported,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file_name,
syncs_active_id=sync_active_id,
last_modified=modified_time,
brain_id=str(brain_id), # Convert UUID to string
supported=supported,
)
)
downloaded_files.append(file_name)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.SUCCESS,
description="File downloaded successfully",
),
)
except Exception as error:
logger.error(
"An error occurred while downloading Google Drive files: %s",
str(error), # Convert error to string
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file.name), None
)
# Update the existing file record
if existing_file:
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
supported=False,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file.name,
syncs_active_id=sync_active_id,
last_modified=file.last_modified,
brain_id=brain_id,
supported=False,
)
)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.ERROR,
description="Error downloading file",
),
)
return {"downloaded_files": downloaded_files}
async def sync(self, sync_active_id: int, user_id: str):
"""
Check if the Google sync has not been synced and download the folders and files based on the settings.
Args:
sync_active_id (int): The ID of the active sync.
user_id (str): The user ID associated with the active sync.
"""
# Retrieve the active sync details
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
if not sync_active:
logger.warning(
"No active sync found for sync_active_id: %s", sync_active_id
)
return None
# Check if the sync is due
last_synced = sync_active.get("last_synced")
force_sync = sync_active.get("force_sync", False)
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
if last_synced and not force_sync:
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
timezone.utc
)
current_time = datetime.now().astimezone()
# Debug logging to check the values
logger.debug("Last synced time (UTC): %s", last_synced_time)
logger.debug("Current time (local timezone): %s", current_time)
# Convert current_time to UTC for comparison
current_time_utc = current_time.astimezone(timezone.utc)
logger.debug("Current time (UTC): %s", current_time_utc)
time_difference = current_time_utc - last_synced_time
if time_difference < timedelta(minutes=sync_interval_minutes):
logger.info(
"Google sync is not due for sync_active_id: %s", sync_active_id
)
return None
# Retrieve the sync user details
sync_user = self.sync_user_service.get_syncs_user(
user_id=user_id, sync_user_id=sync_active["syncs_user_id"]
)
if not sync_user:
logger.warning(
"No sync user found for sync_active_id: %s, user_id: %s",
sync_active_id,
user_id,
)
return None
sync_user = sync_user[0]
if sync_user["provider"].lower() != "google":
logger.warning(
"Sync provider is not Google for sync_active_id: %s", sync_active_id
)
return None
# Download the folders and files from Google Drive
logger.info(
"Downloading folders and files from Google Drive for sync_active_id: %s",
sync_active_id,
)
settings = sync_active.get("settings", {})
folders = settings.get("folders", [])
files_to_download = settings.get("files", [])
files: List[SyncFile] = []
files_metadata: List[SyncFile] = []
if len(folders) > 0:
for folder in folders:
folder_files = get_google_drive_files(
sync_user["credentials"],
folder_id=folder,
recursive=True,
)
if isinstance(folder_files, list):
files.extend(folder_files)
else:
logger.error(
f"Error fetching files for folder {folder}: {folder_files}"
)
if len(files_to_download) > 0:
files_metadata = get_google_drive_files_by_id(
sync_user["credentials"], files_to_download
)
files = files + files_metadata # type: ignore
if "error" in files:
logger.error(
"Failed to download files from Google Drive for sync_active_id: %s",
sync_active_id,
)
return None
# Filter files that have been modified since the last sync
last_synced_time = datetime.fromisoformat(last_synced) if last_synced else None
files_to_download = [
file
for file in files
if not file.is_folder
and (
(
not last_synced_time
or datetime.fromisoformat(file.last_modified) > last_synced_time
)
or not check_file_exists(sync_active["brain_id"], file.name)
)
]
downloaded_files = await self._upload_files(
sync_user["credentials"],
files_to_download,
user_id,
sync_active["brain_id"],
sync_active_id,
)
# Update the last_synced timestamp
self.sync_active_service.update_sync_active(
sync_active_id,
SyncsActiveUpdateInput(
last_synced=datetime.now().astimezone().isoformat(),
force_sync=False,
),
)
logger.info(
"Google Drive sync completed for sync_active_id: %s", sync_active_id
)
return downloaded_files

View File

@@ -1,436 +0,0 @@
import os
from typing import Dict, List
import dropbox
import msal
import requests
from fastapi import HTTPException
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from quivr_api.logger import get_logger
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.utils.normalize import remove_special_characters
from requests import HTTPError
logger = get_logger(__name__)
# GOOGLE
def get_google_drive_files_by_id(
credentials: dict, file_ids: List[str]
) -> List[SyncFile]:
"""
Retrieve files from Google Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Google Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
creds = Credentials.from_authorized_user_info(credentials)
if creds.expired and creds.refresh_token:
creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
try:
service = build("drive", "v3", credentials=creds)
files: List[SyncFile] = []
for file_id in file_ids:
result = (
service.files()
.get(
fileId=file_id,
fields="id, name, mimeType, modifiedTime, webViewLink",
)
.execute()
)
files.append(
SyncFile(
name=result["name"],
id=result["id"],
is_folder=(
result["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=result["modifiedTime"],
mime_type=result["mimeType"],
web_view_link=result["webViewLink"],
)
)
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error("An error occurred while retrieving Google Drive files: %s", error)
return []
def get_google_drive_files(
credentials: dict, folder_id: str = None, recursive: bool = False
) -> List[SyncFile]:
"""
Retrieve files from Google Drive.
Args:
credentials (dict): The credentials for accessing Google Drive.
folder_id (str, optional): The folder ID to filter files. Defaults to None.
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
creds = Credentials.from_authorized_user_info(credentials)
if creds.expired and creds.refresh_token:
creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
# Updating the credentials in the database
try:
service = build("drive", "v3", credentials=creds)
if folder_id:
query = f"'{folder_id}' in parents"
else:
query = "'root' in parents or sharedWithMe"
page_token = None
files: List[SyncFile] = []
while True:
results = (
service.files()
.list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, webViewLink)",
pageToken=page_token,
)
.execute()
)
items = results.get("files", [])
if not items:
logger.info("No files found in Google Drive")
break
for item in items:
files.append(
SyncFile(
name=item["name"],
id=item["id"],
is_folder=(
item["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=item["modifiedTime"],
mime_type=item["mimeType"],
web_view_link=item["webViewLink"],
)
)
# If recursive is True and the item is a folder, get files from the folder
if recursive and item.mimeType == "application/vnd.google-apps.folder":
logger.warning(
"Calling Recursive for folder: %s",
item.name,
)
files.extend(
get_google_drive_files(credentials, item.id, recursive)
)
page_token = results.get("nextPageToken", None)
if page_token is None:
break
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error("An error occurred while retrieving Google Drive files: %s", error)
return []
# AZURE
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
AUTHORITY = "https://login.microsoftonline.com/common"
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
SCOPE = [
"https://graph.microsoft.com/Files.Read",
"https://graph.microsoft.com/User.Read",
"https://graph.microsoft.com/Sites.Read.All",
]
def get_azure_token_data(credentials):
if "access_token" not in credentials:
raise HTTPException(status_code=401, detail="Invalid token data")
return credentials
def refresh_azure_token(credentials):
if "refresh_token" not in credentials:
raise HTTPException(status_code=401, detail="No refresh token available")
client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
result = client.acquire_token_by_refresh_token(
credentials["refresh_token"], scopes=SCOPE
)
if "access_token" not in result:
raise HTTPException(status_code=400, detail="Failed to refresh token")
return result
def get_azure_headers(token_data):
return {
"Authorization": f"Bearer {token_data['access_token']}",
"Accept": "application/json",
}
def list_azure_files(credentials, folder_id=None, recursive=False) -> list[SyncFile]:
def fetch_files(endpoint, headers):
response = requests.get(endpoint, headers=headers)
if response.status_code == 401:
token_data = refresh_azure_token(credentials)
headers = get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
if response.status_code != 200:
return {"error": response.text}
return response.json().get("value", [])
token_data = get_azure_token_data(credentials)
headers = get_azure_headers(token_data)
endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"
if folder_id:
endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
)
items = fetch_files(endpoint, headers)
if not items:
logger.info("No files found in Azure Drive")
return []
files = []
for item in items:
file_data = SyncFile(
name=item.get("name"),
id=item.get("id"),
is_folder="folder" in item,
last_modified=item.get("lastModifiedDateTime"),
mime_type=item.get("file", {}).get("mimeType", "folder"),
web_view_link=item.get("webUrl"),
)
files.append(file_data)
# If recursive option is enabled and the item is a folder, fetch files from it
if recursive and file_data.is_folder:
folder_files = list_azure_files(
credentials, folder_id=file_data.id, recursive=True
)
files.extend(folder_files)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
def get_azure_files_by_id(
credentials: dict, file_ids: List[str]
) -> List[SyncFile] | dict:
"""
Retrieve files from Azure Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Azure Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
token_data = get_azure_token_data(credentials)
headers = get_azure_headers(token_data)
files = []
for file_id in file_ids:
endpoint = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
response = requests.get(endpoint, headers=headers)
if response.status_code == 401:
token_data = refresh_azure_token(credentials)
headers = get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
if response.status_code != 200:
logger.error(
"An error occurred while retrieving Azure Drive files: %s",
response.text,
)
return {"error": response.text}
result = response.json()
files.append(
SyncFile(
name=result.get("name"),
id=result.get("id"),
is_folder="folder" in result,
last_modified=result.get("lastModifiedDateTime"),
mime_type=result.get("file", {}).get("mimeType", "folder"),
web_view_link=result.get("webUrl"),
)
)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
# Drop Box
def list_dropbox_files(
credentials: dict, folder_id: str = "", recursive: bool = False
) -> List[SyncFile] | dict:
"""
Retrieve files from Dropbox.
Args:
credentials (dict): The credentials for accessing Dropbox.
folder_id (str, optional): The folder ID to filter files. Defaults to "".
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Dropbox files with folder_id: %s", folder_id)
# Verify credential has the access token
if "access_token" not in credentials:
print("Invalid token data")
return {"error": "Invalid token data"}
try:
dbx = dropbox.Dropbox(credentials["access_token"])
dbx.check_and_refresh_access_token()
credentials["access_token"] = dbx._oauth2_access_token
def fetch_files(metadata):
files = []
for file in metadata.entries:
shared_link = f"https://www.dropbox.com/preview{file.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(file, dropbox.files.FolderMetadata)
logger.debug(f"IS FOLDER ? {is_folder}")
files.append(
SyncFile(
name=file.name,
id=file.id,
is_folder=is_folder,
last_modified=(
str(file.client_modified) if not is_folder else ""
),
mime_type=(
file.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
)
return files
files = []
list_metadata = dbx.files_list_folder(folder_id, recursive=recursive)
files.extend(fetch_files(list_metadata))
while list_metadata.has_more:
list_metadata = dbx.files_list_folder_continue(list_metadata.cursor)
files.extend(fetch_files(list_metadata))
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.ApiError as e:
logger.error("Dropbox API error: %s", e)
raise HTTPException(status_code=500, detail="Dropbox API error")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise HTTPException(status_code=500, detail="Unexpected error occurred")
def get_dropbox_files_by_id(
credentials: Dict[str, str], file_ids: List[str]
) -> List[SyncFile] | Dict[str, str]:
"""
Retrieve files from Dropbox by their IDs.
Args:
credentials (dict): The credentials for accessing Dropbox.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Dropbox files with file_ids: %s", file_ids)
if "access_token" not in credentials:
raise HTTPException(status_code=401, detail="Invalid token data")
try:
dbx = dropbox.Dropbox(credentials["access_token"])
dbx.check_and_refresh_access_token()
credentials["access_token"] = dbx._oauth2_access_token
files = []
for file_id in file_ids:
try:
metadata = dbx.files_get_metadata(file_id)
logger.debug("Metadata for file_id %s: %s", file_id, metadata)
shared_link = f"https://www.dropbox.com/preview/{metadata.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(metadata, dropbox.files.FolderMetadata)
file_info = SyncFile(
name=metadata.name,
id=metadata.id,
is_folder=is_folder,
last_modified=(
str(metadata.client_modified) if not is_folder else ""
),
mime_type=(
metadata.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
files.append(file_info)
except dropbox.exceptions.ApiError as api_err:
logger.error("Dropbox API error for file_id %s: %s", file_id, api_err)
continue # Skip this file and proceed with the next one
except Exception as err:
logger.error("Unexpected error for file_id %s: %s", file_id, err)
continue # Skip this file and proceed with the next one
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.AuthError as auth_err:
logger.error("Authentication error: %s", auth_err)
raise HTTPException(status_code=401, detail="Authentication error")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise HTTPException(status_code=500, detail="Unexpected error occurred")

View File

@@ -1,387 +0,0 @@
import os
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
import msal
import requests
from fastapi import HTTPException, UploadFile
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import (
CreateNotification,
NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
NotificationService,
)
from quivr_api.modules.sync.dto.inputs import (
SyncFileInput,
SyncFileUpdateInput,
SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
get_azure_files_by_id,
list_azure_files,
)
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists
notification_service = NotificationService()
logger = get_logger(__name__)
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
AUTHORITY = "https://login.microsoftonline.com/common"
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
SCOPE = [
"https://graph.microsoft.com/Files.Read",
"https://graph.microsoft.com/User.Read",
"https://graph.microsoft.com/Sites.Read.All",
]
class AzureSyncUtils(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
sync_user_service: SyncUserService
sync_active_service: SyncService
sync_files_repo: SyncFiles
storage: Storage
def get_headers(self, token_data):
return {
"Authorization": f"Bearer {token_data['access_token']}",
"Accept": "application/json",
}
def refresh_token(self, refresh_token):
client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
result = client.acquire_token_by_refresh_token(refresh_token, scopes=SCOPE)
if "access_token" not in result:
raise HTTPException(status_code=400, detail="Failed to refresh token")
return result
async def _upload_files(
self,
token_data: dict,
files: list[SyncFile],
current_user: str,
brain_id: str,
sync_active_id: int,
):
"""
Download files from Azure.
Args:
token_data (dict): The token data for accessing Azure.
files (list): The list of file metadata to download.
Returns:
dict: A dictionary containing the status of the download or an error message.
"""
logger.info("Downloading Azure files with metadata: %s", files)
headers = self.get_headers(token_data)
downloaded_files = []
# Generate random UUID
bulk_id = uuid.uuid4()
for file in files:
upload_notification = notification_service.add_notification(
CreateNotification(
user_id=current_user,
bulk_id=bulk_id,
status=NotificationsStatusEnum.INFO,
title=file.name,
category="sync",
brain_id=str(brain_id),
)
)
file.notification_id = str(upload_notification.id)
for file in files:
try:
file_id = file.id
file_name = file.name
modified_time = file.last_modified
download_endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
)
logger.info("Downloading file: %s", file_name)
download_response = requests.get(
download_endpoint, headers=headers, stream=True
)
if download_response.status_code == 401:
token_data = self.refresh_token(token_data["refresh_token"])
headers = self.get_headers(token_data)
download_response = requests.get(
download_endpoint, headers=headers, stream=True
)
if download_response.status_code != 200:
logger.error("Failed to download file: %s", file_name)
continue
file_data = BytesIO(download_response.content)
# Check if the file already exists in the storage
if check_file_exists(brain_id, file_name):
logger.debug("🔥 File already exists in the storage: %s", file_name)
self.storage.remove_file(brain_id + "/" + file_name)
BrainsVectors().delete_file_from_brain(brain_id, file_name)
# Check if the file extension is compatible
if file_name.split(".")[-1] not in [
"pdf",
"txt",
"md",
"csv",
"docx",
"xlsx",
"pptx",
"doc",
]:
logger.info("File is not compatible: %s", file_name)
continue
to_upload_file = UploadFile(
file=file_data,
filename=file_name,
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file_name), None
)
supported = False
if (existing_file and existing_file.supported) or not existing_file:
supported = True
await upload_file(
to_upload_file,
brain_id,
current_user,
bulk_id,
"Share Point",
file.web_view_link,
notification_id=file.notification_id,
)
if existing_file:
# Update the existing file record
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
last_modified=modified_time,
supported=supported,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file_name,
syncs_active_id=sync_active_id,
last_modified=modified_time,
brain_id=brain_id,
supported=supported,
)
)
downloaded_files.append(file_name)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.SUCCESS,
description="File downloaded successfully",
),
)
except Exception as error:
logger.error(
"An error occurred while downloading Azure files: %s", error
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file.name), None
)
# Update the existing file record
if existing_file:
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
supported=False,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file.name,
syncs_active_id=sync_active_id,
last_modified=file.last_modified,
brain_id=brain_id,
supported=False,
)
)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.ERROR,
description="Error downloading file",
),
)
return {"downloaded_files": downloaded_files}
async def sync(self, sync_active_id: int, user_id: str):
"""
Check if the Azure sync has not been synced and download the folders and files based on the settings.
Args:
sync_active_id (int): The ID of the active sync.
user_id (str): The user ID associated with the active sync.
"""
# Retrieve the active sync details
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
if not sync_active:
logger.warning(
"No active sync found for sync_active_id: %s", sync_active_id
)
return None
# Check if the sync is due
last_synced = sync_active.get("last_synced")
force_sync = sync_active.get("force_sync", False)
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
if last_synced and not force_sync:
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
timezone.utc
)
current_time = datetime.now().astimezone()
# Debug logging to check the values
logger.debug("Last synced time (UTC): %s", last_synced_time)
logger.debug("Current time (local timezone): %s", current_time)
# Convert current_time to UTC for comparison
current_time_utc = current_time.astimezone(timezone.utc)
logger.debug("Current time (UTC): %s", current_time_utc)
time_difference = current_time_utc - last_synced_time
if time_difference < timedelta(minutes=sync_interval_minutes):
logger.info(
"Azure sync is not due for sync_active_id: %s", sync_active_id
)
return None
# Retrieve the sync user details
sync_user = self.sync_user_service.get_syncs_user(
user_id=user_id, sync_user_id=sync_active["syncs_user_id"]
)
if not sync_user:
logger.warning(
"No sync user found for sync_active_id: %s, user_id: %s",
sync_active_id,
user_id,
)
return None
sync_user = sync_user[0]
if sync_user["provider"].lower() != "azure":
logger.warning(
"Sync provider is not Azure for sync_active_id: %s", sync_active_id
)
return None
# Download the folders and files from Azure
logger.info(
"Downloading folders and files from Azure for sync_active_id: %s",
sync_active_id,
)
# Get the folder id from the settings from sync_active
settings = sync_active.get("settings", {})
folders = settings.get("folders", [])
files_to_download = settings.get("files", [])
files = []
files_metadata = []
if len(folders) > 0:
files = []
for folder in folders:
files.extend(
list_azure_files(
sync_user["credentials"],
folder_id=folder,
recursive=True,
)
)
if len(files_to_download) > 0:
files_metadata = get_azure_files_by_id(
sync_user["credentials"],
files_to_download,
)
files = files + files_metadata # type: ignore
if "error" in files:
logger.error(
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
# Filter files that have been modified since the last sync
last_synced_time = (
datetime.fromisoformat(last_synced).astimezone(timezone.utc)
if last_synced
else None
)
logger.info("Files retrieved from Azure: %s", len(files))
logger.info("Files retrieved from Azure: %s", files)
files_to_download = [
file
for file in files
if not file.is_folder
and (
(
not last_synced_time
or datetime.strptime(
file.last_modified, "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=timezone.utc)
> last_synced_time
)
or not check_file_exists(sync_active["brain_id"], file.name)
)
]
downloaded_files = await self._upload_files(
sync_user["credentials"],
files_to_download,
user_id,
sync_active["brain_id"],
sync_active_id,
)
if "error" in downloaded_files:
logger.error(
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
# Update the last_synced timestamp
self.sync_active_service.update_sync_active(
sync_active_id,
SyncsActiveUpdateInput(
last_synced=datetime.now().astimezone().isoformat(), force_sync=False
),
)
logger.info("Azure sync completed for sync_active_id: %s", sync_active_id)
return downloaded_files

View File

@@ -0,0 +1,622 @@
import json
import os
import time
from abc import ABC, abstractmethod
from io import BytesIO
from typing import Any, Dict, List
import dropbox
import msal
import requests
from fastapi import HTTPException
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from quivr_api.logger import get_logger
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.utils.normalize import remove_special_characters
from requests import HTTPError
logger = get_logger(__name__)
class BaseSync(ABC):
name: str
lower_name: str
datetime_format: str
@abstractmethod
def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]:
raise NotImplementedError
@abstractmethod
def get_files(
self, credentials: Dict, folder_id: str | None = None, recursive: bool = False
) -> List[SyncFile]:
raise NotImplementedError
@abstractmethod
def check_and_refresh_access_token(self, credentials: dict) -> Dict:
raise NotImplementedError
@abstractmethod
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
raise NotImplementedError
class GoogleDriveSync(BaseSync):
name = "Google Drive"
lower_name = "google"
creds: Credentials | None = None
service: Any | None = None
datetime_format: str = "%Y-%m-%dT%H:%M:%S.%fZ"
def check_and_refresh_access_token(self, credentials: dict) -> Dict:
self.creds = Credentials.from_authorized_user_info(credentials)
if self.creds.expired and self.creds.refresh_token:
self.creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
return json.loads(self.creds.to_json())
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
file_id = file.id
file_name = file.name
mime_type = file.mime_type
modified_time = file.last_modified
if not self.creds:
self.check_and_refresh_access_token(credentials)
if not self.service:
self.service = build("drive", "v3", credentials=self.creds)
# Convert Google Docs files to appropriate formats before downloading
if mime_type == "application/vnd.google-apps.document":
logger.debug(
"Converting Google Docs file with file_id: %s to DOCX.",
file_id,
)
request = self.service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)
file_name += ".docx"
elif mime_type == "application/vnd.google-apps.spreadsheet":
logger.debug(
"Converting Google Sheets file with file_id: %s to XLSX.",
file_id,
)
request = self.service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
file_name += ".xlsx"
elif mime_type == "application/vnd.google-apps.presentation":
logger.debug(
"Converting Google Slides file with file_id: %s to PPTX.",
file_id,
)
request = self.service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
)
file_name += ".pptx"
### Elif pdf, txt, md, csv, docx, xlsx, pptx, doc
elif file_name.split(".")[-1] in [
"pdf",
"txt",
"md",
"csv",
"docx",
"xlsx",
"pptx",
"doc",
]:
request = self.service.files().get_media(fileId=file_id)
else:
logger.warning(
"Skipping unsupported file type: %s for file_id: %s",
mime_type,
file_id,
)
raise Exception("Unsupported file type")
file_data = request.execute()
return BytesIO(file_data)
def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]:
"""
Retrieve files from Google Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Google Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
self.check_and_refresh_access_token(credentials)
try:
service = build("drive", "v3", credentials=self.creds)
files: List[SyncFile] = []
for file_id in file_ids:
result = (
service.files()
.get(
fileId=file_id,
fields="id, name, mimeType, modifiedTime, webViewLink",
)
.execute()
)
files.append(
SyncFile(
name=result["name"],
id=result["id"],
is_folder=(
result["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=result["modifiedTime"],
mime_type=result["mimeType"],
web_view_link=result["webViewLink"],
)
)
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error(
"An error occurred while retrieving Google Drive files: %s", error
)
raise Exception("Failed to retrieve files")
def get_files(
self, credentials: dict, folder_id: str | None = None, recursive: bool = False
) -> List[SyncFile]:
"""
Retrieve files from Google Drive.
Args:
credentials (dict): The credentials for accessing Google Drive.
folder_id (str, optional): The folder ID to filter files. Defaults to None.
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
self.check_and_refresh_access_token(credentials)
# Updating the credentials in the database
try:
service = build("drive", "v3", credentials=self.creds)
if folder_id:
query = f"'{folder_id}' in parents"
else:
query = "'root' in parents or sharedWithMe"
page_token = None
files: List[SyncFile] = []
while True:
results = (
service.files()
.list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, webViewLink)",
pageToken=page_token,
)
.execute()
)
items = results.get("files", [])
if not items:
logger.info("No files found in Google Drive")
break
for item in items:
files.append(
SyncFile(
name=item["name"],
id=item["id"],
is_folder=(
item["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=item["modifiedTime"],
mime_type=item["mimeType"],
web_view_link=item["webViewLink"],
)
)
                    # If recursive is True and the item is a folder, get files from it
                    # (Drive API items are plain dicts, so use key access, not attributes)
                    if (
                        recursive
                        and item["mimeType"] == "application/vnd.google-apps.folder"
                    ):
                        logger.warning(
                            "Calling Recursive for folder: %s",
                            item["name"],
                        )
                        files.extend(self.get_files(credentials, item["id"], recursive))
page_token = results.get("nextPageToken", None)
if page_token is None:
break
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error(
"An error occurred while retrieving Google Drive files: %s", error
)
raise Exception("Failed to retrieve files")
class AzureDriveSync(BaseSync):
name = "Azure Drive"
lower_name = "azure"
datetime_format: str = "%Y-%m-%dT%H:%M:%SZ"
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
AUTHORITY = "https://login.microsoftonline.com/common"
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
SCOPE = [
"https://graph.microsoft.com/Files.Read",
"https://graph.microsoft.com/User.Read",
"https://graph.microsoft.com/Sites.Read.All",
]
@staticmethod
def get_azure_token_data(credentials):
if "access_token" not in credentials:
raise HTTPException(status_code=401, detail="Invalid token data")
return credentials
@staticmethod
def get_azure_headers(token_data):
return {
"Authorization": f"Bearer {token_data['access_token']}",
"Accept": "application/json",
}
def check_and_refresh_access_token(self, credentials) -> Dict:
if "refresh_token" not in credentials:
raise HTTPException(status_code=401, detail="No refresh token available")
client = msal.PublicClientApplication(self.CLIENT_ID, authority=self.AUTHORITY)
result = client.acquire_token_by_refresh_token(
credentials["refresh_token"], scopes=self.SCOPE
)
if "access_token" not in result:
raise HTTPException(status_code=400, detail="Failed to refresh token")
credentials.update(
{
"access_token": result["access_token"],
"refresh_token": result.get(
"refresh_token", credentials["refresh_token"]
),
"id_token": result.get("id_token", credentials.get("id_token")),
}
)
return credentials
def get_files(self, credentials, folder_id=None, recursive=False) -> List[SyncFile]:
def fetch_files(endpoint, headers, max_retries=1):
logger.debug(f"fetching files from {endpoint}.")
retry_count = 0
while retry_count <= max_retries:
try:
response = requests.get(endpoint, headers=headers)
                    # Retry with the refreshed token
if response.status_code == 401:
token_data = self.check_and_refresh_access_token(credentials)
headers = self.get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
else:
response.raise_for_status()
return response.json().get("value", [])
except HTTPError as e:
logger.exception(
f"azure_list_files got exception : {e}. headers: {headers}. {retry_count} retrying."
)
# Exponential backoff
time.sleep(2**retry_count)
retry_count += 1
raise HTTPException(
504, detail="can't connect to azure endpoint to retrieve files."
)
token_data = self.get_azure_token_data(credentials)
headers = self.get_azure_headers(token_data)
endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"
if folder_id:
endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
)
items = fetch_files(endpoint, headers)
if not items:
logger.info("No files found in Azure Drive")
return []
files = []
for item in items:
file_data = SyncFile(
name=item.get("name"),
id=item.get("id"),
is_folder="folder" in item,
last_modified=item.get("lastModifiedDateTime"),
mime_type=item.get("file", {}).get("mimeType", "folder"),
web_view_link=item.get("webUrl"),
)
files.append(file_data)
# If recursive option is enabled and the item is a folder, fetch files from it
if recursive and file_data.is_folder:
folder_files = self.get_files(
credentials, folder_id=file_data.id, recursive=True
)
files.extend(folder_files)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
def get_files_by_id(
self, credentials: dict, file_ids: List[str]
) -> List[SyncFile] | dict:
"""
Retrieve files from Azure Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Azure Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
token_data = self.get_azure_token_data(credentials)
headers = self.get_azure_headers(token_data)
files = []
for file_id in file_ids:
endpoint = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
response = requests.get(endpoint, headers=headers)
if response.status_code == 401:
token_data = self.check_and_refresh_access_token(credentials)
headers = self.get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
if response.status_code != 200:
logger.error(
"An error occurred while retrieving Azure Drive files: %s",
response.text,
)
raise Exception("Failed to retrieve files")
result = response.json()
files.append(
SyncFile(
name=result.get("name"),
id=result.get("id"),
is_folder="folder" in result,
last_modified=result.get("lastModifiedDateTime"),
mime_type=result.get("file", {}).get("mimeType", "folder"),
web_view_link=result.get("webUrl"),
)
)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
file_id = file.id
file_name = file.name
modified_time = file.last_modified
headers = self.get_azure_headers(credentials)
download_endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
)
logger.info("Downloading file: %s", file_name)
download_response = requests.get(
download_endpoint, headers=headers, stream=True
)
return BytesIO(download_response.content)
class DropboxSync(BaseSync):
name = "Dropbox"
lower_name = "dropbox"
dbx: dropbox.Dropbox | None = None
datetime_format: str = "%Y-%m-%d %H:%M:%S"
def link_dropbox(self, credentials) -> dropbox.Dropbox:
return dropbox.Dropbox(
credentials["access_token"],
oauth2_refresh_token=credentials["refresh_token"],
app_key=os.getenv("DROPBOX_APP_KEY"),
oauth2_access_token_expiration=credentials.get("expires_at"),
app_secret=os.getenv("DROPBOX_APP_SECRET"),
)
def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
if not self.dbx:
self.dbx = self.link_dropbox(credentials)
self.dbx.check_and_refresh_access_token()
credentials["access_token"] = self.dbx._oauth2_access_token
credentials["refresh_token"] = self.dbx.refresh_access_token
return credentials
def get_files(
self, credentials: Dict, folder_id: str = "", recursive: bool = False
) -> List[SyncFile]:
"""
Retrieve files from Dropbox.
Args:
credentials (dict): The credentials for accessing Dropbox.
folder_id (str, optional): The folder ID to filter files. Defaults to "".
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Dropbox files with folder_id: %s", folder_id)
# Verify credential has the access token
if "access_token" not in credentials:
logger.error("Invalid access token")
raise Exception("Invalid access token")
try:
if not self.dbx:
self.dbx = dropbox.Dropbox(
credentials["access_token"],
oauth2_refresh_token=credentials["refresh_token"],
app_key=os.getenv("DROPBOX_APP_KEY"),
oauth2_access_token_expiration=credentials.get("expires_at"),
app_secret=os.getenv("DROPBOX_APP_SECRET"),
)
self.dbx.check_and_refresh_access_token()
credentials["access_token"] = self.dbx._oauth2_access_token
def fetch_files(metadata):
files = []
for file in metadata.entries:
shared_link = f"https://www.dropbox.com/preview{file.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(file, dropbox.files.FolderMetadata)
files.append(
SyncFile(
name=file.name,
id=file.id,
is_folder=is_folder,
last_modified=(
str(file.client_modified) if not is_folder else ""
),
mime_type=(
file.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
)
return files
files = []
list_metadata = self.dbx.files_list_folder(folder_id, recursive=recursive)
files.extend(fetch_files(list_metadata))
while list_metadata.has_more:
list_metadata = self.dbx.files_list_folder_continue(
list_metadata.cursor
)
files.extend(fetch_files(list_metadata))
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.ApiError as e:
logger.error("Dropbox API error: %s", e)
raise Exception("Failed to retrieve files")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise Exception("Failed to retrieve files")
def get_files_by_id(
self, credentials: Dict[str, str], file_ids: List[str]
) -> List[SyncFile]:
"""
Retrieve files from Dropbox by their IDs.
Args:
credentials (dict): The credentials for accessing Dropbox.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Dropbox files with file_ids: %s", file_ids)
if "access_token" not in credentials:
logger.error("Access token is not in the credentials")
raise Exception("Invalid access token")
try:
if not self.dbx:
self.dbx = self.link_dropbox(credentials)
self.dbx.check_and_refresh_access_token()
credentials["access_token"] = self.dbx._oauth2_access_token # type: ignore
files = []
for file_id in file_ids:
try:
metadata = self.dbx.files_get_metadata(file_id)
shared_link = f"https://www.dropbox.com/preview/{metadata.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(metadata, dropbox.files.FolderMetadata)
file_info = SyncFile(
name=metadata.name,
id=metadata.id,
is_folder=is_folder,
last_modified=(
str(metadata.client_modified) if not is_folder else ""
),
mime_type=(
metadata.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
files.append(file_info)
except dropbox.exceptions.ApiError as api_err:
logger.error(
"Dropbox API error for file_id %s: %s", file_id, api_err
)
continue # Skip this file and proceed with the next one
except Exception as err:
logger.error("Unexpected error for file_id %s: %s", file_id, err)
continue # Skip this file and proceed with the next one
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.AuthError as auth_err:
logger.error("Authentication error: %s", auth_err)
raise Exception("Failed to retrieve files")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise Exception("Failed to retrieve files")
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
file_id = str(file.id)
if not self.dbx:
self.dbx = self.link_dropbox(credentials)
metadata, file_data = self.dbx.files_download(file_id) # type: ignore
return BytesIO(file_data.content)

View File

@@ -1,15 +1,16 @@
import os
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
from typing import List
import dropbox
from fastapi import UploadFile
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import CreateNotification
from quivr_api.modules.notification.dto.inputs import (
CreateNotification,
NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
NotificationService,
@@ -19,52 +20,46 @@ from quivr_api.modules.sync.dto.inputs import (
SyncFileUpdateInput,
SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
get_dropbox_files_by_id,
list_dropbox_files,
)
from quivr_api.modules.sync.utils.sync import BaseSync
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists
notification_service = NotificationService()
logger = get_logger(__name__)
APP_KEY = os.getenv("DROPBOX_APP_KEY")
APP_SECRET = os.getenv("DROPBOW_CONSUMER_SECRET")
notification_service = NotificationService()
class DropboxSyncUtils(BaseModel):
class SyncUtils(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
storage: Storage
sync_files_repo: SyncFiles
sync_active_service: SyncService
sync_user_service: SyncUserService
sync_active_service: SyncService
sync_files_repo: SyncFiles
storage: Storage
sync_cloud: BaseSync
async def _upload_files(
self,
token_data: dict,
files: list,
credentials: dict,
files: List[SyncFile],
current_user: str,
brain_id: str,
sync_active_id: int,
):
"""
Download files from DropBox.
Download files from an external cloud.
Args:
credentials (dict): The credentials for accessin DropBox Drive.
credentials (dict): The token data for accessing the external cloud.
files (list): The list of file metadata to download.
Returns:
dict: A dictionary containing the status of the download or an error message.
"""
dbx = dropbox.Dropbox(token_data["access_token"])
dbx.check_and_refresh_access_token()
token_data["access_token"] = dbx._oauth2_access_token
credentials = self.sync_cloud.check_and_refresh_access_token(credentials)
downloaded_files = []
bulk_id = uuid.uuid4()
@ -80,22 +75,20 @@ class DropboxSyncUtils(BaseModel):
brain_id=str(brain_id),
)
)
file.notification_id = str(upload_notification.id)
for file in files:
logger.info("Processing file: %s", file.name)
try:
file_id = str(file.id)
file_id = file.id
file_name = file.name
mime_type = file.mime_type
modified_time = file.last_modified
metadata, file_data = dbx.files_download(file_id) # type: ignore
# logger.debug("🔥 Filedata :", file_data.content)
file_data = BytesIO(file_data.content)
file_data = self.sync_cloud.download_file(credentials, file)
# Check if the file already exists in the storage
if check_file_exists(brain_id, file_name):
logger.debug("🔥 File already exists in the storage: %s", file_name)
logger.debug("%s already exists in the storage", file_name)
self.storage.remove_file(brain_id + "/" + file_name)
BrainsVectors().delete_file_from_brain(brain_id, file_name)
@ -133,7 +126,7 @@ class DropboxSyncUtils(BaseModel):
brain_id,
current_user,
bulk_id,
"DropBox",
self.sync_cloud.name,
file.web_view_link,
notification_id=file.notification_id,
)
@ -159,11 +152,19 @@ class DropboxSyncUtils(BaseModel):
)
)
downloaded_files.append(file_name)
downloaded_files.append(file_name)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.SUCCESS,
description="File downloaded successfully",
),
)
except Exception as error:
logger.error(
"An error occurred while downloading DropBox files: %s", error
"An error occurred while downloading %s files: %s",
self.sync_cloud.name,
error,
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
@ -189,16 +190,25 @@ class DropboxSyncUtils(BaseModel):
supported=False,
)
)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.ERROR,
description="Error downloading file",
),
)
return {"downloaded_files": downloaded_files}
async def sync(self, sync_active_id: int, user_id: str):
"""
Check if the Dropbox sync has not been synced and download the folders and files based on the settings.
Check if the specific sync is due and download the folders and files based on its settings.
Args:
sync_active_id (int): The ID of the active sync.
user_id (str): The user ID associated with the active sync.
"""
# Retrieve the active sync details
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
if not sync_active:
@ -211,7 +221,6 @@ class DropboxSyncUtils(BaseModel):
last_synced = sync_active.get("last_synced")
force_sync = sync_active.get("force_sync", False)
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
if last_synced and not force_sync:
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
timezone.utc
@ -228,7 +237,9 @@ class DropboxSyncUtils(BaseModel):
time_difference = current_time_utc - last_synced_time
if time_difference < timedelta(minutes=sync_interval_minutes):
logger.info(
"DropBox sync is not due for sync_active_id: %s", sync_active_id
"%s sync is not due for sync_active_id: %s",
self.sync_cloud.name,
sync_active_id,
)
return None
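The scheduling check above reduces to a small pure function. A sketch for illustration (is_sync_due is a hypothetical helper name, not part of this commit):

    from datetime import datetime, timedelta, timezone

    def is_sync_due(last_synced: str | None, sync_interval_minutes: int, force_sync: bool) -> bool:
        # A forced sync or a never-synced entry always runs.
        if force_sync or not last_synced:
            return True
        last_synced_time = datetime.fromisoformat(last_synced).astimezone(timezone.utc)
        # Due once at least sync_interval_minutes have elapsed since the last run.
        return datetime.now(timezone.utc) - last_synced_time >= timedelta(
            minutes=sync_interval_minutes
        )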
@ -245,15 +256,18 @@ class DropboxSyncUtils(BaseModel):
return None
sync_user = sync_user[0]
if sync_user["provider"].lower() != "dropbox":
if sync_user["provider"].lower() != self.sync_cloud.lower_name:
logger.warning(
"Sync provider is not DropBox for sync_active_id: %s", sync_active_id
"Sync provider is not %s for sync_active_id: %s",
self.sync_cloud.name,
sync_active_id,
)
return None
# Download the folders and files from DropBox
# Download the folders and files from the cloud provider
logger.info(
"Downloading folders and files from Dropbox for sync_active_id: %s",
"Downloading folders and files from %s for sync_active_id: %s",
self.sync_cloud.name,
sync_active_id,
)
@ -261,20 +275,19 @@ class DropboxSyncUtils(BaseModel):
settings = sync_active.get("settings", {})
folders = settings.get("folders", [])
files_to_download = settings.get("files", [])
files = []
files: List[SyncFile] = []
files_metadata = []
if len(folders) > 0:
files = []
for folder in folders:
files.extend(
list_dropbox_files(
self.sync_cloud.get_files(
sync_user["credentials"],
folder_id=folder,
recursive=True,
)
)
if len(files_to_download) > 0:
files_metadata = get_dropbox_files_by_id(
files_metadata = self.sync_cloud.get_files_by_id(
sync_user["credentials"],
files_to_download,
)
@ -282,7 +295,7 @@ class DropboxSyncUtils(BaseModel):
if "error" in files:
logger.error(
"Failed to download files from DropBox for sync_active_id: %s",
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
@ -293,8 +306,7 @@ class DropboxSyncUtils(BaseModel):
if last_synced
else None
)
logger.info("Files retrieved from DropBox: %s", len(files))
logger.info("Files retrieved from DropBox: %s", files)
logger.info("Files retrieved from %s: %s", self.sync_cloud.lower_name, files)
files_to_download = [
file
@ -304,7 +316,8 @@ class DropboxSyncUtils(BaseModel):
(
not last_synced_time
or datetime.strptime(
file.last_modified, "%Y-%m-%d %H:%M:%S"
file.last_modified,
(self.sync_cloud.datetime_format),
).replace(tzinfo=timezone.utc)
> last_synced_time
)
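The timestamp half of this filter can be read as a standalone predicate, parameterized by the provider's datetime_format from BaseSync. A sketch (helper name hypothetical):

    from datetime import datetime, timezone

    def modified_since_last_sync(file, last_synced_time, datetime_format: str) -> bool:
        # With no previous successful sync, every listed file qualifies.
        if not last_synced_time:
            return True
        modified = datetime.strptime(file.last_modified, datetime_format).replace(
            tzinfo=timezone.utc
        )
        return modified > last_synced_time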
@ -321,7 +334,7 @@ class DropboxSyncUtils(BaseModel):
)
if "error" in downloaded_files:
logger.error(
"Failed to download files from DropBox for sync_active_id: %s",
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
@ -333,5 +346,9 @@ class DropboxSyncUtils(BaseModel):
last_synced=datetime.now().astimezone().isoformat(), force_sync=False
),
)
logger.info("DropBox sync completed for sync_active_id: %s", sync_active_id)
logger.info(
"%s sync completed for sync_active_id: %s",
self.sync_cloud.lower_name,
sync_active_id,
)
return downloaded_files
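Taken together, SyncUtils only reaches the provider through its sync_cloud attribute, so the surface BaseSync must expose follows directly from the calls above. A minimal sketch of that interface as implied by this diff (the committed class may differ):

    from abc import ABC, abstractmethod
    from io import BytesIO
    from typing import Dict, List, Optional

    from quivr_api.modules.sync.entity.sync import SyncFile

    class BaseSync(ABC):
        # Attributes read by SyncUtils above.
        name: str
        lower_name: str
        datetime_format: str

        @abstractmethod
        def check_and_refresh_access_token(self, credentials: Dict) -> Dict: ...

        @abstractmethod
        def get_files(
            self, credentials: Dict, folder_id: Optional[str] = None, recursive: bool = False
        ) -> List[SyncFile]: ...

        @abstractmethod
        def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]: ...

        @abstractmethod
        def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO: ...

Wiring a provider in is then a constructor argument, e.g. SyncUtils(sync_active_service=..., sync_user_service=..., sync_files_repo=..., storage=..., sync_cloud=DropboxSync()), where DropboxSync stands for the Dropbox implementation shown earlier and one concrete BaseSync exists per cloud.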

View File

@ -85,12 +85,9 @@ class QuivrKnowledge(BaseModel):
file_name: str | None = None
url: str | None = None
extension: str = "txt"
integration: str | None = None
integration_link: str | None = None
status: str = "PROCESSING"
# NOTE: for compatibility issues with langchain <-> PydanticV1
class SearchResult(BaseModelV1):
chunk: Document

backend/poetry.lock generated
View File

@ -9535,4 +9535,5 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "963b9ff228f2478505802a0e2915d2802e9d92396e33cde549f33bdb07f2a30d"
content-hash = "4f318e2f8561fac7ad1f4aa603e5264accf857d6e7e1bfd9fcf770279f6a2e9a"

View File

@ -2721,7 +2721,12 @@
resolved "https://registry.npmjs.org/@types/throttle-debounce/-/throttle-debounce-2.1.0.tgz"
integrity sha512-5eQEtSCoESnh2FsiLTxE121IiE60hnMqcb435fShf4bpLRjEu1Eoekht23y6zXS9Ts3l+Szu3TARnTsA0GkOkQ==
"@types/unist@*", "@types/unist@^2", "@types/unist@^2.0.0":
"@types/unist@*":
version "2.0.7"
resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz"
integrity sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==
"@types/unist@^2", "@types/unist@^2.0.0":
version "2.0.7"
resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz"
integrity sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==
@ -3367,7 +3372,7 @@ chalk@^2.4.2:
escape-string-regexp "^1.0.5"
supports-color "^5.3.0"
chalk@^3.0.0, chalk@3.0.0:
chalk@^3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz"
integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
@ -3391,6 +3396,14 @@ chalk@^4.1.0:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
chalk@3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz"
integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
dependencies:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
change-case@^5.4.2:
version "5.4.2"
resolved "https://registry.npmjs.org/change-case/-/change-case-5.4.2.tgz"
@ -3745,7 +3758,14 @@ date-fns@2.30.0:
dependencies:
"@babel/runtime" "^7.21.0"
debug@^2.2.0, debug@^2.6.9:
debug@^2.2.0:
version "2.6.9"
resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
dependencies:
ms "2.0.0"
debug@^2.6.9:
version "2.6.9"
resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
@ -3766,35 +3786,7 @@ debug@^4.0.0:
dependencies:
ms "2.1.2"
debug@^4.1.0:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.1.1:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.3.1:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.3.2:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.3.4, debug@4:
debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4, debug@4:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
@ -4821,7 +4813,7 @@ github-from-package@0.0.0:
resolved "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz"
integrity sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==
glob-parent@^5.1.2:
glob-parent@^5.1.2, glob-parent@~5.1.2:
version "5.1.2"
resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@ -4835,25 +4827,7 @@ glob-parent@^6.0.2:
dependencies:
is-glob "^4.0.3"
glob-parent@~5.1.2:
version "5.1.2"
resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
dependencies:
is-glob "^4.0.1"
glob@^10.2.2:
version "10.3.10"
resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
dependencies:
foreground-child "^3.1.0"
jackspeak "^2.3.5"
minimatch "^9.0.1"
minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
path-scurry "^1.10.1"
glob@^10.3.10:
glob@^10.2.2, glob@^10.3.10, glob@10.3.10:
version "10.3.10"
resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
@ -4887,17 +4861,6 @@ glob@^8.0.3:
minimatch "^5.0.1"
once "^1.3.0"
glob@10.3.10:
version "10.3.10"
resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
dependencies:
foreground-child "^3.1.0"
jackspeak "^2.3.5"
minimatch "^9.0.1"
minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
path-scurry "^1.10.1"
glob@7.1.6:
version "7.1.6"
resolved "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz"
@ -5921,7 +5884,7 @@ lowlight@^2.0.0:
fault "^2.0.0"
highlight.js "~11.8.0"
lru-cache@^10.0.1:
lru-cache@^10.0.1, "lru-cache@^9.1.1 || ^10.0.0":
version "10.0.3"
resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.3.tgz"
integrity sha512-B7gr+F6MkqB3uzINHXNctGieGsRTMwIBgxkp0yq/5BwcuDzD4A8wQpHQW6vDAm1uKSLQghmRdD9sKqf2vJ1cEg==
@ -5940,11 +5903,6 @@ lru-cache@^6.0.0:
dependencies:
yallist "^4.0.0"
"lru-cache@^9.1.1 || ^10.0.0":
version "10.0.3"
resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.3.tgz"
integrity sha512-B7gr+F6MkqB3uzINHXNctGieGsRTMwIBgxkp0yq/5BwcuDzD4A8wQpHQW6vDAm1uKSLQghmRdD9sKqf2vJ1cEg==
lz-string@^1.5.0:
version "1.5.0"
resolved "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz"