Mirror of https://github.com/StanGirard/quivr.git, synced 2024-11-30 01:32:52 +03:00
fix: Refacto & update dropbox refresh (#2875)
This pull request adds functionality to sync files with Dropbox. It includes the following changes:
- Created a BaseSync class holding the provider-specific functions for each cloud
- Created a SyncUtils class that takes a BaseSync and applies the sync pipeline
- Fixed the refresh method for Dropbox
Please review and merge this pull request to enable Dropbox sync functionality in the application.
---------
Co-authored-by: Stan Girard <stan@quivr.app>
Co-authored-by: Amine Dirhoussi <aminediro@quivr.app>
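The structure described above is a small composition pattern: each cloud provider implements one BaseSync interface, and a single provider-agnostic SyncUtils pipeline is constructed with whichever BaseSync instance it is given. The following is a minimal illustrative sketch only, with simplified signatures and placeholder bodies; the real classes (BaseSync and its GoogleDriveSync, AzureDriveSync, and DropboxSync implementations in quivr_api.modules.sync.utils.sync, and SyncUtils in quivr_api.modules.sync.utils.syncutils) take additional collaborators such as the sync services, the files repository, and storage.

from abc import ABC, abstractmethod
from dataclasses import dataclass
from io import BytesIO
from typing import Dict, List


class BaseSync(ABC):
    # Provider-specific operations (Google Drive, Azure/SharePoint, Dropbox).
    name: str

    @abstractmethod
    def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
        ...

    @abstractmethod
    def get_files(self, credentials: Dict, folder_id: str = "", recursive: bool = False) -> List[str]:
        ...

    @abstractmethod
    def download_file(self, credentials: Dict, file_id: str) -> BytesIO:
        ...


@dataclass
class SyncUtils:
    # Provider-agnostic pipeline: the same steps run for whichever BaseSync it is given.
    sync_cloud: BaseSync

    def sync(self, credentials: Dict) -> List[str]:
        credentials = self.sync_cloud.check_and_refresh_access_token(credentials)
        downloaded = []
        for file_id in self.sync_cloud.get_files(credentials, recursive=True):
            self.sync_cloud.download_file(credentials, file_id)
            downloaded.append(file_id)
        return downloaded


class DropboxSync(BaseSync):
    # Hypothetical stand-in; the real implementation wraps the dropbox SDK
    # and refreshes tokens through it.
    name = "DropBox"

    def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
        return credentials  # placeholder: no real refresh performed here

    def get_files(self, credentials: Dict, folder_id: str = "", recursive: bool = False) -> List[str]:
        return ["file-1", "file-2"]  # placeholder listing

    def download_file(self, credentials: Dict, file_id: str) -> BytesIO:
        return BytesIO(b"")  # placeholder content


dropbox_sync_utils = SyncUtils(sync_cloud=DropboxSync())
print(dropbox_sync_utils.sync({"access_token": "example"}))

With this composition, adding a new provider only means writing another BaseSync implementation; the pipeline itself stays unchanged, which is what lets _process_sync_active in the diff below build its Google, Azure, and Dropbox helpers from the same SyncUtils class.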
This commit is contained in:
parent 96cc5b5b5a
commit 3b68855a83
@@ -28,7 +28,6 @@ from quivr_api.routes.crawl_routes import crawl_router
from quivr_api.routes.subscription_routes import subscription_router
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from starlette.middleware.sessions import SessionMiddleware

load_dotenv()

@@ -71,7 +70,6 @@ if sentry_dsn:

app = FastAPI()
add_cors_middleware(app)
app.add_middleware(SessionMiddleware, secret_key=str(os.getenv("SESSION_SECRET_KEY")))

app.include_router(brain_router)
app.include_router(chat_router)
@@ -51,7 +51,7 @@ def authorize_dropbox(
auth_flow = DropboxOAuth2Flow(
DROPBOX_APP_KEY,
redirect_uri=BASE_REDIRECT_URI,
session=request.session,
session={},
csrf_token_session_key="csrf-token",
consumer_secret=DROPBOX_APP_SECRET,
token_access_type="offline",
@@ -88,10 +88,11 @@ def oauth2callback_dropbox(request: Request):
state = request.query_params.get("state")
if not state:
raise HTTPException(status_code=400, detail="Invalid state parameter")
request.session["csrf-token"] = state.split("|")[0] if "|" in state else ""
session = {}
session["csrf-token"] = state.split("|")[0] if "|" in state else ""

logger.debug("Keys in session : %s", request.session.keys())
logger.debug("Value in session : %s", request.session.values())
logger.debug("Keys in session : %s", session.keys())
logger.debug("Value in session : %s", session.values())

state = state.split("|")[1] if "|" in state else state # type: ignore
state_dict = {"state": state}
@@ -117,7 +118,7 @@ def oauth2callback_dropbox(request: Request):
auth_flow = DropboxOAuth2Flow(
DROPBOX_APP_KEY,
redirect_uri=BASE_REDIRECT_URI,
session=request.session,
session=session,
csrf_token_session_key="csrf-token",
consumer_secret=DROPBOX_APP_SECRET,
token_access_type="offline",
@@ -139,7 +140,6 @@ def oauth2callback_dropbox(request: Request):
"access_token": oauth_result.access_token,
"refresh_token": oauth_result.refresh_token,
"account_id": account_id,
"email": user_email,
"expires_in": str(oauth_result.expires_at),
}
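Taken together, the DropboxOAuth2Flow changes above drop the dependency on Starlette's request.session: the flow only needs a mutable mapping in which to store and read the CSRF token under csrf_token_session_key, so a plain dict is passed instead, and on the callback that dict is rebuilt from the state query parameter. A condensed, hypothetical sketch of the pattern (handler names, the redirect URI value, and the exact finish() wiring are assumptions, not the code from this commit):

import os

from dropbox import DropboxOAuth2Flow

DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "app-key")
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "app-secret")
BASE_REDIRECT_URI = "http://localhost:5050/sync/dropbox/oauth2callback"  # assumed value


def make_flow(session: dict) -> DropboxOAuth2Flow:
    # The flow reads and writes the CSRF token in whatever mapping it is given,
    # so no server-side session middleware is required.
    return DropboxOAuth2Flow(
        DROPBOX_APP_KEY,
        redirect_uri=BASE_REDIRECT_URI,
        session=session,
        csrf_token_session_key="csrf-token",
        consumer_secret=DROPBOX_APP_SECRET,
        token_access_type="offline",  # "offline" is what yields a refresh_token
    )


def authorize() -> str:
    # start() stores the CSRF token in the (empty) dict and embeds it in `state`.
    return make_flow({}).start()


def oauth2callback(query_params: dict) -> tuple[str, str]:
    # Rebuild the CSRF mapping from the returned state ("csrf|...") and finish the flow.
    state = query_params.get("state", "")
    session = {"csrf-token": state.split("|")[0] if "|" in state else ""}
    oauth_result = make_flow(session).finish(query_params)
    return oauth_result.access_token, oauth_result.refresh_token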
@@ -8,10 +8,10 @@ from quivr_api.modules.notification.service.notification_service import (
)
from quivr_api.modules.sync.dto.inputs import SyncsUserInput, SyncUserUpdateInput
from quivr_api.modules.sync.repository.sync_interfaces import SyncUserInterface
from quivr_api.modules.sync.utils.list_files import (
get_google_drive_files,
list_azure_files,
list_dropbox_files,
from quivr_api.modules.sync.utils.sync import (
AzureDriveSync,
DropboxSync,
GoogleDriveSync,
)

notification_service = NotificationService()
@@ -205,20 +205,19 @@ class SyncUser(SyncUserInterface):
provider = sync_user["provider"].lower()
if provider == "google":
logger.info("Getting files for Google sync")
return {
"files": get_google_drive_files(sync_user["credentials"], folder_id)
}
sync = GoogleDriveSync()
return {"files": sync.get_files(sync_user["credentials"], folder_id)}
elif provider == "azure":
logger.info("Getting files for Azure sync")
sync = AzureDriveSync()
return {
"files": list_azure_files(
sync_user["credentials"], folder_id, recursive
)
"files": sync.get_files(sync_user["credentials"], folder_id, recursive)
}
elif provider == "dropbox":
logger.info("Getting files for Drop Box sync")
sync = DropboxSync()
return {
"files": list_dropbox_files(
"files": sync.get_files(
sync_user["credentials"], folder_id if folder_id else "", recursive
)
}
@@ -5,9 +5,12 @@ from quivr_api.logger import get_logger
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.dropboxutils import DropboxSyncUtils
from quivr_api.modules.sync.utils.googleutils import GoogleSyncUtils
from quivr_api.modules.sync.utils.sharepointutils import AzureSyncUtils
from quivr_api.modules.sync.utils.sync import (
AzureDriveSync,
DropboxSync,
GoogleDriveSync,
)
from quivr_api.modules.sync.utils.syncutils import SyncUtils

logger = get_logger(__name__)
@@ -24,25 +27,28 @@ async def _process_sync_active():
sync_files_repo_service = SyncFiles()
storage = Storage()

google_sync_utils = GoogleSyncUtils(
google_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=GoogleDriveSync(),
)

azure_sync_utils = AzureSyncUtils(
azure_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=AzureDriveSync(),
)

dropbox_sync_utils = DropboxSyncUtils(
dropbox_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=DropboxSync(),
)

active = await sync_active_service.get_syncs_active_in_interval()
@@ -1,383 +0,0 @@
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from io import BytesIO
|
||||
from typing import List
|
||||
|
||||
from fastapi import UploadFile
|
||||
from google.auth.transport.requests import Request as GoogleRequest
|
||||
from google.oauth2.credentials import Credentials
|
||||
from googleapiclient.discovery import build
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from quivr_api.logger import get_logger
|
||||
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
|
||||
from quivr_api.modules.knowledge.repository.storage import Storage
|
||||
from quivr_api.modules.notification.dto.inputs import (
|
||||
CreateNotification,
|
||||
NotificationUpdatableProperties,
|
||||
)
|
||||
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
|
||||
from quivr_api.modules.notification.service.notification_service import (
|
||||
NotificationService,
|
||||
)
|
||||
from quivr_api.modules.sync.dto.inputs import (
|
||||
SyncFileInput,
|
||||
SyncFileUpdateInput,
|
||||
SyncsActiveUpdateInput,
|
||||
)
|
||||
from quivr_api.modules.sync.entity.sync import SyncFile
|
||||
from quivr_api.modules.sync.repository.sync_files import SyncFiles
|
||||
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
|
||||
from quivr_api.modules.sync.utils.list_files import (
|
||||
get_google_drive_files,
|
||||
get_google_drive_files_by_id,
|
||||
)
|
||||
from quivr_api.modules.sync.utils.upload import upload_file
|
||||
from quivr_api.modules.upload.service.upload_file import check_file_exists
|
||||
|
||||
notification_service = NotificationService()
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class GoogleSyncUtils(BaseModel):
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
sync_user_service: SyncUserService
|
||||
sync_active_service: SyncService
|
||||
sync_files_repo: SyncFiles
|
||||
storage: Storage
|
||||
|
||||
async def _upload_files(
|
||||
self,
|
||||
credentials: dict,
|
||||
files: List[SyncFile],
|
||||
current_user: str,
|
||||
brain_id: str,
|
||||
sync_active_id: int,
|
||||
):
|
||||
"""
|
||||
Download files from Google Drive.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Google Drive.
|
||||
files (list): The list of file metadata to download.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the status of the download or an error message.
|
||||
"""
|
||||
logger.info("Downloading Google Drive files with metadata: %s", files)
|
||||
creds = Credentials.from_authorized_user_info(credentials)
|
||||
if creds.expired and creds.refresh_token:
|
||||
creds.refresh(GoogleRequest())
|
||||
logger.info("Google Drive credentials refreshed")
|
||||
# Updating the credentials in the database
|
||||
|
||||
service = build("drive", "v3", credentials=creds)
|
||||
downloaded_files = []
|
||||
|
||||
bulk_id = uuid.uuid4()
|
||||
|
||||
for file in files:
|
||||
upload_notification = notification_service.add_notification(
|
||||
CreateNotification(
|
||||
user_id=current_user,
|
||||
bulk_id=bulk_id,
|
||||
status=NotificationsStatusEnum.INFO,
|
||||
title=file.name,
|
||||
category="sync",
|
||||
brain_id=str(brain_id),
|
||||
)
|
||||
)
|
||||
|
||||
file.notification_id = str(upload_notification.id)
|
||||
|
||||
for file in files:
|
||||
logger.info("🔥🔥🔥🔥: %s", file)
|
||||
try:
|
||||
file_id = file.id
|
||||
file_name = file.name
|
||||
mime_type = file.mime_type
|
||||
modified_time = file.last_modified
|
||||
file_url = file.web_view_link
|
||||
# Convert Google Docs files to appropriate formats before downloading
|
||||
if mime_type == "application/vnd.google-apps.document":
|
||||
logger.debug(
|
||||
"Converting Google Docs file with file_id: %s to DOCX.",
|
||||
file_id,
|
||||
)
|
||||
request = service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
)
|
||||
file_name += ".docx"
|
||||
elif mime_type == "application/vnd.google-apps.spreadsheet":
|
||||
logger.debug(
|
||||
"Converting Google Sheets file with file_id: %s to XLSX.",
|
||||
file_id,
|
||||
)
|
||||
request = service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
)
|
||||
file_name += ".xlsx"
|
||||
elif mime_type == "application/vnd.google-apps.presentation":
|
||||
logger.debug(
|
||||
"Converting Google Slides file with file_id: %s to PPTX.",
|
||||
file_id,
|
||||
)
|
||||
request = service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
)
|
||||
file_name += ".pptx"
|
||||
### Elif pdf, txt, md, csv, docx, xlsx, pptx, doc
|
||||
elif file_name.split(".")[-1] in [
|
||||
"pdf",
|
||||
"txt",
|
||||
"md",
|
||||
"csv",
|
||||
"docx",
|
||||
"xlsx",
|
||||
"pptx",
|
||||
"doc",
|
||||
]:
|
||||
request = service.files().get_media(fileId=file_id)
|
||||
else:
|
||||
logger.warning(
|
||||
"Skipping unsupported file type: %s for file_id: %s",
|
||||
mime_type,
|
||||
file_id,
|
||||
)
|
||||
continue
|
||||
|
||||
file_data = request.execute()
|
||||
|
||||
# Check if the file already exists in the storage
|
||||
if check_file_exists(brain_id, file_name):
|
||||
logger.debug("🔥 File already exists in the storage: %s", file_name)
|
||||
self.storage.remove_file(brain_id + "/" + file_name)
|
||||
BrainsVectors().delete_file_from_brain(brain_id, file_name)
|
||||
|
||||
to_upload_file = UploadFile(
|
||||
file=BytesIO(file_data),
|
||||
filename=file_name,
|
||||
)
|
||||
|
||||
# Check if the file already exists in the database
|
||||
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
|
||||
existing_file = next(
|
||||
(f for f in existing_files if f.path == file_name), None
|
||||
)
|
||||
supported = False
|
||||
if (existing_file and existing_file.supported) or not existing_file:
|
||||
supported = True
|
||||
|
||||
await upload_file(
|
||||
to_upload_file,
|
||||
brain_id,
|
||||
current_user,
|
||||
bulk_id,
|
||||
"Google Drive",
|
||||
file.web_view_link,
|
||||
notification_id=file.notification_id,
|
||||
) # type: ignore
|
||||
|
||||
if existing_file:
|
||||
# Update the existing file record
|
||||
self.sync_files_repo.update_sync_file(
|
||||
existing_file.id,
|
||||
SyncFileUpdateInput(
|
||||
last_modified=modified_time,
|
||||
supported=supported,
|
||||
),
|
||||
)
|
||||
else:
|
||||
# Create a new file record
|
||||
self.sync_files_repo.create_sync_file(
|
||||
SyncFileInput(
|
||||
path=file_name,
|
||||
syncs_active_id=sync_active_id,
|
||||
last_modified=modified_time,
|
||||
brain_id=str(brain_id), # Convert UUID to string
|
||||
supported=supported,
|
||||
)
|
||||
)
|
||||
|
||||
downloaded_files.append(file_name)
|
||||
notification_service.update_notification_by_id(
|
||||
file.notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.SUCCESS,
|
||||
description="File downloaded successfully",
|
||||
),
|
||||
)
|
||||
except Exception as error:
|
||||
logger.error(
|
||||
"An error occurred while downloading Google Drive files: %s",
|
||||
str(error), # Convert error to string
|
||||
)
|
||||
# Check if the file already exists in the database
|
||||
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
|
||||
existing_file = next(
|
||||
(f for f in existing_files if f.path == file.name), None
|
||||
)
|
||||
# Update the existing file record
|
||||
if existing_file:
|
||||
self.sync_files_repo.update_sync_file(
|
||||
existing_file.id,
|
||||
SyncFileUpdateInput(
|
||||
supported=False,
|
||||
),
|
||||
)
|
||||
else:
|
||||
# Create a new file record
|
||||
self.sync_files_repo.create_sync_file(
|
||||
SyncFileInput(
|
||||
path=file.name,
|
||||
syncs_active_id=sync_active_id,
|
||||
last_modified=file.last_modified,
|
||||
brain_id=brain_id,
|
||||
supported=False,
|
||||
)
|
||||
)
|
||||
notification_service.update_notification_by_id(
|
||||
file.notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.ERROR,
|
||||
description="Error downloading file",
|
||||
),
|
||||
)
|
||||
return {"downloaded_files": downloaded_files}
|
||||
|
||||
async def sync(self, sync_active_id: int, user_id: str):
|
||||
"""
|
||||
Check if the Google sync has not been synced and download the folders and files based on the settings.
|
||||
|
||||
Args:
|
||||
sync_active_id (int): The ID of the active sync.
|
||||
user_id (str): The user ID associated with the active sync.
|
||||
"""
|
||||
|
||||
# Retrieve the active sync details
|
||||
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
|
||||
if not sync_active:
|
||||
logger.warning(
|
||||
"No active sync found for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return None
|
||||
|
||||
# Check if the sync is due
|
||||
last_synced = sync_active.get("last_synced")
|
||||
force_sync = sync_active.get("force_sync", False)
|
||||
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
|
||||
if last_synced and not force_sync:
|
||||
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
|
||||
timezone.utc
|
||||
)
|
||||
current_time = datetime.now().astimezone()
|
||||
|
||||
# Debug logging to check the values
|
||||
logger.debug("Last synced time (UTC): %s", last_synced_time)
|
||||
logger.debug("Current time (local timezone): %s", current_time)
|
||||
|
||||
# Convert current_time to UTC for comparison
|
||||
current_time_utc = current_time.astimezone(timezone.utc)
|
||||
logger.debug("Current time (UTC): %s", current_time_utc)
|
||||
time_difference = current_time_utc - last_synced_time
|
||||
if time_difference < timedelta(minutes=sync_interval_minutes):
|
||||
logger.info(
|
||||
"Google sync is not due for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return None
|
||||
|
||||
# Retrieve the sync user details
|
||||
sync_user = self.sync_user_service.get_syncs_user(
|
||||
user_id=user_id, sync_user_id=sync_active["syncs_user_id"]
|
||||
)
|
||||
if not sync_user:
|
||||
logger.warning(
|
||||
"No sync user found for sync_active_id: %s, user_id: %s",
|
||||
sync_active_id,
|
||||
user_id,
|
||||
)
|
||||
return None
|
||||
|
||||
sync_user = sync_user[0]
|
||||
if sync_user["provider"].lower() != "google":
|
||||
logger.warning(
|
||||
"Sync provider is not Google for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return None
|
||||
|
||||
# Download the folders and files from Google Drive
|
||||
logger.info(
|
||||
"Downloading folders and files from Google Drive for sync_active_id: %s",
|
||||
sync_active_id,
|
||||
)
|
||||
|
||||
settings = sync_active.get("settings", {})
|
||||
folders = settings.get("folders", [])
|
||||
files_to_download = settings.get("files", [])
|
||||
files: List[SyncFile] = []
|
||||
files_metadata: List[SyncFile] = []
|
||||
if len(folders) > 0:
|
||||
for folder in folders:
|
||||
folder_files = get_google_drive_files(
|
||||
sync_user["credentials"],
|
||||
folder_id=folder,
|
||||
recursive=True,
|
||||
)
|
||||
if isinstance(folder_files, list):
|
||||
files.extend(folder_files)
|
||||
else:
|
||||
logger.error(
|
||||
f"Error fetching files for folder {folder}: {folder_files}"
|
||||
)
|
||||
if len(files_to_download) > 0:
|
||||
files_metadata = get_google_drive_files_by_id(
|
||||
sync_user["credentials"], files_to_download
|
||||
)
|
||||
files = files + files_metadata # type: ignore
|
||||
if "error" in files:
|
||||
logger.error(
|
||||
"Failed to download files from Google Drive for sync_active_id: %s",
|
||||
sync_active_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# Filter files that have been modified since the last sync
|
||||
last_synced_time = datetime.fromisoformat(last_synced) if last_synced else None
|
||||
|
||||
files_to_download = [
|
||||
file
|
||||
for file in files
|
||||
if not file.is_folder
|
||||
and (
|
||||
(
|
||||
not last_synced_time
|
||||
or datetime.fromisoformat(file.last_modified) > last_synced_time
|
||||
)
|
||||
or not check_file_exists(sync_active["brain_id"], file.name)
|
||||
)
|
||||
]
|
||||
|
||||
downloaded_files = await self._upload_files(
|
||||
sync_user["credentials"],
|
||||
files_to_download,
|
||||
user_id,
|
||||
sync_active["brain_id"],
|
||||
sync_active_id,
|
||||
)
|
||||
|
||||
# Update the last_synced timestamp
|
||||
self.sync_active_service.update_sync_active(
|
||||
sync_active_id,
|
||||
SyncsActiveUpdateInput(
|
||||
last_synced=datetime.now().astimezone().isoformat(),
|
||||
force_sync=False,
|
||||
),
|
||||
)
|
||||
logger.info(
|
||||
"Google Drive sync completed for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return downloaded_files
|
@@ -1,436 +0,0 @@
|
||||
import os
|
||||
from typing import Dict, List
|
||||
|
||||
import dropbox
|
||||
import msal
|
||||
import requests
|
||||
from fastapi import HTTPException
|
||||
from google.auth.transport.requests import Request as GoogleRequest
|
||||
from google.oauth2.credentials import Credentials
|
||||
from googleapiclient.discovery import build
|
||||
from quivr_api.logger import get_logger
|
||||
from quivr_api.modules.sync.entity.sync import SyncFile
|
||||
from quivr_api.modules.sync.utils.normalize import remove_special_characters
|
||||
from requests import HTTPError
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# GOOGLE
|
||||
|
||||
|
||||
def get_google_drive_files_by_id(
|
||||
credentials: dict, file_ids: List[str]
|
||||
) -> List[SyncFile]:
|
||||
"""
|
||||
Retrieve files from Google Drive by their IDs.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Google Drive.
|
||||
file_ids (list): The list of file IDs to retrieve.
|
||||
|
||||
Returns:
|
||||
list: A list of dictionaries containing the metadata of each file or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
|
||||
creds = Credentials.from_authorized_user_info(credentials)
|
||||
if creds.expired and creds.refresh_token:
|
||||
creds.refresh(GoogleRequest())
|
||||
logger.info("Google Drive credentials refreshed")
|
||||
|
||||
try:
|
||||
service = build("drive", "v3", credentials=creds)
|
||||
files: List[SyncFile] = []
|
||||
|
||||
for file_id in file_ids:
|
||||
result = (
|
||||
service.files()
|
||||
.get(
|
||||
fileId=file_id,
|
||||
fields="id, name, mimeType, modifiedTime, webViewLink",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
|
||||
files.append(
|
||||
SyncFile(
|
||||
name=result["name"],
|
||||
id=result["id"],
|
||||
is_folder=(
|
||||
result["mimeType"] == "application/vnd.google-apps.folder"
|
||||
),
|
||||
last_modified=result["modifiedTime"],
|
||||
mime_type=result["mimeType"],
|
||||
web_view_link=result["webViewLink"],
|
||||
)
|
||||
)
|
||||
|
||||
logger.info("Google Drive files retrieved successfully: %s", len(files))
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
return files
|
||||
except HTTPError as error:
|
||||
logger.error("An error occurred while retrieving Google Drive files: %s", error)
|
||||
return []
|
||||
|
||||
|
||||
def get_google_drive_files(
|
||||
credentials: dict, folder_id: str = None, recursive: bool = False
|
||||
) -> List[SyncFile]:
|
||||
"""
|
||||
Retrieve files from Google Drive.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Google Drive.
|
||||
folder_id (str, optional): The folder ID to filter files. Defaults to None.
|
||||
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the list of files or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
|
||||
creds = Credentials.from_authorized_user_info(credentials)
|
||||
if creds.expired and creds.refresh_token:
|
||||
creds.refresh(GoogleRequest())
|
||||
logger.info("Google Drive credentials refreshed")
|
||||
# Updating the credentials in the database
|
||||
|
||||
try:
|
||||
service = build("drive", "v3", credentials=creds)
|
||||
if folder_id:
|
||||
query = f"'{folder_id}' in parents"
|
||||
else:
|
||||
query = "'root' in parents or sharedWithMe"
|
||||
page_token = None
|
||||
files: List[SyncFile] = []
|
||||
|
||||
while True:
|
||||
results = (
|
||||
service.files()
|
||||
.list(
|
||||
q=query,
|
||||
pageSize=100,
|
||||
fields="nextPageToken, files(id, name, mimeType, modifiedTime, webViewLink)",
|
||||
pageToken=page_token,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
items = results.get("files", [])
|
||||
|
||||
if not items:
|
||||
logger.info("No files found in Google Drive")
|
||||
break
|
||||
|
||||
for item in items:
|
||||
files.append(
|
||||
SyncFile(
|
||||
name=item["name"],
|
||||
id=item["id"],
|
||||
is_folder=(
|
||||
item["mimeType"] == "application/vnd.google-apps.folder"
|
||||
),
|
||||
last_modified=item["modifiedTime"],
|
||||
mime_type=item["mimeType"],
|
||||
web_view_link=item["webViewLink"],
|
||||
)
|
||||
)
|
||||
|
||||
# If recursive is True and the item is a folder, get files from the folder
|
||||
if recursive and item.mimeType == "application/vnd.google-apps.folder":
|
||||
logger.warning(
|
||||
"Calling Recursive for folder: %s",
|
||||
item.name,
|
||||
)
|
||||
files.extend(
|
||||
get_google_drive_files(credentials, item.id, recursive)
|
||||
)
|
||||
|
||||
page_token = results.get("nextPageToken", None)
|
||||
if page_token is None:
|
||||
break
|
||||
|
||||
logger.info("Google Drive files retrieved successfully: %s", len(files))
|
||||
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
return files
|
||||
except HTTPError as error:
|
||||
logger.error("An error occurred while retrieving Google Drive files: %s", error)
|
||||
return []
|
||||
|
||||
|
||||
# AZURE
|
||||
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
|
||||
AUTHORITY = "https://login.microsoftonline.com/common"
|
||||
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
|
||||
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
|
||||
SCOPE = [
|
||||
"https://graph.microsoft.com/Files.Read",
|
||||
"https://graph.microsoft.com/User.Read",
|
||||
"https://graph.microsoft.com/Sites.Read.All",
|
||||
]
|
||||
|
||||
|
||||
def get_azure_token_data(credentials):
|
||||
if "access_token" not in credentials:
|
||||
raise HTTPException(status_code=401, detail="Invalid token data")
|
||||
return credentials
|
||||
|
||||
|
||||
def refresh_azure_token(credentials):
|
||||
if "refresh_token" not in credentials:
|
||||
raise HTTPException(status_code=401, detail="No refresh token available")
|
||||
|
||||
client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
|
||||
result = client.acquire_token_by_refresh_token(
|
||||
credentials["refresh_token"], scopes=SCOPE
|
||||
)
|
||||
if "access_token" not in result:
|
||||
raise HTTPException(status_code=400, detail="Failed to refresh token")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_azure_headers(token_data):
|
||||
return {
|
||||
"Authorization": f"Bearer {token_data['access_token']}",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
|
||||
def list_azure_files(credentials, folder_id=None, recursive=False) -> list[SyncFile]:
|
||||
def fetch_files(endpoint, headers):
|
||||
response = requests.get(endpoint, headers=headers)
|
||||
if response.status_code == 401:
|
||||
token_data = refresh_azure_token(credentials)
|
||||
headers = get_azure_headers(token_data)
|
||||
response = requests.get(endpoint, headers=headers)
|
||||
if response.status_code != 200:
|
||||
return {"error": response.text}
|
||||
return response.json().get("value", [])
|
||||
|
||||
token_data = get_azure_token_data(credentials)
|
||||
headers = get_azure_headers(token_data)
|
||||
endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"
|
||||
if folder_id:
|
||||
endpoint = (
|
||||
f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
|
||||
)
|
||||
|
||||
items = fetch_files(endpoint, headers)
|
||||
|
||||
if not items:
|
||||
logger.info("No files found in Azure Drive")
|
||||
return []
|
||||
|
||||
files = []
|
||||
for item in items:
|
||||
file_data = SyncFile(
|
||||
name=item.get("name"),
|
||||
id=item.get("id"),
|
||||
is_folder="folder" in item,
|
||||
last_modified=item.get("lastModifiedDateTime"),
|
||||
mime_type=item.get("file", {}).get("mimeType", "folder"),
|
||||
web_view_link=item.get("webUrl"),
|
||||
)
|
||||
files.append(file_data)
|
||||
|
||||
# If recursive option is enabled and the item is a folder, fetch files from it
|
||||
if recursive and file_data.is_folder:
|
||||
folder_files = list_azure_files(
|
||||
credentials, folder_id=file_data.id, recursive=True
|
||||
)
|
||||
|
||||
files.extend(folder_files)
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
logger.info("Azure Drive files retrieved successfully: %s", len(files))
|
||||
return files
|
||||
|
||||
|
||||
def get_azure_files_by_id(
|
||||
credentials: dict, file_ids: List[str]
|
||||
) -> List[SyncFile] | dict:
|
||||
"""
|
||||
Retrieve files from Azure Drive by their IDs.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Azure Drive.
|
||||
file_ids (list): The list of file IDs to retrieve.
|
||||
|
||||
Returns:
|
||||
list: A list of dictionaries containing the metadata of each file or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
|
||||
token_data = get_azure_token_data(credentials)
|
||||
headers = get_azure_headers(token_data)
|
||||
files = []
|
||||
|
||||
for file_id in file_ids:
|
||||
endpoint = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
|
||||
response = requests.get(endpoint, headers=headers)
|
||||
if response.status_code == 401:
|
||||
token_data = refresh_azure_token(credentials)
|
||||
headers = get_azure_headers(token_data)
|
||||
response = requests.get(endpoint, headers=headers)
|
||||
if response.status_code != 200:
|
||||
logger.error(
|
||||
"An error occurred while retrieving Azure Drive files: %s",
|
||||
response.text,
|
||||
)
|
||||
return {"error": response.text}
|
||||
|
||||
result = response.json()
|
||||
files.append(
|
||||
SyncFile(
|
||||
name=result.get("name"),
|
||||
id=result.get("id"),
|
||||
is_folder="folder" in result,
|
||||
last_modified=result.get("lastModifiedDateTime"),
|
||||
mime_type=result.get("file", {}).get("mimeType", "folder"),
|
||||
web_view_link=result.get("webUrl"),
|
||||
)
|
||||
)
|
||||
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
logger.info("Azure Drive files retrieved successfully: %s", len(files))
|
||||
return files
|
||||
|
||||
|
||||
# Drop Box
|
||||
def list_dropbox_files(
|
||||
credentials: dict, folder_id: str = "", recursive: bool = False
|
||||
) -> List[SyncFile] | dict:
|
||||
"""
|
||||
Retrieve files from Dropbox.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Dropbox.
|
||||
folder_id (str, optional): The folder ID to filter files. Defaults to "".
|
||||
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the list of files or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Dropbox files with folder_id: %s", folder_id)
|
||||
|
||||
# Verify credential has the access token
|
||||
if "access_token" not in credentials:
|
||||
print("Invalid token data")
|
||||
return {"error": "Invalid token data"}
|
||||
|
||||
try:
|
||||
dbx = dropbox.Dropbox(credentials["access_token"])
|
||||
dbx.check_and_refresh_access_token()
|
||||
credentials["access_token"] = dbx._oauth2_access_token
|
||||
|
||||
def fetch_files(metadata):
|
||||
files = []
|
||||
for file in metadata.entries:
|
||||
|
||||
shared_link = f"https://www.dropbox.com/preview{file.path_display}?context=content_suggestions&role=personal"
|
||||
is_folder = isinstance(file, dropbox.files.FolderMetadata)
|
||||
logger.debug(f"IS FOLDER ? {is_folder}")
|
||||
|
||||
files.append(
|
||||
SyncFile(
|
||||
name=file.name,
|
||||
id=file.id,
|
||||
is_folder=is_folder,
|
||||
last_modified=(
|
||||
str(file.client_modified) if not is_folder else ""
|
||||
),
|
||||
mime_type=(
|
||||
file.path_lower.split(".")[-1] if not is_folder else ""
|
||||
),
|
||||
web_view_link=shared_link,
|
||||
)
|
||||
)
|
||||
return files
|
||||
|
||||
files = []
|
||||
list_metadata = dbx.files_list_folder(folder_id, recursive=recursive)
|
||||
files.extend(fetch_files(list_metadata))
|
||||
|
||||
while list_metadata.has_more:
|
||||
list_metadata = dbx.files_list_folder_continue(list_metadata.cursor)
|
||||
files.extend(fetch_files(list_metadata))
|
||||
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
|
||||
logger.info("Dropbox files retrieved successfully: %d", len(files))
|
||||
return files
|
||||
|
||||
except dropbox.exceptions.ApiError as e:
|
||||
logger.error("Dropbox API error: %s", e)
|
||||
raise HTTPException(status_code=500, detail="Dropbox API error")
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error: %s", e)
|
||||
raise HTTPException(status_code=500, detail="Unexpected error occurred")
|
||||
|
||||
|
||||
def get_dropbox_files_by_id(
|
||||
credentials: Dict[str, str], file_ids: List[str]
|
||||
) -> List[SyncFile] | Dict[str, str]:
|
||||
"""
|
||||
Retrieve files from Dropbox by their IDs.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Dropbox.
|
||||
file_ids (list): The list of file IDs to retrieve.
|
||||
|
||||
Returns:
|
||||
list: A list of dictionaries containing the metadata of each file or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Dropbox files with file_ids: %s", file_ids)
|
||||
|
||||
if "access_token" not in credentials:
|
||||
raise HTTPException(status_code=401, detail="Invalid token data")
|
||||
|
||||
try:
|
||||
dbx = dropbox.Dropbox(credentials["access_token"])
|
||||
dbx.check_and_refresh_access_token()
|
||||
credentials["access_token"] = dbx._oauth2_access_token
|
||||
|
||||
files = []
|
||||
|
||||
for file_id in file_ids:
|
||||
try:
|
||||
metadata = dbx.files_get_metadata(file_id)
|
||||
logger.debug("Metadata for file_id %s: %s", file_id, metadata)
|
||||
shared_link = f"https://www.dropbox.com/preview/{metadata.path_display}?context=content_suggestions&role=personal"
|
||||
is_folder = isinstance(metadata, dropbox.files.FolderMetadata)
|
||||
file_info = SyncFile(
|
||||
name=metadata.name,
|
||||
id=metadata.id,
|
||||
is_folder=is_folder,
|
||||
last_modified=(
|
||||
str(metadata.client_modified) if not is_folder else ""
|
||||
),
|
||||
mime_type=(
|
||||
metadata.path_lower.split(".")[-1] if not is_folder else ""
|
||||
),
|
||||
web_view_link=shared_link,
|
||||
)
|
||||
|
||||
files.append(file_info)
|
||||
except dropbox.exceptions.ApiError as api_err:
|
||||
logger.error("Dropbox API error for file_id %s: %s", file_id, api_err)
|
||||
continue # Skip this file and proceed with the next one
|
||||
except Exception as err:
|
||||
logger.error("Unexpected error for file_id %s: %s", file_id, err)
|
||||
continue # Skip this file and proceed with the next one
|
||||
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
|
||||
logger.info("Dropbox files retrieved successfully: %d", len(files))
|
||||
return files
|
||||
|
||||
except dropbox.exceptions.AuthError as auth_err:
|
||||
logger.error("Authentication error: %s", auth_err)
|
||||
raise HTTPException(status_code=401, detail="Authentication error")
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error: %s", e)
|
||||
raise HTTPException(status_code=500, detail="Unexpected error occurred")
|
@@ -1,387 +0,0 @@
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from io import BytesIO
|
||||
|
||||
import msal
|
||||
import requests
|
||||
from fastapi import HTTPException, UploadFile
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from quivr_api.logger import get_logger
|
||||
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
|
||||
from quivr_api.modules.knowledge.repository.storage import Storage
|
||||
from quivr_api.modules.notification.dto.inputs import (
|
||||
CreateNotification,
|
||||
NotificationUpdatableProperties,
|
||||
)
|
||||
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
|
||||
from quivr_api.modules.notification.service.notification_service import (
|
||||
NotificationService,
|
||||
)
|
||||
from quivr_api.modules.sync.dto.inputs import (
|
||||
SyncFileInput,
|
||||
SyncFileUpdateInput,
|
||||
SyncsActiveUpdateInput,
|
||||
)
|
||||
from quivr_api.modules.sync.entity.sync import SyncFile
|
||||
from quivr_api.modules.sync.repository.sync_files import SyncFiles
|
||||
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
|
||||
from quivr_api.modules.sync.utils.list_files import (
|
||||
get_azure_files_by_id,
|
||||
list_azure_files,
|
||||
)
|
||||
from quivr_api.modules.sync.utils.upload import upload_file
|
||||
from quivr_api.modules.upload.service.upload_file import check_file_exists
|
||||
|
||||
notification_service = NotificationService()
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
|
||||
AUTHORITY = "https://login.microsoftonline.com/common"
|
||||
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
|
||||
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
|
||||
SCOPE = [
|
||||
"https://graph.microsoft.com/Files.Read",
|
||||
"https://graph.microsoft.com/User.Read",
|
||||
"https://graph.microsoft.com/Sites.Read.All",
|
||||
]
|
||||
|
||||
|
||||
class AzureSyncUtils(BaseModel):
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
sync_user_service: SyncUserService
|
||||
sync_active_service: SyncService
|
||||
sync_files_repo: SyncFiles
|
||||
storage: Storage
|
||||
|
||||
def get_headers(self, token_data):
|
||||
return {
|
||||
"Authorization": f"Bearer {token_data['access_token']}",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
def refresh_token(self, refresh_token):
|
||||
client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
|
||||
result = client.acquire_token_by_refresh_token(refresh_token, scopes=SCOPE)
|
||||
if "access_token" not in result:
|
||||
raise HTTPException(status_code=400, detail="Failed to refresh token")
|
||||
return result
|
||||
|
||||
async def _upload_files(
|
||||
self,
|
||||
token_data: dict,
|
||||
files: list[SyncFile],
|
||||
current_user: str,
|
||||
brain_id: str,
|
||||
sync_active_id: int,
|
||||
):
|
||||
"""
|
||||
Download files from Azure.
|
||||
|
||||
Args:
|
||||
token_data (dict): The token data for accessing Azure.
|
||||
files (list): The list of file metadata to download.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the status of the download or an error message.
|
||||
"""
|
||||
logger.info("Downloading Azure files with metadata: %s", files)
|
||||
headers = self.get_headers(token_data)
|
||||
|
||||
downloaded_files = []
|
||||
# Generate random UUID
|
||||
bulk_id = uuid.uuid4()
|
||||
for file in files:
|
||||
upload_notification = notification_service.add_notification(
|
||||
CreateNotification(
|
||||
user_id=current_user,
|
||||
bulk_id=bulk_id,
|
||||
status=NotificationsStatusEnum.INFO,
|
||||
title=file.name,
|
||||
category="sync",
|
||||
brain_id=str(brain_id),
|
||||
)
|
||||
)
|
||||
|
||||
file.notification_id = str(upload_notification.id)
|
||||
for file in files:
|
||||
try:
|
||||
file_id = file.id
|
||||
file_name = file.name
|
||||
modified_time = file.last_modified
|
||||
|
||||
download_endpoint = (
|
||||
f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
|
||||
)
|
||||
logger.info("Downloading file: %s", file_name)
|
||||
download_response = requests.get(
|
||||
download_endpoint, headers=headers, stream=True
|
||||
)
|
||||
if download_response.status_code == 401:
|
||||
token_data = self.refresh_token(token_data["refresh_token"])
|
||||
headers = self.get_headers(token_data)
|
||||
download_response = requests.get(
|
||||
download_endpoint, headers=headers, stream=True
|
||||
)
|
||||
if download_response.status_code != 200:
|
||||
logger.error("Failed to download file: %s", file_name)
|
||||
continue
|
||||
|
||||
file_data = BytesIO(download_response.content)
|
||||
|
||||
# Check if the file already exists in the storage
|
||||
if check_file_exists(brain_id, file_name):
|
||||
logger.debug("🔥 File already exists in the storage: %s", file_name)
|
||||
|
||||
self.storage.remove_file(brain_id + "/" + file_name)
|
||||
BrainsVectors().delete_file_from_brain(brain_id, file_name)
|
||||
|
||||
# Check if the file extension is compatible
|
||||
if file_name.split(".")[-1] not in [
|
||||
"pdf",
|
||||
"txt",
|
||||
"md",
|
||||
"csv",
|
||||
"docx",
|
||||
"xlsx",
|
||||
"pptx",
|
||||
"doc",
|
||||
]:
|
||||
logger.info("File is not compatible: %s", file_name)
|
||||
continue
|
||||
|
||||
to_upload_file = UploadFile(
|
||||
file=file_data,
|
||||
filename=file_name,
|
||||
)
|
||||
|
||||
# Check if the file already exists in the database
|
||||
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
|
||||
existing_file = next(
|
||||
(f for f in existing_files if f.path == file_name), None
|
||||
)
|
||||
|
||||
supported = False
|
||||
if (existing_file and existing_file.supported) or not existing_file:
|
||||
supported = True
|
||||
await upload_file(
|
||||
to_upload_file,
|
||||
brain_id,
|
||||
current_user,
|
||||
bulk_id,
|
||||
"Share Point",
|
||||
file.web_view_link,
|
||||
notification_id=file.notification_id,
|
||||
)
|
||||
|
||||
if existing_file:
|
||||
# Update the existing file record
|
||||
self.sync_files_repo.update_sync_file(
|
||||
existing_file.id,
|
||||
SyncFileUpdateInput(
|
||||
last_modified=modified_time,
|
||||
supported=supported,
|
||||
),
|
||||
)
|
||||
else:
|
||||
# Create a new file record
|
||||
self.sync_files_repo.create_sync_file(
|
||||
SyncFileInput(
|
||||
path=file_name,
|
||||
syncs_active_id=sync_active_id,
|
||||
last_modified=modified_time,
|
||||
brain_id=brain_id,
|
||||
supported=supported,
|
||||
)
|
||||
)
|
||||
|
||||
downloaded_files.append(file_name)
|
||||
notification_service.update_notification_by_id(
|
||||
file.notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.SUCCESS,
|
||||
description="File downloaded successfully",
|
||||
),
|
||||
)
|
||||
except Exception as error:
|
||||
logger.error(
|
||||
"An error occurred while downloading Azure files: %s", error
|
||||
)
|
||||
# Check if the file already exists in the database
|
||||
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
|
||||
existing_file = next(
|
||||
(f for f in existing_files if f.path == file.name), None
|
||||
)
|
||||
# Update the existing file record
|
||||
if existing_file:
|
||||
self.sync_files_repo.update_sync_file(
|
||||
existing_file.id,
|
||||
SyncFileUpdateInput(
|
||||
supported=False,
|
||||
),
|
||||
)
|
||||
else:
|
||||
# Create a new file record
|
||||
self.sync_files_repo.create_sync_file(
|
||||
SyncFileInput(
|
||||
path=file.name,
|
||||
syncs_active_id=sync_active_id,
|
||||
last_modified=file.last_modified,
|
||||
brain_id=brain_id,
|
||||
supported=False,
|
||||
)
|
||||
)
|
||||
notification_service.update_notification_by_id(
|
||||
file.notification_id,
|
||||
NotificationUpdatableProperties(
|
||||
status=NotificationsStatusEnum.ERROR,
|
||||
description="Error downloading file",
|
||||
),
|
||||
)
|
||||
return {"downloaded_files": downloaded_files}
|
||||
|
||||
async def sync(self, sync_active_id: int, user_id: str):
|
||||
"""
|
||||
Check if the Azure sync has not been synced and download the folders and files based on the settings.
|
||||
|
||||
Args:
|
||||
sync_active_id (int): The ID of the active sync.
|
||||
user_id (str): The user ID associated with the active sync.
|
||||
"""
|
||||
|
||||
# Retrieve the active sync details
|
||||
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
|
||||
if not sync_active:
|
||||
logger.warning(
|
||||
"No active sync found for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return None
|
||||
|
||||
# Check if the sync is due
|
||||
last_synced = sync_active.get("last_synced")
|
||||
force_sync = sync_active.get("force_sync", False)
|
||||
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
|
||||
if last_synced and not force_sync:
|
||||
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
|
||||
timezone.utc
|
||||
)
|
||||
current_time = datetime.now().astimezone()
|
||||
|
||||
# Debug logging to check the values
|
||||
logger.debug("Last synced time (UTC): %s", last_synced_time)
|
||||
logger.debug("Current time (local timezone): %s", current_time)
|
||||
|
||||
# Convert current_time to UTC for comparison
|
||||
current_time_utc = current_time.astimezone(timezone.utc)
|
||||
logger.debug("Current time (UTC): %s", current_time_utc)
|
||||
time_difference = current_time_utc - last_synced_time
|
||||
if time_difference < timedelta(minutes=sync_interval_minutes):
|
||||
logger.info(
|
||||
"Azure sync is not due for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return None
|
||||
|
||||
# Retrieve the sync user details
|
||||
sync_user = self.sync_user_service.get_syncs_user(
|
||||
user_id=user_id, sync_user_id=sync_active["syncs_user_id"]
|
||||
)
|
||||
if not sync_user:
|
||||
logger.warning(
|
||||
"No sync user found for sync_active_id: %s, user_id: %s",
|
||||
sync_active_id,
|
||||
user_id,
|
||||
)
|
||||
return None
|
||||
|
||||
sync_user = sync_user[0]
|
||||
if sync_user["provider"].lower() != "azure":
|
||||
logger.warning(
|
||||
"Sync provider is not Azure for sync_active_id: %s", sync_active_id
|
||||
)
|
||||
return None
|
||||
|
||||
# Download the folders and files from Azure
|
||||
logger.info(
|
||||
"Downloading folders and files from Azure for sync_active_id: %s",
|
||||
sync_active_id,
|
||||
)
|
||||
|
||||
# Get the folder id from the settings from sync_active
|
||||
settings = sync_active.get("settings", {})
|
||||
folders = settings.get("folders", [])
|
||||
files_to_download = settings.get("files", [])
|
||||
files = []
|
||||
files_metadata = []
|
||||
if len(folders) > 0:
|
||||
files = []
|
||||
for folder in folders:
|
||||
files.extend(
|
||||
list_azure_files(
|
||||
sync_user["credentials"],
|
||||
folder_id=folder,
|
||||
recursive=True,
|
||||
)
|
||||
)
|
||||
if len(files_to_download) > 0:
|
||||
files_metadata = get_azure_files_by_id(
|
||||
sync_user["credentials"],
|
||||
files_to_download,
|
||||
)
|
||||
files = files + files_metadata # type: ignore
|
||||
|
||||
if "error" in files:
|
||||
logger.error(
|
||||
"Failed to download files from Azure for sync_active_id: %s",
|
||||
sync_active_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# Filter files that have been modified since the last sync
|
||||
last_synced_time = (
|
||||
datetime.fromisoformat(last_synced).astimezone(timezone.utc)
|
||||
if last_synced
|
||||
else None
|
||||
)
|
||||
logger.info("Files retrieved from Azure: %s", len(files))
|
||||
logger.info("Files retrieved from Azure: %s", files)
|
||||
files_to_download = [
|
||||
file
|
||||
for file in files
|
||||
if not file.is_folder
|
||||
and (
|
||||
(
|
||||
not last_synced_time
|
||||
or datetime.strptime(
|
||||
file.last_modified, "%Y-%m-%dT%H:%M:%SZ"
|
||||
).replace(tzinfo=timezone.utc)
|
||||
> last_synced_time
|
||||
)
|
||||
or not check_file_exists(sync_active["brain_id"], file.name)
|
||||
)
|
||||
]
|
||||
|
||||
downloaded_files = await self._upload_files(
|
||||
sync_user["credentials"],
|
||||
files_to_download,
|
||||
user_id,
|
||||
sync_active["brain_id"],
|
||||
sync_active_id,
|
||||
)
|
||||
if "error" in downloaded_files:
|
||||
logger.error(
|
||||
"Failed to download files from Azure for sync_active_id: %s",
|
||||
sync_active_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# Update the last_synced timestamp
|
||||
self.sync_active_service.update_sync_active(
|
||||
sync_active_id,
|
||||
SyncsActiveUpdateInput(
|
||||
last_synced=datetime.now().astimezone().isoformat(), force_sync=False
|
||||
),
|
||||
)
|
||||
logger.info("Azure sync completed for sync_active_id: %s", sync_active_id)
|
||||
return downloaded_files
|
backend/api/quivr_api/modules/sync/utils/sync.py (new file, 622 lines)
@@ -0,0 +1,622 @@
import json
import os
import time
from abc import ABC, abstractmethod
from io import BytesIO
from typing import Any, Dict, List

import dropbox
import msal
import requests
from fastapi import HTTPException
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from quivr_api.logger import get_logger
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.utils.normalize import remove_special_characters
from requests import HTTPError

logger = get_logger(__name__)


class BaseSync(ABC):
    name: str
    lower_name: str
    datetime_format: str

    @abstractmethod
    def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]:
        raise NotImplementedError

    @abstractmethod
    def get_files(
        self, credentials: Dict, folder_id: str | None = None, recursive: bool = False
    ) -> List[SyncFile]:
        raise NotImplementedError

    @abstractmethod
    def check_and_refresh_access_token(self, credentials: dict) -> Dict:
        raise NotImplementedError

    @abstractmethod
    def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
        raise NotImplementedError

class GoogleDriveSync(BaseSync):
|
||||
name = "Google Drive"
|
||||
lower_name = "google"
|
||||
creds: Credentials | None = None
|
||||
service: Any | None = None
|
||||
datetime_format: str = "%Y-%m-%dT%H:%M:%S.%fZ"
|
||||
|
||||
def check_and_refresh_access_token(self, credentials: dict) -> Dict:
|
||||
self.creds = Credentials.from_authorized_user_info(credentials)
|
||||
if self.creds.expired and self.creds.refresh_token:
|
||||
self.creds.refresh(GoogleRequest())
|
||||
logger.info("Google Drive credentials refreshed")
|
||||
return json.loads(self.creds.to_json())
|
||||
|
||||
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
|
||||
file_id = file.id
|
||||
file_name = file.name
|
||||
mime_type = file.mime_type
|
||||
modified_time = file.last_modified
|
||||
if not self.creds:
|
||||
self.check_and_refresh_access_token(credentials)
|
||||
if not self.service:
|
||||
self.service = build("drive", "v3", credentials=self.creds)
|
||||
|
||||
# Convert Google Docs files to appropriate formats before downloading
|
||||
if mime_type == "application/vnd.google-apps.document":
|
||||
logger.debug(
|
||||
"Converting Google Docs file with file_id: %s to DOCX.",
|
||||
file_id,
|
||||
)
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
)
|
||||
file_name += ".docx"
|
||||
elif mime_type == "application/vnd.google-apps.spreadsheet":
|
||||
logger.debug(
|
||||
"Converting Google Sheets file with file_id: %s to XLSX.",
|
||||
file_id,
|
||||
)
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
)
|
||||
file_name += ".xlsx"
|
||||
elif mime_type == "application/vnd.google-apps.presentation":
|
||||
logger.debug(
|
||||
"Converting Google Slides file with file_id: %s to PPTX.",
|
||||
file_id,
|
||||
)
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
)
|
||||
file_name += ".pptx"
|
||||
### Elif pdf, txt, md, csv, docx, xlsx, pptx, doc
|
||||
elif file_name.split(".")[-1] in [
|
||||
"pdf",
|
||||
"txt",
|
||||
"md",
|
||||
"csv",
|
||||
"docx",
|
||||
"xlsx",
|
||||
"pptx",
|
||||
"doc",
|
||||
]:
|
||||
request = self.service.files().get_media(fileId=file_id)
|
||||
else:
|
||||
logger.warning(
|
||||
"Skipping unsupported file type: %s for file_id: %s",
|
||||
mime_type,
|
||||
file_id,
|
||||
)
|
||||
raise Exception("Unsupported file type")
|
||||
|
||||
file_data = request.execute()
|
||||
return BytesIO(file_data)
|
||||
|
||||
def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]:
|
||||
"""
|
||||
Retrieve files from Google Drive by their IDs.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Google Drive.
|
||||
file_ids (list): The list of file IDs to retrieve.
|
||||
|
||||
Returns:
|
||||
list: A list of dictionaries containing the metadata of each file or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
|
||||
self.check_and_refresh_access_token(credentials)
|
||||
|
||||
try:
|
||||
service = build("drive", "v3", credentials=self.creds)
|
||||
files: List[SyncFile] = []
|
||||
|
||||
for file_id in file_ids:
|
||||
result = (
|
||||
service.files()
|
||||
.get(
|
||||
fileId=file_id,
|
||||
fields="id, name, mimeType, modifiedTime, webViewLink",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
|
||||
files.append(
|
||||
SyncFile(
|
||||
name=result["name"],
|
||||
id=result["id"],
|
||||
is_folder=(
|
||||
result["mimeType"] == "application/vnd.google-apps.folder"
|
||||
),
|
||||
last_modified=result["modifiedTime"],
|
||||
mime_type=result["mimeType"],
|
||||
web_view_link=result["webViewLink"],
|
||||
)
|
||||
)
|
||||
|
||||
logger.info("Google Drive files retrieved successfully: %s", len(files))
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
return files
|
||||
|
||||
except HTTPError as error:
|
||||
logger.error(
|
||||
"An error occurred while retrieving Google Drive files: %s", error
|
||||
)
|
||||
raise Exception("Failed to retrieve files")
|
||||
|
||||
def get_files(
|
||||
self, credentials: dict, folder_id: str | None = None, recursive: bool = False
|
||||
) -> List[SyncFile]:
|
||||
"""
|
||||
Retrieve files from Google Drive.
|
||||
|
||||
Args:
|
||||
credentials (dict): The credentials for accessing Google Drive.
|
||||
folder_id (str, optional): The folder ID to filter files. Defaults to None.
|
||||
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the list of files or an error message.
|
||||
"""
|
||||
logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
|
||||
|
||||
self.check_and_refresh_access_token(credentials)
|
||||
# Updating the credentials in the database
|
||||
|
||||
try:
|
||||
service = build("drive", "v3", credentials=self.creds)
|
||||
if folder_id:
|
||||
query = f"'{folder_id}' in parents"
|
||||
else:
|
||||
query = "'root' in parents or sharedWithMe"
|
||||
page_token = None
|
||||
files: List[SyncFile] = []
|
||||
|
||||
while True:
|
||||
results = (
|
||||
service.files()
|
||||
.list(
|
||||
q=query,
|
||||
pageSize=100,
|
||||
fields="nextPageToken, files(id, name, mimeType, modifiedTime, webViewLink)",
|
||||
pageToken=page_token,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
items = results.get("files", [])
|
||||
|
||||
if not items:
|
||||
logger.info("No files found in Google Drive")
|
||||
break
|
||||
|
||||
for item in items:
|
||||
files.append(
|
||||
SyncFile(
|
||||
name=item["name"],
|
||||
id=item["id"],
|
||||
is_folder=(
|
||||
item["mimeType"] == "application/vnd.google-apps.folder"
|
||||
),
|
||||
last_modified=item["modifiedTime"],
|
||||
mime_type=item["mimeType"],
|
||||
web_view_link=item["webViewLink"],
|
||||
)
|
||||
)
|
||||
|
||||
# If recursive is True and the item is a folder, get files from the folder
|
||||
if (
|
||||
recursive
|
||||
and item.mimeType == "application/vnd.google-apps.folder"
|
||||
):
|
||||
logger.warning(
|
||||
"Calling Recursive for folder: %s",
|
||||
item.name,
|
||||
)
|
||||
files.extend(self.get_files(credentials, item.id, recursive))
|
||||
|
||||
page_token = results.get("nextPageToken", None)
|
||||
if page_token is None:
|
||||
break
|
||||
|
||||
logger.info("Google Drive files retrieved successfully: %s", len(files))
|
||||
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
return files
|
||||
except HTTPError as error:
|
||||
logger.error(
|
||||
"An error occurred while retrieving Google Drive files: %s", error
|
||||
)
|
||||
raise Exception("Failed to retrieve files")
|
||||
|
||||
|
||||
class AzureDriveSync(BaseSync):
    name = "Azure Drive"
    lower_name = "azure"
    datetime_format: str = "%Y-%m-%dT%H:%M:%SZ"
    CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
    AUTHORITY = "https://login.microsoftonline.com/common"
    BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
    REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
    SCOPE = [
        "https://graph.microsoft.com/Files.Read",
        "https://graph.microsoft.com/User.Read",
        "https://graph.microsoft.com/Sites.Read.All",
    ]

    @staticmethod
    def get_azure_token_data(credentials):
        if "access_token" not in credentials:
            raise HTTPException(status_code=401, detail="Invalid token data")
        return credentials

    @staticmethod
    def get_azure_headers(token_data):
        return {
            "Authorization": f"Bearer {token_data['access_token']}",
            "Accept": "application/json",
        }

    def check_and_refresh_access_token(self, credentials) -> Dict:
        if "refresh_token" not in credentials:
            raise HTTPException(status_code=401, detail="No refresh token available")

        client = msal.PublicClientApplication(self.CLIENT_ID, authority=self.AUTHORITY)
        result = client.acquire_token_by_refresh_token(
            credentials["refresh_token"], scopes=self.SCOPE
        )
        if "access_token" not in result:
            raise HTTPException(status_code=400, detail="Failed to refresh token")

        credentials.update(
            {
                "access_token": result["access_token"],
                "refresh_token": result.get(
                    "refresh_token", credentials["refresh_token"]
                ),
                "id_token": result.get("id_token", credentials.get("id_token")),
            }
        )

        return credentials

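The refresh above goes through MSAL's refresh-token grant. A self-contained sketch of that exchange (placeholder client id and a single scope, not the class constants above) might be:

# Hedged sketch: exchange a refresh token for a new Microsoft Graph access token.
import msal

def refresh_graph_token(client_id: str, refresh_token: str) -> dict:
    app = msal.PublicClientApplication(
        client_id, authority="https://login.microsoftonline.com/common"
    )
    result = app.acquire_token_by_refresh_token(
        refresh_token, scopes=["https://graph.microsoft.com/Files.Read"]
    )
    if "access_token" not in result:
        raise RuntimeError(result.get("error_description", "refresh failed"))
    return result
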
def get_files(self, credentials, folder_id=None, recursive=False) -> List[SyncFile]:
|
||||
def fetch_files(endpoint, headers, max_retries=1):
|
||||
logger.debug(f"fetching files from {endpoint}.")
|
||||
|
||||
retry_count = 0
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
response = requests.get(endpoint, headers=headers)
|
||||
|
||||
# Retrying with refereshed token
|
||||
if response.status_code == 401:
|
||||
token_data = self.check_and_refresh_access_token(credentials)
|
||||
headers = self.get_azure_headers(token_data)
|
||||
response = requests.get(endpoint, headers=headers)
|
||||
else:
|
||||
response.raise_for_status()
|
||||
return response.json().get("value", [])
|
||||
|
||||
except HTTPError as e:
|
||||
logger.exception(
|
||||
f"azure_list_files got exception : {e}. headers: {headers}. {retry_count} retrying."
|
||||
)
|
||||
# Exponential backoff
|
||||
time.sleep(2**retry_count)
|
||||
retry_count += 1
|
||||
|
||||
raise HTTPException(
|
||||
504, detail="can't connect to azure endpoint to retrieve files."
|
||||
)
|
||||
|
||||
token_data = self.get_azure_token_data(credentials)
|
||||
headers = self.get_azure_headers(token_data)
|
||||
endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"
|
||||
if folder_id:
|
||||
endpoint = (
|
||||
f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
|
||||
)
|
||||
|
||||
items = fetch_files(endpoint, headers)
|
||||
|
||||
if not items:
|
||||
logger.info("No files found in Azure Drive")
|
||||
return []
|
||||
|
||||
files = []
|
||||
for item in items:
|
||||
file_data = SyncFile(
|
||||
name=item.get("name"),
|
||||
id=item.get("id"),
|
||||
is_folder="folder" in item,
|
||||
last_modified=item.get("lastModifiedDateTime"),
|
||||
mime_type=item.get("file", {}).get("mimeType", "folder"),
|
||||
web_view_link=item.get("webUrl"),
|
||||
)
|
||||
files.append(file_data)
|
||||
|
||||
# If recursive option is enabled and the item is a folder, fetch files from it
|
||||
if recursive and file_data.is_folder:
|
||||
folder_files = self.get_files(
|
||||
credentials, folder_id=file_data.id, recursive=True
|
||||
)
|
||||
|
||||
files.extend(folder_files)
|
||||
for file in files:
|
||||
file.name = remove_special_characters(file.name)
|
||||
logger.info("Azure Drive files retrieved successfully: %s", len(files))
|
||||
return files
|
||||
|
||||
    def get_files_by_id(
        self, credentials: dict, file_ids: List[str]
    ) -> List[SyncFile] | dict:
        """
        Retrieve files from Azure Drive by their IDs.

        Args:
            credentials (dict): The credentials for accessing Azure Drive.
            file_ids (list): The list of file IDs to retrieve.

        Returns:
            list: A list of dictionaries containing the metadata of each file or an error message.
        """
        logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
        token_data = self.get_azure_token_data(credentials)
        headers = self.get_azure_headers(token_data)
        files = []

        for file_id in file_ids:
            endpoint = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
            response = requests.get(endpoint, headers=headers)
            if response.status_code == 401:
                token_data = self.check_and_refresh_access_token(credentials)
                headers = self.get_azure_headers(token_data)
                response = requests.get(endpoint, headers=headers)
            if response.status_code != 200:
                logger.error(
                    "An error occurred while retrieving Azure Drive files: %s",
                    response.text,
                )
                raise Exception("Failed to retrieve files")

            result = response.json()
            files.append(
                SyncFile(
                    name=result.get("name"),
                    id=result.get("id"),
                    is_folder="folder" in result,
                    last_modified=result.get("lastModifiedDateTime"),
                    mime_type=result.get("file", {}).get("mimeType", "folder"),
                    web_view_link=result.get("webUrl"),
                )
            )

        for file in files:
            file.name = remove_special_characters(file.name)
        logger.info("Azure Drive files retrieved successfully: %s", len(files))
        return files

    def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
        file_id = file.id
        file_name = file.name
        modified_time = file.last_modified
        headers = self.get_azure_headers(credentials)

        download_endpoint = (
            f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
        )
        logger.info("Downloading file: %s", file_name)
        download_response = requests.get(
            download_endpoint, headers=headers, stream=True
        )
        return BytesIO(download_response.content)


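For reference, listing a OneDrive folder against Microsoft Graph is a single GET per folder; a minimal sketch (hypothetical helper, bearer token assumed valid) could be:

# Hedged sketch: list OneDrive items via Microsoft Graph.
import requests

def list_drive_children(access_token: str, folder_id: str | None = None) -> list:
    base = "https://graph.microsoft.com/v1.0/me/drive"
    url = f"{base}/items/{folder_id}/children" if folder_id else f"{base}/root/children"
    resp = requests.get(url, headers={"Authorization": f"Bearer {access_token}"})
    resp.raise_for_status()
    return resp.json().get("value", [])
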
class DropboxSync(BaseSync):
    name = "Dropbox"
    lower_name = "dropbox"
    dbx: dropbox.Dropbox | None = None
    datetime_format: str = "%Y-%m-%d %H:%M:%S"

    def link_dropbox(self, credentials) -> dropbox.Dropbox:
        return dropbox.Dropbox(
            credentials["access_token"],
            oauth2_refresh_token=credentials["refresh_token"],
            app_key=os.getenv("DROPBOX_APP_KEY"),
            oauth2_access_token_expiration=credentials.get("expires_at"),
            app_secret=os.getenv("DROPBOX_APP_SECRET"),
        )

    def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
        if not self.dbx:
            self.dbx = self.link_dropbox(credentials)
        self.dbx.check_and_refresh_access_token()
        credentials["access_token"] = self.dbx._oauth2_access_token
        # Store the refresh-token string itself, not the bound
        # Dropbox.refresh_access_token method object.
        credentials["refresh_token"] = self.dbx._oauth2_refresh_token
        return credentials

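The offline-token refresh this PR fixes boils down to constructing the Dropbox client with both the access and refresh tokens plus the app key/secret, then letting the SDK refresh in place. A minimal sketch (same environment variables as above):

# Hedged sketch: build a Dropbox client that can refresh an offline access token.
import os
import dropbox

def make_dropbox_client(creds: dict) -> dropbox.Dropbox:
    dbx = dropbox.Dropbox(
        creds["access_token"],
        oauth2_refresh_token=creds["refresh_token"],
        app_key=os.getenv("DROPBOX_APP_KEY"),
        app_secret=os.getenv("DROPBOX_APP_SECRET"),
    )
    dbx.check_and_refresh_access_token()  # refreshes in place when expired
    return dbx
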
    def get_files(
        self, credentials: Dict, folder_id: str = "", recursive: bool = False
    ) -> List[SyncFile]:
        """
        Retrieve files from Dropbox.

        Args:
            credentials (dict): The credentials for accessing Dropbox.
            folder_id (str, optional): The folder ID to filter files. Defaults to "".
            recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.

        Returns:
            list: The list of SyncFile objects, or an error message.
        """
        logger.info("Retrieving Dropbox files with folder_id: %s", folder_id)

        # Verify credential has the access token
        if "access_token" not in credentials:
            logger.error("Invalid access token")
            raise Exception("Invalid access token")

        try:
            if not self.dbx:
                self.dbx = dropbox.Dropbox(
                    credentials["access_token"],
                    oauth2_refresh_token=credentials["refresh_token"],
                    app_key=os.getenv("DROPBOX_APP_KEY"),
                    oauth2_access_token_expiration=credentials.get("expires_at"),
                    app_secret=os.getenv("DROPBOX_APP_SECRET"),
                )
            self.dbx.check_and_refresh_access_token()
            credentials["access_token"] = self.dbx._oauth2_access_token

            def fetch_files(metadata):
                files = []
                for file in metadata.entries:
                    shared_link = f"https://www.dropbox.com/preview{file.path_display}?context=content_suggestions&role=personal"
                    is_folder = isinstance(file, dropbox.files.FolderMetadata)

                    files.append(
                        SyncFile(
                            name=file.name,
                            id=file.id,
                            is_folder=is_folder,
                            last_modified=(
                                str(file.client_modified) if not is_folder else ""
                            ),
                            mime_type=(
                                file.path_lower.split(".")[-1] if not is_folder else ""
                            ),
                            web_view_link=shared_link,
                        )
                    )
                return files

            files = []
            list_metadata = self.dbx.files_list_folder(folder_id, recursive=recursive)
            files.extend(fetch_files(list_metadata))

            while list_metadata.has_more:
                list_metadata = self.dbx.files_list_folder_continue(
                    list_metadata.cursor
                )
                files.extend(fetch_files(list_metadata))

            for file in files:
                file.name = remove_special_characters(file.name)

            logger.info("Dropbox files retrieved successfully: %d", len(files))
            return files

        except dropbox.exceptions.ApiError as e:
            logger.error("Dropbox API error: %s", e)
            raise Exception("Failed to retrieve files")
        except Exception as e:
            logger.error("Unexpected error: %s", e)
            raise Exception("Failed to retrieve files")

    def get_files_by_id(
        self, credentials: Dict[str, str], file_ids: List[str]
    ) -> List[SyncFile]:
        """
        Retrieve files from Dropbox by their IDs.

        Args:
            credentials (dict): The credentials for accessing Dropbox.
            file_ids (list): The list of file IDs to retrieve.

        Returns:
            list: A list of dictionaries containing the metadata of each file or an error message.
        """
        logger.info("Retrieving Dropbox files with file_ids: %s", file_ids)

        if "access_token" not in credentials:
            logger.error("Access token is not in the credentials")
            raise Exception("Invalid access token")

        try:
            if not self.dbx:
                self.dbx = self.link_dropbox(credentials)
            self.dbx.check_and_refresh_access_token()
            credentials["access_token"] = self.dbx._oauth2_access_token  # type: ignore

            files = []

            for file_id in file_ids:
                try:
                    metadata = self.dbx.files_get_metadata(file_id)
                    shared_link = f"https://www.dropbox.com/preview/{metadata.path_display}?context=content_suggestions&role=personal"
                    is_folder = isinstance(metadata, dropbox.files.FolderMetadata)
                    file_info = SyncFile(
                        name=metadata.name,
                        id=metadata.id,
                        is_folder=is_folder,
                        last_modified=(
                            str(metadata.client_modified) if not is_folder else ""
                        ),
                        mime_type=(
                            metadata.path_lower.split(".")[-1] if not is_folder else ""
                        ),
                        web_view_link=shared_link,
                    )

                    files.append(file_info)
                except dropbox.exceptions.ApiError as api_err:
                    logger.error(
                        "Dropbox API error for file_id %s: %s", file_id, api_err
                    )
                    continue  # Skip this file and proceed with the next one
                except Exception as err:
                    logger.error("Unexpected error for file_id %s: %s", file_id, err)
                    continue  # Skip this file and proceed with the next one

            for file in files:
                file.name = remove_special_characters(file.name)

            logger.info("Dropbox files retrieved successfully: %d", len(files))
            return files

        except dropbox.exceptions.AuthError as auth_err:
            logger.error("Authentication error: %s", auth_err)
            raise Exception("Failed to retrieve files")
        except Exception as e:
            logger.error("Unexpected error: %s", e)
            raise Exception("Failed to retrieve files")

    def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
        file_id = str(file.id)
        if not self.dbx:
            self.dbx = self.link_dropbox(credentials)

        metadata, file_data = self.dbx.files_download(file_id)  # type: ignore
        return BytesIO(file_data.content)

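As described in the PR, a concrete BaseSync implementation is plugged into the generic SyncUtils pipeline defined below. A hedged wiring sketch (the no-argument constructors and the id values are illustrative assumptions, not the actual service setup):

# Hedged sketch: compose the generic pipeline with a concrete cloud backend.
sync_utils = SyncUtils(
    storage=Storage(),
    sync_files_repo=SyncFiles(),
    sync_user_service=SyncUserService(),
    sync_active_service=SyncService(),
    sync_cloud=DropboxSync(),
)
# await sync_utils.sync(sync_active_id=42, user_id="user-uuid")  # placeholder values
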
@ -1,15 +1,16 @@
import os
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
from typing import List

import dropbox
from fastapi import UploadFile
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import CreateNotification
from quivr_api.modules.notification.dto.inputs import (
    CreateNotification,
    NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
    NotificationService,
@ -19,52 +20,46 @@ from quivr_api.modules.sync.dto.inputs import (
    SyncFileUpdateInput,
    SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
    get_dropbox_files_by_id,
    list_dropbox_files,
)
from quivr_api.modules.sync.utils.sync import BaseSync
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists

notification_service = NotificationService()
logger = get_logger(__name__)

APP_KEY = os.getenv("DROPBOX_APP_KEY")
APP_SECRET = os.getenv("DROPBOW_CONSUMER_SECRET")

notification_service = NotificationService()

class DropboxSyncUtils(BaseModel):
class SyncUtils(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    storage: Storage
    sync_files_repo: SyncFiles
    sync_active_service: SyncService
    sync_user_service: SyncUserService
    sync_active_service: SyncService
    sync_files_repo: SyncFiles
    storage: Storage
    sync_cloud: BaseSync

    async def _upload_files(
        self,
        token_data: dict,
        files: list,
        credentials: dict,
        files: List[SyncFile],
        current_user: str,
        brain_id: str,
        sync_active_id: int,
    ):
        """
        Download files from DropBox.
        Download files from an external cloud.

        Args:
            credentials (dict): The credentials for accessin DropBox Drive.
            credentials (dict): The token data for accessing the external cloud.
            files (list): The list of file metadata to download.

        Returns:
            dict: A dictionary containing the status of the download or an error message.
        """
        dbx = dropbox.Dropbox(token_data["access_token"])
        dbx.check_and_refresh_access_token()
        token_data["access_token"] = dbx._oauth2_access_token

        credentials = self.sync_cloud.check_and_refresh_access_token(credentials)

        downloaded_files = []
        bulk_id = uuid.uuid4()
@ -80,22 +75,20 @@ class DropboxSyncUtils(BaseModel):
                    brain_id=str(brain_id),
                )
            )

            file.notification_id = str(upload_notification.id)

        for file in files:
            logger.info("Processing file: %s", file.name)
            try:
                file_id = str(file.id)
                file_id = file.id
                file_name = file.name
                mime_type = file.mime_type
                modified_time = file.last_modified

                metadata, file_data = dbx.files_download(file_id)  # type: ignore
                # logger.debug("🔥 Filedata :", file_data.content)
                file_data = BytesIO(file_data.content)

                file_data = self.sync_cloud.download_file(credentials, file)
                # Check if the file already exists in the storage
                if check_file_exists(brain_id, file_name):
                    logger.debug("🔥 File already exists in the storage: %s", file_name)
                    logger.debug("%s already exists in the storage", file_name)

                    self.storage.remove_file(brain_id + "/" + file_name)
                    BrainsVectors().delete_file_from_brain(brain_id, file_name)
@ -133,7 +126,7 @@ class DropboxSyncUtils(BaseModel):
                        brain_id,
                        current_user,
                        bulk_id,
                        "DropBox",
                        self.sync_cloud.name,
                        file.web_view_link,
                        notification_id=file.notification_id,
                    )
@ -159,11 +152,19 @@ class DropboxSyncUtils(BaseModel):
                    )
                )

                downloaded_files.append(file_name)

                downloaded_files.append(file_name)
                notification_service.update_notification_by_id(
                    file.notification_id,
                    NotificationUpdatableProperties(
                        status=NotificationsStatusEnum.SUCCESS,
                        description="File downloaded successfully",
                    ),
                )
            except Exception as error:
                logger.error(
                    "An error occurred while downloading DropBox files: %s", error
                    "An error occurred while downloading %s files: %s",
                    self.sync_cloud.name,
                    error,
                )
                # Check if the file already exists in the database
                existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
@ -189,16 +190,25 @@ class DropboxSyncUtils(BaseModel):
                            supported=False,
                        )
                    )
                notification_service.update_notification_by_id(
                    file.notification_id,
                    NotificationUpdatableProperties(
                        status=NotificationsStatusEnum.ERROR,
                        description="Error downloading file",
                    ),
                )

        return {"downloaded_files": downloaded_files}

    async def sync(self, sync_active_id: int, user_id: str):
        """
        Check if the Dropbox sync has not been synced and download the folders and files based on the settings.
        Check if the Specific sync has not been synced and download the folders and files based on the settings.

        Args:
            sync_active_id (int): The ID of the active sync.
            user_id (str): The user ID associated with the active sync.
        """

        # Retrieve the active sync details
        sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
        if not sync_active:
@ -211,7 +221,6 @@ class DropboxSyncUtils(BaseModel):
        last_synced = sync_active.get("last_synced")
        force_sync = sync_active.get("force_sync", False)
        sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)

        if last_synced and not force_sync:
            last_synced_time = datetime.fromisoformat(last_synced).astimezone(
                timezone.utc
@ -228,7 +237,9 @@ class DropboxSyncUtils(BaseModel):
            time_difference = current_time_utc - last_synced_time
            if time_difference < timedelta(minutes=sync_interval_minutes):
                logger.info(
                    "DropBox sync is not due for sync_active_id: %s", sync_active_id
                    "%s sync is not due for sync_active_id: %s",
                    self.sync_cloud.name,
                    sync_active_id,
                )
                return None

@ -245,15 +256,18 @@ class DropboxSyncUtils(BaseModel):
            return None

        sync_user = sync_user[0]
        if sync_user["provider"].lower() != "dropbox":
        if sync_user["provider"].lower() != self.sync_cloud.lower_name:
            logger.warning(
                "Sync provider is not DropBox for sync_active_id: %s", sync_active_id
                "Sync provider is not %s for sync_active_id: %s",
                self.sync_cloud.name,
                sync_active_id,
            )
            return None

        # Download the folders and files from DropBox
        # Download the folders and files from Cloud
        logger.info(
            "Downloading folders and files from Dropbox for sync_active_id: %s",
            "Downloading folders and files from %s for sync_active_id: %s",
            self.sync_cloud.name,
            sync_active_id,
        )

@ -261,20 +275,19 @@ class DropboxSyncUtils(BaseModel):
        settings = sync_active.get("settings", {})
        folders = settings.get("folders", [])
        files_to_download = settings.get("files", [])
        files = []
        files: List[SyncFile] = []
        files_metadata = []
        if len(folders) > 0:
            files = []
            for folder in folders:
                files.extend(
                    list_dropbox_files(
                    self.sync_cloud.get_files(
                        sync_user["credentials"],
                        folder_id=folder,
                        recursive=True,
                    )
                )
        if len(files_to_download) > 0:
            files_metadata = get_dropbox_files_by_id(
            files_metadata = self.sync_cloud.get_files_by_id(
                sync_user["credentials"],
                files_to_download,
            )
@ -282,7 +295,7 @@ class DropboxSyncUtils(BaseModel):

        if "error" in files:
            logger.error(
                "Failed to download files from DropBox for sync_active_id: %s",
                "Failed to download files from Azure for sync_active_id: %s",
                sync_active_id,
            )
            return None
@ -293,8 +306,7 @@ class DropboxSyncUtils(BaseModel):
            if last_synced
            else None
        )
        logger.info("Files retrieved from DropBox: %s", len(files))
        logger.info("Files retrieved from DropBox: %s", files)
        logger.info("Files retrieved from %s: %s", self.sync_cloud.lower_name, files)

        files_to_download = [
            file
@ -304,7 +316,8 @@ class DropboxSyncUtils(BaseModel):
                (
                    not last_synced_time
                    or datetime.strptime(
                        file.last_modified, "%Y-%m-%d %H:%M:%S"
                        file.last_modified,
                        (self.sync_cloud.datetime_format),
                    ).replace(tzinfo=timezone.utc)
                    > last_synced_time
                )
@ -321,7 +334,7 @@ class DropboxSyncUtils(BaseModel):
        )
        if "error" in downloaded_files:
            logger.error(
                "Failed to download files from DropBox for sync_active_id: %s",
                "Failed to download files from Azure for sync_active_id: %s",
                sync_active_id,
            )
            return None
@ -333,5 +346,9 @@ class DropboxSyncUtils(BaseModel):
                last_synced=datetime.now().astimezone().isoformat(), force_sync=False
            ),
        )
        logger.info("DropBox sync completed for sync_active_id: %s", sync_active_id)
        logger.info(
            "%s sync completed for sync_active_id: %s",
            self.sync_cloud.lower_name,
            sync_active_id,
        )
        return downloaded_files

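The due-for-sync decision above is plain timedelta arithmetic; isolated into a helper (hypothetical, for illustration only):

# Hedged sketch: decide whether a sync is due, given the last run and an interval.
from datetime import datetime, timedelta, timezone

def sync_is_due(last_synced_iso: str | None, interval_minutes: int) -> bool:
    if not last_synced_iso:
        return True
    last = datetime.fromisoformat(last_synced_iso).astimezone(timezone.utc)
    return datetime.now(timezone.utc) - last >= timedelta(minutes=interval_minutes)
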
@ -85,12 +85,9 @@ class QuivrKnowledge(BaseModel):
    file_name: str | None = None
    url: str | None = None
    extension: str = "txt"
    integration: str | None = None
    integration_link: str | None = None
    status: str = "PROCESSING"


# NOTE: for compatibility issues with langchain <-> PydanticV1
class SearchResult(BaseModelV1):
    chunk: Document

backend/poetry.lock (generated, 3 changes)
@ -9535,4 +9535,5 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "963b9ff228f2478505802a0e2915d2802e9d92396e33cde549f33bdb07f2a30d"
content-hash = "4f318e2f8561fac7ad1f4aa603e5264accf857d6e7e1bfd9fcf770279f6a2e9a"

@ -2721,7 +2721,12 @@
  resolved "https://registry.npmjs.org/@types/throttle-debounce/-/throttle-debounce-2.1.0.tgz"
  integrity sha512-5eQEtSCoESnh2FsiLTxE121IiE60hnMqcb435fShf4bpLRjEu1Eoekht23y6zXS9Ts3l+Szu3TARnTsA0GkOkQ==

"@types/unist@*", "@types/unist@^2", "@types/unist@^2.0.0":
"@types/unist@*":
  version "2.0.7"
  resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz"
  integrity sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==

"@types/unist@^2", "@types/unist@^2.0.0":
  version "2.0.7"
  resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz"
  integrity sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==
@ -3367,7 +3372,7 @@ chalk@^2.4.2:
    escape-string-regexp "^1.0.5"
    supports-color "^5.3.0"

chalk@^3.0.0, chalk@3.0.0:
chalk@^3.0.0:
  version "3.0.0"
  resolved "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz"
  integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
@ -3391,6 +3396,14 @@ chalk@^4.1.0:
    ansi-styles "^4.1.0"
    supports-color "^7.1.0"

chalk@3.0.0:
  version "3.0.0"
  resolved "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz"
  integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
  dependencies:
    ansi-styles "^4.1.0"
    supports-color "^7.1.0"

change-case@^5.4.2:
  version "5.4.2"
  resolved "https://registry.npmjs.org/change-case/-/change-case-5.4.2.tgz"
@ -3745,7 +3758,14 @@ date-fns@2.30.0:
  dependencies:
    "@babel/runtime" "^7.21.0"

debug@^2.2.0, debug@^2.6.9:
debug@^2.2.0:
  version "2.6.9"
  resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
  integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
  dependencies:
    ms "2.0.0"

debug@^2.6.9:
  version "2.6.9"
  resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
  integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
@ -3766,35 +3786,7 @@ debug@^4.0.0:
  dependencies:
    ms "2.1.2"

debug@^4.1.0:
  version "4.3.4"
  resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
  integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
  dependencies:
    ms "2.1.2"

debug@^4.1.1:
  version "4.3.4"
  resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
  integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
  dependencies:
    ms "2.1.2"

debug@^4.3.1:
  version "4.3.4"
  resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
  integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
  dependencies:
    ms "2.1.2"

debug@^4.3.2:
  version "4.3.4"
  resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
  integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
  dependencies:
    ms "2.1.2"

debug@^4.3.4, debug@4:
debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4, debug@4:
  version "4.3.4"
  resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
  integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
@ -4821,7 +4813,7 @@ github-from-package@0.0.0:
  resolved "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz"
  integrity sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==

glob-parent@^5.1.2:
glob-parent@^5.1.2, glob-parent@~5.1.2:
  version "5.1.2"
  resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
  integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@ -4835,25 +4827,7 @@ glob-parent@^6.0.2:
  dependencies:
    is-glob "^4.0.3"

glob-parent@~5.1.2:
  version "5.1.2"
  resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
  integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
  dependencies:
    is-glob "^4.0.1"

glob@^10.2.2:
  version "10.3.10"
  resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
  integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
  dependencies:
    foreground-child "^3.1.0"
    jackspeak "^2.3.5"
    minimatch "^9.0.1"
    minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
    path-scurry "^1.10.1"

glob@^10.3.10:
glob@^10.2.2, glob@^10.3.10, glob@10.3.10:
  version "10.3.10"
  resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
  integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
@ -4887,17 +4861,6 @@ glob@^8.0.3:
    minimatch "^5.0.1"
    once "^1.3.0"

glob@10.3.10:
  version "10.3.10"
  resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
  integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
  dependencies:
    foreground-child "^3.1.0"
    jackspeak "^2.3.5"
    minimatch "^9.0.1"
    minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
    path-scurry "^1.10.1"

glob@7.1.6:
  version "7.1.6"
  resolved "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz"
@ -5921,7 +5884,7 @@ lowlight@^2.0.0:
    fault "^2.0.0"
    highlight.js "~11.8.0"

lru-cache@^10.0.1:
lru-cache@^10.0.1, "lru-cache@^9.1.1 || ^10.0.0":
  version "10.0.3"
  resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.3.tgz"
  integrity sha512-B7gr+F6MkqB3uzINHXNctGieGsRTMwIBgxkp0yq/5BwcuDzD4A8wQpHQW6vDAm1uKSLQghmRdD9sKqf2vJ1cEg==
@ -5940,11 +5903,6 @@ lru-cache@^6.0.0:
  dependencies:
    yallist "^4.0.0"

"lru-cache@^9.1.1 || ^10.0.0":
  version "10.0.3"
  resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.3.tgz"
  integrity sha512-B7gr+F6MkqB3uzINHXNctGieGsRTMwIBgxkp0yq/5BwcuDzD4A8wQpHQW6vDAm1uKSLQghmRdD9sKqf2vJ1cEg==

lz-string@^1.5.0:
  version "1.5.0"
  resolved "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz"