fix: Refactor & update Dropbox refresh (#2875)

This pull request refactors the cloud sync utilities and fixes the Dropbox
token refresh. It includes the following changes:

- Created a `BaseSync` abstract class that gathers the provider-specific operations for each cloud
- Created a `SyncUtils` class that takes in a `BaseSync` and applies the common sync pipeline
- Fixed the token refresh method for Dropbox

Please review and merge this pull request to enable Dropbox sync
functionality in the application.
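
For reviewers, a minimal sketch of the composition pattern these changes introduce. It is simplified from the diff below: `SyncFile` and the storage/notification plumbing are omitted, and the `run` method here is an illustrative stand-in for the real `sync`/`_upload_files` flow, not the actual API.

```python
from abc import ABC, abstractmethod
from io import BytesIO
from typing import Dict, List


class BaseSync(ABC):
    """Provider-specific operations; one concrete subclass per cloud."""

    name: str

    @abstractmethod
    def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
        """Refresh expired tokens and return the updated credentials."""

    @abstractmethod
    def get_files(
        self, credentials: Dict, folder_id: str | None = None, recursive: bool = False
    ) -> List:
        """List the files in the provider's drive."""

    @abstractmethod
    def download_file(self, credentials: Dict, file) -> BytesIO:
        """Fetch one file's contents."""


class SyncUtils:
    """Provider-agnostic pipeline that receives a BaseSync by composition."""

    def __init__(self, sync_cloud: BaseSync):
        self.sync_cloud = sync_cloud

    def run(self, credentials: Dict, folder_id: str | None = None) -> List[str]:
        # Refresh up front so every later API call uses valid tokens.
        credentials = self.sync_cloud.check_and_refresh_access_token(credentials)
        downloaded: List[str] = []
        for file in self.sync_cloud.get_files(credentials, folder_id):
            self.sync_cloud.download_file(credentials, file)
            downloaded.append(file.name)  # the real pipeline also uploads and notifies
        return downloaded


# Usage: one pipeline, three providers, e.g.
# dropbox_sync_utils = SyncUtils(sync_cloud=DropboxSync())
```

The refresh fix itself comes down to constructing `dropbox.Dropbox` with the refresh token, app key, app secret, and token expiration (see `DropboxSync.link_dropbox` in the diff) instead of the access token alone, so the SDK's `check_and_refresh_access_token()` can actually renew an expired token.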

---------

Co-authored-by: Stan Girard <stan@quivr.app>
Co-authored-by: Amine Dirhoussi <aminediro@quivr.app>
Chloé Daems 2024-07-19 09:47:11 +02:00 committed by GitHub
parent 96cc5b5b5a
commit 3b68855a83
12 changed files with 748 additions and 1356 deletions

View File

@@ -28,7 +28,6 @@ from quivr_api.routes.crawl_routes import crawl_router
from quivr_api.routes.subscription_routes import subscription_router
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from starlette.middleware.sessions import SessionMiddleware
load_dotenv()
@@ -71,7 +70,6 @@ if sentry_dsn:
app = FastAPI()
add_cors_middleware(app)
app.add_middleware(SessionMiddleware, secret_key=str(os.getenv("SESSION_SECRET_KEY")))
app.include_router(brain_router)
app.include_router(chat_router)

View File

@@ -51,7 +51,7 @@ def authorize_dropbox(
auth_flow = DropboxOAuth2Flow(
DROPBOX_APP_KEY,
redirect_uri=BASE_REDIRECT_URI,
session=request.session,
session={},
csrf_token_session_key="csrf-token",
consumer_secret=DROPBOX_APP_SECRET,
token_access_type="offline",
@@ -88,10 +88,11 @@ def oauth2callback_dropbox(request: Request):
state = request.query_params.get("state")
if not state:
raise HTTPException(status_code=400, detail="Invalid state parameter")
request.session["csrf-token"] = state.split("|")[0] if "|" in state else ""
session = {}
session["csrf-token"] = state.split("|")[0] if "|" in state else ""
logger.debug("Keys in session : %s", request.session.keys())
logger.debug("Value in session : %s", request.session.values())
logger.debug("Keys in session : %s", session.keys())
logger.debug("Value in session : %s", session.values())
state = state.split("|")[1] if "|" in state else state # type: ignore
state_dict = {"state": state}
@@ -117,7 +118,7 @@ def oauth2callback_dropbox(request: Request):
auth_flow = DropboxOAuth2Flow(
DROPBOX_APP_KEY,
redirect_uri=BASE_REDIRECT_URI,
session=request.session,
session=session,
csrf_token_session_key="csrf-token",
consumer_secret=DROPBOX_APP_SECRET,
token_access_type="offline",
@@ -139,7 +140,6 @@ def oauth2callback_dropbox(request: Request):
"access_token": oauth_result.access_token,
"refresh_token": oauth_result.refresh_token,
"account_id": account_id,
"email": user_email,
"expires_in": str(oauth_result.expires_at),
}

View File

@@ -8,10 +8,10 @@ from quivr_api.modules.notification.service.notification_service import (
)
from quivr_api.modules.sync.dto.inputs import SyncsUserInput, SyncUserUpdateInput
from quivr_api.modules.sync.repository.sync_interfaces import SyncUserInterface
from quivr_api.modules.sync.utils.list_files import (
get_google_drive_files,
list_azure_files,
list_dropbox_files,
from quivr_api.modules.sync.utils.sync import (
AzureDriveSync,
DropboxSync,
GoogleDriveSync,
)
notification_service = NotificationService()
@@ -205,20 +205,19 @@ class SyncUser(SyncUserInterface):
provider = sync_user["provider"].lower()
if provider == "google":
logger.info("Getting files for Google sync")
return {
"files": get_google_drive_files(sync_user["credentials"], folder_id)
}
sync = GoogleDriveSync()
return {"files": sync.get_files(sync_user["credentials"], folder_id)}
elif provider == "azure":
logger.info("Getting files for Azure sync")
sync = AzureDriveSync()
return {
"files": list_azure_files(
sync_user["credentials"], folder_id, recursive
)
"files": sync.get_files(sync_user["credentials"], folder_id, recursive)
}
elif provider == "dropbox":
logger.info("Getting files for Drop Box sync")
sync = DropboxSync()
return {
"files": list_dropbox_files(
"files": sync.get_files(
sync_user["credentials"], folder_id if folder_id else "", recursive
)
}

View File

@@ -5,9 +5,12 @@ from quivr_api.logger import get_logger
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.dropboxutils import DropboxSyncUtils
from quivr_api.modules.sync.utils.googleutils import GoogleSyncUtils
from quivr_api.modules.sync.utils.sharepointutils import AzureSyncUtils
from quivr_api.modules.sync.utils.sync import (
AzureDriveSync,
DropboxSync,
GoogleDriveSync,
)
from quivr_api.modules.sync.utils.syncutils import SyncUtils
logger = get_logger(__name__)
@@ -24,25 +27,28 @@ async def _process_sync_active():
sync_files_repo_service = SyncFiles()
storage = Storage()
google_sync_utils = GoogleSyncUtils(
google_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=GoogleDriveSync(),
)
azure_sync_utils = AzureSyncUtils(
azure_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=AzureDriveSync(),
)
dropbox_sync_utils = DropboxSyncUtils(
dropbox_sync_utils = SyncUtils(
sync_user_service=sync_user_service,
sync_active_service=sync_active_service,
sync_files_repo=sync_files_repo_service,
storage=storage,
sync_cloud=DropboxSync(),
)
active = await sync_active_service.get_syncs_active_in_interval()

View File

@@ -1,383 +0,0 @@
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
from typing import List
from fastapi import UploadFile
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import (
CreateNotification,
NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
NotificationService,
)
from quivr_api.modules.sync.dto.inputs import (
SyncFileInput,
SyncFileUpdateInput,
SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
get_google_drive_files,
get_google_drive_files_by_id,
)
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists
notification_service = NotificationService()
logger = get_logger(__name__)
class GoogleSyncUtils(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
sync_user_service: SyncUserService
sync_active_service: SyncService
sync_files_repo: SyncFiles
storage: Storage
async def _upload_files(
self,
credentials: dict,
files: List[SyncFile],
current_user: str,
brain_id: str,
sync_active_id: int,
):
"""
Download files from Google Drive.
Args:
credentials (dict): The credentials for accessing Google Drive.
files (list): The list of file metadata to download.
Returns:
dict: A dictionary containing the status of the download or an error message.
"""
logger.info("Downloading Google Drive files with metadata: %s", files)
creds = Credentials.from_authorized_user_info(credentials)
if creds.expired and creds.refresh_token:
creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
# Updating the credentials in the database
service = build("drive", "v3", credentials=creds)
downloaded_files = []
bulk_id = uuid.uuid4()
for file in files:
upload_notification = notification_service.add_notification(
CreateNotification(
user_id=current_user,
bulk_id=bulk_id,
status=NotificationsStatusEnum.INFO,
title=file.name,
category="sync",
brain_id=str(brain_id),
)
)
file.notification_id = str(upload_notification.id)
for file in files:
logger.info("🔥🔥🔥🔥: %s", file)
try:
file_id = file.id
file_name = file.name
mime_type = file.mime_type
modified_time = file.last_modified
file_url = file.web_view_link
# Convert Google Docs files to appropriate formats before downloading
if mime_type == "application/vnd.google-apps.document":
logger.debug(
"Converting Google Docs file with file_id: %s to DOCX.",
file_id,
)
request = service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)
file_name += ".docx"
elif mime_type == "application/vnd.google-apps.spreadsheet":
logger.debug(
"Converting Google Sheets file with file_id: %s to XLSX.",
file_id,
)
request = service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
file_name += ".xlsx"
elif mime_type == "application/vnd.google-apps.presentation":
logger.debug(
"Converting Google Slides file with file_id: %s to PPTX.",
file_id,
)
request = service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
)
file_name += ".pptx"
### Elif pdf, txt, md, csv, docx, xlsx, pptx, doc
elif file_name.split(".")[-1] in [
"pdf",
"txt",
"md",
"csv",
"docx",
"xlsx",
"pptx",
"doc",
]:
request = service.files().get_media(fileId=file_id)
else:
logger.warning(
"Skipping unsupported file type: %s for file_id: %s",
mime_type,
file_id,
)
continue
file_data = request.execute()
# Check if the file already exists in the storage
if check_file_exists(brain_id, file_name):
logger.debug("🔥 File already exists in the storage: %s", file_name)
self.storage.remove_file(brain_id + "/" + file_name)
BrainsVectors().delete_file_from_brain(brain_id, file_name)
to_upload_file = UploadFile(
file=BytesIO(file_data),
filename=file_name,
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file_name), None
)
supported = False
if (existing_file and existing_file.supported) or not existing_file:
supported = True
await upload_file(
to_upload_file,
brain_id,
current_user,
bulk_id,
"Google Drive",
file.web_view_link,
notification_id=file.notification_id,
) # type: ignore
if existing_file:
# Update the existing file record
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
last_modified=modified_time,
supported=supported,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file_name,
syncs_active_id=sync_active_id,
last_modified=modified_time,
brain_id=str(brain_id), # Convert UUID to string
supported=supported,
)
)
downloaded_files.append(file_name)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.SUCCESS,
description="File downloaded successfully",
),
)
except Exception as error:
logger.error(
"An error occurred while downloading Google Drive files: %s",
str(error), # Convert error to string
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file.name), None
)
# Update the existing file record
if existing_file:
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
supported=False,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file.name,
syncs_active_id=sync_active_id,
last_modified=file.last_modified,
brain_id=brain_id,
supported=False,
)
)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.ERROR,
description="Error downloading file",
),
)
return {"downloaded_files": downloaded_files}
async def sync(self, sync_active_id: int, user_id: str):
"""
Check if the Google sync has not been synced and download the folders and files based on the settings.
Args:
sync_active_id (int): The ID of the active sync.
user_id (str): The user ID associated with the active sync.
"""
# Retrieve the active sync details
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
if not sync_active:
logger.warning(
"No active sync found for sync_active_id: %s", sync_active_id
)
return None
# Check if the sync is due
last_synced = sync_active.get("last_synced")
force_sync = sync_active.get("force_sync", False)
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
if last_synced and not force_sync:
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
timezone.utc
)
current_time = datetime.now().astimezone()
# Debug logging to check the values
logger.debug("Last synced time (UTC): %s", last_synced_time)
logger.debug("Current time (local timezone): %s", current_time)
# Convert current_time to UTC for comparison
current_time_utc = current_time.astimezone(timezone.utc)
logger.debug("Current time (UTC): %s", current_time_utc)
time_difference = current_time_utc - last_synced_time
if time_difference < timedelta(minutes=sync_interval_minutes):
logger.info(
"Google sync is not due for sync_active_id: %s", sync_active_id
)
return None
# Retrieve the sync user details
sync_user = self.sync_user_service.get_syncs_user(
user_id=user_id, sync_user_id=sync_active["syncs_user_id"]
)
if not sync_user:
logger.warning(
"No sync user found for sync_active_id: %s, user_id: %s",
sync_active_id,
user_id,
)
return None
sync_user = sync_user[0]
if sync_user["provider"].lower() != "google":
logger.warning(
"Sync provider is not Google for sync_active_id: %s", sync_active_id
)
return None
# Download the folders and files from Google Drive
logger.info(
"Downloading folders and files from Google Drive for sync_active_id: %s",
sync_active_id,
)
settings = sync_active.get("settings", {})
folders = settings.get("folders", [])
files_to_download = settings.get("files", [])
files: List[SyncFile] = []
files_metadata: List[SyncFile] = []
if len(folders) > 0:
for folder in folders:
folder_files = get_google_drive_files(
sync_user["credentials"],
folder_id=folder,
recursive=True,
)
if isinstance(folder_files, list):
files.extend(folder_files)
else:
logger.error(
f"Error fetching files for folder {folder}: {folder_files}"
)
if len(files_to_download) > 0:
files_metadata = get_google_drive_files_by_id(
sync_user["credentials"], files_to_download
)
files = files + files_metadata # type: ignore
if "error" in files:
logger.error(
"Failed to download files from Google Drive for sync_active_id: %s",
sync_active_id,
)
return None
# Filter files that have been modified since the last sync
last_synced_time = datetime.fromisoformat(last_synced) if last_synced else None
files_to_download = [
file
for file in files
if not file.is_folder
and (
(
not last_synced_time
or datetime.fromisoformat(file.last_modified) > last_synced_time
)
or not check_file_exists(sync_active["brain_id"], file.name)
)
]
downloaded_files = await self._upload_files(
sync_user["credentials"],
files_to_download,
user_id,
sync_active["brain_id"],
sync_active_id,
)
# Update the last_synced timestamp
self.sync_active_service.update_sync_active(
sync_active_id,
SyncsActiveUpdateInput(
last_synced=datetime.now().astimezone().isoformat(),
force_sync=False,
),
)
logger.info(
"Google Drive sync completed for sync_active_id: %s", sync_active_id
)
return downloaded_files

View File

@@ -1,436 +0,0 @@
import os
from typing import Dict, List
import dropbox
import msal
import requests
from fastapi import HTTPException
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from quivr_api.logger import get_logger
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.utils.normalize import remove_special_characters
from requests import HTTPError
logger = get_logger(__name__)
# GOOGLE
def get_google_drive_files_by_id(
credentials: dict, file_ids: List[str]
) -> List[SyncFile]:
"""
Retrieve files from Google Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Google Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
creds = Credentials.from_authorized_user_info(credentials)
if creds.expired and creds.refresh_token:
creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
try:
service = build("drive", "v3", credentials=creds)
files: List[SyncFile] = []
for file_id in file_ids:
result = (
service.files()
.get(
fileId=file_id,
fields="id, name, mimeType, modifiedTime, webViewLink",
)
.execute()
)
files.append(
SyncFile(
name=result["name"],
id=result["id"],
is_folder=(
result["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=result["modifiedTime"],
mime_type=result["mimeType"],
web_view_link=result["webViewLink"],
)
)
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error("An error occurred while retrieving Google Drive files: %s", error)
return []
def get_google_drive_files(
credentials: dict, folder_id: str = None, recursive: bool = False
) -> List[SyncFile]:
"""
Retrieve files from Google Drive.
Args:
credentials (dict): The credentials for accessing Google Drive.
folder_id (str, optional): The folder ID to filter files. Defaults to None.
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
creds = Credentials.from_authorized_user_info(credentials)
if creds.expired and creds.refresh_token:
creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
# Updating the credentials in the database
try:
service = build("drive", "v3", credentials=creds)
if folder_id:
query = f"'{folder_id}' in parents"
else:
query = "'root' in parents or sharedWithMe"
page_token = None
files: List[SyncFile] = []
while True:
results = (
service.files()
.list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, webViewLink)",
pageToken=page_token,
)
.execute()
)
items = results.get("files", [])
if not items:
logger.info("No files found in Google Drive")
break
for item in items:
files.append(
SyncFile(
name=item["name"],
id=item["id"],
is_folder=(
item["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=item["modifiedTime"],
mime_type=item["mimeType"],
web_view_link=item["webViewLink"],
)
)
# If recursive is True and the item is a folder, get files from the folder
if recursive and item.mimeType == "application/vnd.google-apps.folder":
logger.warning(
"Calling Recursive for folder: %s",
item.name,
)
files.extend(
get_google_drive_files(credentials, item.id, recursive)
)
page_token = results.get("nextPageToken", None)
if page_token is None:
break
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error("An error occurred while retrieving Google Drive files: %s", error)
return []
# AZURE
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
AUTHORITY = "https://login.microsoftonline.com/common"
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
SCOPE = [
"https://graph.microsoft.com/Files.Read",
"https://graph.microsoft.com/User.Read",
"https://graph.microsoft.com/Sites.Read.All",
]
def get_azure_token_data(credentials):
if "access_token" not in credentials:
raise HTTPException(status_code=401, detail="Invalid token data")
return credentials
def refresh_azure_token(credentials):
if "refresh_token" not in credentials:
raise HTTPException(status_code=401, detail="No refresh token available")
client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
result = client.acquire_token_by_refresh_token(
credentials["refresh_token"], scopes=SCOPE
)
if "access_token" not in result:
raise HTTPException(status_code=400, detail="Failed to refresh token")
return result
def get_azure_headers(token_data):
return {
"Authorization": f"Bearer {token_data['access_token']}",
"Accept": "application/json",
}
def list_azure_files(credentials, folder_id=None, recursive=False) -> list[SyncFile]:
def fetch_files(endpoint, headers):
response = requests.get(endpoint, headers=headers)
if response.status_code == 401:
token_data = refresh_azure_token(credentials)
headers = get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
if response.status_code != 200:
return {"error": response.text}
return response.json().get("value", [])
token_data = get_azure_token_data(credentials)
headers = get_azure_headers(token_data)
endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"
if folder_id:
endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
)
items = fetch_files(endpoint, headers)
if not items:
logger.info("No files found in Azure Drive")
return []
files = []
for item in items:
file_data = SyncFile(
name=item.get("name"),
id=item.get("id"),
is_folder="folder" in item,
last_modified=item.get("lastModifiedDateTime"),
mime_type=item.get("file", {}).get("mimeType", "folder"),
web_view_link=item.get("webUrl"),
)
files.append(file_data)
# If recursive option is enabled and the item is a folder, fetch files from it
if recursive and file_data.is_folder:
folder_files = list_azure_files(
credentials, folder_id=file_data.id, recursive=True
)
files.extend(folder_files)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
def get_azure_files_by_id(
credentials: dict, file_ids: List[str]
) -> List[SyncFile] | dict:
"""
Retrieve files from Azure Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Azure Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
token_data = get_azure_token_data(credentials)
headers = get_azure_headers(token_data)
files = []
for file_id in file_ids:
endpoint = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
response = requests.get(endpoint, headers=headers)
if response.status_code == 401:
token_data = refresh_azure_token(credentials)
headers = get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
if response.status_code != 200:
logger.error(
"An error occurred while retrieving Azure Drive files: %s",
response.text,
)
return {"error": response.text}
result = response.json()
files.append(
SyncFile(
name=result.get("name"),
id=result.get("id"),
is_folder="folder" in result,
last_modified=result.get("lastModifiedDateTime"),
mime_type=result.get("file", {}).get("mimeType", "folder"),
web_view_link=result.get("webUrl"),
)
)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
# Drop Box
def list_dropbox_files(
credentials: dict, folder_id: str = "", recursive: bool = False
) -> List[SyncFile] | dict:
"""
Retrieve files from Dropbox.
Args:
credentials (dict): The credentials for accessing Dropbox.
folder_id (str, optional): The folder ID to filter files. Defaults to "".
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Dropbox files with folder_id: %s", folder_id)
# Verify credential has the access token
if "access_token" not in credentials:
print("Invalid token data")
return {"error": "Invalid token data"}
try:
dbx = dropbox.Dropbox(credentials["access_token"])
dbx.check_and_refresh_access_token()
credentials["access_token"] = dbx._oauth2_access_token
def fetch_files(metadata):
files = []
for file in metadata.entries:
shared_link = f"https://www.dropbox.com/preview{file.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(file, dropbox.files.FolderMetadata)
logger.debug(f"IS FOLDER ? {is_folder}")
files.append(
SyncFile(
name=file.name,
id=file.id,
is_folder=is_folder,
last_modified=(
str(file.client_modified) if not is_folder else ""
),
mime_type=(
file.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
)
return files
files = []
list_metadata = dbx.files_list_folder(folder_id, recursive=recursive)
files.extend(fetch_files(list_metadata))
while list_metadata.has_more:
list_metadata = dbx.files_list_folder_continue(list_metadata.cursor)
files.extend(fetch_files(list_metadata))
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.ApiError as e:
logger.error("Dropbox API error: %s", e)
raise HTTPException(status_code=500, detail="Dropbox API error")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise HTTPException(status_code=500, detail="Unexpected error occurred")
def get_dropbox_files_by_id(
credentials: Dict[str, str], file_ids: List[str]
) -> List[SyncFile] | Dict[str, str]:
"""
Retrieve files from Dropbox by their IDs.
Args:
credentials (dict): The credentials for accessing Dropbox.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Dropbox files with file_ids: %s", file_ids)
if "access_token" not in credentials:
raise HTTPException(status_code=401, detail="Invalid token data")
try:
dbx = dropbox.Dropbox(credentials["access_token"])
dbx.check_and_refresh_access_token()
credentials["access_token"] = dbx._oauth2_access_token
files = []
for file_id in file_ids:
try:
metadata = dbx.files_get_metadata(file_id)
logger.debug("Metadata for file_id %s: %s", file_id, metadata)
shared_link = f"https://www.dropbox.com/preview/{metadata.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(metadata, dropbox.files.FolderMetadata)
file_info = SyncFile(
name=metadata.name,
id=metadata.id,
is_folder=is_folder,
last_modified=(
str(metadata.client_modified) if not is_folder else ""
),
mime_type=(
metadata.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
files.append(file_info)
except dropbox.exceptions.ApiError as api_err:
logger.error("Dropbox API error for file_id %s: %s", file_id, api_err)
continue # Skip this file and proceed with the next one
except Exception as err:
logger.error("Unexpected error for file_id %s: %s", file_id, err)
continue # Skip this file and proceed with the next one
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.AuthError as auth_err:
logger.error("Authentication error: %s", auth_err)
raise HTTPException(status_code=401, detail="Authentication error")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise HTTPException(status_code=500, detail="Unexpected error occurred")

View File

@@ -1,387 +0,0 @@
import os
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
import msal
import requests
from fastapi import HTTPException, UploadFile
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import (
CreateNotification,
NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
NotificationService,
)
from quivr_api.modules.sync.dto.inputs import (
SyncFileInput,
SyncFileUpdateInput,
SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
get_azure_files_by_id,
list_azure_files,
)
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists
notification_service = NotificationService()
logger = get_logger(__name__)
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
AUTHORITY = "https://login.microsoftonline.com/common"
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
SCOPE = [
"https://graph.microsoft.com/Files.Read",
"https://graph.microsoft.com/User.Read",
"https://graph.microsoft.com/Sites.Read.All",
]
class AzureSyncUtils(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
sync_user_service: SyncUserService
sync_active_service: SyncService
sync_files_repo: SyncFiles
storage: Storage
def get_headers(self, token_data):
return {
"Authorization": f"Bearer {token_data['access_token']}",
"Accept": "application/json",
}
def refresh_token(self, refresh_token):
client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
result = client.acquire_token_by_refresh_token(refresh_token, scopes=SCOPE)
if "access_token" not in result:
raise HTTPException(status_code=400, detail="Failed to refresh token")
return result
async def _upload_files(
self,
token_data: dict,
files: list[SyncFile],
current_user: str,
brain_id: str,
sync_active_id: int,
):
"""
Download files from Azure.
Args:
token_data (dict): The token data for accessing Azure.
files (list): The list of file metadata to download.
Returns:
dict: A dictionary containing the status of the download or an error message.
"""
logger.info("Downloading Azure files with metadata: %s", files)
headers = self.get_headers(token_data)
downloaded_files = []
# Generate random UUID
bulk_id = uuid.uuid4()
for file in files:
upload_notification = notification_service.add_notification(
CreateNotification(
user_id=current_user,
bulk_id=bulk_id,
status=NotificationsStatusEnum.INFO,
title=file.name,
category="sync",
brain_id=str(brain_id),
)
)
file.notification_id = str(upload_notification.id)
for file in files:
try:
file_id = file.id
file_name = file.name
modified_time = file.last_modified
download_endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
)
logger.info("Downloading file: %s", file_name)
download_response = requests.get(
download_endpoint, headers=headers, stream=True
)
if download_response.status_code == 401:
token_data = self.refresh_token(token_data["refresh_token"])
headers = self.get_headers(token_data)
download_response = requests.get(
download_endpoint, headers=headers, stream=True
)
if download_response.status_code != 200:
logger.error("Failed to download file: %s", file_name)
continue
file_data = BytesIO(download_response.content)
# Check if the file already exists in the storage
if check_file_exists(brain_id, file_name):
logger.debug("🔥 File already exists in the storage: %s", file_name)
self.storage.remove_file(brain_id + "/" + file_name)
BrainsVectors().delete_file_from_brain(brain_id, file_name)
# Check if the file extension is compatible
if file_name.split(".")[-1] not in [
"pdf",
"txt",
"md",
"csv",
"docx",
"xlsx",
"pptx",
"doc",
]:
logger.info("File is not compatible: %s", file_name)
continue
to_upload_file = UploadFile(
file=file_data,
filename=file_name,
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file_name), None
)
supported = False
if (existing_file and existing_file.supported) or not existing_file:
supported = True
await upload_file(
to_upload_file,
brain_id,
current_user,
bulk_id,
"Share Point",
file.web_view_link,
notification_id=file.notification_id,
)
if existing_file:
# Update the existing file record
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
last_modified=modified_time,
supported=supported,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file_name,
syncs_active_id=sync_active_id,
last_modified=modified_time,
brain_id=brain_id,
supported=supported,
)
)
downloaded_files.append(file_name)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.SUCCESS,
description="File downloaded successfully",
),
)
except Exception as error:
logger.error(
"An error occurred while downloading Azure files: %s", error
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
existing_file = next(
(f for f in existing_files if f.path == file.name), None
)
# Update the existing file record
if existing_file:
self.sync_files_repo.update_sync_file(
existing_file.id,
SyncFileUpdateInput(
supported=False,
),
)
else:
# Create a new file record
self.sync_files_repo.create_sync_file(
SyncFileInput(
path=file.name,
syncs_active_id=sync_active_id,
last_modified=file.last_modified,
brain_id=brain_id,
supported=False,
)
)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.ERROR,
description="Error downloading file",
),
)
return {"downloaded_files": downloaded_files}
async def sync(self, sync_active_id: int, user_id: str):
"""
Check if the Azure sync has not been synced and download the folders and files based on the settings.
Args:
sync_active_id (int): The ID of the active sync.
user_id (str): The user ID associated with the active sync.
"""
# Retrieve the active sync details
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
if not sync_active:
logger.warning(
"No active sync found for sync_active_id: %s", sync_active_id
)
return None
# Check if the sync is due
last_synced = sync_active.get("last_synced")
force_sync = sync_active.get("force_sync", False)
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
if last_synced and not force_sync:
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
timezone.utc
)
current_time = datetime.now().astimezone()
# Debug logging to check the values
logger.debug("Last synced time (UTC): %s", last_synced_time)
logger.debug("Current time (local timezone): %s", current_time)
# Convert current_time to UTC for comparison
current_time_utc = current_time.astimezone(timezone.utc)
logger.debug("Current time (UTC): %s", current_time_utc)
time_difference = current_time_utc - last_synced_time
if time_difference < timedelta(minutes=sync_interval_minutes):
logger.info(
"Azure sync is not due for sync_active_id: %s", sync_active_id
)
return None
# Retrieve the sync user details
sync_user = self.sync_user_service.get_syncs_user(
user_id=user_id, sync_user_id=sync_active["syncs_user_id"]
)
if not sync_user:
logger.warning(
"No sync user found for sync_active_id: %s, user_id: %s",
sync_active_id,
user_id,
)
return None
sync_user = sync_user[0]
if sync_user["provider"].lower() != "azure":
logger.warning(
"Sync provider is not Azure for sync_active_id: %s", sync_active_id
)
return None
# Download the folders and files from Azure
logger.info(
"Downloading folders and files from Azure for sync_active_id: %s",
sync_active_id,
)
# Get the folder id from the settings from sync_active
settings = sync_active.get("settings", {})
folders = settings.get("folders", [])
files_to_download = settings.get("files", [])
files = []
files_metadata = []
if len(folders) > 0:
files = []
for folder in folders:
files.extend(
list_azure_files(
sync_user["credentials"],
folder_id=folder,
recursive=True,
)
)
if len(files_to_download) > 0:
files_metadata = get_azure_files_by_id(
sync_user["credentials"],
files_to_download,
)
files = files + files_metadata # type: ignore
if "error" in files:
logger.error(
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
# Filter files that have been modified since the last sync
last_synced_time = (
datetime.fromisoformat(last_synced).astimezone(timezone.utc)
if last_synced
else None
)
logger.info("Files retrieved from Azure: %s", len(files))
logger.info("Files retrieved from Azure: %s", files)
files_to_download = [
file
for file in files
if not file.is_folder
and (
(
not last_synced_time
or datetime.strptime(
file.last_modified, "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=timezone.utc)
> last_synced_time
)
or not check_file_exists(sync_active["brain_id"], file.name)
)
]
downloaded_files = await self._upload_files(
sync_user["credentials"],
files_to_download,
user_id,
sync_active["brain_id"],
sync_active_id,
)
if "error" in downloaded_files:
logger.error(
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
# Update the last_synced timestamp
self.sync_active_service.update_sync_active(
sync_active_id,
SyncsActiveUpdateInput(
last_synced=datetime.now().astimezone().isoformat(), force_sync=False
),
)
logger.info("Azure sync completed for sync_active_id: %s", sync_active_id)
return downloaded_files

View File

@@ -0,0 +1,622 @@
import json
import os
import time
from abc import ABC, abstractmethod
from io import BytesIO
from typing import Any, Dict, List
import dropbox
import msal
import requests
from fastapi import HTTPException
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from quivr_api.logger import get_logger
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.utils.normalize import remove_special_characters
from requests import HTTPError
logger = get_logger(__name__)
class BaseSync(ABC):
name: str
lower_name: str
datetime_format: str
@abstractmethod
def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]:
raise NotImplementedError
@abstractmethod
def get_files(
self, credentials: Dict, folder_id: str | None = None, recursive: bool = False
) -> List[SyncFile]:
raise NotImplementedError
@abstractmethod
def check_and_refresh_access_token(self, credentials: dict) -> Dict:
raise NotImplementedError
@abstractmethod
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
raise NotImplementedError
class GoogleDriveSync(BaseSync):
name = "Google Drive"
lower_name = "google"
creds: Credentials | None = None
service: Any | None = None
datetime_format: str = "%Y-%m-%dT%H:%M:%S.%fZ"
def check_and_refresh_access_token(self, credentials: dict) -> Dict:
self.creds = Credentials.from_authorized_user_info(credentials)
if self.creds.expired and self.creds.refresh_token:
self.creds.refresh(GoogleRequest())
logger.info("Google Drive credentials refreshed")
return json.loads(self.creds.to_json())
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
file_id = file.id
file_name = file.name
mime_type = file.mime_type
modified_time = file.last_modified
if not self.creds:
self.check_and_refresh_access_token(credentials)
if not self.service:
self.service = build("drive", "v3", credentials=self.creds)
# Convert Google Docs files to appropriate formats before downloading
if mime_type == "application/vnd.google-apps.document":
logger.debug(
"Converting Google Docs file with file_id: %s to DOCX.",
file_id,
)
request = self.service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)
file_name += ".docx"
elif mime_type == "application/vnd.google-apps.spreadsheet":
logger.debug(
"Converting Google Sheets file with file_id: %s to XLSX.",
file_id,
)
request = self.service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
file_name += ".xlsx"
elif mime_type == "application/vnd.google-apps.presentation":
logger.debug(
"Converting Google Slides file with file_id: %s to PPTX.",
file_id,
)
request = self.service.files().export_media(
fileId=file_id,
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
)
file_name += ".pptx"
### Elif pdf, txt, md, csv, docx, xlsx, pptx, doc
elif file_name.split(".")[-1] in [
"pdf",
"txt",
"md",
"csv",
"docx",
"xlsx",
"pptx",
"doc",
]:
request = self.service.files().get_media(fileId=file_id)
else:
logger.warning(
"Skipping unsupported file type: %s for file_id: %s",
mime_type,
file_id,
)
raise Exception("Unsupported file type")
file_data = request.execute()
return BytesIO(file_data)
def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]:
"""
Retrieve files from Google Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Google Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
self.check_and_refresh_access_token(credentials)
try:
service = build("drive", "v3", credentials=self.creds)
files: List[SyncFile] = []
for file_id in file_ids:
result = (
service.files()
.get(
fileId=file_id,
fields="id, name, mimeType, modifiedTime, webViewLink",
)
.execute()
)
files.append(
SyncFile(
name=result["name"],
id=result["id"],
is_folder=(
result["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=result["modifiedTime"],
mime_type=result["mimeType"],
web_view_link=result["webViewLink"],
)
)
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error(
"An error occurred while retrieving Google Drive files: %s", error
)
raise Exception("Failed to retrieve files")
def get_files(
self, credentials: dict, folder_id: str | None = None, recursive: bool = False
) -> List[SyncFile]:
"""
Retrieve files from Google Drive.
Args:
credentials (dict): The credentials for accessing Google Drive.
folder_id (str, optional): The folder ID to filter files. Defaults to None.
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
self.check_and_refresh_access_token(credentials)
# Updating the credentials in the database
try:
service = build("drive", "v3", credentials=self.creds)
if folder_id:
query = f"'{folder_id}' in parents"
else:
query = "'root' in parents or sharedWithMe"
page_token = None
files: List[SyncFile] = []
while True:
results = (
service.files()
.list(
q=query,
pageSize=100,
fields="nextPageToken, files(id, name, mimeType, modifiedTime, webViewLink)",
pageToken=page_token,
)
.execute()
)
items = results.get("files", [])
if not items:
logger.info("No files found in Google Drive")
break
for item in items:
files.append(
SyncFile(
name=item["name"],
id=item["id"],
is_folder=(
item["mimeType"] == "application/vnd.google-apps.folder"
),
last_modified=item["modifiedTime"],
mime_type=item["mimeType"],
web_view_link=item["webViewLink"],
)
)
                    # If recursive is True and the item is a folder, get files from it
                    # (Drive API items are plain dicts, so use key access, not attributes)
                    if (
                        recursive
                        and item["mimeType"] == "application/vnd.google-apps.folder"
                    ):
                        logger.warning(
                            "Calling Recursive for folder: %s",
                            item["name"],
                        )
                        files.extend(self.get_files(credentials, item["id"], recursive))
page_token = results.get("nextPageToken", None)
if page_token is None:
break
logger.info("Google Drive files retrieved successfully: %s", len(files))
for file in files:
file.name = remove_special_characters(file.name)
return files
except HTTPError as error:
logger.error(
"An error occurred while retrieving Google Drive files: %s", error
)
raise Exception("Failed to retrieve files")
class AzureDriveSync(BaseSync):
name = "Azure Drive"
lower_name = "azure"
datetime_format: str = "%Y-%m-%dT%H:%M:%SZ"
CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID")
AUTHORITY = "https://login.microsoftonline.com/common"
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:5050")
REDIRECT_URI = f"{BACKEND_URL}/sync/azure/oauth2callback"
SCOPE = [
"https://graph.microsoft.com/Files.Read",
"https://graph.microsoft.com/User.Read",
"https://graph.microsoft.com/Sites.Read.All",
]
@staticmethod
def get_azure_token_data(credentials):
if "access_token" not in credentials:
raise HTTPException(status_code=401, detail="Invalid token data")
return credentials
@staticmethod
def get_azure_headers(token_data):
return {
"Authorization": f"Bearer {token_data['access_token']}",
"Accept": "application/json",
}
def check_and_refresh_access_token(self, credentials) -> Dict:
if "refresh_token" not in credentials:
raise HTTPException(status_code=401, detail="No refresh token available")
client = msal.PublicClientApplication(self.CLIENT_ID, authority=self.AUTHORITY)
result = client.acquire_token_by_refresh_token(
credentials["refresh_token"], scopes=self.SCOPE
)
if "access_token" not in result:
raise HTTPException(status_code=400, detail="Failed to refresh token")
credentials.update(
{
"access_token": result["access_token"],
"refresh_token": result.get(
"refresh_token", credentials["refresh_token"]
),
"id_token": result.get("id_token", credentials.get("id_token")),
}
)
return credentials
def get_files(self, credentials, folder_id=None, recursive=False) -> List[SyncFile]:
def fetch_files(endpoint, headers, max_retries=1):
logger.debug(f"fetching files from {endpoint}.")
retry_count = 0
while retry_count <= max_retries:
try:
response = requests.get(endpoint, headers=headers)
                    # Retry with the refreshed token
if response.status_code == 401:
token_data = self.check_and_refresh_access_token(credentials)
headers = self.get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
else:
response.raise_for_status()
return response.json().get("value", [])
except HTTPError as e:
logger.exception(
f"azure_list_files got exception : {e}. headers: {headers}. {retry_count} retrying."
)
# Exponential backoff
time.sleep(2**retry_count)
retry_count += 1
raise HTTPException(
504, detail="can't connect to azure endpoint to retrieve files."
)
token_data = self.get_azure_token_data(credentials)
headers = self.get_azure_headers(token_data)
endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"
if folder_id:
endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
)
items = fetch_files(endpoint, headers)
if not items:
logger.info("No files found in Azure Drive")
return []
files = []
for item in items:
file_data = SyncFile(
name=item.get("name"),
id=item.get("id"),
is_folder="folder" in item,
last_modified=item.get("lastModifiedDateTime"),
mime_type=item.get("file", {}).get("mimeType", "folder"),
web_view_link=item.get("webUrl"),
)
files.append(file_data)
# If recursive option is enabled and the item is a folder, fetch files from it
if recursive and file_data.is_folder:
folder_files = self.get_files(
credentials, folder_id=file_data.id, recursive=True
)
files.extend(folder_files)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
def get_files_by_id(
self, credentials: dict, file_ids: List[str]
) -> List[SyncFile] | dict:
"""
Retrieve files from Azure Drive by their IDs.
Args:
credentials (dict): The credentials for accessing Azure Drive.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
token_data = self.get_azure_token_data(credentials)
headers = self.get_azure_headers(token_data)
files = []
for file_id in file_ids:
endpoint = f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}"
response = requests.get(endpoint, headers=headers)
if response.status_code == 401:
token_data = self.check_and_refresh_access_token(credentials)
headers = self.get_azure_headers(token_data)
response = requests.get(endpoint, headers=headers)
if response.status_code != 200:
logger.error(
"An error occurred while retrieving Azure Drive files: %s",
response.text,
)
raise Exception("Failed to retrieve files")
result = response.json()
files.append(
SyncFile(
name=result.get("name"),
id=result.get("id"),
is_folder="folder" in result,
last_modified=result.get("lastModifiedDateTime"),
mime_type=result.get("file", {}).get("mimeType", "folder"),
web_view_link=result.get("webUrl"),
)
)
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Azure Drive files retrieved successfully: %s", len(files))
return files
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
file_id = file.id
file_name = file.name
modified_time = file.last_modified
headers = self.get_azure_headers(credentials)
download_endpoint = (
f"https://graph.microsoft.com/v1.0/me/drive/items/{file_id}/content"
)
logger.info("Downloading file: %s", file_name)
download_response = requests.get(
download_endpoint, headers=headers, stream=True
)
return BytesIO(download_response.content)
class DropboxSync(BaseSync):
name = "Dropbox"
lower_name = "dropbox"
dbx: dropbox.Dropbox | None = None
datetime_format: str = "%Y-%m-%d %H:%M:%S"
def link_dropbox(self, credentials) -> dropbox.Dropbox:
return dropbox.Dropbox(
credentials["access_token"],
oauth2_refresh_token=credentials["refresh_token"],
app_key=os.getenv("DROPBOX_APP_KEY"),
oauth2_access_token_expiration=credentials.get("expires_at"),
app_secret=os.getenv("DROPBOX_APP_SECRET"),
)
def check_and_refresh_access_token(self, credentials: Dict) -> Dict:
if not self.dbx:
self.dbx = self.link_dropbox(credentials)
self.dbx.check_and_refresh_access_token()
credentials["access_token"] = self.dbx._oauth2_access_token
credentials["refresh_token"] = self.dbx.refresh_access_token
return credentials
def get_files(
self, credentials: Dict, folder_id: str = "", recursive: bool = False
) -> List[SyncFile]:
"""
Retrieve files from Dropbox.
Args:
credentials (dict): The credentials for accessing Dropbox.
folder_id (str, optional): The folder ID to filter files. Defaults to "".
recursive (bool, optional): If True, fetch files from all subfolders. Defaults to False.
Returns:
dict: A dictionary containing the list of files or an error message.
"""
logger.info("Retrieving Dropbox files with folder_id: %s", folder_id)
# Verify credential has the access token
if "access_token" not in credentials:
logger.error("Invalid access token")
raise Exception("Invalid access token")
try:
if not self.dbx:
self.dbx = dropbox.Dropbox(
credentials["access_token"],
oauth2_refresh_token=credentials["refresh_token"],
app_key=os.getenv("DROPBOX_APP_KEY"),
oauth2_access_token_expiration=credentials.get("expires_at"),
app_secret=os.getenv("DROPBOX_APP_SECRET"),
)
self.dbx.check_and_refresh_access_token()
credentials["access_token"] = self.dbx._oauth2_access_token
def fetch_files(metadata):
files = []
for file in metadata.entries:
shared_link = f"https://www.dropbox.com/preview{file.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(file, dropbox.files.FolderMetadata)
files.append(
SyncFile(
name=file.name,
id=file.id,
is_folder=is_folder,
last_modified=(
str(file.client_modified) if not is_folder else ""
),
mime_type=(
file.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
)
return files
files = []
list_metadata = self.dbx.files_list_folder(folder_id, recursive=recursive)
files.extend(fetch_files(list_metadata))
while list_metadata.has_more:
list_metadata = self.dbx.files_list_folder_continue(
list_metadata.cursor
)
files.extend(fetch_files(list_metadata))
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.ApiError as e:
logger.error("Dropbox API error: %s", e)
raise Exception("Failed to retrieve files")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise Exception("Failed to retrieve files")
def get_files_by_id(
self, credentials: Dict[str, str], file_ids: List[str]
) -> List[SyncFile]:
"""
Retrieve files from Dropbox by their IDs.
Args:
credentials (dict): The credentials for accessing Dropbox.
file_ids (list): The list of file IDs to retrieve.
Returns:
list: A list of dictionaries containing the metadata of each file or an error message.
"""
logger.info("Retrieving Dropbox files with file_ids: %s", file_ids)
if "access_token" not in credentials:
logger.error("Access token is not in the credentials")
raise Exception("Invalid access token")
try:
if not self.dbx:
self.dbx = self.link_dropbox(credentials)
self.dbx.check_and_refresh_access_token()
credentials["access_token"] = self.dbx._oauth2_access_token # type: ignore
files = []
for file_id in file_ids:
try:
metadata = self.dbx.files_get_metadata(file_id)
shared_link = f"https://www.dropbox.com/preview/{metadata.path_display}?context=content_suggestions&role=personal"
is_folder = isinstance(metadata, dropbox.files.FolderMetadata)
file_info = SyncFile(
name=metadata.name,
id=metadata.id,
is_folder=is_folder,
last_modified=(
str(metadata.client_modified) if not is_folder else ""
),
mime_type=(
metadata.path_lower.split(".")[-1] if not is_folder else ""
),
web_view_link=shared_link,
)
files.append(file_info)
except dropbox.exceptions.ApiError as api_err:
logger.error(
"Dropbox API error for file_id %s: %s", file_id, api_err
)
continue # Skip this file and proceed with the next one
except Exception as err:
logger.error("Unexpected error for file_id %s: %s", file_id, err)
continue # Skip this file and proceed with the next one
for file in files:
file.name = remove_special_characters(file.name)
logger.info("Dropbox files retrieved successfully: %d", len(files))
return files
except dropbox.exceptions.AuthError as auth_err:
logger.error("Authentication error: %s", auth_err)
raise Exception("Failed to retrieve files")
except Exception as e:
logger.error("Unexpected error: %s", e)
raise Exception("Failed to retrieve files")
def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO:
file_id = str(file.id)
if not self.dbx:
self.dbx = self.link_dropbox(credentials)
metadata, file_data = self.dbx.files_download(file_id) # type: ignore
return BytesIO(file_data.content)

View File

@@ -1,15 +1,16 @@
import os
import uuid
from datetime import datetime, timedelta, timezone
from io import BytesIO
from typing import List
import dropbox
from fastapi import UploadFile
from pydantic import BaseModel, ConfigDict
from quivr_api.logger import get_logger
from quivr_api.modules.brain.repository.brains_vectors import BrainsVectors
from quivr_api.modules.knowledge.repository.storage import Storage
from quivr_api.modules.notification.dto.inputs import CreateNotification
from quivr_api.modules.notification.dto.inputs import (
CreateNotification,
NotificationUpdatableProperties,
)
from quivr_api.modules.notification.entity.notification import NotificationsStatusEnum
from quivr_api.modules.notification.service.notification_service import (
NotificationService,
@@ -19,52 +20,46 @@ from quivr_api.modules.sync.dto.inputs import (
SyncFileUpdateInput,
SyncsActiveUpdateInput,
)
from quivr_api.modules.sync.entity.sync import SyncFile
from quivr_api.modules.sync.repository.sync_files import SyncFiles
from quivr_api.modules.sync.service.sync_service import SyncService, SyncUserService
from quivr_api.modules.sync.utils.list_files import (
get_dropbox_files_by_id,
list_dropbox_files,
)
from quivr_api.modules.sync.utils.sync import BaseSync
from quivr_api.modules.sync.utils.upload import upload_file
from quivr_api.modules.upload.service.upload_file import check_file_exists
notification_service = NotificationService()
logger = get_logger(__name__)
APP_KEY = os.getenv("DROPBOX_APP_KEY")
APP_SECRET = os.getenv("DROPBOW_CONSUMER_SECRET")
notification_service = NotificationService()
class DropboxSyncUtils(BaseModel):
class SyncUtils(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
storage: Storage
sync_files_repo: SyncFiles
sync_active_service: SyncService
sync_user_service: SyncUserService
sync_active_service: SyncService
sync_files_repo: SyncFiles
storage: Storage
sync_cloud: BaseSync
async def _upload_files(
self,
token_data: dict,
files: list,
credentials: dict,
files: List[SyncFile],
current_user: str,
brain_id: str,
sync_active_id: int,
):
"""
Download files from DropBox.
Download files from an external cloud.
Args:
credentials (dict): The credentials for accessin DropBox Drive.
credentials (dict): The token data for accessing the external cloud.
files (list): The list of file metadata to download.
Returns:
dict: A dictionary containing the status of the download or an error message.
"""
dbx = dropbox.Dropbox(token_data["access_token"])
dbx.check_and_refresh_access_token()
token_data["access_token"] = dbx._oauth2_access_token
credentials = self.sync_cloud.check_and_refresh_access_token(credentials)
downloaded_files = []
bulk_id = uuid.uuid4()
@ -80,22 +75,20 @@ class DropboxSyncUtils(BaseModel):
brain_id=str(brain_id),
)
)
file.notification_id = str(upload_notification.id)
for file in files:
logger.info("Processing file: %s", file.name)
try:
file_id = str(file.id)
file_id = file.id
file_name = file.name
mime_type = file.mime_type
modified_time = file.last_modified
metadata, file_data = dbx.files_download(file_id) # type: ignore
# logger.debug("🔥 Filedata :", file_data.content)
file_data = BytesIO(file_data.content)
file_data = self.sync_cloud.download_file(credentials, file)
# Check if the file already exists in the storage
if check_file_exists(brain_id, file_name):
logger.debug("🔥 File already exists in the storage: %s", file_name)
logger.debug("%s already exists in the storage", file_name)
self.storage.remove_file(brain_id + "/" + file_name)
BrainsVectors().delete_file_from_brain(brain_id, file_name)
@ -133,7 +126,7 @@ class DropboxSyncUtils(BaseModel):
brain_id,
current_user,
bulk_id,
"DropBox",
self.sync_cloud.name,
file.web_view_link,
notification_id=file.notification_id,
)
@ -159,11 +152,19 @@ class DropboxSyncUtils(BaseModel):
)
)
downloaded_files.append(file_name)
downloaded_files.append(file_name)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.SUCCESS,
description="File downloaded successfully",
),
)
except Exception as error:
logger.error(
"An error occurred while downloading DropBox files: %s", error
"An error occurred while downloading %s files: %s",
self.sync_cloud.name,
error,
)
# Check if the file already exists in the database
existing_files = self.sync_files_repo.get_sync_files(sync_active_id)
@ -189,16 +190,25 @@ class DropboxSyncUtils(BaseModel):
supported=False,
)
)
notification_service.update_notification_by_id(
file.notification_id,
NotificationUpdatableProperties(
status=NotificationsStatusEnum.ERROR,
description="Error downloading file",
),
)
return {"downloaded_files": downloaded_files}
async def sync(self, sync_active_id: int, user_id: str):
"""
Check if the Dropbox sync has not been synced and download the folders and files based on the settings.
Check if the specific sync is due and download the folders and files based on its settings.
Args:
sync_active_id (int): The ID of the active sync.
user_id (str): The user ID associated with the active sync.
"""
# Retrieve the active sync details
sync_active = self.sync_active_service.get_details_sync_active(sync_active_id)
if not sync_active:
@ -211,7 +221,6 @@ class DropboxSyncUtils(BaseModel):
last_synced = sync_active.get("last_synced")
force_sync = sync_active.get("force_sync", False)
sync_interval_minutes = sync_active.get("sync_interval_minutes", 0)
if last_synced and not force_sync:
last_synced_time = datetime.fromisoformat(last_synced).astimezone(
timezone.utc
@ -228,7 +237,9 @@ class DropboxSyncUtils(BaseModel):
time_difference = current_time_utc - last_synced_time
if time_difference < timedelta(minutes=sync_interval_minutes):
logger.info(
"DropBox sync is not due for sync_active_id: %s", sync_active_id
"%s sync is not due for sync_active_id: %s",
self.sync_cloud.name,
sync_active_id,
)
return None
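The scheduling check above reduces to a small pure function. A sketch for illustration (is_sync_due is a hypothetical helper name, not part of this commit):

    from datetime import datetime, timedelta, timezone

    def is_sync_due(last_synced: str | None, sync_interval_minutes: int, force_sync: bool) -> bool:
        # A forced sync or a never-synced entry always runs.
        if force_sync or not last_synced:
            return True
        last_synced_time = datetime.fromisoformat(last_synced).astimezone(timezone.utc)
        # Due once at least sync_interval_minutes have elapsed since the last run.
        return datetime.now(timezone.utc) - last_synced_time >= timedelta(
            minutes=sync_interval_minutes
        )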
@ -245,15 +256,18 @@ class DropboxSyncUtils(BaseModel):
return None
sync_user = sync_user[0]
if sync_user["provider"].lower() != "dropbox":
if sync_user["provider"].lower() != self.sync_cloud.lower_name:
logger.warning(
"Sync provider is not DropBox for sync_active_id: %s", sync_active_id
"Sync provider is not %s for sync_active_id: %s",
self.sync_cloud.name,
sync_active_id,
)
return None
# Download the folders and files from DropBox
# Download the folders and files from the cloud provider
logger.info(
"Downloading folders and files from Dropbox for sync_active_id: %s",
"Downloading folders and files from %s for sync_active_id: %s",
self.sync_cloud.name,
sync_active_id,
)
@ -261,20 +275,19 @@ class DropboxSyncUtils(BaseModel):
settings = sync_active.get("settings", {})
folders = settings.get("folders", [])
files_to_download = settings.get("files", [])
files = []
files: List[SyncFile] = []
files_metadata = []
if len(folders) > 0:
files = []
for folder in folders:
files.extend(
list_dropbox_files(
self.sync_cloud.get_files(
sync_user["credentials"],
folder_id=folder,
recursive=True,
)
)
if len(files_to_download) > 0:
files_metadata = get_dropbox_files_by_id(
files_metadata = self.sync_cloud.get_files_by_id(
sync_user["credentials"],
files_to_download,
)
@ -282,7 +295,7 @@ class DropboxSyncUtils(BaseModel):
if "error" in files:
logger.error(
"Failed to download files from DropBox for sync_active_id: %s",
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
@ -293,8 +306,7 @@ class DropboxSyncUtils(BaseModel):
if last_synced
else None
)
logger.info("Files retrieved from DropBox: %s", len(files))
logger.info("Files retrieved from DropBox: %s", files)
logger.info("Files retrieved from %s: %s", self.sync_cloud.lower_name, files)
files_to_download = [
file
@ -304,7 +316,8 @@ class DropboxSyncUtils(BaseModel):
(
not last_synced_time
or datetime.strptime(
file.last_modified, "%Y-%m-%d %H:%M:%S"
file.last_modified,
(self.sync_cloud.datetime_format),
).replace(tzinfo=timezone.utc)
> last_synced_time
)
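The timestamp half of this filter can be read as a standalone predicate, parameterized by the provider's datetime_format from BaseSync. A sketch (helper name hypothetical):

    from datetime import datetime, timezone

    def modified_since_last_sync(file, last_synced_time, datetime_format: str) -> bool:
        # With no previous successful sync, every listed file qualifies.
        if not last_synced_time:
            return True
        modified = datetime.strptime(file.last_modified, datetime_format).replace(
            tzinfo=timezone.utc
        )
        return modified > last_synced_time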
@ -321,7 +334,7 @@ class DropboxSyncUtils(BaseModel):
)
if "error" in downloaded_files:
logger.error(
"Failed to download files from DropBox for sync_active_id: %s",
"Failed to download files from Azure for sync_active_id: %s",
sync_active_id,
)
return None
@ -333,5 +346,9 @@ class DropboxSyncUtils(BaseModel):
last_synced=datetime.now().astimezone().isoformat(), force_sync=False
),
)
logger.info("DropBox sync completed for sync_active_id: %s", sync_active_id)
logger.info(
"%s sync completed for sync_active_id: %s",
self.sync_cloud.lower_name,
sync_active_id,
)
return downloaded_files
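Taken together, SyncUtils only reaches the provider through its sync_cloud attribute, so the surface BaseSync must expose follows directly from the calls above. A minimal sketch of that interface as implied by this diff (the committed class may differ):

    from abc import ABC, abstractmethod
    from io import BytesIO
    from typing import Dict, List, Optional

    from quivr_api.modules.sync.entity.sync import SyncFile

    class BaseSync(ABC):
        # Attributes read by SyncUtils above.
        name: str
        lower_name: str
        datetime_format: str

        @abstractmethod
        def check_and_refresh_access_token(self, credentials: Dict) -> Dict: ...

        @abstractmethod
        def get_files(
            self, credentials: Dict, folder_id: Optional[str] = None, recursive: bool = False
        ) -> List[SyncFile]: ...

        @abstractmethod
        def get_files_by_id(self, credentials: Dict, file_ids: List[str]) -> List[SyncFile]: ...

        @abstractmethod
        def download_file(self, credentials: Dict, file: SyncFile) -> BytesIO: ...

Wiring a provider in is then a constructor argument, e.g. SyncUtils(sync_active_service=..., sync_user_service=..., sync_files_repo=..., storage=..., sync_cloud=DropboxSync()), where DropboxSync stands for the Dropbox implementation shown earlier and one concrete BaseSync exists per cloud.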

View File

@ -85,12 +85,9 @@ class QuivrKnowledge(BaseModel):
file_name: str | None = None
url: str | None = None
extension: str = "txt"
integration: str | None = None
integration_link: str | None = None
status: str = "PROCESSING"
# NOTE: for compatibility issues with langchain <-> PydanticV1
class SearchResult(BaseModelV1):
chunk: Document

backend/poetry.lock generated
View File

@ -9535,4 +9535,5 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "963b9ff228f2478505802a0e2915d2802e9d92396e33cde549f33bdb07f2a30d"
content-hash = "4f318e2f8561fac7ad1f4aa603e5264accf857d6e7e1bfd9fcf770279f6a2e9a"

View File

@ -2721,7 +2721,12 @@
resolved "https://registry.npmjs.org/@types/throttle-debounce/-/throttle-debounce-2.1.0.tgz"
integrity sha512-5eQEtSCoESnh2FsiLTxE121IiE60hnMqcb435fShf4bpLRjEu1Eoekht23y6zXS9Ts3l+Szu3TARnTsA0GkOkQ==
"@types/unist@*", "@types/unist@^2", "@types/unist@^2.0.0":
"@types/unist@*":
version "2.0.7"
resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz"
integrity sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==
"@types/unist@^2", "@types/unist@^2.0.0":
version "2.0.7"
resolved "https://registry.npmjs.org/@types/unist/-/unist-2.0.7.tgz"
integrity sha512-cputDpIbFgLUaGQn6Vqg3/YsJwxUwHLO13v3i5ouxT4lat0khip9AEWxtERujXV9wxIB1EyF97BSJFt6vpdI8g==
@ -3367,7 +3372,7 @@ chalk@^2.4.2:
escape-string-regexp "^1.0.5"
supports-color "^5.3.0"
chalk@^3.0.0, chalk@3.0.0:
chalk@^3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz"
integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
@ -3391,6 +3396,14 @@ chalk@^4.1.0:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
chalk@3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz"
integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
dependencies:
ansi-styles "^4.1.0"
supports-color "^7.1.0"
change-case@^5.4.2:
version "5.4.2"
resolved "https://registry.npmjs.org/change-case/-/change-case-5.4.2.tgz"
@ -3745,7 +3758,14 @@ date-fns@2.30.0:
dependencies:
"@babel/runtime" "^7.21.0"
debug@^2.2.0, debug@^2.6.9:
debug@^2.2.0:
version "2.6.9"
resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
dependencies:
ms "2.0.0"
debug@^2.6.9:
version "2.6.9"
resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
@ -3766,35 +3786,7 @@ debug@^4.0.0:
dependencies:
ms "2.1.2"
debug@^4.1.0:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.1.1:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.3.1:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.3.2:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^4.3.4, debug@4:
debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4, debug@4:
version "4.3.4"
resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
@ -4821,7 +4813,7 @@ github-from-package@0.0.0:
resolved "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz"
integrity sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==
glob-parent@^5.1.2:
glob-parent@^5.1.2, glob-parent@~5.1.2:
version "5.1.2"
resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@ -4835,25 +4827,7 @@ glob-parent@^6.0.2:
dependencies:
is-glob "^4.0.3"
glob-parent@~5.1.2:
version "5.1.2"
resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz"
integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
dependencies:
is-glob "^4.0.1"
glob@^10.2.2:
version "10.3.10"
resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
dependencies:
foreground-child "^3.1.0"
jackspeak "^2.3.5"
minimatch "^9.0.1"
minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
path-scurry "^1.10.1"
glob@^10.3.10:
glob@^10.2.2, glob@^10.3.10, glob@10.3.10:
version "10.3.10"
resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
@ -4887,17 +4861,6 @@ glob@^8.0.3:
minimatch "^5.0.1"
once "^1.3.0"
glob@10.3.10:
version "10.3.10"
resolved "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz"
integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
dependencies:
foreground-child "^3.1.0"
jackspeak "^2.3.5"
minimatch "^9.0.1"
minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
path-scurry "^1.10.1"
glob@7.1.6:
version "7.1.6"
resolved "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz"
@ -5921,7 +5884,7 @@ lowlight@^2.0.0:
fault "^2.0.0"
highlight.js "~11.8.0"
lru-cache@^10.0.1:
lru-cache@^10.0.1, "lru-cache@^9.1.1 || ^10.0.0":
version "10.0.3"
resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.3.tgz"
integrity sha512-B7gr+F6MkqB3uzINHXNctGieGsRTMwIBgxkp0yq/5BwcuDzD4A8wQpHQW6vDAm1uKSLQghmRdD9sKqf2vJ1cEg==
@ -5940,11 +5903,6 @@ lru-cache@^6.0.0:
dependencies:
yallist "^4.0.0"
"lru-cache@^9.1.1 || ^10.0.0":
version "10.0.3"
resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.3.tgz"
integrity sha512-B7gr+F6MkqB3uzINHXNctGieGsRTMwIBgxkp0yq/5BwcuDzD4A8wQpHQW6vDAm1uKSLQghmRdD9sKqf2vJ1cEg==
lz-string@^1.5.0:
version "1.5.0"
resolved "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz"