2024-05-21 23:20:35 +03:00
|
|
|
import os
|
2024-06-08 12:55:11 +03:00
|
|
|
from typing import List
|
2024-05-21 23:20:35 +03:00
|
|
|
|
|
|
|
import msal
|
|
|
|
import requests
|
|
|
|
from fastapi import HTTPException
|
|
|
|
from google.auth.transport.requests import Request as GoogleRequest
|
|
|
|
from google.oauth2.credentials import Credentials
|
|
|
|
from googleapiclient.discovery import build
|
|
|
|
from logger import get_logger
|
|
|
|
from requests import HTTPError
|
|
|
|
|
|
|
|
# Module-level logger shared by every helper in this file.
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
2024-06-08 12:55:11 +03:00
|
|
|
def get_google_drive_files_by_id(credentials: dict, file_ids: List[str]):
    """
    Retrieve files from Google Drive by their IDs.

    One ``files().get`` request is issued per ID. Expired credentials are
    refreshed first when a refresh token is available.

    Args:
        credentials (dict): The credentials for accessing Google Drive.
        file_ids (list): The list of file IDs to retrieve.

    Returns:
        list | dict: A list of dictionaries with the keys ``name``, ``id``,
        ``is_folder``, ``last_modified`` and ``mime_type`` (one per requested
        ID), or ``{"error": <message>}`` if a Drive API request fails.
    """
    # Imported locally so this block stays self-contained; googleapiclient is
    # already a dependency of this module (see the `build` import).
    from googleapiclient.errors import HttpError

    folder_mime_type = "application/vnd.google-apps.folder"

    logger.info("Retrieving Google Drive files with file_ids: %s", file_ids)
    creds = Credentials.from_authorized_user_info(credentials)
    if creds.expired and creds.refresh_token:
        creds.refresh(GoogleRequest())
        logger.info("Google Drive credentials refreshed")

    try:
        service = build("drive", "v3", credentials=creds)
        files = []

        for file_id in file_ids:
            result = (
                service.files()
                .get(fileId=file_id, fields="id, name, mimeType, modifiedTime")
                .execute()
            )

            files.append(
                {
                    "name": result["name"],
                    "id": result["id"],
                    "is_folder": result["mimeType"] == folder_mime_type,
                    "last_modified": result["modifiedTime"],
                    "mime_type": result["mimeType"],
                }
            )

        logger.info("Google Drive files retrieved successfully: %s", len(files))
        return files
    # The Drive client raises googleapiclient's HttpError, not requests'
    # HTTPError; the latter is still caught for backward compatibility.
    except (HttpError, HTTPError) as error:
        logger.error("An error occurred while retrieving Google Drive files: %s", error)
        return {"error": f"An error occurred: {error}"}
|
|
|
|
|
|
|
|
|
|
|
|
def get_google_drive_files(
    credentials: dict, folder_id: str = None, recursive: bool = False
):
    """
    Retrieve files from Google Drive.

    Lists the children of ``folder_id`` or, when no folder is given, the items
    matching ``'root' in parents or sharedWithMe``. Pages are followed through
    ``nextPageToken`` until the listing ends or an empty page is returned.

    Args:
        credentials (dict): The credentials for accessing Google Drive.
        folder_id (str, optional): The folder ID to filter files. Defaults to None.
        recursive (bool, optional): If True, fetch files from all subfolders.
            Defaults to False.

    Returns:
        list | dict: A list of dictionaries with the keys ``name``, ``id``,
        ``is_folder``, ``last_modified`` and ``mime_type``, or
        ``{"error": <message>}`` if a Drive API request fails (including a
        failure while listing a subfolder in recursive mode).
    """
    # Imported locally so this block stays self-contained; googleapiclient is
    # already a dependency of this module (see the `build` import).
    from googleapiclient.errors import HttpError

    folder_mime_type = "application/vnd.google-apps.folder"

    logger.info("Retrieving Google Drive files with folder_id: %s", folder_id)
    creds = Credentials.from_authorized_user_info(credentials)
    if creds.expired and creds.refresh_token:
        creds.refresh(GoogleRequest())
        logger.info("Google Drive credentials refreshed")
        # NOTE: the refreshed credentials are not persisted by this function.

    try:
        service = build("drive", "v3", credentials=creds)
        if folder_id:
            query = f"'{folder_id}' in parents"
        else:
            query = "'root' in parents or sharedWithMe"
        page_token = None
        files = []

        while True:
            results = (
                service.files()
                .list(
                    q=query,
                    pageSize=100,
                    fields="nextPageToken, files(id, name, mimeType, modifiedTime)",
                    pageToken=page_token,
                )
                .execute()
            )
            items = results.get("files", [])

            if not items:
                logger.info("No files found in Google Drive")
                break

            for item in items:
                is_folder = item["mimeType"] == folder_mime_type
                files.append(
                    {
                        "name": item["name"],
                        "id": item["id"],
                        "is_folder": is_folder,
                        "last_modified": item["modifiedTime"],
                        "mime_type": item["mimeType"],
                    }
                )

                # If recursive is True and the item is a folder, get files from the folder
                if recursive and is_folder:
                    logger.info("Calling Recursive for folder: %s", item["name"])
                    sub_files = get_google_drive_files(
                        credentials, item["id"], recursive
                    )
                    # A dict here is the error payload of the nested call.
                    # Extending the list with it would append its keys, so
                    # hand the error back to the caller instead.
                    if isinstance(sub_files, dict):
                        return sub_files
                    files.extend(sub_files)

            page_token = results.get("nextPageToken", None)
            if page_token is None:
                break

        logger.info("Google Drive files retrieved successfully: %s", len(files))
        return files
    # The Drive client raises googleapiclient's HttpError, not requests'
    # HTTPError; the latter is still caught for backward compatibility.
    except (HttpError, HTTPError) as error:
        logger.error("An error occurred while retrieving Google Drive files: %s", error)
        return {"error": f"An error occurred: {error}"}
|
|
|
|
|
|
|
|
|
|
|
|
# --- Azure / SharePoint OAuth settings -------------------------------------

# Application (client) ID, read from the environment; None when unset.
CLIENT_ID = os.environ.get("SHAREPOINT_CLIENT_ID")

# Authority URL handed to MSAL when building the client application.
AUTHORITY = "https://login.microsoftonline.com/common"

# Base URL of this backend; falls back to a local default when unset.
BACKEND_URL = os.environ.get("BACKEND_URL", "http://localhost:5050")

# OAuth callback URL, derived from the backend base URL.
REDIRECT_URI = BACKEND_URL + "/sync/azure/oauth2callback"

# Microsoft Graph scopes requested when acquiring tokens.
SCOPE = [
    "https://graph.microsoft.com/Files.Read",
    "https://graph.microsoft.com/User.Read",
    "https://graph.microsoft.com/Sites.Read.All",
]
|
|
|
|
|
|
|
|
|
|
|
|
def get_azure_token_data(credentials):
    """
    Check that the credentials carry an access token and return them as-is.

    Args:
        credentials (dict): Stored Azure token data.

    Returns:
        dict: The same ``credentials`` object that was passed in.

    Raises:
        HTTPException: 401 when ``access_token`` is not a key of ``credentials``.
    """
    if "access_token" in credentials:
        return credentials
    raise HTTPException(status_code=401, detail="Invalid token data")
|
|
|
|
|
|
|
|
|
|
|
|
def refresh_azure_token(credentials):
    """
    Exchange the stored refresh token for a new Azure access token via MSAL.

    Args:
        credentials (dict): Stored Azure token data; must contain
            ``refresh_token``.

    Returns:
        dict: The MSAL result, which contains ``access_token``.

    Raises:
        HTTPException: 401 when no refresh token is stored, 400 when the MSAL
            result does not contain an access token.
    """
    if "refresh_token" not in credentials:
        raise HTTPException(status_code=401, detail="No refresh token available")

    client = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
    result = client.acquire_token_by_refresh_token(
        credentials["refresh_token"], scopes=SCOPE
    )
    if "access_token" not in result:
        # Record why the refresh was rejected; the HTTP response stays generic.
        logger.error(
            "Failed to refresh Azure token: %s",
            result.get("error_description") or result.get("error"),
        )
        raise HTTPException(status_code=400, detail="Failed to refresh token")

    return result
|
|
|
|
|
|
|
|
|
|
|
|
def get_azure_headers(token_data):
    """
    Build the request headers for an authenticated JSON call.

    Args:
        token_data (dict): Token data containing ``access_token``.

    Returns:
        dict: ``Authorization`` (bearer token) and ``Accept`` headers.
    """
    bearer_value = "Bearer {}".format(token_data["access_token"])
    return dict(Authorization=bearer_value, Accept="application/json")
|
|
|
|
|
|
|
|
|
2024-06-08 12:55:11 +03:00
|
|
|
def list_azure_files(credentials, folder_id=None, recursive=False):
    """
    List the children of an Azure Drive folder through Microsoft Graph.

    Args:
        credentials (dict): Stored Azure token data; must contain
            ``access_token``.
        folder_id (str, optional): Drive item ID of the folder to list. The
            drive root is listed when omitted. Defaults to None.
        recursive (bool, optional): If True, also list every subfolder.
            Defaults to False.

    Returns:
        list | dict: A list of dictionaries with the keys ``name``, ``id``,
        ``is_folder``, ``last_modified`` and ``mime_type``, or
        ``{"error": <response text>}`` if a Graph request fails (including a
        failure while listing a subfolder in recursive mode).
    """

    def fetch_files(endpoint, headers):
        """Collect the items of every result page, or return an error dict."""
        collected = []
        url = endpoint
        while url:
            response = requests.get(url, headers=headers)
            if response.status_code == 401:
                # Token rejected: refresh once and retry. The new headers are
                # kept for the remaining pages of this listing.
                token_data = refresh_azure_token(credentials)
                headers = get_azure_headers(token_data)
                response = requests.get(url, headers=headers)
            if response.status_code != 200:
                return {"error": response.text}
            payload = response.json()
            collected.extend(payload.get("value", []))
            # Graph supplies the URL of the next page here; it is absent on
            # the last page, which ends the loop.
            url = payload.get("@odata.nextLink")
        return collected

    token_data = get_azure_token_data(credentials)
    headers = get_azure_headers(token_data)
    if folder_id:
        endpoint = (
            f"https://graph.microsoft.com/v1.0/me/drive/items/{folder_id}/children"
        )
    else:
        endpoint = "https://graph.microsoft.com/v1.0/me/drive/root/children"

    items = fetch_files(endpoint, headers)

    # A dict is the error payload from fetch_files. Iterating it as if it were
    # a list of items would fail on the first key.
    if isinstance(items, dict):
        logger.error(
            "An error occurred while retrieving Azure Drive files: %s",
            items["error"],
        )
        return items

    if not items:
        logger.info("No files found in Azure Drive")
        return []

    files = []
    for item in items:
        file_data = {
            "name": item["name"],
            "id": item["id"],
            "is_folder": "folder" in item,
            "last_modified": item["lastModifiedDateTime"],
            "mime_type": item.get("file", {}).get("mimeType", "folder"),
        }
        files.append(file_data)

        # If recursive option is enabled and the item is a folder, fetch files from it
        if recursive and file_data["is_folder"]:
            folder_files = list_azure_files(
                credentials, folder_id=file_data["id"], recursive=True
            )
            # Propagate a nested failure instead of extending the list with
            # the error dict's keys.
            if isinstance(folder_files, dict):
                return folder_files
            files.extend(folder_files)

    logger.info("Azure Drive files retrieved successfully: %s", len(files))
    return files
|
|
|
|
|
|
|
|
|
|
|
|
def get_azure_files_by_id(credentials: dict, file_ids: List[str]):
    """
    Look up Azure Drive items one at a time by their IDs.

    Args:
        credentials (dict): The credentials for accessing Azure Drive.
        file_ids (list): The IDs of the drive items to fetch.

    Returns:
        list | dict: A list of metadata dictionaries (``name``, ``id``,
        ``is_folder``, ``last_modified``, ``mime_type``), or
        ``{"error": <response text>}`` as soon as one request fails.
    """
    logger.info("Retrieving Azure Drive files with file_ids: %s", file_ids)
    request_headers = get_azure_headers(get_azure_token_data(credentials))
    collected = []

    for item_id in file_ids:
        url = f"https://graph.microsoft.com/v1.0/me/drive/items/{item_id}"
        reply = requests.get(url, headers=request_headers)

        if reply.status_code == 401:
            # Token rejected: refresh, retry once, and keep the new headers
            # for the remaining IDs.
            request_headers = get_azure_headers(refresh_azure_token(credentials))
            reply = requests.get(url, headers=request_headers)

        if reply.status_code != 200:
            logger.error(
                "An error occurred while retrieving Azure Drive files: %s",
                reply.text,
            )
            return {"error": reply.text}

        metadata = reply.json()
        entry = {
            "name": metadata["name"],
            "id": metadata["id"],
            "is_folder": "folder" in metadata,
            "last_modified": metadata["lastModifiedDateTime"],
            "mime_type": metadata.get("file", {}).get("mimeType", "folder"),
        }
        collected.append(entry)

    logger.info("Azure Drive files retrieved successfully: %s", len(collected))
    return collected
|