Merge pull request #581 from samschott/symlink-handling

Improve symlink handling
This commit is contained in:
samschott 2022-01-18 11:38:34 +01:00 committed by GitHub
commit 842d1a1dc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 470 additions and 200 deletions

View File

@ -42,7 +42,7 @@ jobs:
- name: Test with pytest
run: |
pytest --cov=maestral --cov-report=xml tests/offline
python -m pytest --cov=maestral --cov-report=xml tests/offline
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2.1.0

View File

@ -8,6 +8,10 @@
notification. Previously, only clicking on the "Show" button of the notification
would open the file browser.
* Removed update notifications by the CLI.
* Proper symlink handling: Remote items which are symlinks will now be synced as symlinks
instead of empty files. Local symlinks will no longer be followed during indexing or
silently ignored when syncing. Instead, attempting to upload a symlink will raise an
error because uploading symlinks is not currently supported by the public Dropbox API.
#### Fixed:

View File

@ -60,6 +60,7 @@ from .errors import (
NotAFolderError,
IsAFolderError,
FileSizeError,
SymlinkError,
OutOfMemoryError,
BadInputError,
DropboxAuthError,
@ -76,7 +77,7 @@ from .errors import (
from .config import MaestralState
from .constants import DROPBOX_APP_KEY
from .utils import natural_size, chunks, clamp
from .utils.path import fs_max_lengths_for_path
from .utils.path import fs_max_lengths_for_path, opener_no_symlink
if TYPE_CHECKING:
from .database import SyncEvent
@ -618,14 +619,24 @@ class DropboxClient:
md, http_resp = self.dbx.files_download(dbx_path, **kwargs)
chunksize = 2 ** 13
if md.symlink_info is not None:
# Don't download but reproduce symlink locally.
http_resp.close()
try:
os.unlink(local_path)
except FileNotFoundError:
pass
os.symlink(md.symlink_info.target, local_path)
with open(local_path, "wb") as f:
with contextlib.closing(http_resp):
for c in http_resp.iter_content(chunksize):
f.write(c)
if sync_event:
sync_event.completed = f.tell()
else:
chunksize = 2 ** 13
with open(local_path, "wb", opener=opener_no_symlink) as f:
with contextlib.closing(http_resp):
for c in http_resp.iter_content(chunksize):
f.write(c)
if sync_event:
sync_event.completed = f.tell()
# Dropbox SDK provides naive datetime in UTC.
client_mod = md.client_modified.replace(tzinfo=timezone.utc)
@ -634,7 +645,7 @@ class DropboxClient:
# Enforce client_modified < server_modified.
timestamp = min(client_mod.timestamp(), server_mod.timestamp(), time.time())
# Set mtime of downloaded file.
os.utime(local_path, (time.time(), timestamp))
os.utime(local_path, (time.time(), timestamp), follow_symlinks=False)
return md
@ -663,14 +674,13 @@ class DropboxClient:
with convert_api_errors(dbx_path=dbx_path, local_path=local_path):
size = osp.getsize(local_path)
stat = os.lstat(local_path)
# Dropbox SDK takes naive datetime in UTC.
mtime = osp.getmtime(local_path)
mtime_dt = datetime.utcfromtimestamp(mtime)
mtime_dt = datetime.utcfromtimestamp(stat.st_mtime)
if size <= chunk_size:
with open(local_path, "rb") as f:
if stat.st_size <= chunk_size:
with open(local_path, "rb", opener=opener_no_symlink) as f:
md = self.dbx.files_upload(
f.read(), dbx_path, client_modified=mtime_dt, **kwargs
)
@ -681,7 +691,7 @@ class DropboxClient:
# Note: We currently do not support resuming interrupted uploads.
# Dropbox keeps upload sessions open for 48h so this could be done in
# the future.
with open(local_path, "rb") as f:
with open(local_path, "rb", opener=opener_no_symlink) as f:
data = f.read(chunk_size)
session_start = self.dbx.files_upload_session_start(data)
uploaded = f.tell()
@ -699,7 +709,7 @@ class DropboxClient:
while True:
try:
if size - f.tell() <= chunk_size:
if stat.st_size - f.tell() <= chunk_size:
# Finish upload session and return metadata.
data = f.read(chunk_size)
md = self.dbx.files_upload_session_finish(
@ -1338,6 +1348,10 @@ def os_to_maestral_error(
err_cls = FileSizeError # subclass of SyncError
title = "Could not download file"
text = "The file size too large."
elif exc.errno == errno.ELOOP:
err_cls = SymlinkError # subclass of SyncError
title = "Cannot upload symlink"
text = "Symlinks are not currently supported by the public Dropbox API."
elif exc.errno == errno.ENOSPC:
err_cls = InsufficientSpaceError # subclass of SyncError
title = "Could not download file"

View File

@ -103,6 +103,7 @@ class SyncEvent(Model):
"_local_path_from",
"_rev",
"_content_hash",
"_symlink_target",
"_change_type",
"_change_dbid",
"_change_user_name",
@ -185,6 +186,11 @@ class SyncEvent(Model):
for deletions. Set for both local and remote changes.
"""
symlink_target = Column(SqlPath())
"""
If the file is a symlink, its target path. This should only be set for files.
"""
change_type = Column(SqlEnum(ChangeType), nullable=False)
"""
The :class:`ChangeType`. Remote SyncEvents currently do not generate moved events
@ -271,11 +277,15 @@ class SyncEvent(Model):
return self.direction == SyncDirection.Down
def __repr__(self):
return (
f"<{self.__class__.__name__}(direction={self.direction.name}, "
f"change_type={self.change_type.name}, item_type={self.item_type}, "
f"dbx_path='{self.dbx_path}')>"
)
properties = ["direction", "change_type", "item_type", "dbx_path"]
if self.change_type is ChangeType.Moved:
properties.append("dbx_path_from")
prop_str = ", ".join(f"{p}={getattr(self, p)}" for p in properties)
return f"<{self.__class__.__name__}({prop_str})>"
@classmethod
def from_dbx_metadata(cls, md: Metadata, sync_engine: "SyncEngine") -> "SyncEvent":
@ -294,6 +304,7 @@ class SyncEvent(Model):
size = 0
rev = None
hash_str = None
symlink_target = None
dbx_id = None
change_dbid = None
@ -312,6 +323,7 @@ class SyncEvent(Model):
size = 0
rev = "folder"
hash_str = "folder"
symlink_target = None
dbx_id = md.id
change_time = None
change_dbid = None
@ -320,6 +332,7 @@ class SyncEvent(Model):
item_type = ItemType.File
rev = md.rev
hash_str = md.content_hash
symlink_target = md.symlink_info.target if md.symlink_info else None
dbx_id = md.id
size = md.size
change_time = md.client_modified.replace(tzinfo=timezone.utc).timestamp()
@ -348,6 +361,7 @@ class SyncEvent(Model):
local_path=sync_engine.to_local_path_from_cased(dbx_path_cased),
rev=rev,
content_hash=hash_str,
symlink_target=symlink_target,
change_type=change_type,
change_time=change_time,
change_dbid=change_dbid,
@ -394,7 +408,7 @@ class SyncEvent(Model):
stat: Optional[os.stat_result]
try:
stat = os.stat(to_path)
stat = os.stat(to_path, follow_symlinks=False)
except OSError:
stat = None
@ -410,10 +424,15 @@ class SyncEvent(Model):
change_time = stat.st_birthtime # type: ignore
except AttributeError:
change_time = None
symlink_target = None
else:
item_type = ItemType.File
change_time = stat.st_ctime if stat else None
size = stat.st_size if stat else 0
try:
symlink_target = os.readlink(event.src_path)
except OSError:
symlink_target = None
dbx_path = sync_engine.to_dbx_path(to_path)
dbx_path_lower = normalize(dbx_path)
@ -438,6 +457,7 @@ class SyncEvent(Model):
dbx_path_from_lower=dbx_path_from_lower,
local_path_from=from_path,
content_hash=content_hash,
symlink_target=symlink_target,
change_type=change_type,
change_time=change_time,
change_dbid=change_dbid,
@ -458,6 +478,7 @@ class IndexEntry(Model):
"_last_sync",
"_rev",
"_content_hash",
"_symlink_target",
]
__tablename__ = "'index'"
@ -495,16 +516,26 @@ class IndexEntry(Model):
``None`` if not yet calculated.
"""
symlink_target = Column(SqlPath())
"""
If the file is a symlink, its target path. This should only be set for files.
"""
@property
def is_file(self) -> bool:
"""Returns True for file changes"""
"""Returns True for files"""
return self.item_type == ItemType.File
@property
def is_directory(self) -> bool:
"""Returns True for folder changes"""
"""Returns True for folders"""
return self.item_type == ItemType.Folder
@property
def is_symlink(self) -> bool:
    """Returns True if the entry refers to a symlink"""
    # An entry is a symlink exactly when a target path was recorded for it.
    has_target = self.symlink_target is not None
    return has_target
def __repr__(self):
return (
f"<{self.__class__.__name__}(item_type={self.item_type.name}, "

View File

@ -120,6 +120,10 @@ class FileReadError(SyncError):
"""Raised when reading a local file failed."""
class SymlinkError(SyncError):
    """
    Raised when we cannot sync a symlink.

    Uploading symlinks is not currently supported by the public Dropbox API, so
    attempting to upload a local symlink raises this error.
    """
# ==== errors which are not related to a specific sync event ===========================

View File

@ -4,6 +4,7 @@
import os
import os.path as osp
import shutil
import sqlite3
import time
import warnings
import logging.handlers
@ -74,6 +75,22 @@ from .constants import IDLE, PAUSED, CONNECTING, FileStatus, GITHUB_RELEASES_API
__all__ = ["Maestral"]
def _sql_add_column(db: Database, table: str, column: str, affinity: str) -> None:
    """
    Adds a column to an existing table, ignoring the error raised when the
    column is already present.

    :param db: Database to operate on.
    :param table: Name of the table to alter.
    :param column: Name of the new column.
    :param affinity: SQLite type affinity for the new column.
    """
    statement = f"ALTER TABLE {table} ADD COLUMN {column} {affinity};"
    try:
        db.execute(statement)
    except sqlite3.OperationalError:
        # Raised when the column already exists -- nothing to do.
        pass
def _sql_drop_table(db: Database, table: str) -> None:
    """
    Drops the given table, ignoring the error raised when the table does not
    exist.

    :param db: Database to operate on.
    :param table: Name of the table to drop.
    """
    statement = f"DROP TABLE {table};"
    try:
        db.execute(statement)
    except sqlite3.OperationalError:
        # Raised when the table does not exist -- nothing to do.
        pass
# ======================================================================================
# Main API
# ======================================================================================
@ -1453,7 +1470,7 @@ class Maestral:
self._update_from_pre_v1_4_5()
if Version(updated_from) < Version("1.4.8"):
self._update_from_pre_v1_4_8()
if Version(updated_from) < Version("1.5.3.dev0"):
if Version(updated_from) < Version("1.5.3"):
self._update_from_pre_v1_5_3()
self._state.set("app", "updated_scripts_completed", __version__)
@ -1478,7 +1495,7 @@ class Maestral:
db_path = get_data_path("maestral", f"{self.config_name}.db")
db = Database(db_path, check_same_thread=False)
db.execute("DROP TABLE history")
_sql_drop_table(db, "history")
db.close()
def _update_from_pre_v1_4_8(self) -> None:
@ -1512,11 +1529,15 @@ class Maestral:
def _update_from_pre_v1_5_3(self) -> None:
self._logger.info("Clearing hash cache after update from pre v1.5.3.dev0")
self._logger.info("Migrating database after update from pre v1.5.3")
db_path = get_data_path("maestral", f"{self.config_name}.db")
db = Database(db_path, check_same_thread=False)
db.execute("DROP TABLE hash_cache")
_sql_drop_table(db, "hash_cache")
_sql_add_column(db, "history", "symlink_target", "TEXT")
_sql_add_column(db, "'index'", "symlink_target", "TEXT")
db.close()
# ==== Periodic async jobs =========================================================

View File

@ -1,6 +1,7 @@
"""This module contains the main syncing functionality."""
# system imports
import errno
import sys
import os
import os.path as osp
@ -110,6 +111,10 @@ from .utils.integration import (
CPU_COUNT,
)
from .utils.path import (
exists,
isfile,
isdir,
getsize,
generate_cc_name,
move,
delete,
@ -531,7 +536,7 @@ class SyncEngine:
# Initialize SQLite database.
self._db_path = get_data_path("maestral", f"{self.config_name}.db")
if not osp.exists(self._db_path):
if not exists(self._db_path):
# Reset the sync state if DB is missing.
self.remote_cursor = ""
self.local_cursor = 0.0
@ -841,7 +846,7 @@ class SyncEngine:
"""
try:
stat = os.stat(local_path)
stat = os.lstat(local_path)
except (FileNotFoundError, NotADirectoryError):
# Remove all cache entries for local_path and return None.
with self._database_access():
@ -854,6 +859,10 @@ class SyncEngine:
self._db_manager_hash_cache.clear_cache()
return None
except OSError as err:
if err.errno == errno.ENAMETOOLONG:
return None
raise os_to_maestral_error(err)
if S_ISDIR(stat.st_mode):
@ -876,6 +885,31 @@ class SyncEngine:
return hash_str
def get_local_symlink_target(self, local_path: str) -> Optional[str]:
    """
    Returns the target of a local symlink.

    :param local_path: Absolute path on local drive.
    :returns: Symlink target of local file. None if the local path does not refer to
        a symlink or does not exist.
    """
    # Errors which simply mean "there is no symlink target at this path".
    benign_errnos = (errno.EINVAL, errno.ENAMETOOLONG)

    try:
        target = os.readlink(local_path)
    except (FileNotFoundError, NotADirectoryError):
        # Item does not exist.
        return None
    except OSError as err:
        if err.errno in benign_errnos:
            # EINVAL: item is not a symlink.
            # ENAMETOOLONG: path cannot exist on this filesystem.
            return None
        raise os_to_maestral_error(err)

    return target
def _save_local_hash(
self,
inode: int,
@ -948,6 +982,7 @@ class SyncEngine:
last_sync=self._get_ctime(event.local_path),
rev=event.rev,
content_hash=event.content_hash,
symlink_target=event.symlink_target,
)
self._db_manager_index.update(entry)
@ -972,10 +1007,14 @@ class SyncEngine:
else:
symlink_target: Optional[str] = None
if isinstance(md, FileMetadata):
rev = md.rev
hash_str = md.content_hash
item_type = ItemType.File
if md.symlink_info:
symlink_target = md.symlink_info.target
else:
rev = "folder"
hash_str = "folder"
@ -993,6 +1032,7 @@ class SyncEngine:
last_sync=None,
rev=rev,
content_hash=hash_str,
symlink_target=symlink_target,
)
self._db_manager_index.update(entry)
@ -1071,7 +1111,7 @@ class SyncEngine:
"or restart Maestral to set up a new folder.",
)
if not osp.isdir(self.dropbox_path):
if not isdir(self.dropbox_path):
raise exception
# If the file system is not case-sensitive but preserving, a path which was
@ -1103,7 +1143,7 @@ class SyncEngine:
retries = 0
max_retries = 10
while not osp.isdir(self.file_cache_path):
while not isdir(self.file_cache_path):
try:
# This will raise FileExistsError if file_cache_path
# exists but is a file instead of a directory.
@ -1726,7 +1766,7 @@ class SyncEngine:
local_path = self.to_local_path_from_cased(entry.dbx_path_cased)
is_mignore = self._is_mignore_path(entry.dbx_path_cased, entry.is_directory)
if is_mignore or not osp.exists(local_path):
if is_mignore or not exists(local_path):
if entry.is_directory:
event = DirDeletedEvent(local_path)
else:
@ -1840,7 +1880,7 @@ class SyncEngine:
for event in sync_events:
if self.is_excluded(event.dbx_path) or self.is_mignore(event):
if self.is_excluded(event.local_path) or self.is_mignore(event):
continue
if event.is_deleted:
@ -2169,7 +2209,7 @@ class SyncEngine:
suffix=suffix,
)
event_cls = DirMovedEvent if osp.isdir(event.local_path) else FileMovedEvent
event_cls = DirMovedEvent if isdir(event.local_path) else FileMovedEvent
with self.fs_events.ignore(event_cls(event.local_path, local_path_cc)):
with convert_api_errors():
move(event.local_path, local_path_cc, raise_error=True)
@ -2204,7 +2244,7 @@ class SyncEngine:
suffix="selective sync conflict",
)
event_cls = DirMovedEvent if osp.isdir(event.local_path) else FileMovedEvent
event_cls = DirMovedEvent if isdir(event.local_path) else FileMovedEvent
with self.fs_events.ignore(event_cls(event.local_path, local_path_cc)):
with convert_api_errors():
move(event.local_path, local_path_cc, raise_error=True)
@ -2244,8 +2284,10 @@ class SyncEngine:
try:
with self.client.clone_with_new_session() as client:
if event.is_added:
res = self._on_local_created(event, client)
if event.is_added and event.is_file:
res = self._on_local_file_created(event, client)
elif event.is_added and event.is_directory:
res = self._on_local_folder_created(event, client)
elif event.is_moved:
res = self._on_local_moved(event, client)
elif event.is_changed:
@ -2282,9 +2324,9 @@ class SyncEngine:
"""
try:
while True:
size1 = osp.getsize(local_path)
size1 = getsize(local_path)
time.sleep(0.2)
size2 = osp.getsize(local_path)
size2 = getsize(local_path)
if size1 == size2:
return
except OSError:
@ -2376,11 +2418,11 @@ class SyncEngine:
for md in result.entries:
self.update_index_from_dbx_metadata(md, client)
def _on_local_created(
def _on_local_file_created(
self, event: SyncEvent, client: Optional[DropboxClient] = None
) -> Optional[Metadata]:
"""
Call when a local item is created.
Call when a local file is created.
:param event: SyncEvent corresponding to local created event.
:param client: Client instance to use. If not given, use the instance provided
@ -2401,88 +2443,117 @@ class SyncEngine:
self._wait_for_creation(event.local_path)
if event.is_directory:
try:
if client.is_team_space and event.dbx_path.count("/") == 1:
# We create the folder as a shared folder immediately to prevent
# race conditions when it is created, unmounted, and remounted as a
# shared folder in a Team Space. We then retrieve the metadata in a
# second `get_metadata` call. Note: This is also racy because the
# shared folder may no longer exist at the point of the get_metadata
# call. However, this is easier for us to deal with.
shared_md = client.share_dir(event.dbx_path)
md_new = client.get_metadata(f"ns:{shared_md.shared_folder_id}")
if not md_new:
# Remote folder has been deleted after creating. Reflect changes
# locally and return.
self._logger.debug(
'"%s" on Dropbox was deleted after creation, '
"deleting local copy",
event.dbx_path,
)
err = delete(event.local_path)
if err:
raise os_to_maestral_error(err)
return None
else:
md_new = client.make_dir(event.dbx_path, autorename=False)
except FolderConflictError:
self._logger.debug(
'No conflict for "%s": the folder already exists', event.local_path
)
try:
md = client.get_metadata(event.dbx_path)
if isinstance(md, FolderMetadata):
self.update_index_from_dbx_metadata(md, client)
except NotFoundError:
pass
# Check if file already exists with identical content.
md_old = client.get_metadata(event.dbx_path)
if isinstance(md_old, FileMetadata):
if event.content_hash == md_old.content_hash:
# File hashes are identical, do not upload.
self.update_index_from_dbx_metadata(md_old, client)
return None
except FileConflictError:
md_new = client.make_dir(event.dbx_path, autorename=True)
local_entry = self.get_index_entry(event.dbx_path_lower)
if not local_entry:
# File is new to us, let Dropbox rename it if something is in the way.
mode = WriteMode.add
elif local_entry.is_directory:
# Try to overwrite the destination, this will fail...
mode = WriteMode.overwrite
else:
# Check if file already exists with identical content.
md_old = client.get_metadata(event.dbx_path)
if isinstance(md_old, FileMetadata):
if event.content_hash == md_old.content_hash:
# File hashes are identical, do not upload.
self.update_index_from_dbx_metadata(md_old, client)
# File has been modified, update remote if matching rev,
# create conflict otherwise.
self._logger.debug(
'"%s" appears to have been created but we are ' "already tracking it",
event.dbx_path,
)
mode = WriteMode.update(local_entry.rev)
try:
md_new = client.upload(
event.local_path,
event.dbx_path,
autorename=True,
mode=mode,
sync_event=event,
)
except (NotFoundError, IsAFolderError):
self._logger.debug(
'Could not upload "%s": the file does not exist', event.local_path
)
return None
if not self._handle_upload_conflict(md_new, event, client):
self._logger.debug('Created "%s" on Dropbox', event.dbx_path)
self.update_index_from_dbx_metadata(md_new, client)
return md_new
def _on_local_folder_created(
self, event: SyncEvent, client: Optional[DropboxClient] = None
) -> Optional[Metadata]:
"""
Call when a local folder is created.
:param event: SyncEvent corresponding to local created event.
:param client: Client instance to use. If not given, use the instance provided
in the constructor.
:returns: Metadata for created item or None if no remote item is created.
:raises MaestralApiError: For any issues when syncing the item.
"""
client = client or self.client
# Fail fast on badly decoded paths.
validate_encoding(event.local_path)
if self._handle_selective_sync_conflict(event):
return None
if self._handle_normalization_conflict(event):
return None
self._wait_for_creation(event.local_path)
try:
if client.is_team_space and event.dbx_path.count("/") == 1:
# We create the folder as a shared folder immediately to prevent
# race conditions when it is created, unmounted, and remounted as a
# shared folder in a Team Space. We then retrieve the metadata in a
# second `get_metadata` call. Note: This is also racy because the
# shared folder may no longer exist at the point of the get_metadata
# call. However, this is easier for us to deal with.
shared_md = client.share_dir(event.dbx_path)
md_new = client.get_metadata(f"ns:{shared_md.shared_folder_id}")
if not md_new:
# Remote folder has been deleted after creating. Reflect changes
# locally and return.
self._logger.debug(
'"%s" on Dropbox was deleted after creation, '
"deleting local copy",
event.dbx_path,
)
err = delete(event.local_path)
if err:
raise os_to_maestral_error(err)
return None
local_entry = self.get_index_entry(event.dbx_path_lower)
if not local_entry:
# File is new to us, let Dropbox rename it if something is in the way.
mode = WriteMode.add
elif local_entry.is_directory:
# Try to overwrite the destination, this will fail...
mode = WriteMode.overwrite
else:
# File has been modified, update remote if matching rev,
# create conflict otherwise.
self._logger.debug(
'"%s" appears to have been created but we are '
"already tracking it",
event.dbx_path,
)
mode = WriteMode.update(local_entry.rev)
md_new = client.make_dir(event.dbx_path, autorename=False)
except FolderConflictError:
self._logger.debug(
'No conflict for "%s": the folder already exists', event.local_path
)
try:
md_new = client.upload(
event.local_path,
event.dbx_path,
autorename=True,
mode=mode,
sync_event=event,
)
except (NotFoundError, IsAFolderError):
self._logger.debug(
'Could not upload "%s": the file does not exist', event.local_path
)
return None
md = client.get_metadata(event.dbx_path)
if isinstance(md, FolderMetadata):
self.update_index_from_dbx_metadata(md, client)
except NotFoundError:
pass
return None
except FileConflictError:
md_new = client.make_dir(event.dbx_path, autorename=True)
if not self._handle_upload_conflict(md_new, event, client):
self._logger.debug('Created "%s" on Dropbox', event.dbx_path)
@ -2515,8 +2586,18 @@ class SyncEngine:
# Check if item already exists with identical content.
md_old = client.get_metadata(event.dbx_path)
if isinstance(md_old, FileMetadata):
if event.content_hash == md_old.content_hash:
if md_old.symlink_info:
md_symlink_target = md_old.symlink_info.target
else:
md_symlink_target = None
if (
event.content_hash == md_old.content_hash
and event.symlink_target == md_symlink_target
):
# File hashes are identical, do not upload.
self.update_index_from_dbx_metadata(md_old, client)
self._logger.debug(
@ -2704,7 +2785,7 @@ class SyncEngine:
local_path_cc = self.to_local_path(md_new.path_display, client)
# Move the local item.
event_cls = DirMovedEvent if osp.isdir(event.local_path) else FileMovedEvent
event_cls = DirMovedEvent if isdir(event.local_path) else FileMovedEvent
with self.fs_events.ignore(event_cls(event.local_path, local_path_cc)):
with convert_api_errors():
move(event.local_path, local_path_cc, raise_error=True)
@ -3198,7 +3279,9 @@ class SyncEngine:
)
return Conflict.LocalNewerOrIdentical
elif event.content_hash == self.get_local_hash(event.local_path):
elif event.content_hash == self.get_local_hash(
event.local_path
) and event.symlink_target == self.get_local_symlink_target(event.local_path):
# Content hashes are equal, therefore items are identical. Folders will
# have a content hash of 'folder'.
self._logger.debug(
@ -3258,7 +3341,7 @@ class SyncEngine:
with convert_api_errors(): # Catch OSErrors.
try:
stat = os.stat(local_path)
stat = os.lstat(local_path)
except (FileNotFoundError, NotADirectoryError):
# Don't check ctime for deleted items (os won't give stat info)
# but confirm absence from index.
@ -3271,14 +3354,15 @@ class SyncEngine:
return True
# Recurse over children.
# TODO: Handle symlinks with entry.is_symlink()
with os.scandir(local_path) as it:
for entry in it:
if entry.is_dir():
if entry.is_dir(follow_symlinks=False):
if self._ctime_newer_than_last_sync(entry.path):
return True
elif not self.is_excluded(entry.name):
child_dbx_path_lower = self.to_dbx_path_lower(entry.path)
ctime = entry.stat().st_ctime
ctime = entry.stat(follow_symlinks=False).st_ctime
if ctime > self.get_last_sync(child_dbx_path_lower):
return True
@ -3299,15 +3383,15 @@ class SyncEngine:
"""
try:
stat = os.stat(local_path)
stat = os.lstat(local_path)
if S_ISDIR(stat.st_mode):
ctime = stat.st_ctime
with os.scandir(local_path) as it:
for entry in it:
if entry.is_dir():
if entry.is_dir(follow_symlinks=False):
child_ctime = self._get_ctime(entry.path)
elif not self.is_excluded(entry.name):
child_ctime = entry.stat().st_ctime
child_ctime = entry.stat(follow_symlinks=False).st_ctime
else:
child_ctime = -1.0
@ -3512,7 +3596,7 @@ class SyncEngine:
if self._check_download_conflict(event) == Conflict.Conflict:
new_local_path = generate_cc_name(local_path)
event_cls = DirMovedEvent if osp.isdir(local_path) else FileMovedEvent
event_cls = DirMovedEvent if isdir(local_path) else FileMovedEvent
with self.fs_events.ignore(event_cls(local_path, new_local_path)):
with convert_api_errors():
move(local_path, new_local_path, raise_error=True)
@ -3522,7 +3606,7 @@ class SyncEngine:
)
self.rescan(new_local_path)
if osp.isdir(local_path):
if isdir(local_path):
with self.fs_events.ignore(DirDeletedEvent(local_path)):
delete(local_path)
@ -3541,16 +3625,19 @@ class SyncEngine:
if IS_MACOS:
ignore_events.append(FileModifiedEvent(local_path))
if osp.isfile(local_path):
if isfile(local_path):
# Ignore FileDeletedEvent when replacing old file.
ignore_events.append(FileDeletedEvent(local_path))
is_symlink = event.symlink_target is not None
if is_symlink:
ignore_events.append(DirMovedEvent(tmp_fname, local_path))
# Move the downloaded file to its destination.
with self.fs_events.ignore(*ignore_events):
stat = os.stat(tmp_fname)
with self.fs_events.ignore(*ignore_events, recursive=is_symlink):
with convert_api_errors(dbx_path=event.dbx_path, local_path=local_path):
stat = os.lstat(tmp_fname)
move(
tmp_fname,
local_path,
@ -3598,7 +3685,7 @@ class SyncEngine:
if conflict_check == Conflict.Conflict:
new_local_path = generate_cc_name(event.local_path)
event_cls = DirMovedEvent if osp.isdir(event.local_path) else FileMovedEvent
event_cls = DirMovedEvent if isdir(event.local_path) else FileMovedEvent
with self.fs_events.ignore(event_cls(event.local_path, new_local_path)):
with convert_api_errors():
move(event.local_path, new_local_path, raise_error=True)
@ -3613,7 +3700,7 @@ class SyncEngine:
# Ensure that parent folders are synced.
self._ensure_parent(event, client)
if osp.isfile(event.local_path):
if isfile(event.local_path):
with self.fs_events.ignore(
FileModifiedEvent(event.local_path), # May be emitted on macOS.
FileDeletedEvent(event.local_path),
@ -3659,7 +3746,7 @@ class SyncEngine:
elif conflict_check is Conflict.LocalNewerOrIdentical:
return None
event_cls = DirDeletedEvent if osp.isdir(event.local_path) else FileDeletedEvent
event_cls = DirDeletedEvent if isdir(event.local_path) else FileDeletedEvent
with self.fs_events.ignore(event_cls(event.local_path)):
exc = delete(event.local_path)
@ -3691,7 +3778,7 @@ class SyncEngine:
local_path_old = self.to_local_path_from_cased(entry.dbx_path_cased)
event_cls = DirMovedEvent if osp.isdir(local_path_old) else FileMovedEvent
event_cls = DirMovedEvent if isdir(local_path_old) else FileMovedEvent
with self.fs_events.ignore(event_cls(local_path_old, event.local_path)):
move(local_path_old, event.local_path)
@ -3712,10 +3799,10 @@ class SyncEngine:
self._logger.debug('Rescanning "%s"', local_path)
if osp.isfile(local_path):
if isfile(local_path):
self.fs_events.queue_event(FileModifiedEvent(local_path))
elif osp.isdir(local_path):
elif isdir(local_path):
self.fs_events.queue_event(DirCreatedEvent(local_path))
# Add created and deleted events of children as appropriate.
@ -3741,13 +3828,13 @@ class SyncEngine:
for entry in entries:
entry = cast(IndexEntry, entry)
child_path = self.to_local_path_from_cased(entry.dbx_path_cased)
if not osp.exists(child_path):
if not exists(child_path):
if entry.is_directory:
self.fs_events.queue_event(DirDeletedEvent(child_path))
else:
self.fs_events.queue_event(FileDeletedEvent(child_path))
elif not osp.exists(local_path):
elif not exists(local_path):
dbx_path_lower = self.to_dbx_path_lower(local_path)
local_entry = self.get_index_entry(dbx_path_lower)
@ -3782,7 +3869,7 @@ class SyncEngine:
for entry in it:
dbx_path = self.to_dbx_path(entry.path)
if not self.is_excluded(entry.path) and not self._is_mignore_path(
dbx_path, entry.is_dir()
dbx_path, entry.is_dir(follow_symlinks=False)
):
yield entry

View File

@ -22,6 +22,44 @@ def _path_components(path: str) -> List[str]:
return cleaned_components
Path = Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"]
# ==== path relationships ==============================================================
def is_child(path: str, parent: str) -> bool:
    """
    Checks if ``path`` semantically is inside ``parent``. Neither path needs to
    refer to an actual item on the drive. This function is case-sensitive.

    :param path: Item path.
    :param parent: Parent path.
    :returns: Whether ``path`` semantically lies inside ``parent``.
    """
    # Normalize trailing separators so that "/a" and "/a/" compare equally,
    # then test for a strict prefix relationship.
    normalized_parent = parent.rstrip(osp.sep) + osp.sep
    normalized_path = path.rstrip(osp.sep)
    return normalized_path.startswith(normalized_parent)
def is_equal_or_child(path: str, parent: str) -> bool:
    """
    Checks if ``path`` semantically is inside ``parent`` or equals ``parent``. Neither
    path needs to refer to an actual item on the drive. This function is case-sensitive.

    :param path: Item path.
    :param parent: Parent path.
    :returns: ``True`` if ``path`` semantically lies inside ``parent`` or
        ``path == parent``.
    """
    # Exact match short-circuits; otherwise defer to the strict child check.
    if path == parent:
        return True
    return is_child(path, parent)
# ==== case sensitivity and normalization ==============================================
def normalize_case(string: str) -> str:
"""
Converts a string to lower case. Todo: Follow Python 2.5 / Dropbox conventions.
@ -85,42 +123,12 @@ def is_fs_case_sensitive(path: str) -> bool:
else:
check_path = path.lower()
if osp.exists(path) and not osp.exists(check_path):
if exists(path) and not exists(check_path):
return True
else:
return not osp.samefile(path, check_path)
def is_child(path: str, parent: str) -> bool:
"""
Checks if ``path`` semantically is inside ``parent``. Neither path needs to
refer to an actual item on the drive. This function is case-sensitive.
:param path: Item path.
:param parent: Parent path.
:returns: Whether ``path`` semantically lies inside ``parent``.
"""
parent = parent.rstrip(osp.sep) + osp.sep
path = path.rstrip(osp.sep)
return path.startswith(parent)
def is_equal_or_child(path: str, parent: str) -> bool:
"""
Checks if ``path`` semantically is inside ``parent`` or equals ``parent``. Neither
path needs to refer to an actual item on the drive. This function is case-sensitive.
:param path: Item path.
:param parent: Parent path.
:returns: ``True`` if ``path`` semantically lies inside ``parent`` or
``path == parent``.
"""
return is_child(path, parent) or path == parent
def equivalent_path_candidates(
path: str,
root: str = osp.sep,
@ -230,7 +238,7 @@ def to_existing_unnormalized_path(path: str, root: str = osp.sep) -> str:
candidates = equivalent_path_candidates(path, root)
for candidate in candidates:
if osp.exists(candidate):
if exists(candidate):
return candidate
raise FileNotFoundError(f'No matches with different casing found in "{root}"')
@ -251,7 +259,7 @@ def normalized_path_exists(path: str, root: str = osp.sep) -> bool:
candidates = equivalent_path_candidates(path, root)
for c in candidates:
if osp.exists(c):
if exists(c):
return True
return False
@ -287,9 +295,12 @@ def generate_cc_name(path: str, suffix: str = "conflicting copy") -> str:
return osp.join(dirname, cc_candidate)
# ==== higher level file operations ====================================================
def delete(path: str, raise_error: bool = False) -> Optional[OSError]:
"""
Deletes a file or folder at ``path``.
Deletes a file or folder at ``path``. Symlinks will not be followed.
:param path: Path of item to delete.
:param raise_error: Whether to raise errors or return them.
@ -298,7 +309,7 @@ def delete(path: str, raise_error: bool = False) -> Optional[OSError]:
err = None
try:
shutil.rmtree(path)
shutil.rmtree(path) # Will raise OSError when it finds a symlink.
except OSError:
try:
os.unlink(path)
@ -338,7 +349,7 @@ def move(
if preserve_dest_permissions:
# save dest permissions
try:
orig_mode = os.stat(dest_path).st_mode & 0o777
orig_mode = os.stat(dest_path, follow_symlinks=False).st_mode & 0o777
except FileNotFoundError:
pass
@ -381,7 +392,7 @@ def walk(
try:
path = entry.path
stat = entry.stat()
stat = entry.stat(follow_symlinks=False)
yield path, stat
@ -400,6 +411,9 @@ def walk(
raise
# ==== miscellaneous utilities =========================================================
def content_hash(
local_path: str, chunk_size: int = 65536
) -> Tuple[Optional[str], Optional[float]]:
@ -415,10 +429,10 @@ def content_hash(
hasher = DropboxContentHasher()
try:
mtime = os.stat(local_path).st_mtime
mtime = os.stat(local_path, follow_symlinks=False).st_mtime
try:
with open(local_path, "rb") as f:
with open(local_path, "rb", opener=opener_no_symlink) as f:
while True:
chunk = f.read(chunk_size)
if len(chunk) == 0:
@ -427,8 +441,14 @@ def content_hash(
except IsADirectoryError:
return "folder", mtime
else:
return str(hasher.hexdigest()), mtime
except OSError as exc:
if exc.errno == errno.ELOOP:
hasher.update(b"") # use empty file for symlinks
else:
raise exc
return str(hasher.hexdigest()), mtime
except FileNotFoundError:
return None, None
@ -466,3 +486,56 @@ def fs_max_lengths_for_path(path: str = "/") -> Tuple[int, int]:
raise RuntimeError("Cannot get file length limits.")
else:
dirname = "/"
# ==== symlink-proof os methods ========================================================
def opener_no_symlink(path: Path, flags: int) -> int:
    """
    Custom opener for the builtin :func:`open` which refuses to follow symlinks.
    Delegates to :func:`os.open` with ``O_NOFOLLOW`` added to the given flags.

    :param path: Path to open.
    :param flags: Flags passed to :func:`os.open`. O_NOFOLLOW will be added.
    :return: Open file descriptor.
    """
    return os.open(path, flags=flags | os.O_NOFOLLOW)
def _get_stats_no_symlink(path: Path) -> Optional[os.stat_result]:
try:
return os.stat(path, follow_symlinks=False)
except (FileNotFoundError, NotADirectoryError):
return None
def exists(path: Path) -> bool:
    """Returns whether an item exists at the path. Returns True for symlinks,
    including broken ones, because symlinks themselves are never followed."""
    try:
        os.stat(path, follow_symlinks=False)
    except (FileNotFoundError, NotADirectoryError):
        return False
    return True
def isfile(path: Path) -> bool:
    """Returns whether a file exists at the path. Returns True for symlinks
    because the link itself (a non-directory item) is examined, not its target."""
    try:
        mode = os.stat(path, follow_symlinks=False).st_mode
    except (FileNotFoundError, NotADirectoryError):
        return False
    return not S_ISDIR(mode)
def isdir(path: Path) -> bool:
    """Returns whether a folder exists at the path. Returns False for symlinks,
    even those pointing at folders, because links are never followed."""
    try:
        mode = os.stat(path, follow_symlinks=False).st_mode
    except (FileNotFoundError, NotADirectoryError):
        return False
    return S_ISDIR(mode)
def getsize(path: Path) -> int:
    """Returns the item size in bytes. Symlinks are not followed: for a symlink,
    this is the size of the link itself and not of its target.

    :param path: Path of item to query.
    :returns: Size in bytes.
    :raises OSError: If there is no item at the given path (e.g.
        :class:`FileNotFoundError`).
    """
    stat = os.stat(path, follow_symlinks=False)
    return stat.st_size

View File

@ -5,7 +5,6 @@ from datetime import datetime
import uuid
import pytest
from watchdog.utils.dirsnapshot import DirectorySnapshot
from dropbox.files import WriteMode, FileMetadata
from maestral.main import Maestral
@ -17,6 +16,7 @@ from maestral.utils.path import (
delete,
to_existing_unnormalized_path,
is_child,
walk,
)
from maestral.utils.appdirs import get_home_dir
from maestral.daemon import MaestralProxy
@ -165,29 +165,32 @@ def assert_synced(m: Maestral):
"""Asserts that the `local_folder` and `remote_folder` are synced."""
listing = m.client.list_folder("/", recursive=True)
local_snapshot = DirectorySnapshot(m.dropbox_path)
# Assert that all items from server are present locally with the same content hash.
for e in listing.entries:
for md in listing.entries:
if m.sync.is_excluded_by_user(e.path_lower):
if m.sync.is_excluded_by_user(md.path_lower):
continue
local_path = m.to_local_path(e.path_display)
local_path = m.to_local_path(md.path_display)
remote_hash = e.content_hash if isinstance(e, FileMetadata) else "folder"
remote_hash = md.content_hash if isinstance(md, FileMetadata) else "folder"
local_hash = m.sync.get_local_hash(local_path)
local_symlink_target = m.sync.get_local_symlink_target(local_path)
assert local_hash, f"'{e.path_display}' not found locally"
assert local_hash == remote_hash, f'different content for "{e.path_display}"'
assert local_hash, f"'{md.path_display}' not found locally"
assert local_hash == remote_hash, f'different content for "{md.path_display}"'
if isinstance(md, FileMetadata) and md.symlink_info:
assert (
md.symlink_info.target == local_symlink_target
), f'different symlink targets for "{md.path_display}"'
# Assert that all local items are present on server.
for path in local_snapshot.paths:
if not m.sync.is_excluded(path) and is_child(path, m.dropbox_path):
if not m.sync.is_excluded(path):
dbx_path = m.sync.to_dbx_path_lower(path)
has_match = any(e for e in listing.entries if e.path_lower == dbx_path)
assert has_match, f'local item "{path}" does not exist on dbx'
for path, _ in walk(m.dropbox_path, m.sync._scandir_with_ignore):
dbx_path = m.sync.to_dbx_path_lower(path)
has_match = any(md for md in listing.entries if md.path_lower == dbx_path)
assert has_match, f'local item "{path}" does not exist on dbx'
# Check that our index is correct.
for index_entry in m.sync.get_index():

View File

@ -1154,6 +1154,40 @@ def test_unknown_path_encoding(m, capsys):
assert_no_errors(m)
def test_symlink_error(m):
    # Creating a local symlink must produce a single, non-fatal SymlinkError
    # for that item because uploading symlinks is not supported.
    local_path = m.test_folder_local + "/link"
    dbx_path = m.test_folder_dbx + "/link"

    os.symlink("to_nowhere", local_path)
    wait_for_idle(m)

    assert len(m.fatal_errors) == 0
    assert len(m.sync_errors) == 1

    error = m.sync_errors[0]
    assert error["local_path"] == local_path
    assert error["type"] == "SymlinkError"
    assert normalize(dbx_path) in m.sync.upload_errors
def test_symlink_indexing_error(m):
    # Same as test_symlink_error, but the symlink is created while syncing is
    # paused so that the error surfaces during (re-)indexing instead.
    m.stop_sync()

    local_path = m.test_folder_local + "/link"
    dbx_path = m.test_folder_dbx + "/link"
    os.symlink("to_nowhere", local_path)

    m.start_sync()
    wait_for_idle(m)

    assert len(m.fatal_errors) == 0
    assert len(m.sync_errors) == 1

    error = m.sync_errors[0]
    assert error["local_path"] == local_path
    assert error["type"] == "SymlinkError"
    assert normalize(dbx_path) in m.sync.upload_errors
def test_dropbox_dir_delete_during_sync(m):
delete(m.dropbox_path)

View File

@ -247,7 +247,6 @@ def test_nested_events(sync):
group="local-event-processing",
min_time=0.1,
max_time=5,
min_rounds=4,
)
def test_performance(sync, benchmark):