create: fix repo lock getting stale when processing lots of unchanged files, fixes #8442

As a side effect, this may also help to keep the ssh / tcp connection alive,
because there now is a bit of traffic at least every 60s.
This commit is contained in:
Thomas Waldmann 2024-10-02 12:49:39 +02:00
parent 29d16f5c41
commit f1100f3c86
No known key found for this signature in database
GPG Key ID: 243ACFA951F78E01

View File

@ -4,7 +4,7 @@
import shutil
import stat
from collections import namedtuple
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from time import perf_counter
from .logger import create_logger
@ -709,6 +709,8 @@ class ChunksMixin:
def __init__(self):
    """Initialise the chunk index slot and the lock refresh bookkeeping."""
    # minimum time that has to pass between two indirect lock refreshes (see refresh_lock)
    self.refresh_td = timedelta(seconds=60)
    # timezone-aware (UTC) timestamp of the most recent known lock refresh
    self.last_refresh_dt = datetime.now(tz=timezone.utc)
    # no chunk index is held yet
    self._chunks = None
@property
def chunks(self):
@ -751,13 +753,18 @@ def add_chunk(
size = len(data) # data is still uncompressed
else:
raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also")
now = datetime.now(timezone.utc)
exists = self.seen_chunk(id, size)
if exists:
# if borg create is processing lots of unchanged files (no content and no metadata changes),
# there could be a long time without any repository operations and the repo lock would get stale.
self.refresh_lock(now)
return self.reuse_chunk(id, size, stats)
cdata = self.repo_objs.format(
id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type
)
self.repository.put(id, cdata, wait=wait)
self.last_refresh_dt = now # .put also refreshed the lock
self.chunks.add(id, ChunkIndex.MAX_VALUE, size)
stats.update(size, not exists)
return ChunkListEntry(id, size)
@ -767,6 +774,13 @@ def _write_chunks_cache(self, chunks):
write_chunkindex_to_repo_cache(self.repository, self._chunks, compact=True, clear=True)
self._chunks = None # nothing there (cleared!)
def refresh_lock(self, now):
    """Refresh the repository lock if the last refresh is too long ago.

    Nothing happens unless `now` is later than `self.last_refresh_dt + self.refresh_td`.
    Otherwise `self.repository.info()` is called and `now` is remembered as the
    time of the last refresh.

    :param now: current time, must be comparable to `self.last_refresh_dt`
    """
    next_refresh_due = self.last_refresh_dt + self.refresh_td
    if now <= next_refresh_due:
        return  # the lock was refreshed recently enough
    # a repository lock that is not refreshed regularly gets killed as stale.
    # the repository API has no explicit call for refreshing the lock, thus we
    # trigger the refresh indirectly via repository.info.
    self.repository.info()
    self.last_refresh_dt = now
class AdHocWithFilesCache(FilesCacheMixin, ChunksMixin):
"""