From 81191243596aa1bfce143b062b4fbdf188be1e8f Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 11:39:00 -0700 Subject: [PATCH 01/14] Don't process an entry if the nodump flag is set. --- attic/archiver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/attic/archiver.py b/attic/archiver.py index 5731ffb4c..8d63c0714 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -158,6 +158,9 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, # Ignore unix sockets if stat.S_ISSOCK(st.st_mode): return + # Ignore if nodump flag set + if st.st_flags and stat.UF_NODUMP(st.st_flags): + return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): try: From e11a4a5d3a912596d8db9d85f721699231982526 Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 12:30:25 -0700 Subject: [PATCH 02/14] Check the UF_NODUMP flag properly. --- attic/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 8d63c0714..335012477 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -159,7 +159,7 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, if stat.S_ISSOCK(st.st_mode): return # Ignore if nodump flag set - if st.st_flags and stat.UF_NODUMP(st.st_flags): + if st.st_flags and (st.st_flags & stat.UF_NODUMP): return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): From ebc04b0ebffe82036670409148ef856f83226be8 Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 15:01:42 -0700 Subject: [PATCH 03/14] Check for lchflags properly. 
--- attic/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 335012477..05076fc0e 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -21,6 +21,7 @@ is_cachedir, bigint_to_int from attic.remote import RepositoryServer, RemoteRepository +has_lchflags = hasattr(os, 'lchflags') class Archiver: @@ -159,7 +160,7 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, if stat.S_ISSOCK(st.st_mode): return # Ignore if nodump flag set - if st.st_flags and (st.st_flags & stat.UF_NODUMP): + if has_lchflags and (st.st_flags & stat.UF_NODUMP): return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): From 57845c07ed4e772abf4ddf353db8d08a2cca6fa0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 10 Aug 2015 16:48:19 +0100 Subject: [PATCH 04/14] Clean up fds of segments we delete (during compaction) When we delete a segment, let's close its fd as well. Note that, as well as wasting the fd, this was forcing the filesystem to preserve the deleted file until we exited. I noticed roughly 20 open fds of deleted files when attic saved 10G of data. 
--- attic/repository.py | 3 +++ 1 file changed, 3 insertions(+) mode change 100644 => 100755 attic/repository.py diff --git a/attic/repository.py b/attic/repository.py old mode 100644 new mode 100755 index eed85dc43..1d2bb0f0f --- a/attic/repository.py +++ b/attic/repository.py @@ -478,6 +478,9 @@ def get_fd(self, segment): return fd def delete_segment(self, segment): + fd = self.fds.pop(segment) + if fd != None: + fd.close() try: os.unlink(self.segment_filename(segment)) except OSError: From 3321a887d34d607fc59e9d2d19f07b5862295908 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 10 Aug 2015 22:37:32 +0100 Subject: [PATCH 05/14] io.write_commit() already implies io.close_segment() --- attic/repository.py | 1 - 1 file changed, 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index 1d2bb0f0f..bd10e3212 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -300,7 +300,6 @@ def report_error(msg): report_error('Adding commit tag to segment {}'.format(transaction_id)) self.io.segment = transaction_id + 1 self.io.write_commit() - self.io.close_segment() if current_index and not repair: if len(current_index) != len(self.index): report_error('Index object count mismatch. 
{} != {}'.format(len(current_index), len(self.index))) From e06b0b36129d42f979a09456ed66a6e9b2d9a8ad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 01:04:03 +0200 Subject: [PATCH 06/14] use C99's uintmax_t and %ju format whatever size_t and off_t is, should even fit in there --- borg/_hashindex.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 33d12ca03..aa1881f18 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -145,10 +145,12 @@ hashindex_read(const char *path) bytes_read = fread(&header, 1, sizeof(HashHeader), fd); if(bytes_read != sizeof(HashHeader)) { if(ferror(fd)) { - EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); + EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); } else { - EPRINTF_MSG_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); + EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); } goto fail; } @@ -170,7 +172,8 @@ hashindex_read(const char *path) } buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size); if(length != sizeof(HashHeader) + buckets_length) { - EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length); + EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length); goto fail; } if(!(index = malloc(sizeof(HashIndex)))) { @@ -186,10 +189,12 @@ hashindex_read(const char *path) bytes_read = fread(index->buckets, 1, buckets_length, fd); if(bytes_read != buckets_length) { if(ferror(fd)) { - EPRINTF_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read); + EPRINTF_PATH(path, "fread 
buckets failed (expected %ju, got %ju)", + (uintmax_t) buckets_length, (uintmax_t) bytes_read); } else { - EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read); + EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)", + (uintmax_t) buckets_length, (uintmax_t) bytes_read); } free(index->buckets); free(index); From feff0f0c9421c7487e618eb7f771bbf1a2568603 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 03:15:44 +0200 Subject: [PATCH 07/14] install docs: replace hack for llfuse with proper solution found out why it could not install llfuse into virtual env: it always complained about not being able to find fuse.pc - which is part of libfuse-dev / fuse-devel and was missing. once one adds the fuse dev stuff, llfuse installs to virtual env without problems. --- docs/installation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 90bd33f84..3cd4e13b6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -62,13 +62,11 @@ Some of the steps detailled below might be useful also for non-git installs. # if you do not have gcc / make / etc. yet apt-get install build-essential - # optional: lowlevel FUSE py binding - to mount backup archives + # optional: FUSE support - to mount backup archives # in case you get complaints about permission denied on /etc/fuse.conf: # on ubuntu this means your user is not in the "fuse" group. just add # yourself there, log out and log in again. - # if it complains about not being able to find llfuse: make a symlink - # borg-env/lib/python3.4/site-packages/llfuse -> /usr/lib/python3/dist-packages/llfuse - apt-get install python3-llfuse fuse + apt-get install libfuse-dev fuse # optional: for unit testing apt-get install fakeroot @@ -84,6 +82,7 @@ Some of the steps detailled below might be useful also for non-git installs. 
pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support cd borg pip install -e . # in-place editable mode @@ -108,8 +107,8 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library sudo dnf install libacl-devel libacl - # optional: lowlevel FUSE py binding - to mount backup archives - sudo dnf install python3-llfuse fuse + # optional: FUSE support - to mount backup archives + sudo dnf install fuse-devel fuse # optional: for unit testing sudo dnf install fakeroot @@ -125,6 +124,7 @@ Some of the steps detailled below might be useful also for non-git installs. pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support cd borg pip install -e . # in-place editable mode From 4d8949e66a6f0183e50b07d7f68827b86f22641b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 04:09:36 +0200 Subject: [PATCH 08/14] archiver: more tests --- borg/archiver.py | 6 +++--- borg/testsuite/archiver.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 38d270647..deed03786 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -859,7 +859,7 @@ def run(self, args=None): return args.func(args) -def sig_info_handler(signum, stack): +def sig_info_handler(signum, stack): # pragma: no cover """search the stack for infos about the currently processed file and print them""" for frame in inspect.getouterframes(stack): func, loc = frame[3], frame[0].f_locals @@ -882,7 +882,7 @@ def sig_info_handler(signum, stack): break -def setup_signal_handlers(): +def setup_signal_handlers(): # pragma: no cover sigs = [] if hasattr(signal, 'SIGUSR1'): sigs.append(signal.SIGUSR1) 
# kill -USR1 pid @@ -892,7 +892,7 @@ def setup_signal_handlers(): signal.signal(sig, sig_info_handler) -def main(): +def main(): # pragma: no cover # Make sure stdout and stderr have errors='replace') to avoid unicode # issues when print()-ing unicode file names sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 20e76a7e0..489f3f69f 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -183,7 +183,7 @@ def test_basic_functionality(self): self.create_test_files() self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) @@ -403,7 +403,7 @@ def test_delete(self): self.cmd('extract', '--dry-run', self.repository_location + '::test.2') self.cmd('delete', self.repository_location + '::test') self.cmd('extract', '--dry-run', self.repository_location + '::test.2') - self.cmd('delete', self.repository_location + '::test.2') + self.cmd('delete', '--stats', self.repository_location + '::test.2') # Make sure all data except the manifest has been deleted repository = Repository(self.repository_path) self.assert_equal(len(repository), 1) @@ -470,10 +470,38 @@ def test_prune_repository(self): self.assert_not_in('test1', output) self.assert_in('test2', output) + def test_prune_repository_prefix(self): + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir) + self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir) + self.cmd('create', self.repository_location + 
'::bar-2015-08-12-10:00', src_dir) + self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir) + output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') + self.assert_in('Keeping archive: foo-2015-08-12-20:00', output) + self.assert_in('Would prune: foo-2015-08-12-10:00', output) + output = self.cmd('list', self.repository_location) + self.assert_in('foo-2015-08-12-10:00', output) + self.assert_in('foo-2015-08-12-20:00', output) + self.assert_in('bar-2015-08-12-10:00', output) + self.assert_in('bar-2015-08-12-20:00', output) + self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-') + output = self.cmd('list', self.repository_location) + self.assert_not_in('foo-2015-08-12-10:00', output) + self.assert_in('foo-2015-08-12-20:00', output) + self.assert_in('bar-2015-08-12-10:00', output) + self.assert_in('bar-2015-08-12-20:00', output) + def test_usage(self): self.assert_raises(SystemExit, lambda: self.cmd()) self.assert_raises(SystemExit, lambda: self.cmd('-h')) + def test_help(self): + assert 'Borg' in self.cmd('help') + assert 'patterns' in self.cmd('help', 'patterns') + assert 'Initialize' in self.cmd('help', 'init') + assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only') + assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only') + @unittest.skipUnless(has_llfuse, 'llfuse not installed') def test_fuse_mount_repository(self): mountpoint = os.path.join(self.tmpdir, 'mountpoint') From 8300efb1dbfe17d9964c68fe790480acbc453e51 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 04:28:31 +0200 Subject: [PATCH 09/14] remote: pragma: no cover for the stuff we can't test --- borg/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 1d7ae84e2..3a274b214 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -28,7 +28,7 @@ class InvalidRPCMethod(Error): 
"""RPC method is not valid""" -class RepositoryServer: +class RepositoryServer: # pragma: no cover rpc_methods = ( '__len__', 'check', @@ -129,7 +129,7 @@ def __init__(self, location, create=False): umask = ['--umask', '%03o' % self.umask] if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args - else: + else: # pragma: no cover args = ['ssh'] if location.port: args += ['-p', str(location.port)] From d83b919d52d40af54b5d353e9f408c550f714358 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 11:20:02 +0100 Subject: [PATCH 10/14] Style fix in added code PEP8 says to prefer "is not None" --- attic/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index bd10e3212..a926d7a78 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -478,7 +478,7 @@ def get_fd(self, segment): def delete_segment(self, segment): fd = self.fds.pop(segment) - if fd != None: + if fd is not None: fd.close() try: os.unlink(self.segment_filename(segment)) From 04887439a0261388d0e3088f851299f075a0e4a5 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 11:32:12 +0100 Subject: [PATCH 11/14] recover_segment(): don't assume we have an fd for segment Suggested by @ThomasWaldmann. Avoiding a complex assumption should make the code easier to understand and maintain. (Technically we do have an fd for the segment, because the only caller opens the segment and checks it before calling for repair.) 
--- attic/repository.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index a926d7a78..ad7031654 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -515,7 +515,9 @@ def iter_objects(self, segment, include_data=False): header = fd.read(self.header_fmt.size) def recover_segment(self, segment, filename): - self.fds.pop(segment).close() + fd = self.fds.pop(segment) + if fd is not None: + fd.close() # FIXME: save a copy of the original file with open(filename, 'rb') as fd: data = memoryview(fd.read()) From 2194d9837e4021370402ead33d5724ceb78b0735 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 16:04:41 +0200 Subject: [PATCH 12/14] update CHANGES --- CHANGES.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index b333ba48a..13dfdb4ce 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,30 @@ Borg Changelog ============== +Version 0.25.0 (not released yet) +--------------------------------- + +Incompatible changes (compared to 0.24): + +- none yet + +Deprecations: + +- none yet + +New features: + +- honor the nodump flag (UF_NODUMP) and do not backup such items + +Bug fixes: + +- close fds of segments we delete (during compaction) + +Other changes: + +- none yet + + Version 0.24.0 -------------- From 04814241289c4febdfb9c497783d1cf3d7a51538 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 16:41:30 +0200 Subject: [PATCH 13/14] fix archiver test to not expect backup of the UF_NODUMP file --- borg/testsuite/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 489f3f69f..eb707ade1 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -187,7 +187,8 @@ def test_basic_functionality(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') 
self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11) + file_count = 10 if has_lchflags else 11 # one file is UF_NODUMP + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), file_count) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') self.assert_in('Number of files: 4', info_output) From 3100fac3617851d4d67096df31f74a96f9fd2e86 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 17:03:30 +0200 Subject: [PATCH 14/14] fix archiver test to not expect backup of the UF_NODUMP file, try 2 --- borg/testsuite/archiver.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index eb707ade1..2ed2f7821 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -187,11 +187,15 @@ def test_basic_functionality(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) - file_count = 10 if has_lchflags else 11 # one file is UF_NODUMP - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), file_count) + item_count = 10 if has_lchflags else 11 # one file is UF_NODUMP + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), item_count) + if has_lchflags: + # remove the file we did not backup, so input and output become equal + os.remove(os.path.join('input', 'flagfile')) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') - self.assert_in('Number of files: 4', info_output) + item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP + self.assert_in('Number of files: %d' % item_count, 
info_output) shutil.rmtree(self.cache_path) with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): info_output2 = self.cmd('info', self.repository_location + '::test')