Redo borg list

- This is compatible except for {formatkeys}, which has been replaced
  by "borg list --help"
- --list-format is deprecated, use --format instead
  (using deprecated arguments will print a warning and an exit code of 1)
- borg list now supports the usual [PATH [PATHS…]] syntax and excludes
- Additional keys: csize, num_chunks, unique_chunks, NUL
- Supports guaranteed_available hashlib hashes
  (to avoid varying functionality depending on environment)
  (also, the other hashes are really obscure, like MD-4)
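
For illustration, a minimal sketch of the two-stage key resolution behind the new --format
handling: fixed keys such as {NL} are baked into the template up front with partial_format
(adapted from the helpers hunk below), and per-item keys are only computed when the template
actually references them, so expensive keys such as the hashlib digests cost nothing unless
requested. The template, chunk data and calculator table here are made-up stand-ins, not
borg APIs.

    import hashlib
    import re
    from string import Formatter

    def partial_format(format, mapping):
        """Substitute only the keys present in mapping, leaving unknown keys untouched."""
        for key, value in mapping.items():
            key = re.escape(key)
            format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
                            lambda match: match.group(1).format_map(mapping),
                            format)
        return format

    template = "{sha256} {size:8} {path}{NL}"
    # Stage 1: bake the fixed keys (NL, TAB, archive name, ...) into the template.
    template = partial_format(template, {'NL': '\n', 'TAB': '\t'})
    # Stage 2: parse out the keys the template still references ...
    needed = {field for _, field, _, _ in Formatter().parse(template) if field}
    # ... and compute only those for each item.
    chunks = [b'abba' * 4, b'baab' * 4]          # stand-in for a file's chunk data
    calculators = {
        'size': lambda: sum(len(c) for c in chunks),
        'sha256': lambda: hashlib.sha256(b''.join(chunks)).hexdigest(),
        'path': lambda: 'input/two_chunks',
    }
    item_data = {key: calculators[key]() for key in needed if key in calculators}
    print(template.format_map(item_data), end='')
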
Marian Beermann 2016-03-17 17:32:23 +01:00
parent 220d44b2b8
commit 4151db270c
4 changed files with 244 additions and 85 deletions

View File

@@ -16,12 +16,12 @@
import traceback
from . import __version__
from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, \
from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
get_cache_dir, prune_within, prune_split, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher
dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter
from .logger import create_logger, setup_logging
logger = create_logger()
from .compress import Compressor, COMPR_BUFFER
@@ -585,79 +585,29 @@ def do_list(self, args):
repository = self.open_repository(args)
manifest, key = Manifest.load(repository)
if args.location.archive:
archive = Archive(repository, key, manifest, args.location.archive)
"""use_user_format flag is used to speed up default listing.
When user issues format options, listing is a bit slower, but more keys are available and
precalculated.
"""
use_user_format = args.listformat is not None
if use_user_format:
list_format = args.listformat
elif args.short:
list_format = "{path}{LF}"
else:
list_format = "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{LF}"
matcher, _ = self.build_matcher(args.excludes, args.paths)
for item in archive.iter_items():
mode = stat.filemode(item[b'mode'])
type = mode[0]
size = 0
if type == '-':
try:
size = sum(size for _, size, _ in item[b'chunks'])
except KeyError:
pass
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
mtime = safe_timestamp(item[b'mtime'])
if use_user_format:
atime = safe_timestamp(item.get(b'atime') or item[b'mtime'])
ctime = safe_timestamp(item.get(b'ctime') or item[b'mtime'])
if b'source' in item:
source = item[b'source']
if type == 'l':
extra = ' -> %s' % item[b'source']
else:
mode = 'h' + mode[1:]
extra = ' link to %s' % item[b'source']
if args.format:
format = args.format
elif args.short:
format = "{path}{NL}"
else:
extra = ''
source = ''
item_data = {
'mode': mode,
'user': item[b'user'] or item[b'uid'],
'group': item[b'group'] or item[b'gid'],
'size': size,
'isomtime': format_time(mtime),
'path': remove_surrogates(item[b'path']),
'extra': extra,
'LF': '\n',
}
if use_user_format:
item_data_advanced = {
'bmode': item[b'mode'],
'type': type,
'source': source,
'linktarget': source,
'uid': item[b'uid'],
'gid': item[b'gid'],
'mtime': mtime,
'isoctime': format_time(ctime),
'ctime': ctime,
'isoatime': format_time(atime),
'atime': atime,
'archivename': archive.name,
'SPACE': ' ',
'TAB': '\t',
'CR': '\r',
'NEWLINE': os.linesep,
}
item_data.update(item_data_advanced)
item_data['formatkeys'] = list(item_data.keys())
print(format_line(list_format, item_data), end='')
format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
formatter = ItemFormatter(archive, format)
if not hasattr(sys.stdout, 'buffer'):
# This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
# which doesn't have an underlying buffer (= lower file object).
def write(bytestring):
sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
else:
write = sys.stdout.buffer.write
for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
write(formatter.format_item(item).encode('utf-8', errors='surrogateescape'))
repository.close()
else:
for archive_info in manifest.list_archive_infos(sort_by='ts'):
if args.prefix and not archive_info.name.startswith(args.prefix):
@@ -944,12 +894,13 @@ def do_help(self, parser, commands, args):
def preprocess_args(self, args):
deprecations = [
# ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'),
('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
]
for i, arg in enumerate(args[:]):
for old_name, new_name, warning in deprecations:
if arg.startswith(old_name):
args[i] = arg.replace(old_name, new_name)
print(warning)
self.print_warning(warning)
return args
def build_parser(self, args=None, prog=None):
@@ -1322,7 +1273,12 @@ def build_parser(self, args=None, prog=None):
list_epilog = textwrap.dedent("""
This command lists the contents of a repository or an archive.
""")
See the "borg help patterns" command for more help on exclude patterns.
The following keys are available for --format:
""") + ItemFormatter.keys_help()
subparser = subparsers.add_parser('list', parents=[common_parser],
description=self.do_list.__doc__,
epilog=list_epilog,
@@ -1332,15 +1288,22 @@ def build_parser(self, args=None, prog=None):
subparser.add_argument('--short', dest='short',
action='store_true', default=False,
help='only print file/directory names, nothing else')
subparser.add_argument('--list-format', dest='listformat', type=str,
help="""specify format for archive file listing
(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}")
Special "{formatkeys}" exists to list available keys""")
subparser.add_argument('--format', '--list-format', dest='format', type=str,
help="""specify format for file listing
(default: "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}")""")
subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
help='only consider archive names starting with this prefix')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository/archive to list contents of')
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to list; patterns are supported')
mount_epilog = textwrap.dedent("""
This command mounts an archive as a FUSE filesystem. This can be useful for

View File

@@ -1,8 +1,9 @@
import argparse
from binascii import hexlify
from collections import namedtuple
from functools import wraps
from functools import wraps, partial
import grp
import hashlib
import os
import stat
import textwrap
@@ -10,6 +11,7 @@
import re
from shutil import get_terminal_size
import sys
from string import Formatter
import platform
import time
import unicodedata
@@ -548,6 +550,20 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
return tag_paths
def partial_format(format, mapping):
"""
Apply format.format_map(mapping) while preserving unknown keys
Does not support attribute access, indexing and ![rsa] conversions
"""
for key, value in mapping.items():
key = re.escape(key)
format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
lambda match: match.group(1).format_map(mapping),
format)
return format
def format_line(format, data):
# TODO: Filter out unwanted properties of str.format(), because "format" is user provided.
@@ -556,7 +572,7 @@ def format_line(format, data):
except (KeyError, ValueError) as e:
# this should catch format errors
print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
except:
except Exception as e:
# something unexpected, print error and raise exception
print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
raise
@@ -1090,3 +1106,141 @@ def log_multi(*msgs, level=logging.INFO):
lines.extend(msg.splitlines())
for line in lines:
logger.log(level, line)
class ItemFormatter:
FIXED_KEYS = {
# Formatting aids
'LF': '\n',
'SPACE': ' ',
'TAB': '\t',
'CR': '\r',
'NUL': '\0',
'NEWLINE': os.linesep,
'NL': os.linesep,
}
KEY_DESCRIPTIONS = {
'NEWLINE': 'OS dependent line separator',
'NL': 'alias of NEWLINE',
'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
'csize': 'compressed size',
'bpath': 'verbatim POSIX path, can contain any character except NUL',
'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
'source': 'link target for links (identical to linktarget)',
'num_chunks': 'number of chunks in this file',
'unique_chunks': 'number of unique chunks in this file',
}
@classmethod
def available_keys(cls):
class FakeArchive:
fpr = name = ""
fake_item = {
b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
b'uid': 0, b'gid': 0,
}
formatter = cls(FakeArchive, "")
keys = []
keys.extend(formatter.call_keys.keys())
keys.extend(formatter.get_item_data(fake_item).keys())
return sorted(keys, key=lambda s: (s.isupper(), s))
@classmethod
def keys_help(cls):
help = []
for key in cls.available_keys():
text = " - " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
return "\n".join(help)
def __init__(self, archive, format):
self.archive = archive
static_keys = {
'archivename': archive.name,
'archiveid': archive.fpr,
}
static_keys.update(self.FIXED_KEYS)
self.format = partial_format(format, static_keys)
self.format_keys = {f[1] for f in Formatter().parse(format)}
self.call_keys = {
'size': self.calculate_size,
'csize': self.calculate_csize,
'num_chunks': self.calculate_num_chunks,
'unique_chunks': self.calculate_unique_chunks,
'isomtime': partial(self.format_time, b'mtime'),
'isoctime': partial(self.format_time, b'ctime'),
'isoatime': partial(self.format_time, b'atime'),
'mtime': partial(self.time, b'mtime'),
'ctime': partial(self.time, b'ctime'),
'atime': partial(self.time, b'atime'),
}
for hash_function in hashlib.algorithms_guaranteed:
self.add_key(hash_function, partial(self.hash_item, hash_function))
self.used_call_keys = set(self.call_keys) & self.format_keys
self.item_data = static_keys
def add_key(self, key, callable_with_item):
self.call_keys[key] = callable_with_item
self.used_call_keys = set(self.call_keys) & self.format_keys
def get_item_data(self, item):
mode = stat.filemode(item[b'mode'])
item_type = mode[0]
item_data = self.item_data
source = item.get(b'source', '')
extra = ''
if source:
source = remove_surrogates(source)
if item_type == 'l':
extra = ' -> %s' % source
else:
mode = 'h' + mode[1:]
extra = ' link to %s' % source
item_data['type'] = item_type
item_data['mode'] = mode
item_data['user'] = item[b'user'] or item[b'uid']
item_data['group'] = item[b'group'] or item[b'gid']
item_data['uid'] = item[b'uid']
item_data['gid'] = item[b'gid']
item_data['path'] = remove_surrogates(item[b'path'])
item_data['bpath'] = item[b'path']
item_data['source'] = source
item_data['linktarget'] = source
item_data['extra'] = extra
for key in self.used_call_keys:
item_data[key] = self.call_keys[key](item)
return item_data
def format_item(self, item):
return self.format.format_map(self.get_item_data(item))
def calculate_num_chunks(self, item):
return len(item.get(b'chunks', []))
def calculate_unique_chunks(self, item):
chunk_index = self.archive.cache.chunks
return sum(1 for chunk_id, _, _ in item.get(b'chunks', []) if chunk_index[chunk_id][0] == 1)
def calculate_size(self, item):
return sum(size for _, size, _ in item.get(b'chunks', []))
def calculate_csize(self, item):
return sum(csize for _, _, csize in item.get(b'chunks', []))
def hash_item(self, hash_function, item):
if b'chunks' not in item:
return ""
hash = hashlib.new(hash_function)
for chunk in self.archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]):
hash.update(chunk)
return hash.hexdigest()
def format_time(self, key, item):
return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
def time(self, key, item):
return safe_timestamp(item.get(key) or item[b'mtime'])

View File

@@ -892,16 +892,50 @@ def test_list_prefix(self):
self.assert_in('test-2', output)
self.assert_not_in('something-else', output)
def test_list_list_format(self):
def test_list_format(self):
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', test_archive, src_dir)
self.cmd('list', '--list-format', '-', test_archive, exit_code=1)
self.archiver.exit_code = 0 # reset exit code for following tests
output_1 = self.cmd('list', test_archive)
output_2 = self.cmd('list', '--list-format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
output_3 = self.cmd('list', '--list-format', '{mtime:%s} {path}{NL}', test_archive)
output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
output_3 = self.cmd('list', '--format', '{mtime:%s} {path}{NL}', test_archive)
self.assertEqual(output_1, output_2)
self.assertNotEqual(output_1, output_3)
def test_list_hash(self):
self.create_regular_file('empty_file', size=0)
self.create_regular_file('amb', contents=b'a' * 1000000)
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', test_archive, 'input')
output = self.cmd('list', '--format', '{sha256} {path}{NL}', test_archive)
assert "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0 input/amb" in output
assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 input/empty_file" in output
def test_list_chunk_counts(self):
self.create_regular_file('empty_file', size=0)
self.create_regular_file('two_chunks')
with open(os.path.join(self.input_path, 'two_chunks'), 'wb') as fd:
fd.write(b'abba' * 2000000)
fd.write(b'baab' * 2000000)
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', test_archive, 'input')
output = self.cmd('list', '--format', '{num_chunks} {unique_chunks} {path}{NL}', test_archive)
assert "0 0 input/empty_file" in output
assert "2 2 input/two_chunks" in output
def test_list_size(self):
self.create_regular_file('compressible_file', size=10000)
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', '-C', 'lz4', test_archive, 'input')
output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive)
size, csize, path = output.split("\n")[1].split(" ")
assert int(csize) < int(size)
def test_break_lock(self):
self.cmd('init', self.repository_location)
self.cmd('break-lock', self.repository_location)

View File

@@ -15,7 +15,7 @@
yes, TRUISH, FALSISH, DEFAULTISH, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format
from . import BaseTestCase, environment_variable, FakeInputs
@@ -877,3 +877,11 @@ def test_progress_endless_step(capfd):
pi.show()
out, err = capfd.readouterr()
assert err == '.'
def test_partial_format():
assert partial_format('{space:10}', {'space': ' '}) == ' ' * 10
assert partial_format('{foobar}', {'bar': 'wrong', 'foobar': 'correct'}) == 'correct'
assert partial_format('{unknown_key}', {}) == '{unknown_key}'
assert partial_format('{key}{{escaped_key}}', {}) == '{key}{{escaped_key}}'
assert partial_format('{{escaped_key}}', {'escaped_key': 1234}) == '{{escaped_key}}'