Redo borg list

- This is compatible except for {formatkeys}, which has been replaced
  by "borg list --help"
- --list-format is deprecated, use --format instead
  (using deprecated arguments will print a warning and an exit code of 1)
- borg list now supports the usual [PATH [PATHS…]] syntax and excludes
- Additional keys: csize, num_chunks, unique_chunks, NUL
- Supports guaranteed_available hashlib hashes
  (to avoid varying functionality depending on environment)
  (also, the other hashes are really obscure, like MD-4)
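
For illustration, a minimal sketch of the two-stage key resolution behind the new --format
handling: fixed keys such as {NL} are baked into the template up front with partial_format
(adapted from the helpers hunk below), and per-item keys are only computed when the template
actually references them, so expensive keys such as the hashlib digests cost nothing unless
requested. The template, chunk data and calculator table here are made-up stand-ins, not
borg APIs.

    import hashlib
    import re
    from string import Formatter

    def partial_format(format, mapping):
        """Substitute only the keys present in mapping, leaving unknown keys untouched."""
        for key, value in mapping.items():
            key = re.escape(key)
            format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
                            lambda match: match.group(1).format_map(mapping),
                            format)
        return format

    template = "{sha256} {size:8} {path}{NL}"
    # Stage 1: bake the fixed keys (NL, TAB, archive name, ...) into the template.
    template = partial_format(template, {'NL': '\n', 'TAB': '\t'})
    # Stage 2: parse out the keys the template still references ...
    needed = {field for _, field, _, _ in Formatter().parse(template) if field}
    # ... and compute only those for each item.
    chunks = [b'abba' * 4, b'baab' * 4]          # stand-in for a file's chunk data
    calculators = {
        'size': lambda: sum(len(c) for c in chunks),
        'sha256': lambda: hashlib.sha256(b''.join(chunks)).hexdigest(),
        'path': lambda: 'input/two_chunks',
    }
    item_data = {key: calculators[key]() for key in needed if key in calculators}
    print(template.format_map(item_data), end='')
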
Marian Beermann 2016-03-17 17:32:23 +01:00
parent 220d44b2b8
commit 4151db270c
4 changed files with 244 additions and 85 deletions

View File

@@ -16,12 +16,12 @@
import traceback
from . import __version__
from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, \
from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
get_cache_dir, prune_within, prune_split, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher
dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter
from .logger import create_logger, setup_logging
logger = create_logger()
from .compress import Compressor, COMPR_BUFFER
@@ -585,79 +585,29 @@ def do_list(self, args):
repository = self.open_repository(args)
manifest, key = Manifest.load(repository)
if args.location.archive:
archive = Archive(repository, key, manifest, args.location.archive)
"""use_user_format flag is used to speed up default listing.
When user issues format options, listing is a bit slower, but more keys are available and
precalculated.
"""
use_user_format = args.listformat is not None
if use_user_format:
list_format = args.listformat
elif args.short:
list_format = "{path}{LF}"
else:
list_format = "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{LF}"
matcher, _ = self.build_matcher(args.excludes, args.paths)
for item in archive.iter_items():
mode = stat.filemode(item[b'mode'])
type = mode[0]
size = 0
if type == '-':
try:
size = sum(size for _, size, _ in item[b'chunks'])
except KeyError:
pass
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
mtime = safe_timestamp(item[b'mtime'])
if use_user_format:
atime = safe_timestamp(item.get(b'atime') or item[b'mtime'])
ctime = safe_timestamp(item.get(b'ctime') or item[b'mtime'])
if b'source' in item:
source = item[b'source']
if type == 'l':
extra = ' -> %s' % item[b'source']
else:
mode = 'h' + mode[1:]
extra = ' link to %s' % item[b'source']
if args.format:
format = args.format
elif args.short:
format = "{path}{NL}"
else:
extra = ''
source = ''
item_data = {
'mode': mode,
'user': item[b'user'] or item[b'uid'],
'group': item[b'group'] or item[b'gid'],
'size': size,
'isomtime': format_time(mtime),
'path': remove_surrogates(item[b'path']),
'extra': extra,
'LF': '\n',
}
if use_user_format:
item_data_advanced = {
'bmode': item[b'mode'],
'type': type,
'source': source,
'linktarget': source,
'uid': item[b'uid'],
'gid': item[b'gid'],
'mtime': mtime,
'isoctime': format_time(ctime),
'ctime': ctime,
'isoatime': format_time(atime),
'atime': atime,
'archivename': archive.name,
'SPACE': ' ',
'TAB': '\t',
'CR': '\r',
'NEWLINE': os.linesep,
}
item_data.update(item_data_advanced)
item_data['formatkeys'] = list(item_data.keys())
print(format_line(list_format, item_data), end='')
format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
formatter = ItemFormatter(archive, format)
if not hasattr(sys.stdout, 'buffer'):
# This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
# which doesn't have an underlying buffer (= lower file object).
def write(bytestring):
sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
else:
write = sys.stdout.buffer.write
for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
write(formatter.format_item(item).encode('utf-8', errors='surrogateescape'))
repository.close()
else:
for archive_info in manifest.list_archive_infos(sort_by='ts'):
if args.prefix and not archive_info.name.startswith(args.prefix):
@@ -944,12 +894,13 @@ def do_help(self, parser, commands, args):
def preprocess_args(self, args):
deprecations = [
# ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'),
('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
]
for i, arg in enumerate(args[:]):
for old_name, new_name, warning in deprecations:
if arg.startswith(old_name):
args[i] = arg.replace(old_name, new_name)
print(warning)
self.print_warning(warning)
return args
def build_parser(self, args=None, prog=None):
@@ -1322,7 +1273,12 @@ def build_parser(self, args=None, prog=None):
list_epilog = textwrap.dedent("""
This command lists the contents of a repository or an archive.
""")
See the "borg help patterns" command for more help on exclude patterns.
The following keys are available for --format:
""") + ItemFormatter.keys_help()
subparser = subparsers.add_parser('list', parents=[common_parser],
description=self.do_list.__doc__,
epilog=list_epilog,
@@ -1332,15 +1288,22 @@ def build_parser(self, args=None, prog=None):
subparser.add_argument('--short', dest='short',
action='store_true', default=False,
help='only print file/directory names, nothing else')
subparser.add_argument('--list-format', dest='listformat', type=str,
help="""specify format for archive file listing
(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}")
Special "{formatkeys}" exists to list available keys""")
subparser.add_argument('--format', '--list-format', dest='format', type=str,
help="""specify format for file listing
(default: "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}")""")
subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
help='only consider archive names starting with this prefix')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository/archive to list contents of')
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to list; patterns are supported')
mount_epilog = textwrap.dedent("""
This command mounts an archive as a FUSE filesystem. This can be useful for

View File

@@ -1,8 +1,9 @@
import argparse
from binascii import hexlify
from collections import namedtuple
from functools import wraps
from functools import wraps, partial
import grp
import hashlib
import os
import stat
import textwrap
@@ -10,6 +11,7 @@
import re
from shutil import get_terminal_size
import sys
from string import Formatter
import platform
import time
import unicodedata
@@ -548,6 +550,20 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
return tag_paths
def partial_format(format, mapping):
"""
Apply format.format_map(mapping) while preserving unknown keys
Does not support attribute access, indexing and ![rsa] conversions
"""
for key, value in mapping.items():
key = re.escape(key)
format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
lambda match: match.group(1).format_map(mapping),
format)
return format
def format_line(format, data):
# TODO: Filter out unwanted properties of str.format(), because "format" is user provided.
@@ -556,7 +572,7 @@ def format_line(format, data):
except (KeyError, ValueError) as e:
# this should catch format errors
print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
except:
except Exception as e:
# something unexpected, print error and raise exception
print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
raise
@@ -1090,3 +1106,141 @@ def log_multi(*msgs, level=logging.INFO):
lines.extend(msg.splitlines())
for line in lines:
logger.log(level, line)
class ItemFormatter:
FIXED_KEYS = {
# Formatting aids
'LF': '\n',
'SPACE': ' ',
'TAB': '\t',
'CR': '\r',
'NUL': '\0',
'NEWLINE': os.linesep,
'NL': os.linesep,
}
KEY_DESCRIPTIONS = {
'NEWLINE': 'OS dependent line separator',
'NL': 'alias of NEWLINE',
'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
'csize': 'compressed size',
'bpath': 'verbatim POSIX path, can contain any character except NUL',
'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
'source': 'link target for links (identical to linktarget)',
'num_chunks': 'number of chunks in this file',
'unique_chunks': 'number of unique chunks in this file',
}
@classmethod
def available_keys(cls):
class FakeArchive:
fpr = name = ""
fake_item = {
b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
b'uid': 0, b'gid': 0,
}
formatter = cls(FakeArchive, "")
keys = []
keys.extend(formatter.call_keys.keys())
keys.extend(formatter.get_item_data(fake_item).keys())
return sorted(keys, key=lambda s: (s.isupper(), s))
@classmethod
def keys_help(cls):
help = []
for key in cls.available_keys():
text = " - " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
return "\n".join(help)
def __init__(self, archive, format):
self.archive = archive
static_keys = {
'archivename': archive.name,
'archiveid': archive.fpr,
}
static_keys.update(self.FIXED_KEYS)
self.format = partial_format(format, static_keys)
self.format_keys = {f[1] for f in Formatter().parse(format)}
self.call_keys = {
'size': self.calculate_size,
'csize': self.calculate_csize,
'num_chunks': self.calculate_num_chunks,
'unique_chunks': self.calculate_unique_chunks,
'isomtime': partial(self.format_time, b'mtime'),
'isoctime': partial(self.format_time, b'ctime'),
'isoatime': partial(self.format_time, b'atime'),
'mtime': partial(self.time, b'mtime'),
'ctime': partial(self.time, b'ctime'),
'atime': partial(self.time, b'atime'),
}
for hash_function in hashlib.algorithms_guaranteed:
self.add_key(hash_function, partial(self.hash_item, hash_function))
self.used_call_keys = set(self.call_keys) & self.format_keys
self.item_data = static_keys
def add_key(self, key, callable_with_item):
self.call_keys[key] = callable_with_item
self.used_call_keys = set(self.call_keys) & self.format_keys
def get_item_data(self, item):
mode = stat.filemode(item[b'mode'])
item_type = mode[0]
item_data = self.item_data
source = item.get(b'source', '')
extra = ''
if source:
source = remove_surrogates(source)
if item_type == 'l':
extra = ' -> %s' % source
else:
mode = 'h' + mode[1:]
extra = ' link to %s' % source
item_data['type'] = item_type
item_data['mode'] = mode
item_data['user'] = item[b'user'] or item[b'uid']
item_data['group'] = item[b'group'] or item[b'gid']
item_data['uid'] = item[b'uid']
item_data['gid'] = item[b'gid']
item_data['path'] = remove_surrogates(item[b'path'])
item_data['bpath'] = item[b'path']
item_data['source'] = source
item_data['linktarget'] = source
item_data['extra'] = extra
for key in self.used_call_keys:
item_data[key] = self.call_keys[key](item)
return item_data
def format_item(self, item):
return self.format.format_map(self.get_item_data(item))
def calculate_num_chunks(self, item):
return len(item.get(b'chunks', []))
def calculate_unique_chunks(self, item):
chunk_index = self.archive.cache.chunks
return sum(1 for chunk_id, _, _ in item.get(b'chunks', []) if chunk_index[chunk_id][0] == 1)
def calculate_size(self, item):
return sum(size for _, size, _ in item.get(b'chunks', []))
def calculate_csize(self, item):
return sum(csize for _, _, csize in item.get(b'chunks', []))
def hash_item(self, hash_function, item):
if b'chunks' not in item:
return ""
hash = hashlib.new(hash_function)
for chunk in self.archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]):
hash.update(chunk)
return hash.hexdigest()
def format_time(self, key, item):
return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
def time(self, key, item):
return safe_timestamp(item.get(key) or item[b'mtime'])

View File

@@ -892,16 +892,50 @@ def test_list_prefix(self):
self.assert_in('test-2', output)
self.assert_not_in('something-else', output)
def test_list_list_format(self):
def test_list_format(self):
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', test_archive, src_dir)
self.cmd('list', '--list-format', '-', test_archive, exit_code=1)
self.archiver.exit_code = 0 # reset exit code for following tests
output_1 = self.cmd('list', test_archive)
output_2 = self.cmd('list', '--list-format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
output_3 = self.cmd('list', '--list-format', '{mtime:%s} {path}{NL}', test_archive)
output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
output_3 = self.cmd('list', '--format', '{mtime:%s} {path}{NL}', test_archive)
self.assertEqual(output_1, output_2)
self.assertNotEqual(output_1, output_3)
def test_list_hash(self):
self.create_regular_file('empty_file', size=0)
self.create_regular_file('amb', contents=b'a' * 1000000)
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', test_archive, 'input')
output = self.cmd('list', '--format', '{sha256} {path}{NL}', test_archive)
assert "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0 input/amb" in output
assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 input/empty_file" in output
def test_list_chunk_counts(self):
self.create_regular_file('empty_file', size=0)
self.create_regular_file('two_chunks')
with open(os.path.join(self.input_path, 'two_chunks'), 'wb') as fd:
fd.write(b'abba' * 2000000)
fd.write(b'baab' * 2000000)
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', test_archive, 'input')
output = self.cmd('list', '--format', '{num_chunks} {unique_chunks} {path}{NL}', test_archive)
assert "0 0 input/empty_file" in output
assert "2 2 input/two_chunks" in output
def test_list_size(self):
self.create_regular_file('compressible_file', size=10000)
self.cmd('init', self.repository_location)
test_archive = self.repository_location + '::test'
self.cmd('create', '-C', 'lz4', test_archive, 'input')
output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive)
size, csize, path = output.split("\n")[1].split(" ")
assert int(csize) < int(size)
def test_break_lock(self):
self.cmd('init', self.repository_location)
self.cmd('break-lock', self.repository_location)

View File

@@ -15,7 +15,7 @@
yes, TRUISH, FALSISH, DEFAULTISH, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format
from . import BaseTestCase, environment_variable, FakeInputs
@@ -877,3 +877,11 @@ def test_progress_endless_step(capfd):
pi.show()
out, err = capfd.readouterr()
assert err == '.'
def test_partial_format():
assert partial_format('{space:10}', {'space': ' '}) == ' ' * 10
assert partial_format('{foobar}', {'bar': 'wrong', 'foobar': 'correct'}) == 'correct'
assert partial_format('{unknown_key}', {}) == '{unknown_key}'
assert partial_format('{key}{{escaped_key}}', {}) == '{key}{{escaped_key}}'
assert partial_format('{{escaped_key}}', {'escaped_key': 1234}) == '{{escaped_key}}'