sapling/eden/py/dirstate.py
Adam Simpkins 954d8945b2 refactor CLI subcommand definitions
Summary:
Refactor the Eden CLI command so that all subcommands are implemented as
subclasses.  This helps keep the command line argument definitions together
with the logic for the command.

This is primarily just a code refactoring change, but I did include a few minor
behavioral changes to the help output:
- The command list is now always sorted alphabetically in the help output.
- The "help" subcommand can now show help for more than just one subcommand
  deep.  (e.g., `eden help stats io` now works correctly)
- I made some minor improvements to a few of the help strings.

Reviewed By: chadaustin

Differential Revision: D7673021

fbshipit-source-id: dc4c6db20a0fe7452d38bdafc6273e234dba8e4e
2018-04-19 17:59:51 -07:00

224 lines
8.2 KiB
Python

# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import binascii
import hashlib
import struct
from six import iteritems
# Version number for the format of the .hg/dirstate file that is read/written by
# this library. write() stores it as a 4-byte big-endian unsigned integer
# immediately after the two parent hashes, and read() raises a
# DirstateParseException for any file whose version differs from this value.
CURRENT_DIRSTATE_VERSION = 1
# Valid values for the merge state of a dirstate tuple. The merge state is
# serialized as a single *signed* byte (struct format 'b'), which is why the
# non-zero states can be negative.
MERGE_STATE_NOT_APPLICABLE = 0
MERGE_STATE_BOTH_PARENTS = -1
MERGE_STATE_OTHER_PARENT = -2
def write(file, parents, tuples_dict, copymap):
    # type(IO[bytes], Tuple[bytes, bytes], Dict[bytes, Tuple[char, int, byte],
    #      Dict[bytes, bytes]])
    '''Serialize a dirstate to `file`, terminated by a SHA-256 checksum.

    `parents` is the pair of parent hashes, `tuples_dict` maps each path to a
    (status, mode, merge_state) tuple, and `copymap` maps each copy
    destination path to its source path.
    '''
    # Layout of the serialized data, in the order it is produced:
    #
    # 1. Parents: the first 40 bytes are the hashes of the two parent
    #    pointers.
    # 2. Version: the next 4 bytes are the version number of the format.
    # 3. Dirstate tuples, one record per path:
    #    - a '\x01' header byte;
    #    - one byte for the status: the ASCII value of 'n', 'm', 'r', 'a' or
    #      '?', as appropriate;
    #    - four bytes holding mode_t as an unsigned integer;
    #    - one signed byte for the merge state (0 is NotApplicable, -1 is
    #      BothParents, -2 is OtherParent);
    #    - an unsigned short (two bytes) giving the length of the path in
    #      bytes, followed by the path itself. A path cannot contain \0.
    # 4. Copymap entries, one record per copy:
    #    - a '\x02' header byte;
    #    - a length-prefixed (unsigned short) relative path naming the
    #      *destination* of the copy;
    #    - a length-prefixed (unsigned short) relative path naming the
    #      *source* of the copy.
    # 5. Checksum: a '\xFF' header byte followed by the 32-byte SHA-256
    #    digest of everything written before it, *including* the '\xFF'
    #    header. Records in sections 3 and 4 may be interleaved or reordered
    #    freely, but the checksum must come last.
    checksum = hashlib.sha256()

    def emit(chunk):
        # type(bytes) -> None
        # Every byte that reaches the file through here is also folded into
        # the running checksum.
        checksum.update(chunk)
        file.write(chunk)

    for index in (0, 1):
        emit(parents[index])
    emit(struct.pack('>I', CURRENT_DIRSTATE_VERSION))

    for path, (status, mode, merge_state) in iteritems(tuples_dict):
        emit(b'\x01')
        emit(struct.pack('>BIb', ord(status), mode, merge_state))
        _write_path(emit, path)

    for dest, source in iteritems(copymap):
        emit(b'\x02')
        _write_path(emit, dest)
        _write_path(emit, source)

    emit(b'\xFF')
    # The digest itself must not be hashed, so it bypasses emit().
    file.write(checksum.digest())
def read(fp, filename):  # noqa: C901
    # type(IO[bytes], str) -> Tuple[Tuple[bytes, bytes],
    #                               Dict[str, Tuple[str, int, int]],
    #                               Dict[str, str]]
    '''Parse serialized dirstate data from `fp`.

    `fp` is a binary file-like object; only its read() method is used.
    `filename` is used solely to build error messages.

    Returns a tuple of (parents, tuples_dict, copymap) if successful:
    - parents is a pair of 20-byte parent hashes;
    - tuples_dict maps each path to a (status, mode, merge_state) tuple;
    - copymap maps each copy destination path to its source path.

    Any exception raised by fp.read() itself is bubbled up to the caller.

    If contents of the dirstate file do not match the expected format
    (truncated data, unknown version, unknown section header, checksum
    mismatch, or trailing data after the checksum), then a
    DirstateParseException will be thrown.
    '''
    tuples_dict = {}
    copymap = {}
    sha = hashlib.sha256()

    def hashing_read(num):
        # type(int) -> bytes
        # Everything read through here contributes to the checksum that is
        # compared against the one stored at the end of the file.
        data = fp.read(num)
        sha.update(data)
        return data

    parent_bytes = hashing_read(40)
    if len(parent_bytes) != 40:
        raise DirstateParseException(
            'Reached EOF while reading dirstate parents in {}.\n'.
            format(filename)
        )
    parents = parent_bytes[:20], parent_bytes[20:40]

    binary_version = hashing_read(4)
    if len(binary_version) != 4:
        raise DirstateParseException(
            'Reached EOF while reading the version number in {}.\n'.
            format(filename)
        )
    version = struct.unpack('>I', binary_version)[0]
    if version != CURRENT_DIRSTATE_VERSION:
        raise DirstateParseException(
            'Unknown dirstate version in {}. Found {} but expected {}.\n'.
            format(filename, version, CURRENT_DIRSTATE_VERSION)
        )

    while True:
        header = hashing_read(1)
        if not header:
            # We have reached the end of the file. Note that a file ending
            # here has no checksum section, so nothing is verified.
            break
        elif header == b'\x01':
            # Dirstate tuple: status (1 byte), mode (4 bytes), merge state
            # (1 signed byte), then the length-prefixed path.
            scalars = hashing_read(6)
            if len(scalars) != 6:
                raise DirstateParseException(
                    'Malformed dirstate tuple in {}. Aborting read().\n'.
                    format(filename)
                )
            path = _read_path(hashing_read, filename)
            status, mode, merge = struct.unpack('>BIb', scalars)
            # TODO(mbolin): Verify status and merge?
            tuples_dict[path] = (chr(status), mode, merge)
        elif header == b'\x02':
            # Copymap entry: destination path followed by source path.
            dest = _read_path(hashing_read, filename)
            source = _read_path(hashing_read, filename)
            copymap[dest] = source
        elif header == b'\xFF':
            # Reading the checksum, so we use fp.read() instead of
            # hashing_read(): the stored digest is not part of the hash.
            binary_checksum = fp.read(32)
            if len(binary_checksum) != 32:
                raise DirstateParseException(
                    'Reached EOF while reading checksum hash in {}.\n'.
                    format(filename)
                )
            digest = sha.digest()
            if binary_checksum != digest:
                # hexlify() returns bytes on Python 3; decode so the message
                # shows plain hex rather than a b'...' repr.
                raise DirstateParseException(
                    'Checksum mismatch when reading {}. Observed checksum is '
                    '{}, but the checksum in the file is {}.\n'.format(
                        filename,
                        binascii.hexlify(digest).decode('ascii'),
                        binascii.hexlify(binary_checksum).decode('ascii')
                    )
                )
            if fp.read(1) != b'':
                raise DirstateParseException(
                    'Suspicious data is present after '
                    'the end of the valid checksum in {}.\n'.
                    format(filename)
                )
            # There is no more data, as expected.
            break
        else:
            # ord() turns the single header byte into an int so that it can
            # be rendered in hex on both Python 2 and Python 3.
            raise DirstateParseException(
                'Unexpected header byte when reading {}: 0x{:02x}. '
                'Ignoring remaining dirstate data.\n'.
                format(filename, ord(header))
            )

    return parents, tuples_dict, copymap
def _write_path(writer, path):
    # type(Callable[[bytes], None], Union[bytes, str]) -> None
    '''Emit `path` through `writer` as a length-prefixed byte string.

    The prefix is a big-endian unsigned short holding the length of the path
    in bytes, so a path longer than 65535 bytes cannot be represented
    (struct.pack() raises struct.error for it).

    Text paths are encoded as UTF-8 first. This mirrors _read_path(), which
    decodes paths as UTF-8 on Python 3, and guarantees that the length prefix
    counts bytes rather than characters.
    '''
    if not isinstance(path, bytes):
        path = path.encode('utf8')
    writer(struct.pack('>H', len(path)))
    writer(path)
def _read_path(reader, filename):
    # type(Callable[[int], bytes], str) -> str
    '''Read one length-prefixed path via `reader` and return it as a str.

    `reader(n)` must return up to n bytes. The path is stored as a big-endian
    unsigned short giving its length, followed by that many bytes.
    `filename` is only used in error messages.

    Raises DirstateParseException if the data ends before the length prefix
    or the path has been read completely.
    '''
    length_field = reader(2)
    if len(length_field) != 2:
        raise DirstateParseException(
            'Reached EOF while reading path length in {}.\n'.format(filename)
        )

    (expected_len,) = struct.unpack('>H', length_field)
    raw_path = reader(expected_len)
    if len(raw_path) != expected_len:
        raise DirstateParseException(
            'Reached EOF while reading path in {}.\n'.format(filename)
        )

    # On Python 2 the bytes read are already a str; on Python 3 they have to
    # be decoded.
    if not isinstance(raw_path, str):
        return str(raw_path, 'utf8')
    return raw_path
class DirstateParseException(Exception):
    '''Raised when dirstate data does not match the expected format.'''