update eden doctor to detect broken commit hash

Summary: Update the eden doctor to detect if the commit hash in the SNAPSHOT and dirstate file is valid or not, if invalid, try to fix it

Reviewed By: strager

Differential Revision: D10156841

fbshipit-source-id: 447a508604b87203ee8f1b0cf0b04f9d49752c9d
This commit is contained in:
Shuguang Ye 2018-10-11 12:29:56 -07:00 committed by Facebook Github Bot
parent ce30b1e029
commit 9cfc64124b
2 changed files with 291 additions and 7 deletions

View File

@ -26,6 +26,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple
import eden.dirstate
import facebook.eden.ttypes as eden_ttypes
from thrift.Thrift import TApplicationException
from . import config as config_mod, filesystem, mtab, ui, version
from .config import EdenInstance
@ -843,7 +844,7 @@ def check_snapshot_dirstate_consistency(
dirstate = os.path.join(path, ".hg", "dirstate")
try:
with open(dirstate, "rb") as f:
parents, _tuples_dict, _copymap = eden.dirstate.read(f, dirstate)
parents, tuples_dict, copymap = eden.dirstate.read(f, dirstate)
except OSError as ex:
if ex.errno == errno.ENOENT:
tracker.add_problem(MissingHgDirectory(path))
@ -852,10 +853,117 @@ def check_snapshot_dirstate_consistency(
return
p1_hex = binascii.hexlify(parents[0]).decode("utf-8")
if snapshot_hex != p1_hex:
p2_hex = binascii.hexlify(parents[1]).decode("utf-8")
is_p2_hex_valid = True
try:
is_snapshot_hex_valid = is_commit_hash_valid(instance, path, snapshot_hex)
is_p1_hex_valid = is_commit_hash_valid(instance, path, p1_hex)
if p2_hex != (20 * b"\0"):
is_p2_hex_valid = is_commit_hash_valid(instance, path, p2_hex)
except Exception as ex:
tracker.add_problem(
SnapshotMismatchError(instance, path, snapshot_hex, parents)
Problem(f"Unable to get scm status for path {path}:\n {ex}")
)
return
if is_p2_hex_valid is not True:
p2_hex = "0000000000000000000000000000000000000000"
if snapshot_hex != p1_hex:
if is_p1_hex_valid:
new_parents = (binascii.unhexlify(p1_hex), binascii.unhexlify(p2_hex))
tracker.add_problem(
SnapshotMismatchError(instance, path, snapshot_hex, parents)
)
elif is_snapshot_hex_valid:
new_parents = (binascii.unhexlify(snapshot_hex), binascii.unhexlify(p2_hex))
tracker.add_problem(
DirStateInvalidError( # type: ignore
instance, path, p1_hex, new_parents, tuples_dict, copymap
)
)
if (not is_snapshot_hex_valid) and (not is_p1_hex_valid):
last_valid_commit_hash = get_tip_commit_hash()
new_parents = (
binascii.unhexlify(last_valid_commit_hash),
binascii.unhexlify(p2_hex),
)
tracker.add_problem(
DirStateInvalidError( # type: ignore
instance, path, p1_hex, new_parents, tuples_dict, copymap
)
)
class DirStateInvalidError(FixableProblem):
def __init__(
self,
instance: EdenInstance,
mount_path: str,
invalid_commit_hash: str,
hg_parents: Tuple[bytes, bytes],
tuples_dict: Dict[bytes, Tuple[str, int, bytes]],
copymap: Dict[bytes, bytes],
) -> None:
self._instance = instance
self._mount_path = mount_path
self._invalid_commit_hash = invalid_commit_hash
self._hg_parents = hg_parents
self._tuples_dict = tuples_dict
self._copymap = copymap
def dirstate(self) -> str:
return os.path.join(self._mount_path, ".hg", "dirstate")
def p1_hex(self) -> str:
return binascii.hexlify(self._hg_parents[0]).decode("utf-8")
def description(self) -> str:
return (
f"mercurial's parent commit {self._invalid_commit_hash}"
f" in {self.dirstate()} is invalid\n"
)
def dry_run_msg(self) -> str:
return f"Would fix Eden to point to parent commit {self.p1_hex()}"
def start_msg(self) -> str:
return f"Fixing Eden to point to parent commit {self.p1_hex()}"
def perform_fix(self) -> None:
with open(self.dirstate(), "wb") as f:
eden.dirstate.write( # type: ignore
f, self._hg_parents, self._tuples_dict, self._copymap
)
parents = eden_ttypes.WorkingDirectoryParents(parent1=self._hg_parents[0])
if self._hg_parents[1] != (20 * b"\0"):
parents.parent2 = self._hg_parents[1]
with self._instance.get_thrift_client() as client:
client.resetParentCommits(self._mount_path.encode("utf-8"), parents)
def get_tip_commit_hash() -> str:
args = ["hg", "log", "-T", "{node}\n", "-r", "tip"]
env = dict(os.environ, HGPLAIN="1")
stdout = subprocess.check_output(args, universal_newlines=True, env=env)
lines: List[str] = list(filter(None, stdout.split("\n")))
return lines[-1]
def is_commit_hash_valid(
instance: EdenInstance, mount_path: str, commit_hash: str
) -> bool:
try:
with instance.get_thrift_client() as client:
client.getScmStatus(mount_path, False, commit_hash)
return True
except TApplicationException as ex:
if "RepoLookupError: unknown revision" in str(ex):
return False
raise
class SnapshotMismatchError(FixableProblem):

View File

@ -84,6 +84,22 @@ class DoctorTestBase(unittest.TestCase):
self.assertEqual(num_manual_fixes, fixer.num_manual_fixes)
def _commit_hash_valid_test_dirstate_hex_invalid(
instance, mount_path, commit_hash
) -> bool:
if commit_hash == "12000000" * 5:
return False
else:
return True
def _commit_hash_valid_test_snapshot_invalid(instance, mount_path, commit_hash) -> bool:
if commit_hash == "12345678" * 5:
return False
else:
return True
class DoctorTest(DoctorTestBase):
# The diffs for what is written to stdout can be large.
maxDiff = None
@ -608,13 +624,168 @@ Fixing Eden to point to parent commit 1200000012000000120000001200000012000000..
],
)
@patch(
"eden.cli.doctor.is_commit_hash_valid",
new=_commit_hash_valid_test_snapshot_invalid,
)
def test_snapshot_and_dirstate_file_differ_and_snapshot_invalid(self):
dirstate_hash_hex = "12000000" * 5
snapshot_hex = "12345678" * 5
instance, mount_path, fixer, out = self._test_hash_check(
dirstate_hash_hex, snapshot_hex
)
self.assertEqual(
f"""\
<yellow>- Found problem:<reset>
mercurial's parent commit for {mount_path} is 1200000012000000120000001200000012000000,
but Eden's internal hash in its SNAPSHOT file is \
1234567812345678123456781234567812345678.
Fixing Eden to point to parent commit 1200000012000000120000001200000012000000...\
<green>fixed<reset>
""",
out,
)
self.assert_results(fixer, num_problems=1, num_fixed_problems=1)
# Make sure resetParentCommits() was called once with the expected arguments
self.assertEqual(
instance.get_thrift_client().set_parents_calls,
[
ResetParentsCommitsArgs(
mount=mount_path.encode("utf-8"),
parent1=b"\x12\x00\x00\x00" * 5,
parent2=None,
)
],
)
@patch("eden.cli.doctor.is_commit_hash_valid", return_value=False)
@patch("eden.cli.doctor.get_tip_commit_hash", return_value="87654321" * 5)
def test_snapshot_and_dirstate_file_differ_and_all_commit_hash_invalid(
self, mock_is_commit_hash_valid, mock_get_tip_commit_hash
):
dirstate_hash_hex = "12000000" * 5
snapshot_hex = "12345678" * 5
valid_commit_hash = "87654321" * 5
instance, mount_path, fixer, out = self._test_hash_check(
dirstate_hash_hex, snapshot_hex
)
dirstate = os.path.join(mount_path, ".hg", "dirstate")
self.assertEqual(
f"""\
<yellow>- Found problem:<reset>
mercurial's parent commit {dirstate_hash_hex} in {dirstate} is invalid
Fixing Eden to point to parent commit {valid_commit_hash}...\
<green>fixed<reset>
""",
out,
)
self.assert_results(fixer, num_problems=1, num_fixed_problems=1)
# Make sure resetParentCommits() was called once with the expected arguments
self.assertEqual(
instance.get_thrift_client().set_parents_calls,
[
ResetParentsCommitsArgs(
mount=mount_path.encode("utf-8"),
parent1=b"\x87\x65\x43\x21" * 5,
parent2=None,
)
],
)
@patch("eden.cli.doctor.is_commit_hash_valid", return_value=False)
@patch("eden.cli.doctor.get_tip_commit_hash", return_value="87654321" * 5)
def test_snapshot_and_dirstate_file_differ_and_all_parents_invalid(
self, mock_is_commit_hash_valid, mock_get_tip_commit_hash
):
dirstate_hash_hex = "12000000" * 5
dirstate_parent2_hash_hex = "12340000" * 5
snapshot_hex = "12345678" * 5
valid_commit_hash = "87654321" * 5
instance, mount_path, fixer, out = self._test_hash_check(
dirstate_hash_hex, snapshot_hex, dirstate_parent2_hash_hex
)
dirstate = os.path.join(mount_path, ".hg", "dirstate")
self.assertEqual(
f"""\
<yellow>- Found problem:<reset>
mercurial's parent commit {dirstate_hash_hex} in {dirstate} is invalid
Fixing Eden to point to parent commit {valid_commit_hash}...\
<green>fixed<reset>
""",
out,
)
self.assert_results(fixer, num_problems=1, num_fixed_problems=1)
# Make sure resetParentCommits() was called once with the expected arguments
self.assertEqual(
instance.get_thrift_client().set_parents_calls,
[
ResetParentsCommitsArgs(
mount=mount_path.encode("utf-8"),
parent1=b"\x87\x65\x43\x21" * 5,
parent2=None,
)
],
)
@patch(
"eden.cli.doctor.is_commit_hash_valid",
side_effect=_commit_hash_valid_test_dirstate_hex_invalid,
)
def test_snapshot_and_dirstate_file_differ_and_dirstate_commit_hash_invalid(
self, mock_is_commit_hash_valid
):
dirstate_hash_hex = "12000000" * 5
snapshot_hex = "12345678" * 5
instance, mount_path, fixer, out = self._test_hash_check(
dirstate_hash_hex, snapshot_hex
)
dirstate = os.path.join(mount_path, ".hg", "dirstate")
self.assertEqual(
f"""\
<yellow>- Found problem:<reset>
mercurial's parent commit {dirstate_hash_hex} in {dirstate} is invalid
Fixing Eden to point to parent commit {snapshot_hex}...\
<green>fixed<reset>
""",
out,
)
self.assert_results(fixer, num_problems=1, num_fixed_problems=1)
self.assertEqual(
instance.get_thrift_client().set_parents_calls,
[
ResetParentsCommitsArgs(
mount=mount_path.encode("utf-8"),
parent1=b"\x12\x34\x56\x78" * 5,
parent2=None,
)
],
)
def _test_hash_check(
self, dirstate_hash_hex: str, snapshot_hex: str
self, dirstate_hash_hex: str, snapshot_hex: str, dirstate_parent2_hash_hex=None
) -> Tuple["FakeEdenInstance", str, doctor.ProblemFixer, str]:
instance = FakeEdenInstance(self._create_tmp_dir())
mount_path = instance.create_test_mount(
"path1", snapshot=snapshot_hex, dirstate_parent=dirstate_hash_hex
)
if dirstate_parent2_hash_hex is None:
mount_path = instance.create_test_mount(
"path1", snapshot=snapshot_hex, dirstate_parent=dirstate_hash_hex
)
else:
mount_path = instance.create_test_mount(
"path1",
snapshot=snapshot_hex,
dirstate_parent=(dirstate_hash_hex, dirstate_parent2_hash_hex),
)
fixer, out = self.create_fixer(dry_run=False)
doctor.check_snapshot_dirstate_consistency(
@ -1833,6 +2004,11 @@ class FakeClient:
)
)
def getScmStatus(
self, mountPoint=None, listIgnored=None, commit=None
) -> Optional[eden_ttypes.ScmStatus]:
return None
class FakeEdenInstance:
def __init__(