eden: port the Eden hg extension to core Mercurial

Summary:
This ports the logic from `eden/hg/eden/` to `scm/hg/mercurial/`.
Note this does not delete the logic from `eden/hg/eden` as part of this
change because we may continue to do Eden releases before we roll out a
version of Hg with this code. Only once Hg has been rolled out everywhere
[that is using Eden] can we consider removing
`/usr/local/fb-mercurial/eden/hgext3rd/eden`.

Reviewed By: quark-zju

Differential Revision: D10316761

fbshipit-source-id: cae1dfad831ad6505590628cf969897167e84b30
This commit is contained in:
Wez Furlong 2019-01-16 14:17:09 -08:00 committed by Facebook Github Bot
parent a4b964e12c
commit 30058f0170
13 changed files with 1290 additions and 1 deletions

View File

@ -552,6 +552,15 @@ switch_slashes tag_refname tags_file their_heads todo_total to_export to_pass
to_store total_bytes tree_sha tunnel_host tv_sec_ofs upstream_names
upstream_tips uptodate_annotated_tags url_scheme uuid_re version_info
without_newline
action_type added_files clean_files conflict_paths
create_clone_of_internal_map create_eden_dirstate deleted_files display_mode
eden_files explicit_matches get_merge_string get_merge_string ignored_files
manifest_entry max_to_show merge_state merge_str modified_files
non_removed_matches nonnormal_copy num_remaining orig_pack orig_unpack
parent_mf readlink_retry_estale removed_files to_remove total_conflicts
unknown_files unsure_files why_not_eden wrap_pack wrap_unpack
""".split()
# ported from check-commit

View File

@ -0,0 +1,153 @@
# Copyright (c) 2016-present, Facebook, Inc.
# All Rights Reserved.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
"""
Mercurial extension for supporting eden client checkouts.
This overrides the dirstate to check with the eden daemon for modifications,
instead of doing a normal scan of the filesystem.
"""
import errno
import os
import random
import sys
import time
from . import demandimport, node
if sys.version_info < (2, 7, 6):
    # struct.pack() and struct.unpack() only accept unicode format strings
    # starting with Python 2.7.6. Our devservers have 2.7.5, so monkey patch
    # both functions to encode a unicode format before delegating to the
    # original implementation.
    import struct

    orig_pack = struct.pack
    orig_unpack = struct.unpack

    # F821 is suppressed below: the sys.version_info check above guarantees
    # that this code only runs on Python 2.x, where `unicode` is a builtin.
    def wrap_pack(fmt, *args):
        """Call the original struct.pack(), encoding a unicode fmt first."""
        is_text = isinstance(fmt, unicode)  # noqa: F821
        return orig_pack(fmt.encode("utf-8") if is_text else fmt, *args)

    def wrap_unpack(fmt, data):
        """Call the original struct.unpack(), encoding a unicode fmt first."""
        is_text = isinstance(fmt, unicode)  # noqa: F821
        return orig_unpack(fmt.encode("utf-8") if is_text else fmt, data)

    struct.pack = wrap_pack
    struct.unpack = wrap_unpack
# Disable demandimport while importing thrift files.
#
# The thrift modules try importing modules which may or may not exist, and they
# handle the ImportError generated if the modules aren't present. demandimport
# breaks this behavior by making it appear like the modules were successfully
# loaded, and only throwing ImportError later when you actually try to use
# them.
with demandimport.deactivated():
import eden.thrift as eden_thrift_module
import facebook.eden.ttypes as eden_ttypes
create_thrift_client = eden_thrift_module.create_thrift_client
ScmFileStatus = eden_ttypes.ScmFileStatus
CheckoutMode = eden_ttypes.CheckoutMode
ConflictType = eden_ttypes.ConflictType
FileInformationOrError = eden_ttypes.FileInformationOrError
ManifestEntry = eden_ttypes.ManifestEntry
NoValueForKeyError = eden_ttypes.NoValueForKeyError
def readlink_retry_estale(path):
    """Return the target of the symlink at ``path``, retrying on ESTALE.

    An OSError whose errno is ESTALE is retried up to 10 times, sleeping for
    a short random interval between attempts. Any other OSError, or an
    ESTALE once the retries are exhausted, is re-raised to the caller.
    """
    retries_left = 10
    while True:
        try:
            return os.readlink(path)
        except OSError as ex:
            if ex.errno != errno.ESTALE or retries_left == 0:
                raise
        retries_left -= 1
        time.sleep(random.uniform(0.001, 0.01))
class EdenThriftClient(object):
    """Convenience wrapper around the Eden thrift API for one checkout.

    Every public method opens a fresh thrift connection, issues a single
    call against this checkout's Eden mount point and closes the connection.
    """

    def __init__(self, repo):
        self._repo = repo
        self._root = repo.root
        eden_dir = os.path.join(self._root, ".eden")
        self._socket_path = readlink_retry_estale(os.path.join(eden_dir, "socket"))

        # Read the .eden/root symlink to see what eden thinks the name of this
        # mount point is. This might not match self._root in some cases. In
        # particular, a parent directory of the eden mount might be bind
        # mounted somewhere else, resulting in it appearing at multiple
        # separate locations.
        self._eden_root = readlink_retry_estale(os.path.join(eden_dir, "root"))

    def _get_client(self):
        """
        Create a new client instance for each call because we may be idle
        (from the perspective of the server) between calls and have our
        connection snipped by the server.

        We could potentially try to speculatively execute a call and
        reconnect on transport failure, but for the moment this strategy
        is a reasonable compromise.
        """
        return create_thrift_client(socket_path=self._socket_path)

    def getManifestEntry(self, relativePath):
        """Ask Eden for the manifest entry of ``relativePath``."""
        with self._get_client() as eden:
            return eden.getManifestEntry(self._eden_root, relativePath)

    def setHgParents(self, p1, p2, need_flush=True):
        """Tell Eden which commits are the working directory parents.

        A ``p2`` equal to the null id is sent to Eden as None. Pending
        transaction data is flushed first unless ``need_flush`` is false.
        """
        if p2 == node.nullid:
            p2 = None

        if need_flush:
            self._flushPendingTransactions()

        new_parents = eden_ttypes.WorkingDirectoryParents(parent1=p1, parent2=p2)
        with self._get_client() as eden:
            eden.resetParentCommits(self._eden_root, new_parents)

    def getStatus(self, parent, list_ignored):  # noqa: C901
        # type(str, bool) -> Dict[str, int]
        """Return Eden's SCM status of the checkout relative to ``parent``."""
        # If we are in a pending transaction the parent commit we are querying
        # against might not have been stored to disk yet. Flush the pending
        # transaction state before asking Eden about the status.
        self._flushPendingTransactions()

        with self._get_client() as eden:
            return eden.getScmStatus(self._eden_root, list_ignored, parent)

    def checkout(self, node, checkout_mode, need_flush=True):
        """Ask Eden to check out ``node`` and return what the call returns."""
        if need_flush:
            self._flushPendingTransactions()

        with self._get_client() as eden:
            return eden.checkOutRevision(self._eden_root, node, checkout_mode)

    def glob(self, globs):
        """Evaluate ``globs`` inside the Eden mount."""
        with self._get_client() as eden:
            return eden.glob(self._eden_root, globs)

    def getFileInformation(self, files):
        """Fetch file information for ``files`` from Eden."""
        with self._get_client() as eden:
            return eden.getFileInformation(self._eden_root, files)

    def _flushPendingTransactions(self):
        """Write out pending data of the current transaction, if any."""
        # If a transaction is currently in progress, make sure it has flushed
        # pending commit data to disk so that eden will be able to access it.
        txn = self._repo.currenttransaction()
        if txn is None:
            return
        txn.writepending()

View File

@ -21,6 +21,7 @@ from . import (
crecord as crecordmod,
dagop,
dirstateguard,
edenfs,
encoding,
error,
formatter,
@ -3230,6 +3231,9 @@ def forget(ui, repo, match, prefix, explicitonly):
def files(ui, ctx, m, fm, fmt, subrepos):
if (ctx.rev() is None) and (edenfs.requirement in ctx.repo().requirements):
return eden_files(ui, ctx, m, fm, fmt)
rev = ctx.rev()
ret = 1
ds = ctx.repo().dirstate
@ -3259,6 +3263,26 @@ def files(ui, ctx, m, fm, fmt, subrepos):
return ret
def eden_files(ui, ctx, m, fm, fmt):
    """List working copy files matched by ``m`` in an Eden checkout.

    Returns 0 if at least one file was written to the formatter ``fm`` and
    1 otherwise.
    """
    # The default files() function code looks up the dirstate entry for every
    # single matched file. This is unnecessary in most cases, and will trigger
    # a lot of thrift calls to Eden. We have augmented the Eden dirstate with
    # a function that can return only non-removed files without requiring
    # looking up every single match.
    eden_ds = ctx.repo().dirstate
    wrote_any = False
    for path in sorted(eden_ds.non_removed_matches(m)):
        fm.startitem()
        if ui.verbose:
            fctx = ctx[path]
            fm.write("size flags", "% 10d % 1s ", fctx.size(), fctx.flags())
        fm.data(abspath=path)
        fm.write("path", fmt, m.rel(path))
        wrote_any = True
    return 0 if wrote_any else 1
def remove(ui, repo, m, prefix, after, force, subrepos, warnings=None):
join = lambda f: os.path.join(prefix, f)
ret = 0

View File

@ -32,6 +32,7 @@ from .. import (
dagparser,
dagutil,
drawdag,
edenfs,
encoding,
error,
exchange,
@ -798,11 +799,58 @@ def debugdeltachain(ui, repo, file_=None, **opts):
[
("", "nodates", None, _("do not display the saved mtime")),
("", "datesort", None, _("sort by saved mtime")),
(
"j",
"json",
None,
_("In a virtualized checkout, print the output in JSON format"),
),
],
_("[OPTION]..."),
)
def debugstate(ui, repo, **opts):
"""show the contents of the current dirstate"""
if edenfs.requirement in repo.requirements:
import eden.dirstate
def get_merge_string(value):
if value == eden.dirstate.MERGE_STATE_NOT_APPLICABLE:
return ""
elif value == eden.dirstate.MERGE_STATE_OTHER_PARENT:
return "MERGE_OTHER"
elif value == eden.dirstate.MERGE_STATE_BOTH_PARENTS:
return "MERGE_BOTH"
# We don't expect any other merge state values; we probably had a bug
# if the dirstate file contains other values.
# However, just return the integer value as a string so we can use
# debugdirstate to help debug this situation if it does occur.
return str(value)
if opts.get("json"):
data = {}
for path, dirstate_tuple in repo.dirstate.edeniteritems():
status, mode, merge_state = dirstate_tuple
data[path] = {
"status": status,
"mode": mode,
"merge_state": merge_state,
"merge_state_string": get_merge_string(merge_state),
}
ui.write(json.dumps(data))
ui.write("\n")
return
for path, dirstate_tuple in sorted(repo.dirstate._map._map.iteritems()):
status, mode, merge_state = dirstate_tuple
if mode & 0o20000:
display_mode = "lnk"
else:
display_mode = "%3o" % (mode & 0o777 & ~util.umask)
merge_str = get_merge_string(merge_state)
ui.write("%s %s %12s %s\n" % (status, display_mode, merge_str, path))
return 0
nodates = opts.get(r"nodates")
datesort = opts.get(r"datesort")

523
mercurial/eden_dirstate.py Normal file
View File

@ -0,0 +1,523 @@
# Copyright (c) 2016-present, Facebook, Inc.
# All Rights Reserved.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
import os
import stat
from eden.dirstate import MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT
from . import (
EdenThriftClient as thrift,
dirstate,
eden_dirstate_map,
encoding,
match as matchmod,
policy,
scmutil,
util,
)
from .i18n import _
from .EdenThriftClient import ScmFileStatus
from .node import nullid
parsers = policy.importmod("parsers")
propertycache = util.propertycache
class statobject(object):
    """Stat-like record used to represent file information from eden.

    Only the ``st_mode``, ``st_size`` and ``st_mtime`` attributes exist; each
    defaults to None.
    """

    __slots__ = ("st_mode", "st_size", "st_mtime")

    def __init__(self, mode=None, size=None, mtime=None):
        self.st_mode, self.st_size, self.st_mtime = mode, size, mtime
class eden_dirstate(dirstate.dirstate):
def __init__(self, repo, ui, root):
    """Set up a dirstate for ``repo`` that is backed by the Eden daemon."""
    self.eden_client = thrift.EdenThriftClient(repo)

    # We should override any logic in dirstate that uses self._validate.
    validatefn = repo._dirstatevalidate

    # Use repo.localvfs when the repo has one, otherwise fall back to
    # repo.vfs.
    try:
        vfs = repo.localvfs
    except AttributeError:
        vfs = repo.vfs

    # Try the four-argument base constructor first; when that raises
    # TypeError, retry with a trailing sparsematchfn argument of None.
    base = super(eden_dirstate, self)
    try:
        base.__init__(vfs, ui, root, validatefn)
    except TypeError:
        base.__init__(vfs, ui, root, validatefn, None)

    def create_eden_dirstate(ui, opener, root):
        # Factory installed as self._mapcls: builds the Eden dirstate map
        # around this dirstate's thrift client and repo.
        return eden_dirstate_map.eden_dirstate_map(
            ui, opener, root, self.eden_client, repo
        )

    self._mapcls = create_eden_dirstate
def __iter__(self):
# FIXME: This appears to be called by `hg reset`, so we provide a dummy
# response here, but really, we should outright prohibit this.
# Most likely, we will have to replace the implementation of `hg reset`.
return
yield
def iteritems(self):  # override
    """Always raise NotImplementedError; use edeniteritems() instead."""
    # Iterating over every entry looks like the kind of O(repo) operation
    # that should not be allowed. If it is ever needed, it should go through
    # a separate, explicit codepath.
    #
    # edeniteritems() is available for callers that only want the files
    # explicitly tracked in the eden dirstate.
    raise NotImplementedError("eden_dirstate.iteritems()")
def dirs(self):  # override
    """Always raise NotImplementedError; not supported by eden_dirstate."""
    raise NotImplementedError("eden_dirstate.dirs()")
def edeniteritems(self):
    """
    Iterate over every (path, entry) pair tracked in the eden dirstate.

    This covers non-normal files (e.g., files marked for addition or
    removal), as well as normal files that have merge state information.
    """
    tracked = self._map._map
    return tracked.iteritems()
def _p1_ctx(self):
"""Return the context object for the first parent commit."""
return self._map._repo.unfiltered()[self.p1()]
# Code paths that invoke dirstate.walk()
# - hg add
# unknown=True, ignored=False, full=False
# - only cares about returned paths
# - hg perfwalk (contrib/perf.py)
# unknown=True, ignored=False
# - only cares about returned paths
# - hg grep (hgext/tweakdefaults.py)
# unknown=False, ignored=False
# - cares about returned paths, whether exists, and is symlink or not
# - committablectx.walk()
# unknown=True, ignored=False
# - only cares about returned paths
# - mercurial/scmutil.py: _interestingfiles()
# unknown=True, ignored=False, full=False
# hg addremove
# - cares about returned paths, dirstate status (which it has to
# re-lookup), and whether they exist on disk or not
#
# Code paths that invoke context.walk()
# - mercurial/cmdutil.py:
# - hg cat
# - hg cp
# - hg revert
# - hg annotate (mercurial/commands.py)
# - mercurial/debugcommands.py:
# - hg debugfilerevision
# - hg debugpickmergetool
# - hg debugrename
# - hg debugwalk
# - mercurial/fileset.py (_buildsubset)
# - hgext/catnotate.py
# - hgext/fastannotate/commands.py
# - hgext/sparse.py
# - hgext/remotefilelog/__init__.py
#
# Code paths that invoke scmutil._interestingfiles()
# - scmutil.addremove()
# - scmutil.marktouched()
#
# - full is primarily used by fsmonitor extension
# - I haven't seen any code path that calls with ignored=True
#
# match callbacks:
# - bad: called for file/directory patterns that don't match anything
# - explicitdir: called for patterns that match a directory
# - traversedir: used by `hg purge` (hgext/purge.py) to purge empty
# directories
# - we potentially should just implement purge inside Eden
#
def walk(self, match, subrepos, unknown, ignored, full=True):  # override
    """
    Find all files matched by ``match`` using Eden's status information.

    ``unknown`` controls whether files Eden reports as added are included,
    ``ignored`` is forwarded to Eden as ``list_ignored``, and when ``full``
    is False files from the parent manifest that are neither in Eden's
    status nor in the dirstate map are skipped.

    Return a dict mapping filename to a stat-like object, or to None for
    paths that could not be stat()ed or that were removed.
    """
    eden_entries = self.eden_client.getStatus(
        self.p1(), list_ignored=ignored
    ).entries
    tracked = self._map._map

    def lstat_or_none(path):
        try:
            return os.lstat(os.path.join(self._root, path))
        except OSError:
            return None

    # Keep the status codes in locals so that they are cheaper to access in
    # the loop below.
    MODIFIED = ScmFileStatus.MODIFIED
    REMOVED = ScmFileStatus.REMOVED
    ADDED = ScmFileStatus.ADDED
    IGNORED = ScmFileStatus.IGNORED

    found = {}
    for path, code in eden_entries.iteritems():
        if not match(path):
            continue

        # TODO: It would probably be better to update the thrift APIs to
        # return the file status information, so we don't have to call
        # os.lstat() here. Most callers only really care about whether the
        # file exists and if it is a symlink or a regular file.
        if code == MODIFIED or code == IGNORED:
            # Eden should only return IGNORED results when ignored is True,
            # so ignored paths are reported unconditionally.
            found[path] = lstat_or_none(path)
        elif code == ADDED:
            # If unknown is False, we still want to report files explicitly
            # marked as added in the dirstate. That case is handled below
            # when walking over the dirstate map.
            if unknown:
                found[path] = lstat_or_none(path)
        elif code == REMOVED:
            found[path] = None
        else:
            raise RuntimeError("Unexpected status code: %s" % code)

    for path in tracked:
        if path in found or not match(path):
            continue
        found[path] = lstat_or_none(path)

    if full:
        regular_mode = stat.S_IFREG | 0o644
        mode_for_flags = {
            "l": stat.S_IFLNK | 0o777,
            "x": stat.S_IFREG | 0o755,
        }
        parent_mf = self._p1_ctx().manifest()
        for path, flags in parent_mf.matches(match).iteritems():
            if path in eden_entries or path in tracked:
                continue
            # Pretty much all of the callers of walk() only care about
            # the st_mode field.
            found[path] = statobject(
                mode=mode_for_flags.get(flags, regular_mode), size=0, mtime=0
            )

    explicit_matches = self._call_match_callbacks(match, found, ())
    for path, mode in explicit_matches.iteritems():
        found[path] = (
            None if mode is None else statobject(mode=mode, size=0, mtime=0)
        )

    return found
def _call_match_callbacks(self, match, results1, results2):
"""
Process all explicit patterns in the match, and call match.bad()
or match.explicitdir() if necessary
Returns a dictionary of (path -> mode) for all explicit matches that
are not already present in the results. The mode will be None if the
path does not exist on disk.
"""
# TODO: We do not currently invoke match.traversedir
# This is currently only used by `hg purge`, which uses it to remove
# empty directories.
# We probably should just build our own Eden-specific version of purge.
explicit_matches = {}
for path in sorted(match.files()):
try:
if path in results1 or path in results2:
continue
mode = os.lstat(os.path.join(self._root, path)).st_mode
if stat.S_ISDIR(mode):
if match.explicitdir:
match.explicitdir(path)
elif stat.S_ISREG(mode) or stat.S_ISLNK(mode):
explicit_matches[path] = mode
except OSError as ex:
# Check to see if this refers to a removed file or directory.
# Call match.bad() otherwise
if self._ismissing(path):
explicit_matches[path] = None
else:
match.bad(path, encoding.strtolocal(ex.strerror))
return explicit_matches
def _ismissing(self, path):
"""
Check to see if this path refers to a deleted file that mercurial
knows about but that no longer exists on disk.
"""
# Check to see if the parent commit knows about this path
parent_mf = self._p1_ctx().manifest()
if parent_mf.hasdir(path):
return True
# Check to see if the non-normal files list knows about this path
# or any child of this path as a directory name.
# (This handles the case where an untracked file was added with
# 'hg add' but then deleted from disk.)
if path in self._map._map:
return True
dirpath = path + "/"
for entry in self._map._map:
if entry.startswith(dirpath):
return True
return False
def status(self, match, subrepos, ignored, clean, unknown):  # override
    """Compute the working copy status from Eden's status and the dirstate.

    ``match`` selects the paths to report, ``ignored`` is forwarded to Eden
    as ``list_ignored`` and ``clean`` requests that clean files be listed
    too. ``subrepos`` and ``unknown`` are accepted for interface
    compatibility but are not consulted here.

    Returns a ``(unsure_files, status)`` tuple: ``unsure_files`` is always
    an empty list and ``status`` is a ``scmutil.status`` built from the
    modified, added, removed, deleted, unknown, ignored and clean lists.

    Raises RuntimeError if Eden reports an unrecognized status code.
    """
    edenstatus = self.eden_client.getStatus(self.p1(), list_ignored=ignored).entries
    nonnormal_copy = self._map.create_clone_of_internal_map()

    # If the caller also wanted us to return clean files,
    # find all matching files from the current commit manifest.
    # If they are not in the eden status results or the dirstate
    # non-normal list then they must be clean.
    clean_files = []
    if clean:
        clean_files = [
            path
            for path in self._parent_commit_matches(match)
            if path not in edenstatus and path not in nonnormal_copy
        ]

    # Store local variables for the status states, so they are cheaper
    # to access in the loop below.
    MODIFIED = ScmFileStatus.MODIFIED
    REMOVED = ScmFileStatus.REMOVED
    ADDED = ScmFileStatus.ADDED
    IGNORED = ScmFileStatus.IGNORED

    # Process the modified file list returned by Eden.
    # We must merge it with our list of non-normal files to compute
    # the removed/added lists correctly.
    modified_files = []
    added_files = []
    removed_files = []
    deleted_files = []
    unknown_files = []
    ignored_files = []
    for path, code in edenstatus.iteritems():
        if not match(path):
            continue

        # The entry is removed from nonnormal_copy here so that the second
        # loop below only sees paths Eden did not report.
        if code == MODIFIED:
            # It is possible that the user can mark a file for removal, but
            # then modify it. If it is marked for removal, it should be
            # reported as such by `hg status` even though it is still on
            # disk.
            entry = nonnormal_copy.pop(path, None)
            if entry and entry[0] == "r":
                removed_files.append(path)
            else:
                modified_files.append(path)
        elif code == REMOVED:
            # If the file no longer exists, we must check to see whether the
            # user explicitly marked it for removal.
            entry = nonnormal_copy.pop(path, None)
            if entry and entry[0] == "r":
                removed_files.append(path)
            else:
                deleted_files.append(path)
        elif code == ADDED:
            entry = nonnormal_copy.pop(path, None)
            if entry and (
                entry[0] == "a"
                or (entry[0] == "n" and entry[2] == MERGE_STATE_OTHER_PARENT)
            ):
                added_files.append(path)
            else:
                unknown_files.append(path)
        elif code == IGNORED:
            # Although Eden may think the file should be ignored as per
            # .gitignore, it is possible the user has overridden that
            # default behavior by marking it for addition.
            entry = nonnormal_copy.pop(path, None)
            if entry and entry[0] == "a":
                added_files.append(path)
            else:
                ignored_files.append(path)
        else:
            raise RuntimeError("Unexpected status code: %s" % code)

    # Process any remaining files in our non-normal set that were
    # not reported as modified by Eden.
    for path, entry in nonnormal_copy.iteritems():
        if not match(path):
            continue

        state = entry[0]
        if state == "m":
            # presumably 0 is the "not applicable" merge state -- confirm
            # against eden.dirstate.
            if entry[2] == 0:
                # The message id handed to _() must be a constant string so
                # that it can be looked up in a translation catalog; the
                # path is interpolated afterwards.
                self._ui.warn(
                    _(
                        "Unexpected Nonnormal file %s has a "
                        "merge state of NotApplicable while its has been "
                        'marked as "needs merging".'
                    )
                    % path
                )
            else:
                modified_files.append(path)
        elif state == "a":
            try:
                mode = os.lstat(os.path.join(self._root, path)).st_mode
                if stat.S_ISREG(mode) or stat.S_ISLNK(mode):
                    added_files.append(path)
                else:
                    deleted_files.append(path)
            except OSError:
                deleted_files.append(path)
        elif state == "r":
            removed_files.append(path)

    # Invoke the match callback functions.
    explicit_matches = self._call_match_callbacks(match, edenstatus, nonnormal_copy)
    for path in explicit_matches:
        # Explicit matches that aren't already present in our results
        # were either skipped because they are ignored or they are clean.
        # Check to figure out which is the case.
        # NOTE(review): when ``clean`` is true an explicitly named clean
        # file may already be in clean_files and is appended to
        # ignored_files here as well -- confirm this is intended.
        if clean:
            ignored_files.append(path)
        elif path in self._p1_ctx():
            clean_files.append(path)
        else:
            ignored_files.append(path)

    status = scmutil.status(
        modified_files,
        added_files,
        removed_files,
        deleted_files,
        unknown_files,
        ignored_files,
        clean_files,
    )

    # We should never have any files we are unsure about
    unsure_files = []

    return (unsure_files, status)
def _parent_commit_matches(self, match):
    """Return the parent commit's matches for ``match``, ignoring bad()."""
    # match.bad() is replaced with a no-op: we don't want to complain about
    # paths that do not exist in the parent commit but do exist in our
    # non-normal files.
    #
    # The default mercurial dirstate.matches() code never invokes bad() at
    # all, so simply ignore every bad() call.
    quiet_match = matchmod.badmatch(match, lambda fn, msg: None)
    return self._p1_ctx().matches(quiet_match)
def matches(self, match):  # override
    """Return the set of paths matched in the parent commit or dirstate."""
    # Start from matches() on the current working directory parent commit.
    matched = set(self._parent_commit_matches(match))

    # Augment the results with anything modified in the dirstate,
    # to take care of added/removed files.
    matched.update(path for path in self._map._map.keys() if match(path))
    return matched
def non_removed_matches(self, match):  # override
    """
    Behaves like matches(), except that matched dirstate entries whose
    state is "r" (removed) are dropped from the result.
    """
    kept = set(self._parent_commit_matches(match))

    # Augment the results with anything modified in the dirstate,
    # to take care of added/removed files.
    for path, entry in self._map._map.items():
        if not match(path):
            continue
        apply_change = kept.discard if entry[0] == "r" else kept.add
        apply_change(path)
    return kept
def rebuild(self, parent, allfiles, changedfiles=None, exact=False):
    """Rebuild the dirstate for ``parent`` with an empty file list."""
    # The allfiles argument is deliberately ignored and replaced by an
    # empty list.
    #
    # edenfs itself will track the file changes correctly.
    # We only track merge state and added/removed status in the python
    # dirstate code.
    base = super(eden_dirstate, self)
    base.rebuild(parent, allfiles=[], changedfiles=changedfiles, exact=exact)
def normallookup(self, f):  # override
    """Mark a file normal, but possibly dirty."""
    # Only when a merge is going on: if the file was either in state 'm'
    # (-1) or coming from the other parent (-2) before being removed,
    # restore that state.
    #
    # Note that we intentionally use self._map._map.get() here
    # rather than self._map.get() to avoid making a thrift call to Eden
    # if this file is already normal.
    merging = self._pl[1] != nullid
    entry = self._map._map.get(f) if merging else None
    if entry is not None:
        status, mode, merge_state = entry
        if status == "r":
            if (
                merge_state == MERGE_STATE_BOTH_PARENTS
                or merge_state == MERGE_STATE_OTHER_PARENT
            ):
                source = self._map.copymap.get(f)
                if merge_state == MERGE_STATE_BOTH_PARENTS:
                    self.merge(f)
                else:
                    self.otherparent(f)
                if source:
                    self.copy(source, f)
                return
        elif status == "m":
            return
        elif status == "n" and merge_state == MERGE_STATE_OTHER_PARENT:
            return

    # TODO: Just invoke self.normal() here for now.
    # Our self.status() function always returns an empty list for the first
    # entry of the returned tuple. (This is the list of files that we're
    # unsure about and need to check on disk.) Therefore the
    # workingctx._dirstatestatus() code never fixes up entries with the
    # mtime set to -1.
    #
    # Ideally we should replace self.normal() too; we should be able to
    # avoid the filesystem stat call in self.normal() anyway.
    self.normal(f)

View File

@ -0,0 +1,157 @@
# Copyright (c) 2016-present, Facebook, Inc.
# All Rights Reserved.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
"""Eden implementation for the dirstatemap class."""
import errno
import eden.dirstate as eden_dirstate_serializer
from . import EdenThriftClient as thrift, dirstate, util
MERGE_STATE_NOT_APPLICABLE = eden_dirstate_serializer.MERGE_STATE_NOT_APPLICABLE
MERGE_STATE_BOTH_PARENTS = eden_dirstate_serializer.MERGE_STATE_BOTH_PARENTS
MERGE_STATE_OTHER_PARENT = eden_dirstate_serializer.MERGE_STATE_OTHER_PARENT
DUMMY_MTIME = 0
class eden_dirstate_map(dirstate.dirstatemap):
    """dirstatemap backed by the Eden dirstate file and the Eden daemon.

    ``self._map`` only holds explicitly recorded entries; item lookups for
    any other path are answered by asking Eden for the manifest entry.
    """

    def __init__(self, ui, opener, root, thrift_client, repo):
        # type(eden_dirstate_map, ui, opener, str, EdenThriftClient) -> None
        super(eden_dirstate_map, self).__init__(ui, opener, root)
        # Unlike the default self._map, our values in self._map are tuples of
        # the form: (status: char, mode: uint32, merge_state: int8).
        self._thrift_client = thrift_client
        self._repo = repo

    def write(self, file, now):  # override
        # type(eden_dirstate_map, IO[str], float)
        """Serialize the map to ``file`` and tell Eden about the parents."""
        parents = self.parents()

        # Remove all "clean" entries before writing. (It's possible we should
        # never allow these to be inserted into self._map in the first place.)
        to_remove = [
            path
            for path, v in self._map.iteritems()
            if v[0] == "n" and v[2] == MERGE_STATE_NOT_APPLICABLE
        ]
        for path in to_remove:
            self._map.pop(path)

        eden_dirstate_serializer.write(file, parents, self._map, self.copymap)
        # NOTE(review): close() is deliberately not in a ``finally``;
        # presumably ``file`` is an atomic temp file whose close() publishes
        # the contents, so it must not run after a failed write -- confirm.
        file.close()

        # Inform the edenfs daemon about the parent change.
        # We do not need to flush any pending transaction state here--manifest
        # and changelog data for a transaction is always written to disk
        # before the dirstate is updated.
        self._thrift_client.setHgParents(parents[0], parents[1], need_flush=False)

        self._dirtyparents = False
        self.nonnormalset, self.otherparentset = self.nonnormalentries()

    def read(self):  # override
        """Load parents, entries and the copy map from the dirstate file.

        A missing dirstate file is silently ignored.
        """
        # ignore HG_PENDING because identity is used only for writing
        self.identity = util.filestat.frompath(self._opener.join(self._filename))

        try:
            fp = self._opendirstatefile()
            try:
                parents, dirstate_tuples, copymap = eden_dirstate_serializer.read(
                    fp, self._filename
                )
            finally:
                fp.close()
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            else:
                # If the dirstate file does not exist, then we silently ignore
                # the error because that's what Mercurial's dirstate does.
                return

        if not self._dirtyparents:
            self.setparents(*parents)
        self._map = dirstate_tuples
        self.copymap = copymap

    def iteritems(self):
        raise RuntimeError("Should not invoke iteritems() on eden_dirstate_map!")

    def __len__(self):
        raise RuntimeError("Should not invoke __len__ on eden_dirstate_map!")

    def __iter__(self):
        raise RuntimeError("Should not invoke __iter__ on eden_dirstate_map!")

    def keys(self):
        raise RuntimeError("Should not invoke keys() on eden_dirstate_map!")

    def get(self, key, default=None):
        """Return the entry for ``key``, or ``default`` if there is none."""
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, key):
        return self.get(key) is not None

    def __getitem__(self, filename):
        # type(str) -> parsers.dirstatetuple
        """Return a (status, mode, merge_state, mtime) tuple for ``filename``.

        Entries recorded in ``self._map`` are answered locally; everything
        else is looked up through Eden. Raises KeyError when Eden has no
        value for the path, and for ``.hgsub``/``.hgsubstate``.
        """
        entry = self._map.get(filename)
        if entry is not None:
            status, mode, merge_state = entry
            return (status, mode, merge_state, DUMMY_MTIME)

        # TODO: Support Hg submodules.
        # Mercurial has a bit of logic that depends on whether .hgsub or
        # .hgsubstate is in the dirstate. Currently, Eden does not attempt to
        # support submodules (and none of Hg's codepaths that use submodules
        # have been tested with Eden), so don't bother to go to the server when
        # either .hgsub or .hgsubstate is passed in.
        #
        # Because we know the Thrift call will fail, we throw the corresponding
        # KeyError in this case to avoid the overhead of the Thrift call as a
        # performance optimization.
        if filename == ".hgsub" or filename == ".hgsubstate":
            raise KeyError(filename)

        try:
            # TODO: Consider fetching this from the commit context rather than
            # querying Eden for this information.
            manifest_entry = self._thrift_client.getManifestEntry(filename)
        except thrift.NoValueForKeyError as e:
            raise KeyError(e.key)
        # Return a tuple, like the branch above, so that both lookup paths
        # hand callers the same immutable shape.
        return ("n", manifest_entry.mode, MERGE_STATE_NOT_APPLICABLE, DUMMY_MTIME)

    def hastrackeddir(self, d):  # override
        # TODO(mbolin): Unclear whether it is safe to hardcode this to False.
        return False

    def hasdir(self, d):  # override
        # TODO(mbolin): Unclear whether it is safe to hardcode this to False.
        return False

    def _insert_tuple(self, filename, state, mode, size, mtime):  # override
        """Record ``filename`` as a (state, mode, merge_state) tuple.

        ``size`` is stored as the merge state when it equals one of the two
        merge-state constants; any other value is recorded as "not
        applicable". ``mtime`` is not stored.
        """
        if size in (MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT):
            merge_state = size
        else:
            merge_state = MERGE_STATE_NOT_APPLICABLE

        self._map[filename] = (state, mode, merge_state)

    def nonnormalentries(self):
        """Returns a set of filenames."""
        # type() -> Tuple[Set[str], Set[str]]
        # The first set holds entries whose status is not "n"; the second
        # holds "n" entries whose merge state is "other parent".
        nonnorm = set()
        otherparent = set()
        for path, entry in self._map.iteritems():
            if entry[0] != "n":
                nonnorm.add(path)
            elif entry[2] == MERGE_STATE_OTHER_PARENT:
                otherparent.add(path)
        return nonnorm, otherparent

    def create_clone_of_internal_map(self):
        """Return a shallow copy of the internal path -> tuple map."""
        return dict(self._map)

275
mercurial/eden_update.py Normal file
View File

@ -0,0 +1,275 @@
# Copyright (c) 2016-present, Facebook, Inc.
# All Rights Reserved.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
"""accelerated hg functionality in Eden checkouts
This overrides the dirstate to check with the eden daemon for modifications,
instead of doing a normal scan of the filesystem.
"""
from . import EdenThriftClient as thrift, error, localrepo, merge as mergemod, util
from .i18n import _
CheckoutMode = thrift.CheckoutMode
ConflictType = thrift.ConflictType
_repoclass = localrepo.localrepository
# This function is called by mercurial.merge.update() in the fast path
# to ask the eden daemon to perform the update operation.
def update(
repo,
node,
branchmerge,
force,
ancestor=None,
mergeancestor=False,
labels=None,
matcher=None,
mergeforce=False,
updatecheck=None,
wc=None,
):
"""Update the working copy to ``node`` by asking Eden to do the checkout.

The work happens while holding ``repo.wlock()``: the "preupdate" hook is
run, an "updatestate" file containing the destination hash is written,
Eden performs the checkout (FORCE mode when ``force`` is set, NORMAL mode
otherwise), the dirstate parents are set to the destination, and the
"update" hook is run.

``ancestor``, ``mergeancestor``, ``matcher``, ``mergeforce`` and ``wc``
are accepted but never referenced in this function.

Returns a 4-tuple of stats; the literal ``0, 0, 0, 0`` is used when
there is nothing to update and for forced updates.

Raises error.Abort for an outstanding uncommitted merge or unresolved
merge conflicts.
"""
repo.ui.debug("using eden update code path\n")
with repo.wlock():
wctx = repo[None]
parents = wctx.parents()
p1ctx = parents[0]
destctx = repo[node]
deststr = str(destctx)
if not force:
# Make sure there isn't an outstanding merge or unresolved files.
if len(parents) > 1:
raise error.Abort(_("outstanding uncommitted merge"))
ms = mergemod.mergestate.read(repo)
if list(ms.unresolved()):
raise error.Abort(_("outstanding merge conflicts"))
# The vanilla merge code disallows updating between two unrelated
# branches if the working directory is dirty. I don't really see a
# good reason to disallow this; it should be treated the same as if
# we committed the changes, checked out the other branch then tried
# to graft the changes here.
if p1ctx == destctx:
# No update to perform.
# Just invoke the hooks and return.
repo.hook("preupdate", throw=True, parent1=deststr, parent2="")
repo.hook("update", parent1=deststr, parent2="", error=0)
return 0, 0, 0, 0
# If we are in noconflict mode, then we must do a DRY_RUN first to
# see if there are any conflicts that should prevent us from
# attempting the update.
if updatecheck == "noconflict":
conflicts = repo.dirstate.eden_client.checkout(
destctx.node(), CheckoutMode.DRY_RUN
)
if conflicts:
actions = _determine_actions_for_conflicts(repo, p1ctx, conflicts)
_check_actions_and_raise_if_there_are_conflicts(actions)
# Invoke the preupdate hook
repo.hook("preupdate", throw=True, parent1=deststr, parent2="")
# Record that we're in the middle of an update
# Use repo.localvfs when the repo has one, otherwise repo.vfs.
try:
vfs = repo.localvfs
except AttributeError:
vfs = repo.vfs
vfs.write("updatestate", destctx.hex())
# Ask eden to perform the checkout
if force:
# eden_client.checkout() returns the list of conflicts here,
# but since this is a force update it will have already replaced
# the conflicts with the destination file state, so we don't have
# to do anything with them here.
conflicts = repo.dirstate.eden_client.checkout(
destctx.node(), CheckoutMode.FORCE
)
# We do still need to make sure to update the merge state though.
# In the non-force code path the merge state is updated in
# _handle_update_conflicts().
ms = mergemod.mergestate.clean(repo, p1ctx.node(), destctx.node(), labels)
ms.commit()
stats = 0, 0, 0, 0
actions = {}
else:
conflicts = repo.dirstate.eden_client.checkout(
destctx.node(), CheckoutMode.NORMAL
)
# TODO(mbolin): Add a warning if we did a DRY_RUN and the conflicts
# we get here do not match. Only in the event of a race would we
# expect them to differ from when the DRY_RUN was done (or if we
# decide that DIRECTORY_NOT_EMPTY conflicts do not need to be
# reported during a DRY_RUN).
stats, actions = _handle_update_conflicts(
repo, wctx, p1ctx, destctx, labels, conflicts, force
)
with repo.dirstate.parentchange():
if force:
# If the user has done an `update --clean`, then we should
# remove all entries from the dirstate. Note this call to
# clear() will also remove the parents, but we set them on the
# next line, so we'll be OK.
repo.dirstate.clear()
# TODO(mbolin): Set the second parent, if appropriate.
repo.setparents(destctx.node())
mergemod.recordupdates(repo, actions, branchmerge)
# Clear the update state
util.unlink(vfs.join("updatestate"))
# Invoke the update hook
# The fourth stats element is passed to the hook as its ``error`` value.
repo.hook("update", parent1=deststr, parent2="", error=stats[3])
return stats
def _handle_update_conflicts(repo, wctx, src, dest, labels, conflicts, force):
    """Resolve the conflicts reported by a non-force Eden checkout.

    The update is modelled as a three-way merge:

    - the working directory (``wctx``) is one side of the merge,
    - the destination commit (``dest``) is the other side, and
    - the source commit (``src``) plays the role of the common ancestor.

    That is the behavior we want with respect to the graph topology.  When
    updating from commit A (src) to B (dest) with real ancestor C, the update
    is effectively "revert A..C, then apply C..B", after which the local
    working directory changes (made against A) are re-applied on top of B.
    Using A as the merge ancestor expresses exactly that.

    ``force`` is accepted for signature compatibility but is not consulted
    here.

    Returns the ``(stats, actions)`` pair produced by _applyupdates().
    """
    return _applyupdates(
        repo,
        _determine_actions_for_conflicts(repo, src, conflicts),
        wctx,
        dest,
        labels,
        conflicts,
    )
def _determine_actions_for_conflicts(repo, src, conflicts):
"""Calculate the actions for _applyupdates()."""
# Build a list of actions to pass to mergemod.applyupdates()
actions = dict(
(m, [])
for m in [
"a",
"am",
"cd",
"dc",
"dg",
"dm",
"e",
"f",
"g", # create or modify
"k",
"m",
"p", # path conflicts
"pr", # files to rename
"r",
]
)
for conflict in conflicts:
# The action tuple is:
# - path_in_1, path_in_2, path_in_ancestor, move, ancestor_node
if conflict.type == ConflictType.ERROR:
# We don't record this as a conflict for now.
# We will report the error, but the file will show modified in
# the working directory status after the update returns.
repo.ui.write_err(
_("error updating %s: %s\n") % (conflict.path, conflict.message)
)
continue
elif conflict.type == ConflictType.MODIFIED_REMOVED:
action_type = "cd"
action = (conflict.path, None, conflict.path, False, src.node())
prompt = "prompt changed/deleted"
elif conflict.type == ConflictType.UNTRACKED_ADDED:
# In core Mercurial, this is the case where the file does not exist
# in the manifest of the common ancestor for the merge.
# TODO(mbolin): Check for the "both renamed from " case in
# manifestmerge(), which is the other possibility when the file
# does not exist in the manifest of the common ancestor for the
# merge.
action_type = "m"
action = (conflict.path, conflict.path, None, False, src.node())
prompt = "both created"
elif conflict.type == ConflictType.REMOVED_MODIFIED:
action_type = "dc"
action = (None, conflict.path, conflict.path, False, src.node())
prompt = "prompt deleted/changed"
elif conflict.type == ConflictType.MISSING_REMOVED:
# Nothing to do here really. The file was already removed
# locally in the working directory before, and it was removed
# in the new commit.
continue
elif conflict.type == ConflictType.MODIFIED_MODIFIED:
action_type = "m"
action = (conflict.path, conflict.path, conflict.path, False, src.node())
prompt = "versions differ"
elif conflict.type == ConflictType.DIRECTORY_NOT_EMPTY:
# This is a file in a directory that Eden would have normally
# removed as part of the checkout, but it could not because this
# untracked file was here. Just leave it be.
continue
else:
raise RuntimeError(
"unknown conflict type received from eden: "
"%r, %r, %r" % (conflict.type, conflict.path, conflict.message)
)
actions[action_type].append((conflict.path, action, prompt))
return actions
def _check_actions_and_raise_if_there_are_conflicts(actions):
# In stock Hg, update() performs this check once it gets the set of actions.
conflict_paths = []
for action_type, list_of_tuples in actions.iteritems():
if len(list_of_tuples) == 0:
continue # Note `actions` defaults to [] for all keys.
if action_type not in ("g", "k", "e", "r", "pr"):
conflict_paths.extend(t[0] for t in list_of_tuples)
# Report the exact files with conflicts.
# There can be conflicts even when `hg status` reports no modifications if
# the conflicts are between ignored files that exist in the destination
# commit.
if conflict_paths:
# Only show 10 lines worth of conflicts
conflict_paths.sort()
max_to_show = 10
if len(conflict_paths) > max_to_show:
# If there are more than 10 conflicts, show the first 9
# and make the last line report how many other conflicts there are
total_conflicts = len(conflict_paths)
conflict_paths = conflict_paths[: max_to_show - 1]
num_remaining = total_conflicts - len(conflict_paths)
conflict_paths.append("... (%d more conflicts)" % num_remaining)
msg = _("conflicting changes:\n ") + "\n ".join(conflict_paths)
hint = _("commit or update --clean to discard changes")
raise error.Abort(msg, hint=hint)
def _applyupdates(repo, actions, wctx, dest, labels, conflicts):
    """Run mergemod.applyupdates() and fold Eden errors into the result.

    Returns ``(stats, actions)`` where ``stats`` is the 4-tuple reported by
    mergemod.applyupdates() with the number of ConflictType.ERROR conflicts
    added to its last (unresolved) field.  This ensures we exit
    unsuccessfully if there were any errors.
    """
    errorcount = len([c for c in conflicts if c.type == ConflictType.ERROR])

    # Note that applyupdates may mutate actions.
    stats = mergemod.applyupdates(
        repo, actions, wctx, dest, overwrite=False, labels=labels
    )

    updated, merged, removed, unresolved = (
        stats[0],
        stats[1],
        stats[2],
        stats[3],
    )
    return (updated, merged, removed, unresolved + errorcount), actions

11
mercurial/edenfs.py Normal file
View File

@ -0,0 +1,11 @@
# Copyright (c) 2016-present, Facebook, Inc.
# All Rights Reserved.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# Name of the ".hg/requires" entry associated with EdenFS checkouts.  Callers
# test `edenfs.requirement in repo.requirements` to select Eden code paths.
#
# Historically, support for EdenFS was provided via a separate Hg extension
# named "eden", so "eden" is what was added to the ".hg/requires" file.
# Going forward, it would be more appropriate to name the requirement "edenfs",
# but we need to run a Hypershell job to update the existing Eden checkouts.
requirement = "eden"

View File

@ -33,6 +33,7 @@ _ignoreextensions = {
"children",
"color",
"configwarn",
"eden",
"factotum",
"fastpartialmatch",
"fbsparse",

View File

@ -19,6 +19,7 @@ from . import (
cmdutil,
destutil,
discovery,
edenfs,
error,
exchange,
extensions,
@ -804,6 +805,9 @@ def clone(
def _showstats(repo, stats, quietempty=False):
if edenfs.requirement in repo.requirements:
return _eden_showstats(repo, stats, quietempty)
if quietempty and not any(stats):
return
repo.ui.status(
@ -815,6 +819,19 @@ def _showstats(repo, stats, quietempty=False):
)
def _eden_showstats(repo, stats, quietempty=False):
# We hide the updated and removed counts, because they are not accurate
# with eden. One of the primary goals of eden is that the entire working
# directory does not need to be accessed or traversed on update operations.
(updated, merged, removed, unresolved) = stats
if merged or unresolved:
repo.ui.status(
_("%d files merged, %d files unresolved\n") % (merged, unresolved)
)
elif not quietempty:
repo.ui.status(_("update complete\n"))
def updaterepo(repo, node, overwrite, updatecheck=None):
"""Update the working directory to node.

View File

@ -27,6 +27,7 @@ from . import (
dirstate,
dirstateguard,
discovery,
edenfs,
encoding,
error,
exchange,
@ -352,6 +353,7 @@ class localrepository(object):
REVLOGV2_REQUIREMENT,
}
_basesupported = supportedformats | {
edenfs.requirement,
"store",
"fncache",
"shared",
@ -852,6 +854,9 @@ class localrepository(object):
@repofilecache(localpaths=["dirstate"])
def dirstate(self):
if edenfs.requirement in self.requirements:
return self._eden_dirstate
istreestate = "treestate" in self.requirements
istreedirstate = "treedirstate" in self.requirements
@ -864,6 +869,19 @@ class localrepository(object):
istreedirstate=istreedirstate,
)
@util.propertycache
def _eden_dirstate(self):
# Build the Eden replacement for the regular dirstate by calling
# eden_dirstate.eden_dirstate(self, self.ui, self.root).  The eden_dirstate
# module is imported here, inside the method, rather than at module level.
# NOTE(review): util.propertycache presumably caches the result on the
# instance so this runs once per repo object -- confirm.
#
# Disable demand import when pulling in the thrift runtime,
# as it attempts to import missing modules and changes behavior
# based on what it finds. Demand import masks those and causes
# obscure and false import errors at runtime.
import hgdemandimport
with hgdemandimport.disabled():
from . import eden_dirstate as dirstate_reimplementation
return dirstate_reimplementation.eden_dirstate(self, self.ui, self.root)
def _dirstatevalidate(self, node):
try:
self.changelog.rev(node)
@ -1764,6 +1782,11 @@ class localrepository(object):
rereads the dirstate. Use dirstate.invalidate() if you want to
explicitly read the dirstate again (i.e. restoring it to a previous
known good state)."""
# eden_dirstate has its own invalidation logic.
if edenfs.requirement in self.requirements:
self.dirstate.invalidate()
return
if hasunfilteredcache(self, "dirstate"):
for k in self.dirstate._filecache:
try:

View File

@ -14,6 +14,7 @@ import struct
from . import (
copies,
edenfs,
error,
extensions,
filemerge,
@ -2013,9 +2014,56 @@ def update(
Return the same tuple as applyupdates().
"""
# This function used to find the default destination if node was None, but
# that's now in destutil.py.
assert node is not None
if edenfs.requirement in repo.requirements:
if matcher is not None and not matcher.always():
why_not_eden = "We don't support doing a partial update through eden yet."
elif branchmerge:
# TODO: We potentially should support handling this scenario ourselves in
# the future. For now we simply haven't investigated what the correct
# semantics are in this case.
why_not_eden = 'branchmerge is "truthy:" %s.' % branchmerge
elif ancestor is not None:
# TODO: We potentially should support handling this scenario ourselves in
# the future. For now we simply haven't investigated what the correct
# semantics are in this case.
why_not_eden = "ancestor is not None: %s." % ancestor
elif wc is not None and wc.isinmemory():
# In memory merges do not operate on the working directory,
# so we don't need to ask eden to change the working directory state
# at all, and can use the vanilla merge logic in this case.
why_not_eden = "merge is in-memory"
else:
# TODO: We probably also need to set why_not_eden if there are
# subrepositories. (Personally I might vote for just not supporting
# subrepos in eden.)
why_not_eden = None
if why_not_eden:
repo.ui.debug(
"falling back to non-eden update code path: %s\n" % why_not_eden
)
else:
from . import eden_update
return eden_update.update(
repo,
node,
branchmerge,
force,
ancestor,
mergeancestor,
labels,
matcher,
mergeforce,
updatecheck,
wc,
)
if not branchmerge and not force:
# TODO: remove the default once all callers that pass branchmerge=False
# and force=False pass a value for updatecheck. We may want to allow

View File

@ -287,7 +287,7 @@ Show all commands + options
debugdata: changelog, manifest, dir
debugdate: extended
debugdeltachain: changelog, manifest, dir, template
debugdirstate: nodates, datesort
debugdirstate: nodates, datesort, json
debugdiscovery: old, nonheads, rev, ssh, remotecmd, insecure
debugdrawdag: print
debugextensions: excludedefault, template