From 0a2bb133f6e24d5722c0b8c581a9d1c93f1756c6 Mon Sep 17 00:00:00 2001 From: Muir Manders Date: Thu, 22 Jun 2023 21:57:53 -0700 Subject: [PATCH] eden: share treestate between Python and Rust Summary: Make the eden dirstate map use a treestate object under the hood instead of a plain Python dictionary. This way Python and Rust can stay in sync wrt the dirstate. I'm working on making everything use the Rust status internally (e.g. "hg commit" uses the Rust status). For this to work, pending dirstate changes from Python need to be visible to the Rust status. Non-EdenFS handles this by sharing the same treestate object between Rust and Python. This doesn't work for EdenFS since EdenFS uses its own dirstate format completely separate from the Rust workingcopy. However, the Rust workingcopy already converts the EdenFS dirstate into an in-memory treestate object for compatibility. In this commit, I pass this treestate object along from Rust to the eden_dirstate_map to act as the underlying _map. This way, mutations from Python will be available to the Rust status code. This isn't as nice as completely migrating to treestate, but it is much easier and is a step in the right direction. 
Reviewed By: quark-zju Differential Revision: D46840436 fbshipit-source-id: 99904622507645001cc5a9f2096da9d83cda98d4 --- eden/scm/edenscm/commands/debug.py | 2 +- eden/scm/edenscm/dirstate.py | 4 +- eden/scm/edenscm/eden_dirstate.py | 19 +-- eden/scm/edenscm/eden_dirstate_map.py | 163 ++++++++++++++++---------- eden/scm/edenscm/ext/sparse.py | 6 +- eden/scm/edenscm/treestate.py | 19 +-- 6 files changed, 127 insertions(+), 86 deletions(-) diff --git a/eden/scm/edenscm/commands/debug.py b/eden/scm/edenscm/commands/debug.py index 2cd98b304d..fd32b052d3 100644 --- a/eden/scm/edenscm/commands/debug.py +++ b/eden/scm/edenscm/commands/debug.py @@ -1135,7 +1135,7 @@ def debugstate(ui, repo, **opts) -> Optional[int]: ui.write("\n") return - for path, dirstate_tuple in sorted(pycompat.iteritems(repo.dirstate._map._map)): + for path, dirstate_tuple in repo.dirstate._map._items(): status, mode, merge_state, _dummymtime = dirstate_tuple if mode & 0o20000: display_mode = "lnk" diff --git a/eden/scm/edenscm/dirstate.py b/eden/scm/edenscm/dirstate.py index 29aceef824..00260db381 100644 --- a/eden/scm/edenscm/dirstate.py +++ b/eden/scm/edenscm/dirstate.py @@ -182,7 +182,9 @@ class dirstate(object): # pyre-ignore self._mapcls = make_treestate else: - if "eden" not in repo.requirements: + if "eden" in repo.requirements: + self._istreestate = True + else: ui.deprecate("dirstatemap", "dirstatemap is replaced by treestate") self._mapcls: "DirstateMapClassType" = dirstatemap self._fs = filesystem.physicalfilesystem(root, self) diff --git a/eden/scm/edenscm/eden_dirstate.py b/eden/scm/edenscm/eden_dirstate.py index 95e584573d..4bae2b1cec 100644 --- a/eden/scm/edenscm/eden_dirstate.py +++ b/eden/scm/edenscm/eden_dirstate.py @@ -6,6 +6,8 @@ import os import stat +import bindings + from eden.dirstate import MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT from . 
import ( @@ -76,7 +78,7 @@ class eden_dirstate(dirstate.dirstate): This includes non-normal files (e.g., files marked for addition or removal), as well as normal files that have merge state information. """ - return pycompat.iteritems(self._map._map) + return self._map._items() def _p1_ctx(self): """Return the context object for the first parent commit.""" @@ -131,11 +133,11 @@ class eden_dirstate(dirstate.dirstate): # or any child of this path as a directory name. # (This handles the case where an untracked file was added with # 'hg add' but then deleted from disk.) - if path in self._map._map: + if path in self._map._tree: return True dirpath = path + "/" - for entry in self._map._map: + for entry in self._map._keys(): if entry.startswith(dirpath): return True @@ -160,7 +162,7 @@ class eden_dirstate(dirstate.dirstate): # Augument the results with anything modified in the dirstate, # to take care of added/removed files. - for path in self._map._map.keys(): + for path in self._map._keys(): if match(path): results.add(path) @@ -175,7 +177,7 @@ class eden_dirstate(dirstate.dirstate): # Augument the results with anything modified in the dirstate, # to take care of added/removed files. - for path, state in self._map._map.items(): + for path, state in self._map._items(): if match(path): if state[0] == "r": results.discard(path) @@ -202,12 +204,13 @@ class eden_dirstate(dirstate.dirstate): # in state 'm' (-1) or coming from other parent (-2) before # being removed, restore that state. # - # Note that we intentionally use self._map._map.get() here + # Note that we intentionally use self._map._tree.get() here # rather than self._map.get() to avoid making a thrift call to Eden # if this file is already normal. 
- entry = self._map._map.get(f) + entry = self._map._tree.get(f, None) if entry is not None: - status, mode, merge_state, _dummy_mtime = entry + status, _mode, merge_state, *_ = entry + status = bindings.treestate.tohgstate(status) if status == "r" and merge_state in ( MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT, diff --git a/eden/scm/edenscm/eden_dirstate_map.py b/eden/scm/edenscm/eden_dirstate_map.py index 8ceaedff22..1a6241f993 100644 --- a/eden/scm/edenscm/eden_dirstate_map.py +++ b/eden/scm/edenscm/eden_dirstate_map.py @@ -9,11 +9,21 @@ import errno import stat from typing import BinaryIO, Dict -import eden.dirstate as eden_dirstate_serializer +import bindings +import eden.dirstate as eden_dirstate_serializer from edenscmnative import parsers -from . import dirstate, EdenThriftClient, localrepo, pycompat, ui as ui_mod, util, vfs +from . import ( + EdenThriftClient, + localrepo, + node, + pycompat, + treestate, + ui as ui_mod, + util, + vfs, +) MERGE_STATE_NOT_APPLICABLE: int = eden_dirstate_serializer.MERGE_STATE_NOT_APPLICABLE @@ -29,7 +39,7 @@ modefromflag: Dict[str, int] = { } -class eden_dirstate_map(dirstate.dirstatemap): +class eden_dirstate_map(treestate.treestatemap): def __init__( self, ui: "ui_mod.ui", @@ -38,10 +48,42 @@ class eden_dirstate_map(dirstate.dirstatemap): thrift_client: "EdenThriftClient.EdenThriftClient", repo: "localrepo.localrepository", ) -> None: - super(eden_dirstate_map, self).__init__(ui, opener, root) self._thrift_client = thrift_client self._repo = repo + # ignore HG_PENDING because identity is used only for writing + self._identity = util.filestat.frompath(opener.join("dirstate")) + + # Each time we load the treestate, make sure we have the latest + # version. 
+ repo._rsrepo.invalidateworkingcopy() + + super().__init__(ui, opener, root, repo._rsrepo.workingcopy().treestate()) + + @property + def identity(self): # override + return self._identity + + def _keys(self): + return self._tree.tracked("") + + def _items(self): + """Iterate treestate, converting treestate "flags" into legacy merge state enum.""" + for k in self._keys(): + entry = self._tree.get(k, None) + if entry is None: + continue + flags, mode, _, mtime, *_ = entry + yield ( + k, + ( + bindings.treestate.tohgstate(flags), + mode, + _merge_state_from_flags(flags), + mtime, + ), + ) + def write(self, st: "BinaryIO", now: int) -> None: # override parents = self.parents() @@ -49,7 +91,7 @@ class eden_dirstate_map(dirstate.dirstatemap): # never allow these to be inserted into self._map in the first place.) m = { k: (v[0], v[1], v[2]) - for k, v in self._map.items() + for k, v in self._items() if not (v[0] == "n" and v[2] == MERGE_STATE_NOT_APPLICABLE) } eden_dirstate_serializer.write(st, parents, m, self.copymap) @@ -65,36 +107,28 @@ class eden_dirstate_map(dirstate.dirstatemap): need_flush=False, p1manifest=self._repo[parents[0]].manifestnode(), ) - self._dirtyparents = False - self.nonnormalset, self.otherparentset = self.nonnormalentries() - def read(self): # override - # ignore HG_PENDING because identity is used only for writing - self.identity = util.filestat.frompath(self._opener.join(self._filename)) + def _read(self, tree): # override + self._tree = tree - try: - fp = self._opendirstatefile() - try: - parents, dirstate_tuples, copymap = eden_dirstate_serializer.read( - fp, self._filename - ) - finally: - fp.close() - except IOError as e: - if e.errno != errno.ENOENT: - raise - else: - # If the dirstate file does not exist, then we silently ignore - # the error because that's what Mercurial's dirstate does. 
- return + metadata = treestate._unpackmetadata(self._tree.getmetadata()) - if not self._dirtyparents: - self.setparents(*parents) - self._map = { - n: parsers.dirstatetuple(v[0], v[1], v[2], DUMMY_MTIME) - for n, v in dirstate_tuples.items() - } - self.copymap = copymap + self._parents = ( + node.bin(metadata.get("p1") or node.nullhex), + node.bin(metadata.get("p2") or node.nullhex), + ) + + # These shouldn't be needed since we never write out a treestate. + self._threshold = 0 + self._rootid = 0 + + def clear(self): + # This seems to only be called for EdenFS "hg up -C ...". + # Let's just manually remove tracked entries since self._tree.reset() + # doesn't do the right thing with our in-memory treestate. + self.setparents(node.nullid, node.nullid) + for k in self._keys(): + self._tree.remove(k) def iteritems(self): raise RuntimeError( @@ -110,27 +144,30 @@ class eden_dirstate_map(dirstate.dirstatemap): def keys(self): raise RuntimeError("Should not invoke keys() on eden_dirstate_map!") - def get(self, key, default=None): - try: - return self.__getitem__(key) - except KeyError: - return default - def __contains__(self, key): return self.get(key) is not None - def __getitem__(self, filename): - # type(str) -> parsers.dirstatetuple - entry = self._map.get(filename) + # For eden we store a sparse dirstate with only added/removed files. + # For "normal" files, we need to infer their state from the manifest. 
+ def _get(self, path, default=None): + entry = super()._get(path, None) if entry is not None: return entry - # edenfs only tracks one parent commitctx = self._repo["."] - node, flag = commitctx._fileinfo(filename) - mode = modefromflag[flag] - return parsers.dirstatetuple("n", mode, MERGE_STATE_NOT_APPLICABLE, DUMMY_MTIME) + try: + _node, flag = commitctx._fileinfo(path) + except KeyError: + return default + + return ( + bindings.treestate.EXIST_P1 | bindings.treestate.EXIST_NEXT, + modefromflag[flag], + MERGE_STATE_NOT_APPLICABLE, + DUMMY_MTIME, + None, + ) def hastrackeddir(self, d): # override # TODO(mbolin): Unclear whether it is safe to hardcode this to False. @@ -140,24 +177,20 @@ class eden_dirstate_map(dirstate.dirstatemap): # TODO(mbolin): Unclear whether it is safe to hardcode this to False. return False - def _insert_tuple(self, filename, state, mode, size, mtime): # override - if size != MERGE_STATE_BOTH_PARENTS and size != MERGE_STATE_OTHER_PARENT: - merge_state = MERGE_STATE_NOT_APPLICABLE + +def _merge_state_from_flags(flags): + # Convert treestate flags back into legacy merge state enum. This mirrors + # logic in treestate::legacy_eden_dirstate::serialize_entry. 
+ p1 = flags & bindings.treestate.EXIST_P1 + p2 = flags & bindings.treestate.EXIST_P2 + nxt = flags & bindings.treestate.EXIST_NEXT + + if p2: + if p1: + return MERGE_STATE_BOTH_PARENTS else: - merge_state = size - - self._map[filename] = parsers.dirstatetuple( - state, mode, merge_state, DUMMY_MTIME - ) - - def nonnormalentries(self): - """Returns a set of filenames.""" - # type() -> Tuple[Set[str], Set[str]] - nonnorm = set() - otherparent = set() - for path, entry in pycompat.iteritems(self._map): - if entry[0] != "n": - nonnorm.add(path) - elif entry[2] == MERGE_STATE_OTHER_PARENT: - otherparent.add(path) - return nonnorm, otherparent + return MERGE_STATE_OTHER_PARENT + elif not p1 and nxt: + return MERGE_STATE_BOTH_PARENTS + else: + return MERGE_STATE_NOT_APPLICABLE diff --git a/eden/scm/edenscm/ext/sparse.py b/eden/scm/edenscm/ext/sparse.py index ec64316293..69eb7c4de5 100644 --- a/eden/scm/edenscm/ext/sparse.py +++ b/eden/scm/edenscm/ext/sparse.py @@ -502,11 +502,11 @@ def _trackdirstatesizes(lui: "uimod.ui", repo: "localrepo.localrepository") -> N dirstate = repo.dirstate dirstatesize = None try: - # Eden and flat dirstate. + # Flat dirstate. dirstatesize = len(dirstate._map._map) except AttributeError: - # Treestate. 
- dirstatesize = len(dirstate._map) + # Treestate (including eden): + dirstatesize = len(dirstate._map._tree) if dirstatesize is not None: lui.log("dirstate_size", dirstate_size=dirstatesize) if ( diff --git a/eden/scm/edenscm/treestate.py b/eden/scm/edenscm/treestate.py index 6a6061cc65..055a69a3f7 100644 --- a/eden/scm/edenscm/treestate.py +++ b/eden/scm/edenscm/treestate.py @@ -98,7 +98,7 @@ class treestatemap(object): def copymap(self): result = {} for path in self._tree.walk(treestate.COPIED, 0): - copied = self._tree.get(path, None)[-1] + copied = self._get(path)[-1] if not copied: raise error.Abort( _( @@ -128,7 +128,7 @@ class treestatemap(object): return len(self._tree) def get(self, key, default=None): - entry = self._tree.get(key, None) + entry = self._get(key) if entry is None or len(entry) != 5: return default flags, mode, size, mtime, _copied = entry @@ -174,7 +174,7 @@ class treestatemap(object): self._tree.insert(f, state, mode, size, mtime, None) def removefile(self, f, oldstate, size): - existing = self._tree.get(f, None) + existing = self._get(f) if existing: # preserve "copied" information state, mode, size, mtime, copied = existing @@ -221,7 +221,7 @@ class treestatemap(object): # Typically, dropfile is used in 2 cases: # - "hg forget": mark the file as "untracked". # - "hg update": remove files only tracked by old commit. - entry = self._tree.get(f, None) + entry = self._get(f) if not entry: return False else: @@ -517,7 +517,7 @@ class treestatemap(object): def copy(self, source, dest): if source == dest: return - existing = self._tree.get(dest, None) + existing = self._get(dest) if existing: state, mode, size, mtime, copied = existing if copied != source: @@ -544,7 +544,7 @@ class treestatemap(object): Return True if the file was changed, False if it's already marked. """ - existing = self._tree.get(path, None) + existing = self._get(path) if not existing: # The file was not in dirstate (untracked). Add it. 
state = treestate.NEED_CHECK @@ -566,7 +566,7 @@ class treestatemap(object): Return True if the file was changed, False if the file does not have NEED_CHECK. """ - existing = self._tree.get(path, None) + existing = self._get(path) if existing: state, mode, size, mtime, copied = existing if treestate.NEED_CHECK & state: @@ -579,13 +579,16 @@ class treestatemap(object): """Return the copysource for path. Return None if it's not copied, or path does not exist. """ - existing = self._tree.get(path, None) + existing = self._get(path) if existing: _state, _mode, _size, _mtime, copied = existing return copied else: return None + def _get(self, path, default=None): + return self._tree.get(path, default) + def currentversion(repo): """get the current dirstate version"""