eden: share treestate between Python and Rust

Summary:
Make the eden dirstate map use a treestate object under the hood instead of a plain Python dictionary. This way, Python and Rust can stay in sync with respect to the dirstate.

I'm working on making everything use the Rust status internally (e.g. "hg commit" uses the Rust status). For this to work, pending dirstate changes from Python need to be visible to the Rust status. Non-EdenFS handles this by sharing the same treestate object between Rust and Python. This doesn't work for EdenFS since EdenFS uses its own dirstate format completely separate from the Rust workingcopy.

However, the Rust workingcopy already converts the EdenFS dirstate into an in-memory treestate object for compatibility. In this commit, I pass this treestate object along from Rust to the eden_dirstate_map to act as the underlying _map. This way, mutations from Python will be available to the Rust status code.

This isn't as nice as completely migrating to treestate, but it is much easier and is a step in the right direction.

Reviewed By: quark-zju

Differential Revision: D46840436

fbshipit-source-id: 99904622507645001cc5a9f2096da9d83cda98d4
This commit is contained in:
Muir Manders 2023-06-22 21:57:53 -07:00 committed by Facebook GitHub Bot
parent 51081a4442
commit 0a2bb133f6
6 changed files with 127 additions and 86 deletions

View File

@ -1135,7 +1135,7 @@ def debugstate(ui, repo, **opts) -> Optional[int]:
ui.write("\n") ui.write("\n")
return return
for path, dirstate_tuple in sorted(pycompat.iteritems(repo.dirstate._map._map)): for path, dirstate_tuple in repo.dirstate._map._items():
status, mode, merge_state, _dummymtime = dirstate_tuple status, mode, merge_state, _dummymtime = dirstate_tuple
if mode & 0o20000: if mode & 0o20000:
display_mode = "lnk" display_mode = "lnk"

View File

@ -182,7 +182,9 @@ class dirstate(object):
# pyre-ignore # pyre-ignore
self._mapcls = make_treestate self._mapcls = make_treestate
else: else:
if "eden" not in repo.requirements: if "eden" in repo.requirements:
self._istreestate = True
else:
ui.deprecate("dirstatemap", "dirstatemap is replaced by treestate") ui.deprecate("dirstatemap", "dirstatemap is replaced by treestate")
self._mapcls: "DirstateMapClassType" = dirstatemap self._mapcls: "DirstateMapClassType" = dirstatemap
self._fs = filesystem.physicalfilesystem(root, self) self._fs = filesystem.physicalfilesystem(root, self)

View File

@ -6,6 +6,8 @@
import os import os
import stat import stat
import bindings
from eden.dirstate import MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT from eden.dirstate import MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT
from . import ( from . import (
@ -76,7 +78,7 @@ class eden_dirstate(dirstate.dirstate):
This includes non-normal files (e.g., files marked for addition or This includes non-normal files (e.g., files marked for addition or
removal), as well as normal files that have merge state information. removal), as well as normal files that have merge state information.
""" """
return pycompat.iteritems(self._map._map) return self._map._items()
def _p1_ctx(self): def _p1_ctx(self):
"""Return the context object for the first parent commit.""" """Return the context object for the first parent commit."""
@ -131,11 +133,11 @@ class eden_dirstate(dirstate.dirstate):
# or any child of this path as a directory name. # or any child of this path as a directory name.
# (This handles the case where an untracked file was added with # (This handles the case where an untracked file was added with
# 'hg add' but then deleted from disk.) # 'hg add' but then deleted from disk.)
if path in self._map._map: if path in self._map._tree:
return True return True
dirpath = path + "/" dirpath = path + "/"
for entry in self._map._map: for entry in self._map._keys():
if entry.startswith(dirpath): if entry.startswith(dirpath):
return True return True
@ -160,7 +162,7 @@ class eden_dirstate(dirstate.dirstate):
# Augument the results with anything modified in the dirstate, # Augument the results with anything modified in the dirstate,
# to take care of added/removed files. # to take care of added/removed files.
for path in self._map._map.keys(): for path in self._map._keys():
if match(path): if match(path):
results.add(path) results.add(path)
@ -175,7 +177,7 @@ class eden_dirstate(dirstate.dirstate):
# Augument the results with anything modified in the dirstate, # Augument the results with anything modified in the dirstate,
# to take care of added/removed files. # to take care of added/removed files.
for path, state in self._map._map.items(): for path, state in self._map._items():
if match(path): if match(path):
if state[0] == "r": if state[0] == "r":
results.discard(path) results.discard(path)
@ -202,12 +204,13 @@ class eden_dirstate(dirstate.dirstate):
# in state 'm' (-1) or coming from other parent (-2) before # in state 'm' (-1) or coming from other parent (-2) before
# being removed, restore that state. # being removed, restore that state.
# #
# Note that we intentionally use self._map._map.get() here # Note that we intentionally use self._map._tree.get() here
# rather than self._map.get() to avoid making a thrift call to Eden # rather than self._map.get() to avoid making a thrift call to Eden
# if this file is already normal. # if this file is already normal.
entry = self._map._map.get(f) entry = self._map._tree.get(f, None)
if entry is not None: if entry is not None:
status, mode, merge_state, _dummy_mtime = entry status, _mode, merge_state, *_ = entry
status = bindings.treestate.tohgstate(status)
if status == "r" and merge_state in ( if status == "r" and merge_state in (
MERGE_STATE_BOTH_PARENTS, MERGE_STATE_BOTH_PARENTS,
MERGE_STATE_OTHER_PARENT, MERGE_STATE_OTHER_PARENT,

View File

@ -9,11 +9,21 @@ import errno
import stat import stat
from typing import BinaryIO, Dict from typing import BinaryIO, Dict
import eden.dirstate as eden_dirstate_serializer import bindings
import eden.dirstate as eden_dirstate_serializer
from edenscmnative import parsers from edenscmnative import parsers
from . import dirstate, EdenThriftClient, localrepo, pycompat, ui as ui_mod, util, vfs from . import (
EdenThriftClient,
localrepo,
node,
pycompat,
treestate,
ui as ui_mod,
util,
vfs,
)
MERGE_STATE_NOT_APPLICABLE: int = eden_dirstate_serializer.MERGE_STATE_NOT_APPLICABLE MERGE_STATE_NOT_APPLICABLE: int = eden_dirstate_serializer.MERGE_STATE_NOT_APPLICABLE
@ -29,7 +39,7 @@ modefromflag: Dict[str, int] = {
} }
class eden_dirstate_map(dirstate.dirstatemap): class eden_dirstate_map(treestate.treestatemap):
def __init__( def __init__(
self, self,
ui: "ui_mod.ui", ui: "ui_mod.ui",
@ -38,10 +48,42 @@ class eden_dirstate_map(dirstate.dirstatemap):
thrift_client: "EdenThriftClient.EdenThriftClient", thrift_client: "EdenThriftClient.EdenThriftClient",
repo: "localrepo.localrepository", repo: "localrepo.localrepository",
) -> None: ) -> None:
super(eden_dirstate_map, self).__init__(ui, opener, root)
self._thrift_client = thrift_client self._thrift_client = thrift_client
self._repo = repo self._repo = repo
# ignore HG_PENDING because identity is used only for writing
self._identity = util.filestat.frompath(opener.join("dirstate"))
# Each time we load the treestate, make sure we have the latest
# version.
repo._rsrepo.invalidateworkingcopy()
super().__init__(ui, opener, root, repo._rsrepo.workingcopy().treestate())
@property
def identity(self): # override
return self._identity
def _keys(self):
return self._tree.tracked("")
def _items(self):
"""Iterate treestate, converting treestate "flags" into legacy merge state enum."""
for k in self._keys():
entry = self._tree.get(k, None)
if entry is None:
continue
flags, mode, _, mtime, *_ = entry
yield (
k,
(
bindings.treestate.tohgstate(flags),
mode,
_merge_state_from_flags(flags),
mtime,
),
)
def write(self, st: "BinaryIO", now: int) -> None: # override def write(self, st: "BinaryIO", now: int) -> None: # override
parents = self.parents() parents = self.parents()
@ -49,7 +91,7 @@ class eden_dirstate_map(dirstate.dirstatemap):
# never allow these to be inserted into self._map in the first place.) # never allow these to be inserted into self._map in the first place.)
m = { m = {
k: (v[0], v[1], v[2]) k: (v[0], v[1], v[2])
for k, v in self._map.items() for k, v in self._items()
if not (v[0] == "n" and v[2] == MERGE_STATE_NOT_APPLICABLE) if not (v[0] == "n" and v[2] == MERGE_STATE_NOT_APPLICABLE)
} }
eden_dirstate_serializer.write(st, parents, m, self.copymap) eden_dirstate_serializer.write(st, parents, m, self.copymap)
@ -65,36 +107,28 @@ class eden_dirstate_map(dirstate.dirstatemap):
need_flush=False, need_flush=False,
p1manifest=self._repo[parents[0]].manifestnode(), p1manifest=self._repo[parents[0]].manifestnode(),
) )
self._dirtyparents = False
self.nonnormalset, self.otherparentset = self.nonnormalentries()
def read(self): # override def _read(self, tree): # override
# ignore HG_PENDING because identity is used only for writing self._tree = tree
self.identity = util.filestat.frompath(self._opener.join(self._filename))
try: metadata = treestate._unpackmetadata(self._tree.getmetadata())
fp = self._opendirstatefile()
try: self._parents = (
parents, dirstate_tuples, copymap = eden_dirstate_serializer.read( node.bin(metadata.get("p1") or node.nullhex),
fp, self._filename node.bin(metadata.get("p2") or node.nullhex),
) )
finally:
fp.close()
except IOError as e:
if e.errno != errno.ENOENT:
raise
else:
# If the dirstate file does not exist, then we silently ignore
# the error because that's what Mercurial's dirstate does.
return
if not self._dirtyparents: # These shouldn't be needed since we never write out a treestate.
self.setparents(*parents) self._threshold = 0
self._map = { self._rootid = 0
n: parsers.dirstatetuple(v[0], v[1], v[2], DUMMY_MTIME)
for n, v in dirstate_tuples.items() def clear(self):
} # This seems to only be called for EdenFS "hg up -C ...".
self.copymap = copymap # Let's just manually remove tracked entries since self._tree.reset()
# doesn't do the right thing with our in-memory treestate.
self.setparents(node.nullid, node.nullid)
for k in self._keys():
self._tree.remove(k)
def iteritems(self): def iteritems(self):
raise RuntimeError( raise RuntimeError(
@ -110,27 +144,30 @@ class eden_dirstate_map(dirstate.dirstatemap):
def keys(self): def keys(self):
raise RuntimeError("Should not invoke keys() on eden_dirstate_map!") raise RuntimeError("Should not invoke keys() on eden_dirstate_map!")
def get(self, key, default=None):
try:
return self.__getitem__(key)
except KeyError:
return default
def __contains__(self, key): def __contains__(self, key):
return self.get(key) is not None return self.get(key) is not None
def __getitem__(self, filename): # For eden we store a sparse dirstate with only added/removed files.
# type(str) -> parsers.dirstatetuple # For "normal" files, we need to infer their state from the manifest.
entry = self._map.get(filename) def _get(self, path, default=None):
entry = super()._get(path, None)
if entry is not None: if entry is not None:
return entry return entry
# edenfs only tracks one parent
commitctx = self._repo["."] commitctx = self._repo["."]
node, flag = commitctx._fileinfo(filename)
mode = modefromflag[flag] try:
return parsers.dirstatetuple("n", mode, MERGE_STATE_NOT_APPLICABLE, DUMMY_MTIME) _node, flag = commitctx._fileinfo(path)
except KeyError:
return default
return (
bindings.treestate.EXIST_P1 | bindings.treestate.EXIST_NEXT,
modefromflag[flag],
MERGE_STATE_NOT_APPLICABLE,
DUMMY_MTIME,
None,
)
def hastrackeddir(self, d): # override def hastrackeddir(self, d): # override
# TODO(mbolin): Unclear whether it is safe to hardcode this to False. # TODO(mbolin): Unclear whether it is safe to hardcode this to False.
@ -140,24 +177,20 @@ class eden_dirstate_map(dirstate.dirstatemap):
# TODO(mbolin): Unclear whether it is safe to hardcode this to False. # TODO(mbolin): Unclear whether it is safe to hardcode this to False.
return False return False
def _insert_tuple(self, filename, state, mode, size, mtime): # override
if size != MERGE_STATE_BOTH_PARENTS and size != MERGE_STATE_OTHER_PARENT: def _merge_state_from_flags(flags):
merge_state = MERGE_STATE_NOT_APPLICABLE # Convert treestate flags back into legacy merge state enum. This mirrors
# logic in treestate::legacy_eden_dirstate::serialize_entry.
p1 = flags & bindings.treestate.EXIST_P1
p2 = flags & bindings.treestate.EXIST_P2
nxt = flags & bindings.treestate.EXIST_NEXT
if p2:
if p1:
return MERGE_STATE_BOTH_PARENTS
else: else:
merge_state = size return MERGE_STATE_OTHER_PARENT
elif not p1 and nxt:
self._map[filename] = parsers.dirstatetuple( return MERGE_STATE_BOTH_PARENTS
state, mode, merge_state, DUMMY_MTIME else:
) return MERGE_STATE_NOT_APPLICABLE
def nonnormalentries(self):
"""Returns a set of filenames."""
# type() -> Tuple[Set[str], Set[str]]
nonnorm = set()
otherparent = set()
for path, entry in pycompat.iteritems(self._map):
if entry[0] != "n":
nonnorm.add(path)
elif entry[2] == MERGE_STATE_OTHER_PARENT:
otherparent.add(path)
return nonnorm, otherparent

View File

@ -502,11 +502,11 @@ def _trackdirstatesizes(lui: "uimod.ui", repo: "localrepo.localrepository") -> N
dirstate = repo.dirstate dirstate = repo.dirstate
dirstatesize = None dirstatesize = None
try: try:
# Eden and flat dirstate. # Flat dirstate.
dirstatesize = len(dirstate._map._map) dirstatesize = len(dirstate._map._map)
except AttributeError: except AttributeError:
# Treestate. # Treestate (including eden):
dirstatesize = len(dirstate._map) dirstatesize = len(dirstate._map._tree)
if dirstatesize is not None: if dirstatesize is not None:
lui.log("dirstate_size", dirstate_size=dirstatesize) lui.log("dirstate_size", dirstate_size=dirstatesize)
if ( if (

View File

@ -98,7 +98,7 @@ class treestatemap(object):
def copymap(self): def copymap(self):
result = {} result = {}
for path in self._tree.walk(treestate.COPIED, 0): for path in self._tree.walk(treestate.COPIED, 0):
copied = self._tree.get(path, None)[-1] copied = self._get(path)[-1]
if not copied: if not copied:
raise error.Abort( raise error.Abort(
_( _(
@ -128,7 +128,7 @@ class treestatemap(object):
return len(self._tree) return len(self._tree)
def get(self, key, default=None): def get(self, key, default=None):
entry = self._tree.get(key, None) entry = self._get(key)
if entry is None or len(entry) != 5: if entry is None or len(entry) != 5:
return default return default
flags, mode, size, mtime, _copied = entry flags, mode, size, mtime, _copied = entry
@ -174,7 +174,7 @@ class treestatemap(object):
self._tree.insert(f, state, mode, size, mtime, None) self._tree.insert(f, state, mode, size, mtime, None)
def removefile(self, f, oldstate, size): def removefile(self, f, oldstate, size):
existing = self._tree.get(f, None) existing = self._get(f)
if existing: if existing:
# preserve "copied" information # preserve "copied" information
state, mode, size, mtime, copied = existing state, mode, size, mtime, copied = existing
@ -221,7 +221,7 @@ class treestatemap(object):
# Typically, dropfile is used in 2 cases: # Typically, dropfile is used in 2 cases:
# - "hg forget": mark the file as "untracked". # - "hg forget": mark the file as "untracked".
# - "hg update": remove files only tracked by old commit. # - "hg update": remove files only tracked by old commit.
entry = self._tree.get(f, None) entry = self._get(f)
if not entry: if not entry:
return False return False
else: else:
@ -517,7 +517,7 @@ class treestatemap(object):
def copy(self, source, dest): def copy(self, source, dest):
if source == dest: if source == dest:
return return
existing = self._tree.get(dest, None) existing = self._get(dest)
if existing: if existing:
state, mode, size, mtime, copied = existing state, mode, size, mtime, copied = existing
if copied != source: if copied != source:
@ -544,7 +544,7 @@ class treestatemap(object):
Return True if the file was changed, False if it's already marked. Return True if the file was changed, False if it's already marked.
""" """
existing = self._tree.get(path, None) existing = self._get(path)
if not existing: if not existing:
# The file was not in dirstate (untracked). Add it. # The file was not in dirstate (untracked). Add it.
state = treestate.NEED_CHECK state = treestate.NEED_CHECK
@ -566,7 +566,7 @@ class treestatemap(object):
Return True if the file was changed, False if the file does not have Return True if the file was changed, False if the file does not have
NEED_CHECK. NEED_CHECK.
""" """
existing = self._tree.get(path, None) existing = self._get(path)
if existing: if existing:
state, mode, size, mtime, copied = existing state, mode, size, mtime, copied = existing
if treestate.NEED_CHECK & state: if treestate.NEED_CHECK & state:
@ -579,13 +579,16 @@ class treestatemap(object):
"""Return the copysource for path. Return None if it's not copied, or """Return the copysource for path. Return None if it's not copied, or
path does not exist. path does not exist.
""" """
existing = self._tree.get(path, None) existing = self._get(path)
if existing: if existing:
_state, _mode, _size, _mtime, copied = existing _state, _mode, _size, _mtime, copied = existing
return copied return copied
else: else:
return None return None
def _get(self, path, default=None):
return self._tree.get(path, default)
def currentversion(repo): def currentversion(repo):
"""get the current dirstate version""" """get the current dirstate version"""