eden: share treestate between Python and Rust

Summary:
Make the eden dirstate map use a treestate object under the hood instead of a plain Python dictionary. This way Python and Rust can stay in sync with respect to the dirstate.

I'm working on making everything use the Rust status internally (e.g. "hg commit" uses the Rust status). For this to work, pending dirstate changes from Python need to be visible to the Rust status. Non-EdenFS handles this by sharing the same treestate object between Rust and Python. This doesn't work for EdenFS since EdenFS uses its own dirstate format completely separate from the Rust workingcopy.

However, the Rust workingcopy already converts the EdenFS dirstate into an in-memory treestate object for compatibility. In this commit, I pass this treestate object along from Rust to the eden_dirstate_map to act as the underlying _map. This way, mutations from Python will be available to the Rust status code.

This isn't as nice as completely migrating to treestate, but it is much easier and is a step in the right direction.

Reviewed By: quark-zju

Differential Revision: D46840436

fbshipit-source-id: 99904622507645001cc5a9f2096da9d83cda98d4
This commit is contained in:
Muir Manders 2023-06-22 21:57:53 -07:00 committed by Facebook GitHub Bot
parent 51081a4442
commit 0a2bb133f6
6 changed files with 127 additions and 86 deletions

View File

@ -1135,7 +1135,7 @@ def debugstate(ui, repo, **opts) -> Optional[int]:
ui.write("\n")
return
for path, dirstate_tuple in sorted(pycompat.iteritems(repo.dirstate._map._map)):
for path, dirstate_tuple in repo.dirstate._map._items():
status, mode, merge_state, _dummymtime = dirstate_tuple
if mode & 0o20000:
display_mode = "lnk"

View File

@ -182,7 +182,9 @@ class dirstate(object):
# pyre-ignore
self._mapcls = make_treestate
else:
if "eden" not in repo.requirements:
if "eden" in repo.requirements:
self._istreestate = True
else:
ui.deprecate("dirstatemap", "dirstatemap is replaced by treestate")
self._mapcls: "DirstateMapClassType" = dirstatemap
self._fs = filesystem.physicalfilesystem(root, self)

View File

@ -6,6 +6,8 @@
import os
import stat
import bindings
from eden.dirstate import MERGE_STATE_BOTH_PARENTS, MERGE_STATE_OTHER_PARENT
from . import (
@ -76,7 +78,7 @@ class eden_dirstate(dirstate.dirstate):
This includes non-normal files (e.g., files marked for addition or
removal), as well as normal files that have merge state information.
"""
return pycompat.iteritems(self._map._map)
return self._map._items()
def _p1_ctx(self):
"""Return the context object for the first parent commit."""
@ -131,11 +133,11 @@ class eden_dirstate(dirstate.dirstate):
# or any child of this path as a directory name.
# (This handles the case where an untracked file was added with
# 'hg add' but then deleted from disk.)
if path in self._map._map:
if path in self._map._tree:
return True
dirpath = path + "/"
for entry in self._map._map:
for entry in self._map._keys():
if entry.startswith(dirpath):
return True
@ -160,7 +162,7 @@ class eden_dirstate(dirstate.dirstate):
# Augment the results with anything modified in the dirstate,
# to take care of added/removed files.
for path in self._map._map.keys():
for path in self._map._keys():
if match(path):
results.add(path)
@ -175,7 +177,7 @@ class eden_dirstate(dirstate.dirstate):
# Augment the results with anything modified in the dirstate,
# to take care of added/removed files.
for path, state in self._map._map.items():
for path, state in self._map._items():
if match(path):
if state[0] == "r":
results.discard(path)
@ -202,12 +204,13 @@ class eden_dirstate(dirstate.dirstate):
# in state 'm' (-1) or coming from other parent (-2) before
# being removed, restore that state.
#
# Note that we intentionally use self._map._map.get() here
# Note that we intentionally use self._map._tree.get() here
# rather than self._map.get() to avoid making a thrift call to Eden
# if this file is already normal.
entry = self._map._map.get(f)
entry = self._map._tree.get(f, None)
if entry is not None:
status, mode, merge_state, _dummy_mtime = entry
status, _mode, merge_state, *_ = entry
status = bindings.treestate.tohgstate(status)
if status == "r" and merge_state in (
MERGE_STATE_BOTH_PARENTS,
MERGE_STATE_OTHER_PARENT,

View File

@ -9,11 +9,21 @@ import errno
import stat
from typing import BinaryIO, Dict
import eden.dirstate as eden_dirstate_serializer
import bindings
import eden.dirstate as eden_dirstate_serializer
from edenscmnative import parsers
from . import dirstate, EdenThriftClient, localrepo, pycompat, ui as ui_mod, util, vfs
from . import (
EdenThriftClient,
localrepo,
node,
pycompat,
treestate,
ui as ui_mod,
util,
vfs,
)
MERGE_STATE_NOT_APPLICABLE: int = eden_dirstate_serializer.MERGE_STATE_NOT_APPLICABLE
@ -29,7 +39,7 @@ modefromflag: Dict[str, int] = {
}
class eden_dirstate_map(dirstate.dirstatemap):
class eden_dirstate_map(treestate.treestatemap):
def __init__(
self,
ui: "ui_mod.ui",
@ -38,10 +48,42 @@ class eden_dirstate_map(dirstate.dirstatemap):
thrift_client: "EdenThriftClient.EdenThriftClient",
repo: "localrepo.localrepository",
) -> None:
super(eden_dirstate_map, self).__init__(ui, opener, root)
self._thrift_client = thrift_client
self._repo = repo
# ignore HG_PENDING because identity is used only for writing
self._identity = util.filestat.frompath(opener.join("dirstate"))
# Each time we load the treestate, make sure we have the latest
# version.
repo._rsrepo.invalidateworkingcopy()
super().__init__(ui, opener, root, repo._rsrepo.workingcopy().treestate())
@property
def identity(self):  # override
    """Return the value stored in ``self._identity``.

    NOTE(review): ``_identity`` is presumably the file stat of the on-disk
    "dirstate" file captured by the constructor -- confirm against
    ``__init__``.
    """
    return self._identity
def _keys(self):
    """Return what the underlying treestate reports as tracked.

    The call is ``self._tree.tracked("")``.
    NOTE(review): the empty string is presumably a path prefix that selects
    every tracked path -- confirm against the treestate bindings.
    """
    tree = self._tree
    return tree.tracked("")
def _items(self):
    """Yield ``(path, legacy_entry)`` for each key returned by ``_keys()``.

    Every treestate entry is rebuilt as a legacy-style 4-tuple
    ``(state, mode, merge_state, mtime)``:

    * ``state`` is the entry's flags passed through
      ``bindings.treestate.tohgstate``;
    * ``merge_state`` is the entry's flags passed through
      ``_merge_state_from_flags`` (the legacy merge state enum);
    * ``mode`` and ``mtime`` are copied from the entry unchanged.

    Keys for which the tree lookup returns ``None`` are skipped.
    """
    for path in self._keys():
        raw = self._tree.get(path, None)
        if raw is not None:
            # Only flags, mode and mtime are used; the third field and
            # anything after mtime are ignored.
            flags, mode, _unused, mtime, *_rest = raw
            legacy_entry = (
                bindings.treestate.tohgstate(flags),
                mode,
                _merge_state_from_flags(flags),
                mtime,
            )
            yield (path, legacy_entry)
def write(self, st: "BinaryIO", now: int) -> None: # override
parents = self.parents()
@ -49,7 +91,7 @@ class eden_dirstate_map(dirstate.dirstatemap):
# never allow these to be inserted into self._map in the first place.)
m = {
k: (v[0], v[1], v[2])
for k, v in self._map.items()
for k, v in self._items()
if not (v[0] == "n" and v[2] == MERGE_STATE_NOT_APPLICABLE)
}
eden_dirstate_serializer.write(st, parents, m, self.copymap)
@ -65,36 +107,28 @@ class eden_dirstate_map(dirstate.dirstatemap):
need_flush=False,
p1manifest=self._repo[parents[0]].manifestnode(),
)
self._dirtyparents = False
self.nonnormalset, self.otherparentset = self.nonnormalentries()
def read(self): # override
# ignore HG_PENDING because identity is used only for writing
self.identity = util.filestat.frompath(self._opener.join(self._filename))
def _read(self, tree): # override
self._tree = tree
try:
fp = self._opendirstatefile()
try:
parents, dirstate_tuples, copymap = eden_dirstate_serializer.read(
fp, self._filename
)
finally:
fp.close()
except IOError as e:
if e.errno != errno.ENOENT:
raise
else:
# If the dirstate file does not exist, then we silently ignore
# the error because that's what Mercurial's dirstate does.
return
metadata = treestate._unpackmetadata(self._tree.getmetadata())
if not self._dirtyparents:
self.setparents(*parents)
self._map = {
n: parsers.dirstatetuple(v[0], v[1], v[2], DUMMY_MTIME)
for n, v in dirstate_tuples.items()
}
self.copymap = copymap
self._parents = (
node.bin(metadata.get("p1") or node.nullhex),
node.bin(metadata.get("p2") or node.nullhex),
)
# These shouldn't be needed since we never write out a treestate.
self._threshold = 0
self._rootid = 0
def clear(self):
    """Reset both parents to the null id and remove every tracked key.

    The parents are set to ``node.nullid`` first; then each key returned by
    ``_keys()`` is removed from ``self._tree`` individually.
    """
    # This seems to only be called for EdenFS "hg up -C ...".
    # Tracked entries are removed by hand because self._tree.reset()
    # doesn't do the right thing with our in-memory treestate.
    null_id = node.nullid
    self.setparents(null_id, null_id)
    tracked = self._keys()
    for path in tracked:
        self._tree.remove(path)
def iteritems(self):
raise RuntimeError(
@ -110,27 +144,30 @@ class eden_dirstate_map(dirstate.dirstatemap):
def keys(self):
raise RuntimeError("Should not invoke keys() on eden_dirstate_map!")
def get(self, key, default=None):
try:
return self.__getitem__(key)
except KeyError:
return default
def __contains__(self, key):
return self.get(key) is not None
def __getitem__(self, filename):
# type(str) -> parsers.dirstatetuple
entry = self._map.get(filename)
# For eden we store a sparse dirstate with only added/removed files.
# For "normal" files, we need to infer their state from the manifest.
def _get(self, path, default=None):
entry = super()._get(path, None)
if entry is not None:
return entry
# edenfs only tracks one parent
commitctx = self._repo["."]
node, flag = commitctx._fileinfo(filename)
mode = modefromflag[flag]
return parsers.dirstatetuple("n", mode, MERGE_STATE_NOT_APPLICABLE, DUMMY_MTIME)
try:
_node, flag = commitctx._fileinfo(path)
except KeyError:
return default
return (
bindings.treestate.EXIST_P1 | bindings.treestate.EXIST_NEXT,
modefromflag[flag],
MERGE_STATE_NOT_APPLICABLE,
DUMMY_MTIME,
None,
)
def hastrackeddir(self, d): # override
# TODO(mbolin): Unclear whether it is safe to hardcode this to False.
@ -140,24 +177,20 @@ class eden_dirstate_map(dirstate.dirstatemap):
# TODO(mbolin): Unclear whether it is safe to hardcode this to False.
return False
def _insert_tuple(self, filename, state, mode, size, mtime): # override
if size != MERGE_STATE_BOTH_PARENTS and size != MERGE_STATE_OTHER_PARENT:
merge_state = MERGE_STATE_NOT_APPLICABLE
def _merge_state_from_flags(flags):
# Convert treestate flags back into legacy merge state enum. This mirrors
# logic in treestate::legacy_eden_dirstate::serialize_entry.
p1 = flags & bindings.treestate.EXIST_P1
p2 = flags & bindings.treestate.EXIST_P2
nxt = flags & bindings.treestate.EXIST_NEXT
if p2:
if p1:
return MERGE_STATE_BOTH_PARENTS
else:
merge_state = size
self._map[filename] = parsers.dirstatetuple(
state, mode, merge_state, DUMMY_MTIME
)
def nonnormalentries(self):
"""Returns a set of filenames."""
# type() -> Tuple[Set[str], Set[str]]
nonnorm = set()
otherparent = set()
for path, entry in pycompat.iteritems(self._map):
if entry[0] != "n":
nonnorm.add(path)
elif entry[2] == MERGE_STATE_OTHER_PARENT:
otherparent.add(path)
return nonnorm, otherparent
return MERGE_STATE_OTHER_PARENT
elif not p1 and nxt:
return MERGE_STATE_BOTH_PARENTS
else:
return MERGE_STATE_NOT_APPLICABLE

View File

@ -502,11 +502,11 @@ def _trackdirstatesizes(lui: "uimod.ui", repo: "localrepo.localrepository") -> N
dirstate = repo.dirstate
dirstatesize = None
try:
# Eden and flat dirstate.
# Flat dirstate.
dirstatesize = len(dirstate._map._map)
except AttributeError:
# Treestate.
dirstatesize = len(dirstate._map)
# Treestate (including eden):
dirstatesize = len(dirstate._map._tree)
if dirstatesize is not None:
lui.log("dirstate_size", dirstate_size=dirstatesize)
if (

View File

@ -98,7 +98,7 @@ class treestatemap(object):
def copymap(self):
result = {}
for path in self._tree.walk(treestate.COPIED, 0):
copied = self._tree.get(path, None)[-1]
copied = self._get(path)[-1]
if not copied:
raise error.Abort(
_(
@ -128,7 +128,7 @@ class treestatemap(object):
return len(self._tree)
def get(self, key, default=None):
entry = self._tree.get(key, None)
entry = self._get(key)
if entry is None or len(entry) != 5:
return default
flags, mode, size, mtime, _copied = entry
@ -174,7 +174,7 @@ class treestatemap(object):
self._tree.insert(f, state, mode, size, mtime, None)
def removefile(self, f, oldstate, size):
existing = self._tree.get(f, None)
existing = self._get(f)
if existing:
# preserve "copied" information
state, mode, size, mtime, copied = existing
@ -221,7 +221,7 @@ class treestatemap(object):
# Typically, dropfile is used in 2 cases:
# - "hg forget": mark the file as "untracked".
# - "hg update": remove files only tracked by old commit.
entry = self._tree.get(f, None)
entry = self._get(f)
if not entry:
return False
else:
@ -517,7 +517,7 @@ class treestatemap(object):
def copy(self, source, dest):
if source == dest:
return
existing = self._tree.get(dest, None)
existing = self._get(dest)
if existing:
state, mode, size, mtime, copied = existing
if copied != source:
@ -544,7 +544,7 @@ class treestatemap(object):
Return True if the file was changed, False if it's already marked.
"""
existing = self._tree.get(path, None)
existing = self._get(path)
if not existing:
# The file was not in dirstate (untracked). Add it.
state = treestate.NEED_CHECK
@ -566,7 +566,7 @@ class treestatemap(object):
Return True if the file was changed, False if the file does not have
NEED_CHECK.
"""
existing = self._tree.get(path, None)
existing = self._get(path)
if existing:
state, mode, size, mtime, copied = existing
if treestate.NEED_CHECK & state:
@ -579,13 +579,16 @@ class treestatemap(object):
"""Return the copysource for path. Return None if it's not copied, or
path does not exist.
"""
existing = self._tree.get(path, None)
existing = self._get(path)
if existing:
_state, _mode, _size, _mtime, copied = existing
return copied
else:
return None
def _get(self, path, default=None):
    """Look ``path`` up in ``self._tree`` and return the result.

    This is a thin wrapper around ``self._tree.get(path, default)``; the
    other lookups in this class go through it rather than calling the tree
    directly.
    """
    tree = self._tree
    return tree.get(path, default)
def currentversion(repo):
"""get the current dirstate version"""