manifest: add subdir_diff compatibility function for gettreepacks

Summary:
The C++ manifest implements walksubdirtrees which is used to compute the packs
that a "client" wants for a prefetch. In terms of interface the function is very
annoying and couples with storage and tree representations without being part
of any of them.

We reproduce that functionality as a means to replace the C++ implementation.
The long term goal is to do lazy fetches using an iteration style that plays
nicer with batching downloads.

This change also includes fastmanifest updates because they are required to
enable the walksubdirtrees functionality in our tests.

Reviewed By: quark-zju

Differential Revision: D17086669

fbshipit-source-id: 6c1f9fbf975814f0a2071f8d1c8e022e5ad58e29
This commit is contained in:
Stefan Filip 2019-08-30 10:48:01 -07:00 committed by Facebook Github Bot
parent fa4e12df09
commit ffb563f1bb
7 changed files with 281 additions and 13 deletions

View File

@ -11,6 +11,7 @@ import heapq
import os
import time
from edenscm.hgext.treemanifest import _buildtree, nativemanifesttype
from edenscm.mercurial import error, manifest, mdiff, revlog, util
from edenscmnative import cfastmanifest, cstore
@ -144,14 +145,13 @@ class hybridmanifest(object):
if self.node in self.treecache:
self.__treemanifest = self.treecache[self.node]
elif self.node == revlog.nullid:
store = self.manifestlog.datastore
self.__treemanifest = cstore.treemanifest(store)
self.__treemanifest = _buildtree(self.manifestlog)
else:
store = self.manifestlog.datastore
self.ui.pushbuffer()
try:
store.get("", self.node)
self.__treemanifest = cstore.treemanifest(store, self.node)
self.__treemanifest = _buildtree(self.manifestlog, self.node)
# The buffer is only to eat certain errors, so show
# non-error messages.
output = self.ui.popbuffer()
@ -235,7 +235,7 @@ class hybridmanifest(object):
def fastdelta(self, base, changes):
m = self._manifest("fastdelta")
if isinstance(m, cstore.treemanifest):
if isinstance(m, nativemanifesttype):
return fastdelta(m, m.find, base, changes)
return m.fastdelta(base, changes)
@ -246,7 +246,7 @@ class hybridmanifest(object):
return hybridmanifest(self.ui, self.opener, self.manifestlog, fast=m)
elif isinstance(m, manifest.manifestdict):
return hybridmanifest(self.ui, self.opener, self.manifestlog, flat=m)
elif supportsctree and isinstance(m, cstore.treemanifest):
elif supportsctree and isinstance(m, nativemanifesttype):
return hybridmanifest(self.ui, self.opener, self.manifestlog, tree=m)
else:
raise ValueError("unknown manifest type {0}".format(type(m)))

View File

@ -10,6 +10,7 @@ import tempfile
from edenscm.mercurial import match as matchmod, node, revlog, util as hgutil
from edenscmnative import cstore
from edenscmnative.bindings import manifest as rustmanifest
from . import svnexternals, svnwrap, util
@ -284,7 +285,9 @@ class HgEditor(svnwrap.Editor):
def get_files_in_dir(self, ctx, dir):
assert dir == "" or dir.endswith("/")
mf = ctx.manifest()
if isinstance(mf, cstore.treemanifest):
if isinstance(mf, cstore.treemanifest) or isinstance(
mf, rustmanifest.treemanifest
):
matcher = matchmod.match("", "/", patterns=[dir], default="path")
for x in mf.walk(matcher):
yield x

View File

@ -236,6 +236,8 @@ configitem("treemanifest", "usehttp", default=False)
configitem("treemanifest", "prefetchdraftparents", default=True)
configitem("treemanifest", "rustmanifest", default=False)
nativemanifesttype = (cstore.treemanifest, rustmanifest.treemanifest)
PACK_CATEGORY = "manifests"
TREEGROUP_PARTTYPE = "b2x:treegroup"
@ -2417,7 +2419,10 @@ def _generatepackstream(
# implementation cannot handle more yet.
userustmanifest = repo.ui.configbool("treemanifest", "rustmanifest")
if userustmanifest:
raise NotImplementedError
basenodes = [mybasenode for (_path, mybasenode) in basetrees]
subtrees = rustmanifest.subdirdiff(
datastore, rootdir, node, basenodes, depth
)
else:
subtrees = cstore.treemanifest.walksubdirtrees(
(rootdir, node), datastore, comparetrees=basetrees[:2], depth=depth
@ -2659,10 +2664,7 @@ def serverrepack(repo, incremental=False, options=None):
def _debugcmdfindtreemanifest(orig, ctx):
manifest = ctx.manifest()
# Check if the manifest we have is a treemanifest.
if isinstance(manifest, cstore.treemanifest):
return manifest
if isinstance(manifest, rustmanifest.treemanifest):
# should be the same as the cstore treemanifest
if isinstance(manifest, nativemanifesttype):
return manifest
try:
# Look up the treemanifest in the treemanifestlog. There might not be

View File

@ -47,6 +47,20 @@ pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let name = [package, "manifest"].join(".");
let m = PyModule::new(py, &name)?;
m.add_class::<treemanifest>(py)?;
m.add(
py,
"subdirdiff",
py_fn!(
py,
subdir_diff(
store: PyObject,
path: &PyBytes,
binnode: &PyBytes,
other_binnodes: &PyList,
depth: i32
)
),
)?;
Ok(m)
}
@ -369,6 +383,46 @@ py_class!(class treemanifest |py| {
}
});
pub fn subdir_diff(
py: Python,
store: PyObject,
path: &PyBytes,
binnode: &PyBytes,
other_binnodes: &PyList,
depth: i32,
) -> PyResult<PyObject> {
let store = PythonDataStore::new(store);
let manifest_store = Arc::new(ManifestStore::new(store));
let mut others = vec![];
for pybytes in other_binnodes.iter(py) {
others.push(pybytes_to_node(py, &pybytes.extract(py)?)?);
}
let diff = manifest::compat_subtree_diff(
manifest_store,
&pybytes_to_path(py, path),
pybytes_to_node(py, binnode)?,
others,
depth,
)
.map_pyerr::<exc::RuntimeError>(py)?;
let mut result = vec![];
for (path, node, bytes) in diff {
let tuple = PyTuple::new(
py,
&[
path_to_pybytes(py, &path).into_object(),
node_to_pybytes(py, node).into_object(),
PyBytes::new(py, &bytes).into_object(),
py.None(),
py.None(),
py.None(),
],
);
result.push(tuple);
}
vec_to_iter(py, result)
}
fn vec_to_iter<T: ToPyObject>(py: Python, items: Vec<T>) -> PyResult<PyObject> {
let list: PyList = items.into_py_object(py);
list.into_object().call_method(py, "__iter__", NoArgs, None)

View File

@ -68,4 +68,4 @@ pub enum FsNode {
mod file;
pub mod tree;
pub use crate::file::{FileMetadata, FileType};
pub use crate::tree::{diff, DiffEntry, DiffType, Tree, TreeStore};
pub use crate::tree::{compat_subtree_diff, diff, DiffEntry, DiffType, Tree, TreeStore};

View File

@ -680,6 +680,89 @@ where
}
}
/// The purpose of this function is to provide compatible behavior with the C++ implementation
/// of the treemanifest. This function is problematic because it goes through abstraction
/// boundaries and is built with the assumption that the storage format is the same as the
/// in memory format that is the same as the wire format.
///
/// This function returns the nodes that need to be sent over the wire for a subtree of the
/// manifest to be fully hydrated. The subtree is represented by `path` and `node`. The data
/// that is present locally by the client is represented by `other_nodes`.
// NOTE: The implementation is currently custom. Consider converting the code to use Cursor.
// The suggestion received in code review was also to consider making the return type more
// simple (RepoPath, Node) and letting the call sites deal with the Bytes.
pub fn compat_subtree_diff(
store: Arc<dyn TreeStore + Send + Sync>,
path: &RepoPath,
node: Node,
other_nodes: Vec<Node>,
depth: i32,
) -> Fallible<Vec<(RepoPathBuf, Node, Bytes)>> {
struct State {
store: InnerStore,
path: RepoPathBuf,
result: Vec<(RepoPathBuf, Node, Bytes)>,
depth_remaining: i32,
}
impl State {
fn work(&mut self, node: Node, other_nodes: Vec<Node>) -> Fallible<()> {
let entry = self.store.get_entry(&self.path, node)?;
if self.depth_remaining > 0 {
// TODO: optimize "other_nodes" construction
// We use BTreeMap for convenience only, it is more efficient to use an array since
// the entries are already sorted.
let mut others_map = BTreeMap::new();
for other_node in other_nodes {
let other_entry = self.store.get_entry(&self.path, other_node)?;
for other_element_result in other_entry.elements() {
let other_element = other_element_result?;
others_map
.entry(other_element.component)
.or_insert(vec![])
.push(other_element.node);
}
}
for element_result in entry.elements() {
let element = element_result?;
if element.flag != store::Flag::Directory {
continue;
}
let mut others = others_map
.remove(&element.component)
.unwrap_or_else(|| vec![]);
if others.contains(&element.node) {
continue;
}
others.dedup();
self.path.push(element.component.as_ref());
self.depth_remaining -= 1;
self.work(element.node, others)?;
self.depth_remaining += 1;
self.path.pop();
}
}
// NOTE: order in the result set matters for a lot of the integration tests
self.result
.push((self.path.clone(), node, entry.to_bytes()));
Ok(())
}
}
if other_nodes.contains(&node) {
return Ok(vec![]);
}
let mut state = State {
store: InnerStore::new(store),
path: path.to_owned(),
result: vec![],
depth_remaining: depth - 1,
};
state.work(node, other_nodes)?;
Ok(state.result)
}
#[cfg(test)]
mod tests {
use super::*;
@ -1422,4 +1505,130 @@ mod tests {
"
);
}
#[test]
fn test_compat_subtree_diff() {
let store = Arc::new(TestStore::new());
// add ("", 1), ("foo", 11), ("baz", 21), ("foo/bar", 111)
let root_1_entry = store::Entry::from_elements(vec![
store_element("foo", "11", store::Flag::Directory),
store_element("baz", "21", store::Flag::File(FileType::Regular)),
])
.unwrap();
store
.insert(
RepoPath::empty(),
node("1"),
root_1_entry.clone().to_bytes(),
)
.unwrap();
let foo_11_entry = store::Entry::from_elements(vec![store_element(
"bar",
"111",
store::Flag::File(FileType::Regular),
)])
.unwrap();
store
.insert(
repo_path("foo"),
node("11"),
foo_11_entry.clone().to_bytes(),
)
.unwrap();
// add ("", 2), ("foo", 12), ("baz", 21), ("foo/bar", 112)
let root_2_entry = store::Entry::from_elements(vec![
store_element("foo", "12", store::Flag::Directory),
store_element("baz", "21", store::Flag::File(FileType::Regular)),
])
.unwrap();
store
.insert(RepoPath::empty(), node("2"), root_2_entry.to_bytes())
.unwrap();
let foo_12_entry = store::Entry::from_elements(vec![store_element(
"bar",
"112",
store::Flag::File(FileType::Regular),
)])
.unwrap();
store
.insert(repo_path("foo"), node("12"), foo_12_entry.to_bytes())
.unwrap();
assert_eq!(
compat_subtree_diff(
store.clone(),
RepoPath::empty(),
node("1"),
vec![node("2")],
3
)
.unwrap(),
vec![
(
repo_path_buf("foo"),
node("11"),
foo_11_entry.clone().to_bytes()
),
(
RepoPathBuf::new(),
node("1"),
root_1_entry.clone().to_bytes()
),
]
);
assert_eq!(
compat_subtree_diff(
store.clone(),
RepoPath::empty(),
node("1"),
vec![node("2")],
1
)
.unwrap(),
vec![(
RepoPathBuf::new(),
node("1"),
root_1_entry.clone().to_bytes()
),]
);
assert_eq!(
compat_subtree_diff(
store.clone(),
repo_path("foo"),
node("11"),
vec![node("12")],
3
)
.unwrap(),
vec![(
repo_path_buf("foo"),
node("11"),
foo_11_entry.clone().to_bytes()
),]
);
assert_eq!(
compat_subtree_diff(
store.clone(),
RepoPath::empty(),
node("1"),
vec![node("1")],
3
)
.unwrap(),
vec![]
);
assert_eq!(
compat_subtree_diff(
store.clone(),
repo_path("foo"),
node("11"),
vec![node("11")],
3
)
.unwrap(),
vec![]
);
// it is illegal to call compat_subtree_diff with "baz" but we can't validate for it
}
}

View File

@ -106,7 +106,7 @@ impl Entry {
Ok(Entry(underlying.freeze()))
}
// used in tests and in finalize
// used in tests, finalize and subtree_diff
pub fn to_bytes(self) -> Bytes {
self.0
}