diff --git a/eden/mononoke/walker/src/graph.rs b/eden/mononoke/walker/src/graph.rs index d3e42c35fc..c22d9f76ee 100644 --- a/eden/mononoke/walker/src/graph.rs +++ b/eden/mononoke/walker/src/graph.rs @@ -501,15 +501,32 @@ impl NodeType { } } -// Memoize the hash of the path as it is used frequently +const ROOT_FINGERPRINT: u64 = 0; -#[derive(Debug)] -pub struct MPathHashMemo { - mpath: MPath, - memoized_hash: OnceCell, +/// Represent root or non root path hash. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum WrappedPathHash { + Root, + NonRoot(MPathHash), } -impl MPathHashMemo { +impl WrappedPathHash { + pub fn sampling_fingerprint(&self) -> u64 { + match self { + WrappedPathHash::Root => ROOT_FINGERPRINT, + WrappedPathHash::NonRoot(path_hash) => path_hash.sampling_fingerprint(), + } + } +} + +// Memoize the hash of the path as it is used frequently +#[derive(Debug)] +pub struct MPathWithHashMemo { + mpath: MPath, + memoized_hash: OnceCell, +} + +impl MPathWithHashMemo { fn new(mpath: MPath) -> Self { Self { mpath, @@ -517,9 +534,9 @@ impl MPathHashMemo { } } - pub fn get_path_hash(&self) -> &MPathHash { + pub fn get_path_hash_memo(&self) -> &WrappedPathHash { self.memoized_hash - .get_or_init(|| self.mpath.get_path_hash()) + .get_or_init(|| WrappedPathHash::NonRoot(self.mpath.get_path_hash())) } pub fn mpath(&self) -> &MPath { @@ -527,15 +544,15 @@ impl MPathHashMemo { } } -impl PartialEq for MPathHashMemo { +impl PartialEq for MPathWithHashMemo { fn eq(&self, other: &Self) -> bool { self.mpath == other.mpath } } -impl Eq for MPathHashMemo {} +impl Eq for MPathWithHashMemo {} -impl Hash for MPathHashMemo { +impl Hash for MPathWithHashMemo { fn hash(&self, state: &mut H) { self.mpath.hash(state); } @@ -544,7 +561,7 @@ impl Hash for MPathHashMemo { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum WrappedPath { Root, - NonRoot(ArcIntern>), + NonRoot(ArcIntern>), } impl WrappedPath { @@ -555,15 +572,15 @@ impl WrappedPath { } } - pub fn get_path_hash(&self) -> Option<&MPathHash> { + pub fn get_path_hash(&self) -> &WrappedPathHash { match self { - WrappedPath::Root => None, - WrappedPath::NonRoot(path) => Some(path.get_path_hash()), + WrappedPath::Root => &WrappedPathHash::Root, + WrappedPath::NonRoot(path) => path.get_path_hash_memo(), } } - pub fn sampling_fingerprint(&self) -> Option { - self.get_path_hash().map(|h| h.sampling_fingerprint()) + pub fn sampling_fingerprint(&self) -> u64 { + self.get_path_hash().sampling_fingerprint() } } @@ -583,7 +600,7 @@ impl From> for WrappedPath { let hasher_fac = PATH_HASHER_FACTORY.get_or_init(|| RandomState::default()); match mpath { Some(mpath) => WrappedPath::NonRoot(ArcIntern::new(EagerHashMemoizer::new( - MPathHashMemo::new(mpath), + MPathWithHashMemo::new(mpath), hasher_fac, ))), None => WrappedPath::Root, diff --git a/eden/mononoke/walker/src/sampling.rs b/eden/mononoke/walker/src/sampling.rs index a41cac9ccc..d5bad6b19d 100644 --- a/eden/mononoke/walker/src/sampling.rs +++ b/eden/mononoke/walker/src/sampling.rs @@ -298,7 +298,7 @@ where sample_rate => { let sampling_fingerprint = repo_path.map_or_else( || step.target.sampling_fingerprint(), - |r| r.sampling_fingerprint(), + |r| Some(r.sampling_fingerprint()), ); sampling_fingerprint .map_or(self.options.sample_offset % sample_rate == 0, |fp| { diff --git a/eden/mononoke/walker/src/scrub.rs b/eden/mononoke/walker/src/scrub.rs index 9519498371..268a78ddf9 100644 --- a/eden/mononoke/walker/src/scrub.rs +++ b/eden/mononoke/walker/src/scrub.rs @@ -168,7 +168,7 @@ where blobstore_key, node_type: n.get_type(), node_fingerprint: n.sampling_fingerprint(), - similarity_key: n.stats_path().and_then(|p| p.sampling_fingerprint()), + similarity_key: n.stats_path().map(|p| p.sampling_fingerprint()), relatedness_key: None, // TODO(ahornby) track mtime like in corpus uncompressed_size, }) diff --git a/eden/mononoke/walker/src/state.rs b/eden/mononoke/walker/src/state.rs index 45cc166a73..d7d31eb597 100644 --- a/eden/mononoke/walker/src/state.rs +++ b/eden/mononoke/walker/src/state.rs @@ -5,7 +5,7 @@ * GNU General Public License version 2. */ -use crate::graph::{EdgeType, Node, NodeData, NodeType, UnodeFlags, WrappedPath}; +use crate::graph::{EdgeType, Node, NodeData, NodeType, UnodeFlags, WrappedPath, WrappedPathHash}; use crate::log; use crate::progress::sort_by_string; use crate::walk::{ @@ -23,7 +23,7 @@ use futures::future::TryFutureExt; use itertools::Itertools; use mercurial_types::{HgChangesetId, HgFileNodeId, HgManifestId}; use mononoke_types::{ - ChangesetId, ContentId, DeletedManifestId, FastlogBatchId, FileUnodeId, FsnodeId, MPathHash, + ChangesetId, ContentId, DeletedManifestId, FastlogBatchId, FileUnodeId, FsnodeId, ManifestUnodeId, RepositoryId, SkeletonManifestId, }; use phases::{Phase, Phases}; @@ -173,7 +173,7 @@ pub struct WalkState { bcs_ids: InternMap>, hg_cs_ids: InternMap>, hg_filenode_ids: InternMap>, - mpath_hashs: InternMap, InternedId>>, + path_hashes: InternMap>, hg_manifest_ids: InternMap>, unode_file_ids: InternMap>, unode_manifest_ids: InternMap>, @@ -191,8 +191,8 @@ pub struct WalkState { visited_hg_cs_mapping: StateMap>, visited_hg_cs_via_bonsai: StateMap>, visited_hg_file_envelope: StateMap>, - visited_hg_filenode: StateMap<(InternedId>, InternedId)>, - visited_hg_manifest: StateMap<(InternedId>, InternedId)>, + visited_hg_filenode: StateMap<(InternedId, InternedId)>, + visited_hg_manifest: StateMap<(InternedId, InternedId)>, // Derived visited_blame: StateMap>, visited_changeset_info: StateMap>, @@ -231,7 +231,7 @@ impl WalkState { bcs_ids: InternMap::with_hasher(fac.clone()), hg_cs_ids: InternMap::with_hasher(fac.clone()), hg_filenode_ids: InternMap::with_hasher(fac.clone()), - mpath_hashs: InternMap::with_hasher(fac.clone()), + path_hashes: InternMap::with_hasher(fac.clone()), hg_manifest_ids: InternMap::with_hasher(fac.clone()), unode_file_ids: InternMap::with_hasher(fac.clone()), unode_manifest_ids: InternMap::with_hasher(fac.clone()), @@ -301,14 +301,14 @@ impl WalkState { /// If the state did not have this value present, true is returned. fn record_with_path( &self, - visited_with_path: &StateMap<(InternedId>, K)>, + visited_with_path: &StateMap<(InternedId, K)>, k: (&WrappedPath, &K), ) -> bool where K: Eq + Hash + Copy, { let (path, id) = k; - let path = self.mpath_hashs.interned(&path.get_path_hash().cloned()); + let path = self.path_hashes.interned(path.get_path_hash()); let key = (path, *id); if visited_with_path.contains_key(&key) { false @@ -424,7 +424,7 @@ impl WalkState { self.clear_mapping(NodeType::UnodeManifest); } InternedType::MPathHash => { - self.mpath_hashs.clear(); + self.path_hashes.clear(); self.clear_mapping(NodeType::HgFileNode); self.clear_mapping(NodeType::HgManifest); }