mononoke: introduce WrappedPathHash to walker

Summary:
First, some background on the existing WrappedPath type:   In Mononoke the MPath type is such that  None==Root and Some(MPath)==NonRoot. This means that where a path may be present one needs to use double-Option with Option<Option<MPath>>, so that Root is Some(None).

To reduce the need for double Option, and subsequently to allow for newtype features like memoization, the walker has WrappedPath, so we can use Option<WrappedPath> instead.

This change introduces a similar type WrappedPathHash for MPathHash, which means that the sample_fingerprint for WrappedPath can be now be non-optional as even root paths/manifests can now have a sample_fingerprint.

Reviewed By: mitrandir77

Differential Revision: D27995143

fbshipit-source-id: b674abd4ec94749f4f5797c697ae7381e1a08d02
This commit is contained in:
Alex Hornby 2021-04-27 05:59:01 -07:00 committed by Facebook GitHub Bot
parent 3f56e43575
commit 923c6c6b52
4 changed files with 46 additions and 29 deletions

View File

@ -501,15 +501,32 @@ impl NodeType {
}
}
// Memoize the hash of the path as it is used frequently
const ROOT_FINGERPRINT: u64 = 0;
#[derive(Debug)]
pub struct MPathHashMemo {
mpath: MPath,
memoized_hash: OnceCell<MPathHash>,
/// Represent root or non root path hash.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum WrappedPathHash {
Root,
NonRoot(MPathHash),
}
impl MPathHashMemo {
impl WrappedPathHash {
pub fn sampling_fingerprint(&self) -> u64 {
match self {
WrappedPathHash::Root => ROOT_FINGERPRINT,
WrappedPathHash::NonRoot(path_hash) => path_hash.sampling_fingerprint(),
}
}
}
// Memoize the hash of the path as it is used frequently
#[derive(Debug)]
pub struct MPathWithHashMemo {
mpath: MPath,
memoized_hash: OnceCell<WrappedPathHash>,
}
impl MPathWithHashMemo {
fn new(mpath: MPath) -> Self {
Self {
mpath,
@ -517,9 +534,9 @@ impl MPathHashMemo {
}
}
pub fn get_path_hash(&self) -> &MPathHash {
pub fn get_path_hash_memo(&self) -> &WrappedPathHash {
self.memoized_hash
.get_or_init(|| self.mpath.get_path_hash())
.get_or_init(|| WrappedPathHash::NonRoot(self.mpath.get_path_hash()))
}
pub fn mpath(&self) -> &MPath {
@ -527,15 +544,15 @@ impl MPathHashMemo {
}
}
impl PartialEq for MPathHashMemo {
impl PartialEq for MPathWithHashMemo {
fn eq(&self, other: &Self) -> bool {
self.mpath == other.mpath
}
}
impl Eq for MPathHashMemo {}
impl Eq for MPathWithHashMemo {}
impl Hash for MPathHashMemo {
impl Hash for MPathWithHashMemo {
fn hash<H: Hasher>(&self, state: &mut H) {
self.mpath.hash(state);
}
@ -544,7 +561,7 @@ impl Hash for MPathHashMemo {
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum WrappedPath {
Root,
NonRoot(ArcIntern<EagerHashMemoizer<MPathHashMemo>>),
NonRoot(ArcIntern<EagerHashMemoizer<MPathWithHashMemo>>),
}
impl WrappedPath {
@ -555,15 +572,15 @@ impl WrappedPath {
}
}
pub fn get_path_hash(&self) -> Option<&MPathHash> {
pub fn get_path_hash(&self) -> &WrappedPathHash {
match self {
WrappedPath::Root => None,
WrappedPath::NonRoot(path) => Some(path.get_path_hash()),
WrappedPath::Root => &WrappedPathHash::Root,
WrappedPath::NonRoot(path) => path.get_path_hash_memo(),
}
}
pub fn sampling_fingerprint(&self) -> Option<u64> {
self.get_path_hash().map(|h| h.sampling_fingerprint())
pub fn sampling_fingerprint(&self) -> u64 {
self.get_path_hash().sampling_fingerprint()
}
}
@ -583,7 +600,7 @@ impl From<Option<MPath>> for WrappedPath {
let hasher_fac = PATH_HASHER_FACTORY.get_or_init(|| RandomState::default());
match mpath {
Some(mpath) => WrappedPath::NonRoot(ArcIntern::new(EagerHashMemoizer::new(
MPathHashMemo::new(mpath),
MPathWithHashMemo::new(mpath),
hasher_fac,
))),
None => WrappedPath::Root,

View File

@ -298,7 +298,7 @@ where
sample_rate => {
let sampling_fingerprint = repo_path.map_or_else(
|| step.target.sampling_fingerprint(),
|r| r.sampling_fingerprint(),
|r| Some(r.sampling_fingerprint()),
);
sampling_fingerprint
.map_or(self.options.sample_offset % sample_rate == 0, |fp| {

View File

@ -168,7 +168,7 @@ where
blobstore_key,
node_type: n.get_type(),
node_fingerprint: n.sampling_fingerprint(),
similarity_key: n.stats_path().and_then(|p| p.sampling_fingerprint()),
similarity_key: n.stats_path().map(|p| p.sampling_fingerprint()),
relatedness_key: None, // TODO(ahornby) track mtime like in corpus
uncompressed_size,
})

View File

@ -5,7 +5,7 @@
* GNU General Public License version 2.
*/
use crate::graph::{EdgeType, Node, NodeData, NodeType, UnodeFlags, WrappedPath};
use crate::graph::{EdgeType, Node, NodeData, NodeType, UnodeFlags, WrappedPath, WrappedPathHash};
use crate::log;
use crate::progress::sort_by_string;
use crate::walk::{
@ -23,7 +23,7 @@ use futures::future::TryFutureExt;
use itertools::Itertools;
use mercurial_types::{HgChangesetId, HgFileNodeId, HgManifestId};
use mononoke_types::{
ChangesetId, ContentId, DeletedManifestId, FastlogBatchId, FileUnodeId, FsnodeId, MPathHash,
ChangesetId, ContentId, DeletedManifestId, FastlogBatchId, FileUnodeId, FsnodeId,
ManifestUnodeId, RepositoryId, SkeletonManifestId,
};
use phases::{Phase, Phases};
@ -173,7 +173,7 @@ pub struct WalkState {
bcs_ids: InternMap<ChangesetId, InternedId<ChangesetId>>,
hg_cs_ids: InternMap<HgChangesetId, InternedId<HgChangesetId>>,
hg_filenode_ids: InternMap<HgFileNodeId, InternedId<HgFileNodeId>>,
mpath_hashs: InternMap<Option<MPathHash>, InternedId<Option<MPathHash>>>,
path_hashes: InternMap<WrappedPathHash, InternedId<WrappedPathHash>>,
hg_manifest_ids: InternMap<HgManifestId, InternedId<HgManifestId>>,
unode_file_ids: InternMap<FileUnodeId, InternedId<FileUnodeId>>,
unode_manifest_ids: InternMap<ManifestUnodeId, InternedId<ManifestUnodeId>>,
@ -191,8 +191,8 @@ pub struct WalkState {
visited_hg_cs_mapping: StateMap<InternedId<HgChangesetId>>,
visited_hg_cs_via_bonsai: StateMap<InternedId<HgChangesetId>>,
visited_hg_file_envelope: StateMap<InternedId<HgFileNodeId>>,
visited_hg_filenode: StateMap<(InternedId<Option<MPathHash>>, InternedId<HgFileNodeId>)>,
visited_hg_manifest: StateMap<(InternedId<Option<MPathHash>>, InternedId<HgManifestId>)>,
visited_hg_filenode: StateMap<(InternedId<WrappedPathHash>, InternedId<HgFileNodeId>)>,
visited_hg_manifest: StateMap<(InternedId<WrappedPathHash>, InternedId<HgManifestId>)>,
// Derived
visited_blame: StateMap<InternedId<FileUnodeId>>,
visited_changeset_info: StateMap<InternedId<ChangesetId>>,
@ -231,7 +231,7 @@ impl WalkState {
bcs_ids: InternMap::with_hasher(fac.clone()),
hg_cs_ids: InternMap::with_hasher(fac.clone()),
hg_filenode_ids: InternMap::with_hasher(fac.clone()),
mpath_hashs: InternMap::with_hasher(fac.clone()),
path_hashes: InternMap::with_hasher(fac.clone()),
hg_manifest_ids: InternMap::with_hasher(fac.clone()),
unode_file_ids: InternMap::with_hasher(fac.clone()),
unode_manifest_ids: InternMap::with_hasher(fac.clone()),
@ -301,14 +301,14 @@ impl WalkState {
/// If the state did not have this value present, true is returned.
fn record_with_path<K>(
&self,
visited_with_path: &StateMap<(InternedId<Option<MPathHash>>, K)>,
visited_with_path: &StateMap<(InternedId<WrappedPathHash>, K)>,
k: (&WrappedPath, &K),
) -> bool
where
K: Eq + Hash + Copy,
{
let (path, id) = k;
let path = self.mpath_hashs.interned(&path.get_path_hash().cloned());
let path = self.path_hashes.interned(path.get_path_hash());
let key = (path, *id);
if visited_with_path.contains_key(&key) {
false
@ -424,7 +424,7 @@ impl WalkState {
self.clear_mapping(NodeType::UnodeManifest);
}
InternedType::MPathHash => {
self.mpath_hashs.clear();
self.path_hashes.clear();
self.clear_mapping(NodeType::HgFileNode);
self.clear_mapping(NodeType::HgManifest);
}