mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 00:45:18 +03:00
mononoke: introduce WrappedPathHash to walker
Summary: First, some background on the existing WrappedPath type: In Mononoke the MPath type is such that None==Root and Some(MPath)==NonRoot. This means that where a path may be present one needs to use double-Option with Option<Option<MPath>>, so that Root is Some(None). To reduce the need for double Option, and subsequently to allow for newtype features like memoization, the walker has WrappedPath, so we can use Option<WrappedPath> instead. This change introduces a similar type WrappedPathHash for MPathHash, which means that the sample_fingerprint for WrappedPath can be now be non-optional as even root paths/manifests can now have a sample_fingerprint. Reviewed By: mitrandir77 Differential Revision: D27995143 fbshipit-source-id: b674abd4ec94749f4f5797c697ae7381e1a08d02
This commit is contained in:
parent
3f56e43575
commit
923c6c6b52
@ -501,15 +501,32 @@ impl NodeType {
|
||||
}
|
||||
}
|
||||
|
||||
// Memoize the hash of the path as it is used frequently
|
||||
const ROOT_FINGERPRINT: u64 = 0;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MPathHashMemo {
|
||||
mpath: MPath,
|
||||
memoized_hash: OnceCell<MPathHash>,
|
||||
/// Represent root or non root path hash.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum WrappedPathHash {
|
||||
Root,
|
||||
NonRoot(MPathHash),
|
||||
}
|
||||
|
||||
impl MPathHashMemo {
|
||||
impl WrappedPathHash {
|
||||
pub fn sampling_fingerprint(&self) -> u64 {
|
||||
match self {
|
||||
WrappedPathHash::Root => ROOT_FINGERPRINT,
|
||||
WrappedPathHash::NonRoot(path_hash) => path_hash.sampling_fingerprint(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Memoize the hash of the path as it is used frequently
|
||||
#[derive(Debug)]
|
||||
pub struct MPathWithHashMemo {
|
||||
mpath: MPath,
|
||||
memoized_hash: OnceCell<WrappedPathHash>,
|
||||
}
|
||||
|
||||
impl MPathWithHashMemo {
|
||||
fn new(mpath: MPath) -> Self {
|
||||
Self {
|
||||
mpath,
|
||||
@ -517,9 +534,9 @@ impl MPathHashMemo {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_path_hash(&self) -> &MPathHash {
|
||||
pub fn get_path_hash_memo(&self) -> &WrappedPathHash {
|
||||
self.memoized_hash
|
||||
.get_or_init(|| self.mpath.get_path_hash())
|
||||
.get_or_init(|| WrappedPathHash::NonRoot(self.mpath.get_path_hash()))
|
||||
}
|
||||
|
||||
pub fn mpath(&self) -> &MPath {
|
||||
@ -527,15 +544,15 @@ impl MPathHashMemo {
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for MPathHashMemo {
|
||||
impl PartialEq for MPathWithHashMemo {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.mpath == other.mpath
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for MPathHashMemo {}
|
||||
impl Eq for MPathWithHashMemo {}
|
||||
|
||||
impl Hash for MPathHashMemo {
|
||||
impl Hash for MPathWithHashMemo {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.mpath.hash(state);
|
||||
}
|
||||
@ -544,7 +561,7 @@ impl Hash for MPathHashMemo {
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum WrappedPath {
|
||||
Root,
|
||||
NonRoot(ArcIntern<EagerHashMemoizer<MPathHashMemo>>),
|
||||
NonRoot(ArcIntern<EagerHashMemoizer<MPathWithHashMemo>>),
|
||||
}
|
||||
|
||||
impl WrappedPath {
|
||||
@ -555,15 +572,15 @@ impl WrappedPath {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_path_hash(&self) -> Option<&MPathHash> {
|
||||
pub fn get_path_hash(&self) -> &WrappedPathHash {
|
||||
match self {
|
||||
WrappedPath::Root => None,
|
||||
WrappedPath::NonRoot(path) => Some(path.get_path_hash()),
|
||||
WrappedPath::Root => &WrappedPathHash::Root,
|
||||
WrappedPath::NonRoot(path) => path.get_path_hash_memo(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sampling_fingerprint(&self) -> Option<u64> {
|
||||
self.get_path_hash().map(|h| h.sampling_fingerprint())
|
||||
pub fn sampling_fingerprint(&self) -> u64 {
|
||||
self.get_path_hash().sampling_fingerprint()
|
||||
}
|
||||
}
|
||||
|
||||
@ -583,7 +600,7 @@ impl From<Option<MPath>> for WrappedPath {
|
||||
let hasher_fac = PATH_HASHER_FACTORY.get_or_init(|| RandomState::default());
|
||||
match mpath {
|
||||
Some(mpath) => WrappedPath::NonRoot(ArcIntern::new(EagerHashMemoizer::new(
|
||||
MPathHashMemo::new(mpath),
|
||||
MPathWithHashMemo::new(mpath),
|
||||
hasher_fac,
|
||||
))),
|
||||
None => WrappedPath::Root,
|
||||
|
@ -298,7 +298,7 @@ where
|
||||
sample_rate => {
|
||||
let sampling_fingerprint = repo_path.map_or_else(
|
||||
|| step.target.sampling_fingerprint(),
|
||||
|r| r.sampling_fingerprint(),
|
||||
|r| Some(r.sampling_fingerprint()),
|
||||
);
|
||||
sampling_fingerprint
|
||||
.map_or(self.options.sample_offset % sample_rate == 0, |fp| {
|
||||
|
@ -168,7 +168,7 @@ where
|
||||
blobstore_key,
|
||||
node_type: n.get_type(),
|
||||
node_fingerprint: n.sampling_fingerprint(),
|
||||
similarity_key: n.stats_path().and_then(|p| p.sampling_fingerprint()),
|
||||
similarity_key: n.stats_path().map(|p| p.sampling_fingerprint()),
|
||||
relatedness_key: None, // TODO(ahornby) track mtime like in corpus
|
||||
uncompressed_size,
|
||||
})
|
||||
|
@ -5,7 +5,7 @@
|
||||
* GNU General Public License version 2.
|
||||
*/
|
||||
|
||||
use crate::graph::{EdgeType, Node, NodeData, NodeType, UnodeFlags, WrappedPath};
|
||||
use crate::graph::{EdgeType, Node, NodeData, NodeType, UnodeFlags, WrappedPath, WrappedPathHash};
|
||||
use crate::log;
|
||||
use crate::progress::sort_by_string;
|
||||
use crate::walk::{
|
||||
@ -23,7 +23,7 @@ use futures::future::TryFutureExt;
|
||||
use itertools::Itertools;
|
||||
use mercurial_types::{HgChangesetId, HgFileNodeId, HgManifestId};
|
||||
use mononoke_types::{
|
||||
ChangesetId, ContentId, DeletedManifestId, FastlogBatchId, FileUnodeId, FsnodeId, MPathHash,
|
||||
ChangesetId, ContentId, DeletedManifestId, FastlogBatchId, FileUnodeId, FsnodeId,
|
||||
ManifestUnodeId, RepositoryId, SkeletonManifestId,
|
||||
};
|
||||
use phases::{Phase, Phases};
|
||||
@ -173,7 +173,7 @@ pub struct WalkState {
|
||||
bcs_ids: InternMap<ChangesetId, InternedId<ChangesetId>>,
|
||||
hg_cs_ids: InternMap<HgChangesetId, InternedId<HgChangesetId>>,
|
||||
hg_filenode_ids: InternMap<HgFileNodeId, InternedId<HgFileNodeId>>,
|
||||
mpath_hashs: InternMap<Option<MPathHash>, InternedId<Option<MPathHash>>>,
|
||||
path_hashes: InternMap<WrappedPathHash, InternedId<WrappedPathHash>>,
|
||||
hg_manifest_ids: InternMap<HgManifestId, InternedId<HgManifestId>>,
|
||||
unode_file_ids: InternMap<FileUnodeId, InternedId<FileUnodeId>>,
|
||||
unode_manifest_ids: InternMap<ManifestUnodeId, InternedId<ManifestUnodeId>>,
|
||||
@ -191,8 +191,8 @@ pub struct WalkState {
|
||||
visited_hg_cs_mapping: StateMap<InternedId<HgChangesetId>>,
|
||||
visited_hg_cs_via_bonsai: StateMap<InternedId<HgChangesetId>>,
|
||||
visited_hg_file_envelope: StateMap<InternedId<HgFileNodeId>>,
|
||||
visited_hg_filenode: StateMap<(InternedId<Option<MPathHash>>, InternedId<HgFileNodeId>)>,
|
||||
visited_hg_manifest: StateMap<(InternedId<Option<MPathHash>>, InternedId<HgManifestId>)>,
|
||||
visited_hg_filenode: StateMap<(InternedId<WrappedPathHash>, InternedId<HgFileNodeId>)>,
|
||||
visited_hg_manifest: StateMap<(InternedId<WrappedPathHash>, InternedId<HgManifestId>)>,
|
||||
// Derived
|
||||
visited_blame: StateMap<InternedId<FileUnodeId>>,
|
||||
visited_changeset_info: StateMap<InternedId<ChangesetId>>,
|
||||
@ -231,7 +231,7 @@ impl WalkState {
|
||||
bcs_ids: InternMap::with_hasher(fac.clone()),
|
||||
hg_cs_ids: InternMap::with_hasher(fac.clone()),
|
||||
hg_filenode_ids: InternMap::with_hasher(fac.clone()),
|
||||
mpath_hashs: InternMap::with_hasher(fac.clone()),
|
||||
path_hashes: InternMap::with_hasher(fac.clone()),
|
||||
hg_manifest_ids: InternMap::with_hasher(fac.clone()),
|
||||
unode_file_ids: InternMap::with_hasher(fac.clone()),
|
||||
unode_manifest_ids: InternMap::with_hasher(fac.clone()),
|
||||
@ -301,14 +301,14 @@ impl WalkState {
|
||||
/// If the state did not have this value present, true is returned.
|
||||
fn record_with_path<K>(
|
||||
&self,
|
||||
visited_with_path: &StateMap<(InternedId<Option<MPathHash>>, K)>,
|
||||
visited_with_path: &StateMap<(InternedId<WrappedPathHash>, K)>,
|
||||
k: (&WrappedPath, &K),
|
||||
) -> bool
|
||||
where
|
||||
K: Eq + Hash + Copy,
|
||||
{
|
||||
let (path, id) = k;
|
||||
let path = self.mpath_hashs.interned(&path.get_path_hash().cloned());
|
||||
let path = self.path_hashes.interned(path.get_path_hash());
|
||||
let key = (path, *id);
|
||||
if visited_with_path.contains_key(&key) {
|
||||
false
|
||||
@ -424,7 +424,7 @@ impl WalkState {
|
||||
self.clear_mapping(NodeType::UnodeManifest);
|
||||
}
|
||||
InternedType::MPathHash => {
|
||||
self.mpath_hashs.clear();
|
||||
self.path_hashes.clear();
|
||||
self.clear_mapping(NodeType::HgFileNode);
|
||||
self.clear_mapping(NodeType::HgManifest);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user