diff --git a/blobrepo_utils/src/bonsai.rs b/blobrepo_utils/src/bonsai.rs index c5b40e1bbb..86c953cc61 100644 --- a/blobrepo_utils/src/bonsai.rs +++ b/blobrepo_utils/src/bonsai.rs @@ -355,7 +355,7 @@ fn make_entry(repo: &BlobRepo, diff_result: &BonsaiDiffResult) -> Option { + Changed(path, ft, entry_id) | ChangedReusedId(path, ft, entry_id) => { let blobstore = repo.get_blobstore(); let basename = path.basename().clone(); let hash = entry_id.into_nodehash(); diff --git a/bonsai-utils/src/composite.rs b/bonsai-utils/src/composite.rs index 9489a5deff..a2f9c51806 100644 --- a/bonsai-utils/src/composite.rs +++ b/bonsai-utils/src/composite.rs @@ -45,6 +45,23 @@ impl CompositeEntry { self.files.contains_key(&(*file_type, *hash)) } + /// Whether this composite entry contains the same hash but a different type. + #[inline] + pub fn contains_file_other_type(&self, file_type: &FileType, hash: &HgEntryId) -> bool { + file_type + .complement() + .iter() + .any(|ft| self.contains_file(ft, hash)) + } + + /// Whether this composite entry contains a file with this hash but with any possible type. + #[inline] + pub fn contains_file_any_type(&self, hash: &HgEntryId) -> bool { + FileType::all() + .iter() + .any(|ft| self.contains_file(ft, hash)) + } + #[inline] pub fn num_trees(&self) -> usize { self.trees.len() diff --git a/bonsai-utils/src/diff.rs b/bonsai-utils/src/diff.rs index 1ca74c5a22..ea156d8f66 100644 --- a/bonsai-utils/src/diff.rs +++ b/bonsai-utils/src/diff.rs @@ -42,7 +42,18 @@ pub fn bonsai_diff( #[derive(Clone, Debug, Eq, PartialEq)] pub enum BonsaiDiffResult { + /// This file was changed (was added or modified) in this changeset. Changed(MPath, FileType, HgEntryId), + /// The file was marked changed, but one of the parent file node IDs was reused. This can + /// happen in these situations: + /// + /// 1. The file type was changed without a corresponding change in file contents. + /// 2. There's a merge and one of the parent nodes was picked as the resolution. + /// + /// This is separate from `Changed` because in these instances, if copy information is part + /// of the node it wouldn't be recorded. + ChangedReusedId(MPath, FileType, HgEntryId), + /// This file was deleted in this changeset. Deleted(MPath), } @@ -51,7 +62,9 @@ impl BonsaiDiffResult { #[inline] pub fn path(&self) -> &MPath { match self { - BonsaiDiffResult::Changed(path, ..) | BonsaiDiffResult::Deleted(path) => path, + BonsaiDiffResult::Changed(path, ..) + | BonsaiDiffResult::ChangedReusedId(path, ..) + | BonsaiDiffResult::Deleted(path) => path, } } } @@ -64,6 +77,11 @@ impl fmt::Display for BonsaiDiffResult { "[changed] path: {}, hash: {}, type: {}", path, entry_id, ft ), + BonsaiDiffResult::ChangedReusedId(path, ft, entry_id) => write!( + f, + "[changed, reused id] path: {}, hash: {}, type: {}", + path, entry_id, ft + ), BonsaiDiffResult::Deleted(path) => write!(f, "[deleted] path: {}", path), } } @@ -80,21 +98,32 @@ impl PartialOrd for BonsaiDiffResult { impl Ord for BonsaiDiffResult { fn cmp(&self, other: &BonsaiDiffResult) -> Ordering { match self.path().cmp(other.path()) { - Ordering::Equal => { - use BonsaiDiffResult::*; - // treat changed as less than deleted - match (self, other) { - (Changed(..), Deleted(_)) => Ordering::Less, - (Deleted(_), Changed(..)) => Ordering::Greater, - (Changed(_, ft, hash), Changed(_, oft, ohash)) => (ft, hash).cmp(&(oft, ohash)), - (Deleted(_), Deleted(_)) => Ordering::Equal, - } - } + Ordering::Equal => DiffResultCmp::from(self).cmp(&DiffResultCmp::from(other)), other => other, } } } +// (seems like this is the easiest way to do the Ord comparisons necessary) +#[derive(Eq, Ord, PartialEq, PartialOrd)] +enum DiffResultCmp<'a> { + Changed(&'a FileType, &'a HgEntryId), + ChangedReusedId(&'a FileType, &'a HgEntryId), + Deleted, +} + +impl<'a> From<&'a BonsaiDiffResult> for DiffResultCmp<'a> { + fn from(result: &'a BonsaiDiffResult) -> Self { + match result { + BonsaiDiffResult::Changed(_, ft, entry_id) => DiffResultCmp::Changed(ft, entry_id), + BonsaiDiffResult::ChangedReusedId(_, ft, entry_id) => { + DiffResultCmp::ChangedReusedId(ft, entry_id) + } + BonsaiDiffResult::Deleted(_) => DiffResultCmp::Deleted, + } + } +} + /// Represents a specific entry, or the lack of one, in the working manifest. enum WorkingEntry { Absent, @@ -137,11 +166,13 @@ impl WorkingEntry { ) -> impl Stream + Send { let file_result = self.bonsai_diff_file(&path, &composite_entry); let tree_stream = match &file_result { - Some(BonsaiDiffResult::Changed(..)) => { + Some(BonsaiDiffResult::Changed(..)) | Some(BonsaiDiffResult::ChangedReusedId(..)) => { // A changed entry automatically means any entries underneath are deleted. stream::empty().boxify() } - _other => self.bonsai_diff_tree(Some(path), composite_entry), + Some(BonsaiDiffResult::Deleted(..)) | None => { + self.bonsai_diff_tree(Some(path), composite_entry) + } }; let file_stream = stream::iter_ok(file_result); // Fetching the composite and working manifests will be kicked off immediately in @@ -158,22 +189,26 @@ impl WorkingEntry { ) -> Option { match self { WorkingEntry::File(ft, entry) => { + let hash = entry.get_hash(); // Any tree entries being present indicates a file-directory conflict which must be // resolved. // >= 2 entries means there's a file-file conflict which must be resolved. // 0 entries means an added file. // A different entry means a changed file. - if composite_entry.num_trees() == 0 && composite_entry.num_files() == 1 - && composite_entry.contains_file(ft, entry.get_hash()) - { - None + // + // See the doc comment for `BonsaiDiffResult::ChangedReusedId` for more about it. + if composite_entry.num_trees() == 0 && composite_entry.num_files() == 1 { + if composite_entry.contains_file(ft, hash) { + None + } else if composite_entry.contains_file_other_type(ft, hash) { + Some(BonsaiDiffResult::ChangedReusedId(path.clone(), *ft, *hash)) + } else { + Some(BonsaiDiffResult::Changed(path.clone(), *ft, *hash)) + } + } else if composite_entry.contains_file_any_type(hash) { + Some(BonsaiDiffResult::ChangedReusedId(path.clone(), *ft, *hash)) } else { - // XXX compare contents - Some(BonsaiDiffResult::Changed( - path.clone(), - *ft, - *entry.get_hash(), - )) + Some(BonsaiDiffResult::Changed(path.clone(), *ft, *hash)) } } _other => { diff --git a/bonsai-utils/test/main.rs b/bonsai-utils/test/main.rs index 83ce5c7057..1040e78244 100644 --- a/bonsai-utils/test/main.rs +++ b/bonsai-utils/test/main.rs @@ -47,7 +47,7 @@ fn diff_basic() { deleted("dir2/bar"), changed("dir2/dir-to-file", FileType::Executable, DS_EID), // dir2/dir-to-file/foo is *not* a result, because its parent is marked changed - changed("dir2/only-file-type", FileType::Executable, ONES_EID), + changed_reused_id("dir2/only-file-type", FileType::Executable, ONES_EID), changed("dir2/quux", FileType::Symlink, FOURS_EID), ]; @@ -89,10 +89,13 @@ fn diff_merge1() { deleted("dir1/file-to-dir"), // dir1/file-to-dir/foobar is *not* a result because p1 doesn't have it and p2 has the // same contents. - changed("dir1/foo", FileType::Regular, THREES_EID), + // + // This ID was reused from parent2. + changed_reused_id("dir1/foo", FileType::Regular, THREES_EID), deleted("dir2/bar"), - changed("dir2/dir-to-file", FileType::Executable, DS_EID), - changed("dir2/only-file-type", FileType::Executable, ONES_EID), + // This ID was reused from parent2. + changed_reused_id("dir2/dir-to-file", FileType::Executable, DS_EID), + changed_reused_id("dir2/only-file-type", FileType::Executable, ONES_EID), // dir2/quux is not a result because it isn't present in p1 and is present in p2, so // the version from p2 is implicitly chosen. ]; @@ -120,7 +123,9 @@ fn compute_diff( paths.sort_unstable(); check_pcf(paths.iter().map(|diff_result| match diff_result { - BonsaiDiffResult::Changed(path, ..) => (path, true), + BonsaiDiffResult::Changed(path, ..) | BonsaiDiffResult::ChangedReusedId(path, ..) => { + (path, true) + } BonsaiDiffResult::Deleted(path) => (path, false), })).expect("paths must be path-conflict-free"); @@ -133,6 +138,11 @@ fn changed(path: impl AsRef<[u8]>, ft: FileType, hash: HgEntryId) -> BonsaiDiffR BonsaiDiffResult::Changed(path, ft, hash) } +fn changed_reused_id(path: impl AsRef<[u8]>, ft: FileType, hash: HgEntryId) -> BonsaiDiffResult { + let path = MPath::new(path).expect("valid path"); + BonsaiDiffResult::ChangedReusedId(path, ft, hash) +} + fn deleted(path: impl AsRef<[u8]>) -> BonsaiDiffResult { let path = MPath::new(path).expect("valid path"); BonsaiDiffResult::Deleted(path) diff --git a/mononoke-types/src/file_change.rs b/mononoke-types/src/file_change.rs index 9f06b24621..857bba605f 100644 --- a/mononoke-types/src/file_change.rs +++ b/mononoke-types/src/file_change.rs @@ -169,6 +169,20 @@ pub enum FileType { } impl FileType { + /// All possible file types. + pub fn all() -> [FileType; 3] { + [FileType::Regular, FileType::Executable, FileType::Symlink] + } + + /// All the file types that `self` is not. + pub fn complement(&self) -> [FileType; 2] { + match self { + FileType::Regular => [FileType::Executable, FileType::Symlink], + FileType::Executable => [FileType::Regular, FileType::Symlink], + FileType::Symlink => [FileType::Regular, FileType::Executable], + } + } + pub(crate) fn from_thrift(ft: thrift::FileType) -> Result { let file_type = match ft { thrift::FileType::Regular => FileType::Regular,