bonsai-utils/diff: add ChangedReusedId state

Summary: See the doc comment for what this state represents and why it is necessary.

Reviewed By: StanislavGlebik

Differential Revision: D9025772

fbshipit-source-id: b0588d037365194bbf2d9889ead60237ef097359
This commit is contained in:
Rain ⁣ 2018-07-31 10:34:00 -07:00 committed by Facebook Github Bot
parent a9ccfe8cc1
commit e284a0dac2
5 changed files with 105 additions and 29 deletions

View File

@ -355,7 +355,7 @@ fn make_entry(repo: &BlobRepo, diff_result: &BonsaiDiffResult) -> Option<HgBlobE
use self::BonsaiDiffResult::*;
match diff_result {
Changed(path, ft, entry_id) => {
Changed(path, ft, entry_id) | ChangedReusedId(path, ft, entry_id) => {
let blobstore = repo.get_blobstore();
let basename = path.basename().clone();
let hash = entry_id.into_nodehash();

View File

@ -45,6 +45,23 @@ impl CompositeEntry {
self.files.contains_key(&(*file_type, *hash))
}
/// Whether this composite entry contains the same hash but a different type.
#[inline]
pub fn contains_file_other_type(&self, file_type: &FileType, hash: &HgEntryId) -> bool {
file_type
.complement()
.iter()
.any(|ft| self.contains_file(ft, hash))
}
/// Whether this composite entry contains a file with this hash but with any possible type.
#[inline]
pub fn contains_file_any_type(&self, hash: &HgEntryId) -> bool {
FileType::all()
.iter()
.any(|ft| self.contains_file(ft, hash))
}
#[inline]
pub fn num_trees(&self) -> usize {
self.trees.len()

View File

@ -42,7 +42,18 @@ pub fn bonsai_diff(
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum BonsaiDiffResult {
/// This file was changed (was added or modified) in this changeset.
Changed(MPath, FileType, HgEntryId),
/// The file was marked changed, but one of the parent file node IDs was reused. This can
/// happen in these situations:
///
/// 1. The file type was changed without a corresponding change in file contents.
/// 2. There's a merge and one of the parent nodes was picked as the resolution.
///
/// This is separate from `Changed` because in these instances, if copy information is part
/// of the node it wouldn't be recorded.
ChangedReusedId(MPath, FileType, HgEntryId),
/// This file was deleted in this changeset.
Deleted(MPath),
}
@ -51,7 +62,9 @@ impl BonsaiDiffResult {
#[inline]
pub fn path(&self) -> &MPath {
match self {
BonsaiDiffResult::Changed(path, ..) | BonsaiDiffResult::Deleted(path) => path,
BonsaiDiffResult::Changed(path, ..)
| BonsaiDiffResult::ChangedReusedId(path, ..)
| BonsaiDiffResult::Deleted(path) => path,
}
}
}
@ -64,6 +77,11 @@ impl fmt::Display for BonsaiDiffResult {
"[changed] path: {}, hash: {}, type: {}",
path, entry_id, ft
),
BonsaiDiffResult::ChangedReusedId(path, ft, entry_id) => write!(
f,
"[changed, reused id] path: {}, hash: {}, type: {}",
path, entry_id, ft
),
BonsaiDiffResult::Deleted(path) => write!(f, "[deleted] path: {}", path),
}
}
@ -80,21 +98,32 @@ impl PartialOrd for BonsaiDiffResult {
impl Ord for BonsaiDiffResult {
fn cmp(&self, other: &BonsaiDiffResult) -> Ordering {
match self.path().cmp(other.path()) {
Ordering::Equal => {
use BonsaiDiffResult::*;
// treat changed as less than deleted
match (self, other) {
(Changed(..), Deleted(_)) => Ordering::Less,
(Deleted(_), Changed(..)) => Ordering::Greater,
(Changed(_, ft, hash), Changed(_, oft, ohash)) => (ft, hash).cmp(&(oft, ohash)),
(Deleted(_), Deleted(_)) => Ordering::Equal,
}
}
Ordering::Equal => DiffResultCmp::from(self).cmp(&DiffResultCmp::from(other)),
other => other,
}
}
}
// (seems like this is the easiest way to do the Ord comparisons necessary)
#[derive(Eq, Ord, PartialEq, PartialOrd)]
enum DiffResultCmp<'a> {
Changed(&'a FileType, &'a HgEntryId),
ChangedReusedId(&'a FileType, &'a HgEntryId),
Deleted,
}
impl<'a> From<&'a BonsaiDiffResult> for DiffResultCmp<'a> {
fn from(result: &'a BonsaiDiffResult) -> Self {
match result {
BonsaiDiffResult::Changed(_, ft, entry_id) => DiffResultCmp::Changed(ft, entry_id),
BonsaiDiffResult::ChangedReusedId(_, ft, entry_id) => {
DiffResultCmp::ChangedReusedId(ft, entry_id)
}
BonsaiDiffResult::Deleted(_) => DiffResultCmp::Deleted,
}
}
}
/// Represents a specific entry, or the lack of one, in the working manifest.
enum WorkingEntry {
Absent,
@ -137,11 +166,13 @@ impl WorkingEntry {
) -> impl Stream<Item = BonsaiDiffResult, Error = Error> + Send {
let file_result = self.bonsai_diff_file(&path, &composite_entry);
let tree_stream = match &file_result {
Some(BonsaiDiffResult::Changed(..)) => {
Some(BonsaiDiffResult::Changed(..)) | Some(BonsaiDiffResult::ChangedReusedId(..)) => {
// A changed entry automatically means any entries underneath are deleted.
stream::empty().boxify()
}
_other => self.bonsai_diff_tree(Some(path), composite_entry),
Some(BonsaiDiffResult::Deleted(..)) | None => {
self.bonsai_diff_tree(Some(path), composite_entry)
}
};
let file_stream = stream::iter_ok(file_result);
// Fetching the composite and working manifests will be kicked off immediately in
@ -158,22 +189,26 @@ impl WorkingEntry {
) -> Option<BonsaiDiffResult> {
match self {
WorkingEntry::File(ft, entry) => {
let hash = entry.get_hash();
// Any tree entries being present indicates a file-directory conflict which must be
// resolved.
// >= 2 entries means there's a file-file conflict which must be resolved.
// 0 entries means an added file.
// A different entry means a changed file.
if composite_entry.num_trees() == 0 && composite_entry.num_files() == 1
&& composite_entry.contains_file(ft, entry.get_hash())
{
None
//
// See the doc comment for `BonsaiDiffResult::ChangedReusedId` for more about it.
if composite_entry.num_trees() == 0 && composite_entry.num_files() == 1 {
if composite_entry.contains_file(ft, hash) {
None
} else if composite_entry.contains_file_other_type(ft, hash) {
Some(BonsaiDiffResult::ChangedReusedId(path.clone(), *ft, *hash))
} else {
Some(BonsaiDiffResult::Changed(path.clone(), *ft, *hash))
}
} else if composite_entry.contains_file_any_type(hash) {
Some(BonsaiDiffResult::ChangedReusedId(path.clone(), *ft, *hash))
} else {
// XXX compare contents
Some(BonsaiDiffResult::Changed(
path.clone(),
*ft,
*entry.get_hash(),
))
Some(BonsaiDiffResult::Changed(path.clone(), *ft, *hash))
}
}
_other => {

View File

@ -47,7 +47,7 @@ fn diff_basic() {
deleted("dir2/bar"),
changed("dir2/dir-to-file", FileType::Executable, DS_EID),
// dir2/dir-to-file/foo is *not* a result, because its parent is marked changed
changed("dir2/only-file-type", FileType::Executable, ONES_EID),
changed_reused_id("dir2/only-file-type", FileType::Executable, ONES_EID),
changed("dir2/quux", FileType::Symlink, FOURS_EID),
];
@ -89,10 +89,13 @@ fn diff_merge1() {
deleted("dir1/file-to-dir"),
// dir1/file-to-dir/foobar is *not* a result because p1 doesn't have it and p2 has the
// same contents.
changed("dir1/foo", FileType::Regular, THREES_EID),
//
// This ID was reused from parent2.
changed_reused_id("dir1/foo", FileType::Regular, THREES_EID),
deleted("dir2/bar"),
changed("dir2/dir-to-file", FileType::Executable, DS_EID),
changed("dir2/only-file-type", FileType::Executable, ONES_EID),
// This ID was reused from parent2.
changed_reused_id("dir2/dir-to-file", FileType::Executable, DS_EID),
changed_reused_id("dir2/only-file-type", FileType::Executable, ONES_EID),
// dir2/quux is not a result because it isn't present in p1 and is present in p2, so
// the version from p2 is implicitly chosen.
];
@ -120,7 +123,9 @@ fn compute_diff(
paths.sort_unstable();
check_pcf(paths.iter().map(|diff_result| match diff_result {
BonsaiDiffResult::Changed(path, ..) => (path, true),
BonsaiDiffResult::Changed(path, ..) | BonsaiDiffResult::ChangedReusedId(path, ..) => {
(path, true)
}
BonsaiDiffResult::Deleted(path) => (path, false),
})).expect("paths must be path-conflict-free");
@ -133,6 +138,11 @@ fn changed(path: impl AsRef<[u8]>, ft: FileType, hash: HgEntryId) -> BonsaiDiffR
BonsaiDiffResult::Changed(path, ft, hash)
}
fn changed_reused_id(path: impl AsRef<[u8]>, ft: FileType, hash: HgEntryId) -> BonsaiDiffResult {
let path = MPath::new(path).expect("valid path");
BonsaiDiffResult::ChangedReusedId(path, ft, hash)
}
fn deleted(path: impl AsRef<[u8]>) -> BonsaiDiffResult {
let path = MPath::new(path).expect("valid path");
BonsaiDiffResult::Deleted(path)

View File

@ -169,6 +169,20 @@ pub enum FileType {
}
impl FileType {
/// All possible file types.
pub fn all() -> [FileType; 3] {
[FileType::Regular, FileType::Executable, FileType::Symlink]
}
/// All the file types that `self` is not.
pub fn complement(&self) -> [FileType; 2] {
match self {
FileType::Regular => [FileType::Executable, FileType::Symlink],
FileType::Executable => [FileType::Regular, FileType::Symlink],
FileType::Symlink => [FileType::Regular, FileType::Executable],
}
}
pub(crate) fn from_thrift(ft: thrift::FileType) -> Result<Self> {
let file_type = match ft {
thrift::FileType::Regular => FileType::Regular,