From 7c8ee4f52ff5540a9c79f7cf7f5b8c09a146f2a4 Mon Sep 17 00:00:00 2001 From: Mark Thomas Date: Tue, 20 Oct 2020 05:43:05 -0700 Subject: [PATCH] scs_server: add file_diff method Summary: Add a method to diff file contents directly, even if their paths are not known. This provides a headerless diff, just of the content differences. Reviewed By: ahornby Differential Revision: D24200803 fbshipit-source-id: 32cf461d0c43a7b785ae58bec284978ff7976702 --- eden/mononoke/mononoke_api/src/file.rs | 31 ++++++++ eden/mononoke/mononoke_api/src/lib.rs | 4 +- eden/mononoke/mononoke_api/src/test/mod.rs | 1 + .../mononoke_api/src/test/test_file_diff.rs | 74 +++++++++++++++++++ eden/mononoke/scs_server/src/errors.rs | 1 + eden/mononoke/scs_server/src/into_response.rs | 14 +++- eden/mononoke/scs_server/src/methods/file.rs | 28 ++++++- eden/mononoke/scs_server/src/params.rs | 11 +++ .../scs_server/src/source_control_impl.rs | 5 ++ 9 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 eden/mononoke/mononoke_api/src/test/test_file_diff.rs diff --git a/eden/mononoke/mononoke_api/src/file.rs b/eden/mononoke/mononoke_api/src/file.rs index 381724ba4d..43b79c3742 100644 --- a/eden/mononoke/mononoke_api/src/file.rs +++ b/eden/mononoke/mononoke_api/src/file.rs @@ -18,6 +18,7 @@ use filestore::{self, get_metadata, FetchKey}; use futures::compat::{Future01CompatExt, Stream01CompatExt}; use futures::future::{FutureExt, Shared}; use futures::stream::TryStreamExt; +use futures::try_join; use crate::errors::MononokeError; use crate::repo::RepoContext; @@ -193,6 +194,36 @@ impl FileContext { } } +/// A diff between two files in headerless unified diff format +pub struct HeaderlessUnifiedDiff { + /// Raw diff as bytes. + pub raw_diff: Vec, + /// One of the diffed files is binary, raw diff contains just a placeholder. + pub is_binary: bool, +} + +pub async fn headerless_unified_diff( + old_file: &FileContext, + new_file: &FileContext, + context_lines: usize, +) -> Result { + let (old_diff_file, new_diff_file) = + try_join!(old_file.content_concat(), new_file.content_concat(),)?; + let is_binary = old_diff_file.contains(&0) || new_diff_file.contains(&0); + let raw_diff = if is_binary { + b"Binary files differ".to_vec() + } else { + let opts = xdiff::HeaderlessDiffOpts { + context: context_lines, + }; + xdiff::diff_unified_headerless(&old_diff_file, &new_diff_file, opts) + }; + Ok(HeaderlessUnifiedDiff { + raw_diff, + is_binary, + }) +} + /// File contexts should only exist for files that are known to be in the /// blobstore. If attempting to access the content results in an error, this /// error is returned. This is an internal error, as it means either the data diff --git a/eden/mononoke/mononoke_api/src/lib.rs b/eden/mononoke/mononoke_api/src/lib.rs index 1375b24380..5b7d3c5181 100644 --- a/eden/mononoke/mononoke_api/src/lib.rs +++ b/eden/mononoke/mononoke_api/src/lib.rs @@ -52,7 +52,9 @@ pub use crate::changeset_path::{ }; pub use crate::changeset_path_diff::ChangesetPathDiffContext; pub use crate::errors::MononokeError; -pub use crate::file::{FileContext, FileId, FileMetadata, FileType}; +pub use crate::file::{ + headerless_unified_diff, FileContext, FileId, FileMetadata, FileType, HeaderlessUnifiedDiff, +}; pub use crate::path::MononokePath; pub use crate::repo::{BookmarkFreshness, RepoContext}; pub use crate::repo_write::create_changeset::{CreateChange, CreateCopyInfo}; diff --git a/eden/mononoke/mononoke_api/src/test/mod.rs b/eden/mononoke/mononoke_api/src/test/mod.rs index c754fdee13..9f338d1803 100644 --- a/eden/mononoke/mononoke_api/src/test/mod.rs +++ b/eden/mononoke/mononoke_api/src/test/mod.rs @@ -5,6 +5,7 @@ * GNU General Public License version 2. */ +mod test_file_diff; mod test_history; mod test_repo; mod test_repo_bookmarks; diff --git a/eden/mononoke/mononoke_api/src/test/test_file_diff.rs b/eden/mononoke/mononoke_api/src/test/test_file_diff.rs new file mode 100644 index 0000000000..9d508390a5 --- /dev/null +++ b/eden/mononoke/mononoke_api/src/test/test_file_diff.rs @@ -0,0 +1,74 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +use std::collections::BTreeMap; +use std::sync::Arc; + +use anyhow::{Error, Result}; +use fbinit::FacebookInit; +use tests_utils::drawdag::{changes, create_from_dag_with_changes}; + +use crate::{headerless_unified_diff, ChangesetId, CoreContext, Repo, RepoContext}; + +async fn init_repo(ctx: &CoreContext) -> Result<(RepoContext, BTreeMap)> { + let blob_repo = blobrepo_factory::new_memblob_empty(None)?; + let changesets = create_from_dag_with_changes( + ctx, + &blob_repo, + r##" + A-B-C + "##, + changes! { + "B" => |c| c.add_file("file", "test\nbefore\ndata\n").add_file("bin", "bin\0\x01"), + "C" => |c| c.add_file("file", "test\nafter\ndata\n").add_file("bin", "bin\0\x02"), + }, + ) + .await?; + + let repo = Repo::new_test(ctx.clone(), blob_repo).await?; + let repo_ctx = RepoContext::new(ctx.clone(), Arc::new(repo)).await?; + Ok((repo_ctx, changesets)) +} + +#[fbinit::compat_test] +async fn file_diff(fb: FacebookInit) -> Result<(), Error> { + let ctx = CoreContext::test_mock(fb); + let (repo, changesets) = init_repo(&ctx).await?; + + let b = repo + .changeset(changesets["B"]) + .await? + .expect("changeset should exist"); + let c = repo + .changeset(changesets["C"]) + .await? + .expect("changeset should exist"); + + // Compare two regular files + let b_file = b.path("file")?.file().await?.expect("should be a file"); + let c_file = c.path("file")?.file().await?.expect("should be a file"); + let diff = headerless_unified_diff(&b_file, &c_file, 3).await?; + assert_eq!( + std::str::from_utf8(&diff.raw_diff)?, + concat!( + "@@ -1,3 +1,3 @@\n", + " test\n", + "-before\n", + "+after\n", + " data\n" + ) + ); + assert!(!diff.is_binary); + + // Compare two binary files + let b_bin = b.path("bin")?.file().await?.expect("should be a file"); + let c_bin = c.path("bin")?.file().await?.expect("should be a file"); + let diff = headerless_unified_diff(&b_bin, &c_bin, 3).await?; + assert_eq!(std::str::from_utf8(&diff.raw_diff)?, "Binary files differ"); + assert!(diff.is_binary); + Ok(()) +} diff --git a/eden/mononoke/scs_server/src/errors.rs b/eden/mononoke/scs_server/src/errors.rs index 4ba2912150..0d37f5847c 100644 --- a/eden/mononoke/scs_server/src/errors.rs +++ b/eden/mononoke/scs_server/src/errors.rs @@ -123,6 +123,7 @@ impl_into_thrift_error!(service::TreeListExn); impl_into_thrift_error!(service::FileExistsExn); impl_into_thrift_error!(service::FileInfoExn); impl_into_thrift_error!(service::FileContentChunkExn); +impl_into_thrift_error!(service::FileDiffExn); impl_into_thrift_error!(service::CommitLookupXrepoExn); impl_into_thrift_error!(service::RepoListHgManifestExn); diff --git a/eden/mononoke/scs_server/src/into_response.rs b/eden/mononoke/scs_server/src/into_response.rs index f9b83e5bf8..1c32494873 100644 --- a/eden/mononoke/scs_server/src/into_response.rs +++ b/eden/mononoke/scs_server/src/into_response.rs @@ -13,8 +13,9 @@ use futures::try_join; use itertools::Itertools; use maplit::btreemap; use mononoke_api::{ - ChangesetContext, ChangesetId, ChangesetPathContext, FileMetadata, FileType, MononokeError, - PushrebaseOutcome, RepoContext, TreeEntry, TreeId, TreeSummary, UnifiedDiff, + ChangesetContext, ChangesetId, ChangesetPathContext, FileMetadata, FileType, + HeaderlessUnifiedDiff, MononokeError, PushrebaseOutcome, RepoContext, TreeEntry, TreeId, + TreeSummary, UnifiedDiff, }; use source_control as thrift; use std::collections::{BTreeMap, BTreeSet}; @@ -130,6 +131,15 @@ impl IntoResponse for UnifiedDiff { } } +impl IntoResponse for HeaderlessUnifiedDiff { + fn into_response(self) -> thrift::Diff { + thrift::Diff::raw_diff(thrift::RawDiff { + raw_diff: Some(self.raw_diff), + is_binary: self.is_binary, + }) + } +} + #[async_trait] impl AsyncIntoResponse> for ChangesetPathContext { async fn into_response(self) -> Result, errors::ServiceError> { diff --git a/eden/mononoke/scs_server/src/methods/file.rs b/eden/mononoke/scs_server/src/methods/file.rs index 6f189e684f..3cac5b703c 100644 --- a/eden/mononoke/scs_server/src/methods/file.rs +++ b/eden/mononoke/scs_server/src/methods/file.rs @@ -6,10 +6,11 @@ */ use context::CoreContext; +use mononoke_api::{headerless_unified_diff, FileId}; use source_control as thrift; use crate::errors; -use crate::from_request::check_range_and_convert; +use crate::from_request::{check_range_and_convert, FromRequest}; use crate::into_response::IntoResponse; use crate::source_control_impl::SourceControlServiceImpl; use crate::specifiers::SpecifierExt; @@ -65,4 +66,29 @@ impl SourceControlServiceImpl { (_repo, None) => Err(errors::file_not_found(file.description()).into()), } } + + /// Compare a file with another file. + pub(crate) async fn file_diff( + &self, + ctx: CoreContext, + file: thrift::FileSpecifier, + params: thrift::FileDiffParams, + ) -> Result { + let context_lines = params.context as usize; + + let (repo, base_file) = self.repo_file(ctx, &file).await?; + let base_file = base_file.ok_or_else(|| errors::file_not_found(file.description()))?; + + let other_file_id = FileId::from_request(¶ms.other_file_id)?; + let other_file = repo + .file(other_file_id) + .await? + .ok_or_else(|| errors::file_not_found(other_file_id.to_string()))?; + + let diff = headerless_unified_diff(&other_file, &base_file, context_lines) + .await? + .into_response(); + + Ok(thrift::FileDiffResponse { diff }) + } } diff --git a/eden/mononoke/scs_server/src/params.rs b/eden/mononoke/scs_server/src/params.rs index f3be368127..4f5f66f1a8 100644 --- a/eden/mononoke/scs_server/src/params.rs +++ b/eden/mononoke/scs_server/src/params.rs @@ -243,6 +243,17 @@ impl AddScubaParams for thrift::FileExistsParams {} impl AddScubaParams for thrift::FileInfoParams {} +impl AddScubaParams for thrift::FileDiffParams { + fn add_scuba_params(&self, scuba: &mut ScubaSampleBuilder) { + scuba.add( + "other_file", + faster_hex::hex_string(&self.other_file_id).expect("hex_string should never fail"), + ); + scuba.add("param_format", self.format.to_string()); + scuba.add("param_context", self.context); + } +} + impl AddScubaParams for thrift::TreeListParams { fn add_scuba_params(&self, scuba: &mut ScubaSampleBuilder) { scuba.add("param_offset", self.offset); diff --git a/eden/mononoke/scs_server/src/source_control_impl.rs b/eden/mononoke/scs_server/src/source_control_impl.rs index 6397ec9be6..668a677c98 100644 --- a/eden/mononoke/scs_server/src/source_control_impl.rs +++ b/eden/mononoke/scs_server/src/source_control_impl.rs @@ -499,6 +499,11 @@ impl SourceControlService for SourceControlServiceThriftImpl { params: thrift::FileContentChunkParams, ) -> Result; + async fn file_diff( + file: thrift::FileSpecifier, + params: thrift::FileDiffParams, + ) -> Result; + async fn repo_create_commit( repo: thrift::RepoSpecifier, params: thrift::RepoCreateCommitParams,