diff: highlight word-level changes in git diffs

The output looks somewhat similar to color-words diffs. Unified diffs are more
verbose, but they are easier to follow when, for example, added or removed
lines sit next to modified ones.

Word-level diffing is forcibly enabled. We can also add a config knob (or
!color condition) to turn it off to save CPU time.

I originally considered disabling highlights in block insertion/deletion, but
that wasn't always great. This can be addressed separately as it also applies
to color-words diffs. #3958
This commit is contained in:
Yuya Nishihara 2024-06-28 12:11:04 +09:00
parent 9e8a739e4d
commit 44a39017f0
4 changed files with 94 additions and 19 deletions

View File

@ -20,6 +20,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
individually instead of being passed a directory by setting individually instead of being passed a directory by setting
`merge-tools.$TOOL.diff-invocation-mode="file-by-file"` in config.toml. `merge-tools.$TOOL.diff-invocation-mode="file-by-file"` in config.toml.
* In git diffs, word-level hunks are now highlighted with underline. See [diff
colors and styles](docs/config.md#diff-colors-and-styles) for customization.
* `jj git clone` and `jj git init` with an existing git repository adds the * `jj git clone` and `jj git init` with an existing git repository adds the
default branch of the remote as repository settings for default branch of the remote as repository settings for
`revset-aliases."trunk()"`.` `revset-aliases."trunk()"`.`

View File

@ -14,9 +14,9 @@
use std::cmp::max; use std::cmp::max;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::io;
use std::ops::Range; use std::ops::Range;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::{io, mem};
use futures::{try_join, Stream, StreamExt}; use futures::{try_join, Stream, StreamExt};
use itertools::Itertools; use itertools::Itertools;
@ -794,36 +794,46 @@ fn git_diff_part(
}) })
} }
#[derive(PartialEq)] #[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DiffLineType { enum DiffLineType {
Context, Context,
Removed, Removed,
Added, Added,
} }
/// Classifies a word-level token within a single line of a unified diff.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DiffTokenType {
/// The token is unchanged between the two sides (context lines consist of a
/// single matching token).
Matching,
/// The token differs between the two sides; it is rendered with the extra
/// "token" label so that it can be highlighted.
Different,
}
/// Tokens that make up one diff line, each tagged with its [`DiffTokenType`].
type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;
struct UnifiedDiffHunk<'content> { struct UnifiedDiffHunk<'content> {
left_line_range: Range<usize>, left_line_range: Range<usize>,
right_line_range: Range<usize>, right_line_range: Range<usize>,
lines: Vec<(DiffLineType, &'content [u8])>, lines: Vec<(DiffLineType, DiffTokenVec<'content>)>,
} }
impl<'content> UnifiedDiffHunk<'content> { impl<'content> UnifiedDiffHunk<'content> {
fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) { fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
let old_len = self.lines.len(); let old_len = self.lines.len();
self.lines self.lines.extend(lines.into_iter().map(|line| {
.extend(lines.into_iter().map(|line| (DiffLineType::Context, line))); let tokens = vec![(DiffTokenType::Matching, line)];
(DiffLineType::Context, tokens)
}));
self.left_line_range.end += self.lines.len() - old_len; self.left_line_range.end += self.lines.len() - old_len;
self.right_line_range.end += self.lines.len() - old_len; self.right_line_range.end += self.lines.len() - old_len;
} }
fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) { fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
let old_len = self.lines.len(); let old_len = self.lines.len();
self.lines self.lines
.extend(lines.into_iter().map(|line| (DiffLineType::Removed, line))); .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line)));
self.left_line_range.end += self.lines.len() - old_len; self.left_line_range.end += self.lines.len() - old_len;
} }
fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) { fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
let old_len = self.lines.len(); let old_len = self.lines.len();
self.lines self.lines
.extend(lines.into_iter().map(|line| (DiffLineType::Added, line))); .extend(lines.into_iter().map(|line| (DiffLineType::Added, line)));
@ -873,9 +883,9 @@ fn unified_diff_hunks<'content>(
// The next hunk should be of DiffHunk::Different type if any. // The next hunk should be of DiffHunk::Different type if any.
current_hunk.extend_context_lines(before_lines.into_iter().rev()); current_hunk.extend_context_lines(before_lines.into_iter().rev());
} }
DiffHunk::Different(content) => { DiffHunk::Different(contents) => {
let left_lines = content[0].split_inclusive(|b| *b == b'\n'); let [left, right] = contents.try_into().unwrap();
let right_lines = content[1].split_inclusive(|b| *b == b'\n'); let (left_lines, right_lines) = inline_diff_hunks(left, right);
current_hunk.extend_removed_lines(left_lines); current_hunk.extend_removed_lines(left_lines);
current_hunk.extend_added_lines(right_lines); current_hunk.extend_added_lines(right_lines);
} }
@ -887,6 +897,60 @@ fn unified_diff_hunks<'content>(
hunks hunks
} }
/// Splits line-level hunks into word-level tokens. Returns lists of tokens per
/// line.
fn inline_diff_hunks<'content>(
left_content: &'content [u8],
right_content: &'content [u8],
) -> (Vec<DiffTokenVec<'content>>, Vec<DiffTokenVec<'content>>) {
let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
let mut left_tokens: DiffTokenVec<'content> = vec![];
let mut right_tokens: DiffTokenVec<'content> = vec![];
// Like Diff::default_refinement(), but doesn't try to match up contents by
// lines. We know left/right_contents have no matching lines.
let mut diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_word_ranges);
diff.refine_changed_regions(diff::find_nonword_ranges);
for hunk in diff.hunks() {
match hunk {
DiffHunk::Matching(content) => {
for token in content.split_inclusive(|b| *b == b'\n') {
left_tokens.push((DiffTokenType::Matching, token));
right_tokens.push((DiffTokenType::Matching, token));
if token.ends_with(b"\n") {
left_lines.push(mem::take(&mut left_tokens));
right_lines.push(mem::take(&mut right_tokens));
}
}
}
DiffHunk::Different(contents) => {
let [left, right] = contents.try_into().unwrap();
for token in left.split_inclusive(|b| *b == b'\n') {
left_tokens.push((DiffTokenType::Different, token));
if token.ends_with(b"\n") {
left_lines.push(mem::take(&mut left_tokens));
}
}
for token in right.split_inclusive(|b| *b == b'\n') {
right_tokens.push((DiffTokenType::Different, token));
if token.ends_with(b"\n") {
right_lines.push(mem::take(&mut right_tokens));
}
}
}
}
}
if !left_tokens.is_empty() {
left_lines.push(left_tokens);
}
if !right_tokens.is_empty() {
right_lines.push(right_tokens);
}
(left_lines, right_lines)
}
fn show_unified_diff_hunks( fn show_unified_diff_hunks(
formatter: &mut dyn Formatter, formatter: &mut dyn Formatter,
left_content: &[u8], left_content: &[u8],
@ -902,7 +966,7 @@ fn show_unified_diff_hunks(
hunk.right_line_range.start, hunk.right_line_range.start,
hunk.right_line_range.len() hunk.right_line_range.len()
)?; )?;
for (line_type, content) in hunk.lines { for (line_type, tokens) in &hunk.lines {
let (label, sigil) = match line_type { let (label, sigil) = match line_type {
DiffLineType::Context => ("context", " "), DiffLineType::Context => ("context", " "),
DiffLineType::Removed => ("removed", "-"), DiffLineType::Removed => ("removed", "-"),
@ -910,8 +974,16 @@ fn show_unified_diff_hunks(
}; };
formatter.with_label(label, |formatter| { formatter.with_label(label, |formatter| {
write!(formatter, "{sigil}")?; write!(formatter, "{sigil}")?;
formatter.write_all(content) for (token_type, content) in tokens {
match token_type {
DiffTokenType::Matching => formatter.write_all(content)?,
DiffTokenType::Different => formatter
.with_label("token", |formatter| formatter.write_all(content))?,
}
}
Ok(())
})?; })?;
let (_, content) = tokens.last().expect("hunk line must not be empty");
if !content.ends_with(b"\n") { if !content.ends_with(b"\n") {
write!(formatter, "\n\\ No newline at end of file\n")?; write!(formatter, "\n\\ No newline at end of file\n")?;
} }

View File

@ -140,23 +140,23 @@ fn test_diff_basic() {
<<diff file_header::--- a/>><<diff file_header::file1>><<diff file_header::>> <<diff file_header::--- a/>><<diff file_header::file1>><<diff file_header::>>
<<diff file_header::+++ /dev/null>> <<diff file_header::+++ /dev/null>>
<<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::1>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::0>><<diff hunk_header:: @@>> <<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::1>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::0>><<diff hunk_header:: @@>>
<<diff removed::->><<diff removed::foo>> <<diff removed::->><<diff removed token::foo>>
<<diff file_header::diff --git a/>><<diff file_header::file2>><<diff file_header:: b/>><<diff file_header::file2>><<diff file_header::>> <<diff file_header::diff --git a/>><<diff file_header::file2>><<diff file_header:: b/>><<diff file_header::file2>><<diff file_header::>>
<<diff file_header::index >><<diff file_header::523a4a9de8>><<diff file_header::...>><<diff file_header::485b56a572>><<diff file_header:: >><<diff file_header::100644>><<diff file_header::>> <<diff file_header::index >><<diff file_header::523a4a9de8>><<diff file_header::...>><<diff file_header::485b56a572>><<diff file_header:: >><<diff file_header::100644>><<diff file_header::>>
<<diff file_header::--- a/>><<diff file_header::file2>><<diff file_header::>> <<diff file_header::--- a/>><<diff file_header::file2>><<diff file_header::>>
<<diff file_header::+++ b/>><<diff file_header::file2>><<diff file_header::>> <<diff file_header::+++ b/>><<diff file_header::file2>><<diff file_header::>>
<<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::2>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::3>><<diff hunk_header:: @@>> <<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::2>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::3>><<diff hunk_header:: @@>>
<<diff context:: >><<diff context::foo>> <<diff context:: >><<diff context::foo>>
<<diff removed::->><<diff removed::baz qux>> <<diff removed::->><<diff removed::baz >><<diff removed token::qux>><<diff removed::>>
<<diff added::+>><<diff added::bar>> <<diff added::+>><<diff added token::bar>>
<<diff added::+>><<diff added::baz quux>> <<diff added::+>><<diff added::baz >><<diff added token::quux>><<diff added::>>
<<diff file_header::diff --git a/>><<diff file_header::file3>><<diff file_header:: b/>><<diff file_header::file3>><<diff file_header::>> <<diff file_header::diff --git a/>><<diff file_header::file3>><<diff file_header:: b/>><<diff file_header::file3>><<diff file_header::>>
<<diff file_header::new file mode >><<diff file_header::100644>><<diff file_header::>> <<diff file_header::new file mode >><<diff file_header::100644>><<diff file_header::>>
<<diff file_header::index 0000000000..>><<diff file_header::257cc5642c>><<diff file_header::>> <<diff file_header::index 0000000000..>><<diff file_header::257cc5642c>><<diff file_header::>>
<<diff file_header::--- /dev/null>> <<diff file_header::--- /dev/null>>
<<diff file_header::+++ b/>><<diff file_header::file3>><<diff file_header::>> <<diff file_header::+++ b/>><<diff file_header::file3>><<diff file_header::>>
<<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::0>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::1>><<diff hunk_header:: @@>> <<diff hunk_header::@@ ->><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::0>><<diff hunk_header:: +>><<diff hunk_header::1>><<diff hunk_header::,>><<diff hunk_header::1>><<diff hunk_header:: @@>>
<<diff added::+>><<diff added::foo>> <<diff added::+>><<diff added token::foo>>
"###); "###);
let stdout = test_env.jj_cmd_success(&repo_path, &["diff", "-s", "--git"]); let stdout = test_env.jj_cmd_success(&repo_path, &["diff", "-s", "--git"]);

View File

@ -168,8 +168,8 @@ ui.default-description = "\n\nTESTED=TODO"
### Diff colors and styles ### Diff colors and styles
In color-words diffs, hunks are rendered with underline. You can override the In color-words and git diffs, word-level hunks are rendered with underline. You
default style with the following keys: can override the default style with the following keys:
```toml ```toml
[colors] [colors]