From 44a39017f048154b8c6fc618ff9ea35ed0a6d6d0 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Fri, 28 Jun 2024 12:11:04 +0900 Subject: [PATCH] diff: highlight word-level changes in git diffs The output looks somewhat similar to color-words diffs. Unified diffs are verbose, but are easier to follow if adjacent lines are added/removed + modified for example. Word-level diffing is forcibly enabled. We can also add a config knob (or !color condition) to turn it off to save CPU time. I originally considered disabling highlights in block insertion/deletion, but that wasn't always great. This can be addressed separately as it also applies to color-words diffs. #3958 --- CHANGELOG.md | 3 ++ cli/src/diff_util.rs | 96 +++++++++++++++++++++++++++++----- cli/tests/test_diff_command.rs | 10 ++-- docs/config.md | 4 +- 4 files changed, 94 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8925794bd..96ae03261 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). individually instead of being passed a directory by setting `merge-tools.$TOOL.diff-invocation-mode="file-by-file"` in config.toml. +* In git diffs, word-level hunks are now highlighted with underline. See [diff + colors and styles](docs/config.md#diff-colors-and-styles) for customization. + * `jj git clone` and `jj git init` with an existing git repository adds the default branch of the remote as repository settings for `revset-aliases."trunk()"`.` diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs index 0139176b3..af8a9a360 100644 --- a/cli/src/diff_util.rs +++ b/cli/src/diff_util.rs @@ -14,9 +14,9 @@ use std::cmp::max; use std::collections::VecDeque; -use std::io; use std::ops::Range; use std::path::{Path, PathBuf}; +use std::{io, mem}; use futures::{try_join, Stream, StreamExt}; use itertools::Itertools; @@ -794,36 +794,46 @@ fn git_diff_part( }) } -#[derive(PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] enum DiffLineType { Context, Removed, Added, } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum DiffTokenType { + Matching, + Different, +} + +type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>; + struct UnifiedDiffHunk<'content> { left_line_range: Range, right_line_range: Range, - lines: Vec<(DiffLineType, &'content [u8])>, + lines: Vec<(DiffLineType, DiffTokenVec<'content>)>, } impl<'content> UnifiedDiffHunk<'content> { fn extend_context_lines(&mut self, lines: impl IntoIterator) { let old_len = self.lines.len(); - self.lines - .extend(lines.into_iter().map(|line| (DiffLineType::Context, line))); + self.lines.extend(lines.into_iter().map(|line| { + let tokens = vec![(DiffTokenType::Matching, line)]; + (DiffLineType::Context, tokens) + })); self.left_line_range.end += self.lines.len() - old_len; self.right_line_range.end += self.lines.len() - old_len; } - fn extend_removed_lines(&mut self, lines: impl IntoIterator) { + fn extend_removed_lines(&mut self, lines: impl IntoIterator>) { let old_len = self.lines.len(); self.lines .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line))); self.left_line_range.end += self.lines.len() - old_len; } - fn extend_added_lines(&mut self, lines: impl IntoIterator) { + fn extend_added_lines(&mut self, lines: impl IntoIterator>) { let old_len = self.lines.len(); self.lines .extend(lines.into_iter().map(|line| (DiffLineType::Added, line))); @@ -873,9 +883,9 @@ fn unified_diff_hunks<'content>( // The next hunk should be of DiffHunk::Different type if any. current_hunk.extend_context_lines(before_lines.into_iter().rev()); } - DiffHunk::Different(content) => { - let left_lines = content[0].split_inclusive(|b| *b == b'\n'); - let right_lines = content[1].split_inclusive(|b| *b == b'\n'); + DiffHunk::Different(contents) => { + let [left, right] = contents.try_into().unwrap(); + let (left_lines, right_lines) = inline_diff_hunks(left, right); current_hunk.extend_removed_lines(left_lines); current_hunk.extend_added_lines(right_lines); } @@ -887,6 +897,60 @@ fn unified_diff_hunks<'content>( hunks } +/// Splits line-level hunks into word-level tokens. Returns lists of tokens per +/// line. +fn inline_diff_hunks<'content>( + left_content: &'content [u8], + right_content: &'content [u8], +) -> (Vec>, Vec>) { + let mut left_lines: Vec> = vec![]; + let mut right_lines: Vec> = vec![]; + let mut left_tokens: DiffTokenVec<'content> = vec![]; + let mut right_tokens: DiffTokenVec<'content> = vec![]; + + // Like Diff::default_refinement(), but doesn't try to match up contents by + // lines. We know left/right_contents have no matching lines. + let mut diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_word_ranges); + diff.refine_changed_regions(diff::find_nonword_ranges); + for hunk in diff.hunks() { + match hunk { + DiffHunk::Matching(content) => { + for token in content.split_inclusive(|b| *b == b'\n') { + left_tokens.push((DiffTokenType::Matching, token)); + right_tokens.push((DiffTokenType::Matching, token)); + if token.ends_with(b"\n") { + left_lines.push(mem::take(&mut left_tokens)); + right_lines.push(mem::take(&mut right_tokens)); + } + } + } + DiffHunk::Different(contents) => { + let [left, right] = contents.try_into().unwrap(); + for token in left.split_inclusive(|b| *b == b'\n') { + left_tokens.push((DiffTokenType::Different, token)); + if token.ends_with(b"\n") { + left_lines.push(mem::take(&mut left_tokens)); + } + } + for token in right.split_inclusive(|b| *b == b'\n') { + right_tokens.push((DiffTokenType::Different, token)); + if token.ends_with(b"\n") { + right_lines.push(mem::take(&mut right_tokens)); + } + } + } + } + } + + if !left_tokens.is_empty() { + left_lines.push(left_tokens); + } + if !right_tokens.is_empty() { + right_lines.push(right_tokens); + } + (left_lines, right_lines) +} + fn show_unified_diff_hunks( formatter: &mut dyn Formatter, left_content: &[u8], @@ -902,7 +966,7 @@ fn show_unified_diff_hunks( hunk.right_line_range.start, hunk.right_line_range.len() )?; - for (line_type, content) in hunk.lines { + for (line_type, tokens) in &hunk.lines { let (label, sigil) = match line_type { DiffLineType::Context => ("context", " "), DiffLineType::Removed => ("removed", "-"), @@ -910,8 +974,16 @@ fn show_unified_diff_hunks( }; formatter.with_label(label, |formatter| { write!(formatter, "{sigil}")?; - formatter.write_all(content) + for (token_type, content) in tokens { + match token_type { + DiffTokenType::Matching => formatter.write_all(content)?, + DiffTokenType::Different => formatter + .with_label("token", |formatter| formatter.write_all(content))?, + } + } + Ok(()) })?; + let (_, content) = tokens.last().expect("hunk line must not be empty"); if !content.ends_with(b"\n") { write!(formatter, "\n\\ No newline at end of file\n")?; } diff --git a/cli/tests/test_diff_command.rs b/cli/tests/test_diff_command.rs index 216abc395..160d00c36 100644 --- a/cli/tests/test_diff_command.rs +++ b/cli/tests/test_diff_command.rs @@ -140,23 +140,23 @@ fn test_diff_basic() { <><><> <> <><><><><><><><><> - <><> + <><> <><><><><> <><><><><><><> <><><> <><><> <><><><><><><><><> <><> - <><> - <><> - <><> + <><><><> + <><> + <><><><> <><><><><> <><><> <><><> <> <><><> <><><><><><><><><> - <><> + <><> "###); let stdout = test_env.jj_cmd_success(&repo_path, &["diff", "-s", "--git"]); diff --git a/docs/config.md b/docs/config.md index ea57d13e0..b2460af35 100644 --- a/docs/config.md +++ b/docs/config.md @@ -168,8 +168,8 @@ ui.default-description = "\n\nTESTED=TODO" ### Diff colors and styles -In color-words diffs, hunks are rendered with underline. You can override the -default style with the following keys: +In color-words and git diffs, word-level hunks are rendered with underline. You +can override the default style with the following keys: ```toml [colors]