mirror of
https://github.com/gitbutlerapp/gitbutler.git
synced 2024-12-28 03:55:02 +03:00
Adjust GitHunk
to not require UTF8 for diffs
This will make the diffing engine more correct overall, as a potentially lossy conversion will now only happen for display. This will also prevent content that is not valid UTF-8 from being considered binary even though it is not.
This commit is contained in:
parent
fe950ec00a
commit
27714d8e0d
@ -15,7 +15,7 @@ toml = "0.8.12"
|
||||
anyhow = "1.0.81"
|
||||
async-trait = "0.1.79"
|
||||
backtrace = { version = "0.3.71", optional = true }
|
||||
bstr = "1.9.1"
|
||||
bstr = { version = "1.9.1", features = ["serde"] }
|
||||
chrono = { version = "0.4.37", features = ["serde"] }
|
||||
diffy = "0.3.0"
|
||||
filetime = "0.2.23"
|
||||
|
@ -2,6 +2,7 @@ use std::path::PathBuf;
|
||||
use std::{collections::HashMap, str};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use bstr::{BStr, BString, ByteSlice, ByteVec};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::instrument;
|
||||
|
||||
@ -42,7 +43,7 @@ pub struct GitHunk {
|
||||
pub old_lines: u32,
|
||||
pub new_start: u32,
|
||||
pub new_lines: u32,
|
||||
pub diff: String,
|
||||
pub diff: BString,
|
||||
pub binary: bool,
|
||||
pub change_type: ChangeType,
|
||||
}
|
||||
@ -179,32 +180,26 @@ fn hunks_by_filepath(
|
||||
let old_start = hunk.as_ref().map_or(0, git2::DiffHunk::old_start);
|
||||
let old_lines = hunk.as_ref().map_or(0, git2::DiffHunk::old_lines);
|
||||
|
||||
let assume_binary = || {
|
||||
let full_path = repository.workdir().unwrap().join(file_path);
|
||||
// save the file_path to the odb
|
||||
if !delta.new_file().id().is_zero() && full_path.exists() {
|
||||
// the binary file wasn't deleted
|
||||
repository.blob_path(full_path.as_path()).unwrap();
|
||||
}
|
||||
Some((delta.new_file().id().to_string(), true))
|
||||
};
|
||||
|
||||
let line = match line.origin() {
|
||||
'+' | '-' | ' ' => {
|
||||
if let Ok(content) = str::from_utf8(line.content()) {
|
||||
Some((format!("{}{}", line.origin(), content), false))
|
||||
} else {
|
||||
assume_binary()
|
||||
}
|
||||
let mut buf = BString::new(Vec::with_capacity(line.content().len() + 1));
|
||||
buf.push_char(line.origin());
|
||||
buf.push_str(line.content());
|
||||
Some((buf, false))
|
||||
}
|
||||
'B' => {
|
||||
let full_path = repository.workdir().unwrap().join(file_path);
|
||||
// save the file_path to the odb
|
||||
if !delta.new_file().id().is_zero() && full_path.exists() {
|
||||
// the binary file wasn't deleted
|
||||
repository.blob_path(full_path.as_path()).unwrap();
|
||||
}
|
||||
Some((delta.new_file().id().to_string().into(), true))
|
||||
}
|
||||
'B' => assume_binary(),
|
||||
'F' => None,
|
||||
_ => {
|
||||
if let Ok(content) = str::from_utf8(line.content()) {
|
||||
Some((content.to_string(), false))
|
||||
} else {
|
||||
assume_binary()
|
||||
}
|
||||
let line: BString = line.content().into();
|
||||
Some((line, false))
|
||||
}
|
||||
};
|
||||
if let Some((line, is_binary)) = line {
|
||||
@ -277,6 +272,12 @@ fn hunks_by_filepath(
|
||||
.map(|(k, v)| {
|
||||
if let Some(binary_hunk) = v.iter().find(|hunk| hunk.binary) {
|
||||
if v.len() > 1 {
|
||||
// TODO(ST): Would it be possible here to permanently discard lines because
|
||||
// they are considered binary? After all, here we create a new change,
|
||||
// turning multiple binary hunks into single line hunk (somehow).
|
||||
// Probably answer: it's likely that this data is only created on the fly,
|
||||
// and only the original source data is relevant - validate it.
|
||||
// But: virtual branches definitely apply hunks.
|
||||
// if there are multiple hunks with binary among them, then the binary hunk
|
||||
// takes precedence
|
||||
(
|
||||
@ -303,7 +304,7 @@ fn hunks_by_filepath(
|
||||
old_lines: 0,
|
||||
new_start: 0,
|
||||
new_lines: 0,
|
||||
diff: String::new(),
|
||||
diff: Default::default(),
|
||||
binary: false,
|
||||
change_type: ChangeType::Modified,
|
||||
}],
|
||||
@ -321,51 +322,58 @@ fn hunks_by_filepath(
|
||||
}
|
||||
|
||||
// returns None if cannot reverse the patch header
|
||||
fn reverse_patch_header(header: &str) -> Option<String> {
|
||||
use itertools::Itertools;
|
||||
|
||||
let mut parts = header.split_whitespace();
|
||||
fn reverse_patch_header(header: &BStr) -> Option<BString> {
|
||||
let mut parts = header.split(|b| b.is_ascii_whitespace());
|
||||
|
||||
match parts.next() {
|
||||
Some("@@") => {}
|
||||
Some(b"@@") => {}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let old_range = parts.next()?;
|
||||
let new_range = parts.next()?;
|
||||
|
||||
match parts.next() {
|
||||
Some("@@") => {}
|
||||
_ => return None,
|
||||
if parts.next() != Some(b"@@") {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some(format!(
|
||||
"@@ {} {} @@ {}",
|
||||
new_range.replace('+', "-"),
|
||||
old_range.replace('-', "+"),
|
||||
parts.join(" ")
|
||||
))
|
||||
let mut buf: BString = "@@ ".into();
|
||||
buf.extend_from_slice(&new_range.replace(b"+", b"-"));
|
||||
buf.push(b' ');
|
||||
buf.extend_from_slice(&old_range.replace(b"-", b"+"));
|
||||
buf.push_str(b" @@ ");
|
||||
|
||||
let mut at_least_one_part = false;
|
||||
for part in parts {
|
||||
buf.extend_from_slice(part);
|
||||
buf.push(b' ');
|
||||
at_least_one_part = true;
|
||||
}
|
||||
if at_least_one_part {
|
||||
buf.pop();
|
||||
}
|
||||
Some(buf)
|
||||
}
|
||||
|
||||
fn reverse_patch(patch: &str) -> Option<String> {
|
||||
let mut reversed = String::new();
|
||||
fn reverse_patch(patch: &BStr) -> Option<BString> {
|
||||
let mut reversed = BString::default();
|
||||
for line in patch.lines() {
|
||||
if line.starts_with("@@") {
|
||||
if let Some(header) = reverse_patch_header(line) {
|
||||
if line.starts_with(b"@@") {
|
||||
if let Some(header) = reverse_patch_header(line.as_ref()) {
|
||||
reversed.push_str(&header);
|
||||
reversed.push('\n');
|
||||
reversed.push(b'\n');
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
} else if line.starts_with('+') {
|
||||
reversed.push_str(&line.replacen('+', "-", 1));
|
||||
reversed.push('\n');
|
||||
} else if line.starts_with('-') {
|
||||
reversed.push_str(&line.replacen('-', "+", 1));
|
||||
reversed.push('\n');
|
||||
} else if line.starts_with(b"+") {
|
||||
reversed.push_str(&line.replacen(b"+", b"-", 1));
|
||||
reversed.push(b'\n');
|
||||
} else if line.starts_with(b"-") {
|
||||
reversed.push_str(&line.replacen(b"-", b"+", 1));
|
||||
reversed.push(b'\n');
|
||||
} else {
|
||||
reversed.push_str(line);
|
||||
reversed.push('\n');
|
||||
reversed.push(b'\n');
|
||||
}
|
||||
}
|
||||
Some(reversed)
|
||||
@ -376,7 +384,7 @@ pub fn reverse_hunk(hunk: &GitHunk) -> Option<GitHunk> {
|
||||
if hunk.binary {
|
||||
None
|
||||
} else {
|
||||
reverse_patch(&hunk.diff).map(|diff| GitHunk {
|
||||
reverse_patch(hunk.diff.as_ref()).map(|diff| GitHunk {
|
||||
old_start: hunk.new_start,
|
||||
old_lines: hunk.new_lines,
|
||||
new_start: hunk.old_start,
|
||||
|
@ -7,6 +7,14 @@ pub struct Oid {
|
||||
oid: git2::Oid,
|
||||
}
|
||||
|
||||
impl Oid {
|
||||
pub fn from_bytes(bytes: &[u8]) -> Result<Self, git2::Error> {
|
||||
Ok(Self {
|
||||
oid: git2::Oid::from_bytes(bytes)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Oid {
|
||||
fn default() -> Self {
|
||||
git2::Oid::zero().into()
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::{fmt::Display, ops::RangeInclusive, str::FromStr};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use bstr::{BStr, ByteSlice};
|
||||
|
||||
use crate::git::diff;
|
||||
|
||||
@ -17,7 +18,7 @@ impl From<&diff::GitHunk> for Hunk {
|
||||
Hunk {
|
||||
start: hunk.new_start,
|
||||
end: hunk.new_start + hunk.new_lines,
|
||||
hash: Some(Hunk::hash(&hunk.diff)),
|
||||
hash: Some(Hunk::hash(hunk.diff.as_ref())),
|
||||
timestamp_ms: None,
|
||||
}
|
||||
}
|
||||
@ -120,6 +121,7 @@ impl Hunk {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(ST): self - prevent many unnecessary copies
|
||||
pub fn with_hash(&self, hash: &str) -> Self {
|
||||
Hunk {
|
||||
start: self.start,
|
||||
@ -129,6 +131,7 @@ impl Hunk {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(ST): self - prevent many unnecessary copies
|
||||
pub fn with_timestamp(&self, timestamp_ms: u128) -> Self {
|
||||
Hunk {
|
||||
start: self.start,
|
||||
@ -157,12 +160,12 @@ impl Hunk {
|
||||
self.start == other.new_start && self.end == other.new_start + other.new_lines
|
||||
}
|
||||
|
||||
pub fn hash(diff: &str) -> String {
|
||||
let addition = diff
|
||||
.lines()
|
||||
.skip(1) // skip the first line which is the diff header
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
format!("{:x}", md5::compute(addition))
|
||||
// TODO(perf): keep the hash as digest to avoid allocation.
|
||||
pub fn hash(diff: &BStr) -> String {
|
||||
let mut ctx = md5::Context::new();
|
||||
diff.lines()
|
||||
.skip(1) // skip the first line which is the diff header.
|
||||
.for_each(|line| ctx.consume(line));
|
||||
format!("{:x}", ctx.compute())
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
#[cfg(target_family = "unix")]
|
||||
use std::os::unix::prelude::*;
|
||||
use std::os::unix::prelude::PermissionsExt;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
hash::Hash,
|
||||
@ -8,8 +8,8 @@ use std::{
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use bstr::ByteSlice;
|
||||
use diffy::{apply as diffy_apply, Line, Patch};
|
||||
use bstr::{BStr, BString, ByteSlice, ByteVec};
|
||||
use diffy::{apply_bytes as diffy_apply, Line, Patch};
|
||||
use git2_hooks::HookResult;
|
||||
use regex::Regex;
|
||||
use serde::Serialize;
|
||||
@ -126,7 +126,7 @@ pub struct VirtualBranchFile {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct VirtualBranchHunk {
|
||||
pub id: String,
|
||||
pub diff: String,
|
||||
pub diff: BString,
|
||||
pub modified_at: u128,
|
||||
pub file_path: PathBuf,
|
||||
pub hash: String,
|
||||
@ -909,9 +909,9 @@ fn branches_with_large_files_abridged(mut branches: Vec<VirtualBranch>) -> Vec<V
|
||||
// Diffs larger than 500kb are considered large
|
||||
if file.hunks.iter().any(|hunk| hunk.diff.len() > 500_000) {
|
||||
file.large = true;
|
||||
file.hunks
|
||||
.iter_mut()
|
||||
.for_each(|hunk| hunk.diff = String::new());
|
||||
file.hunks.iter_mut().for_each(|hunk| {
|
||||
hunk.diff.drain(..);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1603,7 +1603,7 @@ pub fn virtual_hunks_by_filepath(
|
||||
start: hunk.new_start,
|
||||
end: hunk.new_start + hunk.new_lines,
|
||||
binary: hunk.binary,
|
||||
hash: Hunk::hash(&hunk.diff),
|
||||
hash: Hunk::hash(hunk.diff.as_ref()),
|
||||
locked: false,
|
||||
locked_to: None,
|
||||
change_type: hunk.change_type,
|
||||
@ -1784,7 +1784,7 @@ fn get_applied_status(
|
||||
committed_git_hunk.new_start,
|
||||
committed_git_hunk.new_start + committed_git_hunk.new_lines,
|
||||
) {
|
||||
let hash = Hunk::hash(&uncommitted_git_hunk.diff);
|
||||
let hash = Hunk::hash(uncommitted_git_hunk.diff.as_ref());
|
||||
git_hunk_map.insert(hash, branch.id);
|
||||
}
|
||||
}
|
||||
@ -1817,7 +1817,7 @@ fn get_applied_status(
|
||||
.filter_map(|claimed_hunk| {
|
||||
// if any of the current hunks intersects with the owned hunk, we want to keep it
|
||||
for (i, git_diff_hunk) in git_diff_hunks.iter().enumerate() {
|
||||
let hash = Hunk::hash(&git_diff_hunk.diff);
|
||||
let hash = Hunk::hash(git_diff_hunk.diff.as_ref());
|
||||
if let Some(locked_to) = git_hunk_map.get(&hash) {
|
||||
if locked_to != &branch.id {
|
||||
return None;
|
||||
@ -1887,7 +1887,7 @@ fn get_applied_status(
|
||||
|
||||
for (filepath, hunks) in base_diffs {
|
||||
for hunk in hunks {
|
||||
let hash = Hunk::hash(&hunk.diff);
|
||||
let hash = Hunk::hash(hunk.diff.as_ref());
|
||||
let vbranch_pos = if let Some(locked_to) = git_hunk_map.get(&hash) {
|
||||
let p = virtual_branches.iter().position(|vb| vb.id == *locked_to);
|
||||
match p {
|
||||
@ -1904,7 +1904,7 @@ fn get_applied_status(
|
||||
file_path: filepath.clone(),
|
||||
hunks: vec![Hunk::from(&hunk)
|
||||
.with_timestamp(get_mtime(&mut mtimes, &filepath))
|
||||
.with_hash(Hunk::hash(hunk.diff.as_str()).as_str())],
|
||||
.with_hash(Hunk::hash(hunk.diff.as_ref()).as_str())],
|
||||
});
|
||||
|
||||
diffs_by_branch
|
||||
@ -2079,8 +2079,9 @@ pub fn write_tree_onto_tree(
|
||||
let rel_path = Path::new(&filepath);
|
||||
let full_path = project_repository.path().join(rel_path);
|
||||
|
||||
let is_submodule =
|
||||
full_path.is_dir() && hunks.len() == 1 && hunks[0].diff.contains("Subproject commit");
|
||||
let is_submodule = full_path.is_dir()
|
||||
&& hunks.len() == 1
|
||||
&& hunks[0].diff.contains_str(b"Subproject commit");
|
||||
|
||||
// if file exists
|
||||
if full_path.exists() {
|
||||
@ -2130,7 +2131,11 @@ pub fn write_tree_onto_tree(
|
||||
if hunks.len() == 1 && hunks[0].binary {
|
||||
let new_blob_oid = &hunks[0].diff;
|
||||
// convert string to Oid
|
||||
let new_blob_oid = new_blob_oid.parse().context("failed to diff as oid")?;
|
||||
let new_blob_oid = new_blob_oid
|
||||
.to_str()
|
||||
.expect("hex-string")
|
||||
.parse()
|
||||
.context("failed to diff as oid")?;
|
||||
builder.upsert(rel_path, new_blob_oid, filemode);
|
||||
} else {
|
||||
// blob from tree_entry
|
||||
@ -2140,34 +2145,35 @@ pub fn write_tree_onto_tree(
|
||||
.peel_to_blob()
|
||||
.context("failed to get blob")?;
|
||||
|
||||
let mut blob_contents = blob.content().to_str()?.to_string();
|
||||
let blob_contents = blob.content();
|
||||
|
||||
let mut hunks = hunks.clone();
|
||||
hunks.sort_by_key(|hunk| hunk.new_start);
|
||||
let mut all_diffs = String::new();
|
||||
let mut all_diffs = BString::default();
|
||||
for hunk in hunks {
|
||||
all_diffs.push_str(&hunk.diff);
|
||||
}
|
||||
|
||||
let patch = Patch::from_str(&all_diffs)?;
|
||||
blob_contents = apply(&blob_contents, &patch).context(format!(
|
||||
let patch = Patch::from_bytes(&all_diffs)?;
|
||||
let blob_contents = apply(blob_contents.into(), &patch).context(format!(
|
||||
"failed to apply\n{}\nonto:\n{}",
|
||||
&all_diffs, &blob_contents
|
||||
all_diffs.as_bstr(),
|
||||
blob_contents.as_bstr()
|
||||
))?;
|
||||
|
||||
// create a blob
|
||||
let new_blob_oid = git_repository.blob(blob_contents.as_bytes())?;
|
||||
let new_blob_oid = git_repository.blob(&blob_contents)?;
|
||||
// upsert into the builder
|
||||
builder.upsert(rel_path, new_blob_oid, filemode);
|
||||
}
|
||||
} else if is_submodule {
|
||||
let mut blob_contents = String::new();
|
||||
let mut blob_contents = BString::default();
|
||||
|
||||
let mut hunks = hunks.clone();
|
||||
hunks.sort_by_key(|hunk| hunk.new_start);
|
||||
for hunk in hunks {
|
||||
let patch = Patch::from_str(&hunk.diff)?;
|
||||
blob_contents = apply(&blob_contents, &patch)
|
||||
let patch = Patch::from_bytes(&hunk.diff)?;
|
||||
blob_contents = apply(blob_contents.as_ref(), &patch)
|
||||
.context(format!("failed to apply {}", &hunk.diff))?;
|
||||
}
|
||||
|
||||
@ -3640,7 +3646,7 @@ pub fn create_virtual_branch_from_branch(
|
||||
}
|
||||
|
||||
/// Just like [`diffy::apply()`], but on error it will attach hashes of the input `base_image` and `patch`.
|
||||
pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result<String> {
|
||||
pub fn apply(base_image: &BStr, patch: &Patch<'_, [u8]>) -> Result<BString> {
|
||||
fn md5_hash_hex(b: impl AsRef<[u8]>) -> String {
|
||||
format!("{:x}", md5::compute(b))
|
||||
}
|
||||
@ -3654,8 +3660,8 @@ pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result<String> {
|
||||
Insert(String),
|
||||
}
|
||||
|
||||
impl<'a> From<&diffy::Line<'a, str>> for DebugLine {
|
||||
fn from(line: &Line<'a, str>) -> Self {
|
||||
impl<'a> From<&diffy::Line<'a, [u8]>> for DebugLine {
|
||||
fn from(line: &Line<'a, [u8]>) -> Self {
|
||||
match line {
|
||||
Line::Context(s) => DebugLine::Context(md5_hash_hex(s)),
|
||||
Line::Delete(s) => DebugLine::Delete(md5_hash_hex(s)),
|
||||
@ -3672,8 +3678,8 @@ pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result<String> {
|
||||
lines: Vec<DebugLine>,
|
||||
}
|
||||
|
||||
impl<'a> From<&diffy::Hunk<'a, str>> for DebugHunk {
|
||||
fn from(hunk: &diffy::Hunk<'a, str>) -> Self {
|
||||
impl<'a> From<&diffy::Hunk<'a, [u8]>> for DebugHunk {
|
||||
fn from(hunk: &diffy::Hunk<'a, [u8]>) -> Self {
|
||||
Self {
|
||||
old_range: hunk.old_range(),
|
||||
new_range: hunk.new_range(),
|
||||
@ -3695,10 +3701,12 @@ pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result<String> {
|
||||
}
|
||||
}
|
||||
|
||||
diffy_apply(base_image, patch).with_context(|| DebugContext {
|
||||
base_image_hash: md5_hash_hex(base_image),
|
||||
hunks: patch.hunks().iter().map(Into::into).collect(),
|
||||
})
|
||||
diffy_apply(base_image, patch)
|
||||
.with_context(|| DebugContext {
|
||||
base_image_hash: md5_hash_hex(base_image),
|
||||
hunks: patch.hunks().iter().map(Into::into).collect(),
|
||||
})
|
||||
.map(Into::into)
|
||||
}
|
||||
|
||||
// Goes through a set of changes and checks if conflicts are present. If no conflicts
|
||||
@ -3714,10 +3722,10 @@ fn update_conflict_markers(
|
||||
if conflicting_files.contains(&file_path.display().to_string()) {
|
||||
// check file for conflict markers, resolve the file if there are none in any hunk
|
||||
for hunk in non_commited_hunks {
|
||||
if hunk.diff.contains("<<<<<<< ours") {
|
||||
if hunk.diff.contains_str(b"<<<<<<< ours") {
|
||||
conflicted = true;
|
||||
}
|
||||
if hunk.diff.contains(">>>>>>> theirs") {
|
||||
if hunk.diff.contains_str(b">>>>>>> theirs") {
|
||||
conflicted = true;
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ async fn should_unapply_with_commits() {
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
.unwrap_or_else(|err| panic!("{err:?}"));
|
||||
|
||||
let branch = controller
|
||||
.list_virtual_branches(project_id)
|
||||
|
Loading…
Reference in New Issue
Block a user