From 6c90381bcf2789307b8562b37d1f0562f7218800 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 24 Apr 2024 15:06:32 +0200 Subject: [PATCH] Use iterators for transformation, and avoid copies by default. --- crates/gitbutler-core/src/git/diff.rs | 9 +- .../src/project_repository/conflicts.rs | 3 + .../src/virtual_branches/base.rs | 27 +- .../src/virtual_branches/branch/ownership.rs | 1 + .../src/virtual_branches/virtual.rs | 270 ++++++++---------- 5 files changed, 148 insertions(+), 162 deletions(-) diff --git a/crates/gitbutler-core/src/git/diff.rs b/crates/gitbutler-core/src/git/diff.rs index 40f9d36da..8a879026b 100644 --- a/crates/gitbutler-core/src/git/diff.rs +++ b/crates/gitbutler-core/src/git/diff.rs @@ -9,7 +9,6 @@ use tracing::instrument; use super::Repository; use crate::git; -use crate::virtual_branches::BranchStatus; pub type DiffByPathMap = HashMap; @@ -428,8 +427,8 @@ pub fn reverse_hunk(hunk: &GitHunk) -> Option { } } -// TODO(ST): turning this into an iterator will trigger a cascade of changes that -// mean less unnecessary copies. It also leads to `virtual.rs` - 4k SLOC! -pub fn diff_files_into_hunks(files: DiffByPathMap) -> BranchStatus { - HashMap::from_iter(files.into_iter().map(|(path, file)| (path, file.hunks))) +pub fn diff_files_into_hunks( + files: DiffByPathMap, +) -> impl Iterator)> { + files.into_iter().map(|(path, file)| (path, file.hunks)) } diff --git a/crates/gitbutler-core/src/project_repository/conflicts.rs b/crates/gitbutler-core/src/project_repository/conflicts.rs index d7ae64d80..833d2d3f3 100644 --- a/crates/gitbutler-core/src/project_repository/conflicts.rs +++ b/crates/gitbutler-core/src/project_repository/conflicts.rs @@ -95,6 +95,8 @@ pub fn conflicting_files(repository: &Repository) -> Result> { Ok(reader.lines().map_while(Result::ok).collect()) } +/// Check if `path` is conflicting in `repository`, or if `None`, check if there is any conflict. +// TODO(ST): Should this not rather check the conflicting state in the index? pub fn is_conflicting>(repository: &Repository, path: Option

) -> Result { let conflicts_path = repository.git_repository.path().join("conflicts"); if !conflicts_path.exists() { @@ -105,6 +107,7 @@ pub fn is_conflicting>(repository: &Repository, path: Option

) let reader = std::io::BufReader::new(file); let mut files = reader.lines().map_ok(PathBuf::from); if let Some(pathname) = path { + // TODO(ST): This shouldn't work on UTF8 strings. let pathname = pathname.as_ref(); // check if pathname is one of the lines in conflicts_path file diff --git a/crates/gitbutler-core/src/virtual_branches/base.rs b/crates/gitbutler-core/src/virtual_branches/base.rs index be39546f1..6ee97f592 100644 --- a/crates/gitbutler-core/src/virtual_branches/base.rs +++ b/crates/gitbutler-core/src/virtual_branches/base.rs @@ -6,7 +6,7 @@ use serde::Serialize; use super::{ branch, errors, integration::{update_gitbutler_integration, GITBUTLER_INTEGRATION_REFERENCE}, - target, BranchId, RemoteCommit, VirtualBranchesHandle, + target, BranchId, RemoteCommit, VirtualBranchHunk, VirtualBranchesHandle, }; use crate::{ git::{self, diff}, @@ -193,20 +193,21 @@ pub fn set_base_branch( let wd_diff = diff::workdir(repo, ¤t_head_commit.id())?; if !wd_diff.is_empty() || current_head_commit.id() != target.sha { - let hunks_by_filepath = super::virtual_hunks_by_filepath_from_file_diffs( - &project_repository.project().path, - &wd_diff, - ); - // assign ownership to the branch - let ownership = hunks_by_filepath.values().flatten().fold( + let ownership = wd_diff.iter().fold( BranchOwnershipClaims::default(), - |mut ownership, hunk| { - ownership.put( - &format!("{}:{}", hunk.file_path.display(), hunk.id) + |mut ownership, (file_path, diff)| { + for hunk in &diff.hunks { + ownership.put( + &format!( + "{}:{}", + file_path.display(), + VirtualBranchHunk::gen_id(hunk.new_start, hunk.new_lines) + ) .parse() .unwrap(), - ); + ); + } ownership }, ); @@ -254,7 +255,7 @@ pub fn set_base_branch( tree: super::write_tree_onto_commit( project_repository, current_head_commit.id(), - &diff::diff_files_into_hunks(wd_diff), + diff::diff_files_into_hunks(wd_diff), )?, ownership, order: 0, @@ -267,7 +268,7 @@ pub fn set_base_branch( set_exclude_decoration(project_repository)?; - super::integration::update_gitbutler_integration(&vb_state, project_repository)?; + update_gitbutler_integration(&vb_state, project_repository)?; let base = target_to_base_branch(project_repository, &target)?; Ok(base) diff --git a/crates/gitbutler-core/src/virtual_branches/branch/ownership.rs b/crates/gitbutler-core/src/virtual_branches/branch/ownership.rs index c064c9f49..b792d5812 100644 --- a/crates/gitbutler-core/src/virtual_branches/branch/ownership.rs +++ b/crates/gitbutler-core/src/virtual_branches/branch/ownership.rs @@ -99,6 +99,7 @@ impl BranchOwnershipClaims { } // modifies the ownership in-place and returns the file ownership that was taken, if any. + // TODO(ST): better pass the necessary parts of `ownership` for flexibility - saves allocations pub fn take(&mut self, ownership: &OwnershipClaim) -> Vec { let mut taken = Vec::new(); let mut remaining = Vec::new(); diff --git a/crates/gitbutler-core/src/virtual_branches/virtual.rs b/crates/gitbutler-core/src/virtual_branches/virtual.rs index 367a6efa6..c0114e352 100644 --- a/crates/gitbutler-core/src/virtual_branches/virtual.rs +++ b/crates/gitbutler-core/src/virtual_branches/virtual.rs @@ -1,4 +1,4 @@ -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; #[cfg(target_family = "unix")] use std::os::unix::prelude::PermissionsExt; use std::time::SystemTime; @@ -23,7 +23,7 @@ use super::{ }, branch_to_remote_branch, errors, target, RemoteBranch, VirtualBranchesHandle, }; -use crate::git::diff::{diff_files_into_hunks, trees, DiffByPathMap}; +use crate::git::diff::{diff_files_into_hunks, trees, FileDiff}; use crate::id::Id; use crate::virtual_branches::branch::HunkHash; use crate::{ @@ -112,6 +112,7 @@ pub struct VirtualBranchCommit { #[derive(Debug, PartialEq, Clone, Serialize)] #[serde(rename_all = "camelCase")] pub struct VirtualBranchFile { + // TODO(ST): `id` is just `path` as string - UI could adapt and avoid this copy. pub id: String, pub path: PathBuf, pub hunks: Vec, @@ -123,7 +124,7 @@ pub struct VirtualBranchFile { // this struct is a mapping to the view `Hunk` type in Typescript // found in src-tauri/src/routes/repo/[project_id]/types.ts -// it holds a materialized view for presentation purposes of one entry of the +// it holds a materialized view for presentation purposes of one entry of // each hunk in one `Branch.ownership` vector entry in Rust. // an array of them are returned as part of the `VirtualBranchFile` struct // @@ -150,24 +151,28 @@ pub struct VirtualBranchHunk { /// Lifecycle impl VirtualBranchHunk { + pub(crate) fn gen_id(new_start: u32, new_lines: u32) -> String { + format!("{}-{}", new_start, new_start + new_lines) + } fn from_git_hunk( project_path: &Path, - file_path: &Path, - hunk: &GitHunk, + file_path: PathBuf, + hunk: GitHunk, mtimes: &mut MTimeCache, ) -> Self { + let hash = Hunk::hash_diff(hunk.diff_lines.as_ref()); Self { - id: format!("{}-{}", hunk.new_start, hunk.new_start + hunk.new_lines), - modified_at: mtimes.mtime_by_path(project_path.join(file_path)), - file_path: file_path.to_owned(), - diff: hunk.diff_lines.clone(), + id: Self::gen_id(hunk.new_start, hunk.new_lines), + modified_at: mtimes.mtime_by_path(project_path.join(&file_path)), + file_path, + diff: hunk.diff_lines, old_start: hunk.old_start, start: hunk.new_start, end: hunk.new_start + hunk.new_lines, binary: hunk.binary, - hash: Hunk::hash_diff(hunk.diff_lines.as_ref()), + hash, locked: hunk.locked_to.len() > 0, - locked_to: Some(hunk.locked_to.clone()), + locked_to: Some(hunk.locked_to), change_type: hunk.change_type, } } @@ -561,7 +566,7 @@ pub fn unapply_ownership( target_commit.tree().context("failed to get target tree"), |final_tree, status| { let final_tree = final_tree?; - let tree_oid = write_tree(project_repository, &integration_commit_id, &status.1)?; + let tree_oid = write_tree(project_repository, &integration_commit_id, status.1)?; let branch_tree = repo.find_tree(tree_oid)?; let mut result = repo.merge_trees(&base_tree, &final_tree, &branch_tree)?; let final_tree_oid = result.write_tree_to(repo)?; @@ -570,7 +575,7 @@ pub fn unapply_ownership( }, )?; - let final_tree_oid = write_tree_onto_tree(project_repository, &final_tree, &diff)?; + let final_tree_oid = write_tree_onto_tree(project_repository, &final_tree, diff)?; let final_tree = repo .find_tree(final_tree_oid) .context("failed to find tree")?; @@ -728,7 +733,7 @@ pub fn unapply_branch( |final_tree, status| { let final_tree = final_tree?; let branch = status.0; - let tree_oid = write_tree(project_repository, &branch.head, &status.1)?; + let tree_oid = write_tree(project_repository, &branch.head, status.1)?; let branch_tree = repo.find_tree(tree_oid)?; let mut result = repo.merge_trees(&base_tree, &final_tree, &branch_tree)?; let final_tree_oid = result.write_tree_to(repo)?; @@ -804,9 +809,9 @@ pub fn list_virtual_branches( .max() .unwrap_or(-1); - for (branch, files) in &statuses { + for (branch, files) in statuses { let repo = &project_repository.git_repository; - update_conflict_markers(project_repository, files)?; + update_conflict_markers(project_repository, &files)?; let upstream_branch = match branch .upstream @@ -871,7 +876,7 @@ pub fn list_virtual_branches( commit_to_vbranch_commit( project_repository, - branch, + &branch, commit, is_integrated, is_remote, @@ -896,29 +901,29 @@ pub fn list_virtual_branches( .transpose()? .flatten(); - let mut files = diffs_to_virtual_files(project_repository, files); + let mut files = diffs_into_virtual_files(project_repository, files); files.sort_by(|a, b| { branch .ownership .claims .iter() .position(|o| o.file_path.eq(&a.path)) - .unwrap_or(999) + .unwrap_or(usize::MAX) .cmp( &branch .ownership .claims .iter() .position(|id| id.file_path.eq(&b.path)) - .unwrap_or(999), + .unwrap_or(usize::MAX), ) }); - let requires_force = is_requires_force(project_repository, branch)?; + let requires_force = is_requires_force(project_repository, &branch)?; let branch = VirtualBranch { id: branch.id, - name: branch.name.clone(), - notes: branch.notes.clone(), + name: branch.name, + notes: branch.notes, active: branch.applied, files, order: branch.order, @@ -927,11 +932,10 @@ pub fn list_virtual_branches( upstream, upstream_name: branch .upstream - .clone() .and_then(|r| Refname::from(r).branch().map(Into::into)), conflicted: conflicts::is_resolving(project_repository), base_current, - ownership: branch.ownership.clone(), + ownership: branch.ownership, updated_at: branch.updated_timestamp_ms, selected_for_changes: branch.selected_for_changes == Some(max_selected_for_changes), head: branch.head, @@ -1011,14 +1015,10 @@ fn list_virtual_commit_files( &parent_tree, &commit_tree, )?; - let hunks_by_filepath = - virtual_hunks_by_filepath_from_file_diffs(&project_repository.project().path, &diff); - Ok(virtual_hunks_to_virtual_files( + let hunks_by_filepath = virtual_hunks_by_file_diffs(&project_repository.project().path, diff); + Ok(virtual_hunks_into_virtual_files( project_repository, - &hunks_by_filepath - .into_values() - .flatten() - .collect::>(), + hunks_by_filepath, )) } @@ -1582,44 +1582,35 @@ impl MTimeCache { } } -pub fn virtual_hunks_by_filepath( - project_path: &Path, - diff: &BranchStatus, -) -> HashMap> { +pub(super) fn virtual_hunks_by_git_hunks<'a>( + project_path: &'a Path, + diff: impl IntoIterator)> + 'a, +) -> impl Iterator)> + 'a { let mut mtimes = MTimeCache::default(); - diff.iter() - .map(|(file_path, hunks)| { - let hunks = hunks - .iter() - .map(|hunk| { - VirtualBranchHunk::from_git_hunk(project_path, file_path, hunk, &mut mtimes) - }) - .collect::>(); - (file_path.clone(), hunks) - }) - .collect::>() + diff.into_iter().map(move |(file_path, hunks)| { + let hunks = hunks + .into_iter() + .map(|hunk| { + VirtualBranchHunk::from_git_hunk(project_path, file_path.clone(), hunk, &mut mtimes) + }) + .collect::>(); + (file_path, hunks) + }) } -pub fn virtual_hunks_by_filepath_from_file_diffs( - project_path: &Path, - diff: &DiffByPathMap, -) -> HashMap> { - let mut mtimes = MTimeCache::default(); - diff.iter() - .map(|(file_path, file)| { - let hunks = file - .hunks - .iter() - .map(|hunk| { - VirtualBranchHunk::from_git_hunk(project_path, file_path, hunk, &mut mtimes) - }) - .collect::>(); - (file_path.clone(), hunks) - }) - .collect::>() +pub fn virtual_hunks_by_file_diffs<'a>( + project_path: &'a Path, + diff: impl IntoIterator + 'a, +) -> impl Iterator)> + 'a { + virtual_hunks_by_git_hunks( + project_path, + diff.into_iter() + .map(move |(file_path, file)| (file_path, file.hunks)), + ) } pub type BranchStatus = HashMap>; +pub type VirtualBranchHunksByPathMap = HashMap>; // list the virtual branches and their file statuses (statusi?) #[allow(clippy::type_complexity)] @@ -1700,7 +1691,7 @@ fn get_non_applied_status( let diff = diff::trees(&project_repository.git_repository, &head_tree, &branch_tree)?; - Ok((branch, diff::diff_files_into_hunks(diff))) + Ok((branch, diff::diff_files_into_hunks(diff).collect())) }) .collect::>>() } @@ -1722,7 +1713,7 @@ fn get_applied_status( skipped_files.push(file_diff.clone()); } } - let mut base_diffs = diff_files_into_hunks(base_file_diffs); + let mut base_diffs: HashMap<_, _> = diff_files_into_hunks(base_file_diffs).collect(); // sort by order, so that the default branch is first (left in the ui) virtual_branches.sort_by(|a, b| a.order.cmp(&b.order)); @@ -1982,31 +1973,29 @@ fn get_applied_status( Ok((hunks_by_branch, skipped_files)) } -fn virtual_hunks_to_virtual_files( +/// NOTE: There is no use returning an iterator here as this acts like the final product. +fn virtual_hunks_into_virtual_files( project_repository: &project_repository::Repository, - hunks: &[VirtualBranchHunk], + hunks: impl IntoIterator)>, ) -> Vec { hunks - .iter() - .fold(HashMap::>::new(), |mut acc, hunk| { - acc.entry(hunk.file_path.clone()) - .or_default() - .push(hunk.clone()); - acc - }) .into_iter() - .map(|(file_path, hunks)| VirtualBranchFile { - id: file_path.display().to_string(), - path: file_path.clone(), - hunks: hunks.clone(), - binary: hunks.iter().any(|h| h.binary), - large: false, - modified_at: hunks.iter().map(|h| h.modified_at).max().unwrap_or(0), - conflicted: conflicts::is_conflicting( - project_repository, - Some(&file_path.display().to_string()), - ) - .unwrap_or(false), + .map(|(path, hunks)| { + let id = path.display().to_string(); + let conflicted = + conflicts::is_conflicting(project_repository, Some(&id)).unwrap_or(false); + let binary = hunks.iter().any(|h| h.binary); + let modified_at = hunks.iter().map(|h| h.modified_at).max().unwrap_or(0); + debug_assert!(hunks.iter().all(|hunk| hunk.file_path == path)); + VirtualBranchFile { + id, + path, + hunks, + binary, + large: false, + modified_at, + conflicted, + } }) .collect::>() } @@ -2096,19 +2085,12 @@ pub fn reset_branch( Ok(()) } -fn diffs_to_virtual_files( +fn diffs_into_virtual_files( project_repository: &project_repository::Repository, - diffs: &BranchStatus, + diffs: BranchStatus, ) -> Vec { - let hunks_by_filepath = virtual_hunks_by_filepath(&project_repository.project().path, diffs); - virtual_hunks_to_virtual_files( - project_repository, - &hunks_by_filepath - .values() - .flatten() - .cloned() - .collect::>(), - ) + let hunks_by_filepath = virtual_hunks_by_git_hunks(&project_repository.project().path, diffs); + virtual_hunks_into_virtual_files(project_repository, hunks_by_filepath) } // this function takes a list of file ownership, @@ -2117,7 +2099,7 @@ fn diffs_to_virtual_files( pub fn write_tree( project_repository: &project_repository::Repository, target: &git::Oid, - files: &BranchStatus, + files: impl IntoIterator, impl Borrow>)>, ) -> Result { write_tree_onto_commit(project_repository, *target, files) } @@ -2125,7 +2107,7 @@ pub fn write_tree( pub fn write_tree_onto_commit( project_repository: &project_repository::Repository, commit_oid: git::Oid, - files: &BranchStatus, + files: impl IntoIterator, impl Borrow>)>, ) -> Result { // read the base sha into an index let git_repository = &project_repository.git_repository; @@ -2139,12 +2121,14 @@ pub fn write_tree_onto_commit( pub fn write_tree_onto_tree( project_repository: &project_repository::Repository, base_tree: &git::Tree, - files: &BranchStatus, + files: impl IntoIterator, impl Borrow>)>, ) -> Result { let git_repository = &project_repository.git_repository; let mut builder = git_repository.treebuilder(Some(base_tree)); // now update the index with content in the working directory for each file for (rel_path, hunks) in files { + let rel_path = rel_path.borrow(); + let hunks = hunks.borrow(); let full_path = project_repository.path().join(rel_path); let is_submodule = full_path.is_dir() @@ -2224,7 +2208,7 @@ pub fn write_tree_onto_tree( let blob_contents = blob.content(); - let mut hunks = hunks.clone(); + let mut hunks = hunks.iter().collect::>(); hunks.sort_by_key(|hunk| hunk.new_start); let mut all_diffs = BString::default(); for hunk in hunks { @@ -2246,7 +2230,7 @@ pub fn write_tree_onto_tree( } else if is_submodule { let mut blob_contents = BString::default(); - let mut hunks = hunks.clone(); + let mut hunks = hunks.iter().collect::>(); hunks.sort_by_key(|hunk| hunk.new_start); for hunk in hunks { let patch = Patch::from_bytes(&hunk.diff_lines)?; @@ -2339,11 +2323,11 @@ pub fn commit( let integration_commit_id = super::integration::get_workspace_head(&vb_state, project_repository)?; // get the files to commit - let (mut statuses, _) = get_status_by_branch(project_repository, Some(&integration_commit_id)) + let (statuses, _) = get_status_by_branch(project_repository, Some(&integration_commit_id)) .context("failed to get status by branch")?; let (ref mut branch, files) = statuses - .iter_mut() + .into_iter() .find(|(branch, _)| branch.id == *branch_id) .ok_or_else(|| { errors::CommitError::BranchNotFound(errors::BranchNotFound { @@ -2352,7 +2336,7 @@ pub fn commit( }) })?; - update_conflict_markers(project_repository, files)?; + update_conflict_markers(project_repository, &files)?; if conflicts::is_conflicting::<&Path>(project_repository, None)? { return Err(errors::CommitError::Conflicted(errors::ProjectConflict { @@ -2361,33 +2345,29 @@ pub fn commit( } let tree_oid = if let Some(ownership) = ownership { - let files = files - .iter() - .filter_map(|(filepath, hunks)| { - let hunks = hunks - .iter() - .filter(|hunk| { - ownership - .claims - .iter() - .find(|f| f.file_path.eq(filepath)) - .map_or(false, |f| { - f.hunks.iter().any(|h| { - h.start == hunk.new_start - && h.end == hunk.new_start + hunk.new_lines - }) + let files = files.into_iter().filter_map(|(filepath, hunks)| { + let hunks = hunks + .into_iter() + .filter(|hunk| { + ownership + .claims + .iter() + .find(|f| f.file_path.eq(&filepath)) + .map_or(false, |f| { + f.hunks.iter().any(|h| { + h.start == hunk.new_start + && h.end == hunk.new_start + hunk.new_lines }) - }) - .cloned() - .collect::>(); - if hunks.is_empty() { - None - } else { - Some((filepath.clone(), hunks)) - } - }) - .collect::>(); - write_tree_onto_commit(project_repository, branch.head, &files)? + }) + }) + .collect::>(); + if hunks.is_empty() { + None + } else { + Some((filepath, hunks)) + } + }); + write_tree_onto_commit(project_repository, branch.head, files)? } else { write_tree_onto_commit(project_repository, branch.head, files)? }; @@ -2836,7 +2816,7 @@ pub fn amend( } let new_tree_oid = - write_tree_onto_commit(project_repository, target_branch.head, &diffs_to_amend)?; + write_tree_onto_commit(project_repository, target_branch.head, diffs_to_amend)?; let new_tree = project_repository .git_repository .find_tree(new_tree_oid) @@ -3475,7 +3455,7 @@ pub fn move_commit( &source_branch_head_tree, )?; - let branch_head_diff = diff::diff_files_into_hunks(branch_head_diff); + let branch_head_diff: HashMap<_, _> = diff::diff_files_into_hunks(branch_head_diff).collect(); let is_source_locked = source_branch_non_comitted_files .iter() .any(|(path, hunks)| { @@ -3539,7 +3519,7 @@ pub fn move_commit( let new_destination_tree_oid = write_tree_onto_commit( project_repository, destination_branch.head, - &branch_head_diff, + branch_head_diff, ) .context("failed to write tree onto commit")?; let new_destination_tree = project_repository @@ -3667,20 +3647,22 @@ pub fn create_virtual_branch_from_branch( &head_commit_tree, ) .context("failed to diff trees")?; - let diff = diff::diff_files_into_hunks(diff); - - let hunks_by_filepath = - super::virtual_hunks_by_filepath(&project_repository.project().path, &diff); // assign ownership to the branch - let ownership = hunks_by_filepath.values().flatten().fold( + let ownership = diff.iter().fold( branch::BranchOwnershipClaims::default(), - |mut ownership, hunk| { - ownership.put( - &format!("{}:{}", hunk.file_path.display(), hunk.id) + |mut ownership, (file_path, file)| { + for hunk in &file.hunks { + ownership.put( + &format!( + "{}:{}", + file_path.display(), + VirtualBranchHunk::gen_id(hunk.new_start, hunk.new_lines) + ) .parse() .unwrap(), - ); + ); + } ownership }, );