mirror of
https://github.com/gitbutlerapp/gitbutler.git
synced 2024-12-23 09:33:01 +03:00
Exclude big files when performing a worktree diff.
This was lost previously when switching it over to a read-only implementation. Implementing it with an ignore list will take time, 400ms in the GitLab repository, but it's not slower than it was before and it is always preferred to not dump objects into the ODB unnecessarily.
This commit is contained in:
parent
a0e236110a
commit
79798c7407
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -2211,6 +2211,7 @@ name = "gitbutler-command-context"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bstr",
|
||||
"git2",
|
||||
"gitbutler-project",
|
||||
"gix",
|
||||
@ -2374,6 +2375,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"git2",
|
||||
"gitbutler-branch",
|
||||
"gitbutler-command-context",
|
||||
"gitbutler-diff",
|
||||
"gitbutler-fs",
|
||||
"gitbutler-project",
|
||||
|
@ -47,16 +47,16 @@ pub fn get_applied_status_cached(
|
||||
worktree_changes: Option<gitbutler_diff::DiffByPathMap>,
|
||||
) -> Result<VirtualBranchesStatus> {
|
||||
assure_open_workspace_mode(ctx).context("ng applied status requires open workspace mode")?;
|
||||
// TODO(ST): this was `get_workspace_head()`, which is slow and ideally, we don't dynamically
|
||||
// calculate which should already be 'fixed' - why do we have the integration branch
|
||||
// if we can't assume it's in the right state? So ideally, we assure that the code
|
||||
// that affects the integration branch also updates it?
|
||||
let integration_commit_id = ctx.repository().head_commit()?.id();
|
||||
let mut virtual_branches = ctx
|
||||
.project()
|
||||
.virtual_branches()
|
||||
.list_branches_in_workspace()?;
|
||||
let base_file_diffs = worktree_changes.map(Ok).unwrap_or_else(|| {
|
||||
// TODO(ST): this was `get_workspace_head()`, which is slow and ideally, we don't dynamically
|
||||
// calculate which should already be 'fixed' - why do we have the integration branch
|
||||
// if we can't assume it's in the right state? So ideally, we assure that the code
|
||||
// that affects the integration branch also updates it?
|
||||
let integration_commit_id = ctx.repository().head_commit()?.id();
|
||||
gitbutler_diff::workdir(ctx.repository(), &integration_commit_id.to_owned())
|
||||
.context("failed to diff workdir")
|
||||
})?;
|
||||
|
@ -12,3 +12,4 @@ gix.workspace = true
|
||||
tracing.workspace = true
|
||||
gitbutler-project.workspace = true
|
||||
itertools = "0.13"
|
||||
bstr = "1.10.0"
|
||||
|
@ -98,3 +98,6 @@ impl CommandContext {
|
||||
)?)
|
||||
}
|
||||
}
|
||||
|
||||
mod repository_ext;
|
||||
pub use repository_ext::RepositoryExtLite;
|
||||
|
50
crates/gitbutler-command-context/src/repository_ext.rs
Normal file
50
crates/gitbutler-command-context/src/repository_ext.rs
Normal file
@ -0,0 +1,50 @@
|
||||
use anyhow::{Context, Result};
|
||||
use gix::bstr::{BString, ByteVec};
|
||||
use tracing::instrument;
|
||||
|
||||
/// An extension trait that should avoid pulling in a large number of dependencies so it can be used
|
||||
/// in more places without causing dependency cycles.
|
||||
/// `gitbutler_repo::RepositoryExt` may not be usable everywhere because of such cycles.
|
||||
pub trait RepositoryExtLite {
|
||||
/// Exclude files that are larger than `limit_in_bytes` (e.g. a `database.sql` which may never be intended to be committed)
|
||||
/// so they don't show up in the next diff.
|
||||
fn ignore_large_files_in_diffs(&self, limit_in_bytes: u64) -> Result<()>;
|
||||
}
|
||||
|
||||
impl RepositoryExtLite for git2::Repository {
|
||||
#[instrument(level = tracing::Level::DEBUG, skip(self), err(Debug))]
|
||||
fn ignore_large_files_in_diffs(&self, limit_in_bytes: u64) -> Result<()> {
|
||||
use gix::bstr::ByteSlice;
|
||||
let repo = gix::open(self.path())?;
|
||||
let worktree_dir = repo
|
||||
.work_dir()
|
||||
.context("All repos are expected to have a worktree")?;
|
||||
let files_to_exclude: Vec<_> = repo
|
||||
.dirwalk_iter(
|
||||
repo.index_or_empty()?,
|
||||
None::<BString>,
|
||||
Default::default(),
|
||||
repo.dirwalk_options()?
|
||||
.emit_ignored(None)
|
||||
.emit_pruned(false)
|
||||
.emit_untracked(gix::dir::walk::EmissionMode::Matching),
|
||||
)?
|
||||
.filter_map(Result::ok)
|
||||
.filter_map(|item| {
|
||||
let path = worktree_dir.join(gix::path::from_bstr(item.entry.rela_path.as_bstr()));
|
||||
let file_is_too_large = path
|
||||
.metadata()
|
||||
.map_or(false, |md| md.is_file() && md.len() > limit_in_bytes);
|
||||
file_is_too_large
|
||||
.then(|| Vec::from(item.entry.rela_path).into_string().ok())
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
// TODO(ST): refactor this to be path-safe and ' ' save - the returned list is space separated (!!)
|
||||
// Just make sure this isn't needed anymore.
|
||||
let ignore_list = files_to_exclude.join(" ");
|
||||
// In-memory, libgit2 internal ignore rule
|
||||
self.add_ignore_rule(&ignore_list)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -3,6 +3,7 @@ use std::{borrow::Cow, collections::HashMap, path::PathBuf, str};
|
||||
use anyhow::{Context, Result};
|
||||
use bstr::{BStr, BString, ByteSlice, ByteVec};
|
||||
use gitbutler_cherry_pick::RepositoryExt;
|
||||
use gitbutler_command_context::RepositoryExtLite;
|
||||
use gitbutler_serde::BStringForFrontend;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::instrument;
|
||||
@ -155,8 +156,8 @@ pub fn workdir(repo: &git2::Repository, commit_oid: &git2::Oid) -> Result<DiffBy
|
||||
for conflict_path_to_resolve in paths_to_add {
|
||||
index.add_path(conflict_path_to_resolve.as_ref())?;
|
||||
}
|
||||
repo.ignore_large_files_in_diffs(50_000_000)?;
|
||||
let diff = repo.diff_tree_to_workdir_with_index(Some(&old_tree), Some(&mut diff_opts))?;
|
||||
// TODO(ST): bring back support for skipped (large) files.
|
||||
hunks_by_filepath(Some(repo), &diff)
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ strum = { version = "0.26", features = ["derive"] }
|
||||
tracing.workspace = true
|
||||
gix = { workspace = true, features = ["dirwalk", "credentials", "parallel"] }
|
||||
toml.workspace = true
|
||||
gitbutler-command-context.workspace = true
|
||||
gitbutler-project.workspace = true
|
||||
gitbutler-branch.workspace = true
|
||||
gitbutler-serde.workspace = true
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::{
|
||||
collections::{hash_map::Entry, HashMap},
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
path::PathBuf,
|
||||
str::{from_utf8, FromStr},
|
||||
time::Duration,
|
||||
};
|
||||
@ -9,13 +9,13 @@ use std::{
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use git2::{DiffOptions, FileMode};
|
||||
use gitbutler_branch::{Branch, SignaturePurpose, VirtualBranchesHandle, VirtualBranchesState};
|
||||
use gitbutler_command_context::RepositoryExtLite;
|
||||
use gitbutler_diff::{hunks_by_filepath, FileDiff};
|
||||
use gitbutler_project::{
|
||||
access::{WorktreeReadPermission, WorktreeWritePermission},
|
||||
Project,
|
||||
};
|
||||
use gitbutler_repo::RepositoryExt;
|
||||
use gix::bstr::{BString, ByteSlice, ByteVec};
|
||||
use tracing::instrument;
|
||||
|
||||
use super::{
|
||||
@ -286,11 +286,7 @@ impl OplogExt for Project {
|
||||
let old_wd_tree_id = tree_from_applied_vbranches(&repo, commit.parent(0)?.id())?;
|
||||
let old_wd_tree = repo.find_tree(old_wd_tree_id)?;
|
||||
|
||||
// Exclude files that are larger than the limit (eg. database.sql which may never be intended to be committed)
|
||||
let files_to_exclude =
|
||||
worktree_files_larger_than_limit_as_git2_ignore_rule(&repo, worktree_dir)?;
|
||||
// In-memory, libgit2 internal ignore rule
|
||||
repo.add_ignore_rule(&files_to_exclude)?;
|
||||
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
|
||||
|
||||
let mut diff_opts = git2::DiffOptions::new();
|
||||
diff_opts
|
||||
@ -579,11 +575,7 @@ fn restore_snapshot(
|
||||
let workdir_tree_id = tree_from_applied_vbranches(&repo, snapshot_commit_id)?;
|
||||
let workdir_tree = repo.find_tree(workdir_tree_id)?;
|
||||
|
||||
// Exclude files that are larger than the limit (eg. database.sql which may never be intended to be committed)
|
||||
let files_to_exclude =
|
||||
worktree_files_larger_than_limit_as_git2_ignore_rule(&repo, worktree_dir)?;
|
||||
// In-memory, libgit2 internal ignore rule
|
||||
repo.add_ignore_rule(&files_to_exclude)?;
|
||||
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
|
||||
|
||||
// Define the checkout builder
|
||||
let mut checkout_builder = git2::build::CheckoutBuilder::new();
|
||||
@ -711,38 +703,6 @@ fn write_conflicts_tree(
|
||||
Ok(conflicts_tree)
|
||||
}
|
||||
|
||||
/// Exclude files that are larger than the limit (eg. database.sql which may never be intended to be committed)
|
||||
/// TODO(ST): refactor this to be path-safe and ' ' save - the returned list is space separated (!!)
|
||||
#[instrument(level = tracing::Level::DEBUG, skip(repo), err(Debug))]
|
||||
fn worktree_files_larger_than_limit_as_git2_ignore_rule(
|
||||
repo: &git2::Repository,
|
||||
worktree_dir: &Path,
|
||||
) -> Result<String> {
|
||||
let repo = gix::open(repo.path())?;
|
||||
let files_to_exclude: Vec<_> = repo
|
||||
.dirwalk_iter(
|
||||
repo.index_or_empty()?,
|
||||
None::<BString>,
|
||||
Default::default(),
|
||||
repo.dirwalk_options()?
|
||||
.emit_ignored(None)
|
||||
.emit_pruned(false)
|
||||
.emit_untracked(gix::dir::walk::EmissionMode::Matching),
|
||||
)?
|
||||
.filter_map(Result::ok)
|
||||
.filter_map(|item| {
|
||||
let path = worktree_dir.join(gix::path::from_bstr(item.entry.rela_path.as_bstr()));
|
||||
let file_is_too_large = path.metadata().map_or(false, |md| {
|
||||
md.is_file() && md.len() > SNAPSHOT_FILE_LIMIT_BYTES
|
||||
});
|
||||
file_is_too_large
|
||||
.then(|| Vec::from(item.entry.rela_path).into_string().ok())
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
Ok(files_to_exclude.join(" "))
|
||||
}
|
||||
|
||||
/// Returns the number of lines of code (added + removed) since the last snapshot in `project`.
|
||||
/// Includes untracked files.
|
||||
/// `repo` is an already opened project repository.
|
||||
@ -752,13 +712,7 @@ fn lines_since_snapshot(project: &Project, repo: &git2::Repository) -> Result<us
|
||||
// This looks at the diff between the tree of the currently selected as 'default' branch (where new changes go)
|
||||
// and that same tree in the last snapshot. For some reason, comparing workdir to the workdir subtree from
|
||||
// the snapshot simply does not give us what we need here, so instead using tree to tree comparison.
|
||||
let worktree_dir = project.path.as_path();
|
||||
|
||||
// Exclude files that are larger than the limit (eg. database.sql which may never be intended to be committed)
|
||||
let files_to_exclude =
|
||||
worktree_files_larger_than_limit_as_git2_ignore_rule(repo, worktree_dir)?;
|
||||
// In-memory, libgit2 internal ignore rule
|
||||
repo.add_ignore_rule(&files_to_exclude)?;
|
||||
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
|
||||
|
||||
let oplog_state = OplogHandle::new(&project.gb_dir());
|
||||
let Some(oplog_commit_id) = oplog_state.oplog_head()? else {
|
||||
|
Loading…
Reference in New Issue
Block a user