From c2a5014ed025c3600e5a08f8a1d7a74d818d3667 Mon Sep 17 00:00:00 2001 From: Kiril Videlov Date: Thu, 23 May 2024 21:40:07 +0200 Subject: [PATCH 1/3] oplog state persists timestamp of last snapshot --- crates/gitbutler-core/src/ops/state.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/crates/gitbutler-core/src/ops/state.rs b/crates/gitbutler-core/src/ops/state.rs index 4697216fd..f1775d71c 100644 --- a/crates/gitbutler-core/src/ops/state.rs +++ b/crates/gitbutler-core/src/ops/state.rs @@ -1,8 +1,9 @@ -use anyhow::Result; +use anyhow::{Context, Result}; use std::{ fs::File, io::Read, path::{Path, PathBuf}, + time::Duration, }; use serde::{Deserialize, Serialize}; @@ -14,6 +15,9 @@ use super::OPLOG_FILE_NAME; pub struct Oplog { /// This is the sha of the last oplog commit pub head_sha: Option, + /// The time when the last snapshot was created. Seconds since Epoch + #[serde(default)] + pub modified_at: u64, } pub struct OplogHandle { @@ -34,7 +38,7 @@ impl OplogHandle { pub fn set_oplog_head(&self, sha: String) -> Result<()> { let mut oplog = self.read_file()?; oplog.head_sha = Some(sha); - self.write_file(&oplog)?; + self.write_file(oplog)?; Ok(()) } @@ -46,6 +50,14 @@ impl OplogHandle { Ok(oplog.head_sha) } + /// Gets the time when the last snapshot was created. + /// + /// Errors if the file cannot be read or written. + pub fn get_modified_at(&self) -> anyhow::Result { + let oplog = self.read_file()?; + Ok(Duration::from_secs(oplog.modified_at)) + } + /// Reads and parses the state file. /// /// If the file does not exist, it will be created. 
@@ -64,8 +76,13 @@ impl OplogHandle { Ok(oplog) } - fn write_file(&self, oplog: &Oplog) -> anyhow::Result<()> { - write(self.file_path.as_path(), oplog) + fn write_file(&self, oplog: Oplog) -> anyhow::Result<()> { + let mut oplog = oplog; + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .context("failed to get time since epoch")?; + oplog.modified_at = now.as_secs(); + write(self.file_path.as_path(), &oplog) } } From db4bb914cc3e85b3c1b8a55fa09accb8be5626df Mon Sep 17 00:00:00 2001 From: Kiril Videlov Date: Thu, 23 May 2024 22:17:38 +0200 Subject: [PATCH 2/3] improve auto snapshotting check performance --- crates/gitbutler-core/src/ops/oplog.rs | 146 ++++++++++++-------- crates/gitbutler-watcher/src/handler/mod.rs | 3 +- 2 files changed, 88 insertions(+), 61 deletions(-) diff --git a/crates/gitbutler-core/src/ops/oplog.rs b/crates/gitbutler-core/src/ops/oplog.rs index 49f0091e5..722ab0758 100644 --- a/crates/gitbutler-core/src/ops/oplog.rs +++ b/crates/gitbutler-core/src/ops/oplog.rs @@ -1,4 +1,4 @@ -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use git2::FileMode; use itertools::Itertools; use std::collections::HashMap; @@ -53,10 +53,17 @@ pub trait Oplog { /// If there are files that are untracked and larger than SNAPSHOT_FILE_LIMIT_BYTES, they are excluded from snapshot creation and restoring. /// Returns the sha of the created revert snapshot commit or None if snapshots are disabled. fn restore_snapshot(&self, sha: String) -> Result>; - /// Returns the number of lines of code (added plus removed) since the last snapshot. Includes untracked files. + /// Determines if a new snapshot should be created due to file changes being created since the last snapshot. 
+ /// The needs for the automatic snapshotting are: + /// - It needs to facilitate backup of work in progress code + /// - The snapshots should not be too frequent or small - both for UX and performance reasons + /// - Checking if an automatic snapshot is needed should be fast and efficient since it is called on filesystem events /// - /// If there are no snapshots, 0 is returned. - fn lines_since_snapshot(&self) -> Result; + /// This implementation works as follows: + /// - If it's been more than 5 minutes since the last snapshot, + /// check the sum of added and removed lines since the last snapshot, otherwise return false. + /// - If the sum of added and removed lines is greater than a configured threshold, return true, otherwise return false. + fn should_auto_snapshot(&self) -> Result; /// Returns the diff of the snapshot and it's parent. It only includes the workdir changes. /// /// This is useful to show what has changed in this particular snapshot @@ -539,63 +546,21 @@ impl Oplog for Project { self.create_snapshot(details) } - // This looks at the diff between the tree of the currenly selected as 'default' branch (where new changes go) - // and that same tree in the last snapshot. For some reason, comparing workdir to the workdir subree from - // the snapshot simply does not give us what we need here, so instead using tree to tree comparison. - fn lines_since_snapshot(&self) -> Result { - let repo_path = self.path.as_path(); - let repo = git2::Repository::init(repo_path)?; - - // Exclude files that are larger than the limit (eg. 
database.sql which may never be intended to be committed) - let files_to_exclude = get_exclude_list(&repo)?; - // In-memory, libgit2 internal ignore rule - repo.add_ignore_rule(&files_to_exclude)?; - + fn should_auto_snapshot(&self) -> Result { let oplog_state = OplogHandle::new(&self.gb_dir()); - let head_sha = oplog_state.get_oplog_head()?; - if head_sha.is_none() { - return Ok(0); + let last_snapshot_time = oplog_state.get_modified_at().unwrap_or_default(); + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .context("failed to get time since epoch")?; + if now - last_snapshot_time < Duration::from_secs(300) { + return Ok(false); + } else { + let changed_lines = lines_since_snapshot(self)?; + if changed_lines > self.snapshot_lines_threshold() { + return Ok(true); + } } - let head_sha = head_sha.unwrap(); - - let vb_state = self.virtual_branches(); - let binding = vb_state.list_branches()?; - let active_branch = binding - .iter() - .filter(|b| b.applied) - .max_by_key(|branch| branch.selected_for_changes.unwrap_or(i64::MIN)); - if active_branch.is_none() { - return Ok(0); - } - let active_branch = active_branch.unwrap(); - let active_branch_tree = repo.find_tree(active_branch.tree.into())?; - - let commit = repo.find_commit(git2::Oid::from_str(&head_sha)?)?; - let head_tree = commit.tree()?; - let virtual_branches = head_tree - .get_name("virtual_branches") - .ok_or(anyhow!("failed to get virtual_branches tree entry"))?; - let virtual_branches = repo.find_tree(virtual_branches.id())?; - let old_active_branch = virtual_branches - .get_name(active_branch.id.to_string().as_str()) - .ok_or(anyhow!("failed to get active branch from tree entry"))?; - let old_active_branch = repo.find_tree(old_active_branch.id())?; - let old_active_branch_tree = old_active_branch - .get_name("tree") - .ok_or(anyhow!("failed to get integration tree entry"))?; - let old_active_branch_tree = repo.find_tree(old_active_branch_tree.id())?; - - let mut opts = 
git2::DiffOptions::new(); - opts.include_untracked(true); - opts.ignore_submodules(true); - - let diff = repo.diff_tree_to_tree( - Some(&active_branch_tree), - Some(&old_active_branch_tree), - Some(&mut opts), - ); - let stats = diff?.stats()?; - Ok(stats.deletions() + stats.insertions()) + Ok(false) } fn snapshot_diff(&self, sha: String) -> Result> { @@ -734,3 +699,66 @@ fn get_exclude_list(repo: &git2::Repository) -> Result { .join(" "); Ok(files_to_exclude) } + +/// Returns the number of lines of code (added plus removed) since the last snapshot. Includes untracked files. +/// +/// If there are no snapshots, 0 is returned. +fn lines_since_snapshot(project: &Project) -> Result { + // This looks at the diff between the tree of the currently selected as 'default' branch (where new changes go) + // and that same tree in the last snapshot. For some reason, comparing workdir to the workdir subtree from + // the snapshot simply does not give us what we need here, so instead using tree to tree comparison. + + let repo_path = project.path.as_path(); + let repo = git2::Repository::init(repo_path)?; + + // Exclude files that are larger than the limit (eg. 
database.sql which may never be intended to be committed) + let files_to_exclude = get_exclude_list(&repo)?; + // In-memory, libgit2 internal ignore rule + repo.add_ignore_rule(&files_to_exclude)?; + + let oplog_state = OplogHandle::new(&project.gb_dir()); + let head_sha = oplog_state.get_oplog_head()?; + if head_sha.is_none() { + return Ok(0); + } + let head_sha = head_sha.unwrap(); + + let vb_state = project.virtual_branches(); + let binding = vb_state.list_branches()?; + let active_branch = binding + .iter() + .filter(|b| b.applied) + .max_by_key(|branch| branch.selected_for_changes.unwrap_or(i64::MIN)); + if active_branch.is_none() { + return Ok(0); + } + let active_branch = active_branch.unwrap(); + let active_branch_tree = repo.find_tree(active_branch.tree.into())?; + + let commit = repo.find_commit(git2::Oid::from_str(&head_sha)?)?; + let head_tree = commit.tree()?; + let virtual_branches = head_tree + .get_name("virtual_branches") + .ok_or(anyhow!("failed to get virtual_branches tree entry"))?; + let virtual_branches = repo.find_tree(virtual_branches.id())?; + let old_active_branch = virtual_branches + .get_name(active_branch.id.to_string().as_str()) + .ok_or(anyhow!("failed to get active branch from tree entry"))?; + let old_active_branch = repo.find_tree(old_active_branch.id())?; + let old_active_branch_tree = old_active_branch + .get_name("tree") + .ok_or(anyhow!("failed to get integration tree entry"))?; + let old_active_branch_tree = repo.find_tree(old_active_branch_tree.id())?; + + let mut opts = git2::DiffOptions::new(); + opts.include_untracked(true); + opts.ignore_submodules(true); + + let diff = repo.diff_tree_to_tree( + Some(&active_branch_tree), + Some(&old_active_branch_tree), + Some(&mut opts), + ); + let stats = diff?.stats()?; + Ok(stats.deletions() + stats.insertions()) +} diff --git a/crates/gitbutler-watcher/src/handler/mod.rs b/crates/gitbutler-watcher/src/handler/mod.rs index 9ade2d85e..e50a500e9 100644 --- 
a/crates/gitbutler-watcher/src/handler/mod.rs +++ b/crates/gitbutler-watcher/src/handler/mod.rs @@ -122,8 +122,7 @@ impl Handler { .projects .get(&project_id) .context("failed to get project")?; - let changed_lines = project.lines_since_snapshot()?; - if changed_lines > project.snapshot_lines_threshold() { + if project.should_auto_snapshot().unwrap_or_default() { project.create_snapshot(SnapshotDetails::new(OperationType::FileChanges))?; } Ok(()) From 879005ae204c1e8aa8a8f328d4a7cdaab216ca6c Mon Sep 17 00:00:00 2001 From: Kiril Videlov Date: Thu, 23 May 2024 22:37:10 +0200 Subject: [PATCH 3/3] check lines since snapshot for all dirty branches This is needed because new changes to a file may be locked to a branch which is not the "default" one --- crates/gitbutler-core/src/ops/oplog.rs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/crates/gitbutler-core/src/ops/oplog.rs b/crates/gitbutler-core/src/ops/oplog.rs index 722ab0758..a76b58f8b 100644 --- a/crates/gitbutler-core/src/ops/oplog.rs +++ b/crates/gitbutler-core/src/ops/oplog.rs @@ -9,6 +9,7 @@ use std::{fs, path::PathBuf}; use anyhow::Result; use crate::git::diff::FileDiff; +use crate::virtual_branches::Branch; use crate::{git::diff::hunks_by_filepath, projects::Project}; use super::{ @@ -725,15 +726,26 @@ fn lines_since_snapshot(project: &Project) -> Result { let vb_state = project.virtual_branches(); let binding = vb_state.list_branches()?; - let active_branch = binding + + let dirty_branches: Vec<&Branch> = binding .iter() .filter(|b| b.applied) - .max_by_key(|branch| branch.selected_for_changes.unwrap_or(i64::MIN)); - if active_branch.is_none() { - return Ok(0); + .filter(|b| !b.ownership.claims.is_empty()) + .collect(); + + let mut lines_changed = 0; + for branch in dirty_branches { + lines_changed += branch_lines_since_snapshot(branch, &repo, head_sha.clone())?; } - let active_branch = active_branch.unwrap(); - let active_branch_tree = 
repo.find_tree(active_branch.tree.into())?; + Ok(lines_changed) +} + +fn branch_lines_since_snapshot( + branch: &Branch, + repo: &git2::Repository, + head_sha: String, +) -> Result { + let active_branch_tree = repo.find_tree(branch.tree.into())?; let commit = repo.find_commit(git2::Oid::from_str(&head_sha)?)?; let head_tree = commit.tree()?; @@ -742,7 +754,7 @@ fn lines_since_snapshot(project: &Project) -> Result { .ok_or(anyhow!("failed to get virtual_branches tree entry"))?; let virtual_branches = repo.find_tree(virtual_branches.id())?; let old_active_branch = virtual_branches - .get_name(active_branch.id.to_string().as_str()) + .get_name(branch.id.to_string().as_str()) .ok_or(anyhow!("failed to get active branch from tree entry"))?; let old_active_branch = repo.find_tree(old_active_branch.id())?; let old_active_branch_tree = old_active_branch