From d586a40ada06c95422f5cb3d3ff6a9d791bc2fbb Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Thu, 27 Aug 2020 18:24:10 -0700 Subject: [PATCH] hgcommands: add debugfsync Summary: The `debugfsync` command calls fsync on newly modified files in svfs. Right now it only includes locations that we know have constant number of files. The fsync logic is put in a separate crate to avoid slow compiles. Reviewed By: DurhamG Differential Revision: D23124169 fbshipit-source-id: 438296002eed14db599d6ec225183bf824096940 --- eden/scm/edenscm/hgext/debugshell.py | 3 + eden/scm/lib/fsyncglob/Cargo.toml | 11 ++ eden/scm/lib/fsyncglob/src/lib.rs | 138 ++++++++++++++++++ eden/scm/lib/hgcommands/Cargo.toml | 1 + eden/scm/lib/hgcommands/src/commands.rs | 1 + eden/scm/lib/hgcommands/src/commands/debug.rs | 1 + .../hgcommands/src/commands/debug/fsync.rs | 31 ++++ eden/scm/tests/test-completion.t | 2 + eden/scm/tests/test-help.t | 1 + 9 files changed, 189 insertions(+) create mode 100644 eden/scm/lib/fsyncglob/Cargo.toml create mode 100644 eden/scm/lib/fsyncglob/src/lib.rs create mode 100644 eden/scm/lib/hgcommands/src/commands/debug/fsync.rs diff --git a/eden/scm/edenscm/hgext/debugshell.py b/eden/scm/edenscm/hgext/debugshell.py index 29d000444f..a511c6cc03 100644 --- a/eden/scm/edenscm/hgext/debugshell.py +++ b/eden/scm/edenscm/hgext/debugshell.py @@ -175,6 +175,9 @@ def _configipython(ui, ipython): """Set up IPython features like magics""" from IPython.core.magic import register_line_magic + # get_ipython is used by register_line_magic + get_ipython = ipython.get_ipython # noqa: F841 + @register_line_magic def hg(line): args = ["hg"] + shlex.split(line) diff --git a/eden/scm/lib/fsyncglob/Cargo.toml b/eden/scm/lib/fsyncglob/Cargo.toml new file mode 100644 index 0000000000..bf8f5161f3 --- /dev/null +++ b/eden/scm/lib/fsyncglob/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "fsyncglob" +version = "0.1.0" +edition = "2018" + +[dependencies] +glob = "0.3" +tracing = "0.1" + +[dev-dependencies] +tempfile = "3" diff --git a/eden/scm/lib/fsyncglob/src/lib.rs b/eden/scm/lib/fsyncglob/src/lib.rs new file mode 100644 index 0000000000..de9efdd7e9 --- /dev/null +++ b/eden/scm/lib/fsyncglob/src/lib.rs @@ -0,0 +1,138 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +//! Simple crate to call fsync on files matching glob patterns. +//! +//! This is a standalone crate to help reducing compile time of `hgcommands`. + +use glob::Pattern; +use std::fs; +use std::io; +use std::path::Path; +use std::path::PathBuf; +use std::time::Duration; +use std::time::SystemTime; +use tracing::debug; +use tracing::trace; +use tracing::warn; + +/// Call `fsync` on files matching given glob patterns under the given directory. +/// +/// Errors are silenced and logged to tracing framework. +/// Files not recently modified (older than `newer_than`) are skipped. +/// +/// Returns paths that are fsync-ed. +pub fn fsync_glob(dir: &Path, patterns: &[&str], newer_than: Option) -> Vec { + let escaped_dir = Pattern::escape(&dir.display().to_string()); + let mut result = Vec::new(); + for p in patterns { + let full_pattern = format!("{}/{}", &escaped_dir, p); + debug!("globing {}", &full_pattern); + + let matches = match glob::glob(&full_pattern) { + Err(e) => { + warn!("glob failed: {}", e); + continue; + } + Ok(matches) => matches, + }; + + let newer_than = newer_than.unwrap_or_else(|| { + let now = SystemTime::now(); + now.checked_sub(Duration::from_secs(300)).unwrap_or(now) + }); + + for path in matches { + let path = match path { + Ok(path) => path, + Err(e) => { + warn!("path reading failed: {}", e); + continue; + } + }; + + match try_fsync_if_newer_than(&path, newer_than) { + Ok(true) => { + if let Ok(path) = path.strip_prefix(dir) { + result.push(path.to_path_buf()); + } + debug!("fsynced: {}", path.display()); + } + Ok(false) => trace!("skipped: {}", path.display()), + Err(e) => warn!("cannot fsync {}: {}", path.display(), e), + } + } + } + result.sort_unstable(); + result +} + +/// Attempt to fsync a single file. +/// Return false if the file is skipped (not newly modified or not a file). +/// Return true if the file is synced. +fn try_fsync_if_newer_than(path: &Path, newer_than: SystemTime) -> io::Result { + let metadata = path.symlink_metadata()?; + if !metadata.is_file() || metadata.modified()? < newer_than { + return Ok(false); + } + + let mut open_opts = fs::OpenOptions::new(); + open_opts.read(true).create(false).truncate(false); + + // Windows requires opening with write permission for fsync. + if cfg!(windows) { + open_opts.write(true); + } + + let file = open_opts.open(path)?; + file.sync_all()?; + Ok(true) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::tempdir; + + #[test] + fn test_patterns() { + let dir = tempdir().unwrap(); + let dir = dir.path(); + + fs::write(dir.join("a"), b"1").unwrap(); + fs::write(dir.join("a1"), b"1").unwrap(); + fs::write(dir.join("b"), b"2").unwrap(); + fs::write(dir.join("c"), b"3").unwrap(); + + assert_eq!(d(fsync_glob(&dir, &[], None)), "[]"); + assert_eq!(d(fsync_glob(&dir, &["d"], None)), "[]"); + assert_eq!(d(fsync_glob(&dir, &["?"], None)), "[\"a\", \"b\", \"c\"]"); + assert_eq!( + d(fsync_glob(&dir, &["a*", "c"], None)), + "[\"a\", \"a1\", \"c\"]" + ); + } + + #[test] + fn test_skip_old_files() { + let dir = tempdir().unwrap(); + let dir = dir.path(); + + fs::write(dir.join("a"), b"1").unwrap(); + fs::write(dir.join("b"), b"2").unwrap(); + + let newer_than = SystemTime::now() + .checked_add(Duration::from_secs(10)) + .unwrap(); + assert_eq!(d(fsync_glob(&dir, &["*"], Some(newer_than))), "[]"); + } + + fn d(value: impl std::fmt::Debug) -> String { + format!("{:?}", value) + } +} diff --git a/eden/scm/lib/hgcommands/Cargo.toml b/eden/scm/lib/hgcommands/Cargo.toml index ed0ba252fe..f62e950460 100644 --- a/eden/scm/lib/hgcommands/Cargo.toml +++ b/eden/scm/lib/hgcommands/Cargo.toml @@ -23,6 +23,7 @@ edenfs-client = { path = "../edenfs-client"} encoding = { path = "../encoding" } env_logger = "0.7" flate2 = "1" +fsyncglob = { path = "../fsyncglob" } hgtime = { path = "../hgtime"} indexedlog = { path = "../indexedlog" } libc = "0.2" diff --git a/eden/scm/lib/hgcommands/src/commands.rs b/eden/scm/lib/hgcommands/src/commands.rs index 4630be34b9..b9a064f348 100644 --- a/eden/scm/lib/hgcommands/src/commands.rs +++ b/eden/scm/lib/hgcommands/src/commands.rs @@ -43,6 +43,7 @@ pub fn table() -> CommandTable { debug::dumpindexedlog, debug::dumptrace, debug::dynamicconfig, + debug::fsync, debug::http, debug::python, debug::store, diff --git a/eden/scm/lib/hgcommands/src/commands/debug.rs b/eden/scm/lib/hgcommands/src/commands/debug.rs index 9a627f0a0b..eba872bbba 100644 --- a/eden/scm/lib/hgcommands/src/commands/debug.rs +++ b/eden/scm/lib/hgcommands/src/commands/debug.rs @@ -16,6 +16,7 @@ pub mod causerusterror; pub mod dumpindexedlog; pub mod dumptrace; pub mod dynamicconfig; +pub mod fsync; pub mod http; pub mod python; pub mod store; diff --git a/eden/scm/lib/hgcommands/src/commands/debug/fsync.rs b/eden/scm/lib/hgcommands/src/commands/debug/fsync.rs new file mode 100644 index 0000000000..b0be0a2c83 --- /dev/null +++ b/eden/scm/lib/hgcommands/src/commands/debug/fsync.rs @@ -0,0 +1,31 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +use super::NoOpts; +use super::Repo; +use super::Result; +use super::IO; + +pub fn run(_opts: NoOpts, _io: &mut IO, repo: Repo) -> Result { + let store_path = repo.store_path(); + let patterns = [ + "00changelog.*", + "hgcommits/**/*", + "metalog/**/*", + "mutation/**/*", + ]; + fsyncglob::fsync_glob(store_path, &patterns, None); + Ok(0) +} + +pub fn name() -> &'static str { + "debugfsync" +} + +pub fn doc() -> &'static str { + "call fsync on newly modified key storage files" +} diff --git a/eden/scm/tests/test-completion.t b/eden/scm/tests/test-completion.t index 3d9545c013..71173263a0 100644 --- a/eden/scm/tests/test-completion.t +++ b/eden/scm/tests/test-completion.t @@ -122,6 +122,7 @@ Show debug commands if there are no other candidates debugfileset debugformat debugfsinfo + debugfsync debuggetbundle debughttp debugignore @@ -408,6 +409,7 @@ Show all commands + options debugfileset: rev debugformat: template debugfsinfo: + debugfsync: debuggetbundle: head, common, type debughttp: debugignore: diff --git a/eden/scm/tests/test-help.t b/eden/scm/tests/test-help.t index 4cb0f4cb83..1302d29532 100644 --- a/eden/scm/tests/test-help.t +++ b/eden/scm/tests/test-help.t @@ -997,6 +997,7 @@ Test list of internal help commands debugfileset parse and apply a fileset specification debugformat display format information about the current repository debugfsinfo show information detected about current filesystem + debugfsync call fsync on newly modified key storage files debuggentrees (no help text available) debuggetbundle