From 85b1ac6da9ad3877db2f76ffddff847dc28eeb86 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 8 Jul 2021 08:55:44 -0700 Subject: [PATCH] Better, configurable name munging (#41) A new flag `--munge [rename|filter]` controls how a name munging will work. Name munging only applies to fields named '.', '..', containing a NUL byte, or containing a forward slash '/'. The `rename` option (the default) will change '.' to '_.' and '..' to '_..'; each NUL byte turns into '_NUL_' and each slash turns into '_SLASH_'. This greatly simplified policy means that spaces and other special characters should work just fine in field names now. --- docs/ffs.1.md | 24 +++++++++-- man/ffs.1 | 32 +++++++++++++-- run_tests.sh | 6 ++- src/cli.rs | 13 +++++- src/config.rs | 77 +++++++++++++++++++++++++++-------- src/format.rs | 32 ++++++++++++--- tests/filename_spaces.sh | 42 +++++++++++++++++++ tests/munge_filter.sh | 30 ++++++++++++++ tests/rename_fancy_restore.sh | 26 ++++++------ tests/rename_object.sh | 14 +++---- tests/rename_restore.sh | 18 ++++---- yaml/spaces.yaml | 3 ++ 12 files changed, 254 insertions(+), 63 deletions(-) create mode 100755 tests/filename_spaces.sh create mode 100755 tests/munge_filter.sh create mode 100644 yaml/spaces.yaml diff --git a/docs/ffs.1.md b/docs/ffs.1.md index 562e228..06be484 100644 --- a/docs/ffs.1.md +++ b/docs/ffs.1.md @@ -104,9 +104,27 @@ installed on your system to use *ffs*. : Sets the output file for saving changes (defaults to stdout) +--munge *MUNGE* + +: Set the name munging policy; applies to '.', '..', and files with + NUL and '/' in them [default: rename] [possible values: filter, + rename] + + - Under *--munge rename* (the default), fields named '.' and '..' + will be renamed to '\_.' and '\_..', respectively. Every NUL + byte will be replaced with the text '\_NUL\_' and every forward + slash will be replaced with the text '\_SLASH\_'. 
Unless you + manually change the name of these renamed files, they will be + saved back with their original names, i.e., '\_..' will turn back + into a field called '..', and 'and\_SLASH\_or' will be turned back + into 'and/or'. New files created with such names will not be + converted back. + - Under *--munge filter*, fields named '.', '..', or with NUL or + '/' in them will simply be dropped (with a warning). + --new *NEW* -: Mounts an empty filesystem, inferring a mountpoint and output format. Running --new *FILE*.*EXT* is morally equivalent to running: +: Mounts an empty filesystem, inferring a mountpoint and output format. Running *--new* *FILE*.*EXT* is morally equivalent to running: ``` echo '{}' | ffs --source json -o *FILE*.*EXT* --target *EXT* -m *FILE* ``` @@ -190,7 +208,7 @@ named : Mapped to a **directory**. Named directories (also known as maps, objects, hashes, or dictionaries) will use field names as the file/directory names for their contents. Some renaming may occur if - fields have special characters in them. + fields have special characters in them; see *--munge* above. null @@ -245,7 +263,7 @@ umount commits # changes are written back to commits.json (-i is in-place mode) ``` -If you want to create a new file wholesale, the --new flag is helpful. +If you want to create a new file wholesale, the *--new* flag is helpful. ```shell ffs --new file.json diff --git a/man/ffs.1 b/man/ffs.1 index 9d9a042..dce1f47 100644 --- a/man/ffs.1 +++ b/man/ffs.1 @@ -96,10 +96,32 @@ specified when running on stdin -o, --output \f[I]OUTPUT\f[R] Sets the output file for saving changes (defaults to stdout) .TP +--munge \f[I]MUNGE\f[R] +Set the name munging policy; applies to \[aq].\[aq], \[aq]..\[aq], and +files with NUL and \[aq]/\[aq] in them [default: rename] [possible +values: filter, rename] +.RS +.IP \[bu] 2 +Under \f[I]--munge rename\f[R] (the default), fields named \[aq].\[aq] +and \[aq]..\[aq] will be renamed to \[aq]_.\[aq] and \[aq]_..\[aq], +respectively. 
+Every NUL byte will be replaced with the text \[aq]_NUL_\[aq] and every +forward slash will be replaced with the text \[aq]_SLASH_\[aq]. +Unless you manually change the name of these renamed files, they will be +saved back with their original names, i.e., \[aq]_..\[aq] will turn back +into a field called \[aq]..\[aq], and \[aq]and_SLASH_or\[aq] will be +turned back into \[aq]and/or\[aq]. +New files created with such names will not be converted back. +.IP \[bu] 2 +Under \f[I]--munge filter\f[R], fields named \[aq].\[aq], \[aq]..\[aq], +or with NUL or \[aq]/\[aq] in them will simply be dropped (with a +warning). +.RE +.TP --new \f[I]NEW\f[R] Mounts an empty filesystem, inferring a mountpoint and output format. -Running --new \f[I]FILE\f[R].\f[I]EXT\f[R] is morally equivalent to -running: +Running \f[I]--new\f[R] \f[I]FILE\f[R].\f[I]EXT\f[R] is morally +equivalent to running: .RS .IP .nf @@ -180,7 +202,8 @@ named Mapped to a \f[B]directory\f[R]. Named directories (also known as maps, objects, hashes, or dictionaries) will use field names as the file/directory names for their contents. -Some renaming may occur if fields have special characters in them. +Some renaming may occur if fields have special characters in them; see +\f[I]--munge\f[R] above. .TP null Mapped to a \f[B]file\f[R]. @@ -241,7 +264,8 @@ umount commits \f[R] .fi .PP -If you want to create a new file wholesale, the --new flag is helpful. +If you want to create a new file wholesale, the \f[I]--new\f[R] flag is +helpful. .IP .nf \f[C] diff --git a/run_tests.sh b/run_tests.sh index 92b915f..7305f16 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -3,7 +3,11 @@ if ! which ffs >/dev/null 2>&1 then DEBUG="$(pwd)/target/debug" - [ -x "$DEBUG/ffs" ] || { echo Couldn\'t find ffs on "$PATH" or in "$DEBUG". ; exit 1 ; } + [ -x "$DEBUG/ffs" ] || { + echo Couldn\'t find ffs on "$PATH" or in "$DEBUG". >&2 + echo Are you in the root directory of the repo? 
>&2 + exit 1 + } PATH="$DEBUG:$PATH" fi diff --git a/src/cli.rs b/src/cli.rs index d2802ab..0f2e295 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,10 +1,11 @@ use clap::{App, Arg}; /// The possible formats. -/// -/// These are defined here so that completion-generation in `build.rs` doesn't need to depend on anything but this file. pub const POSSIBLE_FORMATS: &[&str] = &["json", "toml", "yaml"]; +/// The possible name munging policies. +pub const MUNGE_POLICIES: &[&str] = &["filter", "rename"]; + pub fn app() -> App<'static, 'static> { App::new("ffs") .version(env!("CARGO_PKG_VERSION")) @@ -73,6 +74,14 @@ pub fn app() -> App<'static, 'static> { .help("Include ._* extended attribute/resource fork files on macOS") .long("keep-macos-xattr") ) + .arg( + Arg::with_name("MUNGE") + .help("Set the name munging policy; applies to '.', '..', and files with NUL and '/' in them") + .long("munge") + .takes_value(true) + .default_value("rename") + .possible_values(MUNGE_POLICIES) + ) .arg( Arg::with_name("UNPADDED") .help("Don't pad the numeric names of list elements with zeroes; will not sort properly") diff --git a/src/config.rs b/src/config.rs index c852ae9..bb9376e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,10 +1,12 @@ -use fuser::FileType; use std::path::{Path, PathBuf}; +use std::str::FromStr; use tracing::{debug, error, warn}; use tracing_subscriber::prelude::*; use tracing_subscriber::{filter::EnvFilter, fmt}; +use fuser::FileType; + use super::format; use super::format::Format; @@ -13,7 +15,7 @@ use super::cli; /// Configuration information /// /// See `cli.rs` for information on the actual command-line options; see -/// `main.rs` for how those connect to this structure. +/// `Config::from_args` for how those connect to this structure. /// /// NB I know this arrangement sucks, but `clap`'s automatic stuff isn't /// adequate to express what I want here. Command-line interfaces are hard. 
😢 @@ -31,6 +33,7 @@ pub struct Config { pub try_decode_base64: bool, pub allow_xattr: bool, pub keep_macos_xattr_file: bool, + pub munge: Munge, pub read_only: bool, pub input: Input, pub output: Output, @@ -63,13 +66,43 @@ pub enum Output { File(PathBuf), } +#[derive(Debug)] +pub enum Munge { + Rename, // rewrite '.', '..', NUL, and '/' in field names + Filter, // drop fields with such names (with a warning) +} + +impl std::fmt::Display for Munge { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + match self { + Munge::Rename => write!(f, "rename"), + Munge::Filter => write!(f, "filter"), + } + } +} + +impl FromStr for Munge { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let s = s.trim().to_lowercase(); + + if s == "rename" { + Ok(Munge::Rename) + } else if s == "filter" { + Ok(Munge::Filter) + } else { + Err(()) + } + } +} + impl Config { /// Parses arguments from `std::env::Args`, via `cli::app().get_matches()` pub fn from_args() -> Self { let args = cli::app().get_matches(); let mut config = Config::default(); - // generate completions? // // TODO 2021-07-06 good candidate for a subcommand @@ -112,6 +145,18 @@ impl Config { config.keep_macos_xattr_file = args.is_present("KEEPMACOSDOT"); config.pretty = args.is_present("PRETTY"); + // munging policy; every fallback matches the documented default, 'rename' + config.munge = match args.value_of("MUNGE") { + None => Munge::Rename, + Some(s) => match str::parse(s) { + Ok(munge) => munge, + Err(_) => { + warn!("Invalid `--munge` mode '{}', using 'rename'.", s); + Munge::Rename + } + }, + }; + // perms config.filemode = match u16::from_str_radix(args.value_of("FILEMODE").unwrap(), 8) { Ok(filemode) => filemode, @@ -466,21 +511,18 @@ impl Config { config } + pub fn valid_name(&self, s: &str) -> bool { + s != "." && s != ".."
&& !s.contains('\0') && !s.contains('/') + } + pub fn normalize_name(&self, s: String) -> String { - // inspired by https://en.wikipedia.org/wiki/Filename - s.replace(".", "dot") - .replace("/", "slash") - .replace("\\", "backslash") - .replace("?", "question") - .replace("*", "star") - .replace(":", "colon") - .replace("\"", "dquote") - .replace("<", "lt") - .replace(">", "gt") - .replace(",", "comma") - .replace(";", "semi") - .replace("=", "equal") - .replace(" ", "space") + if s == "." { + "_.".into() + } else if s == ".." { + "_..".into() + } else { + s.replace("\0", "_NUL_").replace("/", "_SLASH_") + } } #[cfg(target_os = "macos")] @@ -528,6 +570,7 @@ impl Default for Config { try_decode_base64: false, allow_xattr: true, keep_macos_xattr_file: false, + munge: Munge::Rename, read_only: false, input: Input::Stdin, output: Output::Stdout, diff --git a/src/format.rs b/src/format.rs index d1d86b5..cd96fc8 100644 --- a/src/format.rs +++ b/src/format.rs @@ -6,7 +6,7 @@ use tracing::{debug, error, info, instrument, warn}; use fuser::FileType; -use super::config::{Config, Input, Output}; +use super::config::{Config, Input, Munge, Output}; use super::fs::{DirEntry, DirType, Entry, Inode, FS}; use ::toml as serde_toml; @@ -314,6 +314,7 @@ where std::process::exit(1); } + let mut filtered = 0; let mut next_id = fuser::FUSE_ROOT_ID; // parent inum, inum, value let mut worklist: Vec<(u64, u64, V)> = vec![(next_id, next_id, v)]; @@ -361,11 +362,28 @@ where for (field, child) in fvs.into_iter() { let original = field.clone(); - let mut nfield = config.normalize_name(field); - while children.contains_key(&nfield) { - nfield.push('_'); - } + let nfield = if !config.valid_name(&original) { + match config.munge { + Munge::Rename => { + let mut nfield = config.normalize_name(field); + + // TODO 2021-07-08 could be better to check fvs, but it's a vec now... 
:/ + while children.contains_key(&nfield) { + nfield.push('_'); + } + + nfield + } + Munge::Filter => { + warn!("skipping '{}'", field); + filtered += child.size(); + continue; + } + } + } else { + field + }; let original_name = if original != nfield { info!( @@ -374,6 +392,7 @@ where ); Some(original) } else { + assert!(config.valid_name(&original)); None }; @@ -396,7 +415,8 @@ where inodes[inum as usize] = Some(Inode::new(parent, inum, entry, config)); } - assert_eq!(inodes.len() as u64, next_id); + + assert_eq!((inodes.len() - filtered) as u64, next_id); } /// Walks `fs` starting at the inode with number `inum`, producing an diff --git a/tests/filename_spaces.sh b/tests/filename_spaces.sh new file mode 100755 index 0000000..948cf83 --- /dev/null +++ b/tests/filename_spaces.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +fail() { + echo "FAILED: $1" + if [ "$MNT" ] + then + umount "$MNT" + rmdir "$MNT" + rm -f "$OUT" "$EXP" "$OUT.yaml" "$EXP.yaml" # sorted copies may not exist yet + fi + exit 1 +} + +MNT=$(mktemp -d) +OUT=$(mktemp) +EXP=$(mktemp) + +printf -- "---\nfield one: 1\nfield two: 2\nfield three: 3" >"$EXP" + +ffs -m "$MNT" --target yaml -o "$OUT" --munge filter ../yaml/spaces.yaml & +PID=$! +sleep 2 +case $(ls "$MNT") in + (field\ one*field\ two) ;; + (*) fail ls;; +esac +[ "$(cat "$MNT"/field\ one)" -eq 1 ] || fail one +[ "$(cat "$MNT"/field\ two)" -eq 2 ] || fail two +echo 3 >"$MNT"/field\ three + +umount "$MNT" || fail unmount +sleep 1 +kill -0 "$PID" >/dev/null 2>&1 && fail process + +grep "field three: 3" "$OUT" >/dev/null 2>&1 || fail three + +sort "$OUT" >"$OUT.yaml" +sort "$EXP" >"$EXP.yaml" +diff "$OUT.yaml" "$EXP.yaml" || fail diff + +rmdir "$MNT" || fail mount +rm "$OUT" "$EXP" "$OUT.yaml" "$EXP.yaml" # also remove the sorted copies made for the diff diff --git a/tests/munge_filter.sh b/tests/munge_filter.sh new file mode 100755 index 0000000..751d940 --- /dev/null +++ b/tests/munge_filter.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +fail() { + echo FAILED: $1 + if [ "$MNT" ] + then + cd + umount "$MNT" + rmdir "$MNT" + fi + exit 1 +} + +MNT=$(mktemp -d) + +ffs -m "$MNT" --munge filter ../json/obj_rename.json & +PID=$!
+sleep 2 +case $(ls "$MNT") in + (dot*dotdot) ;; + (*) fail ls;; +esac +[ "$(cat $MNT/dot)" = "third" ] || fail dot +[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot +umount "$MNT" || fail unmount +sleep 1 + +kill -0 $PID >/dev/null 2>&1 && fail process + +rmdir "$MNT" || fail mount diff --git a/tests/rename_fancy_restore.sh b/tests/rename_fancy_restore.sh index 53e1ea2..42e4b30 100755 --- a/tests/rename_fancy_restore.sh +++ b/tests/rename_fancy_restore.sh @@ -22,28 +22,28 @@ ffs -m "$MNT" -o "$OUT" --target json ../json/obj_rename.json & PID=$! sleep 2 case $(ls "$MNT") in - (dot*dot_*dotdot*dotdot_) ;; + (_.*_..*dot*dotdot) ;; (*) fail ls;; esac -[ "$(cat $MNT/dot)" = "first" ] || fail dot -[ "$(cat $MNT/dotdot)" = "second" ] || fail dotdot -[ "$(cat $MNT/dot_)" = "third" ] || fail dot_ -[ "$(cat $MNT/dotdot_)" = "fourth" ] || fail dotdot_ +[ "$(cat $MNT/_.)" = "first" ] || fail . +[ "$(cat $MNT/_..)" = "second" ] || fail .. +[ "$(cat $MNT/dot)" = "third" ] || fail dot +[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot -echo primo >"$MNT"/dot -echo secondo >"$MNT"/dotdot -echo shlishi >"$MNT"/dot_ -echo derp >"$MNT"/dotdot_ +echo primo >"$MNT"/_. +echo secondo >"$MNT"/_.. +echo shlishi >"$MNT"/dot +echo derp >"$MNT"/dotdot mkdir "$MNT"/it mkdir "$MNT"/he -mv "$MNT"/dot "$MNT"/it -mv "$MNT"/dotdot "$MNT"/it +mv "$MNT"/_. "$MNT"/it +mv "$MNT"/_.. "$MNT"/it -mv "$MNT"/dot_ "$MNT"/he +mv "$MNT"/dot "$MNT"/he -mv "$MNT"/dotdot_ "$MNT"/imnewhere +mv "$MNT"/dotdot "$MNT"/imnewhere umount "$MNT" || fail unmount sleep 1 diff --git a/tests/rename_object.sh b/tests/rename_object.sh index 43d9fb6..68ac54a 100755 --- a/tests/rename_object.sh +++ b/tests/rename_object.sh @@ -16,16 +16,14 @@ MNT=$(mktemp -d) ffs -m "$MNT" ../json/obj_rename.json & PID=$! 
sleep 2 -cd "$MNT" -case $(ls) in - (dot*dot_*dotdot*dotdot_) ;; +case $(ls "$MNT") in + (_.*_..*dot*dotdot) ;; (*) fail ls;; esac -[ "$(cat dot)" = "first" ] || fail dot -[ "$(cat dotdot)" = "second" ] || fail dotdot -[ "$(cat dot_)" = "third" ] || fail dot_ -[ "$(cat dotdot_)" = "fourth" ] || fail dotdot_ -cd - >/dev/null 2>&1 +[ "$(cat $MNT/_.)" = "first" ] || fail . +[ "$(cat $MNT/_..)" = "second" ] || fail .. +[ "$(cat $MNT/dot)" = "third" ] || fail dot +[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot umount "$MNT" || fail unmount sleep 1 diff --git a/tests/rename_restore.sh b/tests/rename_restore.sh index 8475cc2..fde7147 100755 --- a/tests/rename_restore.sh +++ b/tests/rename_restore.sh @@ -22,18 +22,18 @@ ffs -m "$MNT" -o "$OUT" --target json ../json/obj_rename.json & PID=$! sleep 2 case $(ls "$MNT") in - (dot*dot_*dotdot*dotdot_) ;; + (_.*_..*dot*dotdot) ;; (*) fail ls;; esac -[ "$(cat $MNT/dot)" = "first" ] || fail dot -[ "$(cat $MNT/dotdot)" = "second" ] || fail dotdot -[ "$(cat $MNT/dot_)" = "third" ] || fail dot_ -[ "$(cat $MNT/dotdot_)" = "fourth" ] || fail dotdot_ +[ "$(cat $MNT/_.)" = "first" ] || fail . +[ "$(cat $MNT/_..)" = "second" ] || fail .. +[ "$(cat $MNT/dot)" = "third" ] || fail dot +[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot -echo primo >"$MNT"/dot -echo secondo >"$MNT"/dotdot -echo terzo >"$MNT"/dot_ -echo quarto >"$MNT"/dotdot_ +echo primo >"$MNT"/_. +echo secondo >"$MNT"/_.. +echo terzo >"$MNT"/dot +echo quarto >"$MNT"/dotdot umount "$MNT" || fail unmount sleep 1 diff --git a/yaml/spaces.yaml b/yaml/spaces.yaml new file mode 100644 index 0000000..c7a1efd --- /dev/null +++ b/yaml/spaces.yaml @@ -0,0 +1,3 @@ +field one: 1 +field two: 2 +