1
1
mirror of https://github.com/mgree/ffs.git synced 2024-07-07 08:16:20 +03:00

Better, configurable name munging (#41)

A new flag `--munge [rename|filter]` controls how name munging works.

Name munging only applies to fields named '.', '..', containing a NUL byte, or containing a forward slash '/'.

The `rename` option (the default) will change '.' to '_.' and '..' to '_..'; each NUL byte turns into '_NUL_' and each slash turns into '_SLASH_'.

This greatly simplified policy means that spaces and other special characters should work just fine in field names now.
This commit is contained in:
Michael Greenberg 2021-07-08 08:55:44 -07:00 committed by GitHub
parent 4d6c094504
commit 85b1ac6da9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 254 additions and 63 deletions

View File

@ -104,9 +104,27 @@ installed on your system to use *ffs*.
: Sets the output file for saving changes (defaults to stdout)
--munge *MUNGE*
: Set the name munging policy; applies to '.', '..', and files with
NUL and '/' in them [default: rename] [possible values: filter,
rename]
- Under *--munge rename* (the default), fields named '.' and '..'
will be renamed to '\_.' and '\_..', respectively. Every NUL
byte will be replaced with the text '\_NUL\_' and every forward
slash will be replaced with the text '\_SLASH\_'. Unless you
manually change the name of these renamed files, they will be
saved back with their original names, i.e., '\_..' will turn back
into a field called '..', and 'and\_SLASH\_or' will be turned back
into 'and/or'. New files created with such names will not be
converted back.
- Under *--munge filter*, fields named '.', '..', or with NUL or
'/' in them will simply be dropped (with a warning).
--new *NEW*
: Mounts an empty filesystem, inferring a mountpoint and output format. Running --new *FILE*.*EXT* is morally equivalent to running:
: Mounts an empty filesystem, inferring a mountpoint and output format. Running *--new* *FILE*.*EXT* is morally equivalent to running:
```
echo '{}' | ffs --source json -o *FILE*.*EXT* --target *EXT* -m *FILE*
```
@ -190,7 +208,7 @@ named
: Mapped to a **directory**. Named directories (also known as maps,
objects, hashes, or dictionaries) will use field names as the
file/directory names for their contents. Some renaming may occur if
fields have special characters in them.
fields have special characters in them; see *--munge* above.
null
@ -245,7 +263,7 @@ umount commits
# changes are written back to commits.json (-i is in-place mode)
```
If you want to create a new file wholesale, the --new flag is helpful.
If you want to create a new file wholesale, the *--new* flag is helpful.
```shell
ffs --new file.json

View File

@ -96,10 +96,32 @@ specified when running on stdin
-o, --output \f[I]OUTPUT\f[R]
Sets the output file for saving changes (defaults to stdout)
.TP
--munge \f[I]MUNGE\f[R]
Set the name munging policy; applies to \[aq].\[aq], \[aq]..\[aq], and
files with NUL and \[aq]/\[aq] in them [default: rename] [possible
values: filter, rename]
.RS
.IP \[bu] 2
Under \f[I]--munge rename\f[R] (the default), fields named \[aq].\[aq]
and \[aq]..\[aq] will be renamed to \[aq]_.\[aq] and \[aq]_..\[aq],
respectively.
Every NUL byte will be replaced with the text \[aq]_NUL_\[aq] and every
forward slash will be replaced with the text \[aq]_SLASH_\[aq].
Unless you manually change the name of these renamed files, they will be
saved back with their original names, i.e., \[aq]_..\[aq] will turn back
into a field called \[aq]..\[aq], and \[aq]and_SLASH_or\[aq] will be
turned back into \[aq]and/or\[aq].
New files created with such names will not be converted back.
.IP \[bu] 2
Under \f[I]--munge filter\f[R], fields named \[aq].\[aq], \[aq]..\[aq],
or with NUL or \[aq]/\[aq] in them will simply be dropped (with a
warning).
.RE
.TP
--new \f[I]NEW\f[R]
Mounts an empty filesystem, inferring a mountpoint and output format.
Running --new \f[I]FILE\f[R].\f[I]EXT\f[R] is morally equivalent to
running:
Running \f[I]--new\f[R] \f[I]FILE\f[R].\f[I]EXT\f[R] is morally
equivalent to running:
.RS
.IP
.nf
@ -180,7 +202,8 @@ named
Mapped to a \f[B]directory\f[R].
Named directories (also known as maps, objects, hashes, or dictionaries)
will use field names as the file/directory names for their contents.
Some renaming may occur if fields have special characters in them.
Some renaming may occur if fields have special characters in them; see
\f[I]--munge\f[R] above.
.TP
null
Mapped to a \f[B]file\f[R].
@ -241,7 +264,8 @@ umount commits
\f[R]
.fi
.PP
If you want to create a new file wholesale, the --new flag is helpful.
If you want to create a new file wholesale, the \f[I]--new\f[R] flag is
helpful.
.IP
.nf
\f[C]

View File

@ -3,7 +3,11 @@
if ! which ffs >/dev/null 2>&1
then
DEBUG="$(pwd)/target/debug"
[ -x "$DEBUG/ffs" ] || { echo Couldn\'t find ffs on "$PATH" or in "$DEBUG". ; exit 1 ; }
[ -x "$DEBUG/ffs" ] || {
echo Couldn\'t find ffs on "$PATH" or in "$DEBUG". >&2
echo Are you in the root directory of the repo? >&2
exit 1
}
PATH="$DEBUG:$PATH"
fi

View File

@ -1,10 +1,11 @@
use clap::{App, Arg};
/// The possible formats.
///
/// These are defined here so that completion-generation in `build.rs` doesn't need to depend on anything but this file.
pub const POSSIBLE_FORMATS: &[&str] = &["json", "toml", "yaml"];
/// The possible name munging policies.
///
/// Supplied to `clap` as the accepted values of the `--munge` argument in `app()`.
pub const MUNGE_POLICIES: &[&str] = &["filter", "rename"];
pub fn app() -> App<'static, 'static> {
App::new("ffs")
.version(env!("CARGO_PKG_VERSION"))
@ -73,6 +74,14 @@ pub fn app() -> App<'static, 'static> {
.help("Include ._* extended attribute/resource fork files on macOS")
.long("keep-macos-xattr")
)
.arg(
Arg::with_name("MUNGE")
.help("Set the name munging policy; applies to '.', '..', and files with NUL and '/' in them")
.long("munge")
.takes_value(true)
.default_value("rename")
.possible_values(MUNGE_POLICIES)
)
.arg(
Arg::with_name("UNPADDED")
.help("Don't pad the numeric names of list elements with zeroes; will not sort properly")

View File

@ -1,10 +1,12 @@
use fuser::FileType;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use tracing::{debug, error, warn};
use tracing_subscriber::prelude::*;
use tracing_subscriber::{filter::EnvFilter, fmt};
use fuser::FileType;
use super::format;
use super::format::Format;
@ -13,7 +15,7 @@ use super::cli;
/// Configuration information
///
/// See `cli.rs` for information on the actual command-line options; see
/// `main.rs` for how those connect to this structure.
/// `Config::from_args` for how those connect to this structure.
///
/// NB I know this arrangement sucks, but `clap`'s automatic stuff isn't
/// adequate to express what I want here. Command-line interfaces are hard. 😢
@ -31,6 +33,7 @@ pub struct Config {
pub try_decode_base64: bool,
pub allow_xattr: bool,
pub keep_macos_xattr_file: bool,
pub munge: Munge,
pub read_only: bool,
pub input: Input,
pub output: Output,
@ -63,13 +66,43 @@ pub enum Output {
File(PathBuf),
}
/// Policy for field names that cannot be used directly as file names
/// ('.', '..', or names containing a NUL byte or a '/'); chosen with `--munge`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Munge {
    /// Rename the offending field (e.g., '.' becomes '_.').
    Rename,
    /// Drop the offending field, emitting a warning.
    Filter,
}
impl std::fmt::Display for Munge {
    /// Writes the lowercase policy name (`rename` or `filter`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        let name = match self {
            Munge::Rename => "rename",
            Munge::Filter => "filter",
        };
        f.write_str(name)
    }
}
impl FromStr for Munge {
    type Err = ();

    /// Parses a policy name, ignoring surrounding whitespace and letter case.
    ///
    /// Anything other than `rename` or `filter` yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, ()> {
        match s.trim().to_lowercase().as_str() {
            "rename" => Ok(Munge::Rename),
            "filter" => Ok(Munge::Filter),
            _ => Err(()),
        }
    }
}
impl Config {
/// Parses arguments from `std::env::Args`, via `cli::app().get_matches()`
pub fn from_args() -> Self {
let args = cli::app().get_matches();
let mut config = Config::default();
// generate completions?
//
// TODO 2021-07-06 good candidate for a subcommand
@ -112,6 +145,18 @@ impl Config {
config.keep_macos_xattr_file = args.is_present("KEEPMACOSDOT");
config.pretty = args.is_present("PRETTY");
// munging policy
//
// `rename` is the documented default, so every fallback path must select
// `Munge::Rename`; this also keeps the warning below truthful.
config.munge = match args.value_of("MUNGE") {
    None => Munge::Rename,
    Some(s) => match str::parse(s) {
        Ok(munge) => munge,
        Err(_) => {
            warn!("Invalid `--munge` mode '{}', using 'rename'.", s);
            Munge::Rename
        }
    },
};
// perms
config.filemode = match u16::from_str_radix(args.value_of("FILEMODE").unwrap(), 8) {
Ok(filemode) => filemode,
@ -466,21 +511,18 @@ impl Config {
config
}
/// Reports whether `s` needs no munging: it is neither '.' nor '..' and
/// contains no NUL byte and no '/'.
pub fn valid_name(&self, s: &str) -> bool {
    let reserved = matches!(s, "." | "..");
    let has_forbidden_char = s.contains(|c: char| c == '\0' || c == '/');
    !(reserved || has_forbidden_char)
}
pub fn normalize_name(&self, s: String) -> String {
// inspired by https://en.wikipedia.org/wiki/Filename
s.replace(".", "dot")
.replace("/", "slash")
.replace("\\", "backslash")
.replace("?", "question")
.replace("*", "star")
.replace(":", "colon")
.replace("\"", "dquote")
.replace("<", "lt")
.replace(">", "gt")
.replace(",", "comma")
.replace(";", "semi")
.replace("=", "equal")
.replace(" ", "space")
if s == "." {
"_.".into()
} else if s == ".." {
"_..".into()
} else {
s.replace("\0", "_NUL_").replace("/", "_SLASH_")
}
}
#[cfg(target_os = "macos")]
@ -528,6 +570,7 @@ impl Default for Config {
try_decode_base64: false,
allow_xattr: true,
keep_macos_xattr_file: false,
munge: Munge::Rename,
read_only: false,
input: Input::Stdin,
output: Output::Stdout,

View File

@ -6,7 +6,7 @@ use tracing::{debug, error, info, instrument, warn};
use fuser::FileType;
use super::config::{Config, Input, Output};
use super::config::{Config, Input, Munge, Output};
use super::fs::{DirEntry, DirType, Entry, Inode, FS};
use ::toml as serde_toml;
@ -314,6 +314,7 @@ where
std::process::exit(1);
}
let mut filtered = 0;
let mut next_id = fuser::FUSE_ROOT_ID;
// parent inum, inum, value
let mut worklist: Vec<(u64, u64, V)> = vec![(next_id, next_id, v)];
@ -361,11 +362,28 @@ where
for (field, child) in fvs.into_iter() {
let original = field.clone();
let mut nfield = config.normalize_name(field);
while children.contains_key(&nfield) {
nfield.push('_');
}
let nfield = if !config.valid_name(&original) {
match config.munge {
Munge::Rename => {
let mut nfield = config.normalize_name(field);
// TODO 2021-07-08 could be better to check fvs, but it's a vec now... :/
while children.contains_key(&nfield) {
nfield.push('_');
}
nfield
}
Munge::Filter => {
warn!("skipping '{}'", field);
filtered += child.size();
continue;
}
}
} else {
field
};
let original_name = if original != nfield {
info!(
@ -374,6 +392,7 @@ where
);
Some(original)
} else {
assert!(config.valid_name(&original));
None
};
@ -396,7 +415,8 @@ where
inodes[inum as usize] = Some(Inode::new(parent, inum, entry, config));
}
assert_eq!(inodes.len() as u64, next_id);
assert_eq!((inodes.len() - filtered) as u64, next_id);
}
/// Walks `fs` starting at the inode with number `inum`, producing an

42
tests/filename_spaces.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/sh

# Checks that field names containing spaces show up unchanged in the mount
# and that a new file with a space in its name is written back to the output.

# Reports the failing step, tears down the mount and temporary files, and exits.
fail() {
    echo FAILED: "$1"
    if [ "$MNT" ]
    then
        umount "$MNT"
        rmdir "$MNT"
        # also remove the sorted copies, which may or may not exist yet
        rm -f "$OUT" "$EXP" "$OUT.yaml" "$EXP.yaml"
    fi
    exit 1
}

MNT=$(mktemp -d)
OUT=$(mktemp)
EXP=$(mktemp)

printf -- "---\nfield one: 1\nfield two: 2\nfield three: 3" >"$EXP"

ffs -m "$MNT" --target yaml -o "$OUT" --munge filter ../yaml/spaces.yaml &
PID=$!
sleep 2
case $(ls "$MNT") in
    (field\ one*field\ two) ;;
    (*) fail ls;;
esac
[ "$(cat "$MNT/field one")" -eq 1 ] || fail one
[ "$(cat "$MNT/field two")" -eq 2 ] || fail two
echo 3 >"$MNT/field three"

umount "$MNT" || fail unmount
sleep 1
# the ffs process should have exited once the filesystem was unmounted
kill -0 $PID >/dev/null 2>&1 && fail process

grep "field three: 3" "$OUT" >/dev/null 2>&1 || fail three
# compare sorted copies so that line order does not affect the diff
sort "$OUT" >"$OUT.yaml"
sort "$EXP" >"$EXP.yaml"
diff "$OUT.yaml" "$EXP.yaml" || fail diff

rmdir "$MNT" || fail mount
rm "$OUT" "$EXP" "$OUT.yaml" "$EXP.yaml"

30
tests/munge_filter.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/sh

# Mounts obj_rename.json with `--munge filter` and expects only the `dot` and
# `dotdot` entries to be listed, holding "third" and "fourth".

# Reports the failing step, tears down the mount, and exits.
fail() {
    echo FAILED: "$1"
    if [ "$MNT" ]
    then
        # presumably leaves the mountpoint in case the shell is inside it
        cd
        umount "$MNT"
        rmdir "$MNT"
    fi
    exit 1
}

MNT=$(mktemp -d)

ffs -m "$MNT" --munge filter ../json/obj_rename.json &
PID=$!
sleep 2
case $(ls "$MNT") in
    (dot*dotdot) ;;
    (*) fail ls;;
esac
[ "$(cat "$MNT/dot")" = "third" ] || fail dot
[ "$(cat "$MNT/dotdot")" = "fourth" ] || fail dotdot

umount "$MNT" || fail unmount
sleep 1
# the ffs process should have exited once the filesystem was unmounted
kill -0 $PID >/dev/null 2>&1 && fail process
rmdir "$MNT" || fail mount

View File

@ -22,28 +22,28 @@ ffs -m "$MNT" -o "$OUT" --target json ../json/obj_rename.json &
PID=$!
sleep 2
case $(ls "$MNT") in
(dot*dot_*dotdot*dotdot_) ;;
(_.*_..*dot*dotdot) ;;
(*) fail ls;;
esac
[ "$(cat $MNT/dot)" = "first" ] || fail dot
[ "$(cat $MNT/dotdot)" = "second" ] || fail dotdot
[ "$(cat $MNT/dot_)" = "third" ] || fail dot_
[ "$(cat $MNT/dotdot_)" = "fourth" ] || fail dotdot_
[ "$(cat $MNT/_.)" = "first" ] || fail .
[ "$(cat $MNT/_..)" = "second" ] || fail ..
[ "$(cat $MNT/dot)" = "third" ] || fail dot
[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot
echo primo >"$MNT"/dot
echo secondo >"$MNT"/dotdot
echo shlishi >"$MNT"/dot_
echo derp >"$MNT"/dotdot_
echo primo >"$MNT"/_.
echo secondo >"$MNT"/_..
echo shlishi >"$MNT"/dot
echo derp >"$MNT"/dotdot
mkdir "$MNT"/it
mkdir "$MNT"/he
mv "$MNT"/dot "$MNT"/it
mv "$MNT"/dotdot "$MNT"/it
mv "$MNT"/_. "$MNT"/it
mv "$MNT"/_.. "$MNT"/it
mv "$MNT"/dot_ "$MNT"/he
mv "$MNT"/dot "$MNT"/he
mv "$MNT"/dotdot_ "$MNT"/imnewhere
mv "$MNT"/dotdot "$MNT"/imnewhere
umount "$MNT" || fail unmount
sleep 1

View File

@ -16,16 +16,14 @@ MNT=$(mktemp -d)
ffs -m "$MNT" ../json/obj_rename.json &
PID=$!
sleep 2
cd "$MNT"
case $(ls) in
(dot*dot_*dotdot*dotdot_) ;;
case $(ls "$MNT") in
(_.*_..*dot*dotdot) ;;
(*) fail ls;;
esac
[ "$(cat dot)" = "first" ] || fail dot
[ "$(cat dotdot)" = "second" ] || fail dotdot
[ "$(cat dot_)" = "third" ] || fail dot_
[ "$(cat dotdot_)" = "fourth" ] || fail dotdot_
cd - >/dev/null 2>&1
[ "$(cat $MNT/_.)" = "first" ] || fail .
[ "$(cat $MNT/_..)" = "second" ] || fail ..
[ "$(cat $MNT/dot)" = "third" ] || fail dot
[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot
umount "$MNT" || fail unmount
sleep 1

View File

@ -22,18 +22,18 @@ ffs -m "$MNT" -o "$OUT" --target json ../json/obj_rename.json &
PID=$!
sleep 2
case $(ls "$MNT") in
(dot*dot_*dotdot*dotdot_) ;;
(_.*_..*dot*dotdot) ;;
(*) fail ls;;
esac
[ "$(cat $MNT/dot)" = "first" ] || fail dot
[ "$(cat $MNT/dotdot)" = "second" ] || fail dotdot
[ "$(cat $MNT/dot_)" = "third" ] || fail dot_
[ "$(cat $MNT/dotdot_)" = "fourth" ] || fail dotdot_
[ "$(cat $MNT/_.)" = "first" ] || fail .
[ "$(cat $MNT/_..)" = "second" ] || fail ..
[ "$(cat $MNT/dot)" = "third" ] || fail dot
[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot
echo primo >"$MNT"/dot
echo secondo >"$MNT"/dotdot
echo terzo >"$MNT"/dot_
echo quarto >"$MNT"/dotdot_
echo primo >"$MNT"/_.
echo secondo >"$MNT"/_..
echo terzo >"$MNT"/dot
echo quarto >"$MNT"/dotdot
umount "$MNT" || fail unmount
sleep 1

3
yaml/spaces.yaml Normal file
View File

@ -0,0 +1,3 @@
field one: 1
field two: 2