mirror of
https://github.com/mgree/ffs.git
synced 2024-10-05 15:18:20 +03:00
Better, configurable name munging (#41)
A new flag `--munge [rename|filter]` controls how name munging works. Name munging only applies to fields named '.' or '..', or containing a NUL byte or a forward slash '/'. The `rename` option (the default) will change '.' to '_.' and '..' to '_..'; each NUL byte turns into '_NUL_' and each slash turns into '_SLASH_'. This greatly simplified policy means that spaces and other special characters should work just fine in field names now.
This commit is contained in:
parent
4d6c094504
commit
85b1ac6da9
@ -104,9 +104,27 @@ installed on your system to use *ffs*.
|
||||
|
||||
: Sets the output file for saving changes (defaults to stdout)
|
||||
|
||||
--munge *MUNGE*
|
||||
|
||||
: Set the name munging policy; applies to '.', '..', and files with
|
||||
NUL and '/' in them [default: rename] [possible values: filter,
|
||||
rename]
|
||||
|
||||
- Under *--munge rename* (the default), fields named '.' and '..'
|
||||
will be renamed to '\_.' and '\_..', respectively. Every NUL
|
||||
byte will be replaced with the text '\_NUL\_' and every forward
|
||||
slash will be replaced with the text '\_SLASH\_'. Unless you
|
||||
manually change the name of these renamed files, they will be
|
||||
saved back with their original names, i.e., '\_..' will turn back
|
||||
into a field called '..', and 'and\_SLASH\_or' will be turned back
|
||||
into 'and/or'. New files created with such names will not be
|
||||
converted back.
|
||||
- Under *--munge filter*, fields named '.', '..', or with NUL or
|
||||
'/' in them will simply be dropped (with a warning).
|
||||
|
||||
--new *NEW*
|
||||
|
||||
: Mounts an empty filesystem, inferring a mountpoint and output format. Running --new *FILE*.*EXT* is morally equivalent to running:
|
||||
: Mounts an empty filesystem, inferring a mountpoint and output format. Running *--new* *FILE*.*EXT* is morally equivalent to running:
|
||||
```
|
||||
echo '{}' | ffs --source json -o *FILE*.*EXT* --target *EXT* -m *FILE*
|
||||
```
|
||||
@ -190,7 +208,7 @@ named
|
||||
: Mapped to a **directory**. Named directories (also known as maps,
|
||||
objects, hashes, or dictionaries) will use field names as the
|
||||
file/directory names for their contents. Some renaming may occur if
|
||||
fields have special characters in them.
|
||||
fields have special characters in them; see *--munge* above.
|
||||
|
||||
null
|
||||
|
||||
@ -245,7 +263,7 @@ umount commits
|
||||
# changes are written back to commits.json (-i is in-place mode)
|
||||
```
|
||||
|
||||
If you want to create a new file wholesale, the --new flag is helpful.
|
||||
If you want to create a new file wholesale, the *--new* flag is helpful.
|
||||
|
||||
```shell
|
||||
ffs --new file.json
|
||||
|
32
man/ffs.1
32
man/ffs.1
@ -96,10 +96,32 @@ specified when running on stdin
|
||||
-o, --output \f[I]OUTPUT\f[R]
|
||||
Sets the output file for saving changes (defaults to stdout)
|
||||
.TP
|
||||
--munge \f[I]MUNGE\f[R]
|
||||
Set the name munging policy; applies to \[aq].\[aq], \[aq]..\[aq], and
|
||||
files with NUL and \[aq]/\[aq] in them [default: rename] [possible
|
||||
values: filter, rename]
|
||||
.RS
|
||||
.IP \[bu] 2
|
||||
Under \f[I]--munge rename\f[R] (the default), fields named \[aq].\[aq]
|
||||
and \[aq]..\[aq] will be renamed to \[aq]_.\[aq] and \[aq]_..\[aq],
|
||||
respectively.
|
||||
Every NUL byte will be replaced with the text \[aq]_NUL_\[aq] and every
|
||||
forward slash will be replaced with the text \[aq]_SLASH_\[aq].
|
||||
Unless you manually change the name of these renamed files, they will be
|
||||
saved back with their original names, i.e., \[aq]_..\[aq] will turn back
|
||||
into a field called \[aq]..\[aq], and \[aq]and_SLASH_or\[aq] will be
|
||||
turned back into \[aq]and/or\[aq].
|
||||
New files created with such names will not be converted back.
|
||||
.IP \[bu] 2
|
||||
Under \f[I]--munge filter\f[R], fields named \[aq].\[aq], \[aq]..\[aq],
|
||||
or with NUL or \[aq]/\[aq] in them will simply be dropped (with a
|
||||
warning).
|
||||
.RE
|
||||
.TP
|
||||
--new \f[I]NEW\f[R]
|
||||
Mounts an empty filesystem, inferring a mountpoint and output format.
|
||||
Running --new \f[I]FILE\f[R].\f[I]EXT\f[R] is morally equivalent to
|
||||
running:
|
||||
Running \f[I]--new\f[R] \f[I]FILE\f[R].\f[I]EXT\f[R] is morally
|
||||
equivalent to running:
|
||||
.RS
|
||||
.IP
|
||||
.nf
|
||||
@ -180,7 +202,8 @@ named
|
||||
Mapped to a \f[B]directory\f[R].
|
||||
Named directories (also known as maps, objects, hashes, or dictionaries)
|
||||
will use field names as the file/directory names for their contents.
|
||||
Some renaming may occur if fields have special characters in them.
|
||||
Some renaming may occur if fields have special characters in them; see
|
||||
\f[I]--munge\f[R] above.
|
||||
.TP
|
||||
null
|
||||
Mapped to a \f[B]file\f[R].
|
||||
@ -241,7 +264,8 @@ umount commits
|
||||
\f[R]
|
||||
.fi
|
||||
.PP
|
||||
If you want to create a new file wholesale, the --new flag is helpful.
|
||||
If you want to create a new file wholesale, the \f[I]--new\f[R] flag is
|
||||
helpful.
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
|
@ -3,7 +3,11 @@
|
||||
if ! which ffs >/dev/null 2>&1
|
||||
then
|
||||
DEBUG="$(pwd)/target/debug"
|
||||
[ -x "$DEBUG/ffs" ] || { echo Couldn\'t find ffs on "$PATH" or in "$DEBUG". ; exit 1 ; }
|
||||
[ -x "$DEBUG/ffs" ] || {
|
||||
echo Couldn\'t find ffs on "$PATH" or in "$DEBUG". >&2
|
||||
echo Are you in the root directory of the repo? >&2
|
||||
exit 1
|
||||
}
|
||||
PATH="$DEBUG:$PATH"
|
||||
fi
|
||||
|
||||
|
13
src/cli.rs
13
src/cli.rs
@ -1,10 +1,11 @@
|
||||
use clap::{App, Arg};
|
||||
|
||||
/// The possible formats.
|
||||
///
|
||||
/// These are defined here so that completion-generation in `build.rs` doesn't need to depend on anything but this file.
|
||||
pub const POSSIBLE_FORMATS: &[&str] = &["json", "toml", "yaml"];
|
||||
|
||||
/// The possible name munging policies.
|
||||
pub const MUNGE_POLICIES: &[&str] = &["filter", "rename"];
|
||||
|
||||
pub fn app() -> App<'static, 'static> {
|
||||
App::new("ffs")
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
@ -73,6 +74,14 @@ pub fn app() -> App<'static, 'static> {
|
||||
.help("Include ._* extended attribute/resource fork files on macOS")
|
||||
.long("keep-macos-xattr")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("MUNGE")
|
||||
.help("Set the name munging policy; applies to '.', '..', and files with NUL and '/' in them")
|
||||
.long("munge")
|
||||
.takes_value(true)
|
||||
.default_value("rename")
|
||||
.possible_values(MUNGE_POLICIES)
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("UNPADDED")
|
||||
.help("Don't pad the numeric names of list elements with zeroes; will not sort properly")
|
||||
|
@ -1,10 +1,12 @@
|
||||
use fuser::FileType;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
|
||||
use tracing::{debug, error, warn};
|
||||
use tracing_subscriber::prelude::*;
|
||||
use tracing_subscriber::{filter::EnvFilter, fmt};
|
||||
|
||||
use fuser::FileType;
|
||||
|
||||
use super::format;
|
||||
use super::format::Format;
|
||||
|
||||
@ -13,7 +15,7 @@ use super::cli;
|
||||
/// Configuration information
|
||||
///
|
||||
/// See `cli.rs` for information on the actual command-line options; see
|
||||
/// `main.rs` for how those connect to this structure.
|
||||
/// `Config::from_args` for how those connect to this structure.
|
||||
///
|
||||
/// NB I know this arrangement sucks, but `clap`'s automatic stuff isn't
|
||||
/// adequate to express what I want here. Command-line interfaces are hard. 😢
|
||||
@ -31,6 +33,7 @@ pub struct Config {
|
||||
pub try_decode_base64: bool,
|
||||
pub allow_xattr: bool,
|
||||
pub keep_macos_xattr_file: bool,
|
||||
pub munge: Munge,
|
||||
pub read_only: bool,
|
||||
pub input: Input,
|
||||
pub output: Output,
|
||||
@ -63,13 +66,43 @@ pub enum Output {
|
||||
File(PathBuf),
|
||||
}
|
||||
|
||||
/// Name munging policy: what to do with field names that cannot be used
/// as file names ('.', '..', or names containing NUL or '/').
///
/// Selected on the command line via `--munge`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Munge {
    /// Rename the offending field so that it can still be mounted.
    Rename,
    /// Drop the offending field (with a warning).
    Filter,
}
|
||||
|
||||
impl std::fmt::Display for Munge {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
|
||||
match self {
|
||||
Munge::Rename => write!(f, "rename"),
|
||||
Munge::Filter => write!(f, "filter"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Munge {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, ()> {
|
||||
let s = s.trim().to_lowercase();
|
||||
|
||||
if s == "rename" {
|
||||
Ok(Munge::Rename)
|
||||
} else if s == "filter" {
|
||||
Ok(Munge::Filter)
|
||||
} else {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Parses arguments from `std::env::Args`, via `cli::app().get_matches()`
|
||||
pub fn from_args() -> Self {
|
||||
let args = cli::app().get_matches();
|
||||
|
||||
let mut config = Config::default();
|
||||
|
||||
// generate completions?
|
||||
//
|
||||
// TODO 2021-07-06 good candidate for a subcommand
|
||||
@ -112,6 +145,18 @@ impl Config {
|
||||
config.keep_macos_xattr_file = args.is_present("KEEPMACOSDOT");
|
||||
config.pretty = args.is_present("PRETTY");
|
||||
|
||||
// munging policy
|
||||
config.munge = match args.value_of("MUNGE") {
|
||||
None => Munge::Filter,
|
||||
Some(s) => match str::parse(s) {
|
||||
Ok(munge) => munge,
|
||||
Err(_) => {
|
||||
warn!("Invalid `--munge` mode '{}', using 'rename'.", s);
|
||||
Munge::Filter
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
// perms
|
||||
config.filemode = match u16::from_str_radix(args.value_of("FILEMODE").unwrap(), 8) {
|
||||
Ok(filemode) => filemode,
|
||||
@ -466,21 +511,18 @@ impl Config {
|
||||
config
|
||||
}
|
||||
|
||||
pub fn valid_name(&self, s: &str) -> bool {
|
||||
s != "." && s != ".." && !s.contains('\0') && !s.contains('/')
|
||||
}
|
||||
|
||||
pub fn normalize_name(&self, s: String) -> String {
|
||||
// inspired by https://en.wikipedia.org/wiki/Filename
|
||||
s.replace(".", "dot")
|
||||
.replace("/", "slash")
|
||||
.replace("\\", "backslash")
|
||||
.replace("?", "question")
|
||||
.replace("*", "star")
|
||||
.replace(":", "colon")
|
||||
.replace("\"", "dquote")
|
||||
.replace("<", "lt")
|
||||
.replace(">", "gt")
|
||||
.replace(",", "comma")
|
||||
.replace(";", "semi")
|
||||
.replace("=", "equal")
|
||||
.replace(" ", "space")
|
||||
if s == "." {
|
||||
"_.".into()
|
||||
} else if s == ".." {
|
||||
"_..".into()
|
||||
} else {
|
||||
s.replace("\0", "_NUL_").replace("/", "_SLASH_")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
@ -528,6 +570,7 @@ impl Default for Config {
|
||||
try_decode_base64: false,
|
||||
allow_xattr: true,
|
||||
keep_macos_xattr_file: false,
|
||||
munge: Munge::Rename,
|
||||
read_only: false,
|
||||
input: Input::Stdin,
|
||||
output: Output::Stdout,
|
||||
|
@ -6,7 +6,7 @@ use tracing::{debug, error, info, instrument, warn};
|
||||
|
||||
use fuser::FileType;
|
||||
|
||||
use super::config::{Config, Input, Output};
|
||||
use super::config::{Config, Input, Munge, Output};
|
||||
use super::fs::{DirEntry, DirType, Entry, Inode, FS};
|
||||
|
||||
use ::toml as serde_toml;
|
||||
@ -314,6 +314,7 @@ where
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let mut filtered = 0;
|
||||
let mut next_id = fuser::FUSE_ROOT_ID;
|
||||
// parent inum, inum, value
|
||||
let mut worklist: Vec<(u64, u64, V)> = vec![(next_id, next_id, v)];
|
||||
@ -361,11 +362,28 @@ where
|
||||
|
||||
for (field, child) in fvs.into_iter() {
|
||||
let original = field.clone();
|
||||
let mut nfield = config.normalize_name(field);
|
||||
|
||||
while children.contains_key(&nfield) {
|
||||
nfield.push('_');
|
||||
}
|
||||
let nfield = if !config.valid_name(&original) {
|
||||
match config.munge {
|
||||
Munge::Rename => {
|
||||
let mut nfield = config.normalize_name(field);
|
||||
|
||||
// TODO 2021-07-08 could be better to check fvs, but it's a vec now... :/
|
||||
while children.contains_key(&nfield) {
|
||||
nfield.push('_');
|
||||
}
|
||||
|
||||
nfield
|
||||
}
|
||||
Munge::Filter => {
|
||||
warn!("skipping '{}'", field);
|
||||
filtered += child.size();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
field
|
||||
};
|
||||
|
||||
let original_name = if original != nfield {
|
||||
info!(
|
||||
@ -374,6 +392,7 @@ where
|
||||
);
|
||||
Some(original)
|
||||
} else {
|
||||
assert!(config.valid_name(&original));
|
||||
None
|
||||
};
|
||||
|
||||
@ -396,7 +415,8 @@ where
|
||||
|
||||
inodes[inum as usize] = Some(Inode::new(parent, inum, entry, config));
|
||||
}
|
||||
assert_eq!(inodes.len() as u64, next_id);
|
||||
|
||||
assert_eq!((inodes.len() - filtered) as u64, next_id);
|
||||
}
|
||||
|
||||
/// Walks `fs` starting at the inode with number `inum`, producing an
|
||||
|
42
tests/filename_spaces.sh
Executable file
42
tests/filename_spaces.sh
Executable file
@ -0,0 +1,42 @@
|
||||
#!/bin/sh

# Checks that field names containing spaces are mounted under their own
# names and are written back unchanged.

# Reports the failed step, tears down the mount and temporary files, and
# exits non-zero.
fail() {
    echo FAILED: "$1"
    if [ "$MNT" ]
    then
        umount "$MNT"
        rmdir "$MNT"
        # -f: the sorted copies only exist once the final comparison has started
        rm -f "$OUT" "$EXP" "$OUT.yaml" "$EXP.yaml"
    fi
    exit 1
}

MNT=$(mktemp -d)
OUT=$(mktemp)
EXP=$(mktemp)

# Expected output: the two fields from the input plus the one created below.
printf -- "---\nfield one: 1\nfield two: 2\nfield three: 3" >"$EXP"

ffs -m "$MNT" --target yaml -o "$OUT" --munge filter ../yaml/spaces.yaml &
PID=$!
sleep 2
case $(ls "$MNT") in
    (field\ one*field\ two) ;;
    (*) fail ls;;
esac
[ "$(cat "$MNT/field one")" -eq 1 ] || fail one
[ "$(cat "$MNT/field two")" -eq 2 ] || fail two
echo 3 >"$MNT"/field\ three

umount "$MNT" || fail unmount
sleep 1
# ffs should have exited once the filesystem was unmounted
kill -0 $PID >/dev/null 2>&1 && fail process

grep "field three: 3" "$OUT" >/dev/null 2>&1 || fail three

# Compare sorted copies so the check does not depend on line order.
sort "$OUT" >"$OUT.yaml"
sort "$EXP" >"$EXP.yaml"
diff "$OUT.yaml" "$EXP.yaml" || fail diff

rmdir "$MNT" || fail mount
rm "$OUT" "$EXP" "$OUT.yaml" "$EXP.yaml"
|
30
tests/munge_filter.sh
Executable file
30
tests/munge_filter.sh
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh

# Checks `--munge filter`: the listing is expected to contain only `dot`
# and `dotdot`, i.e., the fields that need no munging.

# Reports the failed step, tears down the mount, and exits non-zero.
fail() {
    echo FAILED: "$1"
    if [ "$MNT" ]
    then
        cd
        umount "$MNT"
        rmdir "$MNT"
    fi
    exit 1
}

MNT=$(mktemp -d)

ffs -m "$MNT" --munge filter ../json/obj_rename.json &
PID=$!
sleep 2
case $(ls "$MNT") in
    (dot*dotdot) ;;
    (*) fail ls;;
esac
[ "$(cat "$MNT"/dot)" = "third" ] || fail dot
[ "$(cat "$MNT"/dotdot)" = "fourth" ] || fail dotdot
umount "$MNT" || fail unmount
sleep 1

# ffs should have exited once the filesystem was unmounted
kill -0 $PID >/dev/null 2>&1 && fail process

rmdir "$MNT" || fail mount
|
@ -22,28 +22,28 @@ ffs -m "$MNT" -o "$OUT" --target json ../json/obj_rename.json &
|
||||
PID=$!
|
||||
sleep 2
|
||||
case $(ls "$MNT") in
|
||||
(dot*dot_*dotdot*dotdot_) ;;
|
||||
(_.*_..*dot*dotdot) ;;
|
||||
(*) fail ls;;
|
||||
esac
|
||||
[ "$(cat $MNT/dot)" = "first" ] || fail dot
|
||||
[ "$(cat $MNT/dotdot)" = "second" ] || fail dotdot
|
||||
[ "$(cat $MNT/dot_)" = "third" ] || fail dot_
|
||||
[ "$(cat $MNT/dotdot_)" = "fourth" ] || fail dotdot_
|
||||
[ "$(cat $MNT/_.)" = "first" ] || fail .
|
||||
[ "$(cat $MNT/_..)" = "second" ] || fail ..
|
||||
[ "$(cat $MNT/dot)" = "third" ] || fail dot
|
||||
[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot
|
||||
|
||||
echo primo >"$MNT"/dot
|
||||
echo secondo >"$MNT"/dotdot
|
||||
echo shlishi >"$MNT"/dot_
|
||||
echo derp >"$MNT"/dotdot_
|
||||
echo primo >"$MNT"/_.
|
||||
echo secondo >"$MNT"/_..
|
||||
echo shlishi >"$MNT"/dot
|
||||
echo derp >"$MNT"/dotdot
|
||||
|
||||
mkdir "$MNT"/it
|
||||
mkdir "$MNT"/he
|
||||
|
||||
mv "$MNT"/dot "$MNT"/it
|
||||
mv "$MNT"/dotdot "$MNT"/it
|
||||
mv "$MNT"/_. "$MNT"/it
|
||||
mv "$MNT"/_.. "$MNT"/it
|
||||
|
||||
mv "$MNT"/dot_ "$MNT"/he
|
||||
mv "$MNT"/dot "$MNT"/he
|
||||
|
||||
mv "$MNT"/dotdot_ "$MNT"/imnewhere
|
||||
mv "$MNT"/dotdot "$MNT"/imnewhere
|
||||
|
||||
umount "$MNT" || fail unmount
|
||||
sleep 1
|
||||
|
@ -16,16 +16,14 @@ MNT=$(mktemp -d)
|
||||
ffs -m "$MNT" ../json/obj_rename.json &
|
||||
PID=$!
|
||||
sleep 2
|
||||
cd "$MNT"
|
||||
case $(ls) in
|
||||
(dot*dot_*dotdot*dotdot_) ;;
|
||||
case $(ls "$MNT") in
|
||||
(_.*_..*dot*dotdot) ;;
|
||||
(*) fail ls;;
|
||||
esac
|
||||
[ "$(cat dot)" = "first" ] || fail dot
|
||||
[ "$(cat dotdot)" = "second" ] || fail dotdot
|
||||
[ "$(cat dot_)" = "third" ] || fail dot_
|
||||
[ "$(cat dotdot_)" = "fourth" ] || fail dotdot_
|
||||
cd - >/dev/null 2>&1
|
||||
[ "$(cat $MNT/_.)" = "first" ] || fail .
|
||||
[ "$(cat $MNT/_..)" = "second" ] || fail ..
|
||||
[ "$(cat $MNT/dot)" = "third" ] || fail dot
|
||||
[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot
|
||||
umount "$MNT" || fail unmount
|
||||
sleep 1
|
||||
|
||||
|
@ -22,18 +22,18 @@ ffs -m "$MNT" -o "$OUT" --target json ../json/obj_rename.json &
|
||||
PID=$!
|
||||
sleep 2
|
||||
case $(ls "$MNT") in
|
||||
(dot*dot_*dotdot*dotdot_) ;;
|
||||
(_.*_..*dot*dotdot) ;;
|
||||
(*) fail ls;;
|
||||
esac
|
||||
[ "$(cat $MNT/dot)" = "first" ] || fail dot
|
||||
[ "$(cat $MNT/dotdot)" = "second" ] || fail dotdot
|
||||
[ "$(cat $MNT/dot_)" = "third" ] || fail dot_
|
||||
[ "$(cat $MNT/dotdot_)" = "fourth" ] || fail dotdot_
|
||||
[ "$(cat $MNT/_.)" = "first" ] || fail .
|
||||
[ "$(cat $MNT/_..)" = "second" ] || fail ..
|
||||
[ "$(cat $MNT/dot)" = "third" ] || fail dot
|
||||
[ "$(cat $MNT/dotdot)" = "fourth" ] || fail dotdot
|
||||
|
||||
echo primo >"$MNT"/dot
|
||||
echo secondo >"$MNT"/dotdot
|
||||
echo terzo >"$MNT"/dot_
|
||||
echo quarto >"$MNT"/dotdot_
|
||||
echo primo >"$MNT"/_.
|
||||
echo secondo >"$MNT"/_..
|
||||
echo terzo >"$MNT"/dot
|
||||
echo quarto >"$MNT"/dotdot
|
||||
|
||||
umount "$MNT" || fail unmount
|
||||
sleep 1
|
||||
|
3
yaml/spaces.yaml
Normal file
3
yaml/spaces.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
field one: 1
|
||||
field two: 2
|
||||
|
Loading…
Reference in New Issue
Block a user