binlore: migrate override lore to package passthru

Lore overrides have been included with binlore's source up to now, but
this hasn't worked very well. (It isn't as easy to self-service for
people working in nixpkgs, and its use of partial pnames for matching
breaks down around some edge cases like version numbers appearing
early in perl pnames, or multiple packages having identical pnames.)
This commit is contained in:
Travis A. Everett 2024-04-16 11:58:38 -05:00
parent 87327df106
commit 8f413d8a44
11 changed files with 259 additions and 32 deletions

View File

@ -49,6 +49,10 @@ resholve.mkDerivation rec {
imagemagick
zip
];
execer = [
# zip can exec; confirmed 2 invocations in pdf2odt don't
"cannot:${zip}/bin/zip"
];
};
meta = with lib; {

View File

@ -23,6 +23,9 @@
, cmake
, nix
, samba
# for passthru.lore
, binlore
}:
assert xarSupport -> libxml2 != null;
@ -125,4 +128,11 @@ stdenv.mkDerivation (finalAttrs: {
passthru.tests = {
inherit cmake nix samba;
};
# bsdtar is detected as "cannot" because its exec happens inside
# libarchive (which bsdtar calls into) rather than in the bsdtar
# binary itself. If binlore gains support for detecting another
# layer down into libraries, this can be cut.
passthru.binlore.out = binlore.synthesize finalAttrs.finalPackage ''
execer can bin/bsdtar
'';
})

View File

@ -11,6 +11,7 @@
, mouseSupport ? false, gpm
, unicodeSupport ? true
, testers
, binlore
}:
stdenv.mkDerivation (finalAttrs: {
@ -180,6 +181,17 @@ stdenv.mkDerivation (finalAttrs: {
rm "$out"/lib/*.a
'';
# I'm not very familiar with ncurses, but it looks like most of the
# exec here will run hard-coded executables. There's one that is
# dynamic, but it looks like it only comes from executing a terminfo
# file, so I think it isn't going to be under user control via CLI?
# Happy to have someone help nail this down in either direction!
# The "capability" is 'iprog', and I could only find 1 real example:
# https://invisible-island.net/ncurses/terminfo.ti.html#tic-linux-s
passthru.binlore.out = binlore.synthesize ncurses ''
execer cannot bin/{reset,tput,tset}
'';
meta = with lib; {
homepage = "https://www.gnu.org/software/ncurses/";
description = "Free software emulation of curses in SVR4 and more";

View File

@ -56,58 +56,169 @@ let
# in here, but I'm erring on the side of flexibility
# since this form will make it easier to pilot other
# uses of binlore.
callback = lore: drv: overrides: ''
callback = lore: drv: ''
if [[ -d "${drv}/bin" ]] || [[ -d "${drv}/lib" ]] || [[ -d "${drv}/libexec" ]]; then
echo generating binlore for $drv by running:
echo "${yara}/bin/yara --scan-list --recursive ${lore.rules} <(printf '%s\n' ${drv}/{bin,lib,libexec}) | ${yallback}/bin/yallback ${lore.yallback}"
else
echo "failed to generate binlore for $drv (none of ${drv}/{bin,lib,libexec} exist)"
fi
'' +
/*
Override lore for some packages. Unsure, but for now:
1. start with the ~name (pname-version)
2. remove characters from the end until we find a match
in overrides/
3. execute the override script with the list of expected
lore types
*/
''
i=''${#identifier}
filter=
while [[ $i > 0 ]] && [[ -z "$filter" ]]; do
if [[ -f "${overrides}/''${identifier:0:$i}" ]]; then
filter="${overrides}/''${identifier:0:$i}"
echo using "${overrides}/''${identifier:0:$i}" to generate overriden binlore for $drv
break
fi
((i--)) || true # don't break build
done # || true # don't break build
if [[ -d "${drv}/bin" ]] || [[ -d "${drv}/lib" ]] || [[ -d "${drv}/libexec" ]]; then
${yara}/bin/yara --scan-list --recursive ${lore.rules} <(printf '%s\n' ${drv}/{bin,lib,libexec}) | ${yallback}/bin/yallback ${lore.yallback} "$filter"
${yara}/bin/yara --scan-list --recursive ${lore.rules} <(printf '%s\n' ${drv}/{bin,lib,libexec}) | ${yallback}/bin/yallback ${lore.yallback}
fi
'';
};
overrides = (src + "/overrides");
in rec {
/*
Output a directory containing lore for multiple drvs.
This will `make` lore for drv in drvs and then combine lore
of the same type across all packages into a single file.
When drvs are also specified in the strip argument, corresponding
lore is made relative by stripping the path of each drv from
matching entries. (This is mainly useful in a build process that
uses a chain of two or more derivations where the output of one
is the source for the next. See resholve for an example.)
*/
collect = { lore ? loreDef, drvs, strip ? [ ] }: (runCommand "more-binlore" { } ''
mkdir $out
for lorefile in ${toString lore.types}; do
cat ${lib.concatMapStrings (x: x + "/$lorefile ") (map (make lore) (map lib.getBin (builtins.filter lib.isDerivation drvs)))} > $out/$lorefile
substituteInPlace $out/$lorefile ${lib.concatMapStrings (x: "--replace '${x}/' '' ") strip}
substituteInPlace $out/$lorefile ${lib.concatMapStrings (x: "--replace-quiet '${x}/' '' ") strip}
done
'');
# TODO: echo for debug, can be removed at some point
/*
Output a directory containing lore for a single drv.
This produces lore for the derivation (via lore.callback) and
appends any lore that the derivation itself wrote to nix-support
or which was overridden in drv.binlore.<outputName> (passthru).
> *Note*: Since the passthru is attached to all outputs, binlore
> is an attrset namespaced by outputName to support packages with
> executables in more than one output.
Since the last entry wins, the effective priority is:
drv.binlore.<outputName> > $drv/nix-support > lore generated here by callback
*/
make = lore: drv: runCommand "${drv.name}-binlore" {
identifier = drv.name;
drv = drv;
} (''
mkdir $out
touch $out/{${builtins.concatStringsSep "," lore.types}}
${lore.callback lore drv overrides}
${lore.callback lore drv}
'' +
# append lore from package's $out and drv.binlore.${drv.outputName} (last entry wins)
''
for lore_type in ${builtins.toString lore.types}; do
if [[ -f "${drv}/nix-support/$lore_type" ]]; then
cat "${drv}/nix-support/$lore_type" >> "$out/$lore_type"
fi
'' + lib.optionalString (builtins.hasAttr "binlore" drv && builtins.hasAttr drv.outputName drv.binlore) ''
if [[ -f "${drv.binlore."${drv.outputName}"}/$lore_type" ]]; then
cat "${drv.binlore."${drv.outputName}"}/$lore_type" >> "$out/$lore_type"
fi
'' + ''
done
echo binlore for $drv written to $out
'');
/*
Utility function for creating override lore for drv.
We normally attach this lore to `drv.passthru.binlore.<outputName>`.
> *Notes*:
> - Since the passthru is attached to all outputs, binlore is an
> attrset namespaced by outputName to support packages with
> executables in more than one output. You'll generally just use
> `out` or `bin`.
> - We can reconsider the passthru attr name if someone adds
> a new lore provider. We settled on `.binlore` for now to make it
> easier for people to figure out what this is for.
The lore argument should be a Shell script (string) that generates
the necessary lore. You can use arbitrary Shell, but this function
includes a shell DSL you can use to declare/generate lore in most
cases. It has the following functions:
- `execer <verdict> [<path>...]`
- `wrapper <wrapper_path> <original_path>`
Writing every override explicitly in a Nix list would be tedious
for large packages, but this small shell DSL enables us to express
many overrides efficiently via pathname expansion/globbing.
Here's a very general example of both functions:
passthru.binlore.out = binlore.synthesize finalAttrs.finalPackage ''
execer can bin/hello bin/{a,b,c}
wrapper bin/hello bin/.hello-wrapped
'';
And here's a specific example of how pathname expansion enables us
to express lore for the single-binary variant of coreutils while
being both explicit and (somewhat) efficient:
passthru = {} // optionalAttrs (singleBinary != false) {
binlore.out = binlore.synthesize coreutils ''
execer can bin/{chroot,env,install,nice,nohup,runcon,sort,split,stdbuf,timeout}
execer cannot bin/{[,b2sum,base32,base64,basename,basenc,cat,chcon,chgrp,chmod,chown,cksum,comm,cp,csplit,cut,date,dd,df,dir,dircolors,dirname,du,echo,expand,expr,factor,false,fmt,fold,groups,head,hostid,id,join,kill,link,ln,logname,ls,md5sum,mkdir,mkfifo,mknod,mktemp,mv,nl,nproc,numfmt,od,paste,pathchk,pinky,pr,printenv,printf,ptx,pwd,readlink,realpath,rm,rmdir,seq,sha1sum,sha224sum,sha256sum,sha384sum,sha512sum,shred,shuf,sleep,stat,stty,sum,sync,tac,tail,tee,test,touch,tr,true,truncate,tsort,tty,uname,unexpand,uniq,unlink,uptime,users,vdir,wc,who,whoami,yes}
'';
};
Caution: Be thoughtful about using a bare wildcard (*) glob here.
We should generally override lore only when a human understands if
the executable will exec arbitrary user-passed executables. A bare
glob can match new executables added in future package versions
before anyone can audit them.
*/
synthesize = drv: loreSynthesizingScript: runCommand "${drv.name}-lore-override" {
  # referenced as $drv by the script below, which cds into it
  drv = drv;
} (''
  # execer <verdict> [<path>...]
  # Appends one "<verdict>:<absolute path>" line to $out/execers for
  # each path; fails the build if a path does not exist as a file.
  execer(){
    local verdict="$1"
    # keep the loop variable from leaking into (or clobbering a global
    # of) the override script that is appended after these helpers
    local path
    shift
    for path in "$@"; do
      if [[ -f "$PWD/$path" ]]; then
        echo "$verdict:$PWD/$path"
      else
        echo "error: Tried to synthesize execer lore for missing file: $PWD/$path" >&2
        exit 2
      fi
    done
  } >> $out/execers

  # wrapper <wrapper_path> <original_path>
  # Appends one "<absolute wrapper>:<absolute original>" line to
  # $out/wrappers; fails the build if either file is missing.
  wrapper(){
    local wrapper="$1"
    local original="$2"
    if [[ ! -f "$wrapper" ]]; then
      echo "error: Tried to synthesize wrapper lore for missing wrapper: $PWD/$wrapper" >&2
      exit 2
    fi
    if [[ ! -f "$original" ]]; then
      echo "error: Tried to synthesize wrapper lore for missing original: $PWD/$original" >&2
      exit 2
    fi
    echo "$PWD/$wrapper:$PWD/$original"
  } >> $out/wrappers

  mkdir $out

  # lore override commands are relative to the drv root
  cd $drv
'' + loreSynthesizingScript);
}

View File

@ -10,6 +10,8 @@
, lib
, nixosTests
, installShellFiles
, binlore
, nixos-rebuild
}:
let
fallback = import ./../../../../nixos/modules/installer/tools/nix-fallback-paths.nix;
@ -49,6 +51,13 @@ substitute {
target-host = nixosTests.nixos-rebuild-target-host;
};
# nixos-rebuild can't execute its arguments
# (but it can run ssh with the options stored in $NIX_SSHOPTS,
# and ssh can execute its arguments...)
passthru.binlore.out = binlore.synthesize nixos-rebuild ''
execer cannot bin/nixos-rebuild
'';
meta = {
description = "Rebuild your NixOS configuration and switch to it, on local hosts and remote";
homepage = "https://github.com/NixOS/nixpkgs/tree/master/pkgs/os-specific/linux/nixos-rebuild";

View File

@ -15,6 +15,9 @@
# exception is watch which is portable enough to run on pretty much
# any UNIX-compatible system.
, watchOnly ? !(stdenv.isLinux || stdenv.isCygwin)
, binlore
, procps
}:
stdenv.mkDerivation rec {
@ -61,6 +64,12 @@ stdenv.mkDerivation rec {
install -m 0644 -D watch.1 $out/share/man/man1/watch.1
'';
# no obvious exec in documented arguments; haven't trawled source
# to figure out what exec binlore hits on
passthru.binlore.out = binlore.synthesize procps ''
execer cannot bin/{ps,top,free}
'';
meta = with lib; {
homepage = "https://gitlab.com/procps-ng/procps";
description = "Utilities that give information about processes using the /proc filesystem";

View File

@ -7,6 +7,8 @@
, perl
, texinfo
, xz
, binlore
, coreutils
, gmpSupport ? true, gmp
, aclSupport ? stdenv.isLinux, acl
, attrSupport ? stdenv.isLinux, attr
@ -27,7 +29,7 @@ assert aclSupport -> acl != null;
assert selinuxSupport -> libselinux != null && libsepol != null;
let
inherit (lib) concatStringsSep isString optional optionals optionalString;
inherit (lib) concatStringsSep isString optional optionalAttrs optionals optionalString;
isCross = (stdenv.hostPlatform != stdenv.buildPlatform);
in
stdenv.mkDerivation rec {
@ -181,6 +183,26 @@ stdenv.mkDerivation rec {
rm -r "$out/share"
'';
passthru = {} // optionalAttrs (singleBinary != false) {
# everything in the single binary gets the same verdict, so we
# override _that case_ with verdicts from separate binaries.
#
# binlore only spots exec in runcon on some platforms (i.e., not
# darwin; see comment on inverse case below)
binlore.out = binlore.synthesize coreutils ''
execer can bin/{chroot,env,install,nice,nohup,runcon,sort,split,stdbuf,timeout}
execer cannot bin/{[,b2sum,base32,base64,basename,basenc,cat,chcon,chgrp,chmod,chown,cksum,comm,cp,csplit,cut,date,dd,df,dir,dircolors,dirname,du,echo,expand,expr,factor,false,fmt,fold,groups,head,hostid,id,join,kill,link,ln,logname,ls,md5sum,mkdir,mkfifo,mknod,mktemp,mv,nl,nproc,numfmt,od,paste,pathchk,pinky,pr,printenv,printf,ptx,pwd,readlink,realpath,rm,rmdir,seq,sha1sum,sha224sum,sha256sum,sha384sum,sha512sum,shred,shuf,sleep,stat,stty,sum,sync,tac,tail,tee,test,touch,tr,true,truncate,tsort,tty,uname,unexpand,uniq,unlink,uptime,users,vdir,wc,who,whoami,yes}
'';
} // optionalAttrs (singleBinary == false) {
# binlore only spots exec in runcon on some platforms (i.e., not
# darwin; I have a note that the behavior may need selinux?).
# hard-set it so people working on macOS don't miss cases of
# runcon until ofBorg fails.
binlore.out = binlore.synthesize coreutils ''
execer can bin/runcon
'';
};
meta = with lib; {
homepage = "https://www.gnu.org/software/coreutils/";
description = "GNU Core Utilities";

View File

@ -1,4 +1,4 @@
{ lib, stdenv, fetchurl, e2fsprogs, openldap, pkg-config }:
{ lib, stdenv, fetchurl, e2fsprogs, openldap, pkg-config, binlore, linuxquota }:
stdenv.mkDerivation rec {
version = "4.09";
@ -14,6 +14,10 @@ stdenv.mkDerivation rec {
nativeBuildInputs = [ pkg-config ];
buildInputs = [ e2fsprogs openldap ];
passthru.binlore.out = binlore.synthesize linuxquota ''
execer cannot bin/quota
'';
meta = with lib; {
description = "Tools to manage kernel-level quotas in Linux";
homepage = "https://sourceforge.net/projects/linuxquota/";

View File

@ -8,6 +8,7 @@
nixos-install-tools,
runCommand,
nixosTests,
binlore,
}:
let
inherit (nixos {}) config;
@ -62,6 +63,12 @@ in
touch $out
'';
};
# no documented flags show signs of exec; skim of source suggests
# it's just --help execing man
passthru.binlore.out = binlore.synthesize nixos-install-tools ''
execer cannot bin/nixos-generate-config
'';
}).overrideAttrs {
inherit version;
pname = "nixos-install-tools";

View File

@ -1,4 +1,4 @@
{ lib, stdenv, fetchFromGitHub, asciidoctor, gawk, gnused, runtimeShell }:
{ lib, stdenv, fetchFromGitHub, asciidoctor, gawk, gnused, runtimeShell, binlore, esh }:
stdenv.mkDerivation rec {
pname = "esh";
@ -30,6 +30,14 @@ stdenv.mkDerivation rec {
doCheck = true;
checkTarget = "test";
# working around a bug in file. Was fixed in
# file 5.41-5.43 but regressed in 5.44+
# see https://bugs.astron.com/view.php?id=276
# "can" verdict because of `-s SHELL` arg
passthru.binlore.out = binlore.synthesize esh ''
execer can bin/esh
'';
meta = with lib; {
description = "Simple templating engine based on shell";
mainProgram = "esh";

View File

@ -1,4 +1,4 @@
{ pkgs, buildEnv, runCommand, lib, stdenv, freebsd }:
{ pkgs, buildEnv, runCommand, lib, stdenv, freebsd, binlore }:
# These are some unix tools that are commonly included in the /usr/bin
# and /usr/sbin directory under more normal distributions. Along with
@ -30,7 +30,9 @@ let
priority = 10;
platforms = platforms.${stdenv.hostPlatform.parsed.kernel.name} or platforms.all;
};
passthru = { inherit provider; };
passthru = { inherit provider; } // lib.optionalAttrs (builtins.hasAttr "binlore" providers) {
binlore.out = (binlore.synthesize (getBin bins.${cmd}) providers.binlore);
};
preferLocalBuild = true;
} ''
if ! [ -x ${bin} ]; then
@ -76,6 +78,10 @@ let
linux = if stdenv.hostPlatform.libc == "glibc" then pkgs.stdenv.cc.libc
else pkgs.netbsd.getconf;
darwin = pkgs.darwin.system_cmds;
# I don't see any obvious arg exec in the doc/manpage
binlore = ''
execer cannot bin/getconf
'';
};
getent = {
linux = if stdenv.hostPlatform.libc == "glibc" then pkgs.stdenv.cc.libc.getent
@ -118,6 +124,11 @@ let
linux = pkgs.glibc;
darwin = pkgs.darwin.adv_cmds;
freebsd = pkgs.freebsd.locale;
# technically just targeting glibc version
# no obvious exec in manpage
binlore = ''
execer cannot bin/locale
'';
};
logger = {
linux = pkgs.util-linux;
@ -130,6 +141,13 @@ let
linux = pkgs.util-linux;
darwin = pkgs.darwin.diskdev_cmds;
freebsd = freebsd.mount;
# technically just targeting the darwin version; binlore already
# ids the util-linux copy as 'cannot'
# no obvious exec in manpage args; I think binlore flags 'can'
# on the code to run `mount_<filesystem>` variants
binlore = ''
execer cannot bin/mount
'';
};
netstat = {
linux = pkgs.nettools;
@ -145,6 +163,12 @@ let
linux = pkgs.procps;
darwin = pkgs.darwin.ps;
freebsd = pkgs.freebsd.bin;
# technically just targeting procps ps (which ids as can)
# but I don't see obvious exec in args; have yet to look
# for underlying cause in source
binlore = ''
execer cannot bin/ps
'';
};
quota = {
linux = pkgs.linuxquota;
@ -168,6 +192,13 @@ let
linux = pkgs.procps;
darwin = pkgs.darwin.top;
freebsd = pkgs.freebsd.top;
# technically just targeting procps top; haven't needed this in
# any scripts so far, but overriding it for consistency with ps
# override above and in procps. (procps also overrides 'free',
# but it isn't included here.)
binlore = ''
execer cannot bin/top
'';
};
umount = {
linux = pkgs.util-linux;