add support for opt-in FOD fetching

- will omit storing individual hashes, reducing lock file size
This commit is contained in:
DavHau 2021-09-11 00:06:48 +02:00
parent 72bbad7d9d
commit 4a00410a2b
11 changed files with 260 additions and 42 deletions

View File

@ -6,6 +6,8 @@ It focuses on the following aspects:
- Maintainability
- Nixpkgs Compatibility (not enforcing IFD)
- Code de-duplication across 2nix tools
- Code de-duplication in nixpkgs
- Risk free opt-in FOD fetching
- Common UI across 2nix tools
- Reduce effort to develop new 2nix solutions
@ -26,11 +28,13 @@ The following phases which are generic to basically all existing 2nix solutions:
- building/installing packages
... should be separated from each other with well defined interfaces.
This will allow free composition of different approaches for these phases.
Examples:
- Often more than one requirements / lock-file format exists within an ecosystem. Adding support for a new format should be easy and won't require re-inventing the whole procedure.
- Different resolving/fetching strategies: Some users might prefer a more automated approach via IFD, while others are focusing on upstreaming to nixpkgs, where generating intermediary code or lock-files might be the only option.
- Fetching a list of sources in theory should be a standard process. Yet, in practice, many 2nix tools struggle fetching sources from git or including local source trees. A generic fetching layer can reduce effort for maintainers.
The user should be able to freely choose between:
- input metadata formats (eg. lock file formats)
- metadata fetching/translation strategies: IFD vs. in-tree
- source fetching strategies: granular fetching vs fetching via single large FOD to minimize expression file size
- installation strategies: build dependencies individually vs inside a single derivation.
### Customizability
Every Phase mentioned in the previous section should be customizable at a high degree via override functions. Practical examples:
@ -39,7 +43,6 @@ Every Phase mentioned in the previous section should be customizable at a high d
- replace or modify sources
- customize the build/installation procedure
### Maintainability
Due to the modular architecture with strict interfaces, contributors can more easily add support for new lock-file formats or new strategies for fetching, building, and installing.
@ -48,12 +51,19 @@ Depending on where the nix code is used, different approaches are desired or dis
All solutions which follow the dream2nix specification will be compatible with both approaches without having to re-invent the tool.
### Code de-duplication
Common problems that apply to many 2nix solutions can be solved once. Examples:
Common problems that apply to many 2nix solutions can be solved once by the framework. Examples:
- handling cyclic dependencies
- handling sources from various origins (http, git, local, ...)
- generate nixpkgs/hydra friendly output (no IFD)
- good user interface
### Code de-duplication in nixpkgs
Essential components like package update scripts or fetching and override logic are provided by the dream2nix framework and are stored only once in the source tree instead of several times.
### Risk free opt-in FOD fetching
Optionally, to save more storage space, the individual hashes for sources can be omitted and a single large FOD used instead.
Due to a unified minimalistic fetching layer the risk of FOD hash breakages should be very low.
### Common UI across many 2nix tools
2nix solutions which follow the dream2nix framework will have a unified UI for workflows like project initialization or code generation. This will allow quicker onboarding of new users by providing familiar workflows across different build systems.
@ -88,6 +98,7 @@ Input:
- requirement constraints
- requirement files
- lock-files
- project's source tree
Translator:
- read input and generate generic lock format containing:

View File

@ -20,7 +20,8 @@
"requests": [
"certifi"
]
}
},
"sourcesCombinedHash": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
},
"buildSystem": {

View File

@ -12,6 +12,7 @@ in
translate = callPackage ({ python3, writeScript, ... }:
writeScript "cli" ''
translatorsJsonFile=${translators.translatorsJsonFile} \
dream2nixSrc=${../.} \
${python3}/bin/python ${./translators-cli.py} "$@"
''
) {};

View File

@ -1,6 +1,7 @@
import argparse
import json
import os
import re
import subprocess as sp
import sys
import tempfile
@ -15,6 +16,11 @@ def auto_detect_translator(files, subsystem):
return list(translators[subsystem].keys())[0]
def stripHashesFromLock(lock):
    """Remove the per-source 'hash' entries from a generic lock (in place).

    Used before combined (single-FOD) fetching, where only one
    'sourcesCombinedHash' is stored instead of individual hashes.

    Args:
        lock: parsed dream.lock dict with a 'sources' attrset.
    """
    for source in lock['sources'].values():
        # pop() instead of del: sources are allowed to lack a hash
        # (the default fetcher reads 'source.hash or null'), so a
        # missing key must not raise KeyError here.
        source.pop('hash', None)
def parse_args():
parser = argparse.ArgumentParser(
@ -40,6 +46,12 @@ def parse_args():
default="./dream.lock"
)
parser.add_argument(
"-c", "--combined",
help="Store only one hash for all sources combined (smaller lock file -> larger FOD)",
action="store_true"
)
parser.add_argument(
"input",
help="input files containing relevant metadata",
@ -90,9 +102,50 @@ def main():
[f"{translators[subsystem][translator]}/bin/translate", inputJson.name] + sys.argv[1:]
)
# raise error if output wasn't produced
if not os.path.isfile(output):
raise Exception(f"Translator '{translator}' failed to create dream.lock")
# read produced lock file
with open(output) as f:
lock = json.load(f)
# calculate combined hash
if args.combined:
print("Start building combined sourced FOD to get output hash")
# remove hashes from lock file and init sourcesCombinedHash with emtpy string
stripHashesFromLock(lock)
lock['generic']['sourcesCombinedHash'] = ""
with open(output, 'w') as f:
json.dump(lock, f, indent=2)
# compute FOD hash of combined sources
dream2nix_src = os.environ.get("dream2nixSrc")
proc = sp.run(
[
"nix", "build", "--impure", "-L", "--expr",
f"(import {dream2nix_src} {{}}).fetchSources {{ genericLock = {output}; }}"
],
capture_output=True,
)
# read the output hash from the failed build log
match = re.search(r"FOD_PATH=(.*=)", proc.stderr.decode())
if not match:
print(proc.stderr.decode())
print(proc.stdout.decode())
raise Exception("Could not find FOD hash in FOD log")
hash = match.groups()[0]
print(f"Computed FOD hash: {hash}")
# store the hash in the lock
lock['generic']['sourcesCombinedHash'] = hash
with open(output, 'w') as f:
json.dump(lock, f, indent=2)
print(f"Created {output}")

View File

@ -1,7 +1,7 @@
{
callPackage,
}:
rec {
{
python = rec {
default = simpleBuilder;

View File

@ -21,7 +21,9 @@ python.pkgs.buildPythonPackage {
unpackPhase = ''
mkdir dist
for file in $src; do
fname=$(echo $file | cut -d "-" -f 2-)
# pick right most element of path
fname=''${file##*/}
fname=$(stripHash $fname)
cp $file dist/$fname
done
'';

View File

@ -29,16 +29,52 @@ rec {
builders."${buildSystem}".default;
# detect if granular or combined fetching must be used
findFetcher = genericLock:
if null != genericLock.generic.sourcesCombinedHash then
fetchers.combinedFetcher
else
fetchers.defaultFetcher;
parseLock = lock:
if builtins.isPath lock || builtins.isString lock then
builtins.fromJSON (builtins.readFile lock)
else
lock;
fetchSources =
{
genericLock,
builder ? findBuilder (parseLock genericLock),
fetcher ? findFetcher (parseLock genericLock)
}:
let
# is generic lock is a file, read and parse it
genericLock' = (parseLock genericLock);
fetched = fetcher {
sources = genericLock'.sources;
sourcesCombinedHash = genericLock'.generic.sourcesCombinedHash;
};
in
fetched;
# automatically build package defined by generic lock
buildPackage =
{
genericLock,
builder ? findBuilder genericLock,
fetcher ? fetchers.defaultFetcher
}:
builder ? findBuilder (parseLock genericLock),
fetcher ? findFetcher (parseLock genericLock)
}@args:
let
# is generic lock is a file, read and parse it
genericLock' = (parseLock genericLock);
in
builder {
inherit genericLock;
fetchedSources = fetcher { sources = genericLock.sources; };
genericLock = genericLock';
fetchedSources = (fetchSources args).fetchedSources;
};
}

View File

@ -0,0 +1,106 @@
# Combined fetcher: downloads all sources inside one single
# fixed-output derivation (FOD) instead of one FOD per source,
# so the lock file needs only one hash ("sourcesCombinedHash").
{
defaultFetcher,
bash,
coreutils,
lib,
nix,
stdenv,
writeScript,
...
}:
{
# sources attrset from generic lock
sources,
# single sha256 covering the combined output of all fetched sources
sourcesCombinedHash,
}:
let
# resolve to individual fetcher calls
defaultFetched = (defaultFetcher { inherit sources; }).fetchedSources;
# extract the arguments from the individual fetcher calls;
# overrideAttrs exposes each derivation's original arguments
# via a passthru attribute without rebuilding anything
fetcherArgsAll = lib.mapAttrs (pname: fetched:
(fetched.overrideAttrs (args: {
passthru.originalArgs = args;
})).originalArgs
) defaultFetched;
# convert arbitrary types to string, like nix does with derivation arguments
# (bools become "1"/"", lists are flattened, null becomes "")
toString = x:
if lib.isBool x then
if x then
"1"
else
""
else if lib.isList x then
builtins.toString (lib.forEach x (y: toString y))
else if x == null then
""
else
builtins.toJSON x;
# generate script to fetch single item:
# re-creates the environment of the original FOD by exporting its
# derivation arguments, then runs its builder script directly
fetchItem = pname: fetcherArgs: ''
# export arguments for builder
${lib.concatStringsSep "\n" (lib.mapAttrsToList (argName: argVal: ''
export ${argName}=${toString argVal}
'') fetcherArgs)}
# run builder
bash ${fetcherArgs.builder}
'';
# builder which wraps several other FOD builders
# and executes these after each other inside a single build;
# each source is fetched into $out/<name> by temporarily pointing
# $out at that subdirectory
# TODO: for some reason PATH is unset and we don't have access to the stdenv tools
builder = writeScript "multi-source-fetcher" ''
#!${bash}/bin/bash
export PATH=${coreutils}/bin:${bash}/bin
mkdir $out
${lib.concatStringsSep "\n" (lib.mapAttrsToList (pname: fetcherArgs: ''
OUT_ORIG=$out
export out=$OUT_ORIG/${fetcherArgs.name}
mkdir workdir
pushd workdir
${fetchItem pname fetcherArgs}
popd
rm -r workdir
export out=$OUT_ORIG
'') fetcherArgsAll )}
echo "FOD_PATH=$(${nix}/bin/nix hash-path $out)"
'';
# the single fixed-output derivation holding all fetched sources;
# collects the nativeBuildInputs of every wrapped fetcher so their
# tools are available inside the combined build
FODAllSources =
let
nativeBuildInputs' = lib.foldl (a: b: a ++ b) [] (
lib.mapAttrsToList
(pname: fetcherArgs: (fetcherArgs.nativeBuildInputs or []))
fetcherArgsAll
);
in
stdenv.mkDerivation rec {
name = "sources-combined";
inherit builder;
nativeBuildInputs = nativeBuildInputs' ++ [
coreutils
];
# fixed-output derivation: outputHash may be "" on the first run,
# in which case the build fails and the CLI extracts the real hash
# from the "FOD_PATH=..." line printed by the builder above
outputHashAlgo = "sha256";
outputHashMode = "recursive";
outputHash = sourcesCombinedHash;
};
in
{
FOD = FODAllSources;
fetchedSources =
# attrset: pname -> path of downloaded source
lib.genAttrs (lib.attrNames sources) (pname:
"${FODAllSources}/${fetcherArgsAll."${pname}".name}"
);
}

View File

@ -11,27 +11,31 @@
{
# sources attrset from generic lock
sources,
...
}:
lib.mapAttrs (pname: source:
{
# attrset: pname -> path of downloaded source
fetchedSources = lib.mapAttrs (pname: source:
if source.type == "github" then
fetchFromGitHub {
inherit (source) url owner repo rev;
sha256 = source.hash;
sha256 = source.hash or null;
}
else if source.type == "gitlab" then
fetchFromGitLab {
inherit (source) url owner repo rev;
sha256 = source.hash;
sha256 = source.hash or null;
}
else if source.type == "git" then
fetchgit {
inherit (source) url rev;
sha256 = source.hash;
sha256 = source.hash or null;
}
else if source.type == "fetchurl" then
fetchurl {
inherit (source) url;
sha256 = source.hash;
sha256 = source.hash or null;
}
else throw "unsupported source type '${source.type}'"
) sources
) sources;
}

View File

@ -1,6 +1,8 @@
{
callPackage,
}:
{
rec {
defaultFetcher = callPackage ./default-fetcher.nix {};
combinedFetcher = callPackage ./combined-fetcher.nix { inherit defaultFetcher; };
}

View File

@ -47,12 +47,14 @@ def main():
generic={
"buildSystem": "python",
"buildSystemFormatVersion": 1,
"producedBy": "translator-external-pip",
"producedBy": "external-pip",
# This translator is not aware of the exact dependency graph.
# This restricts us to use a single derivation builder later,
# which will install all packages at once
"dependencyGraph": None,
"sourcesCombinedHash": None,
},
buildSystem={
"pythonAttr": f"python{sys.version_info.major}{sys.version_info.minor}",