From 4a00410a2b200ddbb4058b6d599ef7899462f913 Mon Sep 17 00:00:00 2001 From: DavHau Date: Sat, 11 Sep 2021 00:06:48 +0200 Subject: [PATCH] add support for opt-in FOD fetching - will omit storing individual hashes, reducing lock file size --- README.md | 25 +++-- specifications/generic-lock-example.json | 3 +- src/apps/default.nix | 1 + src/apps/translators-cli.py | 53 +++++++++ src/builders/default.nix | 4 +- .../python/simple-builder/default.nix | 4 +- src/default.nix | 48 +++++++- src/fetchers/combined-fetcher.nix | 106 ++++++++++++++++++ src/fetchers/default-fetcher.nix | 50 +++++---- src/fetchers/default.nix | 4 +- .../external-pip/generate-generic-lock.py | 4 +- 11 files changed, 260 insertions(+), 42 deletions(-) create mode 100644 src/fetchers/combined-fetcher.nix diff --git a/README.md b/README.md index 4632931e..adc5e747 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ It focuses on the following aspects: - Maintainability - Nixpkgs Compatibility (not enforcing IFD) - Code de-duplication across 2nix tools + - Code de-duplication in nixpkgs + - Risk free opt-in FOD fetching - Common UI across 2nix tools - Reduce effort to develop new 2nix solutions @@ -25,12 +27,14 @@ The following phases which are generic to basically all existing 2nix solutions: - fetching sources - building/installing packages -... should be separated from each other with well defined interfaces. +... should be separated from each other with well defined interfaces. + This will allow for free compsition of different approaches for these phases. -Examples: - - Often more than one requirements / lock-file format exists within an ecosystem. Adding support for a new format should be easy and won't require re-inventing the whole procedure. - - Different resolving/fetching strategies: Some users might prefer a more automated approach via IFD, while others are focusing on upstreaming to nixpkgs, where generating intermediary code or lock-files might be the only option. 
- - Fetching a list of sources in theory should be a standard process. Yet, in practice, many 2nix tools struggle fetching sources from git or including local source trees. A generic fetching layer can reduce effort for maintainers. +The user should be able to freely choose between: + - input metadata formats (eg. lock file formats) + - metadata fetching/translation strategies: IFD vs. in-tree + - source fetching strategies: granular fetching vs fetching via single large FOD to minimize expression file size + - installation strategies: build dependencies individually vs inside a single derivation. ### Customizability Every Phase mentioned in the previous section should be customizable at a high degree via override functions. Practical examples: @@ -39,7 +43,6 @@ Every Phase mentioned in the previous section should be customizable at a high d - replace or modify sources - customize the build/installation procedure - ### Maintainability Due to the modular architecture with strict interfaces, contributers can add support for new lock-file formats or new strategies for fetching, building, installing more easily. @@ -48,12 +51,19 @@ Depending on where the nix code is used, different approaches are desired or dis All solutions which follow the dream2nix specification will be compatible with both approaches without having to re-invent the tool. ### Code de-duplication -Common problems that apply to many 2nix solutions can be solved once. Examples: +Common problems that apply to many 2nix solutions can be solved once by the framework. Examples: - handling cyclic dependencies - handling sources from various origins (http, git, local, ...) - generate nixpkgs/hydra friendly output (no IFD) - good user interface +### Code de-duplication in nixpkgs +Essential components like package update scripts or fetching and override logic are provided by the dream2nix framework and are stored only once in the source tree instead of several times. 
+ +### Risk free opt-in FOD fetching +Optionally, to save more storage space, individual hashes for source can be omitted and a single large FOD used instead. +Due to a unified minimalistic fetching layer the risk of FOD hash breakages should be very low. + ### Common UI across many 2nix tools 2nix solutions which follow the dream2nix framework will have a unified UI for workflows like project initialization or code generation. This will allow quicker onboarding of new users by providing familiar workflows across different build systems. @@ -88,6 +98,7 @@ Input: - requirement contstraints - requirement files - lock-files + - project's source tree Translator: - read input and generate generic lock format containing: diff --git a/specifications/generic-lock-example.json b/specifications/generic-lock-example.json index 87f54fa9..4d0f5171 100644 --- a/specifications/generic-lock-example.json +++ b/specifications/generic-lock-example.json @@ -20,7 +20,8 @@ "requests": [ "certifi" ] - } + }, + "sourcesCombinedHash": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" }, "buildSystem": { diff --git a/src/apps/default.nix b/src/apps/default.nix index d5e2cc5d..2ad19a8a 100644 --- a/src/apps/default.nix +++ b/src/apps/default.nix @@ -12,6 +12,7 @@ in translate = callPackage ({ python3, writeScript, ... 
}: writeScript "cli" '' translatorsJsonFile=${translators.translatorsJsonFile} \ + dream2nixSrc=${../.} \ ${python3}/bin/python ${./translators-cli.py} "$@" '' ) {}; diff --git a/src/apps/translators-cli.py b/src/apps/translators-cli.py index 972c9c63..72cb8ae5 100644 --- a/src/apps/translators-cli.py +++ b/src/apps/translators-cli.py @@ -1,6 +1,7 @@ import argparse import json import os +import re import subprocess as sp import sys import tempfile @@ -15,6 +16,11 @@ def auto_detect_translator(files, subsystem): return list(translators[subsystem].keys())[0] +def stripHashesFromLock(lock): + for source in lock['sources'].values(): + del source['hash'] + + def parse_args(): parser = argparse.ArgumentParser( @@ -40,6 +46,12 @@ def parse_args(): default="./dream.lock" ) + parser.add_argument( + "-c", "--combined", + help="Store only one hash for all sources combined (smaller lock file -> larger FOD)", + action="store_true" + ) + parser.add_argument( "input", help="input files containing relevant metadata", @@ -90,9 +102,50 @@ def main(): [f"{translators[subsystem][translator]}/bin/translate", inputJson.name] + sys.argv[1:] ) + # raise error if output wasn't produced if not os.path.isfile(output): raise Exception(f"Translator '{translator}' failed to create dream.lock") + # read produced lock file + with open(output) as f: + lock = json.load(f) + + # calculate combined hash + if args.combined: + + print("Start building combined sourced FOD to get output hash") + + # remove hashes from lock file and init sourcesCombinedHash with empty string + stripHashesFromLock(lock) + lock['generic']['sourcesCombinedHash'] = "" + with open(output, 'w') as f: + json.dump(lock, f, indent=2) + + # compute FOD hash of combined sources + dream2nix_src = os.environ.get("dream2nixSrc") + proc = sp.run( + [ + "nix", "build", "--impure", "-L", "--expr", + f"(import {dream2nix_src} {{}}).fetchSources {{ genericLock = {output}; }}" + ], + capture_output=True, + ) + + # read the output hash from 
the failed build log + match = re.search(r"FOD_PATH=(.*=)", proc.stderr.decode()) + if not match: + print(proc.stderr.decode()) + print(proc.stdout.decode()) + raise Exception("Could not find FOD hash in FOD log") + hash = match.groups()[0] + print(f"Computed FOD hash: {hash}") + + # store the hash in the lock + lock['generic']['sourcesCombinedHash'] = hash + with open(output, 'w') as f: + json.dump(lock, f, indent=2) + + print(f"Created {output}") diff --git a/src/builders/default.nix b/src/builders/default.nix index c08fc746..73967061 100644 --- a/src/builders/default.nix +++ b/src/builders/default.nix @@ -1,8 +1,8 @@ { callPackage, }: -rec { - python = rec { +{ + python = rec { default = simpleBuilder; diff --git a/src/builders/python/simple-builder/default.nix b/src/builders/python/simple-builder/default.nix index 9f0afba4..812be0ea 100644 --- a/src/builders/python/simple-builder/default.nix +++ b/src/builders/python/simple-builder/default.nix @@ -21,7 +21,9 @@ python.pkgs.buildPythonPackage { unpackPhase = '' mkdir dist for file in $src; do - fname=$(echo $file | cut -d "-" -f 2-) + # pick right most element of path + fname=''${file##*/} + fname=$(stripHash $fname) cp $file dist/$fname done ''; diff --git a/src/default.nix b/src/default.nix index 1f90df74..62663875 100644 --- a/src/default.nix +++ b/src/default.nix @@ -23,22 +23,58 @@ rec { # automatically find a suitable builder for a given generic lock findBuilder = genericLock: - let + let buildSystem = genericLock.generic.buildSystem; in builders."${buildSystem}".default; + # detect if granular or combined fetching must be used + findFetcher = genericLock: + if null != genericLock.generic.sourcesCombinedHash then + fetchers.combinedFetcher + else + fetchers.defaultFetcher; + + + parseLock = lock: + if builtins.isPath lock || builtins.isString lock then + builtins.fromJSON (builtins.readFile lock) + else + lock; + + + fetchSources = + { + genericLock, + builder ? 
findBuilder (parseLock genericLock), + fetcher ? findFetcher (parseLock genericLock) + }: + let + # if generic lock is a file, read and parse it + genericLock' = (parseLock genericLock); + fetched = fetcher { + sources = genericLock'.sources; + sourcesCombinedHash = genericLock'.generic.sourcesCombinedHash; + }; + in + fetched; + + # automatically build package defined by generic lock buildPackage = { genericLock, - builder ? findBuilder genericLock, - fetcher ? fetchers.defaultFetcher - }: + builder ? findBuilder (parseLock genericLock), + fetcher ? findFetcher (parseLock genericLock) + }@args: + let + # if generic lock is a file, read and parse it + genericLock' = (parseLock genericLock); + in builder { - inherit genericLock; - fetchedSources = fetcher { sources = genericLock.sources; }; + genericLock = genericLock'; + fetchedSources = (fetchSources args).fetchedSources; }; } \ No newline at end of file diff --git a/src/fetchers/combined-fetcher.nix b/src/fetchers/combined-fetcher.nix new file mode 100644 index 00000000..60f61fa8 --- /dev/null +++ b/src/fetchers/combined-fetcher.nix @@ -0,0 +1,106 @@ +{ + defaultFetcher, + + bash, + coreutils, + lib, + nix, + stdenv, + writeScript, + ... 
+}: +{ + # sources attrset from generic lock + sources, + sourcesCombinedHash, +}: +let + + # resolve to individual fetcher calls + defaultFetched = (defaultFetcher { inherit sources; }).fetchedSources; + + # extract the arguments from the individual fetcher calls + fetcherArgsAll = lib.mapAttrs (pname: fetched: + (fetched.overrideAttrs (args: { + passthru.originalArgs = args; + })).originalArgs + ) defaultFetched; + + # convert arbitrary types to string, like nix does with derivation arguments + toString = x: + if lib.isBool x then + if x then + "1" + else + "" + else if lib.isList x then + builtins.toString (lib.forEach x (y: toString y)) + else if x == null then + "" + else + builtins.toJSON x; + + # generate script to fetch single item + fetchItem = pname: fetcherArgs: '' + + # export arguments for builder + ${lib.concatStringsSep "\n" (lib.mapAttrsToList (argName: argVal: '' + export ${argName}=${toString argVal} + '') fetcherArgs)} + + # run builder + bash ${fetcherArgs.builder} + ''; + + # builder which wraps several other FOD builders + # and executes these after each other inside a single build + # TODO: for some reason PATH is unset and we don't have access to the stdenv tools + builder = writeScript "multi-source-fetcher" '' + #!${bash}/bin/bash + export PATH=${coreutils}/bin:${bash}/bin + + mkdir $out + + ${lib.concatStringsSep "\n" (lib.mapAttrsToList (pname: fetcherArgs: '' + OUT_ORIG=$out + export out=$OUT_ORIG/${fetcherArgs.name} + mkdir workdir + pushd workdir + ${fetchItem pname fetcherArgs} + popd + rm -r workdir + export out=$OUT_ORIG + '') fetcherArgsAll )} + + echo "FOD_PATH=$(${nix}/bin/nix hash-path $out)" + ''; + + FODAllSources = + let + nativeBuildInputs' = lib.foldl (a: b: a ++ b) [] ( + lib.mapAttrsToList + (pname: fetcherArgs: (fetcherArgs.nativeBuildInputs or [])) + fetcherArgsAll + ); + in + stdenv.mkDerivation rec { + name = "sources-combined"; + inherit builder; + nativeBuildInputs = nativeBuildInputs' ++ [ + coreutils + ]; + 
outputHashAlgo = "sha256"; + outputHashMode = "recursive"; + outputHash = sourcesCombinedHash; + }; + +in + +{ + FOD = FODAllSources; + fetchedSources = + # attrset: pname -> path of downloaded source + lib.genAttrs (lib.attrNames sources) (pname: + "${FODAllSources}/${fetcherArgsAll."${pname}".name}" + ); +} diff --git a/src/fetchers/default-fetcher.nix b/src/fetchers/default-fetcher.nix index 14d483cb..62783fe1 100644 --- a/src/fetchers/default-fetcher.nix +++ b/src/fetchers/default-fetcher.nix @@ -11,27 +11,31 @@ { # sources attrset from generic lock sources, + ... }: -lib.mapAttrs (pname: source: - if source.type == "github" then - fetchFromGitHub { - inherit (source) url owner repo rev; - sha256 = source.hash; - } - else if source.type == "gitlab" then - fetchFromGitLab { - inherit (source) url owner repo rev; - sha256 = source.hash; - } - else if source.type == "git" then - fetchgit { - inherit (source) url rev; - sha256 = source.hash; - } - else if source.type == "fetchurl" then - fetchurl { - inherit (source) url; - sha256 = source.hash; - } - else throw "unsupported source type '${source.type}'" -) sources +{ + # attrset: pname -> path of downloaded source + fetchedSources = lib.mapAttrs (pname: source: + if source.type == "github" then + fetchFromGitHub { + inherit (source) url owner repo rev; + sha256 = source.hash or null; + } + else if source.type == "gitlab" then + fetchFromGitLab { + inherit (source) url owner repo rev; + sha256 = source.hash or null; + } + else if source.type == "git" then + fetchgit { + inherit (source) url rev; + sha256 = source.hash or null; + } + else if source.type == "fetchurl" then + fetchurl { + inherit (source) url; + sha256 = source.hash or null; + } + else throw "unsupported source type '${source.type}'" + ) sources; +} diff --git a/src/fetchers/default.nix b/src/fetchers/default.nix index 78611556..8a0d3434 100644 --- a/src/fetchers/default.nix +++ b/src/fetchers/default.nix @@ -1,6 +1,8 @@ { callPackage, }: -{ +rec { 
defaultFetcher = callPackage ./default-fetcher.nix {}; + + combinedFetcher = callPackage ./combined-fetcher.nix { inherit defaultFetcher; }; } diff --git a/src/translators/python/external-pip/generate-generic-lock.py b/src/translators/python/external-pip/generate-generic-lock.py index 56e36cc6..b6159593 100644 --- a/src/translators/python/external-pip/generate-generic-lock.py +++ b/src/translators/python/external-pip/generate-generic-lock.py @@ -47,12 +47,14 @@ def main(): generic={ "buildSystem": "python", "buildSystemFormatVersion": 1, - "producedBy": "translator-external-pip", + "producedBy": "external-pip", # This translator is not aware of the exact dependency graph. # This restricts us to use a single derivation builder later, # which will install all packages at once "dependencyGraph": None, + + "sourcesCombinedHash": None, }, buildSystem={ "pythonAttr": f"python{sys.version_info.major}{sys.version_info.minor}",