From 2c1701b68122f44911872e95c0f0e1fe37375ebb Mon Sep 17 00:00:00 2001 From: Andrea Ciceri Date: Thu, 23 Jun 2022 02:39:35 +0200 Subject: [PATCH] Supporting `ca-derivations` experimental feature (#1494) * Supporting `ca-derivations` experimental feature * Improved `contentAddressed.include` option description * Ensure deterministic output for content addressed components * Comment transformed into a Nix comment To avoid possibly useless rebuilds * Use `modules` arg `contentAddressed` flag * Tutorial about CA derivations Co-authored-by: Hamish Mackenzie --- builder/comp-builder.nix | 21 +++++++- docs/tutorials/ca-derivations.md | 65 +++++++++++++++++++++++++ modules/plan.nix | 8 +++ test/ca-derivations-include/default.nix | 57 ++++++++++++++++++++++ test/ca-derivations/default.nix | 59 ++++++++++++++++++++++ test/default.nix | 3 ++ test/tests.sh | 5 +- 7 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 docs/tutorials/ca-derivations.md create mode 100644 test/ca-derivations-include/default.nix create mode 100644 test/ca-derivations/default.nix diff --git a/builder/comp-builder.nix b/builder/comp-builder.nix index 501ee86a..28ba9bf8 100644 --- a/builder/comp-builder.nix +++ b/builder/comp-builder.nix @@ -50,6 +50,7 @@ let self = , writeHieFiles ? component.writeHieFiles , ghcOptions ? component.ghcOptions +, contentAddressed ? component.contentAddressed # Options for Haddock generation , doHaddock ? 
component.doHaddock # Enable haddock and hoogle generation @@ -316,7 +317,13 @@ let componentDrv = drv; }; - drv = stdenv.mkDerivation (commonAttrs // { + contentAddressedAttrs = lib.optionalAttrs contentAddressed { + __contentAddressed = true; + outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + }; + + drv = stdenv.mkDerivation (commonAttrs // contentAddressedAttrs // { pname = nameOnly; inherit (package.identifier) version; @@ -405,7 +412,17 @@ let (lib.optionalString stdenv.hostPlatform.isWindows '' export pkgsHostTargetAsString="''${pkgsHostTarget[@]}" '') + - (if stdenv.hostPlatform.isGhcjs then '' + # The following could be refactored but would lead to many rebuilds + + # In case of content addressed components we need avoid parallel building (passing -j1) + # in order to have a deterministic output and therefore avoid potential situations + # where the binary cache becomes useless + # See also https://gitlab.haskell.org/ghc/ghc/-/issues/12935 + (if contentAddressed then '' + runHook preBuild + $SETUP_HS build ${haskellLib.componentTarget componentId} -j1 ${lib.concatStringsSep " " setupBuildFlags} + runHook postBuild + '' else if stdenv.hostPlatform.isGhcjs then '' runHook preBuild # https://gitlab.haskell.org/ghc/ghc/issues/9221 $SETUP_HS build ${haskellLib.componentTarget componentId} ${lib.concatStringsSep " " setupBuildFlags} diff --git a/docs/tutorials/ca-derivations.md b/docs/tutorials/ca-derivations.md new file mode 100644 index 00000000..254f935e --- /dev/null +++ b/docs/tutorials/ca-derivations.md @@ -0,0 +1,65 @@ +# Content addressed derivations +## Introduction + +Floating content addressed derivations (from now on *CA derivations*) are an experimental feature which substantially changes how the hashes in the store paths are calculated. +Normally derivations are input addressed, i.e. the output store paths depend only on the derivation inputs; with CA derivations they instead depend on the content of the outputs. 
+ +This has two main advantages: + +- The so-called "early cutoff", namely the ability of Nix to stop a rebuild as soon as a build output turns out to be identical to one that was already built. +For example, suppose you add a comment to a Haskell source file: Nix will rebuild the component depending on this source, but since the output will be the same (adding a comment is an "output-invariant" change for `ghc`), every other component that depends on it will not be rebuilt. +- Users of the same Nix store do not need to trust each other when using substituters. + +You can find more information in the [ca-derivations page on the wiki](https://nixos.wiki/wiki/Ca-derivations) (and in the other resources linked there). + +## Usage +### Enable CA derivations in your system +First of all, your Nix installation must support the `ca-derivations` experimental feature; this can be done by adding the following to your `nix.conf`: + +``` +experimental-features = ca-derivations +``` + +Or if you use NixOS: +``` +nix.extraOptions = '' + experimental-features = ca-derivations +''; +``` + +### Enable CA derivations in your project +At this point you can pass a new module to `project'` that tells `haskell.nix` to build every component in the project as a CA derivation. + +``` +haskell-nix.project' { + # ... + + modules = [{ + contentAddressed = true; + # packages.project-name.components.exes.executable.contentAddressed = true; + }]; +}; +``` + +Optionally you can also specify which components you don't want to be content addressed, by setting `contentAddressed = false` on them (e.g. `packages.project-name.components.exes.executable.contentAddressed = false;`). + +## Known problems +### Limitation of the current CA derivations implementation + +As explained in [RFC 62](https://github.com/tweag/rfcs/blob/cas-rfc/rfcs/0062-content-addressed-paths.md): + +> The current implementation has a naive approach that just forbids fetching a path if the local system has a different realisation for the same drv output. 
This approach is simple and correct, but it's possible that it might not be good-enough in practice as it can result in a totally useless binary cache in some pathological cases. + +For example, suppose that your machine builds a derivation `A` producing an output `A.out` in your store and that after that a CI machine builds the same derivation `A` but producing a different output `A.out'` and populating a cache with this output. +At this point, if you need to build a derivation `B` that depends on `A`, since you already have the realisation `A.out` in your local store you can't get `B.out` from the cache, and you will end up building `B` even if one of its realisations is in the cache. + +This means that, in some cases, enabling CA derivations can lead to more rebuilds than not enabling them. + +### Hydra +Hydra currently doesn't support CA derivations; efforts are being made in this direction. + + +### GHC is not deterministic +Currently `ghc` is deterministic only when parallel building is disabled, i.e. when passing `-j1`. See the [upstream issue](https://gitlab.haskell.org/ghc/ghc/-/issues/12935). + +Having a deterministic `ghc` would be a dream, since it would automatically fix all the pathological cases about substituters discussed above and would allow `haskell.nix` to build in parallel even when using CA derivations. diff --git a/modules/plan.nix b/modules/plan.nix index 84ba1dc8..b1519571 100644 --- a/modules/plan.nix +++ b/modules/plan.nix @@ -244,6 +244,14 @@ let type = listOfFilteringNulls str; default = def.ghcOptions or []; }; + contentAddressed = mkOption { + type = bool; + default = (def.contentAddressed or false); + description = '' + Build content addressed derivation, requires Nix to have experimental feature + `ca-derivations` enabled. 
+ ''; + }; planned = mkOption { description = "Set to true by `plan-to-nix` for any component that was included in the `plan.json` file."; # This is here so that (rather than in componentOptions) so it can be set project wide for stack projects diff --git a/test/ca-derivations-include/default.nix b/test/ca-derivations-include/default.nix new file mode 100644 index 00000000..3543625a --- /dev/null +++ b/test/ca-derivations-include/default.nix @@ -0,0 +1,57 @@ +# Build a project with content addressed derivations enabled for +# only a subset of the components +{ stdenv, pkgs, lib, mkCabalProjectPkgSet, project', haskellLib, recurseIntoAttrs, testSrc, compiler-nix-name, CADerivationsEnabled }: + +with lib; + +let + + cabalProject = '' + packages: . + allow-newer: aeson:* + ''; + + src = testSrc "cabal-simple"; + + # projectA: every component is content addressed + projectA = project' { + inherit compiler-nix-name src cabalProject; + modules = [{ contentAddressed = true; }]; + }; + + # projectB: every component but one (the executable) is content addressed + projectB = project' { + inherit compiler-nix-name src cabalProject; + modules = [{ + contentAddressed = true; + packages.cabal-simple.components.exes.cabal-simple.contentAddressed = false; + }]; + }; + + exeA = projectA.hsPkgs.cabal-simple.components.exes.cabal-simple.exePath; + exeB = projectB.hsPkgs.cabal-simple.components.exes.cabal-simple.exePath; + +in +recurseIntoAttrs { + + meta.disabled = !CADerivationsEnabled; + + # fail if the two executable paths are equal (only exeA is built content addressed); + # the other components are content addressed in both projects (same output paths then) + run = stdenv.mkDerivation { + name = "ca-derivations-include-test"; + + buildCommand = '' + [ "${exeA}" == "${exeB}" ] && exit 1 + touch $out + ''; + + meta.platforms = platforms.all; + + passthru = { + # Used for debugging with nix repl + inherit projectB projectA; + }; + }; + +} diff --git a/test/ca-derivations/default.nix b/test/ca-derivations/default.nix new file 
mode 100644 index 00000000..a369726e --- /dev/null +++ b/test/ca-derivations/default.nix @@ -0,0 +1,59 @@ +# Test that derivations are content addressed by building two derivations producing +# the same outputs and checking that their executable paths are equal +{ stdenv, pkgs, lib, mkCabalProjectPkgSet, project', haskellLib, recurseIntoAttrs, testSrc, compiler-nix-name, CADerivationsEnabled }: + +with lib; + +let + + cabalProject = '' + packages: . + allow-newer: aeson:* + ''; + + srcPlain = testSrc "cabal-simple"; + + # we alter the source by appending a Haskell comment to Main.hs, since comments are ignored by ghc + srcWithComment = pkgs.runCommand "src-with-comment" { } '' + mkdir $out + install ${srcPlain}/* $out + echo " -- Altering source without altering executable..." >> $out/Main.hs + ''; + + projectPlain = project' { + inherit compiler-nix-name cabalProject; + src = srcPlain; + modules = [{ contentAddressed = true; }]; + }; + + projectWithComment = project' { + inherit compiler-nix-name cabalProject; + src = srcWithComment; + modules = [{ contentAddressed = true; }]; + }; + + exe-plain = projectPlain.hsPkgs.cabal-simple.components.exes.cabal-simple.exePath; + exe-withComment = projectWithComment.hsPkgs.cabal-simple.components.exes.cabal-simple.exePath; + +in +recurseIntoAttrs { + + meta.disabled = !CADerivationsEnabled; + + run = stdenv.mkDerivation { + name = "ca-derivations-test"; + + buildCommand = '' + [ "${exe-plain}" != "${exe-withComment}" ] && exit 1 + touch $out + ''; + + meta.platforms = platforms.all; + + passthru = { + # Used for debugging with nix repl + inherit projectWithComment projectPlain; + }; + }; + +} diff --git a/test/default.nix b/test/default.nix index 54a9568f..e29d23cc 100644 --- a/test/default.nix +++ b/test/default.nix @@ -4,6 +4,7 @@ , nixpkgsArgs ? haskellNix.nixpkgsArgs , ifdLevel ? 1000 , compiler-nix-name +, CADerivationsEnabled ? false , checkMaterialization ? 
false }: @@ -206,6 +207,8 @@ let external-static-plugin = callTest ./external-static-plugin { inherit compiler-nix-name; }; exe-dlls = callTest ./exe-dlls { inherit util compiler-nix-name; }; exe-lib-dlls = callTest ./exe-lib-dlls { inherit util compiler-nix-name; }; + ca-derivations = callTest ./ca-derivations { inherit compiler-nix-name CADerivationsEnabled; }; + ca-derivations-include = callTest ./ca-derivations-include { inherit compiler-nix-name CADerivationsEnabled; }; unit = unitTests; }; diff --git a/test/tests.sh b/test/tests.sh index b0bb18b0..2cbf0387 100755 --- a/test/tests.sh +++ b/test/tests.sh @@ -3,6 +3,8 @@ set -euo pipefail +# "true" if Nix has the `ca-derivations` experimental feature (code 0) enabled; falls back to "false" when nix or jq fails so that --arg below always gets a value +NIX_CA_DERIVATIONS=$(jq -e '."experimental-features".value|any(. == 0)' <<< "$(nix show-config --json)") || NIX_CA_DERIVATIONS=false NIX_BUILD_ARGS="${NIX_BUILD_ARGS:-}" cd $(dirname $0) @@ -24,7 +26,8 @@ nix build $NIX_BUILD_ARGS \ --option restrict-eval true \ --option allowed-uris "https://github.com/NixOS https://github.com/input-output-hk" \ --no-link --keep-going -f default.nix \ - --argstr compiler-nix-name $GHC + --argstr compiler-nix-name $GHC \ + --arg CADerivationsEnabled "$NIX_CA_DERIVATIONS" echo >& 2 printf "*** Running the unit tests... " >& 2