Supporting ca-derivations experimental feature (#1494)

* Supporting `ca-derivations` experimental feature

* Improved `contentAddressed.include` option description

* Ensure deterministic output for content addressed components

* Comment transformed into a Nix comment

To avoid possibly useless rebuilds

* Use `modules` arg `contentAddressed` flag

* Tutorial about CA derivations

Co-authored-by: Hamish Mackenzie <Hamish.K.Mackenzie@gmail.com>
This commit is contained in:
Andrea Ciceri 2022-06-23 02:39:35 +02:00 committed by GitHub
parent 2a89c68994
commit 2c1701b681
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 215 additions and 3 deletions

View File

@ -50,6 +50,7 @@ let self =
, writeHieFiles ? component.writeHieFiles
, ghcOptions ? component.ghcOptions
, contentAddressed ? component.contentAddressed
# Options for Haddock generation
, doHaddock ? component.doHaddock # Enable haddock and hoogle generation
@ -316,7 +317,13 @@ let
componentDrv = drv;
};
drv = stdenv.mkDerivation (commonAttrs // {
# Extra derivation attributes merged into the component derivation when the
# `contentAddressed` flag is set; an empty set otherwise.
contentAddressedAttrs = lib.optionalAttrs contentAddressed {
  # Hash settings applied to the derivation outputs.
  outputHashAlgo = "sha256";
  outputHashMode = "recursive";
  # Marks the derivation as content addressed.
  __contentAddressed = true;
};
drv = stdenv.mkDerivation (commonAttrs // contentAddressedAttrs // {
pname = nameOnly;
inherit (package.identifier) version;
@ -405,7 +412,17 @@ let
(lib.optionalString stdenv.hostPlatform.isWindows ''
export pkgsHostTargetAsString="''${pkgsHostTarget[@]}"
'') +
(if stdenv.hostPlatform.isGhcjs then ''
# The following could be refactored but would lead to many rebuilds
# For content addressed components we need to avoid parallel building (by passing -j1)
# in order to get a deterministic output and therefore avoid situations
# where the binary cache becomes useless
# See also https://gitlab.haskell.org/ghc/ghc/-/issues/12935
(if contentAddressed then ''
runHook preBuild
$SETUP_HS build ${haskellLib.componentTarget componentId} -j1 ${lib.concatStringsSep " " setupBuildFlags}
runHook postBuild
'' else if stdenv.hostPlatform.isGhcjs then ''
runHook preBuild
# https://gitlab.haskell.org/ghc/ghc/issues/9221
$SETUP_HS build ${haskellLib.componentTarget componentId} ${lib.concatStringsSep " " setupBuildFlags}

View File

@ -0,0 +1,65 @@
# Content addressed derivations
## Introduction
Floating content addressed derivations (from now on *CA derivations*) are an experimental feature which substantially changes how the hashes in the store paths are calculated.
Normally derivations are input addressed, i.e. the store paths of the outputs depend only on the derivation inputs; with CA derivations they depend instead on the content of the outputs.
This has two main advantages:
- The so-called "early cutoff", namely the ability of Nix to stop rebuilding dependents when a build produces an output that is identical to one that was already built.
For example, suppose you add a comment to a Haskell source file: Nix will rebuild the component that contains this source, but since the output will be the same (adding a comment is an "output-invariant" change for `ghc`), every other component that depends on it will not be rebuilt.
- Users of the same Nix store do not need to trust each other when using substituters.
You can find more information in the [ca-derivations page on the wiki](https://nixos.wiki/wiki/Ca-derivations) (and in the other resources linked there).
## Usage
### Enable CA derivations in your system
First of all, your Nix installation must support the `ca-derivations` experimental feature; this can be done by adding the following to your `nix.conf`:
```
experimental-features = ca-derivations
```
Or if you use NixOS:
```
nix.extraOptions = ''
experimental-features = ca-derivations
'';
```
### Enable CA derivations in your project
At this point you can pass a new module to `project'` that tells `haskell.nix` to build every component in the project as a CA derivation.
```
haskell-nix.project' {
# ...
modules = [{
contentAddressed = true;
# packages.project-name.components.exes.executable.contentAddressed = true;
}];
};
```
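Optionally, you can also specify which components you don't want to be content addressed by setting their own `contentAddressed` option to `false`, e.g. `packages.project-name.components.exes.executable.contentAddressed = false;`.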
## Known problems
### Limitation of the current CA derivations implementation
As explained in [RFC 62](https://github.com/tweag/rfcs/blob/cas-rfc/rfcs/0062-content-addressed-paths.md):
> The current implementation has a naive approach that just forbids fetching a path if the local system has a different realisation for the same drv output. This approach is simple and correct, but it's possible that it might not be good-enough in practice as it can result in a totally useless binary cache in some pathological cases.
For example, suppose that your machine builds a derivation `A` producing an output `A.out` in your store and that after that a CI machine builds the same derivation `A` but producing a different output `A.out'` and populating a cache with this output.
At this point, if you need to build a derivation `B` that depends on `A`, you can't get `B.out` from the cache because you already have the realisation `A.out` in your local store, so you will end up building `B` even though one of its realisations is in the cache.
This means that, in some cases, enabling CA derivations would lead to more rebuilds than not having it.
### Hydra
Hydra currently doesn't support CA derivations, but efforts are being made in this direction.
### GHC is not deterministic
Currently `ghc` is deterministic only when parallel building is disabled, i.e. when passing `-j1`. See the [upstream issue](https://gitlab.haskell.org/ghc/ghc/-/issues/12935).
Having a deterministic `ghc` would be a dream, since it would automatically fix all the pathological cases about substituters discussed above and would allow `haskell.nix` to build in parallel even when using CA derivations.

View File

@ -244,6 +244,14 @@ let
type = listOfFilteringNulls str;
default = def.ghcOptions or [];
};
# Boolean switch for building as a content addressed derivation; falls back
# to `def.contentAddressed` when present and to `false` otherwise.
contentAddressed = mkOption {
  description = ''
    Build content addressed derivation, requires Nix to have experimental feature
    `ca-derivations` enabled.
  '';
  type = bool;
  default = def.contentAddressed or false;
};
planned = mkOption {
description = "Set to true by `plan-to-nix` for any component that was included in the `plan.json` file.";
# This is here so that (rather than in componentOptions) so it can be set project wide for stack projects

View File

@ -0,0 +1,57 @@
# Builds the `cabal-simple` project twice: once with every component content
# addressed and once with the executable opted out, then checks that the two
# executables end up at different paths.
{ stdenv, pkgs, lib, mkCabalProjectPkgSet, project', haskellLib, recurseIntoAttrs, testSrc, compiler-nix-name, CADerivationsEnabled }:

let
  src = testSrc "cabal-simple";

  cabalProject = ''
    packages: .
    allow-newer: aeson:*
  '';

  # Instantiate the test project with the given list of modules.
  mkProject = modules: project' {
    inherit compiler-nix-name src cabalProject modules;
  };

  # Path of the `cabal-simple` executable produced by a project.
  exePathOf = proj: proj.hsPkgs.cabal-simple.components.exes.cabal-simple.exePath;

  # Every derivation is content addressed.
  projectA = mkProject [{ contentAddressed = true; }];

  # Every derivation except the executable is content addressed.
  projectB = mkProject [{
    contentAddressed = true;
    packages.cabal-simple.components.exes.cabal-simple.contentAddressed = false;
  }];

  allCAExe = exePathOf projectA;
  optedOutExe = exePathOf projectB;

in recurseIntoAttrs {
  meta.disabled = !CADerivationsEnabled;

  # The executables must differ (only one of them is content addressed); the
  # remaining components are content addressed in both projects.
  run = stdenv.mkDerivation {
    name = "ca-derivations-include-test";
    meta.platforms = lib.platforms.all;

    buildCommand = ''
      [ "${allCAExe}" == "${optedOutExe}" ] && exit 1
      touch $out
    '';

    passthru = {
      # Exposed for debugging with nix repl
      inherit projectA projectB;
    };
  };
}

View File

@ -0,0 +1,59 @@
# Checks that derivations are really content addressed: two projects whose
# sources differ only by a Haskell comment are built, and the store paths of
# the resulting executables are required to be equal.
{ stdenv, pkgs, lib, mkCabalProjectPkgSet, project', haskellLib, recurseIntoAttrs, testSrc, compiler-nix-name, CADerivationsEnabled }:

let
  cabalProject = ''
    packages: .
    allow-newer: aeson:*
  '';

  srcPlain = testSrc "cabal-simple";

  # Same source with a Haskell comment appended to Main.hs; comments are
  # ignored by ghc, so the built executable is expected to stay the same.
  srcWithComment = pkgs.runCommand "src-with-comment" { } ''
    mkdir $out
    install ${srcPlain}/* $out
    echo " -- Altering source without altering executable..." >> $out/Main.hs
  '';

  # Instantiate a fully content addressed project from the given source.
  mkCAProject = src: project' {
    inherit compiler-nix-name cabalProject src;
    modules = [{ contentAddressed = true; }];
  };

  # Path of the `cabal-simple` executable produced by a project.
  exePathOf = proj: proj.hsPkgs.cabal-simple.components.exes.cabal-simple.exePath;

  projectPlain = mkCAProject srcPlain;
  projectWithComment = mkCAProject srcWithComment;

  plainExe = exePathOf projectPlain;
  commentedExe = exePathOf projectWithComment;

in recurseIntoAttrs {
  meta.disabled = !CADerivationsEnabled;

  run = stdenv.mkDerivation {
    name = "ca-derivations-test";
    meta.platforms = lib.platforms.all;

    buildCommand = ''
      [ "${plainExe}" != "${commentedExe}" ] && exit 1
      touch $out
    '';

    passthru = {
      # Exposed for debugging with nix repl
      inherit projectPlain projectWithComment;
    };
  };
}

View File

@ -4,6 +4,7 @@
, nixpkgsArgs ? haskellNix.nixpkgsArgs
, ifdLevel ? 1000
, compiler-nix-name
, CADerivationsEnabled ? false
, checkMaterialization ? false
}:
@ -206,6 +207,8 @@ let
external-static-plugin = callTest ./external-static-plugin { inherit compiler-nix-name; };
exe-dlls = callTest ./exe-dlls { inherit util compiler-nix-name; };
exe-lib-dlls = callTest ./exe-lib-dlls { inherit util compiler-nix-name; };
ca-derivations = callTest ./ca-derivations { inherit compiler-nix-name CADerivationsEnabled; };
ca-derivations-include = callTest ./ca-derivations-include { inherit compiler-nix-name CADerivationsEnabled; };
unit = unitTests;
};

View File

@ -3,6 +3,8 @@
set -euo pipefail
# Detect whether Nix has the `ca-derivations` experimental feature enabled.
# The feature is matched by its numeric code (0) and also by its name, in case
# `nix show-config --json` reports features as strings
# (NOTE(review): confirm the format against the Nix versions used in CI).
NIX_CA_DERIVATIONS=$(jq -e '."experimental-features".value|any(. == 0 or . == "ca-derivations")' <<< $(nix show-config --json)) || true
# If nix or jq failed (or the key is missing) the variable is empty; normalise
# anything other than `true` to `false` so that `--arg CADerivationsEnabled`
# always receives a valid Nix boolean.
[ "$NIX_CA_DERIVATIONS" = true ] || NIX_CA_DERIVATIONS=false
NIX_BUILD_ARGS="${NIX_BUILD_ARGS:-}"
cd $(dirname $0)
@ -24,7 +26,8 @@ nix build $NIX_BUILD_ARGS \
--option restrict-eval true \
--option allowed-uris "https://github.com/NixOS https://github.com/input-output-hk" \
--no-link --keep-going -f default.nix \
--argstr compiler-nix-name $GHC
--argstr compiler-nix-name $GHC \
--arg CADerivationsEnabled $NIX_CA_DERIVATIONS
echo >& 2
printf "*** Running the unit tests... " >& 2