Merge pull request #293 from nix-community/fix-index-apps

Improve indexer architecture
This commit is contained in:
DavHau 2022-09-12 12:29:25 +02:00 committed by GitHub
commit e0eefe6d08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 234 additions and 38 deletions

View File

@ -1,14 +1,25 @@
{ {
callNixWithD2N,
utils, utils,
translate, translate,
coreutils, coreutils,
jq, jq,
parallel, parallel,
python3,
writeScript, writeScript,
... ...
}: let }: let
script = writeScript "run-translate" '' script =
${translate}/bin/translate $1 $targetDir || echo "Failed to translate $1" utils.writePureShellScript
[coreutils translate jq python3]
''
job_nr=$2
time=$(date +%s)
runtime=$(($time - $start_time))
average_runtime=$(python3 -c "print($runtime / $job_nr)")
total_remaining_time=$(python3 -c "print($average_runtime * ($num_jobs - $job_nr))")
echo "starting job nr. $job_nr; average job runtime: $average_runtime sec; remaining time: $total_remaining_time sec"
translate $1 $targetDir || echo "Failed to translate $1"
''; '';
in in
utils.writePureShellScriptBin utils.writePureShellScriptBin
@ -31,6 +42,26 @@ in
export targetDir export targetDir
parallel --halt now,fail=1 -j$(nproc) --delay 1 -a <(jq '.[]' -c -r $index) ${script} export num_jobs=$(jq 'length' -c -r $index)
seq $num_jobs > $TMPDIR/job_numbers
JOBS=''${JOBS:-$(nproc)}
# build translator executables
export TRANSLATOR_DIR=$TMPDIR/translators
for translator in $(jq '.[] | .translator' -c -r libraries-io/index.json); do
bin="$TRANSLATOR_DIR/$translator"
if [ ! -e "$bin" ]; then
echo "building executable for translator $translator"
${callNixWithD2N} build -o "$bin" "
dream2nix.framework.translatorInstances.$translator.translateBin
"
fi
done
export start_time=$(date +%s)
parallel --halt now,fail=1 -j$JOBS --link -a <(jq '.[]' -c -r $index) -a $TMPDIR/job_numbers ${script}
runtime=$(($(date +%s) - $start_time))
echo "FINISHED! Executed $num_jobs jobs in $runtime seconds"
'' ''

View File

@ -46,9 +46,14 @@ utils.writePureShellScriptBin
echo -e "\nTranslating:: $name (translator: $translator) (lock path: $dreamLockPath)" echo -e "\nTranslating:: $name (translator: $translator) (lock path: $dreamLockPath)"
# allow pre-built translator executables to avoid the `nix build` on each run
if [ -n "$TRANSLATOR_DIR" ]; then
translateBin="$TRANSLATOR_DIR/$translator"
else
translateBin=$(${callNixWithD2N} build --print-out-paths --no-link " translateBin=$(${callNixWithD2N} build --print-out-paths --no-link "
dream2nix.framework.translatorInstances.$translator.translateBin dream2nix.framework.translatorInstances.$translator.translateBin
") ")
fi
echo " echo "
{ {

View File

@ -20,6 +20,7 @@
moreutils, moreutils,
nodePackages, nodePackages,
openssh, openssh,
python3,
writeScriptBin, writeScriptBin,
... ...
}: }:
@ -32,6 +33,7 @@
moreutils moreutils
nodePackages.npm nodePackages.npm
openssh openssh
python3
] ]
'' ''
# according to the spec, the translator reads the input from a json file # according to the spec, the translator reads the input from a json file
@ -42,6 +44,9 @@
name=$(jq '.project.name' -c -r $jsonInput) name=$(jq '.project.name' -c -r $jsonInput)
version=$(jq '.project.version' -c -r $jsonInput) version=$(jq '.project.version' -c -r $jsonInput)
npmArgs=$(jq '.project.subsystemInfo.npmArgs' -c -r $jsonInput) npmArgs=$(jq '.project.subsystemInfo.npmArgs' -c -r $jsonInput)
if [ "$npmArgs" == "null" ]; then
npmArgs=
fi
if [ "$version" = "null" ]; then if [ "$version" = "null" ]; then
candidate="$name" candidate="$name"
@ -49,7 +54,6 @@
candidate="$name@$version" candidate="$name@$version"
fi fi
pushd $TMPDIR pushd $TMPDIR
newSource=$(pwd) newSource=$(pwd)
@ -62,26 +66,14 @@
# call package-lock translator # call package-lock translator
${subsystems.nodejs.translators.package-lock.translateBin} $TMPDIR/newJsonInput ${subsystems.nodejs.translators.package-lock.translateBin} $TMPDIR/newJsonInput
# generate source info for main package # get resolved package version
url=$(npm view $candidate dist.tarball) export version=$(npm view $candidate version)
hash=$(npm view $candidate dist.integrity)
echo " # set correct package version under `packages`
{ cat $outputFile \
\"type\": \"http\", | python3 ${./fixup-dream-lock.py} $TMPDIR/sourceInfo.json \
\"url\": \"$url\", | sponge $outputFile
\"hash\": \"$hash\"
}
" > $TMPDIR/sourceInfo.json
# add main package source info to dream-lock.json
${apps.callNixWithD2N} eval --json "
with dream2nix.utils.dreamLock;
replaceRootSources {
dreamLock = l.fromJSON (l.readFile \"$outputFile\");
newSourceRoot = l.fromJSON (l.readFile \"$TMPDIR/sourceInfo.json\");
}
" \
| sponge "$outputFile"
''; '';
# inherit options from package-lock translator # inherit options from package-lock translator

View File

@ -0,0 +1,14 @@
import json
import os
import sys
# The dream-lock document is piped in on stdin and the patched document is
# written to stdout, so the script can sit in the middle of a shell pipeline.
dream_lock = json.load(sys.stdin)

# The resolved package version is handed over through the environment
# variable `version`; it is None when that variable is not set.
resolved_version = os.environ.get('version')

# Replace the `packages` mapping so that it lists only the default package,
# pinned to the resolved version.
generic = dream_lock['_generic']
default_package = generic['defaultPackage']
generic['packages'] = {default_package: resolved_version}

sys.stdout.write(json.dumps(dream_lock, indent=2) + "\n")

View File

@ -34,7 +34,7 @@
translatorUtils = callPackageDream ./translator.nix {}; translatorUtils = callPackageDream ./translator.nix {};
indexUtils = callPackageDream ./index.nix {}; indexUtils = callPackageDream ./index {};
poetry2nixSemver = import "${externalSources.poetry2nix}/semver.nix" { poetry2nixSemver = import "${externalSources.poetry2nix}/semver.nix" {
inherit lib; inherit lib;

View File

@ -0,0 +1,59 @@
import json
import sys
import subprocess as sp
from pathlib import Path
def store_error(attrPath, category, text, name=None):
    """Persist one failure record as a JSON file below ``errors/``.

    The file name is ``attrPath`` with every ``/`` replaced by ``--`` so the
    attribute path cannot introduce sub-directories.

    Args:
        attrPath: dotted attribute path of the failed package.
        category: stage in which the failure happened (``'eval'`` or
            ``'build'``); stored under the JSON key ``during``.
        text: the error output to record.
        name: optional package name; stored as ``null`` when omitted.
    """
    # Callers do not consistently create the directory before reporting an
    # error, so make sure it exists here instead of failing on open().
    error_dir = Path('errors')
    error_dir.mkdir(exist_ok=True)

    record = dict(
        attrPath=attrPath,
        during=category,
        error=text,
        name=name,
    )
    with open(error_dir / attrPath.replace('/', '--'), 'w') as f:
        json.dump(record, f)
# The job description is a single JSON document passed as the first command
# line argument. It always carries `attr` and `attrPath`; on evaluation
# failure it carries `error`, otherwise `name` and `drvPath`.
job = json.loads(sys.argv[1])
attr = job['attr']
attrPath = '.'.join(job['attrPath'])

# handle eval error
if "error" in job:
    error = job['error']
    print(
        f"Evaluation failed. attr: {attr} attrPath: {attrPath}\n"
        # this fragment needs its own f-prefix, otherwise the literal text
        # "{error}" is printed instead of the actual error message
        f"Error:\n{error}",
        file=sys.stderr
    )
    store_error(attrPath, 'eval', error)
# try to build package
else:
    name = job['name']
    drvPath = job['drvPath']
    print(
        f"Building {name} attr: {attr} attrPath: {attrPath} "
        f"drvPath: ({drvPath})",
        file=sys.stderr
    )
    try:
        # output is captured so that the build log can be stored on failure;
        # check=True turns a non-zero exit status into CalledProcessError
        sp.run(
            ['nix', 'build', '-L', drvPath],
            capture_output=True,
            check=True,
        )
        print(
            f"Finished {name}. attr: {attr} attrPath: {attrPath}",
            file=sys.stderr
        )
    # handle build error: record it and exit normally so that one failing
    # package does not abort the remaining jobs
    except sp.CalledProcessError as error:
        Path('errors').mkdir(exist_ok=True)
        print(
            f"Error while building {name}. attr: {attr} attrPath: {attrPath}",
            file=sys.stderr
        )
        store_error(attrPath, 'build', error.stderr.decode(), name)

View File

@ -80,6 +80,7 @@ in rec {
''; '';
in in
mkApp script; mkApp script;
mkTranslateApp = name: mkTranslateApp = name:
mkApp ( mkApp (
pkgs.writers.writeBash "translate-${name}" '' pkgs.writers.writeBash "translate-${name}" ''
@ -88,27 +89,41 @@ in rec {
${name}/index.json ${name}/locks ${name}/index.json ${name}/locks
'' ''
); );
mkCiJobApp = name: input:
mkCiAppWith = commands:
mkApp ( mkApp (
utils.writePureShellScript utils.writePureShellScript
(with pkgs; [ (with pkgs; [
coreutils coreutils
git git
gnugrep gnugrep
openssh
]) ])
'' ''
mainBranch=$(git branch | grep -E '(master)|(main)') flake=$(cat flake.nix)
git branch data || : flakeLock=$(cat flake.lock)
git checkout data set -x
git fetch origin data || :
git checkout origin/data || :
git branch -D data || :
git checkout -b data
# the flake should always be the one from the current main branch # the flake should always be the one from the current main branch
git checkout $mainBranch flake.nix rm -rf ./*
git checkout $mainBranch flake.lock echo "$flake" > flake.nix
${(mkIndexApp name input).program} echo "$flakeLock" > flake.lock
${(mkTranslateApp name).program} ${commands}
git add . git add .
git commit "automatic update - $(date --rfc-3339=seconds)" git commit -m "automatic update - $(date --rfc-3339=seconds)"
'' ''
); );
mkCiJobApp = name: input:
mkCiAppWith
''
${(mkIndexApp name input).program}
${(mkTranslateApp name).program}
'';
translateApps = l.listToAttrs ( translateApps = l.listToAttrs (
l.map l.map
( (
@ -119,6 +134,7 @@ in rec {
) )
indexNames indexNames
); );
indexApps = l.listToAttrs ( indexApps = l.listToAttrs (
l.mapAttrsToList l.mapAttrsToList
( (
@ -129,6 +145,7 @@ in rec {
) )
indexes indexes
); );
ciJobApps = l.listToAttrs ( ciJobApps = l.listToAttrs (
l.mapAttrsToList l.mapAttrsToList
( (
@ -140,6 +157,50 @@ in rec {
indexes indexes
); );
# CI app that runs every index app followed by every translate app inside
# the shared CI wrapper (`mkCiAppWith`).
ciJobAllApp = let
  # one program path per line for all apps of the given attribute set
  programLines = apps:
    lib.concatStringsSep "\n" (l.mapAttrsToList (_: app: app.program) apps);
in
  mkCiAppWith
  ''
    ${programLines indexApps}
    ${programLines translateApps}
  '';
# App that evaluates all packages of the flake in the current directory
# with nix-eval-jobs, hands each resulting job to the build script via
# GNU parallel, and finally summarises the recorded errors.
#
# Environment variables read by the script:
#   JOBS       number of parallel build jobs (default: $(nproc))
#   EVAL_JOBS  number of nix-eval-jobs workers (default: 1)
#   LIMIT      only process the first N jobs; 0 means no limit (default: 0)
buildAllApp = let
  buildScript =
    pkgs.writers.writePython3 "build-job" {}
    ./build-script.py;
  # was also named "build-job" (copy-paste); give it a distinct name so the
  # two scripts can be told apart in store paths and process listings
  statsScript =
    pkgs.writers.writePython3 "make-stats" {}
    ./make-stats.py;
in
  mkApp (
    utils.writePureShellScript
    (with pkgs; [
      coreutils
      git
      parallel
      nix
      nix-eval-jobs
    ])
    ''
      # start from an empty error directory
      rm -rf ./errors
      mkdir -p ./errors
      JOBS=''${JOBS:-$(nproc)}
      EVAL_JOBS=''${EVAL_JOBS:-1}
      LIMIT=''${LIMIT:-0}
      # $limit is expanded unquoted below on purpose: it holds a command
      # plus its arguments
      if [ "$LIMIT" -gt "0" ]; then
        limit="head -n $LIMIT"
      else
        limit="cat"
      fi
      echo "settings: JOBS $JOBS; EVAL_JOBS: $EVAL_JOBS; LIMIT $LIMIT"
      # each output line of nix-eval-jobs becomes the single argument of
      # one build script invocation
      parallel --halt now,fail=1 -j$JOBS --link \
        -a <(nix-eval-jobs --gc-roots-dir $TMPDIR/gcroot --flake "$(realpath .)#packages.x86_64-linux" --workers $EVAL_JOBS --max-memory-size 3000 | $limit) \
        ${buildScript}
      ${statsScript}
      rm -r ./errors
    ''
  );
mkIndexOutputs = name: let mkIndexOutputs = name: let
src = "${toString source}/${name}/locks"; src = "${toString source}/${name}/locks";
in in
@ -167,7 +228,9 @@ in rec {
outputs = { outputs = {
packages = allPackages; packages = allPackages;
apps = apps =
indexApps {ci-job-all = ciJobAllApp;}
// {build-all = buildAllApp;}
// indexApps
// translateApps // translateApps
// ciJobApps; // ciJobApps;
}; };

View File

@ -0,0 +1,32 @@
import json
import os
def collect_errors(error_dir='errors'):
    """Load every error record in ``error_dir`` and count them by kind.

    Args:
        error_dir: directory containing one JSON error record per file.

    Returns:
        A tuple ``(all_errors, stats)``: ``all_errors`` maps each record's
        ``attrPath`` to the full record, and ``stats`` holds the counters
        ``errors``, ``errors_eval`` and ``errors_build``.
    """
    all_errors = {}
    eval_errors = 0
    build_errors = 0

    # sorted() only makes the order of the output deterministic
    for file_name in sorted(os.listdir(error_dir)):
        # os.listdir() returns bare file names, so the directory has to be
        # re-attached before opening the file
        with open(os.path.join(error_dir, file_name)) as f:
            error = json.load(f)

        # add error to all_errors
        all_errors[error['attrPath']] = error

        # count error types; the records written by the build script store
        # the kind under `during`, `category` is accepted as a fallback
        kind = error.get('during', error.get('category'))
        if kind == 'eval':
            eval_errors += 1
        else:
            build_errors += 1

    stats = dict(
        errors=eval_errors + build_errors,
        errors_eval=eval_errors,
        errors_build=build_errors,
    )
    return all_errors, stats


def main():
    """Write ``errors.json`` and ``stats.json`` into the working directory."""
    all_errors, stats = collect_errors()

    with open("errors.json", 'w') as f:
        json.dump(all_errors, f)

    with open('stats.json', 'w') as f:
        json.dump(stats, f)


if __name__ == "__main__":
    main()