Merge pull request #330 from nix-community/indexers

Indexers improvements (multiple branches, log translation errors, haskell improvements)
This commit is contained in:
DavHau 2022-10-15 00:27:27 +02:00 committed by GitHub
commit 19b63ebd0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 101 additions and 48 deletions

View File

@ -7,17 +7,21 @@
(inp.dream2nix.lib.makeFlakeOutputsForIndexes {
source = ./.;
systems = ["x86_64-linux"];
indexes = {
libraries-io = {
indexes = [
{
name = "libraries-io";
platform = "npm";
number = 5;
};
crates-io = {};
crates-io-simple = {
}
{
name = "crates-io";
}
{
name = "crates-io-simple";
sortBy = "name";
maxPages = 1;
};
};
}
];
packageOverrides = {
"^.*$".disable-build = {
buildScript = ":";

View File

@ -13,19 +13,30 @@
utils.writePureShellScript
[coreutils translate jq python3]
''
jobJson=$1
job_nr=$2
time=$(date +%s)
runtime=$(($time - $start_time))
average_runtime=$(python3 -c "print($runtime / $job_nr)")
total_remaining_time=$(python3 -c "print($average_runtime * ($num_jobs - $job_nr))")
echo "starting job nr. $job_nr; average job runtime: $average_runtime sec; remaining time: $total_remaining_time sec"
translate $1 $targetDir || echo "Failed to translate $1"
# if job fails, store error in ./translation-errors/$jobId.log
translate $jobJson $targetDir &> $TMPDIR/log \
|| (
echo "Failed to translate $1"
jobId=$(jq '.id' -c -r <(echo "$jobJson"))
logFile="./translation-errors/$jobId.log"
mkdir -p $(dirname "$logFile")
cp $TMPDIR/log "$logFile"
)
'';
in
utils.writePureShellScriptBin
"translate-index"
[coreutils translate jq parallel]
[coreutils translate jq parallel python3]
''
set -e
usage="usage:
$0 INDEX_PATH TARGET_DIR"
@ -49,7 +60,7 @@ in
# build translator executables
export TRANSLATOR_DIR=$TMPDIR/translators
for translator in $(jq '.[] | .translator' -c -r libraries-io/index.json); do
for translator in $(jq '.[] | .translator' -c -r $index); do
bin="$TRANSLATOR_DIR/$translator"
if [ ! -e "$bin" ]; then
echo "building executable for translator $translator"
@ -64,4 +75,6 @@ in
runtime=$(($(date +%s) - $start_time))
echo "FINISHED! Executed $num_jobs jobs in $runtime seconds"
python3 ${./summarize-stats.py} translation-errors.json
''

View File

@ -0,0 +1,11 @@
import json
import os
import sys
# Summarize failed translations: every entry in ./translation-errors is a
# per-project log file written by the translate job when it fails. The entry
# names are stored as a JSON list at the path given as the first CLI argument.
errors_dir = 'translation-errors'

# The directory is only created once a job has actually failed, so a fully
# successful run must be treated as "no failures" rather than crashing.
# Sorting keeps the generated JSON stable between runs.
if os.path.isdir(errors_dir):
    failed_proj_ids = sorted(os.listdir(errors_dir))
else:
    failed_proj_ids = []

if failed_proj_ids:
    print("saving list of failed projects in ./translation-errors.json")
    print("failure logs can be found in ./translation-errors/")
    with open(sys.argv[1], 'w') as f:
        json.dump(failed_proj_ids, f, indent=2)

View File

@ -48,6 +48,7 @@ libraries.io also supports other interesting popularity metrics:
export platform=$(jq '.platform' -c -r $input)
export number=$(jq '.number' -c -r $input)
export urlSuffix=$(jq '.urlSuffix? //""' -c -r $input)
# calculate number of pages to query
# page size is always 100
@ -65,7 +66,7 @@ libraries.io also supports other interesting popularity metrics:
echo "[]" > $outFile
for page in $(seq 1 $numPages); do
echo "requesting page $page"
url="https://libraries.io/api/search?page=$page&sort=dependents_count&per_page=100&platforms=$platformQuery&api_key=$apiKey"
url="https://libraries.io/api/search?page=$page&sort=dependents_count&per_page=100&platforms=$platformQuery&api_key=$apiKey$urlSuffix"
curl -k "$url" | python3 ${./process-result.py} $outFile
done
'';

View File

@ -6,13 +6,24 @@ out_file = sys.argv[1]
platform = os.environ.get("platform")
number = int(os.environ.get("number"))
# Pick the ordering used to find a package's newest version.
# For hackage, versions are compared numerically by their dot-separated
# components; for every other platform the publication timestamp is used.
if platform == "hackage":
    sort_key = lambda v: [int(n) for n in v['number'].split('.')]
else:
    # Plain assignment: the previous `sort_key = key=lambda ...` was a chained
    # assignment that also bound an unintended module-level name `key`.
    sort_key = lambda v: v['published_at']
input = json.load(sys.stdin)
projects = []
for package in input:
versions = package['versions']
versions = sorted(versions, key=lambda v: v['published_at'], reverse=True)
versions = sorted(versions, key=sort_key, reverse=True)
if versions:
latest_version = versions[0]['number']
# latest_stable_release_number is often wrong for hackage
if platform == "hackage":
latest_version = versions[0]['number']
else:
latest_version = package["latest_stable_release_number"]
if latest_version == None:
latest_version = versions[0]['number']
projects.append(dict(
id=f"{package['name']}-{latest_version}",
name=package['name'],

View File

@ -119,9 +119,6 @@ in {
src = getSource name version;
# ignore if dependencies contain multiple versions of the same package
jailbreak = true;
isLibrary = true;
isExecutable = true;
doCheck = false;
@ -133,16 +130,9 @@ in {
testToolDepends = libraryHaskellDepends;
libraryHaskellDepends =
(with compiler; [
# TODO: remove these deps / find out why they were missing
hspec
QuickCheck
])
++ (
map
(dep: allPackages."${dep.name}"."${dep.version}")
(getDependencies name version)
);
map
(dep: allPackages."${dep.name}"."${dep.version}")
(getDependencies name version);
}
/*
For all transitive dependencies, overwrite cabal file with the one

View File

@ -31,6 +31,7 @@
moreutils
nix
python3
util-linux
])
''
# according to the spec, the translator reads the input from a json file
@ -43,15 +44,33 @@
source=$(jq '.source' -c -r $jsonInput)
relPath=$(jq '.project.relPath' -c -r $jsonInput)
# update the cabal index if older than 1 day
(
flock 9 || exit 1
# ... commands executed under lock ...
cabalIndex="$HOME/.cabal/packages/hackage.haskell.org/01-index.cache"
set -x
if [ -e "$cabalIndex" ]; then
indexTime=$(stat -c '%Y' "$cabalIndex")
age=$(( $(date +%s) - $indexTime ))
if [ "$age" -gt "$((60*60*24))" ]; then
cabal update
fi
else
cabal update
fi
) 9>/tmp/cabal-lock
pushd $TMPDIR
# download and unpack package source
mkdir source
curl -L https://hackage.haskell.org/package/$name-$version/$name-$version.tar.gz > $TMPDIR/tarball
url="https://hackage.haskell.org/package/$name-$version/$name-$version.tar.gz"
echo "downloading $url"
curl -L "$url" > $TMPDIR/tarball
cd source
cat $TMPDIR/tarball | tar xz --strip-components 1
# trigger creation of `dist-newstyle` directory
cabal update
cabal freeze
cd -

View File

@ -62,7 +62,7 @@ in rec {
sourceOverrides ? {},
}: let
l = lib // builtins;
indexNames = l.attrNames indexes;
indexNames = l.map (index: index.name) indexes;
mkApp = script: {
type = "app";
@ -70,13 +70,14 @@ in rec {
};
mkIndexApp = name: input: let
inputFinal = {outputFile = "${name}/index.json";} // input;
input' = l.removeAttrs input ["indexer"];
inputFinal = {outputFile = "${name}/index.json";} // input';
script = pkgs.writers.writeBash "index" ''
set -e
inputJson="$(${pkgs.coreutils}/bin/mktemp)"
echo '${l.toJSON inputFinal}' > $inputJson
mkdir -p $(dirname ${inputFinal.outputFile})
${apps.index}/bin/index ${name} $inputJson
${apps.index}/bin/index ${input.indexer} $inputJson
'';
in
mkApp script;
@ -90,7 +91,7 @@ in rec {
''
);
mkCiAppWith = commands:
mkCiAppWith = branchname: commands:
mkApp (
utils.writePureShellScript
(with pkgs; [
@ -100,17 +101,18 @@ in rec {
openssh
])
''
flake=$(cat flake.nix)
flakeLock=$(cat flake.lock)
set -e
rm -rf /tmp/.flake
mkdir -p /tmp/.flake
cp -r ./* /tmp/.flake
set -x
git fetch origin data || :
git checkout -f origin/data || :
git branch -D data || :
git checkout -b data
git fetch origin ${branchname} || :
git checkout -f origin/${branchname} || :
git branch -D ${branchname} || :
git checkout -b ${branchname}
# the flake should always be the one from the current main branch
rm -rf ./*
echo "$flake" > flake.nix
echo "$flakeLock" > flake.lock
cp -r /tmp/.flake/. ./
${commands}
git add .
git commit -m "automatic update - $(date --rfc-3339=seconds)"
@ -119,6 +121,7 @@ in rec {
mkCiJobApp = name: input:
mkCiAppWith
name
''
${(mkIndexApp name input).program}
${(mkTranslateApp name).program}
@ -136,29 +139,30 @@ in rec {
);
indexApps = l.listToAttrs (
l.mapAttrsToList
l.map
(
name: input:
input:
l.nameValuePair
"index-${name}"
(mkIndexApp name input)
"index-${input.name}"
(mkIndexApp input.name input)
)
indexes
);
ciJobApps = l.listToAttrs (
l.mapAttrsToList
l.map
(
name: input:
input:
l.nameValuePair
"ci-job-${name}"
(mkCiJobApp name input)
"ci-job-${input.name}"
(mkCiJobApp input.name input)
)
indexes
);
ciJobAllApp =
mkCiAppWith
"pkgs-all"
''
${lib.concatStringsSep "\n" (l.mapAttrsToList (_: app: app.program) indexApps)}
${lib.concatStringsSep "\n" (l.mapAttrsToList (_: app: app.program) translateApps)}