Merge pull request #261 from nix-community/indexers-no-source

WIP: indexers: make use of source-less translators
This commit is contained in:
DavHau 2022-08-31 14:20:31 +02:00 committed by GitHub
commit 9977e5f015
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 416 additions and 154 deletions

View File

@ -0,0 +1,30 @@
# Example flake: exposes the apps and packages dream2nix generates for a set
# of package indexes, and re-uses the packages as flake checks.
{
  inputs.dream2nix.url = "github:nix-community/dream2nix";

  outputs = inp: let
    # outputs produced by dream2nix for the indexes configured below
    indexOutputs = inp.dream2nix.lib.makeFlakeOutputsForIndexes {
      source = ./.;
      systems = ["x86_64-linux"];
      # attribute name -> input passed to the indexer
      indexes = {
        libraries-io = {
          platform = "npm";
          number = 5;
        };
        crates-io = {};
        crates-io-simple = {
          sortBy = "name";
          maxPages = 1;
        };
      };
      # "^.*$" presumably matches every package name; the build script is
      # replaced by the shell no-op `:`
      packageOverrides."^.*$".disable-build.buildScript = ":";
    };
  in
    indexOutputs
    // {
      checks = inp.self.packages;
    };
}

View File

@ -1,5 +1,6 @@
{
# dream2nix deps
framework,
utils,
callNixWithD2N,
translateSourceShortcut,
@ -21,7 +22,7 @@ utils.writePureShellScriptBin
]
''
usage="usage:
$0 SOURCE_SHORTCUT TARGET_DIR"
$0 PROJECT_JSON TARGET_DIR"
if [ "$#" -ne 2 ]; then
echo "error: wrong number of arguments"
@ -29,91 +30,43 @@ utils.writePureShellScriptBin
exit 1
fi
source=''${1:?"error: pass a source shortcut"}
projectJson=''${1:?"error: pass projects spec as json string"}
targetDir=''${2:?"error: please pass a target directory"}
targetDir="$(realpath "$targetDir")"
sourceInfoPath=''${translateSourceInfoPath:-"$TMPDIR/sourceInfo.json"}
translateSkipResolved=''${translateSkipResolved:-"0"}
translator=$(jq '.translator' -c -r <(echo $projectJson))
name=$(jq '.name' -c -r <(echo $projectJson))
id=$(jq '.id' -c -r <(echo $projectJson))
if [ "$id" == "null" ]; then
echo "error: 'id' field not specified for project $name"
exit 1
fi
dreamLockPath="$targetDir/$id/dream-lock.json"
mkdir -p $targetDir && cd $targetDir
export dream2nixConfig="{packagesDir=\"./.\"; projectRoot=\"$targetDir\";}"
echo -e "\nTranslating:: $name (translator: $translator) (lock path: $dreamLockPath)"
# translate the source shortcut
${translateSourceShortcut} $source > $sourceInfoPath
translateBin=$(${callNixWithD2N} build --print-out-paths --no-link "
dream2nix.framework.translatorInstances.$translator.translateBin
")
# collect data for the packages we will resolve
resolveDatas="$TMPDIR/resolveData.json"
${callNixWithD2N} eval --json "
let
data =
l.map
(
p:
let
resolve = p.passthru.resolve or p.resolve;
mkBashEnv = name: value:
\"\''${name}=\" + \"'\" + value + \"'\";
in
# skip this project if we don't want to resolve ones that can be resolved on the fly
if \"$translateSkipResolved\" == \"1\" && resolve.passthru.project ? dreamLock
then null
else
# write a simple bash script for exporting the data we need
# this is better since we don't need to call jq multiple times
# to access the data we need
l.concatStringsSep
\";\"
(
(with resolve.passthru.project; [
(mkBashEnv \"name\" name)
(mkBashEnv \"dreamLockPath\" dreamLockPath)
(mkBashEnv \"subsystem\" subsystem)
]) ++ [
(mkBashEnv \"drvPath\" resolve.drvPath)
]
)
)
(
l.attrValues
(
l.removeAttrs
(dream2nix.makeOutputs {
source = dream2nix.fetchers.fetchSource {
source =
l.fromJSON (l.readFile \"$sourceInfoPath\");
};
}).packages
[\"resolveImpure\"]
)
);
in
l.unique (l.filter (i: i != null) data)
" > $resolveDatas
echo "
{
\"project\": $projectJson,
\"outputFile\": \"$dreamLockPath\"
}
" > $TMPDIR/args.json
# resolve the packages
for resolveData in $(jq '.[]' -c -r $resolveDatas); do
# extract project data so we can determine where the dream-lock.json will be
eval "$resolveData"
$translateBin $TMPDIR/args.json
echo "Resolving:: $name (subsystem: $subsystem) (lock path: $dreamLockPath)"
cat $dreamLockPath \
| python3 ${../cli/format-dream-lock.py} \
| sponge $dreamLockPath
# build the resolve script and run it
nix build --out-link $TMPDIR/resolve $drvPath
$TMPDIR/resolve/bin/resolve
${python3.pkgs.jsonschema}/bin/jsonschema \
--instance $dreamLockPath \
--output pretty \
${../../specifications/dream-lock-schema.json}
# patch the dream-lock with our source info so the dream-lock works standalone
${callNixWithD2N} eval --json "
with dream2nix.utils.dreamLock;
replaceRootSources {
dreamLock = l.fromJSON (l.readFile \"$targetDir/$dreamLockPath\");
newSourceRoot = l.fromJSON (l.readFile \"$sourceInfoPath\");
}
" \
| python3 ${../cli/format-dream-lock.py} \
| sponge "$dreamLockPath"
echo "Resolved:: $name (subsystem: $subsystem) (lock path: $dreamLockPath)"
done
echo -e "\nFinished:: $name (translator: $translator) (lock path: $dreamLockPath)"
''

View File

@ -4,10 +4,11 @@
coreutils,
curl,
jq,
python3,
...
}:
utils.writePureShellScript
[coreutils curl jq]
[coreutils curl jq python3]
''
input=''${1:?"please provide an input as a JSON file"}
@ -19,9 +20,8 @@
maxPages=$(jq '.maxPages' -c -r $input)
for currentPage in $(seq 1 $maxPages); do
jqQuery="$(jq '.' -c -r "$tmpFile") + (.crates | map(\"crates-io:\" + .name + \"\/\" + .max_stable_version))"
url="https://crates.io/api/v1/crates?page=$currentPage&per_page=100&sort=$sortBy"
curl -k "$url" | jq "$jqQuery" -r > "$tmpFile"
curl -k "$url" | python3 ${./process-result.py} > "$tmpFile"
done
mv "$tmpFile" "$(realpath $outFile)"

View File

@ -0,0 +1,13 @@
"""Convert one page of crates.io search results into index entries.

Reads the JSON response of the crates.io ``/api/v1/crates`` endpoint from
stdin and prints a JSON list of project objects with the keys ``id``,
``name``, ``version`` and ``translator``.
"""
import json
import sys


def to_projects(response):
    """Return one index entry per crate in ``response['crates']``.

    ``max_stable_version`` is preferred. When it is null (presumably crates
    that only published pre-releases -- confirm against the crates.io API)
    the entry falls back to ``max_version`` so that ``id`` never becomes
    ``"<name>-None"`` and ``version`` never becomes ``null``. Crates without
    any usable version are skipped.
    """
    projects = []
    for crate in response['crates']:
        version = crate['max_stable_version'] or crate.get('max_version')
        if not version:
            print(
                f"skipping {crate['name']}: no version available",
                file=sys.stderr,
            )
            continue
        projects.append(dict(
            id=f"{crate['name']}-{version}",
            name=crate['name'],
            version=version,
            translator='crates-io',
        ))
    return projects


def main():
    """Read the API response from stdin and print the index entries."""
    print(json.dumps(to_projects(json.load(sys.stdin)), indent=2))


if __name__ == "__main__":
    main()

View File

@ -1,8 +1,15 @@
#![deny(rust_2018_idioms)]
use serde::{Serialize, Deserialize};
use std::collections::BTreeMap;
/// One entry of a generated package index.
///
/// The public [`Index`] alias is a list of these entries, so the type and its
/// fields are public: consumers of an index must be able to name the type and
/// read what it contains.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Project {
    /// Identifier of the entry; the indexer builds it as `{name}-{version}`.
    pub id: String,
    /// Package name.
    pub name: String,
    /// Package version.
    pub version: String,
    /// Name of the translator that should process this entry
    /// (the indexer in this file sets it to `crates-io`).
    pub translator: String,
}
pub type Index = Vec<String>;
pub type Index = Vec<Project>;
#[cfg(feature = "gen")]
pub use self::indexer::{Indexer, Modifications, Settings};
@ -60,15 +67,15 @@ mod indexer {
self.exclusions.iter().any(|n| n == name)
}
}
fn default_max_pages() -> u32 {
1
}
fn default_sort_by() -> String {
"downloads".to_string()
}
fn default_output_file() -> String {
"./index.json".to_string()
}
@ -127,7 +134,7 @@ mod indexer {
self.page_callback = f;
self
}
pub fn write_info(&mut self) {
let infos = self.generate_info();
let file =
@ -199,9 +206,13 @@ mod indexer {
let pname = summary.name();
let version = summary.version().to_string();
let hash = summary.checksum().unwrap().to_string();
let entry = format!("crates-io:{pname}/{version}?hash={hash}");
let entry = Project {
id: format!("{pname}-{version}"),
name: pname.to_string(),
version: version,
translator: "crates-io".to_string(),
};
index.push(entry);
}

View File

@ -21,6 +21,7 @@ libraries.io also supports other interesting popularity metrics:
curl,
jq,
lib,
python3,
...
}: let
l = lib // builtins;
@ -31,7 +32,7 @@ libraries.io also supports other interesting popularity metrics:
};
in
utils.writePureShellScript
[coreutils curl jq]
[coreutils curl jq python3]
''
input=''${1:?"please provide an input as a JSON file"}
@ -44,29 +45,24 @@ libraries.io also supports other interesting popularity metrics:
fi
apiKey="$API_KEY"
platform=$(jq '.platform' -c -r $input)
number=$(jq '.number' -c -r $input)
export platform=$(jq '.platform' -c -r $input)
export number=$(jq '.number' -c -r $input)
# calculate number of pages to query
# page size is always 100
# result will be truncated to the given $number
# result will be truncated to the given $number later
numPages=$(($number/100 + ($number % 100 > 0)))
# get platform
platformQuery=$(jq ".\"$platform\"" -c -r ${l.toFile "platform-map.json" (l.toJSON platformMap)})
jqQuery=".[] | [(\"$platform:\" + .name + \"/\" + (.versions | sort_by(.published_at))[-1].number)] | add"
echo "Starting to query $numPages pages..."
rm -f $outFile
echo "[]" > $outFile
for page in $(seq 1 $numPages); do
echo "requesting page $page"
url="https://libraries.io/api/search?page=$page&sort=dependents_count&per_page=100&platforms=$platformQuery&api_key=$apiKey"
curl -k "$url" | jq "$jqQuery" -r >> $outFile
curl -k "$url" | python3 ${./process-result.py} $outFile
done
# truncate entries to $number and convert back to json
head -n $number $outFile | jq --raw-input --slurp 'split("\n") | .[0:-1]' > ''${outFile}.final
mv ''${outFile}.final $outFile
'';
}

View File

@ -0,0 +1,27 @@
"""Append one page of libraries.io search results to an index file.

Usage: ``process-result.py OUT_FILE < search-response.json``

The environment variable ``platform`` is written into every entry as its
``translator``; ``number`` is the maximum number of entries kept in
``OUT_FILE``. ``OUT_FILE`` must already contain a JSON list.
"""
import json
import os
import sys


def latest_version_number(versions):
    """Return the ``number`` of the entry with the greatest ``published_at``."""
    return sorted(versions, key=lambda v: v['published_at'])[-1]['number']


def build_projects(packages, platform):
    """Turn libraries.io package objects into index entries.

    Packages without any listed version are skipped instead of aborting the
    whole run. ``/`` in the id is replaced by ``_``, matching what the npm
    indexer does for its ids.
    """
    projects = []
    for package in packages:
        versions = package['versions']
        if not versions:
            print(
                f"skipping {package['name']}: no versions listed",
                file=sys.stderr,
            )
            continue
        version = latest_version_number(versions)
        projects.append(dict(
            id=f"{package['name']}-{version}".replace('/', '_'),
            name=package['name'],
            version=version,
            translator=platform,
        ))
    return projects


def main():
    """Merge the entries read from stdin into the index file given as argv[1]."""
    out_file = sys.argv[1]
    platform = os.environ.get("platform")
    number = int(os.environ.get("number"))

    projects = build_projects(json.load(sys.stdin), platform)

    with open(out_file) as f:
        existing_projects = json.load(f)

    # earlier pages come first; the combined list is cut to `number` entries
    all_projects = (existing_projects + projects)[:number]
    with open(out_file, 'w') as f:
        json.dump(all_projects, f, indent=2)


if __name__ == "__main__":
    main()

View File

@ -4,6 +4,7 @@
coreutils,
curl,
jq,
python3,
...
}:
utils.writePureShellScript
@ -17,8 +18,7 @@
size=$(jq '.maxPackageCount' -c -r $input)
url="https://registry.npmjs.org/-/v1/search?text=$text&popularity=1.0&quality=0.0&maintenance=0.0&size=$size"
jqQuery="[.objects[].package | [(\"npm:\" + .name + \"/\" + .version)]] | add"
curl -k "$url" | jq "$jqQuery" -r > $(realpath $outFile)
curl -k "$url" | ${python3}/bin/python ${./process-result.py} > $(realpath $outFile)
'';
}

View File

@ -0,0 +1,14 @@
"""Convert an npm registry search response (stdin) into index entries (stdout)."""
import json
import sys

search_result = json.load(sys.stdin)

# One entry per search hit. Any '/' in the id is replaced by '_'.
projects = [
    dict(
        id=f"{pkg['name']}-{pkg['version']}".replace('/', '_'),
        name=pkg['name'],
        version=pkg['version'],
        translator='npm',
    )
    for pkg in (hit['package'] for hit in search_result['objects'])
]

json.dump(projects, sys.stdout, indent=2)
sys.stdout.write("\n")

View File

@ -138,8 +138,7 @@
pkgs ? null,
config ? {},
source,
indexNames,
overrideOutputs ? args: {},
indexes,
inject ? {},
packageOverrides ? {},
settings ? [],
@ -160,8 +159,7 @@
allOutputs = dream2nix.utils.makeOutputsForIndexes {
inherit
source
indexNames
overrideOutputs
indexes
inject
packageOverrides
settings

View File

@ -0,0 +1,100 @@
# Impure translator that works without a local source tree: it asks npm to
# resolve `<name>[@<version>]` into a package-lock.json, delegates to the
# package-lock translator and finally records where the main package's
# tarball can be fetched from.
{
  dlib,
  lib,
  ...
}: {
  type = "impure";

  # the input format is specified in /specifications/translator-call-example.json
  # this script receives a json file including the input paths and specialArgs
  translateBin = {
    # dream2nix utils
    apps,
    subsystems,
    utils,
    # nixpkgs dependencies
    bash,
    coreutils,
    git,
    jq,
    moreutils,
    nodePackages,
    openssh,
    writeScriptBin,
    ...
  }:
    utils.writePureShellScript
    [
      bash
      coreutils
      git
      jq
      moreutils
      nodePackages.npm
      openssh
    ]
    ''
      # according to the spec, the translator reads the input from a json file
      # (made absolute because the working directory changes below)
      jsonInput=$(realpath "$1")

      # read the json input
      outputFile=$(realpath -m "$(jq '.outputFile' -c -r "$jsonInput")")
      name=$(jq '.project.name' -c -r "$jsonInput")
      version=$(jq '.project.version' -c -r "$jsonInput")
      # a missing npmArgs must become an empty string, not the literal "null",
      # which npm would otherwise treat as an additional package to install
      npmArgs=$(jq '.project.subsystemInfo.npmArgs | select (.!=null)' -c -r "$jsonInput")

      if [ "$version" = "null" ]; then
        candidate="$name"
      else
        candidate="$name@$version"
      fi

      # let npm resolve the package into a package-lock.json inside $TMPDIR
      pushd "$TMPDIR"
      newSource=$(pwd)
      # npmArgs is intentionally unquoted so that several arguments can be passed
      npm install "$candidate" --package-lock-only $npmArgs
      jq --arg source "$newSource" '.source = $source | .project.relPath = ""' -c -r "$jsonInput" > "$TMPDIR/newJsonInput"
      popd

      # call package-lock translator
      ${subsystems.nodejs.translators.package-lock.translateBin} "$TMPDIR/newJsonInput"

      # generate source info for main package
      url=$(npm view "$candidate" dist.tarball)
      hash=$(npm view "$candidate" dist.integrity)
      jq -n --arg url "$url" --arg hash "$hash" \
        '{type: "http", url: $url, hash: $hash}' > "$TMPDIR/sourceInfo.json"

      # add main package source info to dream-lock.json
      ${apps.callNixWithD2N} eval --json "
        with dream2nix.utils.dreamLock;
        replaceRootSources {
          dreamLock = l.fromJSON (l.readFile \"$outputFile\");
          newSourceRoot = l.fromJSON (l.readFile \"$TMPDIR/sourceInfo.json\");
        }
      " \
        | sponge "$outputFile"
    '';

  # inherit options from package-lock translator
  extraArgs =
    dlib.translators.translators.nodejs.package-lock.extraArgs
    // {
      npmArgs = {
        description = "Additional arguments for npm";
        type = "argument";
        default = "";
        examples = [
          "--force"
        ];
      };
    };
}

View File

@ -0,0 +1,101 @@
# Impure translator that works without a local source tree: it downloads the
# crate `<name>` at `<version>` from crates.io, delegates to the cargo-toml
# translator and finally records the tarball hash as the main package's source.
{
  dlib,
  lib,
  ...
}: {
  type = "impure";

  # the input format is specified in /specifications/translator-call-example.json
  # this script receives a json file including the input paths and specialArgs
  translateBin = {
    # dream2nix utils
    apps,
    subsystems,
    utils,
    # nixpkgs dependencies
    coreutils,
    curl,
    gnutar,
    gzip,
    jq,
    moreutils,
    rustPlatform,
    ...
  }:
    utils.writePureShellScript
    [
      coreutils
      curl
      gnutar
      gzip
      jq
      moreutils
      rustPlatform.rust.cargo
    ]
    ''
      # according to the spec, the translator reads the input from a json file
      jsonInput=$1

      # read the json input
      outputFile=$(realpath -m "$(jq '.outputFile' -c -r "$jsonInput")")
      cargoArgs=$(jq '.project.subsystemInfo.cargoArgs | select (.!=null)' -c -r "$jsonInput")
      name=$(jq '.project.name' -c -r "$jsonInput")
      version=$(jq '.project.version' -c -r "$jsonInput")

      # download and unpack package source
      # --fail: stop on an HTTP error instead of saving the error body as the tarball
      mkdir "$TMPDIR/source"
      curl --fail -L "https://crates.io/api/v1/crates/$name/$version/download" > "$TMPDIR/tarball" || {
        echo "error: could not download $name $version from crates.io" >&2
        exit 1
      }
      tar xzf "$TMPDIR/tarball" -C "$TMPDIR/source" --strip-components 1

      # generate arguments for cargo-toml translator
      # (built with jq so that values are always correctly escaped)
      jq -n \
        --arg source "$TMPDIR/source" \
        --arg outputFile "$outputFile" \
        --arg cargoArgs "$cargoArgs" \
        '{
          source: $source,
          outputFile: $outputFile,
          project: {
            relPath: "",
            subsystemInfo: {cargoArgs: $cargoArgs}
          }
        }' > "$TMPDIR/newJsonInput"

      ${subsystems.rust.translators.cargo-toml.translateBin} "$TMPDIR/newJsonInput"

      # add main package source info to dream-lock.json
      jq -n --arg hash "$(sha256sum "$TMPDIR/tarball" | cut -d " " -f 1)" \
        '{type: "crates-io", hash: $hash}' > "$TMPDIR/sourceInfo.json"

      ${apps.callNixWithD2N} eval --json "
        with dream2nix.utils.dreamLock;
        replaceRootSources {
          dreamLock = l.fromJSON (l.readFile \"$outputFile\");
          newSourceRoot = l.fromJSON (l.readFile \"$TMPDIR/sourceInfo.json\");
        }
      " \
        | sponge "$outputFile"
    '';

  # inherit options from cargo-lock translator
  # NOTE(review): the script above calls the cargo-toml translator while the
  # options are taken from cargo-lock -- confirm this is intended.
  extraArgs =
    dlib.translators.translators.rust.cargo-lock.extraArgs
    // {
      cargoArgs = {
        description = "Additional arguments for Cargo";
        type = "argument";
        default = "";
        examples = [
          "--verbose"
        ];
      };
    };
}

View File

@ -104,10 +104,9 @@ in
TMPDIR=$(${coreutils}/bin/mktemp -d)
${script}
trap '${coreutils}/bin/rm -rf "$TMPDIR"' EXIT
cd
${coreutils}/bin/rm -rf $TMPDIR
${script}
'';
# builder to create a shell script that has its own PATH
@ -121,10 +120,9 @@ in
TMPDIR=$(${coreutils}/bin/mktemp -d)
${script}
trap '${coreutils}/bin/rm -rf "$TMPDIR"' EXIT
cd
${coreutils}/bin/rm -rf $TMPDIR
${script}
'';
# TODO is this really needed? Seems to make builds slower, why not unpack + build?

View File

@ -5,6 +5,7 @@
pkgs,
apps,
callPackageDream,
utils,
...
} @ topArgs: let
l = lib // builtins;
@ -54,30 +55,28 @@ in rec {
(l.map makePackagesForDreamLock dreamLocks);
makeOutputsForIndexes = {
source,
indexNames,
overrideOutputs ? args: {},
indexes,
settings ? [],
inject ? {},
packageOverrides ? {},
sourceOverrides ? {},
}: let
l = lib // builtins;
indexNames = l.attrNames indexes;
mkApp = script: {
type = "app";
program = toString script;
};
mkIndexApp = {
name,
indexerName ? name,
input,
} @ args: let
input = {outputFile = "${name}/index.json";} // args.input;
mkIndexApp = name: input: let
inputFinal = {outputFile = "${name}/index.json";} // input;
script = pkgs.writers.writeBash "index" ''
set -e
inputJson="$(${pkgs.coreutils}/bin/mktemp)"
echo '${l.toJSON input}' > $inputJson
${apps.index}/bin/index ${indexerName} $inputJson
echo '${l.toJSON inputFinal}' > $inputJson
mkdir -p $(dirname ${inputFinal.outputFile})
${apps.index}/bin/index ${name} $inputJson
'';
in
mkApp script;
@ -89,6 +88,27 @@ in rec {
${name}/index.json ${name}/locks
''
);
# Flake app for CI: switches to the `data` branch, takes flake.nix/flake.lock
# from the main branch, runs the index and translate apps for index `name`
# and commits the result.
mkCiJobApp = name: input:
  mkApp (
    utils.writePureShellScript
    (with pkgs; [
      coreutils
      git
      gnugrep
    ])
    ''
      # plain `git branch` marks the current branch with "* ", which would be
      # glob-expanded later; print bare names and match the whole line only
      mainBranch=$(git branch --format='%(refname:short)' | grep -E -x 'master|main' | head -n 1)
      git branch data || :
      git checkout data
      # the flake should always be the one from the current main branch
      git checkout "$mainBranch" -- flake.nix
      git checkout "$mainBranch" -- flake.lock
      ${(mkIndexApp name input).program}
      ${(mkTranslateApp name).program}
      git add .
      # -m is required; a bare string would be interpreted as a pathspec
      git commit -m "automatic update - $(date --rfc-3339=seconds)"
    ''
  );
translateApps = l.listToAttrs (
l.map
(
@ -99,28 +119,26 @@ in rec {
)
indexNames
);
translateAllApp = let
allTranslators =
l.concatStringsSep
"\n"
(
l.mapAttrsToList
(
name: translator: ''
echo "::translating with ${name}::"
${translator.program}
echo "::translated with ${name}::"
''
)
translateApps
);
in
mkApp (
pkgs.writers.writeBash "translate-all" ''
set -e
${allTranslators}
''
);
# One flake app per configured index, exposed as `index-<name>`.
indexApps =
  l.mapAttrs'
  (indexName: indexInput: {
    name = "index-${indexName}";
    value = mkIndexApp indexName indexInput;
  })
  indexes;
# One flake app per configured index, exposed as `ci-job-<name>`.
ciJobApps =
  l.mapAttrs'
  (indexName: indexInput: {
    name = "ci-job-${indexName}";
    value = mkCiJobApp indexName indexInput;
  })
  indexes;
mkIndexOutputs = name: let
src = "${toString source}/${name}/locks";
@ -149,15 +167,10 @@ in rec {
outputs = {
packages = allPackages;
apps =
translateApps
// {
translate = translateAllApp;
};
indexApps
// translateApps
// ciJobApps;
};
in
outputs
// (callPackageDream overrideOutputs {
inherit mkIndexApp;
prevOutputs = outputs;
});
outputs;
}

View File

@ -5,10 +5,13 @@
bash,
coreutils,
git,
gnugrep,
jq,
parallel,
nix,
utils,
dream2nixWithExternals,
pkgs,
...
}: let
l = lib // builtins;
@ -20,6 +23,8 @@
bash
coreutils
git
gnugrep
jq
nix
]
''
@ -31,11 +36,14 @@
cp -r ${examples}/$dir/* .
chmod -R +w .
nix flake lock --override-input dream2nix ${../../.}
nix run .#resolveImpure --show-trace
if nix flake show | grep -q resolveImpure; then
nix run .#resolveImpure --show-trace
fi
# disable --read-only check for these because they do IFD so they will
# write to store at eval time
evalBlockList=("haskell_cabal-plan" "haskell_stack-lock")
if [[ ! ((''${evalBlockList[*]} =~ "$dir")) ]]; then
if [[ ! ((''${evalBlockList[*]} =~ "$dir")) ]] \
&& [ "$(nix flake show --json | jq 'select(.packages."x86_64-linux".default.name)')" != "" ]; then
nix eval --read-only --no-allow-import-from-derivation .#default.name
fi
nix flake check "$@"