mirror of
https://github.com/nix-community/dream2nix.git
synced 2024-11-22 15:04:46 +03:00
chore(fetchPythonRequirements): refactor code
- add and improve comments - reduce nesting in several places - move requirementsFiles flag logic to python - MAX_DATE -> maxDate - respect column 80 wherever possible
This commit is contained in:
parent
4aa6e86907
commit
a677084257
@ -1,13 +1,14 @@
|
||||
# fetchPythonRequirements downlaods python packages specified by a list of
|
||||
# pip-style python requirements
|
||||
# It also requires a maximum date 'maxDate' being specified.
|
||||
# The result will be as if `pip download` would have been executed
|
||||
# at the point in time specified by maxDate.
|
||||
# fetchPythonRequirements downloads python packages specified by executing
|
||||
# `pip download` on a source tree, or a list of requirements.
|
||||
# This fetcher requires a maximum date 'maxDate' being specified.
|
||||
# The result will be the same as if `pip download` would have been executed
|
||||
# at the point in time specified by maxDate.
|
||||
# This is ensured by putting pip behind a local proxy filtering the
|
||||
# api responses from pypi.org to only contain files for which the
|
||||
# release date is lower than the specified maxDate.
|
||||
# api responses from pypi.org to only contain files for which the
|
||||
# release date is lower than the specified maxDate.
|
||||
# TODO: ignore if packages are yanked
|
||||
# TODO: for MAX_DATE only allow timestamp or format 2023-01-01
|
||||
# TODO: for maxDate only allow timestamp or format 2023-01-01
|
||||
# TODO: Error if maxDate points to the future
|
||||
{
|
||||
buildPackages,
|
||||
cacert,
|
||||
@ -15,40 +16,41 @@
|
||||
lib,
|
||||
python3,
|
||||
stdenv,
|
||||
}: {
|
||||
# Specify the python version for which the packages should be downloaded.
|
||||
# Pip needs to be executed from that specific python version.
|
||||
# Pip accepts '--python-version', but this works only for wheel packages.
|
||||
python,
|
||||
# hash for the fixed output derivation
|
||||
hash,
|
||||
# list of strings of requirements.txt entries
|
||||
requirementsList ? [],
|
||||
# list of requirements.txt files
|
||||
requirementsFiles ? [],
|
||||
# restrict to binary releases (.whl)
|
||||
# this allows buildPlatform independent fetching
|
||||
onlyBinary ? false,
|
||||
# additional flags for `pip download`.
|
||||
# for reference see: https://pip.pypa.io/en/stable/cli/pip_download/
|
||||
pipFlags ? [],
|
||||
name ? null,
|
||||
nameSuffix ? "python-requirements",
|
||||
nativeBuildInputs ? [],
|
||||
# maximum release date for packages
|
||||
maxDate ?
|
||||
throw ''
|
||||
'maxDate' must be specified for fetchPythonRequirements.
|
||||
Choose any date from the past.
|
||||
Example value: "2023-01-01"
|
||||
'',
|
||||
# It's better to not refer to python.pkgs.pip directly, as we want to reduce
|
||||
# the times we have to update the output hash
|
||||
pipVersion ? "23.0",
|
||||
# Write "dependencies.json" to $out, documenting which package depends on which.
|
||||
writeDependencyTree ? true,
|
||||
}: let
|
||||
fetchPythonRequirements = {
|
||||
# This specifies the python version for which the packages should be downloaded
|
||||
# Pip needs to be executed from that specific python version.
|
||||
# Pip accepts '--python-version', but this works only for wheel packages.
|
||||
python,
|
||||
# hash for the fixed output derivation
|
||||
hash,
|
||||
# list of strings of requirements.txt entries
|
||||
requirementsList ? [],
|
||||
# list of requirements.txt files
|
||||
requirementsFiles ? [],
|
||||
# restrict to binary releases (.whl)
|
||||
# this allows buildPlatform independent fetching
|
||||
onlyBinary ? false,
|
||||
# additional flags for `pip download`.
|
||||
# for reference see: https://pip.pypa.io/en/stable/cli/pip_download/
|
||||
pipFlags ? [],
|
||||
name ? null,
|
||||
nameSuffix ? "python-requirements",
|
||||
nativeBuildInputs ? [],
|
||||
# maximum release date for packages
|
||||
maxDate ?
|
||||
throw ''
|
||||
'maxDate' must be specified for fetchPythonRequirements.
|
||||
Changing this value will affect the output hash
|
||||
Example value: "2023-01-01"
|
||||
'',
|
||||
# It's better to not refer to python.pkgs.pip directly, as we want to reduce
|
||||
# the times we have to update the output hash
|
||||
pipVersion ? "23.0",
|
||||
# Write "dependencies.json" to $out, documenting which package depends on which.
|
||||
writeDependencyTree ? true,
|
||||
}:
|
||||
# throws an error if pipDownload is executed with unsafe arguments
|
||||
validateArgs = result:
|
||||
# specifying `--platform` for pip download is only allowed in combination with `--only-binary :all:`
|
||||
# therefore, if onlyBinary is disabled, we must enforce targetPlatform == buildPlatform to ensure reproducibility
|
||||
if ! onlyBinary && stdenv.system != stdenv.buildPlatform.system
|
||||
@ -57,129 +59,146 @@
|
||||
fetchPythonRequirements cannot fetch sdist packages for ${stdenv.system} on a ${stdenv.buildPlatform.system}.
|
||||
Either build on a ${stdenv.system} or set `onlyBinary = true`.
|
||||
''
|
||||
else let
|
||||
# map nixos system strings to python platforms
|
||||
sysToPlatforms = {
|
||||
"x86_64-linux" = [
|
||||
"manylinux1_x86_64"
|
||||
"manylinux2010_x86_64"
|
||||
"manylinux2014_x86_64"
|
||||
"linux_x86_64"
|
||||
];
|
||||
"x86_64-darwin" =
|
||||
lib.forEach (lib.range 0 15) (minor: "macosx_10_${builtins.toString minor}_x86_64");
|
||||
"aarch64-linux" = [
|
||||
"manylinux1_aarch64"
|
||||
"manylinux2010_aarch64"
|
||||
"manylinux2014_aarch64"
|
||||
"linux_aarch64"
|
||||
];
|
||||
};
|
||||
else result;
|
||||
|
||||
platforms =
|
||||
if sysToPlatforms ? "${stdenv.system}"
|
||||
then sysToPlatforms."${stdenv.system}"
|
||||
else
|
||||
throw ''
|
||||
'binaryOnly' fetching is currently not supported for target ${stdenv.system}.
|
||||
You could set 'binaryOnly = false' and execute the build on a ${stdenv.system}.
|
||||
'';
|
||||
# map nixos system strings to python platforms
|
||||
sysToPlatforms = {
|
||||
"x86_64-linux" = [
|
||||
"manylinux1_x86_64"
|
||||
"manylinux2010_x86_64"
|
||||
"manylinux2014_x86_64"
|
||||
"linux_x86_64"
|
||||
];
|
||||
"x86_64-darwin" =
|
||||
lib.forEach (lib.range 0 15)
|
||||
(minor: "macosx_10_${builtins.toString minor}_x86_64");
|
||||
"aarch64-linux" = [
|
||||
"manylinux1_aarch64"
|
||||
"manylinux2010_aarch64"
|
||||
"manylinux2014_aarch64"
|
||||
"linux_aarch64"
|
||||
];
|
||||
};
|
||||
|
||||
# we use mitmproxy to filter the pypi responses
|
||||
pythonWithMitmproxy =
|
||||
python3.withPackages (ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]);
|
||||
platforms =
|
||||
if sysToPlatforms ? "${stdenv.system}"
|
||||
then sysToPlatforms."${stdenv.system}"
|
||||
else throw errorNoBinaryFetchingForTarget;
|
||||
|
||||
# fixed output derivation containing downloaded packages,
|
||||
# each being symlinked from it's normalized name
|
||||
# Example:
|
||||
# "$out/werkzeug" will point to "$out/Werkzeug-0.14.1-py2.py3-none-any.whl"
|
||||
self = stdenv.mkDerivation (finalAttrs: {
|
||||
# An invalidation hash is embedded into the `name`.
|
||||
# This will prevent `forgot to update the hash` scenarios, as any change
|
||||
# in the derivaiton name enforces a re-build.
|
||||
name = let
|
||||
pythonMajorAndMinorVer =
|
||||
lib.concatStringsSep "."
|
||||
(lib.sublist 0 2 (lib.splitString "." python.version));
|
||||
errorNoBinaryFetchingForTarget = ''
|
||||
'onlyBinary' fetching is currently not supported for target ${stdenv.system}.
|
||||
You could set 'onlyBinary = false' and execute the build on a ${stdenv.system}.
|
||||
'';
|
||||
|
||||
invalidationHash = builtins.hashString "sha256" ''
|
||||
# we use mitmproxy to filter the pypi responses
|
||||
pythonWithMitmproxy =
|
||||
python3.withPackages
|
||||
(ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]);
|
||||
|
||||
# Ignore the python minor version. It should not affect resolution
|
||||
${python.implementation}
|
||||
${pythonMajorAndMinorVer}
|
||||
${stdenv.system}
|
||||
pythonMajorAndMinorVer =
|
||||
lib.concatStringsSep "."
|
||||
(lib.sublist 0 2 (lib.splitString "." python.version));
|
||||
|
||||
# All variables that might influence the output
|
||||
${finalAttrs.MAX_DATE}
|
||||
${finalAttrs.onlyBinaryFlags}
|
||||
${finalAttrs.pipVersion}
|
||||
${finalAttrs.pipFlags}
|
||||
${toString writeDependencyTree}
|
||||
invalidationHash = finalAttrs:
|
||||
builtins.hashString "sha256" ''
|
||||
|
||||
# Include requirements
|
||||
# We hash the content, as store paths might change more often
|
||||
${toString finalAttrs.requirementsList}
|
||||
${toString finalAttrs.requirementsFiles}
|
||||
# Ignore the python minor version. It should not affect resolution
|
||||
${python.implementation}
|
||||
${pythonMajorAndMinorVer}
|
||||
${stdenv.system}
|
||||
|
||||
# Only hash the content of the python scripts, as the store path
|
||||
# changes with every nixpkgs commit
|
||||
${builtins.readFile finalAttrs.filterPypiResponsesScript}
|
||||
${builtins.readFile finalAttrs.buildScript}
|
||||
${builtins.readFile finalAttrs.writeDependencyTreeScript}
|
||||
'';
|
||||
# All variables that might influence the output
|
||||
${finalAttrs.maxDate}
|
||||
${finalAttrs.onlyBinaryFlags}
|
||||
${finalAttrs.pipVersion}
|
||||
${finalAttrs.pipFlags}
|
||||
${toString writeDependencyTree}
|
||||
|
||||
invalidationHashShort =
|
||||
lib.substring 0 10
|
||||
(builtins.unsafeDiscardStringContext invalidationHash);
|
||||
# Include requirements
|
||||
# We hash the content, as store paths might change more often
|
||||
${toString finalAttrs.requirementsList}
|
||||
${toString finalAttrs.requirementsFiles}
|
||||
|
||||
namePrefix =
|
||||
if name == null
|
||||
then ""
|
||||
else name + "-";
|
||||
in "${namePrefix}${nameSuffix}-${invalidationHashShort}";
|
||||
# Only hash the content of the python scripts, as the store path
|
||||
# changes with every nixpkgs commit
|
||||
${builtins.readFile finalAttrs.filterPypiResponsesScript}
|
||||
${builtins.readFile finalAttrs.buildScript}
|
||||
${builtins.readFile finalAttrs.writeDependencyTreeScript}
|
||||
'';
|
||||
|
||||
outputHashMode = "recursive";
|
||||
outputHashAlgo = "sha256";
|
||||
outputHash = hash;
|
||||
invalidationHashShort = finalAttrs:
|
||||
lib.substring 0 10
|
||||
(builtins.unsafeDiscardStringContext (invalidationHash finalAttrs));
|
||||
|
||||
# Multiple outputs are not allowed in an FOD, therefore use passthru
|
||||
# to export $dist and $names
|
||||
passthru.dist = "${finalAttrs.finalPackage}/dist";
|
||||
passthru.names = "${finalAttrs.finalPackage}/names";
|
||||
namePrefix =
|
||||
if name == null
|
||||
then ""
|
||||
else name + "-";
|
||||
|
||||
nativeBuildInputs =
|
||||
nativeBuildInputs
|
||||
++ [pythonWithMitmproxy curl cacert];
|
||||
# A fixed output derivation containing all downloaded packages.
|
||||
# each single file is located inside a directory named like the package.
|
||||
# Example:
|
||||
# "$out/werkzeug" will contain "Werkzeug-0.14.1-py2.py3-none-any.whl"
|
||||
# Each directory only ever contains a single file
|
||||
pipDownload = stdenv.mkDerivation (finalAttrs: {
|
||||
# An invalidation hash is embedded into the `name`.
|
||||
# This will prevent `forgot to update the hash` scenarios, as any change
|
||||
# in the derivation name enforces a re-build.
|
||||
name = "${namePrefix}${nameSuffix}-${invalidationHashShort finalAttrs}";
|
||||
|
||||
dontUnpack = true;
|
||||
dontInstall = true;
|
||||
dontFixup = true;
|
||||
# setup FOD
|
||||
outputHashMode = "recursive";
|
||||
outputHashAlgo = "sha256";
|
||||
outputHash = hash;
|
||||
|
||||
pythonBin = python.interpreter;
|
||||
filterPypiResponsesScript = ./filter-pypi-responses.py;
|
||||
buildScript = ./fetch-python-requirements.py;
|
||||
writeDependencyTreeScript = ./write-dependency-tree.py;
|
||||
inherit
|
||||
pythonWithMitmproxy
|
||||
pipVersion
|
||||
requirementsFiles
|
||||
requirementsList
|
||||
;
|
||||
MAX_DATE = builtins.toString maxDate;
|
||||
pipFlags = lib.concatStringsSep " " pipFlags;
|
||||
onlyBinaryFlags = lib.optionalString onlyBinary "--only-binary :all: ${
|
||||
lib.concatStringsSep " " (lib.forEach platforms (pf: "--platform ${pf}"))
|
||||
}";
|
||||
requirementsFlags =
|
||||
lib.optionalString (requirementsFiles != [])
|
||||
''-r ${lib.concatStringsSep " -r " (map toString finalAttrs.requirementsFiles)}'';
|
||||
# Multiple outputs are not allowed in an FOD, therefore use passthru
|
||||
# to export $dist and $names
|
||||
passthru.dist = "${finalAttrs.finalPackage}/dist";
|
||||
passthru.names = "${finalAttrs.finalPackage}/names";
|
||||
|
||||
buildPhase = ''
|
||||
$pythonWithMitmproxy/bin/python $buildScript
|
||||
${lib.optionalString writeDependencyTree "$pythonWithMitmproxy/bin/python $writeDependencyTreeScript $out/dist > $out/dependencies.json"}
|
||||
'';
|
||||
});
|
||||
in
|
||||
self;
|
||||
# disable some phases
|
||||
dontUnpack = true;
|
||||
dontInstall = true;
|
||||
dontFixup = true;
|
||||
|
||||
# build inputs
|
||||
nativeBuildInputs =
|
||||
nativeBuildInputs
|
||||
++ [pythonWithMitmproxy curl cacert];
|
||||
|
||||
# python scripts
|
||||
filterPypiResponsesScript = ./filter-pypi-responses.py;
|
||||
buildScript = ./fetch-python-requirements.py;
|
||||
writeDependencyTreeScript = ./write-dependency-tree.py;
|
||||
|
||||
# the python interpreter used to run the build script
|
||||
pythonBin = python.interpreter;
|
||||
|
||||
# the python interpreter used to run the proxy script
|
||||
inherit pythonWithMitmproxy;
|
||||
|
||||
# convert maxDate to string and integrate into finalAttrs
|
||||
maxDate = builtins.toString maxDate;
|
||||
|
||||
# add some variables to the derivation to integrate them into finalAttrs
|
||||
inherit
|
||||
pipVersion
|
||||
requirementsFiles
|
||||
requirementsList
|
||||
;
|
||||
|
||||
# prepare flags for `pip download`
|
||||
pipFlags = lib.concatStringsSep " " pipFlags;
|
||||
onlyBinaryFlags = lib.optionalString onlyBinary "--only-binary :all: ${
|
||||
lib.concatStringsSep " " (lib.forEach platforms (pf: "--platform ${pf}"))
|
||||
}";
|
||||
|
||||
# - Execute `pip download` through the filtering proxy.
|
||||
# - optionally add a file to the FOD containing the dependency tree
|
||||
buildPhase = ''
|
||||
$pythonWithMitmproxy/bin/python $buildScript
|
||||
${lib.optionalString writeDependencyTree "$pythonWithMitmproxy/bin/python $writeDependencyTreeScript $out/dist > $out/dependencies.json"}
|
||||
'';
|
||||
});
|
||||
in
|
||||
fetchPythonRequirements
|
||||
validateArgs pipDownload
|
||||
|
@ -24,14 +24,14 @@ PIP_VERSION = os.getenv("pipVersion")
|
||||
PIP_FLAGS = os.getenv("pipFlags")
|
||||
ONLY_BINARY_FLAGS = os.getenv("onlyBinaryFlags")
|
||||
REQUIREMENTS_LIST = os.getenv("requirementsList")
|
||||
REQUIREMENTS_FLAGS = os.getenv("requirementsFlags")
|
||||
REQUIREMENTS_FILES = os.getenv("requirementsFiles")
|
||||
|
||||
|
||||
def get_max_date():
|
||||
try:
|
||||
return int(os.getenv("MAX_DATE"))
|
||||
return int(os.getenv("maxDate"))
|
||||
except ValueError:
|
||||
return dateutil.parser.parse(os.getenv("MAX_DATE"))
|
||||
return dateutil.parser.parse(os.getenv("maxDate"))
|
||||
|
||||
|
||||
def get_free_port():
|
||||
@ -53,7 +53,7 @@ def start_mitmproxy(port):
|
||||
"--script",
|
||||
FILTER_PYPI_RESPONSE_SCRIPTS,
|
||||
],
|
||||
env={"MAX_DATE": os.getenv("MAX_DATE"), "HOME": HOME},
|
||||
env={"maxDate": os.getenv("maxDate"), "HOME": HOME},
|
||||
)
|
||||
return proc
|
||||
|
||||
@ -121,8 +121,10 @@ if __name__ == "__main__":
|
||||
PIP_FLAGS,
|
||||
ONLY_BINARY_FLAGS,
|
||||
REQUIREMENTS_LIST,
|
||||
REQUIREMENTS_FLAGS,
|
||||
]
|
||||
if REQUIREMENTS_FILES:
|
||||
optional_flags += ["-r " + " -r ".join(REQUIREMENTS_FILES.split())]
|
||||
|
||||
optional_flags = " ".join(filter(None, optional_flags)).split(" ")
|
||||
pip(
|
||||
venv_path,
|
||||
|
@ -2,7 +2,7 @@
|
||||
This script is part of fetchPythonRequirements
|
||||
It is meant to be used with mitmproxy via `--script`
|
||||
It will filter api responses from the pypi.org api (used by pip),
|
||||
to only contain files with release date < MAX_DATE
|
||||
to only contain files with release date < maxDate
|
||||
|
||||
For retrieving the release dates for files, it uses the pypi.org json api
|
||||
It has to do one extra api request for each queried package name
|
||||
@ -52,9 +52,9 @@ def get_files_to_hide(pname, max_ts):
|
||||
|
||||
# accept unix timestamp or human readable format
|
||||
try:
|
||||
max_ts = int(os.getenv("MAX_DATE"))
|
||||
max_ts = int(os.getenv("maxDate"))
|
||||
except ValueError:
|
||||
max_date = dateutil.parser.parse(os.getenv("MAX_DATE"))
|
||||
max_date = dateutil.parser.parse(os.getenv("maxDate"))
|
||||
max_ts = max_date.timestamp()
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user