chore(fetchPythonRequirements): refactor code

- add and improve comments
- reduce nesting in several places
- move requirementsFiles flag logic to python
- MAX_DATE -> maxDate
- respect column 80 wherever possible
This commit is contained in:
DavHau 2023-03-21 12:35:54 +07:00
parent 4aa6e86907
commit a677084257
3 changed files with 179 additions and 158 deletions

View File

@ -1,13 +1,14 @@
# fetchPythonRequirements downlaods python packages specified by a list of
# pip-style python requirements
# It also requires a maximum date 'maxDate' being specified.
# The result will be as if `pip download` would have been executed
# at the point in time specified by maxDate.
# fetchPythonRequirements downloads python packages specified by executing
# `pip download` on a source tree, or a list of requirements.
# This fetcher requires a maximum date 'maxDate' to be specified.
# The result will be the same as if `pip download` had been executed
# at the point in time specified by maxDate.
# This is ensured by putting pip behind a local proxy filtering the
# api responses from pypi.org to only contain files for which the
# release date is lower than the specified maxDate.
# api responses from pypi.org to only contain files for which the
# release date is lower than the specified maxDate.
# TODO: ignore if packages are yanked
# TODO: for MAX_DATE only allow timestamp or format 2023-01-01
# TODO: for maxDate only allow timestamp or format 2023-01-01
# TODO: Error if maxDate points to the future
{
buildPackages,
cacert,
@ -15,40 +16,41 @@
lib,
python3,
stdenv,
}: {
# Specify the python version for which the packages should be downloaded.
# Pip needs to be executed from that specific python version.
# Pip accepts '--python-version', but this works only for wheel packages.
python,
# hash for the fixed output derivation
hash,
# list of strings of requirements.txt entries
requirementsList ? [],
# list of requirements.txt files
requirementsFiles ? [],
# restrict to binary releases (.whl)
# this allows buildPlatform independent fetching
onlyBinary ? false,
# additional flags for `pip download`.
# for reference see: https://pip.pypa.io/en/stable/cli/pip_download/
pipFlags ? [],
name ? null,
nameSuffix ? "python-requirements",
nativeBuildInputs ? [],
# maximum release date for packages
maxDate ?
throw ''
'maxDate' must be specified for fetchPythonRequirements.
Choose any date from the past.
Example value: "2023-01-01"
'',
# It's better to not refer to python.pkgs.pip directly, as we want to reduce
# the times we have to update the output hash
pipVersion ? "23.0",
# Write "dependencies.json" to $out, documenting which package depends on which.
writeDependencyTree ? true,
}: let
fetchPythonRequirements = {
# This specifies the python version for which the packages should be downloaded
# Pip needs to be executed from that specific python version.
# Pip accepts '--python-version', but this works only for wheel packages.
python,
# hash for the fixed output derivation
hash,
# list of strings of requirements.txt entries
requirementsList ? [],
# list of requirements.txt files
requirementsFiles ? [],
# restrict to binary releases (.whl)
# this allows buildPlatform independent fetching
onlyBinary ? false,
# additional flags for `pip download`.
# for reference see: https://pip.pypa.io/en/stable/cli/pip_download/
pipFlags ? [],
name ? null,
nameSuffix ? "python-requirements",
nativeBuildInputs ? [],
# maximum release date for packages
maxDate ?
throw ''
'maxDate' must be specified for fetchPythonRequirements.
Changing this value will affect the output hash
Example value: "2023-01-01"
'',
# It's better to not refer to python.pkgs.pip directly, as we want to reduce
# the times we have to update the output hash
pipVersion ? "23.0",
# Write "dependencies.json" to $out, documenting which package depends on which.
writeDependencyTree ? true,
}:
# throws an error if pipDownload is executed with unsafe arguments
validateArgs = result:
# specifying `--platform` for pip download is only allowed in combination with `--only-binary :all:`
# therefore, if onlyBinary is disabled, we must enforce targetPlatform == buildPlatform to ensure reproducibility
if ! onlyBinary && stdenv.system != stdenv.buildPlatform.system
@ -57,129 +59,146 @@
fetchPythonRequirements cannot fetch sdist packages for ${stdenv.system} on a ${stdenv.buildPlatform.system}.
Either build on a ${stdenv.system} or set `onlyBinary = true`.
''
else let
# map nixos system strings to python platforms
sysToPlatforms = {
"x86_64-linux" = [
"manylinux1_x86_64"
"manylinux2010_x86_64"
"manylinux2014_x86_64"
"linux_x86_64"
];
"x86_64-darwin" =
lib.forEach (lib.range 0 15) (minor: "macosx_10_${builtins.toString minor}_x86_64");
"aarch64-linux" = [
"manylinux1_aarch64"
"manylinux2010_aarch64"
"manylinux2014_aarch64"
"linux_aarch64"
];
};
else result;
platforms =
if sysToPlatforms ? "${stdenv.system}"
then sysToPlatforms."${stdenv.system}"
else
throw ''
'binaryOnly' fetching is currently not supported for target ${stdenv.system}.
You could set 'binaryOnly = false' and execute the build on a ${stdenv.system}.
'';
# map nixos system strings to python platforms
sysToPlatforms = {
"x86_64-linux" = [
"manylinux1_x86_64"
"manylinux2010_x86_64"
"manylinux2014_x86_64"
"linux_x86_64"
];
"x86_64-darwin" =
lib.forEach (lib.range 0 15)
(minor: "macosx_10_${builtins.toString minor}_x86_64");
"aarch64-linux" = [
"manylinux1_aarch64"
"manylinux2010_aarch64"
"manylinux2014_aarch64"
"linux_aarch64"
];
};
# we use mitmproxy to filter the pypi responses
pythonWithMitmproxy =
python3.withPackages (ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]);
platforms =
if sysToPlatforms ? "${stdenv.system}"
then sysToPlatforms."${stdenv.system}"
else throw errorNoBinaryFetchingForTarget;
# fixed output derivation containing downloaded packages,
# each being symlinked from it's normalized name
# Example:
# "$out/werkzeug" will point to "$out/Werkzeug-0.14.1-py2.py3-none-any.whl"
self = stdenv.mkDerivation (finalAttrs: {
# An invalidation hash is embedded into the `name`.
# This will prevent `forgot to update the hash` scenarios, as any change
# in the derivaiton name enforces a re-build.
name = let
pythonMajorAndMinorVer =
lib.concatStringsSep "."
(lib.sublist 0 2 (lib.splitString "." python.version));
errorNoBinaryFetchingForTarget = ''
'onlyBinary' fetching is currently not supported for target ${stdenv.system}.
You could set 'onlyBinary = false' and execute the build on a ${stdenv.system}.
'';
invalidationHash = builtins.hashString "sha256" ''
# we use mitmproxy to filter the pypi responses
pythonWithMitmproxy =
python3.withPackages
(ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]);
# Ignore the python minor version. It should not affect resolution
${python.implementation}
${pythonMajorAndMinorVer}
${stdenv.system}
pythonMajorAndMinorVer =
lib.concatStringsSep "."
(lib.sublist 0 2 (lib.splitString "." python.version));
# All variables that might influence the output
${finalAttrs.MAX_DATE}
${finalAttrs.onlyBinaryFlags}
${finalAttrs.pipVersion}
${finalAttrs.pipFlags}
${toString writeDependencyTree}
invalidationHash = finalAttrs:
builtins.hashString "sha256" ''
# Include requirements
# We hash the content, as store paths might change more often
${toString finalAttrs.requirementsList}
${toString finalAttrs.requirementsFiles}
# Ignore the python minor version. It should not affect resolution
${python.implementation}
${pythonMajorAndMinorVer}
${stdenv.system}
# Only hash the content of the python scripts, as the store path
# changes with every nixpkgs commit
${builtins.readFile finalAttrs.filterPypiResponsesScript}
${builtins.readFile finalAttrs.buildScript}
${builtins.readFile finalAttrs.writeDependencyTreeScript}
'';
# All variables that might influence the output
${finalAttrs.maxDate}
${finalAttrs.onlyBinaryFlags}
${finalAttrs.pipVersion}
${finalAttrs.pipFlags}
${toString writeDependencyTree}
invalidationHashShort =
lib.substring 0 10
(builtins.unsafeDiscardStringContext invalidationHash);
# Include requirements
# We hash the content, as store paths might change more often
${toString finalAttrs.requirementsList}
${toString finalAttrs.requirementsFiles}
namePrefix =
if name == null
then ""
else name + "-";
in "${namePrefix}${nameSuffix}-${invalidationHashShort}";
# Only hash the content of the python scripts, as the store path
# changes with every nixpkgs commit
${builtins.readFile finalAttrs.filterPypiResponsesScript}
${builtins.readFile finalAttrs.buildScript}
${builtins.readFile finalAttrs.writeDependencyTreeScript}
'';
outputHashMode = "recursive";
outputHashAlgo = "sha256";
outputHash = hash;
invalidationHashShort = finalAttrs:
lib.substring 0 10
(builtins.unsafeDiscardStringContext (invalidationHash finalAttrs));
# Multiple outputs are not allowed in an FOD, therefore use passthru
# to export $dist and $names
passthru.dist = "${finalAttrs.finalPackage}/dist";
passthru.names = "${finalAttrs.finalPackage}/names";
namePrefix =
if name == null
then ""
else name + "-";
nativeBuildInputs =
nativeBuildInputs
++ [pythonWithMitmproxy curl cacert];
# A fixed output derivation containing all downloaded packages.
# each single file is located inside a directory named like the package.
# Example:
# "$out/werkzeug" will contain "Werkzeug-0.14.1-py2.py3-none-any.whl"
# Each directory only ever contains a single file
pipDownload = stdenv.mkDerivation (finalAttrs: {
# An invalidation hash is embedded into the `name`.
# This will prevent `forgot to update the hash` scenarios, as any change
# in the derivation name enforces a re-build.
name = "${namePrefix}${nameSuffix}-${invalidationHashShort finalAttrs}";
dontUnpack = true;
dontInstall = true;
dontFixup = true;
# setup FOD
outputHashMode = "recursive";
outputHashAlgo = "sha256";
outputHash = hash;
pythonBin = python.interpreter;
filterPypiResponsesScript = ./filter-pypi-responses.py;
buildScript = ./fetch-python-requirements.py;
writeDependencyTreeScript = ./write-dependency-tree.py;
inherit
pythonWithMitmproxy
pipVersion
requirementsFiles
requirementsList
;
MAX_DATE = builtins.toString maxDate;
pipFlags = lib.concatStringsSep " " pipFlags;
onlyBinaryFlags = lib.optionalString onlyBinary "--only-binary :all: ${
lib.concatStringsSep " " (lib.forEach platforms (pf: "--platform ${pf}"))
}";
requirementsFlags =
lib.optionalString (requirementsFiles != [])
''-r ${lib.concatStringsSep " -r " (map toString finalAttrs.requirementsFiles)}'';
# Multiple outputs are not allowed in an FOD, therefore use passthru
# to export $dist and $names
passthru.dist = "${finalAttrs.finalPackage}/dist";
passthru.names = "${finalAttrs.finalPackage}/names";
buildPhase = ''
$pythonWithMitmproxy/bin/python $buildScript
${lib.optionalString writeDependencyTree "$pythonWithMitmproxy/bin/python $writeDependencyTreeScript $out/dist > $out/dependencies.json"}
'';
});
in
self;
# disable some phases
dontUnpack = true;
dontInstall = true;
dontFixup = true;
# build inputs
nativeBuildInputs =
nativeBuildInputs
++ [pythonWithMitmproxy curl cacert];
# python scripts
filterPypiResponsesScript = ./filter-pypi-responses.py;
buildScript = ./fetch-python-requirements.py;
writeDependencyTreeScript = ./write-dependency-tree.py;
# the python interpreter used to run the build script
pythonBin = python.interpreter;
# the python interpreter used to run the proxy script
inherit pythonWithMitmproxy;
# convert maxDate to string and integrate into finalAttrs
maxDate = builtins.toString maxDate;
# add some variables to the derivation to integrate them into finalAttrs
inherit
pipVersion
requirementsFiles
requirementsList
;
# prepare flags for `pip download`
pipFlags = lib.concatStringsSep " " pipFlags;
onlyBinaryFlags = lib.optionalString onlyBinary "--only-binary :all: ${
lib.concatStringsSep " " (lib.forEach platforms (pf: "--platform ${pf}"))
}";
# - Execute `pip download` through the filtering proxy.
# - optionally add a file to the FOD containing the dependency tree
buildPhase = ''
$pythonWithMitmproxy/bin/python $buildScript
${lib.optionalString writeDependencyTree "$pythonWithMitmproxy/bin/python $writeDependencyTreeScript $out/dist > $out/dependencies.json"}
'';
});
in
fetchPythonRequirements
validateArgs pipDownload

View File

@ -24,14 +24,14 @@ PIP_VERSION = os.getenv("pipVersion")
PIP_FLAGS = os.getenv("pipFlags")
ONLY_BINARY_FLAGS = os.getenv("onlyBinaryFlags")
REQUIREMENTS_LIST = os.getenv("requirementsList")
REQUIREMENTS_FLAGS = os.getenv("requirementsFlags")
REQUIREMENTS_FILES = os.getenv("requirementsFiles")
def get_max_date():
try:
return int(os.getenv("MAX_DATE"))
return int(os.getenv("maxDate"))
except ValueError:
return dateutil.parser.parse(os.getenv("MAX_DATE"))
return dateutil.parser.parse(os.getenv("maxDate"))
def get_free_port():
@ -53,7 +53,7 @@ def start_mitmproxy(port):
"--script",
FILTER_PYPI_RESPONSE_SCRIPTS,
],
env={"MAX_DATE": os.getenv("MAX_DATE"), "HOME": HOME},
env={"maxDate": os.getenv("maxDate"), "HOME": HOME},
)
return proc
@ -121,8 +121,10 @@ if __name__ == "__main__":
PIP_FLAGS,
ONLY_BINARY_FLAGS,
REQUIREMENTS_LIST,
REQUIREMENTS_FLAGS,
]
if REQUIREMENTS_FILES:
optional_flags += ["-r " + " -r ".join(REQUIREMENTS_FILES.split())]
optional_flags = " ".join(filter(None, optional_flags)).split(" ")
pip(
venv_path,

View File

@ -2,7 +2,7 @@
This script is part of fetchPythonRequirements
It is meant to be used with mitmproxy via `--script`
It will filter api responses from the pypi.org api (used by pip),
to only contain files with release date < MAX_DATE
to only contain files with release date < maxDate
For retrieving the release dates for files, it uses the pypi.org json api
It has to do one extra api request for each queried package name
@ -52,9 +52,9 @@ def get_files_to_hide(pname, max_ts):
# accept unix timestamp or human readable format
try:
max_ts = int(os.getenv("MAX_DATE"))
max_ts = int(os.getenv("maxDate"))
except ValueError:
max_date = dateutil.parser.parse(os.getenv("MAX_DATE"))
max_date = dateutil.parser.parse(os.getenv("maxDate"))
max_ts = max_date.timestamp()