From a677084257afffa254de83295f87a8cc4a4e24e1 Mon Sep 17 00:00:00 2001 From: DavHau Date: Tue, 21 Mar 2023 12:35:54 +0700 Subject: [PATCH] chore(fetchPythonRequirements): refactor code - add and improve comments - reduce nesting in several places - move requirementsFiles flag logic to python - MAX_DATE -> maxDate - respect column 80 wherever possible --- .../fetch-python-requirements.nix | 319 ++++++++++-------- .../fetch-python-requirements.py | 12 +- .../filter-pypi-responses.py | 6 +- 3 files changed, 179 insertions(+), 158 deletions(-) diff --git a/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.nix b/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.nix index 59dad189..939af4a5 100644 --- a/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.nix +++ b/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.nix @@ -1,13 +1,14 @@ -# fetchPythonRequirements downlaods python packages specified by a list of -# pip-style python requirements -# It also requires a maximum date 'maxDate' being specified. -# The result will be as if `pip download` would have been executed -# at the point in time specified by maxDate. +# fetchPythonRequirements downloads python packages specified by executing +# `pip download` on a source tree, or a list of requirements. +# This fetcher requires a maximum date 'maxDate' being specified. +# The result will be the same as if `pip download` would have been executed +# at the point in time specified by maxDate. # This is ensured by putting pip behind a local proxy filtering the -# api responses from pypi.org to only contain files for which the -# release date is lower than the specified maxDate. +# api responses from pypi.org to only contain files for which the +# release date is lower than the specified maxDate. 
# TODO: ignore if packages are yanked -# TODO: for MAX_DATE only allow timestamp or format 2023-01-01 +# TODO: for maxDate only allow timestamp or format 2023-01-01 +# TODO: Error if maxDate points to the future { buildPackages, cacert, @@ -15,40 +16,41 @@ lib, python3, stdenv, +}: { + # Specify the python version for which the packages should be downloaded. + # Pip needs to be executed from that specific python version. + # Pip accepts '--python-version', but this works only for wheel packages. + python, + # hash for the fixed output derivation + hash, + # list of strings of requirements.txt entries + requirementsList ? [], + # list of requirements.txt files + requirementsFiles ? [], + # restrict to binary releases (.whl) + # this allows buildPlatform independent fetching + onlyBinary ? false, + # additional flags for `pip download`. + # for reference see: https://pip.pypa.io/en/stable/cli/pip_download/ + pipFlags ? [], + name ? null, + nameSuffix ? "python-requirements", + nativeBuildInputs ? [], + # maximum release date for packages + maxDate ? + throw '' + 'maxDate' must be specified for fetchPythonRequirements. + Choose any date from the past. + Example value: "2023-01-01" + '', + # It's better to not refer to python.pkgs.pip directly, as we want to reduce + # the times we have to update the output hash + pipVersion ? "23.0", + # Write "dependencies.json" to $out, documenting which package depends on which. + writeDependencyTree ? true, }: let - fetchPythonRequirements = { - # This specifies the python version for which the packages should be downloaded - # Pip needs to be executed from that specific python version. - # Pip accepts '--python-version', but this works only for wheel packages. - python, - # hash for the fixed output derivation - hash, - # list of strings of requirements.txt entries - requirementsList ? [], - # list of requirements.txt files - requirementsFiles ? 
[], - # restrict to binary releases (.whl) - # this allows buildPlatform independent fetching - onlyBinary ? false, - # additional flags for `pip download`. - # for reference see: https://pip.pypa.io/en/stable/cli/pip_download/ - pipFlags ? [], - name ? null, - nameSuffix ? "python-requirements", - nativeBuildInputs ? [], - # maximum release date for packages - maxDate ? - throw '' - 'maxDate' must be specified for fetchPythonRequirements. - Changing this value will affect the output hash - Example value: "2023-01-01" - '', - # It's better to not refer to python.pkgs.pip directly, as we want to reduce - # the times we have to update the output hash - pipVersion ? "23.0", - # Write "dependencies.json" to $out, documenting which package depends on which. - writeDependencyTree ? true, - }: + # throws an error if pipDownload is executed with unsafe arguments + validateArgs = result: # specifying `--platform` for pip download is only allowed in combination with `--only-binary :all:` # therefore, if onlyBinary is disabled, we must enforce targetPlatform == buildPlatform to ensure reproducibility if ! onlyBinary && stdenv.system != stdenv.buildPlatform.system @@ -57,129 +59,146 @@ fetchPythonRequirements cannot fetch sdist packages for ${stdenv.system} on a ${stdenv.buildPlatform.system}. Either build on a ${stdenv.system} or set `onlyBinary = true`. '' - else let - # map nixos system strings to python platforms - sysToPlatforms = { - "x86_64-linux" = [ - "manylinux1_x86_64" - "manylinux2010_x86_64" - "manylinux2014_x86_64" - "linux_x86_64" - ]; - "x86_64-darwin" = - lib.forEach (lib.range 0 15) (minor: "macosx_10_${builtins.toString minor}_x86_64"); - "aarch64-linux" = [ - "manylinux1_aarch64" - "manylinux2010_aarch64" - "manylinux2014_aarch64" - "linux_aarch64" - ]; - }; + else result; - platforms = - if sysToPlatforms ? 
"${stdenv.system}" - then sysToPlatforms."${stdenv.system}" - else - throw '' - 'binaryOnly' fetching is currently not supported for target ${stdenv.system}. - You could set 'binaryOnly = false' and execute the build on a ${stdenv.system}. - ''; + # map nixos system strings to python platforms + sysToPlatforms = { + "x86_64-linux" = [ + "manylinux1_x86_64" + "manylinux2010_x86_64" + "manylinux2014_x86_64" + "linux_x86_64" + ]; + "x86_64-darwin" = + lib.forEach (lib.range 0 15) + (minor: "macosx_10_${builtins.toString minor}_x86_64"); + "aarch64-linux" = [ + "manylinux1_aarch64" + "manylinux2010_aarch64" + "manylinux2014_aarch64" + "linux_aarch64" + ]; + }; - # we use mitmproxy to filter the pypi responses - pythonWithMitmproxy = - python3.withPackages (ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]); + platforms = + if sysToPlatforms ? "${stdenv.system}" + then sysToPlatforms."${stdenv.system}" + else throw errorNoBinaryFetchingForTarget; - # fixed output derivation containing downloaded packages, - # each being symlinked from it's normalized name - # Example: - # "$out/werkzeug" will point to "$out/Werkzeug-0.14.1-py2.py3-none-any.whl" - self = stdenv.mkDerivation (finalAttrs: { - # An invalidation hash is embedded into the `name`. - # This will prevent `forgot to update the hash` scenarios, as any change - # in the derivaiton name enforces a re-build. - name = let - pythonMajorAndMinorVer = - lib.concatStringsSep "." - (lib.sublist 0 2 (lib.splitString "." python.version)); + errorNoBinaryFetchingForTarget = '' + 'onlyBinary' fetching is currently not supported for target ${stdenv.system}. + You could set 'onlyBinary = false' and execute the build on a ${stdenv.system}. + ''; - invalidationHash = builtins.hashString "sha256" '' + # we use mitmproxy to filter the pypi responses + pythonWithMitmproxy = + python3.withPackages + (ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]); - # Ignore the python minor version. 
It should not affect resolution - ${python.implementation} - ${pythonMajorAndMinorVer} - ${stdenv.system} + pythonMajorAndMinorVer = + lib.concatStringsSep "." + (lib.sublist 0 2 (lib.splitString "." python.version)); - # All variables that might influence the output - ${finalAttrs.MAX_DATE} - ${finalAttrs.onlyBinaryFlags} - ${finalAttrs.pipVersion} - ${finalAttrs.pipFlags} - ${toString writeDependencyTree} + invalidationHash = finalAttrs: + builtins.hashString "sha256" '' - # Include requirements - # We hash the content, as store paths might change more often - ${toString finalAttrs.requirementsList} - ${toString finalAttrs.requirementsFiles} + # Ignore the python minor version. It should not affect resolution + ${python.implementation} + ${pythonMajorAndMinorVer} + ${stdenv.system} - # Only hash the content of the python scripts, as the store path - # changes with every nixpkgs commit - ${builtins.readFile finalAttrs.filterPypiResponsesScript} - ${builtins.readFile finalAttrs.buildScript} - ${builtins.readFile finalAttrs.writeDependencyTreeScript} - ''; + # All variables that might influence the output + ${finalAttrs.maxDate} + ${finalAttrs.onlyBinaryFlags} + ${finalAttrs.pipVersion} + ${finalAttrs.pipFlags} + ${toString writeDependencyTree} - invalidationHashShort = - lib.substring 0 10 - (builtins.unsafeDiscardStringContext invalidationHash); + # Include requirements + # We hash the content, as store paths might change more often + ${toString finalAttrs.requirementsList} + ${toString finalAttrs.requirementsFiles} - namePrefix = - if name == null - then "" - else name + "-"; - in "${namePrefix}${nameSuffix}-${invalidationHashShort}"; + # Only hash the content of the python scripts, as the store path + # changes with every nixpkgs commit + ${builtins.readFile finalAttrs.filterPypiResponsesScript} + ${builtins.readFile finalAttrs.buildScript} + ${builtins.readFile finalAttrs.writeDependencyTreeScript} + ''; - outputHashMode = "recursive"; - outputHashAlgo = 
"sha256"; - outputHash = hash; + invalidationHashShort = finalAttrs: + lib.substring 0 10 + (builtins.unsafeDiscardStringContext (invalidationHash finalAttrs)); - # Multiple outputs are not allowed in an FOD, therefore use passthru - # to export $dist and $names - passthru.dist = "${finalAttrs.finalPackage}/dist"; - passthru.names = "${finalAttrs.finalPackage}/names"; + namePrefix = + if name == null + then "" + else name + "-"; - nativeBuildInputs = - nativeBuildInputs - ++ [pythonWithMitmproxy curl cacert]; + # A fixed output derivation containing all downloaded packages. + # each single file is located inside a directory named like the package. + # Example: + # "$out/werkzeug" will contain "Werkzeug-0.14.1-py2.py3-none-any.whl" + # Each directory only ever contains a single file + pipDownload = stdenv.mkDerivation (finalAttrs: { + # An invalidation hash is embedded into the `name`. + # This will prevent `forgot to update the hash` scenarios, as any change + # in the derivation name enforces a re-build. 
+ name = "${namePrefix}${nameSuffix}-${invalidationHashShort finalAttrs}"; - dontUnpack = true; - dontInstall = true; - dontFixup = true; + # setup FOD + outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + outputHash = hash; - pythonBin = python.interpreter; - filterPypiResponsesScript = ./filter-pypi-responses.py; - buildScript = ./fetch-python-requirements.py; - writeDependencyTreeScript = ./write-dependency-tree.py; - inherit - pythonWithMitmproxy - pipVersion - requirementsFiles - requirementsList - ; - MAX_DATE = builtins.toString maxDate; - pipFlags = lib.concatStringsSep " " pipFlags; - onlyBinaryFlags = lib.optionalString onlyBinary "--only-binary :all: ${ - lib.concatStringsSep " " (lib.forEach platforms (pf: "--platform ${pf}")) - }"; - requirementsFlags = - lib.optionalString (requirementsFiles != []) - ''-r ${lib.concatStringsSep " -r " (map toString finalAttrs.requirementsFiles)}''; + # Multiple outputs are not allowed in an FOD, therefore use passthru + # to export $dist and $names + passthru.dist = "${finalAttrs.finalPackage}/dist"; + passthru.names = "${finalAttrs.finalPackage}/names"; - buildPhase = '' - $pythonWithMitmproxy/bin/python $buildScript - ${lib.optionalString writeDependencyTree "$pythonWithMitmproxy/bin/python $writeDependencyTreeScript $out/dist > $out/dependencies.json"} - ''; - }); - in - self; + # disable some phases + dontUnpack = true; + dontInstall = true; + dontFixup = true; + + # build inputs + nativeBuildInputs = + nativeBuildInputs + ++ [pythonWithMitmproxy curl cacert]; + + # python scripts + filterPypiResponsesScript = ./filter-pypi-responses.py; + buildScript = ./fetch-python-requirements.py; + writeDependencyTreeScript = ./write-dependency-tree.py; + + # the python interpreter used to run the build script + pythonBin = python.interpreter; + + # the python interpreter used to run the proxy script + inherit pythonWithMitmproxy; + + # convert maxDate to string and integrate into finalAttrs + maxDate = 
builtins.toString maxDate; + + # add some variables to the derivation to integrate them into finalAttrs + inherit + pipVersion + requirementsFiles + requirementsList + ; + + # prepare flags for `pip download` + pipFlags = lib.concatStringsSep " " pipFlags; + onlyBinaryFlags = lib.optionalString onlyBinary "--only-binary :all: ${ + lib.concatStringsSep " " (lib.forEach platforms (pf: "--platform ${pf}")) + }"; + + # - Execute `pip download` through the filtering proxy. + # - optionally add a file to the FOD containing the dependency tree + buildPhase = '' + $pythonWithMitmproxy/bin/python $buildScript + ${lib.optionalString writeDependencyTree "$pythonWithMitmproxy/bin/python $writeDependencyTreeScript $out/dist > $out/dependencies.json"} + ''; + }); in - fetchPythonRequirements + validateArgs pipDownload diff --git a/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py b/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py index b8b1da7b..f984c49a 100644 --- a/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py +++ b/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py @@ -24,14 +24,14 @@ PIP_VERSION = os.getenv("pipVersion") PIP_FLAGS = os.getenv("pipFlags") ONLY_BINARY_FLAGS = os.getenv("onlyBinaryFlags") REQUIREMENTS_LIST = os.getenv("requirementsList") -REQUIREMENTS_FLAGS = os.getenv("requirementsFlags") +REQUIREMENTS_FILES = os.getenv("requirementsFiles") def get_max_date(): try: - return int(os.getenv("MAX_DATE")) + return int(os.getenv("maxDate")) except ValueError: - return dateutil.parser.parse(os.getenv("MAX_DATE")) + return dateutil.parser.parse(os.getenv("maxDate")) def get_free_port(): @@ -53,7 +53,7 @@ def start_mitmproxy(port): "--script", FILTER_PYPI_RESPONSE_SCRIPTS, ], - env={"MAX_DATE": os.getenv("MAX_DATE"), "HOME": HOME}, + env={"maxDate": os.getenv("maxDate"), "HOME": HOME}, ) return proc @@ -121,8 +121,10 @@ if __name__ == "__main__": PIP_FLAGS, ONLY_BINARY_FLAGS, REQUIREMENTS_LIST, - 
REQUIREMENTS_FLAGS, ] + if REQUIREMENTS_FILES: + optional_flags += ["-r " + " -r ".join(REQUIREMENTS_FILES.split())] + optional_flags = " ".join(filter(None, optional_flags)).split(" ") pip( venv_path, diff --git a/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py b/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py index 549740d0..a09d9399 100644 --- a/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py +++ b/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py @@ -2,7 +2,7 @@ This script is part of fetchPythonRequirements It is meant to be used with mitmproxy via `--script` It will filter api repsonses from the pypi.org api (used by pip), -to only contain files with release date < MAX_DATE +to only contain files with release date < maxDate For retrieving the release dates for files, it uses the pypi.org json api It has to do one extra api request for each queried package name @@ -52,9 +52,9 @@ def get_files_to_hide(pname, max_ts): # accept unix timestamp or human readable format try: - max_ts = int(os.getenv("MAX_DATE")) + max_ts = int(os.getenv("maxDate")) except ValueError: - max_date = dateutil.parser.parse(os.getenv("MAX_DATE")) + max_date = dateutil.parser.parse(os.getenv("maxDate")) max_ts = max_date.timestamp()