From 4d4ab9e35be9e8e8118ab2b12288b31b1e0976d6 Mon Sep 17 00:00:00 2001 From: phaer Date: Fri, 31 Mar 2023 17:35:54 +0200 Subject: [PATCH] feat(fetchPipMetadata): convert to script, pass... args as json --- .../fetchPipMetadata/fetchPipMetadata.nix | 58 ++---- .../pkgs/fetchPipMetadata/fetchPipMetadata.py | 179 +++++++++--------- 2 files changed, 108 insertions(+), 129 deletions(-) diff --git a/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.nix b/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.nix index 25eb3aaf..f2a0cec1 100644 --- a/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.nix +++ b/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.nix @@ -10,9 +10,9 @@ # TODO: for pypiSnapshotDate only allow timestamp or format 2023-01-01 # TODO: Error if pypiSnapshotDate points to the future { - buildPackages, lib, - stdenv, + writers, + writeText, # Use the nixpkgs default python version for the proxy script. # The python version select by the user below might be too old for the # dependencies required by the proxy @@ -27,7 +27,6 @@ # list of requirements.txt files requirementsFiles ? [], pipFlags ? [], - name ? "pip-metadata", nativeBuildInputs ? [], # maximum release date for packages pypiSnapshotDate ? @@ -40,46 +39,24 @@ # We use nixpkgs python3 to run mitmproxy, see function parameters pythonWithMitmproxy = python3.withPackages - (ps: [ps.mitmproxy ps.dateutil]); + (ps: [ps.mitmproxy ps.python-dateutil]); # We use the user-selected python to run pip and friends, this ensures # that version-related markers are resolved correctly. - pythonWithPackaging = - python.withPackages - (ps: [ps.packaging ps.certifi ps.dateutil ps.pip]); + writePython = writers.makePythonWriter python python.pkgs python.pkgs; - pythonMajorAndMinorVer = - lib.concatStringsSep "." - (lib.sublist 0 2 (lib.splitString "." python.version)); + fetchPipMetadata = + writePython + "fetch_pip_metadata" + {libraries = with python.pkgs; [packaging certifi python-dateutil pip];} + ./fetchPipMetadata.py; - # A fixed output derivation containing all downloaded packages. - # each single file is located inside a directory named like the package. - # Example: - # "$out/werkzeug" will contain "Werkzeug-0.14.1-py2.py3-none-any.whl" - # Each directory only ever contains a single file - pipDownload = stdenv.mkDerivation (finalAttrs: { - # An invalidation hash is embedded into the `name`. - # This will prevent `forgot to update the hash` scenarios, as any change - # in the derivaiton name enforces a re-build. - inherit name; - - # disable some phases - dontUnpack = true; - dontInstall = true; - dontFixup = true; - - # build inputs - nativeBuildInputs = nativeBuildInputs ++ [pythonWithMitmproxy]; - - # python scripts + args = writeText "pip-args" (builtins.toJSON { filterPypiResponsesScript = ../fetchPip/filter-pypi-responses.py; buildScript = ./fetchPipMetadata.py; - # the python interpreter used to run the build script - inherit pythonWithPackaging; - # the python interpreter used to run the proxy script - inherit pythonWithMitmproxy; + mitmProxy = "${pythonWithMitmproxy}/bin/mitmdump"; # convert pypiSnapshotDate to string and integrate into finalAttrs pypiSnapshotDate = builtins.toString pypiSnapshotDate; @@ -88,15 +65,10 @@ inherit requirementsFiles requirementsList + pipFlags ; - - # prepare flags for `pip download` - pipFlags = lib.concatStringsSep " " pipFlags; - # - Execute `pip download` through the filtering proxy. - # - optionally add a file to the FOD containing metadata of the packages involved - buildPhase = '' - $pythonWithPackaging/bin/python $buildScript - ''; }); in - pipDownload + writers.writeBash "fetch_pip_metadata" '' + ${fetchPipMetadata} ${args} + '' diff --git a/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.py b/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.py index 9ae66ab0..6a5c4752 100644 --- a/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.py +++ b/v1/nix/pkgs/fetchPipMetadata/fetchPipMetadata.py @@ -1,7 +1,9 @@ import os +import sys import socket import subprocess import time +import tempfile import json import dateutil.parser import urllib.request @@ -14,20 +16,11 @@ from packaging.utils import ( ) -HOME = Path(os.getcwd()) -OUT = Path(os.getenv("out")) -PYTHON_WITH_MITM_PROXY = os.getenv("pythonWithMitmproxy") -FILTER_PYPI_RESPONSE_SCRIPTS = os.getenv("filterPypiResponsesScript") -PIP_FLAGS = os.getenv("pipFlags") -REQUIREMENTS_LIST = os.getenv("requirementsList") -REQUIREMENTS_FILES = os.getenv("requirementsFiles") - - -def get_max_date(): +def get_max_date(args): try: - return int(os.getenv("pypiSnapshotDate")) + return int(args["pypiSnapshotDate"]) except ValueError: - return dateutil.parser.parse(os.getenv("pypiSnapshotDate")) + return dateutil.parser.parse(args["pypiSnapshotDate"]) def get_free_port(): @@ -38,19 +31,22 @@ def get_free_port(): return port -def start_mitmproxy(port): +def start_mitmproxy(args, home, port): proc = subprocess.Popen( [ - f"{PYTHON_WITH_MITM_PROXY}/bin/mitmdump", + args["mitmProxy"], "--listen-port", str(port), + "--quiet", "--anticache", "--ignore-hosts", ".*files.pythonhosted.org.*", "--script", - FILTER_PYPI_RESPONSE_SCRIPTS, + args["filterPypiResponsesScript"], ], - env={"pypiSnapshotDate": os.getenv("pypiSnapshotDate"), "HOME": HOME}, + stdout=sys.stderr, + stderr=sys.stderr, + env={"pypiSnapshotDate": args["pypiSnapshotDate"], "HOME": home}, ) return proc @@ -73,8 +69,9 @@ def wait_for_proxy(proxy_port): # as we only proxy *some* calls, we need to combine upstream # ca certificates and the one from mitm proxy -def generate_ca_bundle(path): - with open(HOME / ".mitmproxy/mitmproxy-ca-cert.pem", "r") as f: +def generate_ca_bundle(home, path): + path = home / path + with open(home / ".mitmproxy/mitmproxy-ca-cert.pem", "r") as f: mitmproxy_cacert = f.read() with open(certifi.where(), "r") as f: certifi_cacert = f.read() @@ -85,78 +82,88 @@ def generate_ca_bundle(path): return path -def pip(*args): - subprocess.run(["pip", *args], check=True) +def pip(*params): + subprocess.run( + [sys.executable, "-m", "pip", *params], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) if __name__ == "__main__": - print( - f"selected maximum release date for python packages: {get_max_date()}" - ) # noqa: E501 - proxy_port = get_free_port() + with open(sys.argv[1], "r") as f: + args = json.load(f) - proxy = start_mitmproxy(proxy_port) - wait_for_proxy(proxy_port) - cafile = generate_ca_bundle(HOME / ".ca-cert.pem") + with tempfile.TemporaryDirectory() as home: + home = Path(home) - flags = [ - PIP_FLAGS, - "--proxy", - f"https://localhost:{proxy_port}", - "--progress-bar", - "off", - "--cert", - cafile, - "--report", - str(OUT / "report.json"), - ] - for req in REQUIREMENTS_LIST.split(" "): - if req: - flags.append(req) - for req in REQUIREMENTS_FILES.split(" "): - if req: - flags += ["-r", req] - - flags = " ".join(map(str, filter(None, flags))).split(" ") - pip( - "install", - "--dry-run", - "--ignore-installed", - *flags, - ) - proxy.kill() - - packages = dict() - extras = "" - with open(OUT / "report.json", "r") as f: - report = json.load(f) - - for install in report["install"]: - metadata = install["metadata"] - name = canonicalize_name(metadata["name"]) - - download_info = install["download_info"] - url = download_info["url"] - sha256 = ( - download_info.get("archive_info", {}) - .get("hashes", {}) - .get("sha256") # noqa: E501 + print( + f"selected maximum release date for python packages: {get_max_date(args)}", # noqa: E501 + file=sys.stderr, ) - requirements = [ - Requirement(req) for req in metadata.get("requires_dist", []) - ] # noqa: E501 - dependencies = sorted( - [ - canonicalize_name(req.name) - for req in requirements - if not req.marker or req.marker.evaluate({"extra": extras}) - ] + proxy_port = get_free_port() + + proxy = start_mitmproxy(args, home, proxy_port) + wait_for_proxy(proxy_port) + cafile = generate_ca_bundle(home, ".ca-cert.pem") + + flags = args["pipFlags"] + [ + "--proxy", + f"https://localhost:{proxy_port}", + "--progress-bar", + "off", + "--cert", + cafile, + "--report", + str(home / "report.json"), + ] + for req in args["requirementsList"]: + if req: + flags.append(req) + for req in args["requirementsFiles"]: + if req: + flags += ["-r", req] + + pip( + "install", + "--dry-run", + "--ignore-installed", + *flags, ) - packages[name] = dict( - version=metadata["version"], - dependencies=dependencies, - url=url, - sha256=sha256, - ) - with open(OUT / "metadata.json", "w") as f: - json.dump(packages, f, indent=2) + proxy.kill() + + packages = dict() + extras = "" + with open(home / "report.json", "r") as f: + report = json.load(f) + + for install in report["install"]: + metadata = install["metadata"] + name = canonicalize_name(metadata["name"]) + + download_info = install["download_info"] + url = download_info["url"] + sha256 = ( + download_info.get("archive_info", {}) + .get("hashes", {}) + .get("sha256") # noqa: E501 + ) + requirements = [ + Requirement(req) for req in metadata.get("requires_dist", []) + ] # noqa: E501 + dependencies = sorted( + [ + canonicalize_name(req.name) + for req in requirements + if not req.marker or req.marker.evaluate({"extra": extras}) + ] + ) + packages[name] = dict( + version=metadata["version"], + dependencies=dependencies, + url=url, + sha256=sha256, + ) + with open(os.getenv("out"), "w") as f: + json.dump(packages, f, indent=2)