feat(fetchPip): use pip 23's new --report feature

phaer 2023-03-30 16:09:15 +02:00
parent 1c7d60c1da
commit 1a9da2931b
4 changed files with 76 additions and 175 deletions
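Background on the feature this commit adopts: pip 23.0 stabilized the --report flag, which makes `pip install --dry-run` write a JSON installation report (the resolved package set, each package's core metadata, and PEP 610 download info including URLs and sha256 hashes) without installing anything. That report replaces the hand-rolled dist-file parsing in write-meta-data.py, which this commit deletes. A minimal sketch of the idea outside of nix (the requirement name and file paths are illustrative):

import json
import subprocess

# Ask pip to resolve "requests" and describe the result in report.json,
# without installing anything. Requires pip >= 23.0 on PATH.
subprocess.run(
    [
        "pip", "install",
        "--dry-run",           # resolve only, install nothing
        "--ignore-installed",  # report the whole closure, not only missing packages
        "--report", "report.json",
        "requests",            # illustrative requirement
    ],
    check=True,
)

with open("report.json") as f:
    report = json.load(f)

for install in report["install"]:
    metadata = install["metadata"]            # core metadata: name, version, requires_dist, ...
    download_info = install["download_info"]  # PEP 610: url, archive_info.hashes, ...
    sha256 = download_info.get("archive_info", {}).get("hashes", {}).get("sha256")
    print(metadata["name"], metadata["version"], download_info["url"], sha256)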

View File

@@ -85,7 +85,7 @@ in {
      type = t.bool;
      default = true;
      description = ''
        Write "dependencies.json" to $out, documenting which package depends on which.
        Write "metadata.json" to $out, documenting which package depends on which.
      '';
    };
  };

View File

@@ -48,7 +48,7 @@
  '',
  # It's better not to refer to python.pkgs.pip directly, as we want to reduce
  # how often we have to update the output hash
  pipVersion ? "23.0",
  pipVersion ? "23.0.1",
  # Write "metadata.json" to $out, documenting which package depends on which.
  writeMetaData ? true,
}: let
@@ -96,7 +96,7 @@
  # we use mitmproxy to filter the pypi responses
  pythonWithMitmproxy =
    python3.withPackages
    (ps: [ps.mitmproxy ps.python-dateutil ps.pkginfo ps.packaging]);
    (ps: [ps.mitmproxy ps.python-dateutil ps.packaging]);
  pythonMajorAndMinorVer =
    lib.concatStringsSep "."
@@ -127,7 +127,6 @@
    # changes with every nixpkgs commit
    ${builtins.readFile finalAttrs.filterPypiResponsesScript}
    ${builtins.readFile finalAttrs.buildScript}
    ${builtins.readFile finalAttrs.writeMetaDataScript}
  '';
  invalidationHashShort = finalAttrs:
@@ -171,7 +170,6 @@
    # python scripts
    filterPypiResponsesScript = ./filter-pypi-responses.py;
    buildScript = ./fetchPip.py;
    writeMetaDataScript = ./write-meta-data.py;
    # the python interpreter used to run the build script
    pythonBin = python.interpreter;
@@ -188,6 +186,7 @@
      pipVersion
      requirementsFiles
      requirementsList
      writeMetaData
      ;
    # prepare flags for `pip download`
@@ -200,7 +199,6 @@
    # - optionally add a file to the FOD containing metadata of the packages involved
    buildPhase = ''
      $pythonWithMitmproxy/bin/python $buildScript
      ${lib.optionalString writeMetaData "$pythonWithMitmproxy/bin/python $writeMetaDataScript $out/dist > $out/metadata.json"}
    '';
  });
in
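A note on the writeMetaData plumbing visible in the hunks above: exported derivation attributes reach the build script as environment strings, with a Nix boolean true rendered as "1" and false as the empty string, so the Python side can rely on plain truthiness. A tiny sketch of that convention (variable names as in this diff):

import os

# Nix exports writeMetaData = true as "1" and false as "" (empty string),
# so a truthiness check on the raw env var works as a feature flag.
WRITE_METADATA = os.getenv("writeMetaData")
if WRITE_METADATA:
    print("metadata report requested")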

View File

@@ -3,11 +3,13 @@ import socket
import ssl
import subprocess
import time
import json
import dateutil.parser
import urllib.request
from pathlib import Path
import certifi
from packaging.requirements import Requirement
from packaging.utils import (
    canonicalize_name,
    parse_sdist_filename,
@@ -26,6 +28,7 @@ NO_BINARY = os.getenv("noBinary")
ONLY_BINARY_FLAGS = os.getenv("onlyBinaryFlags")
REQUIREMENTS_LIST = os.getenv("requirementsList")
REQUIREMENTS_FILES = os.getenv("requirementsFiles")
WRITE_METADATA = os.getenv("writeMetaData")
def get_max_date():
@@ -105,6 +108,8 @@ if __name__ == "__main__":
    names_path = OUT / "names"
    dist_path.mkdir()
    names_path.mkdir()
    cache_path = Path("/build/pip_cache")
    cache_path.mkdir()

    print(f"selected maximum release date for python packages: {get_max_date()}")
    proxy_port = get_free_port()
@@ -113,35 +118,55 @@
    venv_path = Path(".venv").absolute()
    create_venv(venv_path)

    pip(venv_path, "install", "--upgrade", f"pip=={PIP_VERSION}")
    pip(
        venv_path,
        "install",
        "--upgrade",
        f"pip=={PIP_VERSION}",
    )

    cafile = generate_ca_bundle(HOME / ".ca-cert.pem")
    wait_for_proxy(proxy_port, cafile)

    optional_flags = [
    flags = [
        PIP_FLAGS,
        ONLY_BINARY_FLAGS,
        REQUIREMENTS_LIST,
        "--proxy",
        f"https://localhost:{proxy_port}",
        "--progress-bar",
        "off",
        "--cert",
        cafile,
        "--cache-dir",
        cache_path,
    ]
    if REQUIREMENTS_FILES:
        optional_flags += ["-r " + " -r ".join(REQUIREMENTS_FILES.split())]
    if NO_BINARY:
        optional_flags += ["--no-binary " + " --no-binary ".join(NO_BINARY.split())]
    if WRITE_METADATA:
        metadata_flags = ["--report", "/build/report.json"]
    optional_flags = " ".join(filter(None, optional_flags)).split(" ")
    for req in REQUIREMENTS_LIST.split(" "):
        if req:
            flags.append(req)
    for req in REQUIREMENTS_FILES.split(" "):
        if req:
            flags += ["-r", req]
    flags = " ".join(map(str, filter(None, flags))).split(" ")

    pip(
        venv_path,
        "install",
        "--dry-run",
        "--ignore-installed",
        *metadata_flags,
        *flags,
    )
    pip(
        venv_path,
        "download",
        "--no-cache",
        "--dest",
        dist_path,
        "--progress-bar",
        "off",
        "--proxy",
        f"https://localhost:{proxy_port}",
        "--cert",
        cafile,
        *optional_flags,
        *flags,
    )

    proxy.kill()
@@ -156,3 +181,36 @@
        print(f"creating link {name_path} -> {dist_file}")
        name_path.mkdir()
        (name_path / dist_file.name).symlink_to(f"../../dist/{dist_file.name}")

    if WRITE_METADATA:
        packages = dict()
        with open("/build/report.json", "r") as f:
            report = json.load(f)
        for install in report["install"]:
            metadata = install["metadata"]
            name = canonicalize_name(metadata["name"])
            download_info = install["download_info"]
            file = download_info["url"].split("/")[-1]
            hash = download_info.get("archive_info", {}).get("hashes", {}).get("sha256")
            requirements = [
                Requirement(req) for req in metadata.get("requires_dist", [])
            ]
            extras = ""
            dependencies = sorted(
                [
                    canonicalize_name(req.name)
                    for req in requirements
                    if not req.marker or req.marker.evaluate({"extra": extras})
                ]
            )
            packages[name] = dict(
                version=metadata["version"],
                dependencies=dependencies,
                file=file,
                hash=hash,
            )
        with open(OUT / "metadata.json", "w") as f:
            json.dump(packages, f, indent=2)
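For illustration, the resulting $out/metadata.json maps each canonicalized package name to its version, its resolved runtime dependencies, the dist file it came from, and that file's sha256. A hypothetical excerpt (package, version, and dependencies are illustrative; the hash is a placeholder, not a real digest):

{
  "requests": {
    "version": "2.28.2",
    "dependencies": ["certifi", "charset-normalizer", "idna", "urllib3"],
    "file": "requests-2.28.2-py3-none-any.whl",
    "hash": "<sha256 hex digest>"
  }
}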

View File

@@ -1,155 +0,0 @@
#!/usr/bin/env nix-shell
#! nix-shell -i python3 -p python3 python3Packages.pkginfo python3Packages.packaging
"""
Given a directory of python source distributions (.tar.gz) and wheels,
return a JSON representation of their dependency tree.
We want to put each python package into a separate derivation, so nix
needs to know which of those packages depend on each other.
We only care about the graph between packages, as we start from a complete
set of python packages in the right versions, already resolved & fetched
by `pip download`, `mach-nix` or other tools.
That means that version specifiers (PEP 440; https://peps.python.org/pep-0440/)
and extras specified in markers (PEP 508; https://peps.python.org/pep-0508/)
can be ignored for now.
We rely on `pkginfo` (https://pythonhosted.org/pkginfo/) to read `Requires-Dist`
et al. as specified in https://packaging.python.org/en/latest/specifications/core-metadata/#id23,
and we use `packaging` (https://packaging.pypa.io/en/stable/index.html) to parse
dependency declarations.
The output is a JSON object mapping each package name to its version, the
dist file it was read from, and a sorted list of its dependencies; entries
are sorted by package name.
"""
import sys
import tarfile
import zipfile
import json
from pathlib import Path
from importlib.metadata import Distribution
from pkginfo import SDist, Wheel
from packaging.requirements import Requirement
from packaging.utils import (
    parse_sdist_filename,
    parse_wheel_filename,
    canonicalize_name,
)
def _is_tar(pkg_file):
    return pkg_file.suffixes[-2:] == [".tar", ".gz"]


def _is_zip(pkg_file):
    return pkg_file.suffixes[-1] == ".zip"


def _is_source_dist(pkg_file):
    return _is_tar(pkg_file) or _is_zip(pkg_file)


def _get_name_version(pkg_file):
    if _is_source_dist(pkg_file):
        name, version, *_ = parse_sdist_filename(pkg_file.name)
    else:
        name, version, *_ = parse_wheel_filename(pkg_file.name)
    return canonicalize_name(name), version
def get_pkg_info(pkg_file):
    try:
        if pkg_file.suffix == ".whl":
            return Wheel(str(pkg_file))
        elif _is_source_dist(pkg_file):
            return SDist(str(pkg_file))
        else:
            raise NotImplementedError(f"Unknown file format: {pkg_file}")
    except ValueError:
        pass


def _is_required_dependency(requirement):
    # We set the extra field to an empty string to effectively ignore all optional
    # dependencies for now.
    return not requirement.marker or requirement.marker.evaluate({"extra": ""})
def read_source_dist_file(source_dist_file, filename):
    if _is_tar(source_dist_file):
        with tarfile.open(source_dist_file) as tar:
            try:
                with tar.extractfile(filename) as f:
                    return f.read().decode("utf-8")
            except KeyError:
                return
    elif _is_zip(source_dist_file):
        with zipfile.ZipFile(source_dist_file) as zip:
            try:
                with zip.open(filename) as f:
                    return f.read().decode("utf-8")
            except KeyError:
                return


def usage():
    print(f"{sys.argv[0]} <pkgs-directory>")
    sys.exit(1)
if __name__ == "__main__":
    if len(sys.argv) != 2:
        usage()
    pkgs_path = Path(sys.argv[1])
    if not (pkgs_path.exists() and pkgs_path.is_dir()):
        usage()
    dependencies = {}
    for pkg_file in pkgs_path.iterdir():
        pkg_info = get_pkg_info(pkg_file)
        name, version = _get_name_version(pkg_file)
        if pkg_info:
            requirements = [Requirement(req) for req in pkg_info.requires_dist]
        else:
            requirements = []
        # For source distributions which do *not* include modern metadata,
        # we fall back to reading egg-info.
        if not requirements and _is_source_dist(pkg_file):
            if egg_requires := read_source_dist_file(
                pkg_file,
                f"{name}-{version}/{name.replace('-', '_')}.egg-info/requires.txt",
            ):
                requirements = [
                    Requirement(req)
                    for req in Distribution._deps_from_requires_text(egg_requires)
                ]
        # For source distributions which include neither, we fall back to reading
        # requirements.txt. This might be reconsidered in the future, but is
        # currently needed to make things like old ansible releases work.
        if not requirements and _is_source_dist(pkg_file):
            if requirements_txt := read_source_dist_file(
                pkg_file, f"{name}-{version}/requirements.txt"
            ):
                requirements = [
                    Requirement(req)
                    for req in requirements_txt.split("\n")
                    if req and not req.startswith("#")
                ]
        requirements = filter(_is_required_dependency, requirements)
        dependencies[name] = dict(
            version=str(version),
            file=str(pkg_file.relative_to(pkgs_path)),
            dependencies=sorted([canonicalize_name(req.name) for req in requirements]),
        )
    dependencies = dict(sorted(dependencies.items()))
    print(json.dumps(dependencies, indent=2))
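A side note on the {"extra": ""} trick used by `_is_required_dependency` above, and kept in the new report-based code in fetchPip.py: environment markers are evaluated against a caller-supplied environment, so supplying an empty `extra` filters out dependencies that are only required by some extra. A small self-contained illustration with `packaging` (the requirement strings are illustrative, borrowed from requests' metadata):

from packaging.requirements import Requirement

# An unconditional dependency: it has no marker, so it is always required.
plain = Requirement("urllib3>=1.21.1")
assert plain.marker is None

# An optional dependency that is only pulled in via the "socks" extra.
optional = Requirement('PySocks>=1.5.6; extra == "socks"')

# Evaluating the marker with extra="" simulates an install without extras,
# so the optional dependency gets filtered out.
assert optional.marker.evaluate({"extra": ""}) is False
assert optional.marker.evaluate({"extra": "socks"}) is True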