#!/usr/bin/env nix-shell #! nix-shell -i python3 -p python3 python3Packages.pkginfo python3Packages.packaging """ Given a directory of python source distributions (.tar.gz) and wheels, return a JSON representation of their dependency tree. We want to put each python package into a separate derivation, therefore nix needs to know which of those packages depend on each other. We only care about the graph between packages, as we start from a complete set of python packages in the right version - resolved & fetched by `pip download`, `mach-nix` or other tools. That means that version specifiers as in (PEP 440; https://peps.python.org/pep-0508/) and extras specified in markers (PEP 508; https://peps.python.org/pep-0508/) can be ignored for now. We rely on `pkginfo` (https://pythonhosted.org/pkginfo/) to read `Requires-Dist` et al as specified in https://packaging.python.org/en/latest/specifications/core-metadata/#id23 And we use `packaging` (https://packaging.pypa.io/en/stable/index.html) to parse dependency declarations. The output is a list of tuples. First element in each tuple is the package name, second a list of dependencies. Output is sorted by the number of dependencies, so that leafs of the dependency tree come first, the package to install last. """ import sys import tarfile import zipfile import json from pathlib import Path from importlib.metadata import Distribution from pkginfo import SDist, Wheel from packaging.requirements import Requirement from packaging.utils import ( parse_sdist_filename, parse_wheel_filename, canonicalize_name, ) def _is_tar(pkg_file): return pkg_file.suffixes[-2:] == [".tar", ".gz"] def _is_zip(pkg_file): return pkg_file.suffixes[-1] == ".zip" def _is_source_dist(pkg_file): return _is_tar(pkg_file) or _is_zip(pkg_file) def _get_name_version(pkg_file): if _is_source_dist(pkg_file): name, version, *_ = parse_sdist_filename(pkg_file.name) else: name, version, *_ = parse_wheel_filename(pkg_file.name) return canonicalize_name(name), version def get_pkg_info(pkg_file): try: if pkg_file.suffix == ".whl": return Wheel(str(pkg_file)) elif _is_source_dist(pkg_file): return SDist(str(pkg_file)) else: raise NotImplementedError(f"Unknown file format: {pkg_file}") except ValueError: pass def _is_required_dependency(requirement): # We set the extra field to an empty string to effectively ignore all optional # dependencies for now. return not requirement.marker or requirement.marker.evaluate({"extra": ""}) def read_source_dist_file(source_dist_file, filename): if _is_tar(source_dist_file): with tarfile.open(source_dist_file) as tar: try: with tar.extractfile(filename) as f: return f.read().decode("utf-8") except KeyError as e: return elif _is_zip(source_dist_file): with zipfile.ZipFile(source_dist_file) as zip: try: with zip.open(filename) as f: return f.read().decode("utf-8") except KeyError as e: return def usage(): print(f"{sys.argv[0]} ") sys.exit(1) if __name__ == "__main__": if len(sys.argv) != 2: usage() pkgs_path = Path(sys.argv[1]) if not (pkgs_path.exists and pkgs_path.is_dir()): usage() dependencies = {} for pkg_file in pkgs_path.iterdir(): info = get_pkg_info(pkg_file) name, version = _get_name_version(pkg_file) if info: requirements = [Requirement(req) for req in info.requires_dist] else: requirements = [] # For source distributions which do *not* include modern metadata, # we fallback to reading egg-info. if not requirements and _is_source_dist(pkg_file): if egg_requires := read_source_dist_file( pkg_file, f"{name}-{version}/{name.replace('-', '_')}.egg-info/requires.txt", ): requirements = [ Requirement(req) for req in Distribution._deps_from_requires_text(egg_requires) ] # For source distributions which include neither, we fallback to reading requirements.txt # This might be re-considered in the future but is currently needed to make things like # old ansible releases work. if not requirements and _is_source_dist(pkg_file): if requirements_txt := read_source_dist_file( pkg_file, f"{name}-{version}/requirements.txt" ): requirements = [ Requirement(req) for req in requirements_txt.split("\n") if req and not req.startswith("#") ] requirements = filter(_is_required_dependency, requirements) dependencies[name] = sorted( [canonicalize_name(req.name) for req in requirements] ) dependencies = dict(sorted(dependencies.items())) print(json.dumps(dependencies, indent=2))