diff --git a/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py b/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py index 1a5c62f9..b8b1da7b 100644 --- a/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py +++ b/v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py @@ -8,7 +8,11 @@ import urllib.request from pathlib import Path import certifi -from packaging.utils import canonicalize_name, parse_sdist_filename, parse_wheel_filename +from packaging.utils import ( + canonicalize_name, + parse_sdist_filename, + parse_wheel_filename, +) HOME = Path(os.getcwd()) @@ -17,10 +21,11 @@ PYTHON_BIN = os.getenv("pythonBin") PYTHON_WITH_MITM_PROXY = os.getenv("pythonWithMitmproxy") FILTER_PYPI_RESPONSE_SCRIPTS = os.getenv("filterPypiResponsesScript") PIP_VERSION = os.getenv("pipVersion") -PIP_FLAGS = os.getenv('pipFlags') -ONLY_BINARY_FLAGS = os.getenv('onlyBinaryFlags') -REQUIREMENTS_LIST = os.getenv('requirementsList') -REQUIREMENTS_FLAGS = os.getenv('requirementsFlags') +PIP_FLAGS = os.getenv("pipFlags") +ONLY_BINARY_FLAGS = os.getenv("onlyBinaryFlags") +REQUIREMENTS_LIST = os.getenv("requirementsList") +REQUIREMENTS_FLAGS = os.getenv("requirementsFlags") + def get_max_date(): try: @@ -41,23 +46,23 @@ def start_mitmproxy(port): proc = subprocess.Popen( [ f"{PYTHON_WITH_MITM_PROXY}/bin/mitmdump", - "--listen-port", str(port), - "--ignore-hosts", ".*files.pythonhosted.org.*", - "--script", FILTER_PYPI_RESPONSE_SCRIPTS + "--listen-port", + str(port), + "--ignore-hosts", + ".*files.pythonhosted.org.*", + "--script", + FILTER_PYPI_RESPONSE_SCRIPTS, ], - env = { - "MAX_DATE": os.getenv('MAX_DATE'), - "HOME": HOME - } + env={"MAX_DATE": os.getenv("MAX_DATE"), "HOME": HOME}, ) return proc def wait_for_proxy(proxy_port, cafile): timeout = time.time() + 60 * 5 - req = urllib.request.Request('https://pypi.org') - req.set_proxy(f'127.0.0.1:{proxy_port}', 'http') - req.set_proxy(f'127.0.0.1:{proxy_port}', 'https') + req = 
urllib.request.Request("https://pypi.org") + req.set_proxy(f"127.0.0.1:{proxy_port}", "http") + req.set_proxy(f"127.0.0.1:{proxy_port}", "https") context = ssl.create_default_context(cafile=cafile) while time.time() < timeout: @@ -74,25 +79,26 @@ def wait_for_proxy(proxy_port, cafile): # as we only proxy *some* calls, we need to combine upstream # ca certificates and the one from mitm proxy def generate_ca_bundle(path): - with open(HOME / ".mitmproxy/mitmproxy-ca-cert.pem", "r") as f: - mitmproxy_cacert = f.read() - with open(certifi.where(), "r") as f: - certifi_cacert = f.read() - with open(path, "w") as f: - f.write(mitmproxy_cacert) - f.write("\n") - f.write(certifi_cacert) - return path + with open(HOME / ".mitmproxy/mitmproxy-ca-cert.pem", "r") as f: + mitmproxy_cacert = f.read() + with open(certifi.where(), "r") as f: + certifi_cacert = f.read() + with open(path, "w") as f: + f.write(mitmproxy_cacert) + f.write("\n") + f.write(certifi_cacert) + return path + def create_venv(path): - subprocess.run([PYTHON_BIN, '-m', 'venv', path], check=True) + subprocess.run([PYTHON_BIN, "-m", "venv", path], check=True) def pip(venv_path, *args): subprocess.run([f"{venv_path}/bin/pip", *args], check=True) -if __name__ == '__main__': +if __name__ == "__main__": OUT.mkdir() dist_path = OUT / "dist" names_path = OUT / "names" @@ -104,35 +110,44 @@ if __name__ == '__main__': proxy = start_mitmproxy(proxy_port) - venv_path = Path('.venv').absolute() + venv_path = Path(".venv").absolute() create_venv(venv_path) - pip(venv_path, 'install', '--upgrade', f'pip=={PIP_VERSION}') + pip(venv_path, "install", "--upgrade", f"pip=={PIP_VERSION}") cafile = generate_ca_bundle(HOME / ".ca-cert.pem") wait_for_proxy(proxy_port, cafile) - optional_flags = [PIP_FLAGS, ONLY_BINARY_FLAGS, REQUIREMENTS_LIST, REQUIREMENTS_FLAGS] + optional_flags = [ + PIP_FLAGS, + ONLY_BINARY_FLAGS, + REQUIREMENTS_LIST, + REQUIREMENTS_FLAGS, + ] optional_flags = " ".join(filter(None, optional_flags)).split(" ") pip( 
venv_path, - 'download', - '--no-cache', - '--dest', dist_path, - '--progress-bar', 'off', - '--proxy', f'https://localhost:{proxy_port}', - '--cert', cafile, - *optional_flags + "download", + "--no-cache", + "--dest", + dist_path, + "--progress-bar", + "off", + "--proxy", + f"https://localhost:{proxy_port}", + "--cert", + cafile, + *optional_flags, ) proxy.kill() for dist_file in dist_path.iterdir(): - if dist_file.suffix == '.whl': + if dist_file.suffix == ".whl": name = parse_wheel_filename(dist_file.name)[0] else: name = parse_sdist_filename(dist_file.name)[0] pname = canonicalize_name(name) name_path = names_path / pname - print(f'creating link {name_path} -> {dist_file}') + print(f"creating link {name_path} -> {dist_file}") name_path.mkdir() (name_path / dist_file.name).symlink_to(f"../../dist/{dist_file.name}") diff --git a/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py b/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py index 2e97b53c..549740d0 100644 --- a/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py +++ b/v1/nix/pkgs/fetchPythonRequirements/filter-pypi-responses.py @@ -23,8 +23,10 @@ from mitmproxy import http Query the pypi json api to get timestamps for all release files of the given pname. 
return all file names which are newer than the given timestamp """ + + def get_files_to_hide(pname, max_ts): - ca_file = Path(os.getenv('HOME')) / ".ca-cert.pem" + ca_file = Path(os.getenv("HOME")) / ".ca-cert.pem" context = ssl.create_default_context(cafile=ca_file) if not ca_file.exists(): print("mitmproxy ca not found") @@ -33,18 +35,18 @@ def get_files_to_hide(pname, max_ts): # query the api url = f"https://pypi.org/pypi/{pname}/json" req = Request(url) - req.add_header('Accept-Encoding', 'gzip') + req.add_header("Accept-Encoding", "gzip") with urlopen(req, context=context) as response: content = gzip.decompress(response.read()) resp = json.loads(content) # collect files to hide files = set() - for ver, releases in resp['releases'].items(): + for ver, releases in resp["releases"].items(): for release in releases: - ts = dateutil.parser.parse(release['upload_time']).timestamp() + ts = dateutil.parser.parse(release["upload_time"]).timestamp() if ts > max_ts: - files.add(release['filename']) + files.add(release["filename"]) return files @@ -80,14 +82,16 @@ Response format: } } """ + + def response(flow: http.HTTPFlow) -> None: if not "/simple/" in flow.request.url: return - pname = flow.request.url.strip('/').split('/')[-1] + pname = flow.request.url.strip("/").split("/")[-1] badFiles = get_files_to_hide(pname, max_ts) - keepFile = lambda file: file['filename'] not in badFiles + keepFile = lambda file: file["filename"] not in badFiles data = json.loads(flow.response.text) if badFiles: print(f"removing the following files form the API response:\n {badFiles}") - data['files'] = list(filter(keepFile, data['files'])) + data["files"] = list(filter(keepFile, data["files"])) flow.response.text = json.dumps(data) diff --git a/v1/nix/pkgs/fetchPythonRequirements/write-dependency-tree.py b/v1/nix/pkgs/fetchPythonRequirements/write-dependency-tree.py index 40b1abe9..6924d68d 100755 --- a/v1/nix/pkgs/fetchPythonRequirements/write-dependency-tree.py +++ 
b/v1/nix/pkgs/fetchPythonRequirements/write-dependency-tree.py @@ -1,6 +1,6 @@ #!/usr/bin/env nix-shell #! nix-shell -i python3 -p python3 python3Packages.pkginfo python3Packages.packaging -''' +""" Given a directory of python source distributions (.tar.gz) and wheels, return a JSON representation of their dependency tree. @@ -24,7 +24,7 @@ dependency declarations. The output is a list of tuples. First element in each tuple is the package name, second a list of dependencies. Output is sorted by the number of dependencies, so that leafs of the dependency tree come first, the package to install last. -''' +""" import sys import tarfile @@ -33,16 +33,20 @@ from pathlib import Path from pkginfo import SDist, Wheel from packaging.requirements import Requirement -from packaging.utils import parse_sdist_filename, parse_wheel_filename, canonicalize_name +from packaging.utils import ( + parse_sdist_filename, + parse_wheel_filename, + canonicalize_name, +) def _is_source_dist(pkg_file): - return pkg_file.suffixes[-2:] == ['.tar', '.gz'] + return pkg_file.suffixes[-2:] == [".tar", ".gz"] def _get_name_version(pkg_file): if _is_source_dist(pkg_file): - name, *_ = parse_sdist_filename(pkg_file.name) + name, *_ = parse_sdist_filename(pkg_file.name) else: name, *_ = parse_wheel_filename(pkg_file.name) return canonicalize_name(name) @@ -50,12 +54,12 @@ def get_pkg_info(pkg_file): try: - if pkg_file.suffix == '.whl': - return Wheel(str(pkg_file)) - elif _is_source_dist(pkg_file): - return SDist(str(pkg_file)) - else: - raise NotImplemented(f"Unknown file format: {pkg_file}") + if pkg_file.suffix == ".whl": + return Wheel(str(pkg_file)) + elif _is_source_dist(pkg_file): + return SDist(str(pkg_file)) + else: + raise NotImplementedError(f"Unknown file format: {pkg_file}") except ValueError: pass @@ -63,7 +67,7 @@ def _is_required_dependency(requirement): # We set the extra field to an empty string to effectively ignore all
optional # dependencies for now. - return not requirement.marker or requirement.marker.evaluate({'extra': ""}) + return not requirement.marker or requirement.marker.evaluate({"extra": ""}) def parse_requirements_txt(pkg_file): @@ -72,7 +76,8 @@ def parse_requirements_txt(pkg_file): requirements = [ Requirement(req) for req in requirements_txt.split("\n") - if req and not req.startswith("#")] + if req and not req.startswith("#") + ] return requirements @@ -80,18 +85,18 @@ def read_requirements_txt(source_dist_file): name, version = parse_sdist_filename(source_dist_file.name) with tarfile.open(source_dist_file) as tar: try: - with tar.extractfile(f'{name}-{version}/requirements.txt') as f: - return f.read().decode('utf-8') + with tar.extractfile(f"{name}-{version}/requirements.txt") as f: + return f.read().decode("utf-8") except KeyError as e: return def usage(): - print(f'{sys.argv[0]} ') + print(f"{sys.argv[0]} ") sys.exit(1) -if __name__ == '__main__': +if __name__ == "__main__": if len(sys.argv) != 2: usage() pkgs_path = Path(sys.argv[1]) @@ -113,8 +118,12 @@ if __name__ == '__main__': requirements = parse_requirements_txt(pkg_file) requirements = filter(_is_required_dependency, requirements) - dependencies.append({'name': name, 'dependencies': [canonicalize_name(req.name) for req in requirements]}) - + dependencies.append( + { + "name": name, + "dependencies": [canonicalize_name(req.name) for req in requirements], + } + ) dependencies = sorted(dependencies, key=lambda d: len(d["dependencies"])) print(json.dumps(dependencies, indent=2))