mirror of
https://github.com/nix-community/dream2nix.git
synced 2024-12-24 15:01:56 +03:00
feat: re-implement fetch-python-requirements...
in Python. This canonicalizes package names with the official `packaging` package and handles TLS CA certificates, so that pip does not need --trusted-host args anymore.
This commit is contained in:
parent
d3a15ccda7
commit
9ed740937e
@ -96,7 +96,7 @@ let
|
||||
|
||||
# we use mitmproxy to filter the pypi responses
|
||||
pythonWithMitmproxy =
|
||||
python3.withPackages (ps: [ ps.mitmproxy ps.python-dateutil ]);
|
||||
python3.withPackages (ps: [ ps.mitmproxy ps.python-dateutil ps.packaging]);
|
||||
|
||||
# fixed output derivation containing downloaded packages,
|
||||
# each being symlinked from its normalized name
|
||||
@ -165,7 +165,7 @@ let
|
||||
|
||||
pythonBin = python.interpreter;
|
||||
filterPypiResponsesScript = ./filter-pypi-responses.py;
|
||||
buildScript = ./fetch-python-requirements.sh;
|
||||
buildScript = ./fetch-python-requirements.py;
|
||||
inherit
|
||||
pythonWithMitmproxy
|
||||
pipVersion
|
||||
@ -183,7 +183,7 @@ let
|
||||
'' -r ${lib.concatStringsSep " -r " (map toString finalAttrs.requirementsFiles)}'';
|
||||
|
||||
buildPhase = ''
|
||||
bash $buildScript
|
||||
$pythonWithMitmproxy/bin/python $buildScript
|
||||
'';
|
||||
});
|
||||
in self;
|
||||
|
138
v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py
Normal file
138
v1/nix/pkgs/fetchPythonRequirements/fetch-python-requirements.py
Normal file
@ -0,0 +1,138 @@
|
||||
import os
|
||||
import socket
|
||||
import ssl
|
||||
import subprocess
|
||||
import time
|
||||
import dateutil.parser
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import certifi
|
||||
from packaging.utils import canonicalize_name, parse_sdist_filename, parse_wheel_filename
|
||||
|
||||
|
||||
HOME = Path(os.getcwd())
|
||||
OUT = Path(os.getenv("out"))
|
||||
PYTHON_BIN = os.getenv("pythonBin")
|
||||
PYTHON_WITH_MITM_PROXY = os.getenv("pythonWithMitmproxy")
|
||||
FILTER_PYPI_RESPONSE_SCRIPTS = os.getenv("filterPypiResponsesScript")
|
||||
PIP_VERSION = os.getenv("pipVersion")
|
||||
PIP_FLAGS = os.getenv('pipFlags')
|
||||
ONLY_BINARY_FLAGS = os.getenv('onlyBinaryFlags')
|
||||
REQUIREMENTS_LIST = os.getenv('requirementsList')
|
||||
REQUIREMENTS_FLAGS = os.getenv('requirementsFlags')
|
||||
|
||||
def get_max_date():
    """Return the maximum release date configured through ``MAX_DATE``.

    The ``MAX_DATE`` environment variable is read once. If its value is a
    valid integer literal it is returned as an ``int`` (presumably a Unix
    timestamp -- confirm against the filter script); otherwise the value is
    handed to ``dateutil.parser.parse`` and the parsed result is returned.

    Raises:
        RuntimeError: if ``MAX_DATE`` is not set at all. Without this check
            the failure would be an opaque ``TypeError`` from ``int(None)``.
    """
    raw = os.getenv("MAX_DATE")
    if raw is None:
        raise RuntimeError("MAX_DATE environment variable is not set")
    try:
        return int(raw)
    except ValueError:
        # not an integer: fall back to free-form date parsing
        return dateutil.parser.parse(raw)
|
||||
|
||||
|
||||
def get_free_port():
    """Ask the operating system for a currently unused TCP port.

    A temporary IPv4 stream socket is bound to port 0 on all interfaces,
    which makes the OS pick a free port; that port number is returned after
    the socket has been closed again.

    The socket is managed by a ``with`` block so it is released even when
    ``bind`` or ``getsockname`` raises.

    Note: the port is only known to be free at the time of the call; nothing
    reserves it for the caller afterwards.

    Returns:
        int: the port number chosen by the OS.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(("", 0))
        return sock.getsockname()[1]
|
||||
|
||||
|
||||
def start_mitmproxy(port):
    """Spawn ``mitmdump`` as a background process and return its handle.

    The proxy listens on ``port``, leaves traffic to hosts matching
    ``.*files.pythonhosted.org.*`` untouched and loads the script referenced
    by ``FILTER_PYPI_RESPONSE_SCRIPTS``.

    Args:
        port: TCP port the proxy should listen on.

    Returns:
        subprocess.Popen: handle of the running ``mitmdump`` process.
    """
    command = [
        f"{PYTHON_WITH_MITM_PROXY}/bin/mitmdump",
        "--listen-port", str(port),
        "--ignore-hosts", ".*files.pythonhosted.org.*",
        "--script", FILTER_PYPI_RESPONSE_SCRIPTS,
    ]
    # The child process receives exactly these two variables and nothing
    # else from the parent environment.
    proxy_env = {
        "MAX_DATE": os.getenv('MAX_DATE'),
        "HOME": HOME,
    }
    return subprocess.Popen(command, env=proxy_env)
|
||||
|
||||
|
||||
def wait_for_proxy(proxy_port, cafile, timeout=60 * 5, interval=1):
    """Block until https://pypi.org is reachable through the local proxy.

    The request is repeated until it succeeds with a status below 400 or
    the deadline passes. Each attempt has its own 5 second timeout.

    Args:
        proxy_port: port on 127.0.0.1 the proxy listens on.
        cafile: CA bundle used to verify the TLS connection; passed to
            ``ssl.create_default_context``.
        timeout: maximum number of seconds to keep trying (default: 5 min).
        interval: seconds to pause between failed attempts.

    Raises:
        TimeoutError: if the proxy did not answer successfully in time.
            Previously the function returned silently in that case, so the
            caller carried on against a proxy that never came up.
    """
    req = urllib.request.Request('https://pypi.org')
    req.set_proxy(f'127.0.0.1:{proxy_port}', 'http')
    req.set_proxy(f'127.0.0.1:{proxy_port}', 'https')

    context = ssl.create_default_context(cafile=cafile)
    # monotonic clock: the deadline must not move with wall-clock changes
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # the context manager closes the response after each attempt
            with urllib.request.urlopen(req, None, 5, context=context) as res:
                if res.status < 400:
                    return
        except OSError:
            # URLError/HTTPError are OSError subclasses; refused or reset
            # connections and read timeouts can also surface as plain
            # OSError. All of them mean "not ready yet": retry.
            pass
        # only sleep after a failed attempt, never after success
        time.sleep(interval)
    raise TimeoutError(
        f"proxy on port {proxy_port} did not become ready "
        f"within {timeout} seconds"
    )
|
||||
|
||||
|
||||
# as we only proxy *some* calls, we need to combine upstream
|
||||
# ca certificates and the one from mitm proxy
|
||||
def generate_ca_bundle(path):
    """Write a combined CA bundle to ``path`` and return ``path``.

    Only some requests go through the proxy, so the bundle has to contain
    both the mitmproxy CA certificate (read from
    ``HOME/.mitmproxy/mitmproxy-ca-cert.pem``) and the upstream certificates
    shipped by ``certifi``. The mitmproxy certificate comes first, separated
    from the certifi bundle by a newline.

    Args:
        path: destination file for the combined bundle.

    Returns:
        The ``path`` argument, unchanged.
    """
    sources = (HOME / ".mitmproxy/mitmproxy-ca-cert.pem", certifi.where())
    # read both inputs completely before the output file is opened
    chunks = []
    for source in sources:
        with open(source, "r") as handle:
            chunks.append(handle.read())
    with open(path, "w") as bundle:
        bundle.write("\n".join(chunks))
    return path
|
||||
|
||||
def create_venv(path):
    """Create a virtual environment at ``path`` with the ``PYTHON_BIN`` interpreter.

    Raises:
        subprocess.CalledProcessError: if ``python -m venv`` exits non-zero.
    """
    venv_command = [PYTHON_BIN, '-m', 'venv', path]
    subprocess.run(venv_command, check=True)
|
||||
|
||||
|
||||
def pip(venv_path, *args):
    """Run the ``pip`` executable of the venv at ``venv_path`` with ``args``.

    Args:
        venv_path: root directory of the virtual environment.
        *args: command line arguments passed to pip verbatim.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_executable = f"{venv_path}/bin/pip"
    subprocess.run([pip_executable, *args], check=True)
|
||||
|
||||
|
||||
def _split_flags(*flag_strings):
    """Merge optional flag strings into one flat argument list.

    Unset (``None``) and empty values are dropped. Splitting happens on any
    run of whitespace, so leading, trailing or doubled spaces never produce
    empty-string arguments for pip.
    """
    return " ".join(filter(None, flag_strings)).split()


def _link_dists_by_name(dist_path, names_path):
    """Create ``names/<canonical name>/<file>`` symlinks for all downloads.

    For every file in ``dist_path`` the project name is taken from the wheel
    or sdist file name, canonicalized with ``packaging``, and a directory of
    that name is created below ``names_path``. It contains a relative
    symlink pointing back to the file in ``dist``.
    """
    for dist_file in dist_path.iterdir():
        if dist_file.suffix == '.whl':
            name = parse_wheel_filename(dist_file.name)[0]
        else:
            name = parse_sdist_filename(dist_file.name)[0]
        pname = canonicalize_name(name)
        name_path = names_path / pname
        print(f'creating link {name_path} -> {dist_file}')
        name_path.mkdir()
        (name_path / dist_file.name).symlink_to(f"../../dist/{dist_file.name}")


def main():
    """Download all requirements through the filtering proxy into ``OUT``.

    Steps: create the output directories, start mitmproxy, create a venv
    with the pinned pip version, build the combined CA bundle, wait for the
    proxy, run ``pip download`` through it and finally create the
    per-package symlinks.
    """
    OUT.mkdir()
    dist_path = OUT / "dist"
    names_path = OUT / "names"
    dist_path.mkdir()
    names_path.mkdir()

    print(f"selected maximum release date for python packages: {get_max_date()}")
    proxy_port = get_free_port()

    proxy = start_mitmproxy(proxy_port)
    try:
        # install the specified pip version first so the resolver logic
        # stays reproducible
        venv_path = Path('.venv').absolute()
        create_venv(venv_path)
        pip(venv_path, 'install', '--upgrade', f'pip=={PIP_VERSION}')

        cafile = generate_ca_bundle(HOME / ".ca-cert.pem")
        wait_for_proxy(proxy_port, cafile)

        optional_flags = _split_flags(
            PIP_FLAGS,
            ONLY_BINARY_FLAGS,
            REQUIREMENTS_LIST,
            REQUIREMENTS_FLAGS,
        )
        pip(
            venv_path,
            'download',
            '--no-cache',
            '--dest', dist_path,
            '--progress-bar', 'off',
            '--proxy', f'https://localhost:{proxy_port}',
            '--cert', cafile,
            *optional_flags
        )
    finally:
        # always stop and reap the proxy, also when a step above failed
        proxy.kill()
        proxy.wait()

    _link_dists_by_name(dist_path, names_path)


if __name__ == '__main__':
    main()
|
@ -1,74 +0,0 @@
|
||||
#!/usr/bin/env bash
set -Eeuo pipefail

# Normalize a distribution name: every run of "-", "_" or "." becomes a
# single "-" and the result is lower-cased.
# (The previous sed calls lacked the "g" flag and only replaced the first
# "_" and the first "." of a name.)
normalize_name() {
  printf '%s\n' "$1" | sed -E 's/[-_.]+/-/g' | tr '[:upper:]' '[:lower:]'
}

# the script.py will read this date
pretty=$(python -c '
import os; import dateutil.parser;
try:
    print(int(os.getenv("MAX_DATE")))
except ValueError:
    print(dateutil.parser.parse(os.getenv("MAX_DATE")))
')
echo "selected maximum release date for python packages: $pretty"

# find free port for proxy
proxyPort=$(python -c '\
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("", 0))
print(s.getsockname()[1])
s.close()')

# start proxy to filter pypi responses
# mitmproxy wants HOME set
# mitmdump == mitmproxy without GUI
HOME=$(pwd) $pythonWithMitmproxy/bin/mitmdump \
  --listen-port "$proxyPort" \
  --ignore-hosts '.*files.pythonhosted.org.*' \
  --script $filterPypiResponsesScript &
proxyPID=$!

# make sure the background proxy never outlives this script, even when a
# later command fails and `set -e` aborts the run
trap 'kill "$proxyPID" 2>/dev/null || true' EXIT

# install specified version of pip first to ensure reproducible resolver logic
$pythonBin -m venv .venv
.venv/bin/pip install --upgrade pip==$pipVersion
fetcherPip=.venv/bin/pip

# wait for proxy to come up, but give up after 5 minutes instead of
# looping forever
SECONDS=0
while sleep 0.5; do
  timeout 5 curl -fs --proxy "http://localhost:$proxyPort" http://pypi.org && break
  if (( SECONDS >= 300 )); then
    echo "proxy did not come up within 300 seconds" >&2
    exit 1
  fi
done

# make pip query pypi through the filtering proxy
# FIXME: pip does not return if it crashes. The build will freeze indefinitely.
mkdir "$out"
mkdir "$out/dist"
$fetcherPip download \
  --no-cache \
  --dest "$out/dist" \
  --progress-bar off \
  --proxy http://localhost:$proxyPort \
  --trusted-host pypi.org \
  --trusted-host files.pythonhosted.org \
  $pipFlags \
  $onlyBinaryFlags \
  $(printf " %s" "${requirementsList[@]}") \
  $requirementsFlags

# terminate proxy
echo "killing proxy with PID: $proxyPID"
kill $proxyPID

# create symlinks to allow files being referenced via their normalized package names
# Example:
#   "$out/names/werkzeug" will point to "$out/dist/Werkzeug-0.14.1-py2.py3-none-any.whl"
cd "$out/dist"
mkdir "$out/names"
# iterate with a glob instead of parsing `ls` output, so file names are
# never word-split; nullglob makes an empty dist directory a no-op
shopt -s nullglob
for f in *; do
  if [[ "$f" == *.whl ]]; then
    # wheel: the distribution name is the first "-"-separated field
    pname=$(normalize_name "${f%%-*}")
  else
    # sdist: drop everything from the last "-" on (version and extension)
    pname=$(normalize_name "${f%-*}")
  fi
  echo "creating link $out/names/$pname"
  mkdir "$out/names/$pname"
  ln -s "../../dist/$f" "$out/names/$pname/$f"
done
|
@ -9,7 +9,10 @@ It has to do one extra api request for each queried package name
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import ssl
|
||||
from urllib.request import Request, urlopen
|
||||
from pathlib import Path
|
||||
import dateutil.parser
|
||||
import gzip
|
||||
|
||||
@ -21,11 +24,17 @@ Query the pypi json api to get timestamps for all release files of the given pna
|
||||
return all file names which are newer than the given timestamp
|
||||
"""
|
||||
def get_files_to_hide(pname, max_ts):
|
||||
ca_file = Path(os.getenv('HOME')) / ".ca-cert.pem"
|
||||
context = ssl.create_default_context(cafile=ca_file)
|
||||
if not ca_file.exists():
|
||||
print("mitmproxy ca not found")
|
||||
sys.exit(1)
|
||||
|
||||
# query the api
|
||||
url = f"https://pypi.org/pypi/{pname}/json"
|
||||
req = Request(url)
|
||||
req.add_header('Accept-Encoding', 'gzip')
|
||||
with urlopen(req) as response:
|
||||
with urlopen(req, context=context) as response:
|
||||
content = gzip.decompress(response.read())
|
||||
resp = json.loads(content)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user