chore(formatting): reformat v1 code - python files

DavHau 2023-03-12 22:29:53 +07:00
parent d96e7234e3
commit 600a2cbecc
3 changed files with 93 additions and 65 deletions

View File

@@ -8,7 +8,11 @@ import urllib.request
 from pathlib import Path

 import certifi
-from packaging.utils import canonicalize_name, parse_sdist_filename, parse_wheel_filename
+from packaging.utils import (
+    canonicalize_name,
+    parse_sdist_filename,
+    parse_wheel_filename,
+)


 HOME = Path(os.getcwd())
@@ -17,10 +21,11 @@ PYTHON_BIN = os.getenv("pythonBin")
 PYTHON_WITH_MITM_PROXY = os.getenv("pythonWithMitmproxy")
 FILTER_PYPI_RESPONSE_SCRIPTS = os.getenv("filterPypiResponsesScript")
 PIP_VERSION = os.getenv("pipVersion")
-PIP_FLAGS = os.getenv('pipFlags')
-ONLY_BINARY_FLAGS = os.getenv('onlyBinaryFlags')
-REQUIREMENTS_LIST = os.getenv('requirementsList')
-REQUIREMENTS_FLAGS = os.getenv('requirementsFlags')
+PIP_FLAGS = os.getenv("pipFlags")
+ONLY_BINARY_FLAGS = os.getenv("onlyBinaryFlags")
+REQUIREMENTS_LIST = os.getenv("requirementsList")
+REQUIREMENTS_FLAGS = os.getenv("requirementsFlags")
+

 def get_max_date():
     try:
@@ -41,23 +46,23 @@ def start_mitmproxy(port):
     proc = subprocess.Popen(
         [
             f"{PYTHON_WITH_MITM_PROXY}/bin/mitmdump",
-            "--listen-port", str(port),
-            "--ignore-hosts", ".*files.pythonhosted.org.*",
-            "--script", FILTER_PYPI_RESPONSE_SCRIPTS
+            "--listen-port",
+            str(port),
+            "--ignore-hosts",
+            ".*files.pythonhosted.org.*",
+            "--script",
+            FILTER_PYPI_RESPONSE_SCRIPTS,
         ],
-        env = {
-            "MAX_DATE": os.getenv('MAX_DATE'),
-            "HOME": HOME
-        }
+        env={"MAX_DATE": os.getenv("MAX_DATE"), "HOME": HOME},
     )
     return proc
-


 def wait_for_proxy(proxy_port, cafile):
     timeout = time.time() + 60 * 5
-    req = urllib.request.Request('https://pypi.org')
-    req.set_proxy(f'127.0.0.1:{proxy_port}', 'http')
-    req.set_proxy(f'127.0.0.1:{proxy_port}', 'https')
+    req = urllib.request.Request("https://pypi.org")
+    req.set_proxy(f"127.0.0.1:{proxy_port}", "http")
+    req.set_proxy(f"127.0.0.1:{proxy_port}", "https")
     context = ssl.create_default_context(cafile=cafile)
     while time.time() < timeout:
@@ -74,25 +79,26 @@ def wait_for_proxy(proxy_port, cafile):
 # as we only proxy *some* calls, we need to combine upstream
 # ca certificates and the one from mitm proxy
 def generate_ca_bundle(path):
-    with open(HOME / ".mitmproxy/mitmproxy-ca-cert.pem", "r") as f:
-        mitmproxy_cacert = f.read()
-    with open(certifi.where(), "r") as f:
-        certifi_cacert = f.read()
-    with open(path, "w") as f:
-        f.write(mitmproxy_cacert)
-        f.write("\n")
-        f.write(certifi_cacert)
-    return path
+    with open(HOME / ".mitmproxy/mitmproxy-ca-cert.pem", "r") as f:
+        mitmproxy_cacert = f.read()
+    with open(certifi.where(), "r") as f:
+        certifi_cacert = f.read()
+    with open(path, "w") as f:
+        f.write(mitmproxy_cacert)
+        f.write("\n")
+        f.write(certifi_cacert)
+    return path


 def create_venv(path):
-    subprocess.run([PYTHON_BIN, '-m', 'venv', path], check=True)
+    subprocess.run([PYTHON_BIN, "-m", "venv", path], check=True)


 def pip(venv_path, *args):
     subprocess.run([f"{venv_path}/bin/pip", *args], check=True)

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     OUT.mkdir()
     dist_path = OUT / "dist"
     names_path = OUT / "names"
@@ -104,35 +110,44 @@ if __name__ == '__main__':

     proxy = start_mitmproxy(proxy_port)

-    venv_path = Path('.venv').absolute()
+    venv_path = Path(".venv").absolute()
     create_venv(venv_path)
-    pip(venv_path, 'install', '--upgrade', f'pip=={PIP_VERSION}')
+    pip(venv_path, "install", "--upgrade", f"pip=={PIP_VERSION}")

     cafile = generate_ca_bundle(HOME / ".ca-cert.pem")
     wait_for_proxy(proxy_port, cafile)

-    optional_flags = [PIP_FLAGS, ONLY_BINARY_FLAGS, REQUIREMENTS_LIST, REQUIREMENTS_FLAGS]
+    optional_flags = [
+        PIP_FLAGS,
+        ONLY_BINARY_FLAGS,
+        REQUIREMENTS_LIST,
+        REQUIREMENTS_FLAGS,
+    ]
     optional_flags = " ".join(filter(None, optional_flags)).split(" ")
     pip(
         venv_path,
-        'download',
-        '--no-cache',
-        '--dest', dist_path,
-        '--progress-bar', 'off',
-        '--proxy', f'https://localhost:{proxy_port}',
-        '--cert', cafile,
-        *optional_flags
+        "download",
+        "--no-cache",
+        "--dest",
+        dist_path,
+        "--progress-bar",
+        "off",
+        "--proxy",
+        f"https://localhost:{proxy_port}",
+        "--cert",
+        cafile,
+        *optional_flags,
     )
     proxy.kill()

     for dist_file in dist_path.iterdir():
-        if dist_file.suffix == '.whl':
+        if dist_file.suffix == ".whl":
            name = parse_wheel_filename(dist_file.name)[0]
         else:
            name = parse_sdist_filename(dist_file.name)[0]
         pname = canonicalize_name(name)
         name_path = names_path / pname
-        print(f'creating link {name_path} -> {dist_file}')
+        print(f"creating link {name_path} -> {dist_file}")
         name_path.mkdir()
         (name_path / dist_file.name).symlink_to(f"../../dist/{dist_file.name}")
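
The flag handling reformatted above is easy to misread, so here is the idiom in isolation. A minimal sketch with hypothetical flag values, not values from this commit: unset env vars arrive as None, set ones as space-separated strings that must become separate argv items.

    # filter(None, ...) drops the unset entries; the join/split turns the
    # remaining strings into one argv item per whitespace-separated token
    optional_flags = [None, "--only-binary :all:", None, "-r requirements.txt"]
    flat = " ".join(filter(None, optional_flags)).split(" ")
    print(flat)  # ['--only-binary', ':all:', '-r', 'requirements.txt']

    # edge case: with every variable unset, "".split(" ") yields [""],
    # i.e. pip would receive a single empty-string argument
    print(" ".join(filter(None, [])).split(" "))  # ['']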

View File

@@ -23,8 +23,10 @@ from mitmproxy import http
 Query the pypi json api to get timestamps for all release files of the given pname.
 return all file names which are newer than the given timestamp
 """
+
+
 def get_files_to_hide(pname, max_ts):
-    ca_file = Path(os.getenv('HOME')) / ".ca-cert.pem"
+    ca_file = Path(os.getenv("HOME")) / ".ca-cert.pem"
     context = ssl.create_default_context(cafile=ca_file)
     if not ca_file.exists():
         print("mitmproxy ca not found")
@@ -33,18 +35,18 @@ def get_files_to_hide(pname, max_ts):
     # query the api
     url = f"https://pypi.org/pypi/{pname}/json"
     req = Request(url)
-    req.add_header('Accept-Encoding', 'gzip')
+    req.add_header("Accept-Encoding", "gzip")
     with urlopen(req, context=context) as response:
         content = gzip.decompress(response.read())
         resp = json.loads(content)

     # collect files to hide
     files = set()
-    for ver, releases in resp['releases'].items():
+    for ver, releases in resp["releases"].items():
         for release in releases:
-            ts = dateutil.parser.parse(release['upload_time']).timestamp()
+            ts = dateutil.parser.parse(release["upload_time"]).timestamp()
             if ts > max_ts:
-                files.add(release['filename'])
+                files.add(release["filename"])
     return files
@@ -80,14 +82,16 @@ Response format:
   }
 }
 """
+
+
 def response(flow: http.HTTPFlow) -> None:
     if not "/simple/" in flow.request.url:
         return
-    pname = flow.request.url.strip('/').split('/')[-1]
+    pname = flow.request.url.strip("/").split("/")[-1]
     badFiles = get_files_to_hide(pname, max_ts)
-    keepFile = lambda file: file['filename'] not in badFiles
+    keepFile = lambda file: file["filename"] not in badFiles
     data = json.loads(flow.response.text)
     if badFiles:
         print(f"removing the following files form the API response:\n {badFiles}")
-    data['files'] = list(filter(keepFile, data['files']))
+    data["files"] = list(filter(keepFile, data["files"]))
     flow.response.text = json.dumps(data)
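
For context on the filter being reformatted here: get_files_to_hide compares each release file's upload_time against a cutoff timestamp derived from MAX_DATE and hides anything newer. The comparison in isolation, with made-up dates that are not values from this commit:

    import dateutil.parser

    # hypothetical cutoff timestamp, analogous to MAX_DATE
    max_ts = dateutil.parser.parse("2023-01-01T00:00:00Z").timestamp()

    # upload_time strings come from the PyPI JSON API in ISO-8601 form
    upload_ts = dateutil.parser.parse("2023-02-15T12:00:00Z").timestamp()
    print(upload_ts > max_ts)  # True -> this filename would be hidden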

View File

@@ -1,6 +1,6 @@
 #!/usr/bin/env nix-shell
 #! nix-shell -i python3 -p python3 python3Packages.pkginfo python3Packages.packaging
-'''
+"""
 Given a directory of python source distributions (.tar.gz) and wheels,
 return a JSON representation of their dependency tree.
@@ -24,7 +24,7 @@ dependency declarations.
 The output is a list of tuples. First element in each tuple is the package name,
 second a list of dependencies. Output is sorted by the number of dependencies,
 so that leafs of the dependency tree come first, the package to install last.
-'''
+"""

 import sys
 import tarfile
@@ -33,16 +33,20 @@ from pathlib import Path

 from pkginfo import SDist, Wheel
 from packaging.requirements import Requirement
-from packaging.utils import parse_sdist_filename, parse_wheel_filename, canonicalize_name
+from packaging.utils import (
+    parse_sdist_filename,
+    parse_wheel_filename,
+    canonicalize_name,
+)


 def _is_source_dist(pkg_file):
-    return pkg_file.suffixes[-2:] == ['.tar', '.gz']
+    return pkg_file.suffixes[-2:] == [".tar", ".gz"]


 def _get_name_version(pkg_file):
     if _is_source_dist(pkg_file):
-        name, *_ = parse_sdist_filename(pkg_file.name)
+        name, *_ = parse_sdist_filename(pkg_file.name)
     else:
         name, *_ = parse_wheel_filename(pkg_file.name)
     return canonicalize_name(name)
@@ -50,12 +54,12 @@ def _get_name_version(pkg_file):

 def get_pkg_info(pkg_file):
     try:
-        if pkg_file.suffix == '.whl':
-            return Wheel(str(pkg_file))
-        elif _is_source_dist(pkg_file):
-            return SDist(str(pkg_file))
-        else:
-            raise NotImplemented(f"Unknown file format: {pkg_file}")
+        if pkg_file.suffix == ".whl":
+            return Wheel(str(pkg_file))
+        elif _is_source_dist(pkg_file):
+            return SDist(str(pkg_file))
+        else:
+            raise NotImplemented(f"Unknown file format: {pkg_file}")
     except ValueError:
         pass
@@ -63,7 +67,7 @@ def get_pkg_info(pkg_file):
 def _is_required_dependency(requirement):
     # We set the extra field to an empty string to effectively ignore all optional
     # dependencies for now.
-    return not requirement.marker or requirement.marker.evaluate({'extra': ""})
+    return not requirement.marker or requirement.marker.evaluate({"extra": ""})


 def parse_requirements_txt(pkg_file):
@@ -72,7 +76,8 @@ def parse_requirements_txt(pkg_file):
     requirements = [
         Requirement(req)
         for req in requirements_txt.split("\n")
-        if req and not req.startswith("#")]
+        if req and not req.startswith("#")
+    ]
     return requirements
@@ -80,18 +85,18 @@ def read_requirements_txt(source_dist_file):
     name, version = parse_sdist_filename(source_dist_file.name)
     with tarfile.open(source_dist_file) as tar:
         try:
-            with tar.extractfile(f'{name}-{version}/requirements.txt') as f:
-                return f.read().decode('utf-8')
+            with tar.extractfile(f"{name}-{version}/requirements.txt") as f:
+                return f.read().decode("utf-8")
         except KeyError as e:
             return


 def usage():
-    print(f'{sys.argv[0]} <pkgs-directory>')
+    print(f"{sys.argv[0]} <pkgs-directory>")
     sys.exit(1)


-if __name__ == '__main__':
+if __name__ == "__main__":
     if len(sys.argv) != 2:
         usage()
     pkgs_path = Path(sys.argv[1])
@@ -113,8 +118,12 @@ if __name__ == '__main__':

         requirements = parse_requirements_txt(pkg_file)
         requirements = filter(_is_required_dependency, requirements)
-        dependencies.append({'name': name, 'dependencies': [canonicalize_name(req.name) for req in requirements]})
+        dependencies.append(
+            {
+                "name": name,
+                "dependencies": [canonicalize_name(req.name) for req in requirements],
+            }
+        )

     dependencies = sorted(dependencies, key=lambda d: len(d["dependencies"]))
     print(json.dumps(dependencies, indent=2))
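
The packaging.utils helpers whose imports this commit rewraps can be exercised standalone. A quick sketch; the filenames below are hypothetical examples, not files from this commit:

    from packaging.utils import (
        canonicalize_name,
        parse_sdist_filename,
        parse_wheel_filename,
    )

    # wheel filenames encode name, version, optional build tag, and tags;
    # the returned name already comes back normalized
    name, version, _build, _tags = parse_wheel_filename(
        "Pillow-9.4.0-cp311-cp311-win_amd64.whl"
    )
    print(name, version)  # pillow 9.4.0

    # sdist filenames encode only name and version
    name, version = parse_sdist_filename("requests-2.28.2.tar.gz")
    print(canonicalize_name("Requests"))  # requests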