#!/usr/bin/env python

"""
Script for downloading Engine benchmark results into a single static web page
that visualizes all the benchmarks. Without any options, it downloads and
visualizes benchmark data for the last 14 days. By default, no data is written
to the disk except for the generated web page, and the data are downloaded
asynchronously.

Set the `--source` parameter to either `engine` or `stdlib`.

The generated website is placed under the "generated_site" directory.

The default GH artifact retention period is 3 months, which means that all
the artifacts older than 3 months are dropped. If you wish to gather the data
for benchmarks older than 3 months, make sure that the `use_cache` parameter
is set to true, and that the cache directory is populated with older data.
If the script encounters an expired artifact, it prints a warning.

This script is under continuous development, so it is advised to use the
`-v|--verbose` option at all times.

It queries only successful benchmark runs. If there are no successful benchmarks
in a given period, no results will be written.

The process of the script is roughly as follows:
- Asynchronously gather all the benchmark results from the GH API into job
  reports (JobReport dataclass)
  - Use the cache if possible to avoid unnecessary GH API queries
- Transform the gathered results into data for a particular benchmark, sorted
  by an appropriate commit timestamp
  - BenchmarkData class

If you wish to inspect the data yourself, just use the --create-csv option.

Dependencies of the script:
- GH CLI utility
  - https://cli.github.com/
  - Used for convenience to run the GH API queries.
  - It must be installed, and you must also be authenticated.
- Python version >= 3.7
- Python 3rd party packages:
  - pandas
    - Used for simple data processing
  - jinja2
    - Used as a template engine for the HTML.
"""
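
# Example invocation (illustrative; the file name and dates below are
# placeholders, not from the original script):
#   python bench_download.py -v --source engine --since 2023-07-01 --until 2023-07-14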

import asyncio
import json
import logging
import logging.config
import math
import os
import re
import shutil
import subprocess
import sys
import tempfile
import zipfile
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from csv import DictWriter
from datetime import datetime, timedelta
from enum import Enum
from os import path
from typing import List, Dict, Optional, Any, Union, Set
from dataclasses import dataclass
import xml.etree.ElementTree as ET


if sys.version_info < (3, 7):
    print("ERROR: python version lower than 3.7")
    exit(1)
try:
    import pandas as pd
    import numpy as np
    import jinja2
except ModuleNotFoundError as err:
    print(f"ERROR: One of pandas, numpy, or jinja2 packages not installed: {err}",
          file=sys.stderr)
    exit(1)

DATE_FORMAT = "%Y-%m-%d"
ENGINE_BENCH_WORKFLOW_ID = 29450898
"""
Workflow ID of the engine benchmarks, obtained via
`gh api '/repos/enso-org/enso/actions/workflows'`.
The name of the workflow is 'Benchmark Engine'.
"""
NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764
"""
Workflow ID of the 'Benchmark Engine' workflow, which is the new workflow
since 2023-08-22.
"""
STDLIBS_BENCH_WORKFLOW_ID = 66661001
"""
Workflow ID of the stdlib benchmarks, obtained via
`gh api '/repos/enso-org/enso/actions/workflows'`.
The name of the workflow is 'Benchmark Standard Libraries'.
"""
GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
""" Date format as returned in responses from the GH API """
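
# A timestamp in GH_DATE_FORMAT looks like "2023-08-22T12:34:56Z" (illustrative value).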
ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/"
JINJA_TEMPLATE = "templates/template_jinja.html"
""" Path to the Jinja HTML template """
TEMPLATES_DIR = "templates"
GENERATED_SITE_DIR = "generated_site"
GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90)


class Source(Enum):
    ENGINE = "engine"
    STDLIB = "stdlib"

    def workflow_ids(self) -> List[int]:
        if self == Source.ENGINE:
            return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID]
        elif self == Source.STDLIB:
            return [STDLIBS_BENCH_WORKFLOW_ID]
        else:
            raise ValueError(f"Unknown source {self}")


@dataclass
class Author:
    name: str


@dataclass
class Commit:
    """ Corresponds to the commit from the GH API """
    id: str
    author: Author
    timestamp: str
    message: str


@dataclass
class JobRun:
    """
    Gathered via the GH API. Defines a single run of an Engine benchmark job.
    """
    id: str
    display_title: str
    html_url: str
    run_attempt: int
    event: str
    """ An event as defined by the GitHub API, for example 'push' or 'schedule' """
    head_commit: Commit


@dataclass
class JobReport:
    """
    Gathered via the GH API - a report that is uploaded as an artifact of the job.
    Contains an XML file with scores for all the benchmarks.
    """
    label_score_dict: Dict[str, float]
    """ A mapping of benchmark labels to their scores """
    bench_run: JobRun


@dataclass
class BenchmarkData:
    """
    Data for a single benchmark compiled from all the job reports.
    """

    @dataclass
    class Entry:
        score: float
        commit: Commit
        bench_run_url: str
        bench_run_event: str

    label: str
    """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """
    entries: List[Entry]
    """ Entries sorted by timestamps """


@dataclass
class BenchDatapoint:
    """
    A single datapoint that will be on the chart. `timestamp` is on the X axis,
    `score` on the Y axis, and the rest of the fields are used either for the
    tooltip or for the selection info.
    """
    timestamp: datetime
    score: float
    score_diff: str
    """ Difference from the previous datapoint's score, or NaN """
    score_diff_perc: str
    tooltip: str
    bench_run_url: str
    commit_id: str
    commit_msg: str
    commit_author: str
    commit_url: str


@dataclass
class TemplateBenchData:
    """ Data for one benchmark label (with a unique name and ID) """
    id: str
    """ ID of the benchmark, must not contain dots """
    name: str
    """ Human readable name of the benchmark """
    branches_datapoints: Dict[str, List[BenchDatapoint]]
    """ Mapping of branches to datapoints for that branch """


@dataclass
class JinjaData:
    bench_source: Source
    bench_datas: List[TemplateBenchData]
    branches: List[str]
    since: datetime
    until: datetime
    display_since: datetime
    """ The date from which all the datapoints are first displayed """


def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun:
    return JobRun(
        id=str(obj["id"]),
        html_url=obj["html_url"],
        run_attempt=int(obj["run_attempt"]),
        event=obj["event"],
        display_title=obj["display_title"],
        head_commit=Commit(
            id=obj["head_commit"]["id"],
            message=obj["head_commit"]["message"],
            timestamp=obj["head_commit"]["timestamp"],
            author=Author(
                name=obj["head_commit"]["author"]["name"]
            )
        )
    )


def _parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport:
    return JobReport(
        bench_run=_parse_bench_run_from_json(obj["bench_run"]),
        label_score_dict=obj["label_score_dict"]
    )


def _bench_report_to_json(bench_report: JobReport) -> Dict[Any, Any]:
    return {
        "bench_run": {
            "id": bench_report.bench_run.id,
            "html_url": bench_report.bench_run.html_url,
            "run_attempt": bench_report.bench_run.run_attempt,
            "event": bench_report.bench_run.event,
            "display_title": bench_report.bench_run.display_title,
            "head_commit": {
                "id": bench_report.bench_run.head_commit.id,
                "message": bench_report.bench_run.head_commit.message,
                "timestamp": bench_report.bench_run.head_commit.timestamp,
                "author": {
                    "name": bench_report.bench_run.head_commit.author.name
                }
            }
        },
        "label_score_dict": bench_report.label_score_dict
    }


def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport":
    logging.debug(f"Parsing BenchReport from {bench_report_xml_path}")
    tree = ET.parse(bench_report_xml_path)
    root = tree.getroot()
    label_score_dict: Dict[str, float] = dict()
    for cases in root:
        assert cases.tag == "cases"
        for case in cases:
            assert case.tag == "case"
            label = case.findtext("label").strip()
            scores = case.find("scores")
            scores_float = [float(score.text.strip()) for score in scores]
            if len(scores_float) > 1:
                logging.warning(f"More than one score for benchmark {label}, "
                                f"using the best one (the smallest one).")
            label_score_dict[label] = min(scores_float)
    return JobReport(
        label_score_dict=label_score_dict,
        bench_run=bench_run
    )
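
# Expected shape of bench-report.xml, as inferred from the parser above
# (element names are illustrative where the parser does not check them):
#   <report>
#     <cases>
#       <case>
#         <label>...</label>
#         <scores><score>...</score></scores>
#       </case>
#     </cases>
#   </report>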


def _is_benchrun_id(name: str) -> bool:
    return re.match(r"\d{9}", name) is not None
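
# Illustrative check (values are hypothetical):
#   _is_benchrun_id("123456789") -> True
#   _is_benchrun_id("report")    -> False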


def _read_json(json_file: str) -> Dict[Any, Any]:
    assert path.exists(json_file) and path.isfile(json_file)
    with open(json_file, "r") as f:
        return json.load(f)


async def _invoke_gh_api(endpoint: str,
                         query_params: Dict[str, str] = {},
                         result_as_text: bool = True) -> Union[Dict[str, Any], bytes]:
    query_str_list = [key + "=" + value for key, value in query_params.items()]
    query_str = "&".join(query_str_list)
    cmd = [
        "gh",
        "api",
        f"/repos/enso-org/enso{endpoint}" + ("" if len(query_str) == 0 else "?" + query_str)
    ]
    logging.info(f"Starting subprocess `{' '.join(cmd)}`")
    proc = await asyncio.create_subprocess_exec("gh", *cmd[1:],
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.PIPE)
    out, err = await proc.communicate()
    logging.info(f"Finished subprocess `{' '.join(cmd)}`")
    if proc.returncode != 0:
        print("Command `" + " ".join(cmd) + "` FAILED with errcode " + str(
            proc.returncode))
        print(err.decode())
        exit(proc.returncode)
    if result_as_text:
        return json.loads(out.decode())
    else:
        return out
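
# For example, `_invoke_gh_api("/actions/workflows/29450898/runs", {"branch": "develop"})`
# ends up running roughly (illustrative):
#   gh api /repos/enso-org/enso/actions/workflows/29450898/runs?branch=develop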


class Cache:
    """
    Cache is a directory filled with JSON files named `<bench_run_id>.json`;
    every such file contains a serialized `JobReport` dataclass.
    """

    def __init__(self, dirname: str):
        assert path.exists(dirname) and path.isdir(dirname)
        self._dir = dirname
        # Keys are JobRun ids
        self._items: Dict[str, JobReport] = {}
        for fname in os.listdir(dirname):
            fname_without_ext, ext = path.splitext(fname)
            if _is_benchrun_id(fname_without_ext) and ext == ".json":
                logging.debug(f"Loading into cache from {fname}")
                bench_report = _parse_bench_report_from_json(
                    _read_json(path.join(dirname, fname))
                )
                self._items[fname_without_ext] = bench_report

    def __len__(self) -> int:
        return len(self._items)

    def __contains__(self, key: str) -> bool:
        assert _is_benchrun_id(key)
        return key in self._items

    def __getitem__(self, item: str) -> Optional[JobReport]:
        if not _is_benchrun_id(item):
            return None
        else:
            return self._items[item]

    def __setitem__(self, bench_run_id: str, bench_report: JobReport) -> None:
        assert isinstance(bench_report, JobReport)
        assert isinstance(bench_run_id, str)
        assert _is_benchrun_id(bench_run_id)
        self._items[bench_run_id] = bench_report
        json_fname = path.join(self._dir, bench_run_id + ".json")
        logging.debug(f"Putting {bench_run_id} into cache {json_fname}")
        with open(json_fname, "w") as json_file:
            json.dump(
                _bench_report_to_json(bench_report),
                json_file,
                indent=2,
                ensure_ascii=False
            )

    def __str__(self) -> str:
        return str(self._items)

    def contains(self, bench_run_id: str) -> bool:
        return bench_run_id in self._items
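
# Illustrative cache layout (default cache directory, hypothetical run ID):
#   ~/.cache/enso_bench_download/123456789.json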


class FakeCache:
    def __getitem__(self, item):
        return None

    def __setitem__(self, key, value):
        pass

    def __contains__(self, item):
        return False

    def __len__(self):
        return 0


async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]:
    """
    Fetches the list of all the job runs from the GH API for the specified `branch`.
    """
    logging.info(f"Looking for all successful Engine benchmark workflow run "
                 f"actions from {since} to {until} for branch {branch} "
                 f"and workflow ID {workflow_id}")
    query_fields = {
        "branch": branch,
        "status": "success",
        "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT),
        # Start with 1, just to determine the total count
        "per_page": "1"
    }
    res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", query_fields)
    total_count = int(res["total_count"])
    per_page = 3
    logging.debug(f"Total count of all runs: {total_count} for workflow ID "
                  f"{workflow_id}. Will process {per_page} runs per page")

    async def get_and_parse_run(page: int, parsed_bench_runs) -> None:
        _query_fields = query_fields.copy()
        _query_fields["page"] = str(page)
        res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", _query_fields)
        bench_runs_json = res["workflow_runs"]
        _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json)
                              for bench_run_json in bench_runs_json]
        parsed_bench_runs.extend(_parsed_bench_runs)

    # Now we know the total count, so we can fetch all the runs
    query_fields["per_page"] = str(per_page)
    num_queries = math.ceil(total_count / per_page)
    parsed_bench_runs = []

    tasks = []
    # Page is indexed from 1
    for page in range(1, num_queries + 1):
        tasks.append(get_and_parse_run(page, parsed_bench_runs))
    await asyncio.gather(*tasks)

    return parsed_bench_runs


async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Optional[JobReport]:
    """
    Extracts some data from the given bench_run, which was fetched via the GH API,
    optionally getting it from the cache.
    An artifact in GH can expire; in that case, None is returned.
    :param bench_run:
    :param cache:
    :param temp_dir: Used for downloading and unzipping artifacts.
    :return: None if the corresponding artifact expired.
    """
    if bench_run.id in cache:
        logging.info(f"Getting bench run with ID {bench_run.id} from cache")
        return cache[bench_run.id]

    # A benchmark run is expected to produce exactly one artifact (the
    # 'Runtime Benchmark Report'), which will be downloaded as a ZIP file.
    obj: Dict[str, Any] = await _invoke_gh_api(f"/actions/runs/{bench_run.id}/artifacts")
    artifacts = obj["artifacts"]
    assert len(artifacts) == 1, "There should be exactly one artifact for a benchmark run"
    bench_report_artifact = artifacts[0]
    assert bench_report_artifact, "Benchmark Report artifact not found"
    artifact_id = str(bench_report_artifact["id"])
    if bench_report_artifact["expired"]:
        created_at = bench_report_artifact["created_at"]
        updated_at = bench_report_artifact["updated_at"]
        expires_at = bench_report_artifact["expires_at"]
        logging.warning(f"Artifact with ID {artifact_id} from bench report {bench_run.id} has expired. "
                        f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}")
        return None

    # Get contents of the ZIP artifact file
    artifact_ret = await _invoke_gh_api(f"/actions/artifacts/{artifact_id}/zip", result_as_text=False)
    zip_file_name = os.path.join(temp_dir, artifact_id + ".zip")
    logging.debug(f"Writing artifact ZIP content into {zip_file_name}")
    with open(zip_file_name, "wb") as zip_file:
        zip_file.write(artifact_ret)

    extracted_dirname = os.path.join(temp_dir, artifact_id)
    if os.path.exists(extracted_dirname):
        shutil.rmtree(extracted_dirname)
    os.mkdir(extracted_dirname)

    logging.debug(f"Extracting {zip_file_name} into {extracted_dirname}")
    with zipfile.ZipFile(zip_file_name, "r") as zip_archive:
        zip_archive.extractall(extracted_dirname)
    bench_report_xml = path.join(extracted_dirname, "bench-report.xml")
    assert path.exists(bench_report_xml)

    bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run)
    cache[bench_run.id] = bench_report_parsed
    return bench_report_parsed


CSV_FIELDNAMES = [
    "label",
    "score",
    "commit_id",
    "commit_author",
    "commit_timestamp",
    "bench_run_url",
    "bench_run_event"
]


def write_bench_reports_to_csv(bench_reports: List[JobReport], csv_fname: str) -> None:
    logging.info(
        f"Writing {len(bench_reports)} benchmark reports to {csv_fname}")
    assert len(bench_reports) > 0
    if not path.exists(path.dirname(csv_fname)):
        logging.debug(f"Creating directory {path.dirname(csv_fname)}")
        # makedirs, so that nested output directories are created as well
        os.makedirs(path.dirname(csv_fname))
    with open(csv_fname, "w") as csv_file:
        csv_writer = DictWriter(csv_file, CSV_FIELDNAMES)
        csv_writer.writeheader()
        for bench_report in bench_reports:
            for label, score in bench_report.label_score_dict.items():
                csv_writer.writerow({
                    "label": label,
                    "score": score,
                    "commit_id": bench_report.bench_run.head_commit.id,
                    "commit_author": bench_report.bench_run.head_commit.author.name,
                    "commit_timestamp": bench_report.bench_run.head_commit.timestamp,
                    "bench_run_url": bench_report.bench_run.html_url,
                    "bench_run_event": bench_report.bench_run.event
                })


def populate_cache(cache_dir: str) -> Cache:
    """
    Initializes cache from `cache_dir`, if there are any items.
    See docs of `Cache`.

    :param cache_dir: Path to the cache directory. Does not have to exist.
    :return: Populated cache. Might be empty.
    """
    if not path.exists(cache_dir):
        logging.info(f"No cache at {cache_dir}, creating the cache directory")
        os.mkdir(cache_dir)
    logging.debug(f"Initializing cache from {cache_dir}")
    cache = Cache(cache_dir)
    logging.debug(f"Cache populated with {len(cache)} items")
    return cache


def create_template_data(
        job_reports_per_branch: Dict[str, List[JobReport]],
        bench_labels: Set[str]) -> List[TemplateBenchData]:
    """
    Creates all the necessary data for the Jinja template from all collected
    benchmark job reports.
    :param job_reports_per_branch: Mapping of branch name to list of job reports.
    Job reports should be sorted by the commit date, otherwise the difference
    between scores might be computed wrongly.
    :param bench_labels:
    :return:
    """

    def pct_to_str(score_diff_perc: float) -> str:
        if not np.isnan(score_diff_perc):
            buff = "+" if score_diff_perc > 0 else ""
            buff += "{:.5f}".format(score_diff_perc * 100)
            buff += "%"
            return buff
        else:
            return "NaN"

    def diff_str(score_diff: float, score_diff_perc: float) -> str:
        if not np.isnan(score_diff):
            diff_str = "+" if score_diff > 0 else ""
            diff_str += "{:.5f}".format(score_diff)
            diff_str += " ("
            diff_str += pct_to_str(score_diff_perc)
            diff_str += ")"
            return diff_str
        else:
            return "NA"
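
    # Illustrative formatting (values are hypothetical):
    #   pct_to_str(0.123)    -> "+12.30000%"
    #   diff_str(0.5, 0.123) -> "+0.50000 (+12.30000%)"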

    template_bench_datas: List[TemplateBenchData] = []
    for bench_label in bench_labels:
        logging.debug(f"Creating template data for benchmark {bench_label}")
        branch_datapoints: Dict[str, List[BenchDatapoint]] = {}
        for branch, job_reports in job_reports_per_branch.items():
            logging.debug(f"Creating datapoints for branch {branch} from {len(job_reports)} job reports")
            datapoints: List[BenchDatapoint] = []
            for job_report in job_reports:
                prev_datapoint: Optional[BenchDatapoint] = \
                    datapoints[-1] if len(datapoints) > 0 else None
                if bench_label in job_report.label_score_dict:
                    score = job_report.label_score_dict[bench_label]
                    commit = job_report.bench_run.head_commit
                    timestamp = datetime.strptime(
                        commit.timestamp,
                        GH_DATE_FORMAT
                    )
                    commit_msg_header = \
                        commit.message.splitlines()[0].replace('"', "'")
                    series = pd.Series([
                        prev_datapoint.score if prev_datapoint else None,
                        score
                    ])
                    score_diff = series.diff()[1]
                    score_diff_perc = series.pct_change()[1]
                    tooltip = "score = " + str(score) + "\\n"
                    tooltip += "date = " + str(timestamp) + "\\n"
                    tooltip += "branch = " + branch + "\\n"
                    tooltip += "diff = " + diff_str(score_diff, score_diff_perc)
                    author_name = commit.author.name\
                        .replace('"', '\\"')\
                        .replace("'", "\\'")
                    datapoints.append(BenchDatapoint(
                        timestamp=timestamp,
                        score=score,
                        score_diff=str(score_diff),
                        score_diff_perc=pct_to_str(score_diff_perc),
                        tooltip=tooltip,
                        bench_run_url=job_report.bench_run.html_url,
                        commit_id=commit.id,
                        commit_msg=commit_msg_header,
                        commit_author=author_name,
                        commit_url=ENSO_COMMIT_BASE_URL + commit.id,
                    ))
            logging.debug(f"{len(datapoints)} datapoints created for branch {branch}")
            branch_datapoints[branch] = datapoints.copy()
        logging.debug(f"Template data for benchmark {bench_label} created")
        template_bench_datas.append(TemplateBenchData(
            id=_label_to_id(bench_label),
            name=_label_to_name(bench_label),
            branches_datapoints=branch_datapoints,
        ))
    return template_bench_datas


def _label_to_id(label: str) -> str:
    return label.replace(".", "_")


def _label_to_name(label: str) -> str:
    items = label.split(".")
    assert len(items) >= 2
    filtered_items = \
        [item for item in items if item not in (
            "org",
            "enso",
            "benchmark",
            "benchmarks",
            "semantic",
            "interpreter",
            "bench"
        )]
    return "_".join(filtered_items)
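
# Illustrative transformation (hypothetical label):
#   _label_to_id("org.enso.benchmarks.Foo.bar")   -> "org_enso_benchmarks_Foo_bar"
#   _label_to_name("org.enso.benchmarks.Foo.bar") -> "Foo_bar"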


def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
    """
    Iterates through all the job reports and gathers all the benchmark labels
    found. Note that every job report can have a different set of benchmark labels.
    :return: Set of benchmark labels.
    """
    all_labels = set()
    for job_report in job_reports:
        for label in job_report.label_score_dict.keys():
            all_labels.add(label)
    return all_labels


def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) -> None:
    jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader("."))
    jinja_template = jinja_env.get_template(template_file)
    generated_html = jinja_template.render(jinja_data.__dict__)
    if path.exists(html_out_fname):
        logging.info(f"{html_out_fname} already exists, overwriting")
    with open(html_out_fname, "w") as html_file:
        html_file.write(generated_html)


def ensure_gh_installed() -> None:
    try:
        out = subprocess.run(["gh", "--version"], check=True, capture_output=True)
        if out.returncode != 0:
            print("`gh` command not found - GH CLI utility is not installed. "
                  "See https://cli.github.com/", file=sys.stderr)
            exit(1)
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError is raised when the `gh` binary is not on the PATH at all.
        print("`gh` command not found - GH CLI utility is not installed. "
              "See https://cli.github.com/", file=sys.stderr)
        exit(1)
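
# Note: `gh` must also be authenticated (e.g. via `gh auth login`, as mentioned
# in the module docstring); this helper only checks that the binary is available.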


async def main():
    default_since: datetime = (datetime.now() - timedelta(days=14))
    default_until: datetime = datetime.now()
    default_cache_dir = path.expanduser("~/.cache/enso_bench_download")
    default_csv_out = "Engine_Benchs/data/benchs.csv"
    date_format_help = DATE_FORMAT.replace("%", "%%")

    def _parse_bench_source(_bench_source: str) -> Source:
        try:
            return Source(_bench_source)
        except ValueError:
            print(f"Invalid benchmark source {_bench_source}.", file=sys.stderr)
            print(f"Available sources: {[source.value for source in Source]}", file=sys.stderr)
            exit(1)

    arg_parser = ArgumentParser(description=__doc__,
                                formatter_class=RawDescriptionHelpFormatter)
    arg_parser.add_argument("-v", "--verbose", action="store_true")
    arg_parser.add_argument("-s", "--source",
                            action="store",
                            required=True,
                            metavar=f"({Source.ENGINE.value}|{Source.STDLIB.value})",
                            type=lambda s: _parse_bench_source(s),
                            help=f"The source of the benchmarks. Available sources: "
                                 f"{[source.value for source in Source]}")
    arg_parser.add_argument("--since", action="store",
                            default=default_since,
                            metavar="SINCE_DATE",
                            type=lambda s: datetime.strptime(s, DATE_FORMAT),
                            help=f"The date from which the benchmark results will be gathered. "
                                 f"Format is {date_format_help}. "
                                 f"The default is 14 days ago")
    arg_parser.add_argument("--until", action="store",
                            default=default_until,
                            metavar="UNTIL_DATE",
                            type=lambda s: datetime.strptime(s, DATE_FORMAT),
                            help=f"The date until which the benchmark results will be gathered. "
                                 f"Format is {date_format_help}. "
                                 f"The default is today")
    arg_parser.add_argument("--use-cache",
                            default=False,
                            metavar="(true|false)",
                            type=lambda value: value in ("true", "True"),
                            help="Whether the cache directory should be used. The default is False.")
    arg_parser.add_argument("-c", "--cache", action="store",
                            default=default_cache_dir,
                            metavar="CACHE_DIR",
                            help=f"Cache directory. Makes sense only if used together with the "
                                 f"--use-cache argument. "
                                 f"The default is {default_cache_dir}. If there are any troubles with the "
                                 f"cache, just do `rm -rf {default_cache_dir}`.")
    arg_parser.add_argument("-b", "--branches", action="store",
                            nargs="+",
                            default=["develop"],
                            help="List of branches to gather the benchmark results from. "
                                 "The default is ['develop']")
    arg_parser.add_argument("-l", "--labels", action="store",
                            nargs="+",
                            default=set(),
                            help="List of labels to gather the benchmark results from. "
                                 "The default behavior is to gather all the labels")
    arg_parser.add_argument("-t", "--tmp-dir", action="store",
                            default=None,
                            help="Temporary directory with default created by `tempfile.mkdtemp()`")
    arg_parser.add_argument("--create-csv", action="store_true",
                            default=False,
                            help="Whether an intermediate `benchs.csv` should be created. "
                                 "Appropriate to see whether the benchmark downloading was successful. "
                                 "Or if you wish to inspect the CSV with Enso")
    arg_parser.add_argument("--csv-output",
                            default=default_csv_out,
                            metavar="CSV_OUTPUT",
                            help="Output CSV file. Makes sense only when used with the --create-csv argument")
    args = arg_parser.parse_args()
    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level, stream=sys.stdout)

    since: datetime = args.since
    until: datetime = args.until
    cache_dir: str = args.cache
    if not args.tmp_dir:
        temp_dir: str = tempfile.mkdtemp()
    else:
        temp_dir: str = args.tmp_dir
    use_cache: bool = args.use_cache
    assert cache_dir and temp_dir
    bench_source: Source = args.source
    csv_output: str = args.csv_output
    create_csv: bool = args.create_csv
    branches: List[str] = args.branches
    labels_override: Set[str] = args.labels
    logging.debug(f"parsed args: since={since}, until={until}, cache_dir={cache_dir}, "
                  f"temp_dir={temp_dir}, use_cache={use_cache}, bench_source={bench_source}, "
                  f"csv_output={csv_output}, "
                  f"create_csv={create_csv}, branches={branches}, "
                  f"labels_override={labels_override}")

    ensure_gh_installed()

    # If the user requires benchmarks for which artifacts are not retained
    # anymore, then the cache should be used.
    min_since_without_cache = datetime.today() - GH_ARTIFACT_RETENTION_PERIOD
    if not use_cache and since < min_since_without_cache:
        logging.warning(f"The default GH artifact retention period is "
                        f"{GH_ARTIFACT_RETENTION_PERIOD.days} days. "
                        f"This means that all the artifacts older than "
                        f"{min_since_without_cache.date()} are expired. "
                        f"The use_cache parameter is set to False, so no "
                        f"expired artifacts will be fetched.")
        logging.warning(f"The `since` parameter is reset to "
                        f"{min_since_without_cache.date()} to prevent "
                        f"unnecessary GH API queries.")
        since = min_since_without_cache

    if use_cache:
        cache = populate_cache(cache_dir)
    else:
        cache = FakeCache()

    bench_labels: Optional[Set[str]] = None
    """ Set of all gathered benchmark labels from all the job reports """
    job_reports_per_branch: Dict[str, List[JobReport]] = {}
    for branch in branches:
        bench_runs: List[JobRun] = []
        for workflow_id in bench_source.workflow_ids():
            bench_runs.extend(
                await get_bench_runs(since, until, branch, workflow_id)
            )
        if len(bench_runs) == 0:
            print(
                f"No successful benchmarks found within period since {since}"
                f" until {until} for branch {branch}")
            exit(1)

        job_reports: List[JobReport] = []

        async def _process_report(_bench_run):
            _job_report = await get_bench_report(_bench_run, cache, temp_dir)
            if _job_report:
                job_reports.append(_job_report)

        tasks = []
        for bench_run in bench_runs:
            tasks.append(_process_report(bench_run))
        await asyncio.gather(*tasks)

        logging.debug(f"Got {len(job_reports)} job reports for branch {branch}")
        if len(job_reports) == 0:
            print(f"There were 0 job_reports in the specified time interval, "
                  f"for branch {branch}, so "
                  "there is nothing to visualize or compare.")
            exit(1)

        logging.debug("Sorting job_reports by commit date")

        def _get_timestamp(job_report: JobReport) -> datetime:
            return datetime.strptime(
                job_report.bench_run.head_commit.timestamp,
                GH_DATE_FORMAT
            )

        job_reports.sort(key=lambda report: _get_timestamp(report))

        if create_csv:
            write_bench_reports_to_csv(job_reports, csv_output)
            logging.info(f"Benchmarks written to {csv_output}")
            print(f"The generated CSV is in {csv_output}")
            exit(0)

        # Gather all the benchmark labels from all the job reports
        if bench_labels is None:
            all_bench_labels = _gather_all_bench_labels(job_reports)
            if len(labels_override) > 0:
                logging.info(f"Subset of labels specified: {labels_override}")
                if not set(labels_override).issubset(all_bench_labels):
                    print(f"Specified bench labels {labels_override} are not a subset of "
                          f"all bench labels {all_bench_labels}")
                    exit(1)
                bench_labels = labels_override
            else:
                bench_labels = all_bench_labels
        logging.debug(f"Gathered bench_labels: {bench_labels}")

        job_reports_per_branch[branch] = job_reports

    template_bench_datas: List[TemplateBenchData] = \
        create_template_data(job_reports_per_branch, bench_labels)
    template_bench_datas.sort(key=lambda data: data.id)

    jinja_data = JinjaData(
        since=since,
        display_since=max(until - timedelta(days=30), since),
        until=until,
        bench_datas=template_bench_datas,
        bench_source=bench_source,
        branches=branches,
    )

    # Render Jinja template with jinja_data
    if not path.exists(GENERATED_SITE_DIR):
        os.mkdir(GENERATED_SITE_DIR)

    logging.debug(f"Rendering HTML from {JINJA_TEMPLATE} to {GENERATED_SITE_DIR}")
    site_path = path.join(GENERATED_SITE_DIR, bench_source.value + "-benchs.html")
    render_html(
        jinja_data,
        JINJA_TEMPLATE,
        site_path
    )
    logging.debug(f"Copying static site content from {TEMPLATES_DIR} to {GENERATED_SITE_DIR}")
    shutil.copy(
        path.join(TEMPLATES_DIR, "styles.css"),
        path.join(GENERATED_SITE_DIR, "styles.css")
    )

    index_html_abs_path = path.abspath(site_path)
    print(f"The generated HTML is in {index_html_abs_path}")
    print(f"Open file://{index_html_abs_path} in the browser")


if __name__ == "__main__":
    asyncio.run(main())