enso/tools/performance/engine-benchmarks/bench_download.py
Pavel Marek 0801fcb4a0
Fix the CSV file generation of bench_download script (#9421)
One can once again create CSV files from benchmark results with something like:
```
./bench_download.py -v -s stdlib --since 2024-01-01 --create-csv
```

The generated CSV is ready to be read by the Enso IDE.

# Important Notes
- Fix `--create-csv` functionality of the `bench_download.py` script.
- Remove an outdated Enso project from `tools/performance/engine-benchmarks/Engine_Benchs`.
- This is now done by book clubs.
2024-03-14 11:59:58 +00:00

#!/usr/bin/env python
"""
Script for downloading Engine benchmark results into a single static web page
that visualizes all the benchmarks. Without any options, downloads and
visualizes benchmark data for the last 14 days. By default, no data is written
to the disk except for the generated web page, and the data are downloaded
asynchronously.
Set the `--source` parameter to either `engine` or `stdlib`.
The generated website is placed under the "generated_site" directory.
The default GH artifact retention period is 3 months, which means that all
the artifacts older than 3 months are dropped. If you wish to gather the data
for benchmarks older than 3 months, make sure that the `use_cache` parameter
is set to true, and that the cache directory is populated with older data.
If the script encounters an expired artifact, it prints a warning.
This script is under continuous development, so it is advised to always use the
`-v|--verbose` option.
It queries only successful benchmark runs. If there are no successful benchmarks
in a given period, no results will be written.
The process of the script is roughly as follows:
- Asynchronously gather all the benchmark results from the GH API into job reports
  (JobReport dataclass).
    - Use the cache if possible to avoid unnecessary GH API queries.
- Transform the gathered results into data for a particular benchmark, sorted
  by an appropriate commit timestamp.
    - BenchmarkData class
If you wish to inspect the gathered data yourself, just use the --create-csv option.
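An example invocation that gathers the stdlib benchmarks since 2024-01-01 and
exports them to a CSV file:
    ./bench_download.py -v -s stdlib --since 2024-01-01 --create-csv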
Dependencies for the script:
- GH CLI utility
    - https://cli.github.com/
    - Used for convenience to do the GH API queries.
    - It needs to be installed, and you should also be authenticated.
- Python version >= 3.7
- Python 3rd party packages:
    - pandas
        - Used for convenience for very simple data processing.
    - jinja2
        - Used as a template engine for the HTML.
"""
import sys
from dataclasses import dataclass
from bench_tool.bench_results import get_bench_runs, fetch_job_reports
from bench_tool.remote_cache import ReadonlyRemoteCache
from bench_tool.utils import gather_all_bench_labels, sort_job_reports
if sys.version_info < (3, 7):
    print("ERROR: python version lower than 3.7")
    exit(1)
import asyncio
import logging
import logging.config
import os
import shutil
import tempfile
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from csv import DictWriter
from datetime import datetime, timedelta
from os import path
from typing import List, Dict, Optional, Set
from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, \
    GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \
    JINJA_TEMPLATE, JobRun, JobReport, \
    TemplateBenchData, JinjaData, Source
from bench_tool.gh import ensure_gh_installed
from bench_tool.template_render import create_template_data, render_html
try:
    import pandas as pd
    import numpy as np
    import jinja2
except ModuleNotFoundError as err:
    print("ERROR: One of the pandas, numpy, or jinja2 packages is not installed",
          file=sys.stderr)
    print("Install them either with `pip install pandas numpy jinja2` or "
          "with `apt-get install python3-pandas python3-numpy python3-jinja2`",
          file=sys.stderr)
    exit(1)
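# A single CSV row corresponds to one (benchmark label, score) pair from a job
# report. The field declaration order below defines the column order of the
# generated CSV, i.e. the header row is:
#   label,score,commit_id,commit_title,commit_timestamp,commit_author,bench_run_id,bench_run_url,bench_run_event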
@dataclass
class CsvRow:
    label: str
    score: str
    commit_id: str
    commit_title: str
    commit_timestamp: str
    commit_author: str
    bench_run_id: str
    bench_run_url: str
    bench_run_event: str
def write_bench_reports_to_csv(bench_reports: List[JobReport],
                               csv_fname: str) -> None:
    logging.info(
        f"Writing {len(bench_reports)} benchmark reports to {csv_fname}")
    csv_fieldnames = CsvRow.__annotations__.keys()
    assert len(bench_reports) > 0
    with open(csv_fname, "w") as csv_file:
        csv_writer = DictWriter(csv_file, csv_fieldnames)
        csv_writer.writeheader()
        for bench_report in bench_reports:
            for label, score in bench_report.label_score_dict.items():
                commit_title = \
                    bench_report.bench_run.head_commit.message.splitlines()[0]
                commit_title = commit_title.replace(",", " ")
                # Ensure that the score is not printed in exponential notation;
                # Enso currently cannot easily parse that format.
                score_formatted = f"{score:.9f}"
                row = CsvRow(
                    label=label,
                    score=score_formatted,
                    commit_id=bench_report.bench_run.head_commit.id,
                    commit_title=commit_title,
                    commit_author=bench_report.bench_run.head_commit.author.name,
                    commit_timestamp=bench_report.bench_run.head_commit.timestamp,
                    bench_run_id=bench_report.bench_run.id,
                    bench_run_url=bench_report.bench_run.html_url,
                    bench_run_event=bench_report.bench_run.event
                )
                csv_writer.writerow(row.__dict__)
async def main():
    default_since: datetime = (datetime.now() - timedelta(days=14))
    default_until: datetime = datetime.now()
    default_csv_out = "benchs.csv"
    date_format_help = DATE_FORMAT.replace("%", "%%")

    def _parse_bench_source(_bench_source: str) -> Source:
        try:
            return Source(_bench_source)
        except ValueError:
            print(f"Invalid benchmark source {_bench_source}.", file=sys.stderr)
            print(f"Available sources: {[source.value for source in Source]}",
                  file=sys.stderr)
            exit(1)
    arg_parser = ArgumentParser(description=__doc__,
                                formatter_class=RawDescriptionHelpFormatter)
    arg_parser.add_argument("-v", "--verbose", action="store_true")
    arg_parser.add_argument("-s", "--source",
                            action="store",
                            required=True,
                            metavar=f"({Source.ENGINE.value}|{Source.STDLIB.value})",
                            type=lambda s: _parse_bench_source(s),
                            help=f"The source of the benchmarks. Available sources: "
                                 f"{[source.value for source in Source]}")
    arg_parser.add_argument("--since", action="store",
                            default=default_since,
                            metavar="SINCE_DATE",
                            type=lambda s: datetime.strptime(s, DATE_FORMAT),
                            help=f"The date from which the benchmark results will be gathered. "
                                 f"Format is {date_format_help}. "
                                 f"The default is 14 days ago.")
    arg_parser.add_argument("--until", action="store",
                            default=default_until,
                            metavar="UNTIL_DATE",
                            type=lambda s: datetime.strptime(s, DATE_FORMAT),
                            help=f"The date until which the benchmark results will be gathered. "
                                 f"Format is {date_format_help}. "
                                 f"The default is today.")
    arg_parser.add_argument("-b", "--branches", action="store",
                            nargs="+",
                            default=["develop"],
                            help="List of branches to gather the benchmark results from. "
                                 "The default is ['develop'].")
    arg_parser.add_argument("-l", "--labels", action="store",
                            nargs="+",
                            default=set(),
                            help="List of labels to gather the benchmark results for. "
                                 "The default behavior is to gather all the labels.")
    arg_parser.add_argument("-t", "--tmp-dir", action="store",
                            default=None,
                            help="Temporary directory. By default, one is created by `tempfile.mkdtemp()`.")
    arg_parser.add_argument("--create-csv", action="store_true",
                            default=False,
                            help="Whether an intermediate CSV file (`benchs.csv` by default) should be created. "
                                 "Useful for checking that the benchmark download succeeded, "
                                 "or for inspecting the results with Enso.")
    arg_parser.add_argument("--csv-output",
                            default=default_csv_out,
                            metavar="CSV_OUTPUT",
                            help="Output CSV file. Only makes sense when used together with --create-csv.")
    args = arg_parser.parse_args()
    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level, stream=sys.stdout)
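    # From this point on, all log output goes to stdout; -v/--verbose raises
    # the log level to DEBUG.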
    since: datetime = args.since
    until: datetime = args.until
    if not args.tmp_dir:
        temp_dir: str = tempfile.mkdtemp()
    else:
        temp_dir: str = args.tmp_dir
    bench_source: Source = args.source
    csv_output: str = args.csv_output
    create_csv: bool = args.create_csv
    branches: List[str] = args.branches
    labels_override: Set[str] = args.labels
    logging.debug(f"parsed args: since={since}, until={until}, "
                  f"temp_dir={temp_dir}, bench_source={bench_source}, "
                  f"csv_output={csv_output}, "
                  f"create_csv={create_csv}, branches={branches}, "
                  f"labels_override={labels_override}")

    ensure_gh_installed()

    # If the user requests benchmarks whose artifacts are no longer retained,
    # the cache has to be used.
    min_since_without_cache = datetime.today() - GH_ARTIFACT_RETENTION_PERIOD
    if since < min_since_without_cache:
        logging.info(f"The default GH artifact retention period is "
                     f"{GH_ARTIFACT_RETENTION_PERIOD.days} days. "
                     f"This means that all the artifacts older than "
                     f"{min_since_without_cache.date()} are expired. "
                     f"The since date was set to {since}, so the remote cache is enabled, "
                     f"and the older artifacts will be fetched from the cache.")
    remote_cache = ReadonlyRemoteCache()

    bench_labels: Optional[Set[str]] = None
    """ Set of all gathered benchmark labels from all the job reports """
    job_reports_per_branch: Dict[str, List[JobReport]] = {}
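    # Gather, sort and (optionally) export the job reports for every requested
    # branch independently.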
    for branch in branches:
        bench_runs: List[JobRun] = []
        for workflow_id in bench_source.workflow_ids():
            bench_runs.extend(
                await get_bench_runs(since, until, branch, workflow_id)
            )
        if len(bench_runs) == 0:
            print(
                f"No successful benchmark runs found in the period from {since} "
                f"until {until} for branch {branch}")
            exit(1)

        job_reports = await fetch_job_reports(bench_runs, remote_cache)
        logging.debug(f"Got {len(job_reports)} job reports for branch {branch}")
        if len(job_reports) == 0:
            print(f"There are no job reports in the specified time interval "
                  f"for branch {branch}, so "
                  "there is nothing to visualize or compare.")
            exit(1)

        logging.debug("Sorting job_reports by commit date")
        sort_job_reports(job_reports)
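        # With --create-csv, only the job reports gathered for the branch
        # currently being processed are exported to the CSV file and the script
        # stops here; no HTML site is generated in that case.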
        if create_csv:
            write_bench_reports_to_csv(job_reports, csv_output)
            logging.info(f"Benchmarks written to {csv_output}")
            print(f"The generated CSV is in {csv_output}")
            exit(0)

        # Gather all the benchmark labels from all the job reports
        if bench_labels is None:
            all_bench_labels = gather_all_bench_labels(job_reports)
            if len(labels_override) > 0:
                logging.info(f"Subset of labels specified: {labels_override}")
                if not set(labels_override).issubset(all_bench_labels):
                    print(
                        f"Specified bench labels {labels_override} are not a subset of "
                        f"all bench labels {all_bench_labels}")
                    exit(1)
                bench_labels = labels_override
            else:
                bench_labels = all_bench_labels
        logging.debug(f"Gathered bench_labels: {bench_labels}")

        job_reports_per_branch[branch] = job_reports
    template_bench_datas: List[TemplateBenchData] = \
        create_template_data(job_reports_per_branch, bench_labels)
    template_bench_datas.sort(key=lambda data: data.id)
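    # Note: `display_since` below is capped at 30 days before `until`, but it
    # is never earlier than `since` itself.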
    jinja_data = JinjaData(
        since=since,
        display_since=max(until - timedelta(days=30), since),
        until=until,
        bench_datas=template_bench_datas,
        bench_source=bench_source,
        branches=branches,
        timestamp=datetime.now()
    )

    # Render Jinja template with jinja_data
    if not path.exists(GENERATED_SITE_DIR):
        os.mkdir(GENERATED_SITE_DIR)
    logging.debug(
        f"Rendering HTML from {JINJA_TEMPLATE} to {GENERATED_SITE_DIR}")
    site_path = GENERATED_SITE_DIR.joinpath(bench_source.value + "-benchs.html")
    render_html(
        jinja_data,
        site_path
    )
    logging.debug(
        f"Copying static site content from {TEMPLATES_DIR} to {GENERATED_SITE_DIR}")
    shutil.copy(
        path.join(TEMPLATES_DIR, "styles.css"),
        path.join(GENERATED_SITE_DIR, "styles.css")
    )

    index_html_abs_path = path.abspath(site_path)
    print(f"The generated HTML is in {index_html_abs_path}")
    print(f"Open file://{index_html_abs_path} in the browser")
if __name__ == "__main__":
    asyncio.run(main())