add junit output and summaries for benchmarks (#16660)

Kyle Altendorf 2023-10-25 09:37:36 -04:00 committed by GitHub
parent 79f876e9ca
commit 311d0143f6
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 376 additions and 25 deletions


@ -2,6 +2,11 @@ name: ⚡️ Benchmarks
on:
workflow_dispatch:
inputs:
repeats:
description: "The number of times to execute each benchmark"
type: int
default: 1
push:
paths-ignore:
- '**.md'
@ -23,12 +28,32 @@ concurrency:
cancel-in-progress: true
jobs:
setup:
name: Setup
runs-on: ubuntu-latest
timeout-minutes: 5
outputs:
repeats: ${{ steps.repeats.outputs.repeats }}
timeout: ${{ steps.timeout.outputs.timeout }}
steps:
- name: Calculate repeats
id: repeats
run: |
echo "repeats=${{ inputs.repeats != '' && inputs.repeats || 1 }}" >> "$GITHUB_OUTPUT"
- name: Calculate timeout
id: timeout
run: |
echo "timeout=$(( ${{ steps.repeats.outputs.repeats }} * 20 ))" >> "$GITHUB_OUTPUT"
build:
name: Benchmarks
runs-on: benchmark
needs:
- setup
container:
image: chianetwork/ubuntu-22.04-builder:latest
timeout-minutes: 30
timeout-minutes: ${{ fromJSON(needs.setup.outputs.timeout) }}
strategy:
fail-fast: false
matrix:
@ -81,4 +106,22 @@ jobs:
- name: pytest
run: |
pytest -n 0 --capture no -m benchmark tests
pytest -n 0 --capture no -m benchmark -o 'junit_suite_name=benchmarks' --junitxml=junit-data/benchmarks.raw.xml --benchmark-repeats ${{ needs.setup.outputs.repeats }} tests
- name: Format JUnit data and prepare results
if: always()
run: |
yq junit-data/benchmarks.raw.xml > junit-data/benchmarks.xml
- name: Publish JUnit results
if: always()
uses: actions/upload-artifact@v3
with:
name: junit-data
path: junit-data/*
if-no-files-found: error
- name: Add benchmark results to workflow summary
if: always()
run: |
python -m tests.process_benchmarks --xml junit-data/benchmarks.xml --markdown --link-prefix ${{ github.event.repository.html_url }}/blob/${{ github.sha }}/ --link-line-separator \#L >> "$GITHUB_STEP_SUMMARY"
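Taken together, these workflow changes thread the repeats input through to both the test run and the job timeout (timeout-minutes becomes repeats * 20), write a JUnit XML report, pretty-print it with yq, upload it as an artifact, and render a markdown table into the step summary. Below is a rough local sketch of the pytest and summary steps only: the repeat count of 3 is an assumption, the yq and artifact-upload steps are CI-only and skipped, and lxml should parse the raw XML fine without the yq pass.

```python
# Sketch: run the benchmarks locally and print the same markdown table the
# workflow appends to $GITHUB_STEP_SUMMARY. Assumes the repository root as cwd.
import pathlib
import subprocess

pathlib.Path("junit-data").mkdir(exist_ok=True)
subprocess.run(
    [
        "pytest", "-n", "0", "-m", "benchmark",
        "-o", "junit_suite_name=benchmarks",
        "--junitxml=junit-data/benchmarks.raw.xml",
        "--benchmark-repeats", "3",  # assumed repeat count
        "tests",
    ],
    check=False,  # a benchmark over its limit still produces usable XML
)
subprocess.run(
    [
        "python", "-m", "tests.process_benchmarks",
        "--xml", "junit-data/benchmarks.raw.xml",
        "--markdown",
    ],
    check=True,
)
```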


@ -35,3 +35,4 @@ filterwarnings =
ignore:The --rsyncdir command line argument and rsyncdirs config variable are deprecated.:DeprecationWarning
ignore:Properties from keyring.util are no longer supported. Use jaraco.classes.properties instead.:DeprecationWarning
ignore:pkg_resources is deprecated as an API:DeprecationWarning
ignore:record_property is incompatible with junit_family:pytest.PytestWarning


@ -62,6 +62,7 @@ dev_dependencies = [
"flake8==6.1.0",
"mypy==1.5.1",
"black==23.7.0",
"lxml==4.9.3",
"aiohttp_cors==0.7.0", # For blackd
"pyinstaller==5.13.0",
"types-aiofiles==23.2.0.0",


@ -11,7 +11,7 @@ import random
import sysconfig
import tempfile
from enum import Enum
from typing import Any, AsyncIterator, Dict, Iterator, List, Tuple, Union
from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Tuple, Union
import aiohttp
import pytest
@ -97,9 +97,18 @@ def benchmark_runner_overhead_fixture() -> float:
@pytest.fixture(name="benchmark_runner")
def benchmark_runner_fixture(request: SubRequest, benchmark_runner_overhead: float) -> BenchmarkRunner:
def benchmark_runner_fixture(
request: SubRequest,
benchmark_runner_overhead: float,
record_property: Callable[[str, object], None],
benchmark_repeat: int,
) -> BenchmarkRunner:
label = request.node.name
return BenchmarkRunner(label=label, overhead=benchmark_runner_overhead)
return BenchmarkRunner(
label=label,
overhead=benchmark_runner_overhead,
record_property=record_property,
)
@pytest.fixture(name="node_name_for_file")
@ -358,9 +367,41 @@ if os.getenv("_PYTEST_RAISE", "0") != "0":
raise excinfo.value
def pytest_addoption(parser: pytest.Parser):
default_repeats = 1
group = parser.getgroup("chia")
group.addoption(
"--benchmark-repeats",
action="store",
default=default_repeats,
type=int,
help=f"The number of times to run each benchmark, default {default_repeats}.",
)
def pytest_configure(config):
config.addinivalue_line("markers", "benchmark: automatically assigned by the benchmark_runner fixture")
benchmark_repeats = config.getoption("--benchmark-repeats")
if benchmark_repeats != 1:
@pytest.fixture(
name="benchmark_repeat",
params=[pytest.param(repeat, id=f"benchmark_repeat{repeat:03d}") for repeat in range(benchmark_repeats)],
)
def benchmark_repeat_fixture(request: SubRequest) -> int:
return request.param
else:
@pytest.fixture(
name="benchmark_repeat",
)
def benchmark_repeat_fixture() -> int:
return 1
globals()[benchmark_repeat_fixture.__name__] = benchmark_repeat_fixture
def pytest_collection_modifyitems(session, config: pytest.Config, items: List[pytest.Function]):
# https://github.com/pytest-dev/pytest/issues/3730#issuecomment-567142496
@ -568,7 +609,7 @@ async def two_nodes_two_wallets_with_same_keys(bt) -> AsyncIterator[SimulatorsAn
yield _
@pytest_asyncio.fixture(scope="module")
@pytest_asyncio.fixture
async def wallet_nodes_perf(blockchain_constants: ConsensusConstants):
async with setup_simulators_and_wallets(
1, 1, blockchain_constants, config_overrides={"MEMPOOL_BLOCK_BUFFER": 1, "MAX_BLOCK_COST_CLVM": 11000000000}
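The conftest changes above carry the repeat mechanism: pytest_configure reads --benchmark-repeats and then defines benchmark_repeat either as a parametrized fixture (one param per repeat, with ids like benchmark_repeat000) or as a trivial single-value fixture, registering whichever one it built via globals(). The benchmark_runner fixture requests benchmark_repeat only to inherit that parametrization; the value itself is unused. A minimal standalone sketch of the same pattern, using hypothetical option and fixture names:

```python
# conftest.py sketch of a conditionally parametrized fixture chosen at
# configure time; "--widget-repeats" and "widget_repeat" are made-up names.
import pytest


def pytest_addoption(parser: pytest.Parser) -> None:
    parser.addoption("--widget-repeats", action="store", default=1, type=int)


def pytest_configure(config: pytest.Config) -> None:
    repeats = config.getoption("--widget-repeats")

    if repeats != 1:

        @pytest.fixture(
            name="widget_repeat",
            params=[pytest.param(n, id=f"widget_repeat{n:03d}") for n in range(repeats)],
        )
        def widget_repeat_fixture(request: pytest.FixtureRequest) -> int:
            return request.param

    else:

        @pytest.fixture(name="widget_repeat")
        def widget_repeat_fixture() -> int:
            return 1

    # A fixture defined inside a function is only discovered if it is visible
    # at module scope, hence the explicit globals() registration.
    globals()[widget_repeat_fixture.__name__] = widget_repeat_fixture


def test_widget(widget_repeat: int) -> None:
    # Runs once by default, or N times with --widget-repeats N.
    assert widget_repeat >= 0
```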


@ -115,7 +115,7 @@ class TestPerformance:
spend_bundles.append(spend_bundle)
spend_bundle_ids.append(spend_bundle.get_hash())
with benchmark_runner.assert_runtime(seconds=0.0055, label=f"{request.node.name} - mempool"):
with benchmark_runner.assert_runtime(seconds=0.0055, label="mempool"):
num_tx: int = 0
for spend_bundle, spend_bundle_id in zip(spend_bundles, spend_bundle_ids):
num_tx += 1
@ -167,12 +167,12 @@ class TestPerformance:
[],
)
with benchmark_runner.assert_runtime(seconds=0.1, label=f"{request.node.name} - unfinished"):
with benchmark_runner.assert_runtime(seconds=0.1, label="unfinished"):
res = await full_node_1.respond_unfinished_block(fnp.RespondUnfinishedBlock(unfinished), fake_peer)
log.warning(f"Res: {res}")
with benchmark_runner.assert_runtime(seconds=0.1, label=f"{request.node.name} - full block"):
with benchmark_runner.assert_runtime(seconds=0.1, label="full block"):
# No transactions generator, the full node already cached it from the unfinished block
block_small = dataclasses.replace(block, transactions_generator=None)
res = await full_node_1.full_node.add_block(block_small)


@ -303,7 +303,7 @@ async def test_get_puzzle_and_solution_for_coin_performance(benchmark_runner: Be
# benchmark the function to pick out the puzzle and solution for a specific
# coin
generator = BlockGenerator(LARGE_BLOCK.transactions_generator, [], [])
with benchmark_runner.assert_runtime(seconds=8.5, label="get_puzzle_and_solution_for_coin"):
with benchmark_runner.assert_runtime(seconds=8.5):
for i in range(3):
for c in spends:
spend_info = get_puzzle_and_solution_for_coin(generator, c, 0, test_constants)

tests/process_benchmarks.py (new file, 240 lines)

@ -0,0 +1,240 @@
from __future__ import annotations
import json
import random
import re
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from statistics import StatisticsError, mean, stdev
from typing import Any, Dict, List, Set, TextIO, Tuple, final
import click
import lxml.etree
@final
@dataclass(frozen=True, order=True)
class Result:
file_path: Path
test_path: Tuple[str, ...]
label: str
line: int = field(compare=False)
durations: Tuple[float, ...] = field(compare=False)
limit: float = field(compare=False)
def marshal(self) -> Dict[str, Any]:
return {
"file_path": self.file_path.as_posix(),
"test_path": self.test_path,
"label": self.label,
"duration": {
"all": self.durations,
"min": min(self.durations),
"max": max(self.durations),
"mean": mean(self.durations),
},
}
def link(self, prefix: str, line_separator: str) -> str:
return f"{prefix}{self.file_path.as_posix()}{line_separator}{self.line}"
def sub(matchobj: re.Match[str]) -> str:
result = ""
if matchobj.group("start") == "[":
result += "["
if matchobj.group("start") == matchobj.group("end") == "-":
result += "-"
if matchobj.group("end") == "]":
result += "]"
return result
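The sub callback above, combined with the regex used later in main(), strips the benchmark_repeatNNN id that the repeat fixture injects into parametrized test names, so every repeat of a test collapses onto the same key. A quick worked example (the test names are hypothetical; sub is the function defined above):

```python
# Worked example of the repeat-id stripping; the pattern matches the one
# passed to re.sub in main() below.
import re

pattern = r"(?P<start>[-\[])benchmark_repeat\d{3}(?P<end>[-\])])"

assert re.sub(pattern, sub, "test_x[benchmark_repeat000]") == "test_x[]"
assert re.sub(pattern, sub, "test_x[benchmark_repeat002-case_a]") == "test_x[case_a]"
assert re.sub(pattern, sub, "test_x[case_a-benchmark_repeat002]") == "test_x[case_a]"
assert re.sub(pattern, sub, "test_x[case_a-benchmark_repeat002-case_b]") == "test_x[case_a-case_b]"
```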
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.option(
"--xml",
"xml_file",
required=True,
type=click.File(),
help="The benchmarks JUnit XML results file",
)
@click.option(
"--link-prefix",
default="",
help="Prefix for output links such as for web links instead of IDE links",
show_default=True,
)
@click.option(
"--link-line-separator",
default=":",
help="The separator between the path and the line number, such as : for local links and #L on GitHub",
show_default=True,
)
@click.option(
"--output",
default="-",
type=click.File(mode="w", encoding="utf-8", lazy=True, atomic=True),
help="Output file, - for stdout",
show_default=True,
)
# TODO: anything but this pattern for output types
@click.option(
"--markdown/--no-markdown",
help="Use markdown as output format",
show_default=True,
)
@click.option(
"--percent-margin",
default=15,
type=int,
help="Highlight results with maximums within this percent of the limit",
show_default=True,
)
@click.option(
"--randomoji/--determimoji",
help="🍿",
show_default=True,
)
def main(
xml_file: TextIO,
link_prefix: str,
link_line_separator: str,
output: TextIO,
markdown: bool,
percent_margin: int,
randomoji: bool,
) -> None:
tree = lxml.etree.parse(xml_file)
root = tree.getroot()
benchmarks = root.find("testsuite[@name='benchmarks']")
# raw_durations: defaultdict[Tuple[str, ...], List[Result]] = defaultdict(list)
cases_by_test_path: defaultdict[Tuple[str, ...], List[lxml.etree.Element]] = defaultdict(list)
for case in benchmarks.findall("testcase"):
raw_name = case.attrib["name"]
name = re.sub(r"(?P<start>[-\[])benchmark_repeat\d{3}(?P<end>[-\])])", sub, raw_name)
# TODO: seems to duplicate the class and function name, though not the parametrizations
test_path = (
*case.attrib["classname"].split("."),
name,
)
cases_by_test_path[test_path].append(case)
results: List[Result] = []
for test_path, cases in cases_by_test_path.items():
labels: Set[str] = set()
for case in cases:
properties = case.find("properties")
labels.update(property.attrib["name"].partition(":")[2] for property in properties)
for label in labels:
query = "properties/property[@name='{property}:{label}']"
durations = [
float(property.attrib["value"])
for case in cases
for property in case.xpath(query.format(label=label, property="duration"))
]
a_case = cases[0]
file_path: Path
[file_path] = [
Path(property.attrib["value"]) for property in a_case.xpath(query.format(label=label, property="path"))
]
line: int
[line] = [
int(property.attrib["value"]) for property in a_case.xpath(query.format(label=label, property="line"))
]
limit: float
[limit] = [
float(property.attrib["value"])
for property in a_case.xpath(query.format(label=label, property="limit"))
]
results.append(
Result(
file_path=file_path,
test_path=test_path,
line=line,
label=label,
durations=tuple(durations),
limit=limit,
)
)
if not markdown:
for result in results:
link = result.link(prefix=link_prefix, line_separator=link_line_separator)
dumped = json.dumps(result.marshal())
output.write(f"{link} {dumped}\n")
else:
output.write("| Test | 🍿 | Mean | Max | 3σ | Limit | Percent |\n")
output.write("| --- | --- | --- | --- | --- | --- | --- |\n")
for result in sorted(results):
link_url = result.link(prefix=link_prefix, line_separator=link_line_separator)
mean_str = "-"
three_sigma_str = "-"
if len(result.durations) > 1:
durations_mean = mean(result.durations)
mean_str = f"{durations_mean:.3f} s"
try:
three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s"
except StatisticsError:
pass
durations_max = max(result.durations)
max_str = f"{durations_max:.3f} s"
limit_str = f"{result.limit:.3f} s"
percent = 100 * durations_max / result.limit
if percent >= 100:
# intentionally biasing towards 🍄
choices = "🍄🍄🍎🍅" # 🌶️🍉🍒🍓
elif percent >= (100 - percent_margin):
choices = "🍋🍌" # 🍍🌽
else:
choices = "🫛🍈🍏🍐🥝🥒🥬🥦"
marker: str
if randomoji:
marker = random.choice(choices)
else:
marker = choices[0]
percent_str = f"{percent:.0f} %"
test_path_str = ".".join(result.test_path[1:])
test_link_text: str
if result.label == "":
test_link_text = f"`{test_path_str}`"
else:
test_link_text = f"`{test_path_str}` - {result.label}"
output.write(
f"| [{test_link_text}]({link_url})"
+ f" | {marker}"
+ f" | {mean_str}"
+ f" | {max_str}"
+ f" | {three_sigma_str}"
+ f" | {limit_str}"
+ f" | {percent_str}"
+ " |\n"
)
if __name__ == "__main__":
# pylint: disable = no-value-for-parameter
main()
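process_benchmarks.py depends on a small property-naming contract with the benchmark harness (see the next file's diff): each assert_runtime block records duration:<label>, path:<label>, line:<label>, and limit:<label> on its test case, and this script groups the repeats by normalized test path and aggregates per label. A self-contained sketch of that contract against a hand-written JUnit fragment; the element layout follows pytest's xunit-style output and every concrete value is invented for illustration:

```python
# Sketch of the JUnit shape process_benchmarks.py consumes; property names
# mirror the harness, all concrete values here are made up.
import lxml.etree

xml = b"""
<testsuites>
  <testsuite name="benchmarks">
    <testcase classname="tests.core.test_example" name="test_thing[benchmark_repeat000]">
      <properties>
        <property name="duration:mempool" value="0.0042"/>
        <property name="path:mempool" value="tests/core/test_example.py"/>
        <property name="line:mempool" value="117"/>
        <property name="limit:mempool" value="0.0055"/>
      </properties>
    </testcase>
  </testsuite>
</testsuites>
"""

root = lxml.etree.fromstring(xml)
benchmarks = root.find("testsuite[@name='benchmarks']")
for case in benchmarks.findall("testcase"):
    [duration] = case.xpath("properties/property[@name='duration:mempool']")
    [limit] = case.xpath("properties/property[@name='limit:mempool']")
    print(case.attrib["name"], duration.attrib["value"], "of limit", limit.attrib["value"])
```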


@ -7,6 +7,7 @@ import functools
import gc
import logging
import os
import pathlib
import subprocess
import sys
from concurrent.futures import Future
@ -15,12 +16,13 @@ from statistics import mean
from textwrap import dedent
from time import thread_time
from types import TracebackType
from typing import Any, Callable, Collection, Iterator, List, Optional, TextIO, Type, Union
from typing import Any, Callable, Collection, Iterator, List, Optional, TextIO, Tuple, Type, Union
import pytest
from chia_rs import Coin
from typing_extensions import Protocol, final
import chia
from chia.types.blockchain_format.sized_bytes import bytes32
from chia.types.condition_opcodes import ConditionOpcode
from chia.util.hash import std_hash
@ -61,9 +63,9 @@ def manage_gc(mode: GcMode) -> Iterator[None]:
gc.disable()
def caller_file_and_line(distance: int = 1) -> str:
def caller_file_and_line(distance: int = 1) -> Tuple[str, int]:
caller = getframeinfo(stack()[distance + 1][0])
return f"{caller.filename}:{caller.lineno}"
return caller.filename, caller.lineno
@dataclasses.dataclass(frozen=True)
@ -71,7 +73,8 @@ class RuntimeResults:
start: float
end: float
duration: float
entry_line: str
entry_file: str
entry_line: int
overhead: Optional[float]
def block(self, label: str = "") -> str:
@ -94,14 +97,15 @@ class AssertRuntimeResults:
start: float
end: float
duration: float
entry_line: str
entry_file: str
entry_line: int
overhead: Optional[float]
limit: float
ratio: float
@classmethod
def from_runtime_results(
cls, results: RuntimeResults, limit: float, entry_line: str, overhead: Optional[float]
cls, results: RuntimeResults, limit: float, entry_file: str, entry_line: int, overhead: Optional[float]
) -> AssertRuntimeResults:
return cls(
start=results.start,
@ -109,6 +113,7 @@ class AssertRuntimeResults:
duration=results.duration,
limit=limit,
ratio=results.duration / limit,
entry_file=entry_file,
entry_line=entry_line,
overhead=overhead,
)
@ -120,7 +125,7 @@ class AssertRuntimeResults:
return dedent(
f"""\
Asserting maximum duration: {label}
{self.entry_line}
{self.entry_file}:{self.entry_line}
run time: {self.duration}
overhead: {self.overhead if self.overhead is not None else "not measured"}
allowed: {self.limit}
@ -168,7 +173,7 @@ def measure_runtime(
overhead: Optional[float] = None,
print_results: bool = True,
) -> Iterator[Future[RuntimeResults]]:
entry_line = caller_file_and_line()
entry_file, entry_line = caller_file_and_line()
results_future: Future[RuntimeResults] = Future()
@ -188,6 +193,7 @@ def measure_runtime(
start=start,
end=end,
duration=duration,
entry_file=entry_file,
entry_line=entry_line,
overhead=overhead,
)
@ -228,14 +234,16 @@ class _AssertRuntime:
gc_mode: GcMode = GcMode.disable
print: bool = True
overhead: Optional[float] = None
entry_line: Optional[str] = None
entry_file: Optional[str] = None
entry_line: Optional[int] = None
_results: Optional[AssertRuntimeResults] = None
runtime_manager: Optional[contextlib.AbstractContextManager[Future[RuntimeResults]]] = None
runtime_results_callable: Optional[Future[RuntimeResults]] = None
enable_assertion: bool = True
record_property: Optional[Callable[[str, object], None]] = None
def __enter__(self) -> Future[AssertRuntimeResults]:
self.entry_line = caller_file_and_line()
self.entry_file, self.entry_line = caller_file_and_line()
self.runtime_manager = measure_runtime(
clock=self.clock, gc_mode=self.gc_mode, overhead=self.overhead, print_results=False
@ -251,7 +259,12 @@ class _AssertRuntime:
exc: Optional[BaseException],
traceback: Optional[TracebackType],
) -> None:
if self.entry_line is None or self.runtime_manager is None or self.runtime_results_callable is None:
if (
self.entry_file is None
or self.entry_line is None
or self.runtime_manager is None
or self.runtime_results_callable is None
):
raise Exception("Context manager must be entered before exiting")
self.runtime_manager.__exit__(exc_type, exc, traceback)
@ -260,6 +273,7 @@ class _AssertRuntime:
results = AssertRuntimeResults.from_runtime_results(
results=runtime,
limit=self.seconds,
entry_file=self.entry_file,
entry_line=self.entry_line,
overhead=self.overhead,
)
@ -269,6 +283,17 @@ class _AssertRuntime:
if self.print:
print(results.block(label=self.label))
if self.record_property is not None:
self.record_property(f"duration:{self.label}", results.duration)
relative_path_str = (
pathlib.Path(results.entry_file).relative_to(pathlib.Path(chia.__file__).parent.parent).as_posix()
)
self.record_property(f"path:{self.label}", relative_path_str)
self.record_property(f"line:{self.label}", results.entry_line)
self.record_property(f"limit:{self.label}", self.seconds)
if exc_type is None and self.enable_assertion:
__tracebackhide__ = True
assert runtime.duration < self.seconds, results.message()
@ -280,13 +305,13 @@ class BenchmarkRunner:
enable_assertion: bool = True
label: Optional[str] = None
overhead: Optional[float] = None
record_property: Optional[Callable[[str, object], None]] = None
@functools.wraps(_AssertRuntime)
def assert_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime:
kwargs.setdefault("enable_assertion", self.enable_assertion)
kwargs.setdefault("overhead", self.overhead)
if self.label is not None:
kwargs.setdefault("label", self.label)
kwargs.setdefault("record_property", self.record_property)
return _AssertRuntime(*args, **kwargs)
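With record_property threaded from the fixture through BenchmarkRunner into _AssertRuntime, any test that uses benchmark_runner now lands in the JUnit report with per-label duration, path, line, and limit properties (and, per the marker description added in conftest, is tagged benchmark automatically). A minimal hypothetical test for illustration; the workload and label are made up:

```python
# Hypothetical benchmark test; benchmark_runner and assert_runtime come from
# the harness above, everything else is illustrative.
import hashlib


def test_sha256_benchmark(benchmark_runner) -> None:
    with benchmark_runner.assert_runtime(seconds=0.5, label="hashing"):
        for i in range(10_000):
            hashlib.sha256(i.to_bytes(8, "big")).digest()
    # On exit the block records duration:hashing, path:hashing, line:hashing,
    # and limit:hashing as JUnit properties on this test case.
```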


@ -28,7 +28,7 @@ def enable_profiler(name: str) -> Iterator[None]:
def test_offer_parsing_performance(benchmark_runner: BenchmarkRunner) -> None:
offer_bytes = bytes.fromhex(test_offer)
with benchmark_runner.assert_runtime(seconds=2.15, label="Offer.from_bytes()"):
with benchmark_runner.assert_runtime(seconds=2.15):
with enable_profiler("offer-parsing"):
for _ in range(100):
o = Offer.from_bytes(offer_bytes)
@ -38,7 +38,7 @@ def test_offer_parsing_performance(benchmark_runner: BenchmarkRunner) -> None:
def test_offered_coins_performance(benchmark_runner: BenchmarkRunner) -> None:
offer_bytes = bytes.fromhex(test_offer)
o = Offer.from_bytes(offer_bytes)
with benchmark_runner.assert_runtime(seconds=2.5, label="Offer.from_bytes()"):
with benchmark_runner.assert_runtime(seconds=2.5):
with enable_profiler("offered-coins"):
for _ in range(100):
c = o.get_offered_coins()