ci/benchmarks: tweak what we present in regression report

- Make the regression report more compact; remove `min`, since it doesn't behave like the asymptotic limit we expected (it would likely need many times more samples to stabilize; see the sketch below)
- It looks like the low-load variants are probably just noisier with fewer samples, so remove them from the regression view:

![samples](https://user-images.githubusercontent.com/210815/191076137-f5c3a0c6-9586-4ea4-a5a7-66000e7a2540.png)
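To make both points concrete, here is a rough standalone simulation (not from the repo; the latency distribution and sample counts are invented) of how run-to-run spread behaves: in this toy model `p50` stabilizes quickly as the sample count grows, while `min` stays jittery for much longer:

```python
# Hypothetical illustration: run-to-run spread of `min` vs `p50` at
# different sample counts, for a lognormal-ish latency distribution.
import random
import statistics

random.seed(42)

def sample_latencies(n):
    # invented workload: ~10ms median latency with a right tail
    return [random.lognormvariate(2.3, 0.4) for _ in range(n)]

for n in [20, 100, 500, 5000]:
    mins = [min(sample_latencies(n)) for _ in range(10)]
    p50s = [statistics.median(sample_latencies(n)) for _ in range(10)]
    # how much does each summary statistic jitter across repeated runs?
    print(f"n={n:>5}  min spread: {max(mins) - min(mins):5.2f}ms"
          f"  p50 spread: {max(p50s) - min(p50s):5.2f}ms")
```

A percent-change report built on `min`, or on runs with only a handful of samples, would flag that jitter as regressions.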

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5942
GitOrigin-RevId: 8a9ab00c251f04d7d5a542731696cb5f86ad2b78

2 changed files with 70 additions and 22 deletions

@@ -8,6 +8,7 @@ headers:
 constants:
   scalars:
   - &low_load 20
+  - &medium_load 100
   - &high_load 500
 k6_custom: &k6_custom
@@ -37,13 +38,26 @@ queries:
             # tune this so it's just high enough that we can expect to not need
             # to allocate during the test:
             preAllocatedVUs: 10
-    query: |
+    query: &small_query |
       query MyQuery {
         aouulefavluzmkd {
           afqqxqkiyibuccz
         }
       }
+  - name: small_query_high_load
+    <<: *k6_custom
+    options:
+      k6:
+        scenarios:
+          main:
+            <<: *settings
+            rate: *high_load
+            # tune this so it's just high enough that we can expect to not need
+            # to allocate during the test:
+            preAllocatedVUs: 50
+    query: *small_query
 ############################################################################
 # A large query returning no rows. How does this compare to above? How does
 # it compare to a query from chinook that returns little data?
@@ -58,7 +72,7 @@ queries:
             <<: *settings
             rate: *low_load
             preAllocatedVUs: 20
-    query: |
+    query: &huge_query |
       query MyQuery {
         avnnjybkglhndgc {
           bhdvbvtikfpzzzi {
@@ -168,6 +182,18 @@ queries:
           }
         }
       }
+  - name: huge_query_medium_load
+    <<: *k6_custom
+    options:
+      k6:
+        scenarios:
+          main:
+            <<: *settings
+            # NOTE: we can't keep up at high_load, it seems:
+            rate: *medium_load
+            preAllocatedVUs: 100
+    query: *huge_query
 ############################################################################
 # The standard introspection query from server/src-rsr/introspection.json
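For context on the YAML machinery these hunks lean on: `&name` anchors a node, `*name` aliases it, and `<<: *mapping` merges the referenced mapping's keys into the current one (local keys such as `rate` sit alongside or override the merged keys). That's how the new high/medium-load variants reuse `&small_query` / `&huge_query` and the shared `*settings` without copy-pasting. A minimal sketch with invented values (requires PyYAML):

```python
# Sketch of the anchor / alias / merge-key mechanics used above.
import yaml  # PyYAML

doc = """
settings: &settings
  executor: constant-arrival-rate
  duration: 60s
queries:
- name: small_query_low_load
  options:
    <<: *settings
    rate: 20
  query: &small_query |
    query MyQuery { something }
- name: small_query_high_load
  options:
    <<: *settings
    rate: 500            # local key, sits alongside the merged *settings keys
  query: *small_query    # reuses the anchored query text verbatim
"""

parsed = yaml.safe_load(doc)
assert parsed["queries"][0]["query"] == parsed["queries"][1]["query"]
print(parsed["queries"][1]["options"])
# {'executor': 'constant-arrival-rate', 'duration': '60s', 'rate': 500}
```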

@@ -424,6 +424,14 @@ def generate_regression_report():
         # this_bench['requests']['count'] # TODO use this to normalize allocations
         name = this_bench['name']
+        # Skip if: this is a "low load" variation with few samples since these are
+        # likely redundant / less useful for the purpose of finding regressions
+        # (see mono #5942)
+        if "low_load" in name:
+            warn(f"Skipping '{name}' which has 'low_load' in name")
+            continue
         # Skip if: no result in merge base report to compare to:
         try:
             merge_base_bench = merge_base_report_dict[name]
         except KeyError:
@@ -440,6 +448,14 @@ def generate_regression_report():
             )
         except KeyError:
             continue
+        # For now just report regressions in the stable bytes-allocated metric for adhoc
+        if name.startswith("ADHOC-"):
+            warn(f"Just reporting regressions in bytes_alloc_per_req for '{name}' which is adhoc")
+            benchmark_set_results.append((name, metrics))
+            # Skip everything else:
+            continue
         # Response body size:
         try:
             merge_base_body_size = float(merge_base_bench['response']['totalBytes']) / float(merge_base_bench['requests']['count'])
@@ -455,7 +471,11 @@ def generate_regression_report():
             pass
         # NOTE: we decided to omit higher-percentile latencies here since
         # they are noisy (which might lead to people ignoring benchmarks)
-        for m in ['min', 'p50']:
+        # NOTE: we originally had `min` here, thinking it should be an
+        # asymptote (we can only get so fast doing a particular workload),
+        # but this hasn't turned out to be a useful summary statistic (we
+        # might need several times more samples for it to stabilize)
+        for m in ['p50']:
             try:
                 this_hist = this_bench['histogram']['json']
                 merge_base_hist = merge_base_bench['histogram']['json']
@@ -485,6 +505,7 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names,
     def out(s): f.write(s+"\n")
     out(f"## Benchmark Results") # NOTE: We use this header to identify benchmark reports in `hide-benchmark-reports.sh`
     out(f"<details closed><summary>Click for detailed reports, and help docs</summary>")
+    out(f"")
     out((f"The regression report below shows, for each benchmark, the **percent change** for "
          f"different metrics, between the merge base (the changes from **PR {merge_base_pr}**) and "
@@ -503,7 +524,7 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names,
          f"[:bar_chart: merge base]({graphql_bench_url([base_id])})... "
          f"[:bar_chart: both compared]({graphql_bench_url([these_id, base_id])})")
     out(f"")
-    out(f"<details open><summary>Click here for a detailed report.</summary>")
+    out(f"</details>")
     out(f"")
     # Return what should be the first few chars of the line, which will determine its styling:
@@ -519,31 +540,32 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names,
         elif -25.0 <= val < 0: return "++  " # GREEN
         else:                  return "+++ " # GREEN
-    out( f"``` diff ") # START DIFF SYNTAX
+    out(f"``` diff") # START DIFF SYNTAX
     for benchmark_set_name, (mem_in_use_before_diff, live_bytes_before_diff, mem_in_use_after_diff, live_bytes_after_diff, benchmarks) in results.items():
         if benchmark_set_name[:-5] in skip_pr_report_names: continue
         l0 = live_bytes_before_diff
         l1 = live_bytes_after_diff
         u0 = mem_in_use_before_diff
-        u1 = mem_in_use_after_diff
out( f"{col( )}{''*(len(benchmark_set_name)+4)}")
out( f"{col( )}{benchmark_set_name}" )
out( f"{col( )}{''*(len(benchmark_set_name)+4)}")
out( f"{col( )} ")
out( f"{col( )} ᐉ Memory Residency (RTS-reported):")
out( f"{col(u0)} {'mem_in_use':<25}: {u0:>6.1f} (BEFORE benchmarks ran; baseline for schema)")
out( f"{col(l0)} {'live_bytes':<25}: {l0:>6.1f} (BEFORE benchmarks ran; baseline for schema)")
out( f"{col(l1)} {'live_bytes':<25}: {l1:>6.1f} (AFTER benchmarks ran)")
# u1 = mem_in_use_after_diff
out( f"{col(u0)} {benchmark_set_name[:-5]+' ':─<21s}{'┤ MEMORY RESIDENCY (from RTS)': <30}{'mem_in_use (BEFORE benchmarks)': >38}{u0:>12.1f}")
out( f"{col(l0)} { ' ': <21s}{'' : <30}{'live_bytes (BEFORE benchmarks)': >38}{l0:>12.1f}")
out( f"{col(l1)} { ' ': <21s}{'' }{' live_bytes (AFTER benchmarks)':_>67}{l1:>12.1f}")
         for bench_name, metrics in benchmarks:
-            out( f"{col( )} ")
-            out( f"{col( )}{bench_name.replace('-k6-custom','').replace('_',' ')}:")
+            bench_name_pretty = bench_name.replace('-k6-custom','').replace('_',' ') # need at least 40 chars
             for metric_name, d in metrics.items():
+                # For now just report regressions in the stable bytes-allocated metric for adhoc
+                if bench_name.startswith("ADHOC-") and metric_name != "bytes_alloc_per_req": continue
out(f"{col(d)} {metric_name:<25}: {d:>6.1f}")
out( f"{col( )} ")
out( f"``` ") # END DIFF SYNTAX
out(f"</details>")
if len(list(metrics.items())) == 1: # need to waste a line if only one metric:
out(f"{col(d )} { ' ': <21s}{''+bench_name_pretty : <40}{ metric_name: >28}{d :>12.1f}")
out(f"{col(l1)} { ' ': <21s}{'' }{ '':_>67}{'' :>12s}")
elif metric_name == list(metrics.items())[0][0]: # first:
out(f"{col(d )} { ' ': <21s}{''+bench_name_pretty : <40}{ metric_name: >28}{d :>12.1f}")
elif metric_name == list(metrics.items())[-1][0]: # last:
out(f"{col(l1)} { ' ': <21s}{'' }{ ' '+metric_name:_>67}{d :>12.1f}")
else: # middle, omit name
out(f"{col(d )} { ' ': <21s}{'' : <40}{ metric_name: >28}{d :>12.1f}")
out(f"```") # END DIFF SYNTAX
say(f"Wrote github comment to {REGRESSION_REPORT_COMMENT_FILENAME}")
f.close()
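To make the new fixed-width layout concrete, here is a small hypothetical rendering (benchmark-set name and values invented; the `col()` gutter replaced by a literal `'#   '`) of the three residency lines above:

```python
# Standalone sketch of the layout the format strings above produce.
name, u0, l0, l1 = "huge schema", 1.2, -0.3, 0.8
print(f"#   {name+' ':─<21s}{'┤ MEMORY RESIDENCY (from RTS)': <30}{'mem_in_use (BEFORE benchmarks)': >38}{u0:>12.1f}")
print(f"#   {' ': <21s}{'│': <30}{'live_bytes (BEFORE benchmarks)': >38}{l0:>12.1f}")
print(f"#   {' ': <21s}{'│'}{' live_bytes (AFTER benchmarks)':_>67}{l1:>12.1f}")
```

The 21-column gutter keeps each `│` aligned under the `┤` that follows the set name, and the `_` fill on the last line draws the rule that closes the block; the per-benchmark branches above do the same with `├` on the first metric row.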