Mirror of https://github.com/hasura/graphql-engine.git (synced 2024-12-15 01:12:56 +03:00)
ci/benchmarks: tweak what we present in regression report
- Make regression report more compact; remove `min` since it doesn't behave like a limit
- It looks like the low-load variants are probably just noisier with fewer samples, so remove them from the regression view:

![samples](https://user-images.githubusercontent.com/210815/191076137-f5c3a0c6-9586-4ea4-a5a7-66000e7a2540.png)

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5942
GitOrigin-RevId: 8a9ab00c251f04d7d5a542731696cb5f86ad2b78
This commit is contained in: parent 37b846c28d, commit 81df74852e
@@ -8,6 +8,7 @@ headers:
constants:
  scalars:
  - &low_load 20
  - &medium_load 100
  - &high_load 500

  k6_custom: &k6_custom
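As an aside, `&low_load`, `&medium_load` and `&high_load` are ordinary YAML anchors; the scenarios below pull a rate back in with an alias such as `*high_load`, so each load level is defined in one place. A minimal sketch of how that resolution works, using PyYAML on an illustrative snippet rather than the real config:

```python
import yaml  # PyYAML

snippet = """
constants:
  scalars:
  - &low_load 20
  - &high_load 500

queries:
- name: small_query_high_load
  rate: *high_load   # the alias resolves to 500 when the document is loaded
"""

doc = yaml.safe_load(snippet)
print(doc["queries"][0]["rate"])  # -> 500
```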
@@ -37,13 +38,26 @@ queries:
            # tune this so it's just high enough that we can expect to not need
            # to allocate during the test:
            preAllocatedVUs: 10
    query: |
    query: &small_query |
      query MyQuery {
        aouulefavluzmkd {
          afqqxqkiyibuccz
        }
      }

  - name: small_query_high_load
    <<: *k6_custom
    options:
      k6:
        scenarios:
          main:
            <<: *settings
            rate: *high_load
            # tune this so it's just high enough that we can expect to not need
            # to allocate during the test:
            preAllocatedVUs: 50
    query: *small_query

  ############################################################################
  # A large query returning no rows. How does this compare to above? How does
  # it compare to a query from chinook that returns little data?
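For these k6 scenarios, `rate` is the target request arrival rate and `preAllocatedVUs` is the pool of virtual users k6 sets up before the test starts; the "tune this so it's just high enough that we can expect to not need to allocate during the test" comments are about making that pool big enough that k6 never has to allocate extra VUs mid-run. A rough sizing rule of thumb, as a sketch (this helper is not part of the benchmark code): in-flight requests, and hence VUs needed, is roughly arrival rate times expected request duration, plus headroom.

```python
import math

def estimate_preallocated_vus(rate_per_s: float, est_duration_s: float, headroom: float = 1.5) -> int:
    """Rough sizing for preAllocatedVUs under a constant arrival rate.

    By Little's law, in-flight requests ~= arrival rate * request duration,
    and each in-flight request occupies one VU, so pre-allocate at least
    that many with some headroom for latency spikes.
    """
    return math.ceil(rate_per_s * est_duration_s * headroom)

# e.g. 500 req/s with ~50ms responses -> 38 pre-allocated VUs
print(estimate_preallocated_vus(500, 0.05))
```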
@@ -58,7 +72,7 @@ queries:
            <<: *settings
            rate: *low_load
            preAllocatedVUs: 20
    query: |
    query: &huge_query |
      query MyQuery {
        avnnjybkglhndgc {
          bhdvbvtikfpzzzi {
@@ -168,6 +182,18 @@ queries:
          }
        }
      }

  - name: huge_query_medium_load
    <<: *k6_custom
    options:
      k6:
        scenarios:
          main:
            <<: *settings
            # NOTE: we can't keep up at high_load, it seems:
            rate: *medium_load
            preAllocatedVUs: 100
    query: *huge_query

  ############################################################################
  # The standard introspection query from server/src-rsr/introspection.json
server/benchmarks/fabfile.py (vendored): 62 changed lines
@@ -424,6 +424,14 @@ def generate_regression_report():
        # this_bench['requests']['count'] # TODO use this to normalize allocations
        name = this_bench['name']

        # Skip if: this is a "low load" variation with few samples since these are
        # likely redundant / less useful for the purpose of finding regressions
        # (see mono #5942)
        if "low_load" in name:
            warn(f"Skipping '{name}' which has 'low_load' in name")
            continue

        # Skip if: no result in merge base report to compare to:
        try:
            merge_base_bench = merge_base_report_dict[name]
        except KeyError:
@@ -440,6 +448,14 @@ def generate_regression_report():
            )
        except KeyError:
            continue

        # For now just report regressions in the stable bytes-allocated metric for adhoc
        if name.startswith("ADHOC-"):
            warn(f"Just reporting regressions in bytes_alloc_per_req for '{name}' which is adhoc")
            benchmark_set_results.append((name, metrics))
            # Skip everything else:
            continue

        # Response body size:
        try:
            merge_base_body_size = float(merge_base_bench['response']['totalBytes']) / float(merge_base_bench['requests']['count'])
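Dividing `totalBytes` by `requests.count` here turns a cumulative counter into a per-request figure, so two runs that happened to serve different numbers of requests stay comparable (the TODO in the earlier hunk suggests doing the same normalization for allocations). A minimal sketch of the idea, with made-up numbers but the same field layout as the report JSON accessed above:

```python
def bytes_per_request(bench: dict) -> float:
    """Total response bytes divided by request count, as in the expression above."""
    return float(bench["response"]["totalBytes"]) / float(bench["requests"]["count"])

this_bench = {"requests": {"count": 30_000}, "response": {"totalBytes": 6_300_000}}
base_bench = {"requests": {"count": 12_000}, "response": {"totalBytes": 2_520_000}}

# The raw totals differ only because the runs served different numbers of
# requests; per request the two runs are identical:
print(bytes_per_request(this_bench))  # 210.0
print(bytes_per_request(base_bench))  # 210.0
```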
@@ -455,7 +471,11 @@ def generate_regression_report():
            pass
        # NOTE: we decided to omit higher-percentile latencies here since
        # they are noisy (which might lead to people ignoring benchmarks)
        for m in ['min', 'p50']:
        # NOTE: we originally had `min` here, thinking it should be an
        # asymptote (we can only get so fast doing a particular workload),
        # but this hasn't turned out to be a useful summary statistic (we
        # might need several times more samples for it to stabilize)
        for m in ['p50']:
            try:
                this_hist = this_bench['histogram']['json']
                merge_base_hist = merge_base_bench['histogram']['json']
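The new comment's reasoning, that `min` needs far more samples than a middle quantile before it settles down, is easy to see with a toy simulation (illustrative only, not part of the fabfile): repeatedly benchmark the same synthetic workload and compare how much `min` and `p50` drift between runs.

```python
import random
import statistics

random.seed(0)

def one_run(n: int) -> tuple[float, float]:
    # Toy latency model in ms, roughly normal around 50ms; purely illustrative.
    latencies = [random.gauss(50, 10) for _ in range(n)]
    return min(latencies), statistics.median(latencies)

mins, p50s = zip(*(one_run(200) for _ in range(50)))
print("stddev of min across runs:", round(statistics.pstdev(mins), 2))  # several ms: noisy
print("stddev of p50 across runs:", round(statistics.pstdev(p50s), 2))  # about 1 ms: much more stable
```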
@@ -485,6 +505,7 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names,
    def out(s): f.write(s+"\n")

    out(f"## Benchmark Results") # NOTE: We use this header to identify benchmark reports in `hide-benchmark-reports.sh`
    out(f"<details closed><summary>Click for detailed reports, and help docs</summary>")
    out(f"")
    out((f"The regression report below shows, for each benchmark, the **percent change** for "
         f"different metrics, between the merge base (the changes from **PR {merge_base_pr}**) and "
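The **percent change** figures in the report body are presumably the relative difference of each metric against its merge-base value; a minimal sketch of that calculation (the actual computation lives elsewhere in fabfile.py and is not part of this diff):

```python
def percent_change(merge_base_value: float, this_pr_value: float) -> float:
    """Relative change vs. the merge base, in percent. For metrics where lower
    is better (latency, allocations, residency), negative means improvement."""
    return 100.0 * (this_pr_value - merge_base_value) / merge_base_value

print(f"{percent_change(120.0, 126.0):+.1f}%")  # +5.0%  (regression)
print(f"{percent_change(120.0, 114.0):+.1f}%")  # -5.0%  (improvement)
```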
@@ -503,7 +524,7 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names,
         f"[:bar_chart: merge base]({graphql_bench_url([base_id])})... "
         f"[:bar_chart: both compared]({graphql_bench_url([these_id, base_id])})")
    out(f"")
    out(f"<details open><summary>Click here for a detailed report.</summary>")
    out(f"</details>")
    out(f"")

    # Return what should be the first few chars of the line, which will determine its styling:
@@ -519,31 +540,32 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names,
        elif -25.0 <= val < 0: return "++ " # GREEN
        else: return "+++ " # GREEN

    out( f"``` diff ") # START DIFF SYNTAX
    out(f"``` diff") # START DIFF SYNTAX
    for benchmark_set_name, (mem_in_use_before_diff, live_bytes_before_diff, mem_in_use_after_diff, live_bytes_after_diff, benchmarks) in results.items():
        if benchmark_set_name[:-5] in skip_pr_report_names: continue
        l0 = live_bytes_before_diff
        l1 = live_bytes_after_diff
        u0 = mem_in_use_before_diff
        u1 = mem_in_use_after_diff
        out( f"{col( )} ┌{'─'*(len(benchmark_set_name)+4)}┐")
        out( f"{col( )} │ {benchmark_set_name} │" )
        out( f"{col( )} └{'─'*(len(benchmark_set_name)+4)}┘")
        out( f"{col( )} ")
        out( f"{col( )} ᐉ Memory Residency (RTS-reported):")
        out( f"{col(u0)} {'mem_in_use':<25}: {u0:>6.1f} (BEFORE benchmarks ran; baseline for schema)")
        out( f"{col(l0)} {'live_bytes':<25}: {l0:>6.1f} (BEFORE benchmarks ran; baseline for schema)")
        out( f"{col(l1)} {'live_bytes':<25}: {l1:>6.1f} (AFTER benchmarks ran)")
        # u1 = mem_in_use_after_diff

        out( f"{col(u0)} {benchmark_set_name[:-5]+' ':─<21s}{'┤ MEMORY RESIDENCY (from RTS)': <30}{'mem_in_use (BEFORE benchmarks)': >38}{u0:>12.1f} ┐")
        out( f"{col(l0)} { ' ': <21s}{'│' : <30}{'live_bytes (BEFORE benchmarks)': >38}{l0:>12.1f} │")
        out( f"{col(l1)} { ' ': <21s}{'│' }{' live_bytes (AFTER benchmarks)':_>67}{l1:>12.1f} ┘")
        for bench_name, metrics in benchmarks:
            out( f"{col( )} ")
            out( f"{col( )} ᐅ {bench_name.replace('-k6-custom','').replace('_',' ')}:")
            bench_name_pretty = bench_name.replace('-k6-custom','').replace('_',' ') # need at least 40 chars
            for metric_name, d in metrics.items():
                # For now just report regressions in the stable bytes-allocated metric for adhoc
                if bench_name.startswith("ADHOC-") and not metric_name is "bytes_alloc_per_req": continue
                out(f"{col(d)} {metric_name:<25}: {d:>6.1f}")
            out( f"{col( )} ")
    out( f"``` ") # END DIFF SYNTAX
    out(f"</details>")
                if len(list(metrics.items())) == 1: # need to waste a line if only one metric:
                    out(f"{col(d )} { ' ': <21s}{'│ '+bench_name_pretty : <40}{ metric_name: >28}{d :>12.1f} ┐")
                    out(f"{col(l1)} { ' ': <21s}{'│' }{ '':_>67}{'' :>12s} ┘")
                elif metric_name == list(metrics.items())[0][0]: # first:
                    out(f"{col(d )} { ' ': <21s}{'│ '+bench_name_pretty : <40}{ metric_name: >28}{d :>12.1f} ┐")
                elif metric_name == list(metrics.items())[-1][0]: # last:
                    out(f"{col(l1)} { ' ': <21s}{'│' }{ ' '+metric_name:_>67}{d :>12.1f} ┘")
                else: # middle, omit name
                    out(f"{col(d )} { ' ': <21s}{'│ ' : <40}{ metric_name: >28}{d :>12.1f} │")


    out(f"```") # END DIFF SYNTAX

    say(f"Wrote github comment to {REGRESSION_REPORT_COMMENT_FILENAME}")
    f.close()