From 6e578f229a5a70fcf1264b437633d021a8f94249 Mon Sep 17 00:00:00 2001 From: Brandon Simmons Date: Thu, 1 Sep 2022 16:46:29 -0400 Subject: [PATCH] server/ci/benchmarks: incorporate ad-hoc queries into benchmark reports PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5704 GitOrigin-RevId: e09406170e1245ceeb3be51f498159f76af4f4bf --- server/benchmarks/README.md | 5 +- server/benchmarks/bench.sh | 123 +++++++++++++++++++++++++++++------ server/benchmarks/fabfile.py | 10 +-- 3 files changed, 111 insertions(+), 27 deletions(-) diff --git a/server/benchmarks/README.md b/server/benchmarks/README.md index 728b6e15aae..df1ec40291a 100644 --- a/server/benchmarks/README.md +++ b/server/benchmarks/README.md @@ -28,7 +28,10 @@ the server, where you don't care about concurrent requests and where the work is CPU-bound (because in the future reporting will highlight stable metrics like CPU mutator time and allocations). -**This is currently a WIP and not surfaced in reports** +Because of the overhead of timing things in bash you probably don't want to +rely on the wallclock latency numbers for queries that are faster than 100 ms. +These numbers may also be noisier since we're running single-threaded and so +taking fewer samples. ### bench.sh diff --git a/server/benchmarks/bench.sh b/server/benchmarks/bench.sh index 76512b20681..fddf13afa45 100755 --- a/server/benchmarks/bench.sh +++ b/server/benchmarks/bench.sh @@ -322,6 +322,12 @@ function run_adhoc_operation_benchmarks() ( echo_pretty "Running adhoc operations for $BENCH_DIR..." + # sanity check: + if [ ! -f report.json ]; then + echo "Bug: Somehow a report from graphql-engine isn't present! Exiting" + exit 43 + fi + # NOTE: This loops over each word in `$scripts` with globbing. for script in $scripts; do # The script must define a function named "adhoc_operation" which we @@ -330,18 +336,13 @@ function run_adhoc_operation_benchmarks() ( # iterations to execute here. 
unset -f adhoc_operation unset iterations - # NOTE: Shellcheck is not able to source these scripts. - # - # cf. https://github.com/koalaman/shellcheck/wiki/SC1090 - # https://github.com/koalaman/shellcheck/wiki/SC1091 - # + # shellcheck disable=SC1090 . "$script" if [[ $(type -t adhoc_operation) != function ]]; then echo "Error: $script must define a function named 'adhoc_operation'! Exiting." >&2; exit 1 fi - # NOTE: See the comment on `adhoc_operation` above. - # + # shellcheck disable=SC2154 if ! [[ "$iterations" =~ ^[0-9]+$ ]] ; then echo "Error: $script must define 'iterations'" >&2; exit 1 @@ -352,33 +353,108 @@ function run_adhoc_operation_benchmarks() ( # Unfortunately that metric is fubar # (https://gitlab.haskell.org/ghc/ghc/-/issues/21082). Trying to use perf # will be a pain, might require root, means this script won't work over a - # network, etc... For now we'll just collect allocated_bytes and noisey - # wall-clock time - allocated_bytes_start=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') - time_sec_start=$(date +%s.%N) # N.B. don't time curl - + # network, etc... + live_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_live_bytes') + mem_in_use_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_mem_in_use_bytes') + allocated_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') + IFS="./" read -r -a script_name_parts <<< "$script" name=${script_name_parts[1]} + # Space separated list of nanosecond timings + latencies_ns="" + # Select an item from latencies_ns at the percentile requested, this + # never takes the mean of two samples. + rough_percentile() { + echo "$latencies_ns" |\ + tr ' ' '\n' | sort -n |\ + awk '{all[NR] = $0} END{print all[int(NR*'"$1"'+0.5)]}' + # ^^^ select a value based on line number, rounding up + } + echo -n "Running $name $iterations time(s)..." for _ in $(seq 1 "$iterations"); do echo -n "." 
+ # NOTE: Executing a command like `date` takes about 1 ms in Bash, so + # we can't really accurately time operations that are on the order of + # single-digit ms + time_ns_before=$(date +%s%N) adhoc_operation &> /tmp/hasura_bench_adhoc_last_iteration.out + time_ns_after=$(date +%s%N) + latencies_ns+=" $((time_ns_after-time_ns_before))" done echo - time_sec_end=$(date +%s.%N) # N.B. don't time curl - allocated_bytes_end=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') + allocated_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') + mem_in_use_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_mem_in_use_bytes') + live_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_live_bytes') + + mean_ms=$(echo "$latencies_ns" | jq -s '(add/length)/1000000') + min_ms=$(echo "$latencies_ns" | jq -s 'min/1000000') + max_ms=$(echo "$latencies_ns" | jq -s 'max/1000000') + p50_ms=$(rough_percentile 0.50 | jq -s 'min/1000000') + p90_ms=$(rough_percentile 0.90 | jq -s 'min/1000000') + bytes_allocated_per_request=$(jq -n \("$allocated_bytes_after"-"$allocated_bytes_before"\)/"$iterations") + + echo " Done. For $name, measured $mean_ms ms/op and $bytes_allocated_per_request bytes_allocated/op" + echo "Appending to $PWD/report.json..." + # NOTE: certain metrics that are difficult to collect or that we don't + # care about are set to 0 here, to prevent the graphql-bench web app from + # breaking + # + # Name these "ADHOC-foo" so we can choose to display them differently, + # e.g. 
in PR regression reports + temp_result_file="$(mktemp)" # ('sponge' unavailable) + jq --arg name "$name" \ + --argjson iterations "$iterations" \ + --argjson p50_ms "$p50_ms" \ + --argjson p90_ms "$p90_ms" \ + --argjson max_ms "$max_ms" \ + --argjson mean_ms "$mean_ms" \ + --argjson min_ms "$min_ms" \ + --argjson bytes_allocated_per_request "$bytes_allocated_per_request" \ + --argjson live_bytes_before "$live_bytes_before" \ + --argjson live_bytes_after "$live_bytes_after" \ + --argjson mem_in_use_bytes_before "$mem_in_use_bytes_before" \ + --argjson mem_in_use_bytes_after "$mem_in_use_bytes_after" \ + '. + + [{ + "name": "ADHOC-\($name)", + "time": { + }, + "requests": { + "count": $iterations, + "average": 0 + }, + "response": { + "totalBytes": 0, + "bytesPerSecond": 0 + }, + "histogram": { + "json": { + "p50": $p50_ms, + "p90": $p90_ms, + "max": $max_ms, + "totalCount": $iterations, + "mean": $mean_ms, + "min": $min_ms, + "stdDeviation": 0 + }, + "parsedStats": [ + ] + }, + "extended_hasura_checks": { + "bytes_allocated_per_request": $bytes_allocated_per_request, + "live_bytes_before": $live_bytes_before, + "live_bytes_after": $live_bytes_after, + "mem_in_use_bytes_before": $mem_in_use_bytes_before, + "mem_in_use_bytes_after": $mem_in_use_bytes_after + } + }]' < report.json > "$temp_result_file" + mv -f "$temp_result_file" report.json - # For now just print stats so they exist in CI logs at least - secs=$(jq -n \("$time_sec_end"-"$time_sec_start"\)/"$iterations") - bytes=$(jq -n \("$allocated_bytes_end"-"$allocated_bytes_start"\)/"$iterations") - echo " Done. 
For $name, measured $secs sec/op and $bytes bytes_allocated/op" # TODO once GHC issue 21082 fixed: # - collect mutator cpu ns - # - write metrics to CSV file, upload to S3 in fabfile.py - # - integrate with regression report (just compare bytes and mutator cpu time) - # - document new adhoc operations stuff # - add an untrack/track table benchmark to chinook and huge_schema done ) @@ -447,3 +523,8 @@ custom_setup load_data_and_schema run_graphql_benchmarks run_adhoc_operation_benchmarks + +echo_pretty "All done. You can visualize the report using the web app here:" +echo_pretty " https://hasura.github.io/graphql-bench/app/web-app/" +echo_pretty "...and passing it the file we just generated:" +echo_pretty " $BENCH_DIR/report.json" diff --git a/server/benchmarks/fabfile.py b/server/benchmarks/fabfile.py index 5bf3a784ae5..f8f8d51c7c5 100755 --- a/server/benchmarks/fabfile.py +++ b/server/benchmarks/fabfile.py @@ -391,9 +391,9 @@ def generate_regression_report(): warn(f"No results for {benchmark_set_name} found for PR #{merge_base_pr}. Skipping") continue - # A benchmark set may contain no queries (e.g. If it's just using the - # ad hoc operation mode), in which case the results are an empty array. - # Skip in those cases for now + # A benchmark set may contain no queries (e.g. formerly, If it was just + # using the ad hoc operation mode), in which case the results are an + # empty array. Skip in those cases for now if not (this_report and merge_base_report): continue @@ -452,8 +452,6 @@ def generate_regression_report(): pass # NOTE: we decided to omit higher-percentile latencies here since # they are noisy (which might lead to people ignoring benchmarks) - # and there are better ways to view these tail latencies in the works. 
- # for m in ['min', 'p50', 'p90', 'p97_5']: for m in ['min', 'p50']: try: this_hist = this_bench['histogram']['json'] @@ -538,6 +536,8 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names, out( f"{col( )} ") out( f"{col( )} ᐅ {bench_name.replace('-k6-custom','').replace('_',' ')}:") for metric_name, d in metrics.items(): + # For now just report regressions in the stable bytes-allocated metric for adhoc + if bench_name.startswith("ADHOC-") and metric_name != "bytes_alloc_per_req": continue out(f"{col(d)} {metric_name:<25}: {d:>6.1f}") out( f"{col( )} ") out( f"``` ") # END DIFF SYNTAX