From 6e578f229a5a70fcf1264b437633d021a8f94249 Mon Sep 17 00:00:00 2001 From: Brandon Simmons Date: Thu, 1 Sep 2022 16:46:29 -0400 Subject: [PATCH] server/ci/benchmarks: incorporate ad-hoc queries into benchmark reports PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5704 GitOrigin-RevId: e09406170e1245ceeb3be51f498159f76af4f4bf --- server/benchmarks/README.md | 5 +- server/benchmarks/bench.sh | 123 +++++++++++++++++++++++++++++------ server/benchmarks/fabfile.py | 10 +-- 3 files changed, 111 insertions(+), 27 deletions(-) diff --git a/server/benchmarks/README.md b/server/benchmarks/README.md index 728b6e15aae..df1ec40291a 100644 --- a/server/benchmarks/README.md +++ b/server/benchmarks/README.md @@ -28,7 +28,10 @@ the server, where you don't care about concurrent requests and where the work is CPU-bound (because in the future reporting will highlight stable metrics like CPU mutator time and allocations). -**This is currently a WIP and not surfaced in reports** +Because of the overhead of timing things in bash you probably don't want to +rely on the wallclock latency numbers for queries that are faster than 100 ms. +These numbers may also be noisier since we're running single-threaded and so +taking fewer samples. ### bench.sh diff --git a/server/benchmarks/bench.sh b/server/benchmarks/bench.sh index 76512b20681..fddf13afa45 100755 --- a/server/benchmarks/bench.sh +++ b/server/benchmarks/bench.sh @@ -322,6 +322,12 @@ function run_adhoc_operation_benchmarks() ( echo_pretty "Running adhoc operations for $BENCH_DIR..." + # sanity check: + if [ ! -f report.json ]; then + echo "Bug: Somehow a report from graphql-engine isn't present! Exiting" + exit 43 + fi + # NOTE: This loops over each word in `$scripts` with globbing. for script in $scripts; do # The script must define a function named "adhoc_operation" which we @@ -330,18 +336,13 @@ function run_adhoc_operation_benchmarks() ( # iterations to execute here. 
unset -f adhoc_operation unset iterations - # NOTE: Shellcheck is not able to source these scripts. - # - # cf. https://github.com/koalaman/shellcheck/wiki/SC1090 - # https://github.com/koalaman/shellcheck/wiki/SC1091 - # + # shellcheck disable=SC1090 . "$script" if [[ $(type -t adhoc_operation) != function ]]; then echo "Error: $script must define a function named 'adhoc_operation'! Exiting." >&2; exit 1 fi - # NOTE: See the comment on `adhoc_operation` above. - # + # shellcheck disable=SC2154 if ! [[ "$iterations" =~ ^[0-9]+$ ]] ; then echo "Error: $script must define 'iterations'" >&2; exit 1 @@ -352,33 +353,108 @@ function run_adhoc_operation_benchmarks() ( # Unfortunately that metric is fubar # (https://gitlab.haskell.org/ghc/ghc/-/issues/21082). Trying to use perf # will be a pain, might require root, means this script won't work over a - # network, etc... For now we'll just collect allocated_bytes and noisey - # wall-clock time - allocated_bytes_start=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') - time_sec_start=$(date +%s.%N) # N.B. don't time curl - + # network, etc... + live_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_live_bytes') + mem_in_use_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_mem_in_use_bytes') + allocated_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') + IFS="./" read -r -a script_name_parts <<< "$script" name=${script_name_parts[1]} + # Space separated list of nanosecond timings + latencies_ns="" + # Select an item from latencies_ns at the percentile requested, this + # never takes the mean of two samples. + rough_percentile() { + echo "$latencies_ns" |\ + tr ' ' '\n' | sort -n |\ + awk '{all[NR] = $0} END{print all[int(NR*'"$1"'+0.5)]}' + # ^^^ select a value based on line number, rounding up + } + echo -n "Running $name $iterations time(s)..." for _ in $(seq 1 "$iterations"); do echo -n "." 
+ # NOTE: Executing a command like `date` takes about 1 ms in Bash, so + # we can't really accurately time operations that are on the order of + # single-digit ms + time_ns_before=$(date +%s%N) adhoc_operation &> /tmp/hasura_bench_adhoc_last_iteration.out + time_ns_after=$(date +%s%N) + latencies_ns+=" $((time_ns_after-time_ns_before))" done echo - time_sec_end=$(date +%s.%N) # N.B. don't time curl - allocated_bytes_end=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') + allocated_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes') + mem_in_use_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_mem_in_use_bytes') + live_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_live_bytes') + + mean_ms=$(echo "$latencies_ns" | jq -s '(add/length)/1000000') + min_ms=$(echo "$latencies_ns" | jq -s 'min/1000000') + max_ms=$(echo "$latencies_ns" | jq -s 'max/1000000') + p50_ms=$(rough_percentile 0.50 | jq -s 'min/1000000') + p90_ms=$(rough_percentile 0.90 | jq -s 'min/1000000') + bytes_allocated_per_request=$(jq -n \("$allocated_bytes_after"-"$allocated_bytes_before"\)/"$iterations") + + echo " Done. For $name, measured $mean_ms ms/op and $bytes_allocated_per_request bytes_allocated/op" + echo "Appending to $PWD/report.json..." + # NOTE: certain metrics that are difficult to collect or that we don't + # care about are set to 0 here, to prevent the graphql-bench web app from + # breaking + # + # Name these "ADHOC-foo" so we can choose to display them differently, + # e.g. 
in PR regression reports + temp_result_file="$(mktemp)" # ('sponge' unavailable) + jq --arg name "$name" \ + --argjson iterations "$iterations" \ + --argjson p50_ms "$p50_ms" \ + --argjson p90_ms "$p90_ms" \ + --argjson max_ms "$max_ms" \ + --argjson mean_ms "$mean_ms" \ + --argjson min_ms "$min_ms" \ + --argjson bytes_allocated_per_request "$bytes_allocated_per_request" \ + --argjson live_bytes_before "$live_bytes_before" \ + --argjson live_bytes_after "$live_bytes_after" \ + --argjson mem_in_use_bytes_before "$mem_in_use_bytes_before" \ + --argjson mem_in_use_bytes_after "$mem_in_use_bytes_after" \ + '. + + [{ + "name": "ADHOC-\($name)", + "time": { + }, + "requests": { + "count": $iterations, + "average": 0 + }, + "response": { + "totalBytes": 0, + "bytesPerSecond": 0 + }, + "histogram": { + "json": { + "p50": $p50_ms, + "p90": $p90_ms, + "max": $max_ms, + "totalCount": $iterations, + "mean": $mean_ms, + "min": $min_ms, + "stdDeviation": 0 + }, + "parsedStats": [ + ] + }, + "extended_hasura_checks": { + "bytes_allocated_per_request": $bytes_allocated_per_request, + "live_bytes_before": $live_bytes_before, + "live_bytes_after": $live_bytes_after, + "mem_in_use_bytes_before": $mem_in_use_bytes_before, + "mem_in_use_bytes_after": $mem_in_use_bytes_after + } + }]' < report.json > "$temp_result_file" + mv -f "$temp_result_file" report.json - # For now just print stats so they exist in CI logs at least - secs=$(jq -n \("$time_sec_end"-"$time_sec_start"\)/"$iterations") - bytes=$(jq -n \("$allocated_bytes_end"-"$allocated_bytes_start"\)/"$iterations") - echo " Done. 
For $name, measured $secs sec/op and $bytes bytes_allocated/op" # TODO once GHC issue 21082 fixed: # - collect mutator cpu ns - # - write metrics to CSV file, upload to S3 in fabfile.py - # - integrate with regression report (just compare bytes and mutator cpu time) - # - document new adhoc operations stuff # - add an untrack/track table benchmark to chinook and huge_schema done ) @@ -447,3 +523,8 @@ custom_setup load_data_and_schema run_graphql_benchmarks run_adhoc_operation_benchmarks + +echo_pretty "All done. You can visualize the report using the web app here:" +echo_pretty " https://hasura.github.io/graphql-bench/app/web-app/" +echo_pretty "...and passing it the file we just generated:" +echo_pretty " $BENCH_DIR/report.json" diff --git a/server/benchmarks/fabfile.py b/server/benchmarks/fabfile.py index 5bf3a784ae5..f8f8d51c7c5 100755 --- a/server/benchmarks/fabfile.py +++ b/server/benchmarks/fabfile.py @@ -391,9 +391,9 @@ def generate_regression_report(): warn(f"No results for {benchmark_set_name} found for PR #{merge_base_pr}. Skipping") continue - # A benchmark set may contain no queries (e.g. If it's just using the - # ad hoc operation mode), in which case the results are an empty array. - # Skip in those cases for now + # A benchmark set may contain no queries (e.g. formerly, If it was just + # using the ad hoc operation mode), in which case the results are an + # empty array. Skip in those cases for now if not (this_report and merge_base_report): continue @@ -452,8 +452,6 @@ def generate_regression_report(): pass # NOTE: we decided to omit higher-percentile latencies here since # they are noisy (which might lead to people ignoring benchmarks) - # and there are better ways to view these tail latencies in the works. 
- # for m in ['min', 'p50', 'p90', 'p97_5']: for m in ['min', 'p50']: try: this_hist = this_bench['histogram']['json'] @@ -538,6 +536,8 @@ def pretty_print_regression_report_github_comment(results, skip_pr_report_names, out( f"{col( )} ") out( f"{col( )} ᐅ {bench_name.replace('-k6-custom','').replace('_',' ')}:") for metric_name, d in metrics.items(): + # For now just report regressions in the stable bytes-allocated metric for adhoc + if bench_name.startswith("ADHOC-") and metric_name != "bytes_alloc_per_req": continue out(f"{col(d)} {metric_name:<25}: {d:>6.1f}") out( f"{col( )} ") out( f"``` ") # END DIFF SYNTAX