#!/usr/bin/env bash
set -euo pipefail
shopt -s globstar

# NOTE: we want to use --network=host everywhere we use docker here, for
# performance (supposedly). We include X:X port mappings which will be used on
# Mac and Windows(?) where --network=host is ignored.

# Print a highlighted (green) status line, so progress stands out in CI logs.
echo_pretty() {
  echo ">>> $(tput setaf 2)$1$(tput sgr0)"
}

# Print usage to stdout and exit non-zero.
die_usage() {
  cat <<EOL
Usage:
  $0 <benchmark_set> [<hasura_docker_image>] [<post_setup_sleep_seconds>]

The first argument chooses the particular benchmark set to run e.g. "chinook"
or "big_schema" (these correspond to directories under 'benchmark_sets/').

The second optional argument is the docker image name to test. e.g.
"hasura/graphql-engine:latest" If omitted we'll look for a hasura instance
launched with 'dev.sh graphql-engine'.

-------------------------=========########========-------------------------
EOL
  exit 1
}

# Dependencies:
if ! command -v jq &> /dev/null; then
  echo "Please install 'jq'" >&2
  exit 1
fi

# Require a benchmark-set name that corresponds to an existing directory:
{ [ -z "${1-}" ] || [ ! -d "benchmark_sets/${1-}" ]; } && die_usage

BENCH_DIR="$(pwd)/benchmark_sets/$1"
REQUESTED_HASURA_DOCKER_IMAGE="${2-}"
# We may wish to sleep after setting up the schema, etc. to e.g. allow memory
# to settle to a baseline before we measure it (defaults to no sleep):
POST_SETUP_SLEEP_TIME="${3-0}"

# Make sure we clean up, even if something goes wrong:
function cleanup {
  if [ -n "${HASURA_CONTAINER_NAME-}" ]; then
    echo_pretty "Stopping and removing hasura container"
    docker stop "$HASURA_CONTAINER_NAME" && docker rm "$HASURA_CONTAINER_NAME" \
      || echo "Stopping hasura failed, maybe it never started?"
  fi
  pg_cleanup || echo "Stopping postgres failed, maybe it never started?"
  custom_cleanup || echo "Custom cleanup failed"
}
trap cleanup EXIT

# How can we communicate with localhost from a container?
if [ "$(uname -s)" = Darwin ]; then
  # Docker for mac:
  LOCALHOST_FROM_CONTAINER=host.docker.internal
  DOCKER_NETWORK_HOST_MODE=""
else
  LOCALHOST_FROM_CONTAINER=127.0.0.1
  DOCKER_NETWORK_HOST_MODE="--network=host"
fi

# The beefy c4.8xlarge EC2 instance has two sockets, so we'll try our best to
# pin hasura on one and postgres on the other.
# Probing CPU 17 succeeds only on a machine with >= 18 logical CPUs:
if taskset -c 17 sleep 0 ; then
  echo_pretty "CPUs? Running on a beefy CI machine"
  TASKSET_HASURA="taskset -c 0-6"  # SOCKET/NUMA_NODE 0
  TASKSET_K6="taskset -c 7,8"      # SOCKET/NUMA_NODE 0
  TASKSET_PG="taskset -c 9-17"     # SOCKET/NUMA_NODE 1
  HASURA_RTS="-qa -N7"
  # This is a sort of hack to, on CI (where circleci doesn't handle control
  # characters properly), force K6 to not print progress bar updates. But note
  # that the fabric script that calls this script will also fail if this is not
  # run with a pTTY...
  # This is still not great because K6 spits out progress every second or so.
  # TODO maybe combine this stuff into a single are-we-on-ci check / env var
  K6_DOCKER_tty_OR_init="--init"
else
  echo_pretty "CPUs? Running on a puny local machine"
  TASKSET_HASURA=""
  TASKSET_PG=""
  TASKSET_K6=""
  HASURA_RTS=""
  K6_DOCKER_tty_OR_init="--tty"
fi

##################
#   Postgres     #
##################

# FYI this is adapted from scripts/containers/postgres, and uses settings
# (ports, passwords, etc) identical to `dev.sh postgres` for compatibility
# with `dev.sh graphql-engine`
PG_PORT=25432
PG_PASSWORD=postgres
PG_CONTAINER_NAME="hasura-dev-postgres-$PG_PORT"
PG_DB_URL="postgres://postgres:$PG_PASSWORD@$LOCALHOST_FROM_CONTAINER:$PG_PORT/postgres"
PSQL_DOCKER="docker exec -u postgres -i $PG_CONTAINER_NAME psql $PG_DB_URL"

# Total RAM in kB; falls back to 0 where /proc/meminfo doesn't exist (e.g.
# Mac), so we take the "puny machine" branch there instead of erroring:
MEM_TOTAL_KB="$(awk '/^MemTotal:/{print $2}' /proc/meminfo 2>/dev/null || echo 0)"
if [ "$MEM_TOTAL_KB" -ge "30000000" ]; then
  echo_pretty "RAM? Running on a beefy CI machine"
  # These are the suggested values from https://pgtune.leopard.in.ua/#/
  # using the parameters of c4.8xlarge, divided by two (since hasura is running
  # on the same instance): 9 cores and 30GB RAM, as "web application".
  #
  # NOTE: no spaces here or this will break
  CONF=$(cat <<EOF
shared_buffers=7680MB
effective_cache_size=23040MB
maintenance_work_mem=1920MB
checkpoint_completion_target=0.9
wal_buffers=16MB
default_statistics_target=100
random_page_cost=1.1
effective_io_concurrency=200
work_mem=19660kB
min_wal_size=1GB
max_wal_size=4GB
max_worker_processes=9
max_parallel_workers_per_gather=4
max_parallel_workers=9
max_parallel_maintenance_workers=4
port=$PG_PORT
EOF
)
# otherwise just use a configuration assuming 8GB RAM local dev machine:
else
  echo_pretty "RAM? Running on a puny local machine"
  CONF=$(cat <<EOF
max_connections=50
shared_buffers=1GB
effective_cache_size=3GB
maintenance_work_mem=256MB
checkpoint_completion_target=0.9
wal_buffers=16MB
default_statistics_target=100
random_page_cost=1.1
effective_io_concurrency=200
work_mem=20971kB
min_wal_size=1GB
max_wal_size=4GB
max_worker_processes=2
max_parallel_workers_per_gather=1
max_parallel_workers=2
max_parallel_maintenance_workers=1
port=$PG_PORT
EOF
)
fi

# log lines above as -c flag arguments we pass to postgres
CONF_FLAGS=$(echo "$CONF" | sed -e 's/^/-c /' | tr '\n' ' ')
# NOTE: after some consideration we decided to serve postgres from ramdisk
# here. A few reasons:
#
# - EBS is incredibly finicky and difficult to provision correctly[1]; we
#   could easily add a new benchmark which exhausts our IOPS and causes
#   confusing regression-like results
# - SQL-gen regressions should still show up as regressions if we're backed by
#   tmpfs; only perhaps the magnitude would change. We also expected PG to be
#   doing significant in-memory caching on the small datasets here.
# - There is some evidence[2] that ramdisk is actually a decent approximation of
#   the performance of a perfectly-tuned durable PG instance (i.e. the latency
#   numbers we get here are useful in absolute terms as well, representing ideal
#   performance)
#
# [1]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSPerformance.html
# [2]: https://performance.sunlight.io/postgres/
function pg_launch_container(){
  echo_pretty "Launching postgres container: $PG_CONTAINER_NAME"
  # `TASKSET_PG`, `DOCKER_NETWORK_HOST_MODE`, and `CONF_FLAGS` depend on
  # word-splitting.
  #
  # cf. https://github.com/koalaman/shellcheck/wiki/Sc2086
  #
  # shellcheck disable=SC2086
  $TASKSET_PG docker run \
    --detach \
    --name "$PG_CONTAINER_NAME" \
    --mount type=tmpfs,destination=/var/lib/postgresql/data \
    --publish 127.0.0.1:"$PG_PORT":"$PG_PORT" \
    --expose="$PG_PORT" \
    --env POSTGRES_PASSWORD="$PG_PASSWORD" \
    $DOCKER_NETWORK_HOST_MODE \
    circleci/postgres:11.5-alpine-postgis \
    $CONF_FLAGS
}

# Poll (forever) until postgres answers a trivial query.
function pg_wait() {
  echo -n "Waiting for postgres to come up"
  until ( $PSQL_DOCKER -c '\l' ) &>/dev/null; do
    echo -n '.' && sleep 0.2
  done
  echo " Ok"
}

function pg_cleanup(){
  echo_pretty "Removing $PG_CONTAINER_NAME and its volumes"
  docker stop "$PG_CONTAINER_NAME"
  docker rm --volumes "$PG_CONTAINER_NAME"
}

######################
#   graphql-engine   #
######################

# This matches the default we use in `dev.sh graphql-engine`
HASURA_GRAPHQL_SERVER_PORT=8181
# For Mac compatibility, we need to use this URL for hasura when communicating
# FROM a container (in this case graphql-bench):
HASURA_URL_FROM_CONTAINER="http://$LOCALHOST_FROM_CONTAINER:$HASURA_GRAPHQL_SERVER_PORT"
# ...and for anything outside a container, just:
export HASURA_URL="http://127.0.0.1:$HASURA_GRAPHQL_SERVER_PORT"

# Maybe launch the hasura instance we'll benchmark
function maybe_launch_hasura_container() {
  if [ -n "$REQUESTED_HASURA_DOCKER_IMAGE" ]; then
    HASURA_CONTAINER_NAME="graphql-engine-to-benchmark"
    # `TASKSET_HASURA`, `$DOCKER_NETWORK_HOST_MODE`, and `HASURA_RTS` depend on
    # word-splitting.
    #
    # cf. https://github.com/koalaman/shellcheck/wiki/Sc2086
    #
    # shellcheck disable=SC2086
    $TASKSET_HASURA docker run \
      --detach \
      --name "$HASURA_CONTAINER_NAME" \
      --publish 127.0.0.1:"$HASURA_GRAPHQL_SERVER_PORT":"$HASURA_GRAPHQL_SERVER_PORT" \
      --env HASURA_GRAPHQL_DATABASE_URL="$PG_DB_URL" \
      --env HASURA_GRAPHQL_ENABLE_CONSOLE=true \
      --env HASURA_GRAPHQL_SERVER_PORT="$HASURA_GRAPHQL_SERVER_PORT" \
      $DOCKER_NETWORK_HOST_MODE \
      "$REQUESTED_HASURA_DOCKER_IMAGE" \
      graphql-engine serve \
      +RTS -T $HASURA_RTS -RTS
      # ^^^ We run with `+RTS -T` to expose the /dev/rts_stats endpoint for
      # inspecting memory usage stats
  else
    echo_pretty "We'll benchmark the hasura instance at port $HASURA_GRAPHQL_SERVER_PORT"
  fi
}

function hasura_wait() {
  # Wait for the graphql-engine under bench to be ready
  echo -n "Waiting for graphql-engine at $HASURA_URL"
  if [ -z "$REQUESTED_HASURA_DOCKER_IMAGE" ]; then
    echo -n " (e.g. from 'dev.sh graphql-engine')"
  fi
  until curl -s "$HASURA_URL/v1/query" &>/dev/null; do
    echo -n '.' && sleep 0.2
  done
  echo ""
  echo " Ok"
  # Only sleep (and log about it) when extra settle time was actually
  # requested; previously we printed "Sleeping for an additional 0 seconds..."
  # and invoked a pointless `sleep 0`:
  if [ "$POST_SETUP_SLEEP_TIME" -gt 0 ]; then
    echo -n "Sleeping for an additional $POST_SETUP_SLEEP_TIME seconds as requested... "
    sleep "$POST_SETUP_SLEEP_TIME"
    echo " Ok"
  fi
}
#####################
#   graphql-bench   #
#####################

# We want to always use the latest graphql-bench. Installing is idempotent and
# fairly speedy the second time, if no changes.
function install_latest_graphql_bench() {
  echo_pretty "Installing/updating graphql-bench"
  graphql_bench_git=$(mktemp -d -t graphql-bench-XXXXXXXXXX)
  git clone --depth=1 https://github.com/hasura/graphql-bench.git "$graphql_bench_git"
  cd "$graphql_bench_git"
  # We name this 'graphql-bench-ci' so it doesn't interfere with other versions
  # (e.g. local dev of `graphql-bench`, installed with `make
  # build_local_docker_image`:
  docker build -t graphql-bench-ci:latest ./app
  cd -
  echo_pretty "Done"
}

# graphql-bench -powered benchmarks
function run_graphql_benchmarks() {
  echo_pretty "Starting graphql benchmarks"
  cd "$BENCH_DIR"
  # This reads config.query.yaml from the current directory, outputting
  # report.json to the same directory.
  #
  # `TASKSET_K6`, `DOCKER_NETWORK_HOST_MODE` and `K6_DOCKER_tty_OR_init`
  # depend on word-splitting.
  # shellcheck disable=SC2086
  $TASKSET_K6 docker run \
    --interactive \
    --volume "$PWD":/app/tmp \
    $DOCKER_NETWORK_HOST_MODE \
    $K6_DOCKER_tty_OR_init \
    graphql-bench-ci query \
    --config="./tmp/config.query.yaml" \
    --outfile="./tmp/report.json" \
    --url "$HASURA_URL_FROM_CONTAINER/v1/graphql"
  echo_pretty "Done. Report at $PWD/report.json"
  cd -
}

# adhoc script-based benchmarks (more flexible, but less reporting).
# NB: body is a subshell `( ... )` so the `cd` doesn't leak out.
function run_adhoc_operation_benchmarks() (
  cd "$BENCH_DIR"
  # Filenames should look like <name>.sh
  reg=".*\.sh"
  # Collect any adhoc operations, else skip silently
  scripts=$(find "adhoc_operations" -maxdepth 1 -executable -type f -regextype sed -regex "$reg" 2>/dev/null ) || return 0
  [ -z "$scripts" ] && return 0
  echo_pretty "Running adhoc operations for $BENCH_DIR..."

  # sanity check: we append our results to the graphql-bench report below
  if [ ! -f report.json ]; then
    echo "Bug: Somehow a report from graphql-engine isn't present! Exiting"
    exit 43
  fi

  # NOTE: This loops over each word in `$scripts` with globbing.
  for script in $scripts; do
    # The script must define a function named "adhoc_operation" which we
    # execute multiple times below. This gives more flexibility and is maybe
    # faster. It also must define 'iterations' indicating the number of
    # iterations to execute here.
    unset -f adhoc_operation
    unset iterations
    # shellcheck disable=SC1090
    . "$script"
    if [[ $(type -t adhoc_operation) != function ]]; then
      echo "Error: $script must define a function named 'adhoc_operation'! Exiting." >&2
      exit 1
    fi
    # shellcheck disable=SC2154
    if ! [[ "$iterations" =~ ^[0-9]+$ ]] ; then
      echo "Error: $script must define 'iterations'" >&2
      exit 1
    fi

    # TODO I was relying on being able to also get 'mutator_cpu_ns' to get a
    # stable metric of CPU usage (like counting instructions with 'perf').
    # Unfortunately that metric is fubar
    # (https://gitlab.haskell.org/ghc/ghc/-/issues/21082). Trying to use perf
    # will be a pain, might require root, means this script won't work over a
    # network, etc...
    live_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_live_bytes')
    mem_in_use_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_mem_in_use_bytes')
    allocated_bytes_before=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes')

    # e.g. "adhoc_operations/foo.sh" splits to (adhoc_operations foo sh);
    # element 1 is the bare benchmark name:
    IFS="./" read -r -a script_name_parts <<< "$script"
    name=${script_name_parts[1]}

    # Space separated list of nanosecond timings
    latencies_ns=""
    # Select an item from latencies_ns at the percentile requested, this
    # never takes the mean of two samples.
    rough_percentile() {
      echo "$latencies_ns" |
        tr ' ' '\n' | sort -n |
        awk '{all[NR] = $0} END{print all[int(NR*'"$1"'+0.5)]}'
        # ^^^ select a value based on line number, rounding up
    }

    echo -n "Running $name $iterations time(s)..."
    for _ in $(seq 1 "$iterations"); do
      echo -n "."
      # NOTE: Executing a command like `date` takes about 1 ms in Bash, so
      # we can't really accurately time operations that are on the order of
      # single-digit ms
      time_ns_before=$(date +%s%N)
      adhoc_operation &> /tmp/hasura_bench_adhoc_last_iteration.out
      time_ns_after=$(date +%s%N)
      latencies_ns+=" $((time_ns_after-time_ns_before))"
    done
    echo

    allocated_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.allocated_bytes')
    mem_in_use_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_mem_in_use_bytes')
    live_bytes_after=$(curl "$HASURA_URL/dev/rts_stats" 2>/dev/null | jq '.gc.gcdetails_live_bytes')

    mean_ms=$(echo "$latencies_ns" | jq -s '(add/length)/1000000')
    min_ms=$(echo "$latencies_ns" | jq -s 'min/1000000')
    max_ms=$(echo "$latencies_ns" | jq -s 'max/1000000')
    p50_ms=$(rough_percentile 0.50 | jq -s 'min/1000000')
    p90_ms=$(rough_percentile 0.90 | jq -s 'min/1000000')
    # Quote the whole jq program; previously this relied on backslash-escaped
    # parens surviving word-splitting (`jq -n \(...\)/...`), which is fragile:
    bytes_allocated_per_request=$(jq -n "($allocated_bytes_after - $allocated_bytes_before) / $iterations")
    echo " Done. For $name, measured $mean_ms ms/op and $bytes_allocated_per_request bytes_allocated/op"

    echo "Appending to $PWD/report.json..."
    # NOTE: certain metrics that are difficult to collect or that we don't
    # care about are set to 0 here, to prevent the graphql-bench web app from
    # breaking
    #
    # Name these "ADHOC-foo" so we can choose to display them differently,
    # e.g. in PR regression reports
    temp_result_file="$(mktemp)" # ('sponge' unavailable)
    jq --arg name "$name" \
       --argjson iterations "$iterations" \
       --argjson p50_ms "$p50_ms" \
       --argjson p90_ms "$p90_ms" \
       --argjson max_ms "$max_ms" \
       --argjson mean_ms "$mean_ms" \
       --argjson min_ms "$min_ms" \
       --argjson bytes_allocated_per_request "$bytes_allocated_per_request" \
       --argjson live_bytes_before "$live_bytes_before" \
       --argjson live_bytes_after "$live_bytes_after" \
       --argjson mem_in_use_bytes_before "$mem_in_use_bytes_before" \
       --argjson mem_in_use_bytes_after "$mem_in_use_bytes_after" \
       '. + [{
          "name": "ADHOC-\($name)",
          "time": { },
          "requests": { "count": $iterations, "average": 0 },
          "response": { "totalBytes": 0, "bytesPerSecond": 0 },
          "histogram": {
            "json": {
              "p50": $p50_ms,
              "p90": $p90_ms,
              "max": $max_ms,
              "totalCount": $iterations,
              "mean": $mean_ms,
              "min": $min_ms,
              "stdDeviation": 0
            },
            "parsedStats": [ ]
          },
          "extended_hasura_checks": {
            "bytes_allocated_per_request": $bytes_allocated_per_request,
            "live_bytes_before": $live_bytes_before,
            "live_bytes_after": $live_bytes_after,
            "mem_in_use_bytes_before": $mem_in_use_bytes_before,
            "mem_in_use_bytes_after": $mem_in_use_bytes_after
          }
        }]' < report.json > "$temp_result_file"
    mv -f "$temp_result_file" report.json

    # TODO once GHC issue 21082 fixed:
    #   - collect mutator cpu ns
    #   - add an untrack/track table benchmark to chinook and huge_schema
  done
)

# Run the benchmark set's optional setup.sh hook, if present and executable.
function custom_setup() {
  cd "$BENCH_DIR"
  if [ -x setup.sh ]; then
    echo_pretty "Running custom setup script"
    ./setup.sh
  fi
  cd -
}

# Run the benchmark set's optional cleanup.sh hook, if present and executable.
function custom_cleanup() {
  cd "$BENCH_DIR"
  if [ -x cleanup.sh ]; then
    echo_pretty "Running custom cleanup script"
    ./cleanup.sh
  fi
  cd -
}

# Load the benchmark set's optional SQL dump and hasura metadata.
function load_data_and_schema() {
  echo_pretty "Loading data and adding schema"
  cd "$BENCH_DIR"
  if [ -f dump.sql.gz ]; then
    gunzip -c dump.sql.gz | $PSQL_DOCKER &> /dev/null
  else
    echo_pretty "No data to load"
  fi
  if [ -f replace_metadata.json ]; then
    # --fail-with-body is what we want, but is not available on older curl:
    # TODO LATER: use /v1/metadata once stable
    curl \
      --fail \
      --request POST \
      --header "Content-Type: application/json" \
      --data @replace_metadata.json \
      "$HASURA_URL/v1/query"
  else
    echo_pretty "No metadata to replace"
  fi
  cd -
}

##################################
#   bringing it all together...  #
##################################

# Start this ahead of time...
pg_launch_container
# meanwhile...
install_latest_graphql_bench # Wait for pg, then bring up hasura if needed pg_wait maybe_launch_hasura_container hasura_wait custom_setup load_data_and_schema run_graphql_benchmarks run_adhoc_operation_benchmarks echo_pretty "All done. You can visualize the report using the web app here:" echo_pretty " https://hasura.github.io/graphql-bench/app/web-app/" echo_pretty "...and passing it the file we just generated:" echo_pretty " $BENCH_DIR/report.json"