mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-15 01:12:56 +03:00
3332189756
See this earlier iteration of this work for an example of the kind of report we're producing: #7664 And related work in this repo: github.com:hasura/graphql-bench-helper PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7923 GitOrigin-RevId: 99d2a55e2fb5b55f3f33e2570cfd0bc23e448e0c
210 lines
8.0 KiB
Bash
Executable File
210 lines
8.0 KiB
Bash
Executable File
#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
shopt -s globstar

# Allow killing background process by pgid without killing self: with job
# control enabled every background job is placed in its own process group.
set -m

## This is a rough script that helps us quantify the resources required for
## hasura depending on the schema and the expected load. We expect only to need
## to run this quarterly or so.
##
## see: https://hasurahq.atlassian.net/browse/PR-56

# Print ">>> <message>" on stdout with the message wrapped in a tput colour.
# Arguments:
#   $1 - colour number handed to `tput setaf`
#   $2 - message (optional, defaults to the empty string)
# Colouring is best effort: when tput is missing or the terminal has no colour
# support the message is printed uncoloured and tput's complaint is dropped.
_echo_coloured() {
  local colour=$1
  local message=${2-}
  local colour_on colour_off
  colour_on=$(tput setaf "$colour" 2>/dev/null) || colour_on=
  colour_off=$(tput sgr0 2>/dev/null) || colour_off=
  printf '%s\n' ">>> ${colour_on}${message}${colour_off}"
}

# Informational message (tput colour 2).
echo_pretty() {
  _echo_coloured 2 "${1-}"
}

# Error message (tput colour 1).
echo_error() {
  _echo_coloured 1 "${1-}"
}

# Warning message (tput colour 3).
echo_warn() {
  _echo_coloured 3 "${1-}"
}

REPO_TOPLEVEL=$(git rev-parse --show-toplevel)

# Number of cores handed to the server (+RTS -N). Test between 1 and 3.
# Defaults to 2; may be overridden through the environment.
NUM_SERVER_CORES=${NUM_SERVER_CORES:-2}
if ! [[ "$NUM_SERVER_CORES" =~ ^[1-9][0-9]*$ ]]; then
  echo_error "NUM_SERVER_CORES must be a positive integer (got '$NUM_SERVER_CORES')"
  exit 1
fi

# Check prerequisites first: the EXIT trap is not installed yet at this point,
# so bailing out after mktemp would leave the temp files behind.
if ! command -v gblreg &> /dev/null; then
  echo_error "Install gbutils for regression tool 'gblreg'"
  exit 1
fi

# Scratch files: GNU time report, engine log and postgres log.
MEM_CPU_OUTFILE=$(mktemp)
ENGINE_OUT_FILE=$(mktemp)
POSTGRES_OUT_FILE=$(mktemp)

echo_warn "Please make sure your computer has at least 8 cores and that you've disabled lower processor sleep states! "
echo_warn " $ sudo cpupower frequency-set -g performance && sudo cpupower idle-set -D10 # PERFORMANCE "
# Give the operator a moment to read the warning before the run starts
sleep 5

# Launch graphql-engine in the background, wrapped in GNU time.
# Globals read:    REPO_TOPLEVEL, NUM_SERVER_CORES, MEM_CPU_OUTFILE, ENGINE_OUT_FILE
# Globals written: GRAPHQL_ENGINE_PID (pid of the background job)
# Arguments:       $1 - optional; "no_wait" returns right after launching
# Exits the script with status 42 when the job dies before the server answers.
function start_engine {
  # use gnu time to get the memory high watermark
  # Run with -Fd for faster memory reclamation back to baseline
  command time -f "%M %P" -o "$MEM_CPU_OUTFILE" \
    "$REPO_TOPLEVEL/scripts/dev.sh" graphql-engine --optimized -- +RTS -N"$NUM_SERVER_CORES" -Fd0.01 -RTS \
    &> "$ENGINE_OUT_FILE" & GRAPHQL_ENGINE_PID=$!

  if [[ "${1-}" == "no_wait" ]]; then
    return 0
  fi

  echo -n "Waiting for graphql-engine (for logs see: $ENGINE_OUT_FILE)"
  # Poll the query endpoint until it accepts a request
  until curl -s "http://127.0.0.1:8181/v1/query" &>/dev/null; do
    echo -n '.' && sleep 0.2
    # If the server stopped abort immediately (kill's own message is
    # suppressed; the error below already says what happened)
    if ! kill -0 "$GRAPHQL_ENGINE_PID" 2>/dev/null; then
      echo_error "The server crashed or failed to start!!"
      exit 42
    fi
  done
  echo " Ok"
}

# Stop the engine job started by start_engine and print its productivity line.
# Globals read:    ENGINE_OUT_FILE
# Globals written: GRAPHQL_ENGINE_PID (emptied once the job has been reaped)
# Safe to call repeatedly and when the engine was never started or already
# died: in those cases no signal is sent.
function stop_engine {
  local pid=${GRAPHQL_ENGINE_PID-}
  local pgid tries

  if [[ -n "$pid" ]]; then
    # "pgid=" suppresses the header, so a dead pid yields an empty string
    # rather than the literal column title.
    pgid=$(ps -o pgid= -p "$pid" 2>/dev/null | tr -d '[:space:]') || pgid=
    if [[ -n "$pgid" ]]; then
      # Send INT to get output from GNU time!:
      kill -INT "-$pgid" || true
      # kill -- "-$pgid" # ...not this
    fi
    wait "$pid" 2>/dev/null || true
    GRAPHQL_ENGINE_PID=
  fi

  if [[ -f "$ENGINE_OUT_FILE" ]]; then
    echo_pretty "Productivity of engine just stopped, FYI:"
    # The line can trail the job's exit, so poll for it, but give up after
    # ~30s: a server that crashed never prints it and must not hang cleanup.
    # Leading whitespace is tolerated (presumably this is the indented RTS
    # summary line).
    tries=0
    until grep '^[[:space:]]*Productivity' "$ENGINE_OUT_FILE"; do
      tries=$((tries + 1))
      if ((tries > 30)); then
        echo_warn "No productivity line found in $ENGINE_OUT_FILE; giving up"
        break
      fi
      sleep 1
    done
  fi
  rm -f "$ENGINE_OUT_FILE"
}

# Launch postgres through scripts/dev.sh as a background job.
# Globals read:    REPO_TOPLEVEL, POSTGRES_OUT_FILE (receives stdout and stderr)
# Globals written: POSTGRES_PID (pid of the background job)
function start_postgres {
  echo "Launching postgres (see logs at $POSTGRES_OUT_FILE)"
  "$REPO_TOPLEVEL/scripts/dev.sh" postgres \
    &> "$POSTGRES_OUT_FILE" & POSTGRES_PID=$!
}

# Stop the postgres job started by start_postgres and remove its log file.
# Globals read:    POSTGRES_OUT_FILE
# Globals written: POSTGRES_PID (emptied once the job has been reaped)
# Safe to call repeatedly and when postgres was never started or is already
# gone: in those cases no signal is sent.
function stop_postgres {
  local pid=${POSTGRES_PID-}
  local pgid

  if [[ -n "$pid" ]]; then
    # "pgid=" suppresses the header, so a dead pid yields an empty string
    # rather than the literal column title.
    pgid=$(ps -o pgid= -p "$pid" 2>/dev/null | tr -d '[:space:]') || pgid=
    if [[ -n "$pgid" ]]; then
      # Signal the whole process group of the job
      kill -- "-$pgid" || true
    fi
    wait "$pid" 2>/dev/null || true
    POSTGRES_PID=
  fi
  rm -f "$POSTGRES_OUT_FILE"
}

# EXIT handler: stop the engine and postgres and delete the GNU time report.
# Globals read: MEM_CPU_OUTFILE
function cleanup {
  # Everything here is best effort. Besides -e, also drop -u: an unset
  # variable would otherwise terminate the shell in the middle of the handler
  # and skip the remaining steps.
  set +e
  set +u
  echo_pretty "Cleaning up"

  stop_engine
  stop_postgres

  rm -f "$MEM_CPU_OUTFILE"

  echo "Done"
}
trap cleanup EXIT

# Get a memory high water mark for replace_metadata
# Must be executed from a benchmark set directory: reads dump.sql.gz and
# replace_metadata.json from the current working directory.
function init_and_replace_metadata {
  local -r query_url=http://127.0.0.1:8181/v1/query

  echo_pretty "Initializing and doing some replace_metadata"
  # Load the SQL dump into the local postgres; psql output is discarded
  gunzip -c dump.sql.gz | PGPASSWORD=postgres psql -h 127.0.0.1 -p 25432 postgres -U postgres &>/dev/null

  # run replace_metadata a few times (once to initialize schema, a few more to get good high water mark)
  for _ in 1 2 3; do
    curl -X POST -H 'Content-Type: application/json' -d @replace_metadata.json "$query_url"
  done
  # curl leaves the cursor at the end of the last response body
  echo
}

### Metadata operations and Baseline + peak memory ####################

# Measure idle-baseline and replace_metadata peak memory for one benchmark set.
# Arguments:
#   $1 - directory name under server/benchmarks/benchmark_sets
#   $2 - name of the global variable that receives the baseline RSS
#   $3 - name of the global variable that receives the memory high-water mark
# Leaves the working directory inside the benchmark set.
function measure_schema_memory {
  local benchmark_set=$1 baseline_var=$2 highwater_var=$3
  local baseline highwater

  start_postgres
  start_engine

  cd "$REPO_TOPLEVEL/server/benchmarks/benchmark_sets/$benchmark_set"
  init_and_replace_metadata
  echo_pretty "Sleeping for 30 seconds and then checking for baseline memory usage"
  sleep 30
  # Select the graphql-engine process whose process group id equals the pid
  # of the background job, and take its rss column.
  baseline=$(ps -e -o pid,ppid,pgid,rss,comm \
    | awk -v pgid="$GRAPHQL_ENGINE_PID" '$3 == pgid' \
    | grep graphql-engine \
    | awk '{print $4}')

  stop_engine
  stop_postgres
  echo "sleeping..." && sleep 30 # TODO wait for all in process group
  # First field of the last line GNU time wrote is the memory high-water mark
  highwater=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $1}')

  printf -v "$baseline_var" '%s' "$baseline"
  printf -v "$highwater_var" '%s' "$highwater"
}

# Flip to `false` to skip this section while iterating on the script
if true; then
  ## huge_schema: ########
  measure_schema_memory huge_schema MEM_BASELINE_HUGE_SCHEMA MEM_HIGHWATER_HUGE_SCHEMA

  ## chinook: ########
  measure_schema_memory chinook MEM_BASELINE_CHINOOK MEM_HIGHWATER_CHINOOK
fi

### Throughput limit and Peak memory under load ####################

# Flip to `false` to skip this section while iterating on the script
if true; then
  cd "$REPO_TOPLEVEL/server/benchmarks"
  # no_wait: do not poll the engine here before handing over to bench.sh
  start_engine no_wait

  ./bench.sh chinook_throughput

  stop_engine
  echo "sleeping..." && sleep 30 # TODO wait for all in process group
  # GNU time was asked for "%M %P": field 1 is the memory high-water mark,
  # field 2 the CPU percentage.
  MEM_HIGHWATER_CHINOOK_UNDER_LOAD=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $1}')
  CPU_CHINOOK_UNDER_LOAD=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $2}')
fi

# From here on we only report; a failing formatter should not abort the run.
set +e

# Either measurement section above may have been switched off or may have
# produced nothing. Give every value a printable default so that `set -u`
# does not terminate the script in the middle of the report.
for measurement in \
  MEM_BASELINE_HUGE_SCHEMA MEM_HIGHWATER_HUGE_SCHEMA \
  MEM_BASELINE_CHINOOK MEM_HIGHWATER_CHINOOK \
  MEM_HIGHWATER_CHINOOK_UNDER_LOAD CPU_CHINOOK_UNDER_LOAD; do
  if [[ -z "${!measurement-}" ]]; then
    printf -v "$measurement" '%s' 'N/A'
  fi
done

echo_pretty "####################### RAW MEASUREMENTS ###########################"
echo_pretty ""
echo_pretty "Memory usage in KB:"
(echo "| SCHEMA_BASELINE REPLACE_METADATA_PEAK UNDER_LOAD_PEAK" ;\
 echo "huge_schema $MEM_BASELINE_HUGE_SCHEMA $MEM_HIGHWATER_HUGE_SCHEMA N/A" ;\
 echo "chinook $MEM_BASELINE_CHINOOK $MEM_HIGHWATER_CHINOOK $MEM_HIGHWATER_CHINOOK_UNDER_LOAD") |\
  column --table -R1,2,3,4
echo_pretty ""
echo_pretty "Avg CPU During Chinook throughput tests: $CPU_CHINOOK_UNDER_LOAD "
echo "NOTE: The utility of the script relies on the assumption that the throughput "
echo " tests here are mostly CPU bound. we want the value above to be between 150% and "
echo " ${NUM_SERVER_CORES}00% (using all $NUM_SERVER_CORES cores allotted to server)"
echo " FYI: complex_query_high_load_large_result appears to be IO bound, "
echo " with the server at only 100% CPU (on two cores)"
echo_pretty ""
# TODO add uncompressed response body sizes here:
echo_pretty "Peak sustained throughput for our Chinook queries having different uncompressed response body sizes (server given $NUM_SERVER_CORES cores)"
# Three columns side by side: query label, floored average requests, unit
paste -d ' ' <(echo -e "simple_query_high_load(600B): \n complex_query_high_load_small_result(650B): \n complex_query_high_load_large_result(33KB): \n full_introspection(190KB):") \
  <(jq '.[] .requests.average |floor' "$REPO_TOPLEVEL/server/benchmarks/benchmark_sets/chinook_throughput/report.json" ) \
  <(echo -e "RPS\nRPS\nRPS\nRPS") |\
  column --table
echo_pretty ""
echo_pretty "####################### INTERPRETATION ###########################"

# The ratios below need five positive integers; anything else (N/A, empty,
# several lines from ps, a zero baseline) would break $(( )) or bc.
have_all_memory_numbers=true
for measurement in \
  MEM_BASELINE_HUGE_SCHEMA MEM_HIGHWATER_HUGE_SCHEMA \
  MEM_BASELINE_CHINOOK MEM_HIGHWATER_CHINOOK \
  MEM_HIGHWATER_CHINOOK_UNDER_LOAD; do
  if ! [[ "${!measurement}" =~ ^[1-9][0-9]*$ ]]; then
    have_all_memory_numbers=false
  fi
done

if [[ "$have_all_memory_numbers" == true ]]; then
  # Chinook peak is the larger of "under load" and "during replace_metadata"
  CHINOOK_PEAK_MEM=$(( MEM_HIGHWATER_CHINOOK_UNDER_LOAD > MEM_HIGHWATER_CHINOOK ? MEM_HIGHWATER_CHINOOK_UNDER_LOAD : MEM_HIGHWATER_CHINOOK ))
  CHINOOK_MEM_SCALE=$(bc -l <<< "scale=1; $CHINOOK_PEAK_MEM/$MEM_BASELINE_CHINOOK")
  HUGE_SCHEMA_MEM_SCALE=$(bc -l <<< "scale=1; $MEM_HIGHWATER_HUGE_SCHEMA/$MEM_BASELINE_HUGE_SCHEMA")
  echo_pretty "Under peak sustained throughput and with some replace_metadata, peak memory usage is typically between..."
  echo_pretty " ${CHINOOK_MEM_SCALE}x and ${HUGE_SCHEMA_MEM_SCALE}x "
  echo_pretty "...above the idle baseline (i.e. the schema overhead)"
else
  echo_warn "Skipping interpretation: not all memory measurements are available"
fi

# TODO automate this
echo_warn ""
echo_warn "ABOVE WAS RUN WITH SERVER ALLOCATED < $NUM_SERVER_CORES > CORES."
echo_warn ""
echo_warn "Rerun this with one, two and three cores ( this is about the limit you can do on an"
echo_warn "8 core laptop and still get meaningful numbers). Run a linear regression for each:"
echo_warn ' $ echo "1 2266\\n2 3587\\n 3 5270" | gblreg'
echo_warn ' 7.036667e+02 1.502000e+03'
echo_warn ' A^ B^ in: PEAK_THROUGHPUT=A+B*SERVER_CORES'

echo_pretty "Done. Shutting down"