mirror of
https://github.com/hasura/graphql-engine.git
synced 2025-01-05 22:34:22 +03:00
210 lines
8.0 KiB
Bash
210 lines
8.0 KiB
Bash
|
#!/usr/bin/env bash
|
|||
|
set -euo pipefail
|
|||
|
shopt -s globstar
|
|||
|
|
|||
|
# Allow killing background process by pgid without killing self
|
|||
|
set -m
|
|||
|
|
|||
|
## This is a rough script that helps us quantify the resources required for
|
|||
|
## hasura depending on the schema and the expected load. We expect only to need
|
|||
|
## to run this quarterly or so.
|
|||
|
##
|
|||
|
## see: https://hasurahq.atlassian.net/browse/PR-56
|
|||
|
|
|||
|
echo_pretty() {
|
|||
|
echo ">>> $(tput setaf 2)$1$(tput sgr0)"
|
|||
|
}
|
|||
|
echo_error() {
|
|||
|
echo ">>> $(tput setaf 1)$1$(tput sgr0)"
|
|||
|
}
|
|||
|
echo_warn() {
|
|||
|
echo ">>> $(tput setaf 3)$1$(tput sgr0)"
|
|||
|
}
|
|||
|
|
|||
|
REPO_TOPLEVEL=$(git rev-parse --show-toplevel)
|
|||
|
|
|||
|
MEM_CPU_OUTFILE=$(mktemp)
|
|||
|
ENGINE_OUT_FILE=$(mktemp)
|
|||
|
POSTGRES_OUT_FILE=$(mktemp)
|
|||
|
|
|||
|
# Test between 1 and 3:
|
|||
|
NUM_SERVER_CORES=2
|
|||
|
|
|||
|
echo_warn "Please make sure your computer has at least 8 cores and that you've disabled lower processor sleep states! "
|
|||
|
echo_warn " $ sudo cpupower frequency-set -g performance && sudo cpupower idle-set -D10 # PERFORMANCE "
|
|||
|
sleep 5
|
|||
|
|
|||
|
if ! command -v gblreg &> /dev/null
|
|||
|
then
|
|||
|
echo_error "Install gbutils for regression tool 'gblreg'"
|
|||
|
exit 1
|
|||
|
fi
|
|||
|
|
|||
|
function start_engine {
|
|||
|
# use gnu time to get the memory high watermark
|
|||
|
# Run with -Fd for faster memory reclamation back to baseline
|
|||
|
command time -f "%M %P" -o "$MEM_CPU_OUTFILE" \
|
|||
|
"$REPO_TOPLEVEL/scripts/dev.sh" graphql-engine --optimized -- +RTS -N"$NUM_SERVER_CORES" -Fd0.01 -RTS \
|
|||
|
&> "$ENGINE_OUT_FILE" & GRAPHQL_ENGINE_PID=$!
|
|||
|
|
|||
|
if [ "${1-}" != "no_wait" ]; then
|
|||
|
echo -n "Waiting for graphql-engine (for logs see: $ENGINE_OUT_FILE)"
|
|||
|
|
|||
|
until curl -s "http://127.0.0.1:8181/v1/query" &>/dev/null; do
|
|||
|
echo -n '.' && sleep 0.2
|
|||
|
# If the server stopped abort immediately
|
|||
|
if ! kill -0 $GRAPHQL_ENGINE_PID ; then
|
|||
|
echo_error "The server crashed or failed to start!!"
|
|||
|
exit 42
|
|||
|
fi
|
|||
|
done
|
|||
|
echo " Ok"
|
|||
|
fi
|
|||
|
}
|
|||
|
|
|||
|
function stop_engine {
|
|||
|
PGID=$(ps -o '%r' "$GRAPHQL_ENGINE_PID" | tail -n1 | xargs)
|
|||
|
# echo "PID/PGID: $$ $GRAPHQL_ENGINE_PID $PGID"
|
|||
|
# Send INT to get output from GNU time!:
|
|||
|
kill -INT "-$PGID"
|
|||
|
# kill -- "-$PGID" # ...not this
|
|||
|
wait "$GRAPHQL_ENGINE_PID" || true
|
|||
|
if [ -f "$ENGINE_OUT_FILE" ]; then
|
|||
|
echo_pretty "Productivity of engine just stopped, FYI:"
|
|||
|
until grep '^ Productivity' "$ENGINE_OUT_FILE" ; do
|
|||
|
sleep 1
|
|||
|
done
|
|||
|
fi
|
|||
|
rm -f "$ENGINE_OUT_FILE"
|
|||
|
|
|||
|
}
|
|||
|
function start_postgres {
|
|||
|
echo "Launching postgres (see logs at $POSTGRES_OUT_FILE)"
|
|||
|
"$REPO_TOPLEVEL/scripts/dev.sh" postgres \
|
|||
|
&> "$POSTGRES_OUT_FILE" & POSTGRES_PID=$!
|
|||
|
}
|
|||
|
function stop_postgres {
|
|||
|
PGID=$(ps -o '%r' "$POSTGRES_PID" | tail -n1 | xargs)
|
|||
|
kill -- "-$PGID"
|
|||
|
wait "$POSTGRES_PID" || true
|
|||
|
rm -f "$POSTGRES_OUT_FILE"
|
|||
|
}
|
|||
|
|
|||
|
function cleanup {
|
|||
|
set +e
|
|||
|
echo_pretty "Cleaning up"
|
|||
|
|
|||
|
stop_engine
|
|||
|
stop_postgres
|
|||
|
|
|||
|
rm -f "$MEM_CPU_OUTFILE"
|
|||
|
|
|||
|
echo "Done"
|
|||
|
}
|
|||
|
trap cleanup EXIT
|
|||
|
|
|||
|
# Get a memory high water mark for replace_metadata
|
|||
|
# Must be executed from a benchmark set directory
|
|||
|
function init_and_replace_metadata {
|
|||
|
echo_pretty "Initializing and doing some replace_metadata"
|
|||
|
gunzip -c dump.sql.gz | PGPASSWORD=postgres psql -h 127.0.0.1 -p 25432 postgres -U postgres &>/dev/null
|
|||
|
|
|||
|
# run replace_metadata a few times (once to initialize schema, a few more to get good high water mark)
|
|||
|
curl -X POST -H 'Content-Type: application/json' -d @replace_metadata.json http://127.0.0.1:8181/v1/query
|
|||
|
curl -X POST -H 'Content-Type: application/json' -d @replace_metadata.json http://127.0.0.1:8181/v1/query
|
|||
|
curl -X POST -H 'Content-Type: application/json' -d @replace_metadata.json http://127.0.0.1:8181/v1/query
|
|||
|
echo
|
|||
|
}
|
|||
|
|
|||
|
### Metadata operations and Baseline + peak memory ####################
|
|||
|
|
|||
|
if true; then
|
|||
|
## huge_schema: ########
|
|||
|
start_postgres
|
|||
|
start_engine
|
|||
|
|
|||
|
cd "$REPO_TOPLEVEL/server/benchmarks/benchmark_sets/huge_schema"
|
|||
|
init_and_replace_metadata
|
|||
|
echo_pretty "Sleeping for 30 seconds and then checking for baseline memory usage"
|
|||
|
sleep 30
|
|||
|
MEM_BASELINE_HUGE_SCHEMA=$(ps -e -o pid,ppid,pgid,rss,comm | awk '$3 == '"$GRAPHQL_ENGINE_PID" | grep graphql-engine | awk '{print $4}')
|
|||
|
|
|||
|
stop_engine
|
|||
|
stop_postgres
|
|||
|
echo "sleeping..." && sleep 30 # TODO wait for all in process group
|
|||
|
MEM_HIGHWATER_HUGE_SCHEMA=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $1}')
|
|||
|
|
|||
|
|
|||
|
## chinook: ########
|
|||
|
start_postgres
|
|||
|
start_engine
|
|||
|
|
|||
|
cd "$REPO_TOPLEVEL/server/benchmarks/benchmark_sets/chinook"
|
|||
|
init_and_replace_metadata
|
|||
|
echo_pretty "Sleeping for 30 seconds and then checking for baseline memory usage"
|
|||
|
sleep 30
|
|||
|
MEM_BASELINE_CHINOOK=$(ps -e -o pid,ppid,pgid,rss,comm | awk '$3 == '"$GRAPHQL_ENGINE_PID" | grep graphql-engine | awk '{print $4}')
|
|||
|
|
|||
|
stop_engine
|
|||
|
stop_postgres
|
|||
|
echo "sleeping..." && sleep 30 # TODO wait for all in process group
|
|||
|
MEM_HIGHWATER_CHINOOK=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $1}')
|
|||
|
fi
|
|||
|
|
|||
|
### Throughput limit and Peak memory under load ####################
|
|||
|
|
|||
|
if true; then
|
|||
|
cd "$REPO_TOPLEVEL/server/benchmarks"
|
|||
|
start_engine no_wait
|
|||
|
|
|||
|
./bench.sh chinook_throughput
|
|||
|
|
|||
|
stop_engine
|
|||
|
echo "sleeping..." && sleep 30 # TODO wait for all in process group
|
|||
|
MEM_HIGHWATER_CHINOOK_UNDER_LOAD=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $1}')
|
|||
|
CPU_CHINOOK_UNDER_LOAD=$(tail -n1 "$MEM_CPU_OUTFILE" | awk '{print $2}')
|
|||
|
fi
|
|||
|
|
|||
|
set +e
|
|||
|
echo_pretty "####################### RAW MEASUREMENTS ###########################"
|
|||
|
echo_pretty ""
|
|||
|
echo_pretty "Memory usage in KB:"
|
|||
|
(echo "| SCHEMA_BASELINE REPLACE_METADATA_PEAK UNDER_LOAD_PEAK" ;\
|
|||
|
echo "huge_schema $MEM_BASELINE_HUGE_SCHEMA $MEM_HIGHWATER_HUGE_SCHEMA N/A" ;\
|
|||
|
echo "chinook $MEM_BASELINE_CHINOOK $MEM_HIGHWATER_CHINOOK $MEM_HIGHWATER_CHINOOK_UNDER_LOAD") |\
|
|||
|
column --table -R1,2,3,4
|
|||
|
echo_pretty ""
|
|||
|
echo_pretty "Avg CPU During Chinook throughput tests: $CPU_CHINOOK_UNDER_LOAD "
|
|||
|
echo "NOTE: The utility of the script relies on the assumption that the throughput "
|
|||
|
echo " tests here are mostly CPU bound. we want the value above to be between 150% and "
|
|||
|
echo " ${NUM_SERVER_CORES}00% (using all $NUM_SERVER_CORES cores allotted to server)"
|
|||
|
echo " FYI: complex_query_high_load_large_result appears to be IO bound, "
|
|||
|
echo " with the server at only 100% CPU (on two cores)"
|
|||
|
echo_pretty ""
|
|||
|
# TODO add uncompressed response body sizes here:
|
|||
|
echo_pretty "Peak sustained throughput for our Chinook queries having different uncompressed response body sizes (server given $NUM_SERVER_CORES cores)"
|
|||
|
paste -d ' ' <(echo -e "simple_query_high_load(600B): \n complex_query_high_load_small_result(650B): \n complex_query_high_load_large_result(33KB): \n full_introspection(190KB):") \
|
|||
|
<(jq '.[] .requests.average |floor' "$REPO_TOPLEVEL/server/benchmarks/benchmark_sets/chinook_throughput/report.json" ) \
|
|||
|
<(echo -e "RPS\nRPS\nRPS\nRPS") |\
|
|||
|
column --table
|
|||
|
echo_pretty ""
|
|||
|
echo_pretty "####################### INTERPRETATION ###########################"
|
|||
|
CHINOOK_PEAK_MEM=$(( MEM_HIGHWATER_CHINOOK_UNDER_LOAD > MEM_HIGHWATER_CHINOOK ? MEM_HIGHWATER_CHINOOK_UNDER_LOAD : MEM_HIGHWATER_CHINOOK ))
|
|||
|
CHINOOK_MEM_SCALE=$(bc -l <<< "scale=1; $CHINOOK_PEAK_MEM/$MEM_BASELINE_CHINOOK")
|
|||
|
HUGE_SCHEMA_MEM_SCALE=$(bc -l <<< "scale=1; $MEM_HIGHWATER_HUGE_SCHEMA/$MEM_BASELINE_HUGE_SCHEMA")
|
|||
|
echo_pretty "Under peak sustained throughput and with some replace_metadata, peak memory usage is typically between..."
|
|||
|
echo_pretty " ${CHINOOK_MEM_SCALE}x and ${HUGE_SCHEMA_MEM_SCALE}x "
|
|||
|
echo_pretty "...above the idle baseline (i.e. the schema overhead)"
|
|||
|
|
|||
|
# TODO automate this
|
|||
|
echo_warn ""
|
|||
|
echo_warn "ABOVE WAS RUN WITH SERVER ALLOCATED < $NUM_SERVER_CORES > CORES."
|
|||
|
echo_warn ""
|
|||
|
echo_warn "Rerun this with one, two and three cores ( this is about the limit you can do on an"
|
|||
|
echo_warn "8 core laptop and still get meaningful numbers). Run a linear regression for each:"
|
|||
|
echo_warn ' $ echo "1 2266\\n2 3587\\n 3 5270" | gblreg'
|
|||
|
echo_warn ' 7.036667e+02 1.502000e+03'
|
|||
|
echo_warn ' A^ B^ in: PEAK_THROUGHPUT=A+B*SERVER_CORES'
|
|||
|
|
|||
|
echo_pretty "Done. Shutting down"
|