[JSON-API] Postgres perf job (#10986)

* Addition of a postgres perf job

CHANGELOG_BEGIN
CHANGELOG_END

* Use a single job with a strategy matrix

* Clean up the test job from the main pipeline
akshayshirahatti-da 2021-09-23 09:16:58 +01:00 committed by GitHub
parent f2d9f07417
commit 721575ea73
3 changed files with 121 additions and 69 deletions


@@ -173,8 +173,14 @@ jobs:
env:
GCRED: $(GOOGLE_APPLICATION_CREDENTIALS_CONTENT)
- job: perf_http_json_oracle
- job: perf_http_json_querystore
timeoutInMinutes: 120
strategy:
matrix:
postgres:
querystore: postgres
oracle:
querystore: oracle
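# One copy of this job runs per matrix entry; steps read the entry's value as $(querystore).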
pool:
name: "ubuntu_20_04"
demands: assignment -equals default
@@ -196,30 +202,33 @@ jobs:
eval "$(dev-env/bin/dade assist)"
source $(bash_lib)
docker login --username "$DOCKER_LOGIN" --password "$DOCKER_PASSWORD"
IMAGE=$(cat ci/oracle_image)
docker pull $IMAGE
# Cleanup stray containers that might still be running from
# another build that didn't get shut down cleanly.
docker rm -f oracle || true
# Oracle does not like it if you connect to it via localhost while it's running in the container.
# Interestingly, it works if you use the external IP of the host, so the issue is
# not the address it is listening on (it claims that to be 0.0.0.0).
# --network host is a cheap escape hatch for this.
docker run -d --rm --name oracle --network host -e ORACLE_PWD=$ORACLE_PWD $IMAGE
function cleanup() {
QUERY_STORE=$(querystore)
# Set up the Oracle docker VM for perf tests against Oracle.
if [[ "$QUERY_STORE" == "oracle" ]]; then
docker login --username "$DOCKER_LOGIN" --password "$DOCKER_PASSWORD"
IMAGE=$(cat ci/oracle_image)
docker pull $IMAGE
# Cleanup stray containers that might still be running from
# another build that didn't get shut down cleanly.
docker rm -f oracle || true
# Oracle does not like it if you connect to it via localhost while it's running in the container.
# Interestingly, it works if you use the external IP of the host, so the issue is
# not the address it is listening on (it claims that to be 0.0.0.0).
# --network host is a cheap escape hatch for this.
docker run -d --rm --name oracle --network host -e ORACLE_PWD=$ORACLE_PWD $IMAGE
function cleanup() {
docker rm -f oracle
}
trap cleanup EXIT
testConnection() {
}
trap cleanup EXIT
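# sqlplus -L attempts the logon only once and exits non-zero on failure, so this doubles as a readiness probe.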
testConnection() {
docker exec oracle bash -c 'sqlplus -L '"$ORACLE_USERNAME"'/'"$ORACLE_PWD"'@//localhost:'"$ORACLE_PORT"'/ORCLPDB1 <<< "select * from dba_users;"; exit $?' >/dev/null
}
}
# Don't want to wait forever to test the connection; 15m is more than sufficient here.
declare -xf testConnection
timeout 15m bash -c 'until testConnection; do echo "Could not connect to Oracle, trying again..." ; sleep 1 ; done'
# Actually run some tests
# Don't want to wait forever to test the connection; 15m is more than sufficient here.
declare -xf testConnection
timeout 15m bash -c 'until testConnection; do echo "Could not connect to Oracle, trying again..." ; sleep 1 ; done'
fi
bazel build //ledger-service/http-json-perf/...
DAR="${PWD}/bazel-bin/ledger-service/http-json-perf/LargeAcs.dar"
@@ -235,14 +244,8 @@ jobs:
# }
JWT="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwczovL2RhbWwuY29tL2xlZGdlci1hcGkiOnsibGVkZ2VySWQiOiJNeUxlZGdlciIsImFwcGxpY2F0aW9uSWQiOiJmb29iYXIiLCJhY3RBcyI6WyJBbGljZSJdfX0.VdDI96mw5hrfM5ZNxLyetSVwcD7XtLT4dIdHIOa9lcU"
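# Static HS256 dev token; its decoded payload is the commented JSON above.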
TEST_CASES="\
populateCache \
fetchByKey \
fetchByQuery \
"
METADATA=$(git log -n1 --format=%cd --date=format:%Y%m%d).$(git rev-list --count HEAD).$(Build.BuildId).$(git log -n1 --format=%h --abbrev=8)
REPORT_ID="http_json_perf_oracle_results_${METADATA}"
REPORT_ID="http_json_perf_${QUERY_STORE}_results_${METADATA}"
OUT="$(Build.StagingDirectory)/${REPORT_ID}"
LOG_DIR="$(Build.StagingDirectory)/log"
@@ -254,37 +257,64 @@ jobs:
export NUM_QUERIES=10000 # 10k queries in total
export NUM_READERS=100 # 100 users in parallel.
for CASE in $TEST_CASES; do
RUN_MODE=${CASE} \
USE_DEFAULT_USER=true \
RETAIN_DATA=true \
bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
--scenario=com.daml.http.perf.scenario.OracleMultiUserQueryScenario \
--dars=${DAR} \
--reports-dir=${OUT}/${CASE} \
--jwt=${JWT} \
--query-store-index=oracle > "${LOG_DIR}/${CASE}_log.out"
done
READ_PERF_KEYS="\
fetchByKey \
fetchByQuery \
"
RES=""
for KEY in $READ_PERF_KEYS; do
# Capture the avg, stddev, p90, p99 and requests_per_second numbers from the Gatling summary CSV.
perf=$(cat ${OUT}/${KEY}/*/summary.csv | tail -n 1 | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f", $11, $12, $6, $8, $13 }')
RES="${RES}json_oracle/${KEY}: $perf\n"
done
RESULT=""
RES=${RES%"\n"}
setvar oracle_perf_results "$RES"
if [[ "$QUERY_STORE" == "oracle" ]]
then
# We run test cases in isolation and retain data between them.
TEST_CASES="\
populateCache \
fetchByKey \
fetchByQuery \
"
for CASE in $TEST_CASES; do
RUN_MODE=${CASE} \
USE_DEFAULT_USER=true \
RETAIN_DATA=true \
bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
--scenario=com.daml.http.perf.scenario.MultiUserQueryScenario \
--dars=${DAR} \
--reports-dir=${OUT}/${CASE} \
--jwt=${JWT} \
--query-store-index=${QUERY_STORE} > "${LOG_DIR}/${CASE}_log.out"
done
for KEY in $READ_PERF_KEYS; do
# Capture the avg, stddev, p90, p99 and requests_per_second numbers from the Gatling summary CSV.
perf=$(cat ${OUT}/${KEY}/*/summary.csv | tail -n 1 | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f", $11, $12, $6, $8, $13 }')
RESULT="${RESULT}json_${QUERY_STORE}/${KEY}: $perf\n"
done
elif [[ "$QUERY_STORE" == "postgres" ]]
then
# The test case to run in MultiUserQueryScenario for postgres.
TEST_CASE="populateAndFetch"
RUN_MODE=${TEST_CASE} \
bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
--scenario=com.daml.http.perf.scenario.MultiUserQueryScenario \
--dars=${DAR} \
--reports-dir=${OUT}/${TEST_CASE} \
--jwt=${JWT} \
--query-store-index=${QUERY_STORE} > "${LOG_DIR}/${TEST_CASE}_log.out"
for KEY in $READ_PERF_KEYS; do
# Capture the avg, stddev, p90, p99 and requests_per_second numbers from the Gatling summary CSV.
perf=$(cat ${OUT}/${TEST_CASE}/*/summary.csv | grep -i "$KEY" | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f", $11, $12, $6, $8, $13 }')
RESULT="${RESULT}json_${QUERY_STORE}/${KEY}: $perf\n"
done
fi
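# Trim the trailing literal "\n" appended by the last loop iteration.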
RESULT=${RESULT%"\n"}
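# setvar (from the sourced bash_lib) publishes this as a step output variable, read by the report job as out.<querystore>_perf_results.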
setvar ${QUERY_STORE}_perf_results "$RESULT"
GZIP=-9 tar -zcf ${OUT}.tgz ${OUT}
gcs "$GCRED" cp "$OUT.tgz" "gs://daml-data/perf/http-json-oracle/${REPORT_ID}.tgz"
gcs "$GCRED" cp "$OUT.tgz" "gs://daml-data/perf/http-json-${QUERY_STORE}/${REPORT_ID}.tgz"
displayName: http-json-oracle perf
displayName: http-json-$(querystore) perf
name: out
env:
GCRED: $(GOOGLE_APPLICATION_CREDENTIALS_CONTENT)
@@ -293,9 +323,7 @@ jobs:
- task: PublishPipelineArtifact@0
inputs:
targetPath: $(Build.StagingDirectory)/log
artifactName: perf_http_json_oracle
artifactName: perf_http_json_$(querystore)
- job: check_releases
timeoutInMinutes: 360
@@ -437,7 +465,7 @@ jobs:
- job: report
dependsOn: [compatibility_ts_libs, compatibility, compatibility_windows,
perf_speedy, perf_http_json, perf_http_json_oracle, check_releases,
perf_speedy, perf_http_json, perf_http_json_querystore, check_releases,
blackduck_scan, run_notices_pr_build]
condition: and(succeededOrFailed(),
eq(variables['Build.SourceBranchName'], 'main'))
@@ -451,8 +479,9 @@ jobs:
perf_speedy: $[ dependencies.perf_speedy.result ]
speedy_perf: $[ dependencies.perf_speedy.outputs['out.speedy_perf'] ]
perf_http_json: $[ dependencies.perf_http_json.result ]
perf_http_json_oracle: $[ dependencies.perf_http_json_oracle.result ]
oracle_perf_results: $[ dependencies.perf_http_json_oracle.outputs['out.oracle_perf_results'] ]
perf_http_json_querystore: $[ dependencies.perf_http_json_querystore.result ]
oracle_perf_results: $[ dependencies.perf_http_json_querystore.outputs['out.oracle_perf_results'] ]
postgres_perf_results: $[ dependencies.perf_http_json_querystore.outputs['out.postgres_perf_results'] ]
check_releases: $[ dependencies.check_releases.result ]
blackduck_scan: $[ dependencies.blackduck_scan.result ]
run_notices_pr_build: $[ dependencies.run_notices_pr_build.result ]
@@ -474,7 +503,7 @@ jobs:
&& "$(compatibility_windows)" == "Succeeded"
&& "$(perf_speedy)" == "Succeeded"
&& "$(perf_http_json)" == "Succeeded"
&& "$(perf_http_json_oracle)" == "Succeeded"
&& "$(perf_http_json_querystore)" == "Succeeded"
&& "$(check_releases)" == "Succeeded"
&& ("$(blackduck_scan)" == "Succeeded" || "$(blackduck_scan)" == "Skipped")
&& ("$(run_notices_pr_build)" == "Succeeded" || "$(run_notices_pr_build)" == "Skipped") ]]; then
@@ -482,6 +511,7 @@ jobs:
REPORT='```
speedy_perf: $(speedy_perf)
$(oracle_perf_results)
$(postgres_perf_results)
```
'
tell_slack "$(echo -e "$MSG\n$REPORT")" "$(Slack.ci-failures-daml)"


@@ -34,15 +34,18 @@ $ bazel run //ledger-service/http-json-perf:http-json-perf-binary -- \
--jwt="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwczovL2RhbWwuY29tL2xlZGdlci1hcGkiOnsibGVkZ2VySWQiOiJNeUxlZGdlciIsImFwcGxpY2F0aW9uSWQiOiJmb29iYXIiLCJhY3RBcyI6WyJBbGljZSJdfX0.VdDI96mw5hrfM5ZNxLyetSVwcD7XtLT4dIdHIOa9lcU"
```
## 2.3 Running OracleMultiUserQueryScenario
## 2.3 Running MultiUserQueryScenario
We use an external docker oracle vm, so we want to retain the data between runs to specifically focus on testing query performance.
use `RETAIN_DATA` and `USE_DEFAULT_USER` env vars to use a static user(`ORACLE_USER`) and preserve data.
This scenario uses a single template `KeyedIou` defined in `LargeAcs.daml`.
For the Oracle *Query Store*, since we use an external Oracle docker VM, we might
want to retain the data between runs to focus specifically on testing query performance.
To achieve this, use the `RETAIN_DATA` and `USE_DEFAULT_USER` env vars to use a static
user (`ORACLE_USER`) and preserve data.
This scenario uses a single template `KeyedIou` defined in `LargeAcs.daml`.
We can control a few scenario parameters, i.e. `NUM_RECORDS`, `NUM_QUERIES`, `NUM_READERS` and `NUM_WRITERS`, via env variables.
`RUN_MODE` allows you to run specific test case scenarios in isolation (a sketch of the new `populateAndFetch` mode follows the examples below).
1. Populate Cache
@@ -64,7 +67,7 @@ USE_DEFAULT_USER=true RETAIN_DATA=true RUN_MODE="fetchByKey" NUM_QUERIES=100 baz
3. Fetch By Query
Query contracts by a field on the payload, which is the `id` in this case.
Query contracts by a field on the payload, which is the `currency` in this case.
```
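The commit also adds a `populateAndFetch` run mode that chains the write phase and both fetch phases in one run. A minimal sketch of an invocation, following the pattern of the examples above; the parameter values and the JWT placeholder are illustrative:
```
RUN_MODE="populateAndFetch" NUM_RECORDS=100000 NUM_QUERIES=10000 NUM_READERS=100 \
bazel run //ledger-service/http-json-perf:http-json-perf-binary -- \
    --scenario=com.daml.http.perf.scenario.MultiUserQueryScenario \
    --dars="${PWD}/bazel-bin/ledger-service/http-json-perf/LargeAcs.dar" \
    --query-store-index=postgres \
    --jwt="<JWT as in the example above>"
```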


@@ -3,7 +3,7 @@
package com.daml.http.perf.scenario
import com.daml.http.perf.scenario.OracleMultiUserQueryScenario._
import com.daml.http.perf.scenario.MultiUserQueryScenario._
import io.gatling.core.Predef._
import io.gatling.core.structure.PopulationBuilder
import io.gatling.http.Predef._
@@ -19,15 +19,16 @@ private[scenario] trait HasRandomCurrency {
}
}
object OracleMultiUserQueryScenario {
object MultiUserQueryScenario {
sealed trait RunMode { def name: String }
case object PopulateCache extends RunMode { val name = "populateCache" }
case object FetchByKey extends RunMode { val name = "fetchByKey" }
case object FetchByQuery extends RunMode { val name = "fetchByQuery" }
case object PopulateAndFetch extends RunMode { val name = "populateAndFetch" }
}
@SuppressWarnings(Array("org.wartremover.warts.NonUnitStatements"))
class OracleMultiUserQueryScenario
class MultiUserQueryScenario
extends Simulation
with SimulationConfig
with HasRandomAmount
@@ -48,6 +49,8 @@ class OracleMultiUserQueryScenario
case PopulateCache.name => PopulateCache
case FetchByKey.name => FetchByKey
case FetchByQuery.name => FetchByQuery
// Run the populate and both fetch phases sequentially in a single run.
case PopulateAndFetch.name => PopulateAndFetch
}
}
@@ -142,9 +145,25 @@ class OracleMultiUserQueryScenario
)
case FetchByQuery =>
currQueryScn(numQueries / numReaders, randomCurrency).inject(
nothingFor(2.seconds),
atOnceUsers(numReaders),
atOnceUsers(numReaders)
)
case PopulateAndFetch =>
writeScn
.inject(atOnceUsers(numWriters))
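// andThen sequences the populations: each fetch phase starts only after the previous phase completes.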
.andThen(
fetchByKeyScn(numQueries / numReaders)
.inject(
nothingFor(2.seconds),
atOnceUsers(numReaders),
)
.andThen(
currQueryScn(numQueries / numReaders, randomCurrency)
.inject(
nothingFor(2.seconds),
atOnceUsers(numReaders),
)
)
)
}
}