diff --git a/ci/cron/daily-compat.yml b/ci/cron/daily-compat.yml
index 863202c31e7..fab4eaa2588 100644
--- a/ci/cron/daily-compat.yml
+++ b/ci/cron/daily-compat.yml
@@ -173,8 +173,14 @@ jobs:
       env:
         GCRED: $(GOOGLE_APPLICATION_CREDENTIALS_CONTENT)
 
-  - job: perf_http_json_oracle
+  - job: perf_http_json_querystore
     timeoutInMinutes: 120
+    strategy:
+      matrix:
+        postgres:
+          querystore: postgres
+        oracle:
+          querystore: oracle
     pool:
       name: "ubuntu_20_04"
       demands: assignment -equals default
@@ -196,30 +202,33 @@ jobs:
         eval "$(dev-env/bin/dade assist)"
        source $(bash_lib)
 
-        docker login --username "$DOCKER_LOGIN" --password "$DOCKER_PASSWORD"
-        IMAGE=$(cat ci/oracle_image)
-        docker pull $IMAGE
-        # Cleanup stray containers that might still be running from
-        # another build that didn’t get shut down cleanly.
-        docker rm -f oracle || true
-        # Oracle does not like if you connect to it via localhost if it’s running in the container.
-        # Interestingly it works if you use the external IP of the host so the issue is
-        # not the host it is listening on (it claims for that to be 0.0.0.0).
-        # --network host is a cheap escape hatch for this.
-        docker run -d --rm --name oracle --network host -e ORACLE_PWD=$ORACLE_PWD $IMAGE
-        function cleanup() {
+        QUERY_STORE=$(querystore)
+
+        # Set up the Oracle docker container for perf tests against Oracle.
+        if [[ "$QUERY_STORE" == "oracle" ]]; then
+          docker login --username "$DOCKER_LOGIN" --password "$DOCKER_PASSWORD"
+          IMAGE=$(cat ci/oracle_image)
+          docker pull $IMAGE
+          # Clean up stray containers that might still be running from
+          # another build that didn’t get shut down cleanly.
+          docker rm -f oracle || true
+          # Oracle does not like it if you connect to it via localhost while it is
+          # running in the container. Interestingly, it works if you use the external
+          # IP of the host, so the issue is not the host it is listening on
+          # (it claims that to be 0.0.0.0). --network host is a cheap escape hatch for this.
+          docker run -d --rm --name oracle --network host -e ORACLE_PWD=$ORACLE_PWD $IMAGE
+          function cleanup() {
            docker rm -f oracle
-        }
-        trap cleanup EXIT
-        testConnection() {
+          }
+          trap cleanup EXIT
+          testConnection() {
            docker exec oracle bash -c 'sqlplus -L '"$ORACLE_USERNAME"'/'"$ORACLE_PWD"'@//localhost:'"$ORACLE_PORT"'/ORCLPDB1 <<< "select * from dba_users;"; exit $?' >/dev/null
-        }
+          }
-        # dont want to wait forever to test connection , 15m is more than sufficient here.
-        declare -xf testConnection
-        timeout 15m bash -c 'until testConnection; do echo "Could not connect to Oracle, trying again..." ; sleep 1 ; done'
-
-        # Actually run some tests
+          # We don’t want to wait forever for the connection test; 15m is more than sufficient here.
+          declare -xf testConnection
+          timeout 15m bash -c 'until testConnection; do echo "Could not connect to Oracle, trying again..." ; sleep 1 ; done'
+        fi
 
        bazel build //ledger-service/http-json-perf/...
        DAR="${PWD}/bazel-bin/ledger-service/http-json-perf/LargeAcs.dar"
@@ -235,14 +244,8 @@ jobs:
        # }
        JWT="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwczovL2RhbWwuY29tL2xlZGdlci1hcGkiOnsibGVkZ2VySWQiOiJNeUxlZGdlciIsImFwcGxpY2F0aW9uSWQiOiJmb29iYXIiLCJhY3RBcyI6WyJBbGljZSJdfX0.VdDI96mw5hrfM5ZNxLyetSVwcD7XtLT4dIdHIOa9lcU"
 
-        TEST_CASES="\
-          populateCache \
-          fetchByKey \
-          fetchByQuery \
-        "
-
        METADATA=$(git log -n1 --format=%cd --date=format:%Y%m%d).$(git rev-list --count HEAD).$(Build.BuildId).$(git log -n1 --format=%h --abbrev=8)
-        REPORT_ID="http_json_perf_oracle_results_${METADATA}"
+        REPORT_ID="http_json_perf_${QUERY_STORE}_results_${METADATA}"
 
        OUT="$(Build.StagingDirectory)/${REPORT_ID}"
        LOG_DIR="$(Build.StagingDirectory)/log"
@@ -254,37 +257,64 @@ jobs:
        export NUM_QUERIES=10000 # 10k queries in total
        export NUM_READERS=100 # 100 users in parallel.
 
-        for CASE in $TEST_CASES; do
-          RUN_MODE=${CASE} \
-          USE_DEFAULT_USER=true \
-          RETAIN_DATA=true \
-          bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
-            --scenario=com.daml.http.perf.scenario.OracleMultiUserQueryScenario \
-            --dars=${DAR} \
-            --reports-dir=${OUT}/${CASE} \
-            --jwt=${JWT} \
-            --query-store-index=oracle > "${LOG_DIR}/${CASE}_log.out"
-        done
-
        READ_PERF_KEYS="\
          fetchByKey \
          fetchByQuery \
        "
-        RES=""
-        for KEY in $READ_PERF_KEYS; do
-          # capture the avg, stddev, p90, p99, requests_per_second numbers from gatling summary csv
-          perf=$(cat ${OUT}/${KEY}/*/summary.csv | tail -n 1 | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f", $11, $12, $6, $8, $13 }')
-          RES="${RES}json_oracle/${KEY}: $perf\n"
-        done
+        RESULT=""
 
-        RES=${RES%"\n"}
-        setvar oracle_perf_results "$RES"
+        if [[ "$QUERY_STORE" == "oracle" ]]
+        then
+          # We run test cases in isolation and retain data between them.
+          TEST_CASES="\
+            populateCache \
+            fetchByKey \
+            fetchByQuery \
+          "
+          for CASE in $TEST_CASES; do
+            RUN_MODE=${CASE} \
+            USE_DEFAULT_USER=true \
+            RETAIN_DATA=true \
+            bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
+              --scenario=com.daml.http.perf.scenario.MultiUserQueryScenario \
+              --dars=${DAR} \
+              --reports-dir=${OUT}/${CASE} \
+              --jwt=${JWT} \
+              --query-store-index=${QUERY_STORE} > "${LOG_DIR}/${CASE}_log.out"
+          done
+          for KEY in $READ_PERF_KEYS; do
+            # Capture the avg, stddev, p90, p99 and requests_per_second numbers from the gatling summary csv.
+            perf=$(cat ${OUT}/${KEY}/*/summary.csv | tail -n 1 | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f", $11, $12, $6, $8, $13 }')
+            RESULT="${RESULT}json_${QUERY_STORE}/${KEY}: $perf\n"
+          done
+        elif [[ "$QUERY_STORE" == "postgres" ]]
+        then
+          # The single test case to run in MultiUserQueryScenario for postgres.
+          TEST_CASE="populateAndFetch"
+
+          RUN_MODE=${TEST_CASE} \
+          bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
+            --scenario=com.daml.http.perf.scenario.MultiUserQueryScenario \
+            --dars=${DAR} \
+            --reports-dir=${OUT}/${TEST_CASE} \
+            --jwt=${JWT} \
+            --query-store-index=${QUERY_STORE} > "${LOG_DIR}/${TEST_CASE}_log.out"
+
+          for KEY in $READ_PERF_KEYS; do
+            # Capture the avg, stddev, p90, p99 and requests_per_second numbers from the gatling summary csv.
+            perf=$(cat ${OUT}/${TEST_CASE}/*/summary.csv | grep -i "$KEY" | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f", $11, $12, $6, $8, $13 }')
+            RESULT="${RESULT}json_${QUERY_STORE}/${KEY}: $perf\n"
+          done
+        fi
+
+        RESULT=${RESULT%"\n"}
+        setvar ${QUERY_STORE}_perf_results "$RESULT"
 
        GZIP=-9 tar -zcf ${OUT}.tgz ${OUT}
 
-        gcs "$GCRED" cp "$OUT.tgz" "gs://daml-data/perf/http-json-oracle/${REPORT_ID}.tgz"
+        gcs "$GCRED" cp "$OUT.tgz" "gs://daml-data/perf/http-json-${QUERY_STORE}/${REPORT_ID}.tgz"
-      displayName: http-json-oracle perf
+      displayName: http-json-$(querystore) perf
      name: out
      env:
        GCRED: $(GOOGLE_APPLICATION_CREDENTIALS_CONTENT)
@@ -293,9 +323,7 @@ jobs:
    - task: PublishPipelineArtifact@0
      inputs:
        targetPath: $(Build.StagingDirectory)/log
-        artifactName: perf_http_json_oracle
-
-
+        artifactName: perf_http_json_$(querystore)
 
  - job: check_releases
    timeoutInMinutes: 360
@@ -437,7 +465,7 @@ jobs:
  - job: report
    dependsOn: [compatibility_ts_libs, compatibility, compatibility_windows,
-                perf_speedy, perf_http_json, perf_http_json_oracle, check_releases,
+                perf_speedy, perf_http_json, perf_http_json_querystore, check_releases,
                blackduck_scan, run_notices_pr_build]
    condition: and(succeededOrFailed(),
                   eq(variables['Build.SourceBranchName'], 'main'))
@@ -451,8 +479,9 @@ jobs:
      perf_speedy: $[ dependencies.perf_speedy.result ]
      speedy_perf: $[ dependencies.perf_speedy.outputs['out.speedy_perf'] ]
      perf_http_json: $[ dependencies.perf_http_json.result ]
-      perf_http_json_oracle: $[ dependencies.perf_http_json_oracle.result ]
-      oracle_perf_results: $[ dependencies.perf_http_json_oracle.outputs['out.oracle_perf_results'] ]
+      perf_http_json_querystore: $[ dependencies.perf_http_json_querystore.result ]
+      oracle_perf_results: $[ dependencies.perf_http_json_querystore.outputs['out.oracle_perf_results'] ]
+      postgres_perf_results: $[ dependencies.perf_http_json_querystore.outputs['out.postgres_perf_results'] ]
      check_releases: $[ dependencies.check_releases.result ]
      blackduck_scan: $[ dependencies.blackduck_scan.result ]
      run_notices_pr_build: $[ dependencies.run_notices_pr_build.result ]
@@ -474,7 +503,7 @@ jobs:
            && "$(compatibility_windows)" == "Succeeded"
            && "$(perf_speedy)" == "Succeeded"
            && "$(perf_http_json)" == "Succeeded"
-            && "$(perf_http_json_oracle)" == "Succeeded"
+            && "$(perf_http_json_querystore)" == "Succeeded"
            && "$(check_releases)" == "Succeeded"
            && ("$(blackduck_scan)" == "Succeeded" || "$(blackduck_scan)" == "Skipped")
            && ("$(run_notices_pr_build)" == "Succeeded" || "$(run_notices_pr_build)" == "Skipped") ]]; then
@@ -482,6 +511,7 @@ jobs:
          REPORT='```
          speedy_perf: $(speedy_perf)
          $(oracle_perf_results)
+          $(postgres_perf_results)
          ```
          '
          tell_slack "$(echo -e "$MSG\n$REPORT")" "$(Slack.ci-failures-daml)"
diff --git a/ledger-service/http-json-perf/README.md b/ledger-service/http-json-perf/README.md
index 93575cb69b1..4980463f254 100644
--- a/ledger-service/http-json-perf/README.md
+++ b/ledger-service/http-json-perf/README.md
@@ -34,15 +34,18 @@ $ bazel run //ledger-service/http-json-perf:http-json-perf-binary -- \
   --jwt="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwczovL2RhbWwuY29tL2xlZGdlci1hcGkiOnsibGVkZ2VySWQiOiJNeUxlZGdlciIsImFwcGxpY2F0aW9uSWQiOiJmb29iYXIiLCJhY3RBcyI6WyJBbGljZSJdfX0.VdDI96mw5hrfM5ZNxLyetSVwcD7XtLT4dIdHIOa9lcU"
 ```
 
-## 2.3 Running OracleMultiUserQueryScenario
+## 2.3 Running MultiUserQueryScenario
 
-We use an external docker oracle vm, so we want to retain the data between runs to specifically focus on testing query performance.
-use `RETAIN_DATA` and `USE_DEFAULT_USER` env vars to use a static user(`ORACLE_USER`) and preserve data.
-This scenario uses a single template `KeyedIou` defined in `LargeAcs.daml`.
+For the Oracle *Query Store*, since we use an external Oracle docker container, we might
+want to retain the data between runs to focus specifically on testing query performance.
+To achieve this, use the `RETAIN_DATA` and `USE_DEFAULT_USER` env vars to use a static
+user (`ORACLE_USER`) and preserve data.
+
+This scenario uses a single template, `KeyedIou`, defined in `LargeAcs.daml`.
 
 We can control a few scenario parameters i.e `NUM_RECORDS` `NUM_QUERIES` `NUM_READERS` `NUM_WRITERS` via env variables
-
+`RUN_MODE` allows you to run specific test case scenarios in isolation.
 
 1. Populate Cache
@@ -64,7 +67,7 @@ USE_DEFAULT_USER=true RETAIN_DATA=true RUN_MODE="fetchByKey" NUM_QUERIES=100 baz
 
 3. Fetch By Query
 
-Query contracts by a field on the payload which is the `id` in this case.
+Query contracts by a field on the payload, which is the `currency` in this case.
 
 ```
diff --git a/ledger-service/http-json-perf/src/main/scala/com/daml/http/perf/scenario/OracleMultiUserQueryScenario.scala b/ledger-service/http-json-perf/src/main/scala/com/daml/http/perf/scenario/MultiUserQueryScenario.scala
similarity index 83%
rename from ledger-service/http-json-perf/src/main/scala/com/daml/http/perf/scenario/OracleMultiUserQueryScenario.scala
rename to ledger-service/http-json-perf/src/main/scala/com/daml/http/perf/scenario/MultiUserQueryScenario.scala
index 008a67bdce7..b170d88721f 100644
--- a/ledger-service/http-json-perf/src/main/scala/com/daml/http/perf/scenario/OracleMultiUserQueryScenario.scala
+++ b/ledger-service/http-json-perf/src/main/scala/com/daml/http/perf/scenario/MultiUserQueryScenario.scala
@@ -3,7 +3,7 @@
 package com.daml.http.perf.scenario
 
-import com.daml.http.perf.scenario.OracleMultiUserQueryScenario._
+import com.daml.http.perf.scenario.MultiUserQueryScenario._
 import io.gatling.core.Predef._
 import io.gatling.core.structure.PopulationBuilder
 import io.gatling.http.Predef._
@@ -19,15 +19,16 @@ private[scenario] trait HasRandomCurrency {
   }
 }
 
-object OracleMultiUserQueryScenario {
+object MultiUserQueryScenario {
   sealed trait RunMode { def name: String }
   case object PopulateCache extends RunMode { val name = "populateCache" }
   case object FetchByKey extends RunMode { val name = "fetchByKey" }
   case object FetchByQuery extends RunMode { val name = "fetchByQuery" }
+  case object PopulateAndFetch extends RunMode { val name = "populateAndFetch" }
 }
 
 @SuppressWarnings(Array("org.wartremover.warts.NonUnitStatements"))
-class OracleMultiUserQueryScenario
+class MultiUserQueryScenario
     extends Simulation
     with SimulationConfig
    with HasRandomAmount
@@ -48,6 +49,8 @@ class OracleMultiUserQueryScenario
      case PopulateCache.name => PopulateCache
      case FetchByKey.name => FetchByKey
      case FetchByQuery.name => FetchByQuery
+      // Run everything in a single run.
+      case PopulateAndFetch.name => PopulateAndFetch
    }
  }
@@ -142,9 +145,25 @@ class OracleMultiUserQueryScenario
        )
      case FetchByQuery =>
        currQueryScn(numQueries / numReaders, randomCurrency).inject(
-          nothingFor(2.seconds),
-          atOnceUsers(numReaders),
+          atOnceUsers(numReaders)
        )
+      case PopulateAndFetch =>
+        writeScn
+          .inject(atOnceUsers(numWriters))
+          .andThen(
+            fetchByKeyScn(numQueries / numReaders)
+              .inject(
+                nothingFor(2.seconds),
+                atOnceUsers(numReaders),
+              )
+              .andThen(
+                currQueryScn(numQueries / numReaders, randomCurrency)
+                  .inject(
+                    nothingFor(2.seconds),
+                    atOnceUsers(numReaders),
+                  )
+              )
+          )
    }
  }
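
For anyone wanting to reproduce the new combined postgres run outside CI: the following is a sketch only, assuming the sandbox and JSON API are already running as described in section 2 of the README, with `$JWT` holding the token shown there and `LargeAcs.dar` already built by `bazel build //ledger-service/http-json-perf/...`. The flags mirror the CI invocation above.

```
# Hypothetical local run of the combined scenario against the postgres
# query store. NUM_QUERIES/NUM_READERS mirror the CI exports above;
# NUM_RECORDS is an assumed value, since its CI export falls outside the hunk.
export NUM_RECORDS=100000
export NUM_QUERIES=10000   # 10k queries in total
export NUM_READERS=100     # 100 users in parallel

RUN_MODE="populateAndFetch" \
bazel run //ledger-service/http-json-perf:http-json-perf-binary-ee -- \
  --scenario=com.daml.http.perf.scenario.MultiUserQueryScenario \
  --dars="${PWD}/bazel-bin/ledger-service/http-json-perf/LargeAcs.dar" \
  --reports-dir=/tmp/populateAndFetch \
  --jwt="${JWT}" \
  --query-store-index=postgres
```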
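The report-extraction difference between the two CI branches is worth spelling out: the Oracle branch produces one Gatling report per test case and takes the last line of each `summary.csv`, while the postgres branch produces a single `populateAndFetch` report and greps the per-request rows for each key out of that one file. Below is a standalone sketch of the shared extraction logic, assuming the column layout named in the script's comment ($11 avg, $12 stddev, $6 p90, $8 p99, $13 requests per second); the column meanings are taken from that comment and not independently verified here.

```
# Sketch: pull "avg, stddev, p90, p99, rps" for one request name out of a
# Gatling summary.csv, generalizing both branches above.
summary_line() {
  local csv="$1" key="$2"
  grep -i "$key" "$csv" | tail -n 1 \
    | awk -F, '{printf "%.1f, %.1f, %.1f, %.1f, %.1f\n", $11, $12, $6, $8, $13}'
}

# Hypothetical usage against a postgres run:
# summary_line "${OUT}"/populateAndFetch/*/summary.csv fetchByQuery
```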
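Finally, a note on the output-variable plumbing: `setvar ${QUERY_STORE}_perf_results "$RESULT"` publishes a differently named variable per matrix leg, which is why the `report` job maps both `out.oracle_perf_results` and `out.postgres_perf_results`. The `setvar` helper itself comes from `$(bash_lib)` and is not part of this diff; if it wraps the standard Azure DevOps `task.setvariable` logging command (an assumption, not confirmed by this patch), it behaves roughly like:

```
# Hypothetical stand-in for the setvar helper sourced from $(bash_lib).
# Assumption: it emits the Azure DevOps logging command with isOutput=true,
# which is what makes dependencies.<job>.outputs['out.<name>'] resolvable
# in the report job.
setvar() {
  local name="$1" value="$2"
  echo "##vso[task.setvariable variable=${name};isOutput=true]${value}"
}
```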