From d6dd03751f101182f8b16565f0726d12a394aabf Mon Sep 17 00:00:00 2001 From: Gary Verhaegen Date: Fri, 17 Jun 2022 17:56:51 +0200 Subject: [PATCH] cron: faster check_releases (hopefully) (#14209) The check_releases job has been a major player in the flakiness of the daily test lately, simply by _timing out_ despite its 6h limit. There are smarter, more "permanent" fixes we could implement here, but as a quick stopgap measure I wanted to try out how much faster we would go if we didn't need to reestablish a GCloud identity for each file. CHANGELOG_BEGIN CHANGELOG_END run-full-compat: true --- ci/bash-lib.yml | 27 +++++++----------- ci/cron/daily-compat.yml | 3 +- ci/cron/src/CheckReleases.hs | 55 ++++++++++++++---------------------- ci/cron/src/Main.hs | 9 ++---- 4 files changed, 35 insertions(+), 59 deletions(-) diff --git a/ci/bash-lib.yml b/ci/bash-lib.yml index 1034b8ec93..8c9eef7e01 100644 --- a/ci/bash-lib.yml +++ b/ci/bash-lib.yml @@ -71,31 +71,24 @@ steps: jq -n --arg message "$message" '{"text": $message}' \ | curl -XPOST -i -H 'Content-Type: application/json' -d @- $channel } - gcs() { - local args cleanup cmd cred key restore_trap ret - ret=1 - + wrap_gcloud() ( cred="$1" cmd="$2" - args=(${@:3}) - key=$(mktemp) - # There may already be a trap; this will save it - restore_trap=$(trap -p EXIT) config_dir=$(mktemp -d) - cleanup="rm -rf $key $config_dir" - trap "$cleanup; $restore_trap" EXIT + trap "rm -rf $key $config_dir" EXIT echo "$cred" > $key export CLOUDSDK_CONFIG="$config_dir" + export BOTO_CONFIG=/dev/null gcloud auth activate-service-account --key-file=$key + eval "$cmd" + ) + gcs() ( + cred="$1" + cmd="${@:2}" - BOTO_CONFIG=/dev/null gsutil $cmd "${args[@]}" - ret=$? - eval "$cleanup" - trap - EXIT - eval "$restore_trap" - return $ret - } + wrap_gcloud "$cred" "gsutil $cmd" + ) gpg_verify() { local key gpg_dir signature_file res signature_file=$1 diff --git a/ci/cron/daily-compat.yml b/ci/cron/daily-compat.yml index 19f5b3024b..08131316cd 100644 --- a/ci/cron/daily-compat.yml +++ b/ci/cron/daily-compat.yml @@ -334,9 +334,10 @@ jobs: - bash: | set -euo pipefail eval "$(dev-env/bin/dade assist)" + source $(bash_lib) bazel build //ci/cron:cron - bazel-bin/ci/cron/cron check --bash-lib $(bash_lib) --gcp-creds "$GCRED" + wrap_gcloud "$GCRED" "bazel-bin/ci/cron/cron check --bash-lib $(bash_lib)" displayName: check releases env: GCRED: $(GOOGLE_APPLICATION_CREDENTIALS_CONTENT) diff --git a/ci/cron/src/CheckReleases.hs b/ci/cron/src/CheckReleases.hs index d410193b78..f85a55294c 100644 --- a/ci/cron/src/CheckReleases.hs +++ b/ci/cron/src/CheckReleases.hs @@ -9,7 +9,6 @@ import qualified Control.Concurrent.Async import qualified Control.Concurrent.QSem import Control.Exception.Safe import qualified Control.Monad as Control -import qualified Control.Monad.Extra import Control.Retry import Data.Conduit (runConduit, (.|)) import Data.Conduit.Combinators (sinkHandle) @@ -82,16 +81,11 @@ verify_signatures bash_lib tmp version_tag = do "done", "'"] -does_backup_exist :: String -> FilePath -> FilePath -> IO Bool -does_backup_exist gcp_credentials bash_lib path = do +does_backup_exist :: FilePath -> IO Bool +does_backup_exist path = do out <- shell $ unlines ["bash -c '", "set -euo pipefail", - "source \"" <> bash_lib <> "\"", - "GCRED=$(cat < path <> "\" >/dev/null; then", + "if gsutil ls \"" <> path <> "\" >/dev/null; then", "echo True", "else", "echo False", @@ -99,16 +93,11 @@ does_backup_exist gcp_credentials bash_lib path = do "'"] return $ read out -gcs_cp :: String -> FilePath -> FilePath -> FilePath -> IO () -gcs_cp gcp_credentials bash_lib local_path remote_path = do +gcs_cp :: FilePath -> FilePath -> IO () +gcs_cp from to = do shell_ $ unlines ["bash -c '", "set -euo pipefail", - "source \"" <> bash_lib <> "\"", - "GCRED=$(cat < local_path <> "\" \"" <> remote_path <> "\"", + "gsutil cp \"" <> from <> "\" \"" <> to <> "\" &>/dev/null", "'"] check_files_match :: String -> String -> IO Bool @@ -119,8 +108,8 @@ check_files_match f1 f2 = do Exit.ExitFailure 1 -> return False Exit.ExitFailure _ -> fail $ "Diff failed.\n" ++ "STDOUT:\n" ++ stdout ++ "\nSTDERR:\n" ++ stderr -check_releases :: Maybe String -> String -> Maybe Int -> IO () -check_releases gcp_credentials bash_lib max_releases = do +check_releases :: String -> Maybe Int -> IO () +check_releases bash_lib max_releases = do releases' <- fetch_gh_paginated "https://api.github.com/repos/digital-asset/daml/releases" let releases = case max_releases of Nothing -> releases' @@ -131,21 +120,19 @@ check_releases gcp_credentials bash_lib max_releases = do IO.withTempDir $ \temp_dir -> do download_assets temp_dir release verify_signatures bash_lib temp_dir v - Control.Monad.Extra.whenJust gcp_credentials $ \gcred -> do - files <- Directory.listDirectory temp_dir - Control.Concurrent.Async.forConcurrently_ files $ \f -> do - let local_github = temp_dir f - let local_gcp = temp_dir f <> ".gcp" - let remote_gcp = "gs://daml-data/releases/" <> v <> "/github/" <> f - exists <- does_backup_exist gcred bash_lib remote_gcp - if exists then do - gcs_cp gcred bash_lib remote_gcp local_gcp - check_files_match local_github local_gcp >>= \case - True -> putStrLn $ f <> " matches GCS backup." - False -> fail $ f <> " does not match GCS backup." - else do - fail $ remote_gcp <> " does not exist. Aborting.") + files <- Directory.listDirectory temp_dir + Control.Concurrent.Async.forConcurrently_ files $ \f -> do + let local_github = temp_dir f + let local_gcp = temp_dir f <> ".gcp" + let remote_gcp = "gs://daml-data/releases/" <> v <> "/github/" <> f + exists <- does_backup_exist remote_gcp + if exists then do + gcs_cp remote_gcp local_gcp + check_files_match local_github local_gcp >>= \case + True -> putStrLn $ f <> " matches GCS backup." + False -> fail $ f <> " does not match GCS backup." + else do + fail $ remote_gcp <> " does not exist. Aborting.") where -- Retry for 10 minutes total, delay of 1s retryPolicy = limitRetriesByCumulativeDelay (10 * 60 * 1000 * 1000) (constantDelay 1000_000) - diff --git a/ci/cron/src/Main.hs b/ci/cron/src/Main.hs index e05c71ea3c..b09d0dbda1 100644 --- a/ci/cron/src/Main.hs +++ b/ci/cron/src/Main.hs @@ -13,7 +13,6 @@ import qualified System.IO.Extra as IO data CliArgs = Docs | Check { bash_lib :: String, - gcp_credentials :: Maybe String, max_releases :: Maybe Int } | BazelCache BazelCache.Opts @@ -29,10 +28,6 @@ parser = info "This program is meant to be run by CI cron. You probably don't ha (Check <$> Opt.strOption (Opt.long "bash-lib" <> Opt.metavar "PATH" <> Opt.help "Path to Bash library file.") - <*> (Opt.optional $ - Opt.strOption (Opt.long "gcp-creds" - <> Opt.metavar "CRED_STRING" - <> Opt.help "GCP credentials as a string.")) <*> (Opt.optional $ Opt.option Opt.auto (Opt.long "max-releases" <> Opt.metavar "INT" @@ -71,6 +66,6 @@ main = do Docs -> do Docs.docs Docs.sdkDocOpts Docs.docs Docs.damlOnSqlDocOpts - Check { bash_lib, gcp_credentials, max_releases } -> - CheckReleases.check_releases gcp_credentials bash_lib max_releases + Check { bash_lib, max_releases } -> + CheckReleases.check_releases bash_lib max_releases BazelCache opts -> BazelCache.run opts