cron: faster check_releases (hopefully) (#14209)

The check_releases job has been a major player in the flakiness of the
daily test lately, simply by _timing out_ despite its 6h limit.

There are smarter, more "permanent" fixes we could implement here, but
as a quick stopgap measure I wanted to try out how much faster we would
go if we didn't need to reestablish a GCloud identity for each file.

CHANGELOG_BEGIN
CHANGELOG_END

run-full-compat: true
This commit is contained in:
Gary Verhaegen 2022-06-17 17:56:51 +02:00 committed by GitHub
parent 658ae99100
commit d6dd03751f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 59 deletions

View File

@ -71,31 +71,24 @@ steps:
jq -n --arg message "$message" '{"text": $message}' \
| curl -XPOST -i -H 'Content-Type: application/json' -d @- $channel
}
gcs() {
local args cleanup cmd cred key restore_trap ret
ret=1
wrap_gcloud() (
cred="$1"
cmd="$2"
args=(${@:3})
key=$(mktemp)
# There may already be a trap; this will save it
restore_trap=$(trap -p EXIT)
config_dir=$(mktemp -d)
cleanup="rm -rf $key $config_dir"
trap "$cleanup; $restore_trap" EXIT
trap "rm -rf $key $config_dir" EXIT
echo "$cred" > $key
export CLOUDSDK_CONFIG="$config_dir"
export BOTO_CONFIG=/dev/null
gcloud auth activate-service-account --key-file=$key
eval "$cmd"
)
gcs() (
cred="$1"
cmd="${@:2}"
BOTO_CONFIG=/dev/null gsutil $cmd "${args[@]}"
ret=$?
eval "$cleanup"
trap - EXIT
eval "$restore_trap"
return $ret
}
wrap_gcloud "$cred" "gsutil $cmd"
)
gpg_verify() {
local key gpg_dir signature_file res
signature_file=$1

View File

@ -334,9 +334,10 @@ jobs:
- bash: |
set -euo pipefail
eval "$(dev-env/bin/dade assist)"
source $(bash_lib)
bazel build //ci/cron:cron
bazel-bin/ci/cron/cron check --bash-lib $(bash_lib) --gcp-creds "$GCRED"
wrap_gcloud "$GCRED" "bazel-bin/ci/cron/cron check --bash-lib $(bash_lib)"
displayName: check releases
env:
GCRED: $(GOOGLE_APPLICATION_CREDENTIALS_CONTENT)

View File

@ -9,7 +9,6 @@ import qualified Control.Concurrent.Async
import qualified Control.Concurrent.QSem
import Control.Exception.Safe
import qualified Control.Monad as Control
import qualified Control.Monad.Extra
import Control.Retry
import Data.Conduit (runConduit, (.|))
import Data.Conduit.Combinators (sinkHandle)
@ -82,16 +81,11 @@ verify_signatures bash_lib tmp version_tag = do
"done",
"'"]
does_backup_exist :: String -> FilePath -> FilePath -> IO Bool
does_backup_exist gcp_credentials bash_lib path = do
does_backup_exist :: FilePath -> IO Bool
does_backup_exist path = do
out <- shell $ unlines ["bash -c '",
"set -euo pipefail",
"source \"" <> bash_lib <> "\"",
"GCRED=$(cat <<END",
gcp_credentials,
"END",
")",
"if gcs \"$GCRED\" ls \"" <> path <> "\" >/dev/null; then",
"if gsutil ls \"" <> path <> "\" >/dev/null; then",
"echo True",
"else",
"echo False",
@ -99,16 +93,11 @@ does_backup_exist gcp_credentials bash_lib path = do
"'"]
return $ read out
gcs_cp :: String -> FilePath -> FilePath -> FilePath -> IO ()
gcs_cp gcp_credentials bash_lib local_path remote_path = do
gcs_cp :: FilePath -> FilePath -> IO ()
gcs_cp from to = do
shell_ $ unlines ["bash -c '",
"set -euo pipefail",
"source \"" <> bash_lib <> "\"",
"GCRED=$(cat <<END",
gcp_credentials,
"END",
")",
"gcs \"$GCRED\" cp \"" <> local_path <> "\" \"" <> remote_path <> "\"",
"gsutil cp \"" <> from <> "\" \"" <> to <> "\" &>/dev/null",
"'"]
check_files_match :: String -> String -> IO Bool
@ -119,8 +108,8 @@ check_files_match f1 f2 = do
Exit.ExitFailure 1 -> return False
Exit.ExitFailure _ -> fail $ "Diff failed.\n" ++ "STDOUT:\n" ++ stdout ++ "\nSTDERR:\n" ++ stderr
check_releases :: Maybe String -> String -> Maybe Int -> IO ()
check_releases gcp_credentials bash_lib max_releases = do
check_releases :: String -> Maybe Int -> IO ()
check_releases bash_lib max_releases = do
releases' <- fetch_gh_paginated "https://api.github.com/repos/digital-asset/daml/releases"
let releases = case max_releases of
Nothing -> releases'
@ -131,21 +120,19 @@ check_releases gcp_credentials bash_lib max_releases = do
IO.withTempDir $ \temp_dir -> do
download_assets temp_dir release
verify_signatures bash_lib temp_dir v
Control.Monad.Extra.whenJust gcp_credentials $ \gcred -> do
files <- Directory.listDirectory temp_dir
Control.Concurrent.Async.forConcurrently_ files $ \f -> do
let local_github = temp_dir </> f
let local_gcp = temp_dir </> f <> ".gcp"
let remote_gcp = "gs://daml-data/releases/" <> v <> "/github/" <> f
exists <- does_backup_exist gcred bash_lib remote_gcp
if exists then do
gcs_cp gcred bash_lib remote_gcp local_gcp
check_files_match local_github local_gcp >>= \case
True -> putStrLn $ f <> " matches GCS backup."
False -> fail $ f <> " does not match GCS backup."
else do
fail $ remote_gcp <> " does not exist. Aborting.")
files <- Directory.listDirectory temp_dir
Control.Concurrent.Async.forConcurrently_ files $ \f -> do
let local_github = temp_dir </> f
let local_gcp = temp_dir </> f <> ".gcp"
let remote_gcp = "gs://daml-data/releases/" <> v <> "/github/" <> f
exists <- does_backup_exist remote_gcp
if exists then do
gcs_cp remote_gcp local_gcp
check_files_match local_github local_gcp >>= \case
True -> putStrLn $ f <> " matches GCS backup."
False -> fail $ f <> " does not match GCS backup."
else do
fail $ remote_gcp <> " does not exist. Aborting.")
where
-- Retry for 10 minutes total, delay of 1s
retryPolicy = limitRetriesByCumulativeDelay (10 * 60 * 1000 * 1000) (constantDelay 1000_000)

View File

@ -13,7 +13,6 @@ import qualified System.IO.Extra as IO
data CliArgs = Docs
| Check { bash_lib :: String,
gcp_credentials :: Maybe String,
max_releases :: Maybe Int }
| BazelCache BazelCache.Opts
@ -29,10 +28,6 @@ parser = info "This program is meant to be run by CI cron. You probably don't ha
(Check <$> Opt.strOption (Opt.long "bash-lib"
<> Opt.metavar "PATH"
<> Opt.help "Path to Bash library file.")
<*> (Opt.optional $
Opt.strOption (Opt.long "gcp-creds"
<> Opt.metavar "CRED_STRING"
<> Opt.help "GCP credentials as a string."))
<*> (Opt.optional $
Opt.option Opt.auto (Opt.long "max-releases"
<> Opt.metavar "INT"
@ -71,6 +66,6 @@ main = do
Docs -> do
Docs.docs Docs.sdkDocOpts
Docs.docs Docs.damlOnSqlDocOpts
Check { bash_lib, gcp_credentials, max_releases } ->
CheckReleases.check_releases gcp_credentials bash_lib max_releases
Check { bash_lib, max_releases } ->
CheckReleases.check_releases bash_lib max_releases
BazelCache opts -> BazelCache.run opts