From f8c247cadf60b2c5b74a96d96ba0238bc947dc65 Mon Sep 17 00:00:00 2001 From: Gary Verhaegen Date: Fri, 3 Jan 2020 14:43:22 +0100 Subject: [PATCH] partial fix for docs cron (#3941) This commit aims at mitigating two issues we have noticed with the 0.13.41 release: 1. The initial cron run for that release got interrupted at the 50 minutes mark, which happened to be right in the middle of the s3 upload. This means it had already changed the versions.json file, but had not finished updating the actual html files. Right now, the docs.daml.com website shows version 0.13.41 in the drop-down, but actually displays the content for 0.13.40. Additionally, trying to explicitly visit the website for 0.13.41 (https://docs.daml.com/0.13.41) yields a 404. Note that this also means the cron job did not reach the "tell HubSpot" point, so 0.13.41 did not get announced. 2. As the script also did not reach the "clear cache" step, subsequent runs have been rebuilding the documentation for no reason as the sequence of steps was: check versions.json through HTTP, get cached one, see it's not up-to-date, build docs, check versions.json through s3 API, bypassing the cache, see it's up-to-date, stop. To address those issues, this PR changes the cron to: 1. Increase the timeout to 2h instead of 50 minutes. 2. Always check the versions.json file through s3, rather than go through the HTTP cache first. These are not complete solutions but I'm not sure how to do better given that s3 does not have atomic operations. --- azure-cron.yml | 2 +- ci/cron/src/Main.hs | 48 +++++++++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/azure-cron.yml b/azure-cron.yml index 2e7b709425..b092e32384 100644 --- a/azure-cron.yml +++ b/azure-cron.yml @@ -26,7 +26,7 @@ trigger: none jobs: - job: docs - timeoutInMinutes: 50 + timeoutInMinutes: 120 pool: name: 'linux-pool' steps: diff --git a/ci/cron/src/Main.hs b/ci/cron/src/Main.hs index ef65f70b7d..2812671108 100644 --- a/ci/cron/src/Main.hs +++ b/ci/cron/src/Main.hs @@ -102,19 +102,12 @@ http_post url headers body = do 2 -> return $ HTTP.responseBody response _ -> Exit.die $ "POST " <> url <> " failed with " <> show status <> "." -github_versions :: [GitHubVersion] -> Set.Set String -github_versions vs = Set.fromList $ map name vs - -remove_prereleases :: [GitHubVersion] -> [GitHubVersion] -remove_prereleases = filter (\v -> not (prerelease v)) - -docs_versions :: H.HashMap String String -> Set.Set String -docs_versions json = - Set.fromList $ H.keys json - newtype Version = Version (Int, Int, Int) deriving (Eq, Ord) +instance Show Version where + show (Version (a, b, c)) = show a <> "." <> show b <> "." <> show c + to_v :: String -> Version to_v s = case map read $ Split.splitOn "." s of [major, minor, patch] -> Version (major, minor, patch) @@ -149,13 +142,15 @@ build_docs_folder path versions = do shell_ $ "tar xzf bazel-bin/docs/html.tar.gz --strip-components=1 -C" <> path <> "/" <> version shell_ $ "git checkout " <> cur_sha -check_s3_versions :: Set.Set String -> IO Bool -check_s3_versions gh_versions = do +fetch_s3_versions :: IO (Set.Set Version) +fetch_s3_versions = do temp <- shell "mktemp" shell_ $ "aws s3 cp s3://docs-daml-com/versions.json " <> temp s3_raw <- shell $ "cat " <> temp - case JSON.decode $ LBS.fromString s3_raw of - Just s3_json -> return $ docs_versions s3_json == gh_versions + let type_annotated_value :: Maybe JSON.Object + type_annotated_value = JSON.decode $ LBS.fromString s3_raw + case type_annotated_value of + Just s3_json -> return $ Set.fromList $ map (to_v . Text.unpack) $ H.keys s3_json Nothing -> Exit.die "Failed to get versions from s3" push_to_s3 :: String -> IO () @@ -240,31 +235,38 @@ instance JSON.FromJSON GitHubVersion where name :: GitHubVersion -> String name gh = tail $ tag_name gh +fetch_gh_versions :: IO (Set.Set Version, GitHubVersion) +fetch_gh_versions = do + resp <- http_get "https://api.github.com/repos/digital-asset/daml/releases" + let releases = filter (not . prerelease) resp + let versions = Set.fromList $ map (to_v . name) releases + let latest = List.maximumOn (to_v . name) resp + return (versions, latest) + main :: IO () main = do robustly_download_nix_packages putStrLn "Checking for new version..." - gh_resp <- remove_prereleases <$> http_get "https://api.github.com/repos/digital-asset/daml/releases" - docs_resp <- docs_versions <$> http_get "https://docs.daml.com/versions.json" - if github_versions gh_resp == docs_resp + (gh_versions, gh_latest) <- fetch_gh_versions + s3_versions_before <- fetch_s3_versions + if s3_versions_before == gh_versions then do putStrLn "No new version found, skipping." Exit.exitSuccess else do Temp.withTempDir $ \docs_folder -> do putStrLn "Building docs listing" - build_docs_folder docs_folder $ List.sortOn (Ord.Down . to_v) $ map name gh_resp + build_docs_folder docs_folder $ map show $ List.sortOn Ord.Down $ Set.toList gh_versions putStrLn "Done building docs bundle. Checking versions again to avoid race condition..." - s3_matches <- check_s3_versions (github_versions gh_resp) - if s3_matches + s3_versions_after <- fetch_s3_versions + if s3_versions_after == gh_versions then do putStrLn "No more new version, another process must have pushed already." Exit.exitSuccess else do push_to_s3 docs_folder - let gh_latest = List.maximumOn (to_v . name) gh_resp - let docs_latest = List.maximumOn to_v $ Set.toList docs_resp - if to_v (name gh_latest) > to_v docs_latest + let prev_latest = List.maximum $ Set.toList s3_versions_before + if to_v (name gh_latest) > prev_latest then do putStrLn "New version detected, telling HubSpot" tell_hubspot gh_latest