simplify docs cron (#6817)

simplify docs cron

This commit changes the "live state" to be that all versions are there
on S3, most of them hidden the way snapshots currently are, and only
displays in the drop-down the list of "supported" versions, i.e. stable
and >= 1.0.0.

The docs cron will now:

- Get list of versions from GitHub (as it does now)
- Get list of versions from S3 (as it does now: versions.json +
  snapshots.json, though it assumes we'll have a follow-up PR to change
  the latter to hidden.json)
- Compare; if the sets of versions are the same, stop there. (Note: this
  "set of versions" here includes the notion of which versions are shown,
  not just which ones exist. See the Versions data type in the code.)
- If there is a new hidden version, just build that, push it, change
  nothing else. No need to download any of the existing versions or mess
  around with anything else (except updating `hidden.json`, otherwise
  we're going to be doing this way too often.)
- If there is a new visible version:
  - check if we have it locally (i.e. from the previous step: it's a
    version we just added)
  - figure out the old and new default versions, and then apply the diff
    to the top-level directory. Basically download the two folders, list
    files that exist in the old one and not in the new one, delete those
    from S3, then push the new one to the top-level on S3.
- update versions.json & hidden.json (and for now snapshots.json)

This means that:

- we never mess with the existing versions; we don't need to download
  them, we don't need to change them, we don't clean them up. Old links
  keep working forever.
- The running time for the docs cron is roughly constant, in that it
  should very rarely have to either build or upload (or download) more
  than 2 versions per run, and if those instances happen they'd be
  accidents (we made 3 actual releases in an hour), not build-up over
  time.
This commit is contained in:
Gary Verhaegen 2020-07-24 14:40:32 +02:00 committed by GitHub
parent 3991286b82
commit 818a52b094
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 148 additions and 223 deletions

View File

@ -19,6 +19,7 @@ da_haskell_binary(
"http-types",
"process",
"regex-tdfa",
"semver",
"split",
"text",
"unordered-containers",

View File

@ -15,10 +15,10 @@ import qualified Data.CaseInsensitive as CI
import qualified Data.Foldable
import qualified Data.HashMap.Strict as H
import qualified Data.List as List
import qualified Data.List.Extra as List
import qualified Data.List.Split as Split
import qualified Data.Maybe as Maybe
import qualified Data.Ord
import qualified Data.SemVer
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Traversable as Traversable
@ -61,13 +61,11 @@ shell_ :: String -> IO ()
shell_ cmd = do
Control.void $ shell cmd
robustly_download_nix_packages :: String -> IO ()
robustly_download_nix_packages v = do
robustly_download_nix_packages :: IO ()
robustly_download_nix_packages = do
h (10 :: Integer)
where
cmd = if to_v v < to_v "0.13.55"
then "nix-build nix -A tools -A cached"
else "nix-build nix -A tools -A ci-cached"
cmd = "nix-build nix -A tools -A ci-cached"
h n = do
(exit, out, err) <- shell_exit_code cmd
case (exit, n) of
@ -90,168 +88,96 @@ http_get url = do
_ -> Exit.die $ unlines ["GET \"" <> url <> "\" returned status code " <> show status <> ".",
show $ HTTP.responseBody response]
newtype Version = Version (Int, Int, Int, Maybe String)
deriving (Eq, Ord)
instance Show Version where
show (Version (a, b, c, q)) = show a <> "." <> show b <> "." <> show c <> Maybe.maybe "" (\qual -> "-" <> qual) q
to_v :: String -> Version
to_v s = case Split.splitOn "-" s of
[prefix, qualifier] -> let (major, minor, patch) = parse_stable prefix
in Version (major, minor, patch, Just qualifier)
[stable] -> let (major, minor, patch) = parse_stable stable
in Version (major, minor, patch, Nothing)
_ -> error $ "Invalid data, needs manual repair. Got this for a version string: " <> s
where parse_stable s = case map read $ Split.splitOn "." s of
[major, minor, patch] -> (major, minor, patch)
_ -> error $ "Invalid data, needs manual repair. Got this for a version string: " <> s
build_docs_folder :: String -> [GitHubVersion] -> String -> IO String
build_docs_folder path versions current = do
build_and_push :: FilePath -> [Version] -> IO ()
build_and_push temp versions = do
restore_sha $ do
let old = path </> "old"
let new = path </> "new"
shell_ $ "mkdir -p " <> new
shell_ $ "mkdir -p " <> old
download_existing_site_from_s3 old
documented_versions <- Maybe.catMaybes <$> Traversable.for versions (\gh_version -> do
let version = name gh_version
putStrLn $ "Building " <> version <> "..."
putStrLn " Checking for existing folder..."
old_version_exists <- exists $ old </> version
if to_v version < to_v "0.13.36"
then do
-- Maven has stopped accepting http requests and now requires
-- https. We have a patch for 0.13.36 and above, which has been
-- merged between 0.13.43 and 0.13.44.
if old_version_exists
then do
putStrLn " Found. Too old to rebuild, copying over..."
copy (old </> version) $ new </> version
return $ Just (gh_version, False)
else do
putStrLn " Too old to rebuild and no existing version. Skipping."
return Nothing
else if to_v version < to_v "0.13.45"
then do
-- Versions prior to 0.13.45 do have a checksum file, and
-- should be buildable with the Maven cherry-pick (see `build`
-- function below), but their build is not reproducible
-- (includes date of build) and therefore the checksum file is
-- useless
if old_version_exists
then do
putStrLn " Found. No reliable checksum; copying over and hoping for the best..."
copy (old </> version) $ new </> version
return $ Just (gh_version, False)
else do
putStrLn " Not found. Building..."
build version new
return $ Just (gh_version, True)
else if old_version_exists
then do
-- Note: this checks for upload errors; this is NOT in any way
-- a protection against tampering at the s3 level as we get the
-- checksums from the s3 bucket.
putStrLn " Found. Checking integrity..."
checksums_match <- checksums $ old </> version
if checksums_match
then do
putStrLn " Checks, reusing existing."
copy (old </> version) $ new </> version
return $ Just (gh_version, False)
else do
putStrLn " Check failed. Rebuilding..."
build version new
return $ Just (gh_version, True)
else do
putStrLn " Not found. Building..."
build version new
return $ Just (gh_version, True))
putStrLn $ "Copying current (" <> current <> ") to top-level..."
copy (new </> current </> "*") (new <> "/")
putStrLn "Creating versions.json..."
let (releases, snapshots) = List.partition (not . prerelease . fst) documented_versions
create_versions_json (map fst releases) (new </> "versions.json")
create_versions_json (map fst snapshots) (new </> "snapshots.json")
-- Starting after 0.13.54, we have changed the way in which we trigger
-- releases. Rather than releasing the current commit by changing the
-- VERSION file, we now mark an existing commit as the source code for
-- a release by changing the LATEST file. This raises the question of
-- the release notes: as we tag a commit from the past, and keep the
-- changelog outside of the worktree (in commit messages), that means
-- that commit cannot contain its own release notes. We have decided to
-- resolve that conundrum by always including the release notes from
-- the most recent release in all releases.
case filter snd documented_versions of
((newly_built,_):_) -> do
putStrLn $ "Copying release notes from " <> name newly_built <> " to all other versions..."
let p v = new </> name v </> "support" </> "release-notes.html"
let top_level_release_notes = new </> "support" </> "release-notes.html"
shell_ $ "cp " <> p newly_built <> " " <> top_level_release_notes
Data.Foldable.for_ documented_versions $ \(gh_version, _) -> do
shell_ $ "cp " <> top_level_release_notes <> " " <> p gh_version
_ -> do
putStrLn "No version built, so no release page copied."
return new
Data.Foldable.for_ versions (\version -> do
putStrLn $ "Building " <> show version <> "..."
build version
putStrLn $ "Pushing " <> show version <> " to S3 (as subfolder)..."
push temp $ show version
putStrLn "Done.")
where
restore_sha io =
Control.Exception.bracket (init <$> shell "git rev-parse HEAD")
(\cur_sha -> shell_ $ "git checkout " <> cur_sha)
(const io)
download_existing_site_from_s3 path = do
shell_ $ "mkdir -p " <> path
shell_ $ "aws s3 sync s3://docs-daml-com/ " <> path
exists dir = Directory.doesDirectoryExist dir
checksums path = do
let cmd = "cd " <> path <> "; sed -i '/support\\/release-notes.html/d' checksum; sha256sum -c checksum"
(code, _, _) <- shell_exit_code cmd
case code of
Exit.ExitSuccess -> return True
_ -> return False
copy from to = do
shell_ $ "cp -r " <> from <> " " <> to
build version path = do
shell_ $ "git checkout v" <> version
-- Maven does not accept http connections anymore; this patches the
-- scala rules for Bazel to use https instead. This is not needed
-- after 0.13.43.
if to_v version < to_v "0.13.44"
then do
shell_ "git -c user.name=CI -c user.email=CI@example.com cherry-pick 0c4f9d7f92c4f2f7e2a75a0d85db02e20cbb497b"
build_helper version path
else do
-- The release-triggering commit does not have a tag, so we
-- need to find it by walking through the git history of the
-- LATEST file.
sha <- find_commit_for_version version
Control.Exception.bracket_
(shell_ $ "git checkout " <> sha <> " -- docs/source/support/release-notes.rst")
(shell_ "git reset --hard")
(build_helper version path)
build_helper version path = do
robustly_download_nix_packages version
shell_ $ "DAML_SDK_RELEASE_VERSION=" <> version <> " bazel build //docs:docs"
shell_ $ "mkdir -p " <> path </> version
shell_ $ "tar xzf bazel-bin/docs/html.tar.gz --strip-components=1 -C" <> path </> version
checksums <- shell $ "cd " <> path </> version <> "; find . -type f -exec sha256sum {} \\; | grep -v 'support/release-notes.html'"
writeFile (path </> version </> "checksum") checksums
create_versions_json versions path = do
build version = do
shell_ $ "git checkout v" <> show version
-- The release-triggering commit does not have a tag, so we need to
-- find it by walking through the git history of the LATEST file.
sha <- find_commit_for_version version
Control.Exception.bracket_
(shell_ $ "git checkout " <> sha <> " -- docs/source/support/release-notes.rst")
(shell_ "git reset --hard")
(build_helper version)
build_helper version = do
robustly_download_nix_packages
shell_ $ "DAML_SDK_RELEASE_VERSION=" <> show version <> " bazel build //docs:docs"
shell_ $ "mkdir -p " <> temp </> show version
shell_ $ "tar xzf bazel-bin/docs/html.tar.gz --strip-components=1 -C" <> temp </> show version
push :: FilePath -> FilePath -> IO ()
push local remote =
shell_ $ "aws s3 cp " <> local <> " " <> "s3://docs-daml-com" </> remote <> " --recursive"
fetch_if_missing :: FilePath -> Version -> IO ()
fetch_if_missing temp v = do
missing <- not <$> Directory.doesDirectoryExist (temp </> show v)
if missing then do
putStrLn $ "Downloading " <> show v <> "..."
shell_ $ "aws s3 cp s3://docs-daml-com" </> show v <> " " <> temp </> show v <> " --recursive"
putStrLn "Done."
else do
putStrLn $ show v <> " already present."
update_s3 :: FilePath -> Versions -> IO ()
update_s3 temp vs = do
putStrLn "Updating versions.json & hidden.json..."
create_versions_json (dropdown vs) (temp </> "versions.json")
let hidden = List.sortOn Data.Ord.Down $ Set.toList $ all_versions vs `Set.difference` (Set.fromList $ dropdown vs)
create_versions_json hidden (temp </> "hidden.json")
shell_ $ "aws s3 cp " <> temp </> "versions.json s3://docs-daml-com/versions.json"
shell_ $ "aws s3 cp " <> temp </> "hidden.json s3://docs-daml-com/hidden.json"
-- FIXME: remove after running once (and updating the reading bit in this file)
shell_ $ "aws s3 cp " <> temp </> "hidden.json s3://docs-daml-com/snapshots.json"
putStrLn "Done."
where
create_versions_json versions file = do
-- Not going through Aeson because it represents JSON objects as
-- unordered maps, and here order matters.
let versions_json = versions
& map name
& List.sortOn (Data.Ord.Down . to_v)
& map show
& map (\s -> "\"" <> s <> "\": \"" <> s <> "\"")
& List.intercalate ", "
& \s -> "{" <> s <> "}"
writeFile path versions_json
writeFile file versions_json
find_commit_for_version :: String -> IO String
update_top_level :: FilePath -> Version -> Version -> IO ()
update_top_level temp new old = do
new_files <- Set.fromList <$> Directory.listDirectory (temp </> show new)
old_files <- Set.fromList <$> Directory.listDirectory (temp </> show old)
let to_delete = Set.toList $ old_files `Set.difference` new_files
Control.when (not $ null to_delete) $ do
putStrLn $ "Deleting top-level files: " <> show to_delete
Data.Foldable.for_ to_delete (\f -> do
shell_ $ "aws s3 rm s3://docs-daml-com" </> f <> " --recursive")
putStrLn "Done."
putStrLn $ "Pushing " <> show new <> " to top-level..."
shell_ $ "aws s3 cp " <> temp </> show new </> " s3://docs-daml-com/ --recursive"
putStrLn "Done."
reset_cloudfront :: IO ()
reset_cloudfront = do
putStrLn "Refreshing CloudFront cache..."
shell_ $ "aws cloudfront create-invalidation"
<> " --distribution-id E1U753I56ERH55"
<> " --paths '/*'"
find_commit_for_version :: Version -> IO String
find_commit_for_version version = do
ver_sha <- init <$> (shell $ "git rev-parse v" <> version)
let expected = ver_sha <> " " <> version
ver_sha <- init <$> (shell $ "git rev-parse v" <> show version)
let expected = ver_sha <> " " <> show version
-- git log -G 'regex' returns all the commits for which 'regex' appears in
-- the diff. To find out the commit that "released" the version. The commit
-- we want is a commit that added a single line, which matches the version
@ -265,46 +191,7 @@ find_commit_for_version version = do
_ -> Nothing)
case matching of
[sha] -> return sha
_ -> fail $ "Expected single commit to match release " <> version <> ", but instead found: " <> show matching
fetch_s3_versions :: IO (Set.Set Version, Set.Set Version)
fetch_s3_versions = do
releases <- fetch "versions.json"
snapshots <- fetch "snapshots.json"
return (releases, snapshots)
where fetch file = do
temp <- shell "mktemp"
shell_ $ "aws s3 cp s3://docs-daml-com/" <> file <> " " <> temp
s3_raw <- shell $ "cat " <> temp
let type_annotated_value :: Maybe JSON.Object
type_annotated_value = JSON.decode $ LBS.fromString s3_raw
case type_annotated_value of
Just s3_json -> return $ Set.fromList $ map (to_v . Text.unpack) $ H.keys s3_json
Nothing -> Exit.die "Failed to get versions from s3"
push_to_s3 :: String -> IO ()
push_to_s3 doc_folder = do
putStrLn "Pushing new versions file first..."
shell_ $ "aws s3 cp " <> doc_folder </> "versions.json s3://docs-daml-com/versions.json --acl public-read"
putStrLn "Pushing to S3 bucket..."
shell_ $ "aws s3 sync " <> doc_folder
<> " s3://docs-daml-com/"
<> " --delete"
<> " --acl public-read"
putStrLn "Refreshing CloudFront cache..."
shell_ $ "aws cloudfront create-invalidation"
<> " --distribution-id E1U753I56ERH55"
<> " --paths '/*'"
data GitHubVersion = GitHubVersion { prerelease :: Bool, tag_name :: String, notes :: String } deriving Show
instance JSON.FromJSON GitHubVersion where
parseJSON = JSON.withObject "GitHubVersion" $ \v -> GitHubVersion
<$> v JSON..: Text.pack "prerelease"
<*> v JSON..: Text.pack "tag_name"
<*> v JSON..:? Text.pack "body" JSON..!= ""
name :: GitHubVersion -> String
name gh = tail $ tag_name gh
_ -> fail $ "Expected single commit to match release " <> show version <> ", but instead found: " <> show matching
fetch_gh_paginated :: JSON.FromJSON a => String -> IO [a]
fetch_gh_paginated url = do
@ -325,43 +212,80 @@ fetch_gh_paginated url = do
(_, _, _, [url, rel]) -> (rel, url)
_ -> fail $ "Assumption violated: link header entry did not match regex.\nEntry: " <> l
fetch_gh_versions :: IO ([GitHubVersion], GitHubVersion)
fetch_gh_versions = do
resp <- fetch_gh_paginated "https://api.github.com/repos/digital-asset/daml/releases"
let latest = List.maximumOn (to_v . name) $ filter (not . prerelease) resp
return (resp, latest)
data PreVersion = PreVersion { prerelease :: Bool, tag :: Version }
instance JSON.FromJSON PreVersion where
parseJSON = JSON.withObject "PreVersion" $ \v -> PreVersion
<$> v JSON..: "prerelease"
<*> let json_text = v JSON..: "tag_name"
in (version . Text.tail <$> json_text)
same_versions :: (Set.Set Version, Set.Set Version) -> [GitHubVersion] -> Bool
same_versions s3_versions gh_versions =
-- Versions 0.13.5 and earlier can no longer be built by this script, and
-- happen to not exist in the s3 repo. This means they are not generated
-- and thus do not appear in the versions.json file on s3. This means that
-- if we do not remove them from the listing here, the docs cron always
-- believes there is a change to deploy.
let gh_releases = Set.fromList $ map (to_v . name) $ filter (\v -> to_v (name v) > to_v "0.13.5") $ filter (not . prerelease) gh_versions
gh_snapshots = Set.fromList $ map (to_v . name) $ filter prerelease gh_versions
in s3_versions == (gh_releases, gh_snapshots)
data Version = Version Data.SemVer.Version
deriving (Ord, Eq)
instance Show Version where
show (Version v) = Data.SemVer.toString v
version :: Text.Text -> Version
version t = Version $ (\case Left s -> (error s); Right v -> v) $ Data.SemVer.fromText t
data Versions = Versions { top :: Version, all_versions :: Set.Set Version, dropdown :: [Version] }
deriving Eq
versions :: [PreVersion] -> Versions
versions vs =
let all_versions = Set.fromList $ map tag vs
dropdown = vs
& filter (not . prerelease)
& map tag
& filter (>= version "1.0.0")
& List.sortOn Data.Ord.Down
top = head dropdown
in Versions {..}
fetch_gh_versions :: IO Versions
fetch_gh_versions = do
response <- fetch_gh_paginated "https://api.github.com/repos/digital-asset/daml/releases"
-- versions prior to 0.13.10 cannot be built anymore and are not present in
-- the repo.
return $ versions $ filter (\v -> tag v >= version "0.13.10") response
fetch_s3_versions :: IO Versions
fetch_s3_versions = do
dropdown <- fetch "versions.json" False
-- TODO: read hidden.json after this has run once
hidden <- fetch "snapshots.json" True
return $ versions $ dropdown <> hidden
where fetch file prerelease = do
temp <- shell "mktemp"
shell_ $ "aws s3 cp s3://docs-daml-com/" <> file <> " " <> temp
s3_raw <- shell $ "cat " <> temp
let type_annotated_value :: Maybe JSON.Object
type_annotated_value = JSON.decode $ LBS.fromString s3_raw
case type_annotated_value of
Just s3_json -> return $ map (\s -> PreVersion prerelease (version s)) $ H.keys s3_json
Nothing -> Exit.die "Failed to get versions from s3"
main :: IO ()
main = do
Control.forM_ [IO.stdout, IO.stderr] $
\h -> IO.hSetBuffering h IO.LineBuffering
putStrLn "Checking for new version..."
(gh_versions, gh_latest) <- fetch_gh_versions
s3_versions_before <- fetch_s3_versions
if same_versions s3_versions_before gh_versions
gh_versions <- fetch_gh_versions
s3_versions <- fetch_s3_versions
if s3_versions == gh_versions
then do
putStrLn "No new version found, skipping."
putStrLn "Versions match, nothing to do."
Exit.exitSuccess
else do
-- We may have added versions. We need to build and push them.
let added = Set.toList $ all_versions gh_versions `Set.difference` all_versions s3_versions
IO.withTempDir $ \temp_dir -> do
putStrLn "Building docs listing"
docs_folder <- build_docs_folder temp_dir gh_versions $ name gh_latest
putStrLn "Done building docs bundle. Checking versions again to avoid race condition..."
s3_versions_after <- fetch_s3_versions
if same_versions s3_versions_after gh_versions
then do
putStrLn "No more new version, another process must have pushed already."
Exit.exitSuccess
else do
push_to_s3 docs_folder
putStrLn $ "Versions to build: " <> show added
build_and_push temp_dir added
Control.when (top gh_versions /= top s3_versions) $ do
putStrLn $ "Updating top-level version from " <> (show $ top s3_versions) <> " to " <> (show $ top gh_versions)
fetch_if_missing temp_dir (top gh_versions)
fetch_if_missing temp_dir (top s3_versions)
update_top_level temp_dir (top gh_versions) (top s3_versions)
putStrLn "Updating versions.json & hidden.json"
update_s3 temp_dir gh_versions
reset_cloudfront