mirror of
https://github.com/digital-asset/daml.git
synced 2024-11-10 00:35:25 +03:00
561c392b69
This PR duplicates the linux CI cluster. This is the first in a three-PR plan to implement #6400 safely while people are working.

I usually do cluster updates over the weekend because they require shutting down the entire CI system for about two hours. This is unfortunately not practical while people are working, and timezones make it difficult for me to find a time where people are not working during the week.

So instead the plan is as follows:

1. Create a duplicate of our CI cluster (this PR).
2. Wait for the new cluster to be operational (~90-120 minutes ime).
3. In the Azure Pipelines config screen, disable all the nodes of the "old" cluster, so all new jobs get assigned to the temp cluster. Wait for all jobs to finish on the old cluster.
4. Update the old cluster. Wait for it to be deployed. (Second PR.)
5. In Azure, disable temp nodes, wait for jobs to drain.
6. Delete temp nodes (third PR).

Reviewing this PR is best done by verifying you can reproduce the following shell session:

```
$ diff vsts_agent_linux.tf vsts_agent_linux_temp.tf
4,7c4,5
< resource "secret_resource" "vsts-token" {}
<
< data "template_file" "vsts-agent-linux-startup" {
< template = "${file("${path.module}/vsts_agent_linux_startup.sh")}"
---
> data "template_file" "vsts-agent-linux-startup-temp" {
> template = "${file("${path.module}/vsts_agent_linux_startup_temp.sh")}"
16c14
< resource "google_compute_region_instance_group_manager" "vsts-agent-linux" {
---
> resource "google_compute_region_instance_group_manager" "vsts-agent-linux-temp" {
18,19c16,17
< name = "vsts-agent-linux"
< base_instance_name = "vsts-agent-linux"
---
> name = "vsts-agent-linux-temp"
> base_instance_name = "vsts-agent-linux-temp"
24,25c22,23
< name = "vsts-agent-linux"
< instance_template = "${google_compute_instance_template.vsts-agent-linux.self_link}"
---
> name = "vsts-agent-linux-temp"
> instance_template = "${google_compute_instance_template.vsts-agent-linux-temp.self_link}"
36,37c34,35
< resource "google_compute_instance_template" "vsts-agent-linux" {
< name_prefix = "vsts-agent-linux-"
---
> resource "google_compute_instance_template" "vsts-agent-linux-temp" {
> name_prefix = "vsts-agent-linux-temp-"
52c50
< startup-script = "${data.template_file.vsts-agent-linux-startup.rendered}"
---
> startup-script = "${data.template_file.vsts-agent-linux-startup-temp.rendered}"
$ diff vsts_agent_linux_startup.sh vsts_agent_linux_startup_temp.sh
149c149
< su --command "sh <(curl https://nixos.org/nix/install) --daemon" --login vsts
---
> su --command "sh <(curl -sSfL https://nixos.org/nix/install) --daemon" --login vsts
$
```

and reviewing that diff, rather than looking at the added files in their entirety. The name changes are benign and needed for Terraform to appropriately keep track of which node belongs to the old vs the temp group. The only change that matters is the new group has the `-sSfL` flag so they will actually boot up. (Hopefully.)

CHANGELOG_BEGIN
CHANGELOG_END
203 lines
5.5 KiB
Bash
203 lines
5.5 KiB
Bash
#!/usr/bin/env bash
|
|
# Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
# Agent startup script
# Abort on the first failing command, unset variable or failing pipeline stage.
set -euo pipefail

## Hardening

# Power the machine off whenever this script exits. The trap is on EXIT, so
# it fires on failure and also once the script runs to completion.
trap 'shutdown -h now' EXIT

# Resolve names through a public resolver instead of the metadata server.
printf 'nameserver 8.8.8.8\n' > /etc/resolv.conf

# Remove this script from disk now that it is running.
rm --verbose --force "$0"
|
|
|
|
## Install system dependencies

# Refresh the package index, then install the base tooling in a single
# non-interactive (-y), quiet (-q) apt transaction.
apt-get update -q
apt-get install -qy \
  curl \
  sudo \
  bzip2 \
  rsync \
  jq \
  liblttng-ust0 \
  libcurl3 \
  libkrb5-3 \
  libicu55 \
  zlib1g \
  git \
  netcat \
  apt-transport-https \
  software-properties-common
|
|
|
|
# Shared libraries and helpers that headless Chrome needs, so that the
# Puppeteer tests on the gsg can run.
# Package list source:
#   https://github.com/puppeteer/puppeteer/blob/a3d1536a6b6e282a43521bea28aef027a7133df8/docs/troubleshooting.md#chrome-headless-doesnt-launch-on-unix
# Background: https://github.com/digital-asset/daml/pull/5540
apt-get install -qy \
  gconf-service libasound2 libatk1.0-0 libatk-bridge2.0-0 \
  libc6 libcairo2 libcups2 libdbus-1-3 \
  libexpat1 libfontconfig1 libgcc1 libgconf-2-4 \
  libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 \
  libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 \
  libx11-6 libx11-xcb1 libxcb1 \
  libxcomposite1 libxcursor1 libxdamage1 libxext6 \
  libxfixes3 libxi6 libxrandr2 libxrender1 \
  libxss1 libxtst6 \
  ca-certificates fonts-liberation libappindicator1 libnss3 \
  lsb-release xdg-utils wget
|
|
|
|
# Install the Google Cloud logging agent.
# -f makes curl exit non-zero on an HTTP error instead of passing the error
# page to bash; because the script runs with `set -o pipefail`, that failure
# then aborts the script rather than being masked by bash's exit status.
curl -sSfL https://dl.google.com/cloudagents/install-logging-agent.sh | bash
|
|
|
|
# Install Docker from Docker's apt repository, pinned to a fixed version.
# The release codename is looked up once, in a plain assignment, so that a
# failing lsb_release aborts the script under `set -e`; a command substitution
# buried inside another command's argument would not.
DISTRO_CODENAME=$(lsb_release -cs)
DOCKER_VERSION="5:18.09.5~3-0~ubuntu-$DISTRO_CODENAME"
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
# Prints the imported key's fingerprint to the startup output.
# NOTE(review): this only displays the key; nothing here compares it
# against an expected fingerprint.
apt-key fingerprint 0EBFCD88
add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $DISTRO_CODENAME stable"
apt-get update
apt-get install -qy "docker-ce=$DOCKER_VERSION" "docker-ce-cli=$DOCKER_VERSION" containerd.io

# Enable the docker service so it is started at boot.
systemctl enable docker
|
|
|
|
## Install the VSTS agent

# Dedicated account for the agent: group and user "vsts", both with id 3000,
# with a home directory and bash as login shell.
groupadd --gid 3000 vsts
useradd --uid 3000 --gid 3000 --shell /bin/bash --create-home vsts

# Add the docker group to the user's supplementary groups.
usermod --append --groups docker vsts
|
|
|
|
# Download, unpack and register the VSTS agent as the vsts user.
# The heredoc delimiter is quoted, so this shell passes the text through
# verbatim and the vsts user's login shell does all the expansion.
# NOTE(review): the lower-case vsts_account / vsts_pool / vsts_token
# placeholders are presumably filled in by Terraform template rendering
# before the script ever runs - confirm against the template data source.
su --login vsts <<'AGENT_SETUP'
set -euo pipefail

VSTS_ACCOUNT=${vsts_account}
VSTS_POOL=${vsts_pool}
VSTS_TOKEN=${vsts_token}

mkdir -p ~/agent
cd ~/agent
echo 'assignment=default' > .capabilities

echo Determining matching VSTS agent...
# Ask the account for the list of available linux-x64 agent packages.
VSTS_AGENT_RESPONSE=$(curl -sSfL \
  -u "user:$VSTS_TOKEN" \
  -H 'Accept:application/json;api-version=3.0-preview' \
  "https://$VSTS_ACCOUNT.visualstudio.com/_apis/distributedtask/packages/agent?platform=linux-x64")

# Sort the packages by (major, minor, patch) and keep the download URL of
# the last, i.e. highest, entry. jq prints "null" when there is none.
VSTS_AGENT_URL=$(echo "$VSTS_AGENT_RESPONSE" \
  | jq -r '.value | map([.version.major,.version.minor,.version.patch,.downloadUrl]) | sort | .[length-1] | .[3]')

if [[ -z "$VSTS_AGENT_URL" || "$VSTS_AGENT_URL" == "null" ]]; then
  echo "error: could not determine a matching VSTS agent - check that account '$VSTS_ACCOUNT' is correct and the token is valid for that account" >&2
  exit 1
fi

echo Downloading and installing VSTS agent...
curl -sSfL "$VSTS_AGENT_URL" | tar -xz --no-same-owner

# env.sh is sourced with unset-variable checking switched off, then the
# check is restored.
set +u
source ./env.sh
set -u

# Register this machine, under its hostname, with the pool; --replace takes
# over an existing registration of the same name, --unattended disables
# prompts.
./config.sh \
  --acceptTeeEula \
  --agent "$(hostname)" \
  --auth PAT \
  --pool "$VSTS_POOL" \
  --replace \
  --token "$VSTS_TOKEN" \
  --unattended \
  --url "https://$VSTS_ACCOUNT.visualstudio.com"
AGENT_SETUP
|
|
|
|
## Hardening

# Make root the owner (user and group) of the agent's top-level shell
# scripts and of its bin and externals trees.
chown -R root:root \
  /home/vsts/agent/*.sh \
  /home/vsts/agent/bin \
  /home/vsts/agent/externals
|
|
|
|
## Install Nix

# The installer needs to run as a user with sudo access, so vsts is given
# passwordless sudo for the duration of the installation and loses it again
# right after.
echo "vsts ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers.d/nix_installation
# The installer is downloaded to a temporary file before it is run. With
# `sh <(curl ...)` a failed download only hands sh an empty script, which
# exits 0, so the failure would go unnoticed at this step.
su --command '
  set -euo pipefail
  nix_installer=$(mktemp)
  curl -sSfL --output "$nix_installer" https://nixos.org/nix/install
  sh "$nix_installer" --daemon
  rm -f "$nix_installer"
' --login vsts
rm /etc/sudoers.d/nix_installation

# Note: the "hydra.da-int.net" string is now part of the name of the key for
# legacy reasons; it bears no relation to the DNS hostname of the current
# cache.
# The delimiter is quoted so the configuration is written out literally.
cat <<'NIX_CONF' > /etc/nix/nix.conf
binary-cache-public-keys = hydra.da-int.net-1:6Oy2+KYvI7xkAOg0gJisD7Nz/6m8CmyKMbWfSKUe03g= cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= hydra.nixos.org-1:CNHJZBh9K4tP3EKF6FkkgeVYsS3ohTl+oS0Qa8bezVs=
binary-caches = https://nix-cache.da-ext.net https://cache.nixos.org
build-users-group = nixbld
cores = 1
max-jobs = 0
sandbox = relaxed
NIX_CONF

# Restart the daemon so it picks up the configuration written above.
systemctl restart nix-daemon
|
|
|
|
# Warm up local caches by building dev-env and current daml master
# This is allowed to fail, as we still want to have CI machines
# around, even when their caches are only warmed up halfway
su --login vsts <<'CACHE_WARMUP'
# user-wide bazel disk cache override
echo "build:linux --disk_cache=~/.bazel-cache" > ~/.bazelrc

# clone and build
(
  git clone https://github.com/digital-asset/daml
  # If the checkout is missing (e.g. the clone failed), leave the subshell
  # instead of attempting the install and build scripts from the home
  # directory.
  cd daml || exit 1
  ./ci/dev-env-install.sh
  ./build.sh "_$(uname)"
) || true
CACHE_WARMUP
|
|
|
|
# Purge old agents: run the cleanup script from the daml checkout as the
# vsts user. This is best effort; neither a missing checkout nor a failing
# script aborts the startup.
su --login vsts <<'PURGE_OLD_AGENTS'
if cd daml; then
  VSTS_ACCOUNT=${vsts_account} VSTS_POOL=${vsts_pool} VSTS_TOKEN=${vsts_token} ./ci/azure-cleanup/purge_old_agents.py || true
fi
PURGE_OLD_AGENTS

# Drop the /home/vsts/daml checkout that the cache warmup may have left
# behind; it is fine if there is nothing to remove.
rm --recursive /home/vsts/daml || true
|
|
|
|
## Finish
|
|
|
|
# Fake local webserver, taken from the docker image.
# Loops forever: each iteration has nc listen on port 80 and feeds it a fixed
# "302 Found" response redirecting to the account's agent pool admin page.
# Whatever nc prints is discarded.
web-server() {
  while :; do
    printf 'HTTP/1.1 302 Found\r\nLocation: https://%s.visualstudio.com/_admin/_AgentPool\r\n\r\n' "${vsts_account}" \
      | nc -l -p 80 -q 0 > /dev/null
  done
}
|
|
# Run the redirecting webserver in the background.
web-server &

# Start the VSTS agent in the foreground as the vsts user. This is the last
# command of the script, so the script ends when the agent does.
su --login --command 'cd /home/vsts/agent && exec ./run.sh' - vsts
|