daml/infra/vsts_agent_linux_startup.sh

197 lines
5.3 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Agent startup script
set -euo pipefail
## Hardening
# Commit harakiri on failure
trap "shutdown -h now" EXIT
# replace the default nameserver to not use the metadata server
echo "nameserver 8.8.8.8" > /etc/resolv.conf
# delete self
rm -vf "$0"
## Install system dependencies
apt-get update -q
apt-get install -qy \
curl sudo \
bzip2 rsync \
jq liblttng-ust0 libcurl3 libkrb5-3 libicu55 zlib1g \
git \
netcat \
apt-transport-https \
software-properties-common
# Install dependencies for Chrome (to run Puppeteer tests on the gsg)
# list taken from: https://github.com/puppeteer/puppeteer/blob/a3d1536a6b6e282a43521bea28aef027a7133df8/docs/troubleshooting.md#chrome-headless-doesnt-launch-on-unix
# see https://github.com/digital-asset/daml/pull/5540 for context
apt-get install -qy \
gconf-service \
libasound2 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libc6 \
libcairo2 \
libcups2 \
libdbus-1-3 \
libexpat1 \
libfontconfig1 \
libgcc1 \
libgconf-2-4 \
libgdk-pixbuf2.0-0 \
libglib2.0-0 \
libgtk-3-0 \
libnspr4 \
libpango-1.0-0 \
libpangocairo-1.0-0 \
libstdc++6 \
libx11-6 \
libx11-xcb1 \
libxcb1 \
libxcomposite1 \
libxcursor1 \
libxdamage1 \
libxext6 \
libxfixes3 \
libxi6 \
libxrandr2 \
libxrender1 \
libxss1 \
libxtst6 \
ca-certificates \
fonts-liberation \
libappindicator1 \
libnss3 \
lsb-release \
xdg-utils \
wget
curl -sSL https://dl.google.com/cloudagents/install-logging-agent.sh | bash
#install docker
DOCKER_VERSION="5:18.09.5~3-0~ubuntu-$(lsb_release -cs)"
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
apt-key fingerprint 0EBFCD88
add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update
apt-get install -qy docker-ce=$DOCKER_VERSION docker-ce-cli=$DOCKER_VERSION containerd.io
#Start docker daemon
systemctl enable docker
## Install the VSTS agent
groupadd --gid 3000 vsts
useradd \
--create-home \
--gid 3000 \
--shell /bin/bash \
--uid 3000 \
vsts
#add docker group to user
usermod -aG docker vsts
su --login vsts <<'AGENT_SETUP'
set -euo pipefail
VSTS_ACCOUNT=${vsts_account}
VSTS_POOL=${vsts_pool}
VSTS_TOKEN=${vsts_token}
mkdir -p ~/agent
cd ~/agent
add default machine capability (#5912) add default machine capability We semi-regularly need to do work that has the potential to disrupt a machine's local cache, rendering it broken for other streams of work. This can include upgrading nix, upgrading Bazel, debugging caching issues, or anything related to Windows. Right now we do not have any good solution for these situations. We can either not do those streams of work, or we can proceed with them and just accept that all other builds may get affected depending on which machine they get assigned to. Debugging broken nodes is particularly tricky as we do not have any way to force a build to run on a given node. This PR aims at providing a better alternative by (ab)using an Azure Pipelines feature called [capabilities](https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/agents?view=azure-devops&tabs=browser#capabilities). The idea behind capabilities is that you assign a set of tags to a machine, and then a job can express its [demands](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml), i.e. specify a set of tags machines need to have in order to run it. Support for this is fairly badly documented. We can gather from the documentation that a job can specify two things about a capability (through its `demands`): that a given tag exists, and that a given tag has an exact specified value. In particular, a job cannot specify that a capability should _not_ be present, meaning we cannot rely on, say, adding a "broken" tag to broken machines. Documentation on how to set capabilities for an agent is basically nonexistent, but [looking at the code](https://github.com/microsoft/azure-pipelines-agent/blob/master/src/Microsoft.VisualStudio.Services.Agent/Capabilities/UserCapabilitiesProvider.cs) indicates that they can be set by using a simple `key=value`-formatted text file, provided we can find the right place to put this file. This PR adds this file to our Linux, macOS and Windows node init scripts to define an `assignment` capability and adds a demand for a `default` value on each job. From then on, when we hit a case where we want a PR to run on a specific node, and to prevent other PRs from running on that node, we can manually override the capability from the Azure UI and update the demand in the relevant YAML file in the PR. CHANGELOG_BEGIN CHANGELOG_END
2020-05-09 19:21:42 +03:00
echo 'assignment=default' > .capabilities
echo Determining matching VSTS agent...
VSTS_AGENT_RESPONSE=$(curl -sSfL \
-u "user:$VSTS_TOKEN" \
-H 'Accept:application/json;api-version=3.0-preview' \
"https://$VSTS_ACCOUNT.visualstudio.com/_apis/distributedtask/packages/agent?platform=linux-x64")
VSTS_AGENT_URL=$(echo "$VSTS_AGENT_RESPONSE" \
| jq -r '.value | map([.version.major,.version.minor,.version.patch,.downloadUrl]) | sort | .[length-1] | .[3]')
if [ -z "$VSTS_AGENT_URL" -o "$VSTS_AGENT_URL" == "null" ]; then
echo 1>&2 error: could not determine a matching VSTS agent - check that account \'$VSTS_ACCOUNT\' is correct and the token is valid for that account
exit 1
fi
echo Downloading and installing VSTS agent...
curl -sSfL "$VSTS_AGENT_URL" | tar -xz --no-same-owner
set +u
source ./env.sh
set -u
./config.sh \
--acceptTeeEula \
--agent "$(hostname)" \
--auth PAT \
--pool "$VSTS_POOL" \
--replace \
--token "$VSTS_TOKEN" \
--unattended \
--url "https://$VSTS_ACCOUNT.visualstudio.com"
AGENT_SETUP
## Hardening
chown --recursive root:root /home/vsts/agent/{*.sh,bin,externals}
## Install Nix
# This needs to run inside of a user with sudo access
echo "vsts ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers.d/nix_installation
su --command "sh <(curl -sSfL https://nixos.org/nix/install) --daemon" --login vsts
rm /etc/sudoers.d/nix_installation
# Note: the "hydra.da-int.net" string is now part of the name of the key for
# legacy reasons; it bears no relation to the DNS hostname of the current
# cache.
cat <<NIX_CONF > /etc/nix/nix.conf
binary-cache-public-keys = hydra.da-int.net-1:6Oy2+KYvI7xkAOg0gJisD7Nz/6m8CmyKMbWfSKUe03g= cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= hydra.nixos.org-1:CNHJZBh9K4tP3EKF6FkkgeVYsS3ohTl+oS0Qa8bezVs=
binary-caches = https://nix-cache.da-ext.net https://cache.nixos.org
build-users-group = nixbld
cores = 1
max-jobs = 0
sandbox = relaxed
NIX_CONF
systemctl restart nix-daemon
# Warm up local caches by building dev-env and current daml master
# This is allowed to fail, as we still want to have CI machines
# around, even when their caches are only warmed up halfway
su --login vsts <<'CACHE_WARMUP'
# user-wide bazel disk cache override
echo "build:linux --disk_cache=~/.bazel-cache" > ~/.bazelrc
# clone and build
(
git clone https://github.com/digital-asset/daml
cd daml
./ci/dev-env-install.sh
./build.sh "_$(uname)"
) || true
CACHE_WARMUP
# Remove /home/vsts/daml folder that might be present from cache warmup
rm -R /home/vsts/daml || true
## Finish
# run the fake local webserver, taken from the docker image
web-server() {
while true; do
printf 'HTTP/1.1 302 Found\r\nLocation: https://%s.visualstudio.com/_admin/_AgentPool\r\n\r\n' "${vsts_account}" | nc -l -p 80 -q 0 > /dev/null
done
}
web-server &
# Start the VSTS agent
su --login --command "cd /home/vsts/agent && exec ./run.sh" - vsts