#!/usr/bin/env bash
# Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Agent startup script
set -euo pipefail

## Hardening

# Commit harakiri on failure: any exit path (error or normal completion)
# powers the machine off, so a half-configured agent never stays around.
trap "shutdown -h now" EXIT

# replace the default nameserver to not use the metadata server
echo "nameserver 8.8.8.8" > /etc/resolv.conf

# delete self so the credentials templated into this script do not
# linger on disk ("$0", not " $0 " — the stray spaces made rm target a
# non-existent file and leave the script behind)
rm -vf "$0"
## Install system dependencies
apt-get update -q
# Single install: tools for this script (curl, jq), the VSTS agent's
# runtime deps (liblttng-ust0, libcurl3, libkrb5-3, libicu55, zlib1g),
# and repo-management helpers used by the docker install below.
apt-get install -qy \
  curl sudo \
  bzip2 rsync \
  jq liblttng-ust0 libcurl3 libkrb5-3 libicu55 zlib1g \
  git \
  netcat \
  apt-transport-https \
  software-properties-common
# Install dependencies for Chrome (to run Puppeteer tests on the gsg)
# list taken from: https://github.com/puppeteer/puppeteer/blob/a3d1536a6b6e282a43521bea28aef027a7133df8/docs/troubleshooting.md#chrome-headless-doesnt-launch-on-unix
# see https://github.com/digital-asset/daml/pull/5540 for context
apt-get install -qy \
  gconf-service \
  libasound2 \
  libatk1.0-0 \
  libatk-bridge2.0-0 \
  libc6 \
  libcairo2 \
  libcups2 \
  libdbus-1-3 \
  libexpat1 \
  libfontconfig1 \
  libgcc1 \
  libgconf-2-4 \
  libgdk-pixbuf2.0-0 \
  libglib2.0-0 \
  libgtk-3-0 \
  libnspr4 \
  libpango-1.0-0 \
  libpangocairo-1.0-0 \
  libstdc++6 \
  libx11-6 \
  libx11-xcb1 \
  libxcb1 \
  libxcomposite1 \
  libxcursor1 \
  libxdamage1 \
  libxext6 \
  libxfixes3 \
  libxi6 \
  libxrandr2 \
  libxrender1 \
  libxss1 \
  libxtst6 \
  ca-certificates \
  fonts-liberation \
  libappindicator1 \
  libnss3 \
  lsb-release \
  xdg-utils \
  wget
# Install the Google Cloud logging agent so machine logs end up in
# Stackdriver.
curl -sSL https://dl.google.com/cloudagents/install-logging-agent.sh | bash

# install docker
# Pin an exact docker-ce version; '5:' is the epoch and the suffix is the
# Ubuntu release codename (from lsb_release -cs).
DOCKER_VERSION="5:18.09.5~3-0~ubuntu-$(lsb_release -cs)"
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
apt-key fingerprint 0EBFCD88
add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update
apt-get install -qy docker-ce="$DOCKER_VERSION" docker-ce-cli="$DOCKER_VERSION" containerd.io
# Start docker daemon
systemctl enable docker
## Install the VSTS agent

# Dedicated unprivileged user (fixed uid/gid 3000) that the agent runs as.
groupadd --gid 3000 vsts
useradd \
  --create-home \
  --gid 3000 \
  --shell /bin/bash \
  --uid 3000 \
  vsts
# add docker group to user so builds can talk to the Docker daemon
usermod -aG docker vsts
2019-04-09 19:59:37 +03:00
su --login vsts <<'AGENT_SETUP'
set -euo pipefail
VSTS_ACCOUNT = ${ vsts_account }
VSTS_POOL = ${ vsts_pool }
VSTS_TOKEN = ${ vsts_token }
mkdir -p ~/agent
cd ~/agent
add default machine capability (#5912)
add default machine capability
We semi-regularly need to do work that has the potential to disrupt a
machine's local cache, rendering it broken for other streams of work.
This can include upgrading nix, upgrading Bazel, debugging caching
issues, or anything related to Windows.
Right now we do not have any good solution for these situations. We can
either not do those streams of work, or we can proceed with them and
just accept that all other builds may get affected depending on which
machine they get assigned to. Debugging broken nodes is particularly
tricky as we do not have any way to force a build to run on a given
node.
This PR aims at providing a better alternative by (ab)using an Azure
Pipelines feature called
[capabilities](https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/agents?view=azure-devops&tabs=browser#capabilities).
The idea behind capabilities is that you assign a set of tags to a
machine, and then a job can express its
[demands](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml),
i.e. specify a set of tags machines need to have in order to run it.
Support for this is fairly badly documented. We can gather from the
documentation that a job can specify two things about a capability
(through its `demands`): that a given tag exists, and that a given tag
has an exact specified value. In particular, a job cannot specify that a
capability should _not_ be present, meaning we cannot rely on, say,
adding a "broken" tag to broken machines.
Documentation on how to set capabilities for an agent is basically
nonexistent, but [looking at the
code](https://github.com/microsoft/azure-pipelines-agent/blob/master/src/Microsoft.VisualStudio.Services.Agent/Capabilities/UserCapabilitiesProvider.cs)
indicates that they can be set by using a simple `key=value`-formatted
text file, provided we can find the right place to put this file.
This PR adds this file to our Linux, macOS and Windows node init scripts
to define an `assignment` capability and adds a demand for a `default`
value on each job. From then on, when we hit a case where we want a PR
to run on a specific node, and to prevent other PRs from running on that
node, we can manually override the capability from the Azure UI and
update the demand in the relevant YAML file in the PR.
CHANGELOG_BEGIN
CHANGELOG_END
2020-05-09 19:21:42 +03:00
echo 'assignment=default' > .capabilities
2019-04-09 19:59:37 +03:00
echo Determining matching VSTS agent...
VSTS_AGENT_RESPONSE = $( curl -sSfL \
-u " user: $VSTS_TOKEN " \
-H 'Accept:application/json;api-version=3.0-preview' \
" https:// $VSTS_ACCOUNT .visualstudio.com/_apis/distributedtask/packages/agent?platform=linux-x64 " )
VSTS_AGENT_URL = $( echo " $VSTS_AGENT_RESPONSE " \
| jq -r '.value | map([.version.major,.version.minor,.version.patch,.downloadUrl]) | sort | .[length-1] | .[3]' )
if [ -z " $VSTS_AGENT_URL " -o " $VSTS_AGENT_URL " = = "null" ] ; then
echo 1>& 2 error: could not determine a matching VSTS agent - check that account \' $VSTS_ACCOUNT \' is correct and the token is valid for that account
exit 1
fi
echo Downloading and installing VSTS agent...
curl -sSfL " $VSTS_AGENT_URL " | tar -xz --no-same-owner
set +u
source ./env.sh
set -u
./config.sh \
--acceptTeeEula \
--agent " $( hostname) " \
--auth PAT \
--pool " $VSTS_POOL " \
--replace \
--token " $VSTS_TOKEN " \
--unattended \
--url " https:// $VSTS_ACCOUNT .visualstudio.com "
AGENT_SETUP
## Hardening
# The agent binaries are owned by root so a build running as vsts cannot
# tamper with them. (No space after '{' — the corrupted "{ *.sh" glob
# would not brace-expand.)
chown --recursive root:root /home/vsts/agent/{*.sh,bin,externals}

## Install Nix
# This needs to run inside of a user with sudo access; grant it
# temporarily and revoke immediately after the install.
echo "vsts ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers.d/nix_installation
su --command "sh <(curl -sSfL https://nixos.org/nix/install) --daemon" --login vsts
rm /etc/sudoers.d/nix_installation
# Note: the "hydra.da-int.net" string is now part of the name of the key for
# legacy reasons; it bears no relation to the DNS hostname of the current
# cache.
# (Heredoc delimiter must be a single word: "NIX_CON F" never terminated.)
cat <<NIX_CONF > /etc/nix/nix.conf
binary-cache-public-keys = hydra.da-int.net-1:6Oy2+KYvI7xkAOg0gJisD7Nz/6m8CmyKMbWfSKUe03g= cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= hydra.nixos.org-1:CNHJZBh9K4tP3EKF6FkkgeVYsS3ohTl+oS0Qa8bezVs=
binary-caches = https://nix-cache.da-ext.net https://cache.nixos.org
build-users-group = nixbld
cores = 1
max-jobs = 0
sandbox = relaxed
NIX_CONF
systemctl restart nix-daemon
# Warm up local caches by building dev-env and current daml master
# This is allowed to fail, as we still want to have CI machines
# around, even when their caches are only warmed up halfway
su --login vsts <<'CACHE_WARMUP'
# user-wide bazel disk cache override
echo "build:linux --disk_cache=~/.bazel-cache" > ~/.bazelrc
# clone and build; the subshell keeps the || true scoped to the whole
# warmup so a build failure does not kill machine provisioning
(
  git clone https://github.com/digital-asset/daml
  cd daml
  ./ci/dev-env-install.sh
  ./build.sh "_$(uname)"
) || true
CACHE_WARMUP
# Remove /home/vsts/daml folder that might be present from cache warmup
rm -R /home/vsts/daml || true
## Finish

# run the fake local webserver, taken from the docker image: answer every
# connection on port 80 with a redirect to the agent-pool page.
# ${vsts_account} is a Terraform template variable, substituted before
# this script runs.
web-server() {
  while true; do
    printf 'HTTP/1.1 302 Found\r\nLocation: https://%s.visualstudio.com/_admin/_AgentPool\r\n\r\n' "${vsts_account}" | nc -l -p 80 -q 0 > /dev/null
  done
}
web-server &

# Start the VSTS agent in the foreground; when run.sh exits, the EXIT trap
# above shuts the machine down.
su --login --command "cd /home/vsts/agent && exec ./run.sh" - vsts