document how to kill nodes (#7782)

CHANGELOG_BEGIN
CHANGELOG_END
This commit is contained in:
Gary Verhaegen 2020-10-22 15:44:48 +02:00 committed by GitHub
parent 51a97d44e4
commit e4638d9004
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -36,3 +36,49 @@ via `gcloud` by running:
```bash
gcloud auth application-default login --account your.name@gcloud-domain.com
```
## Resetting build nodes
Permissions to reset build nodes are defined in `periodic-killer.tf` using
the `killCiNodes` role. CI nodes are managed, so a killed node is
immediately replaced by a new one with the exact same configuration (but
starting its initialization from scratch); we can therefore treat killing a
node and resetting a node as the same operation.
Nodes can be listed with
```
gcloud compute instances list --project=da-dev-gcp-daml-language
```
and individual nodes can be killed with
```
gcloud compute instances --project=da-dev-gcp-daml-language delete --zone=us-east4-a vsts-agent-linux-dhw4
```
where the zone and the name have to match, i.e. the zone must be the one reported for that node in the listing.
As a reference, here are a couple of `zsh` functions I have added to my shell to
make my life easier:
```zsh
# Rebuild the cached name -> zone map of CI nodes.
#
# Globals: machines (written) - compact JSON object whose keys are the
#          names of instances starting with "vsts-" and whose values are
#          the instance zone with everything up to the last "/" removed.
# Returns: exit status of the gcloud | jq pipeline.
refresh_machines() {
  # jq program: keep only the vsts-* instances and turn them into an object.
  local to_name_zone_map='[.[] | select (.name | startswith("vsts-")) | {key: .name, value: .zone | sub (".*/"; "")}] | from_entries'
  machines=$(
    gcloud compute instances list --format=json --project=da-dev-gcp-daml-language \
      | jq -c "$to_name_zone_map"
  )
}
# Delete (and thereby reset) the given CI nodes.
#
# Arguments: one or more instance names, e.g. vsts-agent-linux-dhw4.
# Globals:   machines (read) - JSON object mapping instance name to zone;
#            filled in through refresh_machines when it is empty.
# Outputs:   a diagnostic on stderr for every name missing from the cache.
# Returns:   0 if every requested delete succeeded, 1 otherwise.
kill_machine() {
  local machine zone rc=0
  if [ -z "$machines" ]; then
    refresh_machines
  fi
  for machine in "$@"; do
    # Hand the name to jq as data (--arg) rather than splicing it into the
    # filter text, and turn a missing key into empty output instead of the
    # literal string "null" so we never call gcloud with --zone=null.
    zone=$(printf '%s\n' "$machines" | jq -r --arg name "$machine" '.[$name] // empty')
    if [ -z "$zone" ]; then
      printf 'kill_machine: unknown machine: %s (cache may be stale; run refresh_machines)\n' "$machine" >&2
      rc=1
      continue
    fi
    gcloud -q compute instances --project=da-dev-gcp-daml-language delete --zone="$zone" "$machine" || rc=1
  done
  return "$rc"
}
# Completion function for kill_machine: offers every cached machine name
# that does not already appear on the command line.
#
# Globals: machines (read) - JSON name -> zone object; refreshed when empty.
#          words (read)    - the words of the command line being completed.
_kill_machine() {
  local machine_names typed
  [ -n "$machines" ] || refresh_machines
  # JSON array of the words typed so far; left unquoted on purpose so the
  # words are passed to echo exactly as before.
  typed=$(echo -n $words | jq -sRc 'split(" ")')
  # Candidate names = all known names minus the ones already typed.
  machine_names=$(echo $machines | jq -r "keys - $typed | .[]")
  _arguments "*: :($machine_names)"
}
# Register _kill_machine as the completion function for kill_machine
# (presumably requires zsh's completion system to be loaded via compinit).
compdef _kill_machine kill_machine
```