infra: remove stale GCP resources (#18082)

This commit is contained in:
Gary Verhaegen 2024-01-03 17:32:22 +01:00 committed by GitHub
parent 40d963b50e
commit faf1604308
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 0 additions and 312 deletions

View File

@ -1,101 +0,0 @@
# Copyright (c) 2023 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# This file defines a machine meant to destroy/recreate all our CI nodes every
# night.
# Dedicated identity for the periodic-killer machine; its email is what the
# IAM binding and the instance's service_account block refer to.
resource "google_service_account" "periodic-killer" {
  account_id = "periodic-killer"
}
# Custom project role bundling the Compute permissions used to list zones and
# instances, delete instances, and poll the resulting zone operations.
resource "google_project_iam_custom_role" "periodic-killer" {
  title   = "Permissions to list & kill CI nodes"
  role_id = "killCiNodes"

  permissions = [
    "compute.instances.delete",
    "compute.instances.list",
    "compute.zoneOperations.get",
    "compute.zones.list",
  ]
}
locals {
  # IAM members that are granted the "killCiNodes" custom role. The IAM binding
  # resource indexes into this list by position, so keep the order stable.
  accounts_that_can_kill_machines = [
    # Built by hand from the service account email: per the original author,
    # this should reference a resource attribute directly (they mention
    # google_project_iam_custom_role.periodic-killer.id "or something"), but
    # nothing suitable is exposed.
    "serviceAccount:${google_service_account.periodic-killer.email}",

    # Human operators allowed to kill CI machines manually.
    "user:gary.verhaegen@digitalasset.com",
    "user:gerolf.seitz@digitalasset.com",
  ]
}
# Grants the custom kill role to each member listed in
# local.accounts_that_can_kill_machines: one binding per list entry, addressed
# by its position in the list.
resource "google_project_iam_member" "periodic-killer" {
  count = length(local.accounts_that_can_kill_machines)

  member  = local.accounts_that_can_kill_machines[count.index]
  role    = google_project_iam_custom_role.periodic-killer.id
  project = local.project
}
# Small VM whose only job is to delete, once a day at 04:00 (machine time),
# every Compute instance whose name starts with "ci-". It authenticates as the
# periodic-killer service account.
#
# `count = 0` means the machine is currently not provisioned; the definition is
# kept so it can be re-enabled by bumping the count.
resource "google_compute_instance" "periodic-killer" {
  count        = 0
  name         = "periodic-killer"
  machine_type = "g1-small"
  zone         = "us-east4-a"
  labels       = local.machine-labels

  boot_disk {
    initialize_params {
      image = "ubuntu-1804-lts"
    }
  }

  network_interface {
    network = "default"

    // Ephemeral IP to get access to the Internet
    access_config {}
  }

  service_account {
    email  = google_service_account.periodic-killer.email
    scopes = ["cloud-platform"]
  }

  # Terraform may stop/start the VM to apply changes, which re-runs the startup
  # script below.
  allow_stopping_for_update = true

  # Startup script (runs on every boot, so it has to be idempotent):
  #   1. installs jq and (re)starts /root/log,
  #   2. (re)writes /root/periodic-kill.sh, which lists all instances, keeps
  #      those named "ci-*" and deletes them one by one, ignoring individual
  #      failures,
  #   3. installs the daily cron entry in its own /etc/cron.d file. The file is
  #      overwritten rather than appended to, so a reboot never produces
  #      duplicate cron lines (appending to /etc/crontab did),
  #   4. tails the log forever; the script intentionally never returns.
  # Inside the heredoc, "\$" defers expansion to the generated kill script
  # instead of expanding while the startup script itself runs.
  metadata_startup_script = <<STARTUP
set -euxo pipefail
apt-get update
apt-get install -y jq
echo "$(date -Is -u) boot" > /root/log
cat <<CRON > /root/periodic-kill.sh
#!/usr/bin/env bash
set -euo pipefail
echo "\$(date -Is -u) start"
MACHINES=\$(/snap/bin/gcloud compute instances list --format=json | jq -c '.[] | select(.name | startswith("ci-")) | [.name, .zone]')
for m in \$MACHINES; do
MACHINE_NAME=\$(echo \$m | jq -r '.[0]')
MACHINE_ZONE=\$(echo \$m | jq -r '.[1]')
# We do not want to abort the script on error here because failing to
# reboot one machine should not prevent trying to reboot the others.
/snap/bin/gcloud -q compute instances delete \$MACHINE_NAME --zone=\$MACHINE_ZONE || true
done
echo "\$(date -Is -u) end"
CRON
chmod +x /root/periodic-kill.sh
cat <<CRONTAB > /etc/cron.d/periodic-kill
0 4 * * * root /root/periodic-kill.sh >> /root/log 2>&1
CRONTAB
tail -f /root/log
STARTUP
}

View File

@ -3,22 +3,6 @@
locals {
ubuntu = {
gcp = [
{
name = "ci-u1",
disk_size = 400,
size = 0,
assignment = "default",
nix = "su --command \"sh <(curl -sSfL https://nixos.org/nix/install) --daemon\" --login vsts"
},
{
name = "ci-u2",
disk_size = 400,
size = 0,
assignment = "default",
nix = "su --command \"sh <(curl -sSfL https://nixos.org/nix/install) --daemon\" --login vsts"
},
],
azure = [
{
name = "du1",

View File

@ -1,81 +0,0 @@
# Copyright (c) 2023 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Regional managed instance groups for the Ubuntu 20.04 VSTS agents: one group
# per entry of local.ubuntu.gcp, each backed by the instance template that
# sits at the same index.
resource "google_compute_region_instance_group_manager" "vsts-agent-ubuntu_20_04" {
  provider = google-beta
  count    = length(local.ubuntu.gcp)

  region             = "us-east1"
  name               = local.ubuntu.gcp[count.index].name
  base_instance_name = local.ubuntu.gcp[count.index].name
  target_size        = local.ubuntu.gcp[count.index].size

  version {
    instance_template = google_compute_instance_template.vsts-agent-ubuntu_20_04[count.index].self_link
    name              = local.ubuntu.gcp[count.index].name
  }

  # Enable once the provider has been upgraded past 3.55:
  #distribution_policy_target_shape = "ANY"

  # Roll out template changes right away by replacing instances, creating up
  # to 3 extra instances at a time and waiting 60s before counting a new one
  # as ready. Instances are not proactively redistributed.
  update_policy {
    type                         = "PROACTIVE"
    minimal_action               = "REPLACE"
    instance_redistribution_type = "NONE"
    max_surge_fixed              = 3
    min_ready_sec                = 60
  }
}
# Instance template for the Ubuntu 20.04 VSTS agents, one template per entry
# of local.ubuntu.gcp. The instance group manager with the same resource name
# picks the template at the matching index.
resource "google_compute_instance_template" "vsts-agent-ubuntu_20_04" {
count = length(local.ubuntu.gcp)
# A name prefix (not a fixed name) is used so that a replacement template can
# exist alongside the one it supersedes; see create_before_destroy below.
name_prefix = "${local.ubuntu.gcp[count.index].name}-"
machine_type = "c2-standard-8"
labels = local.machine-labels
# SSD persistent disk created from the Ubuntu 20.04 LTS image, sized per entry.
disk {
disk_size_gb = local.ubuntu.gcp[count.index].disk_size
disk_type = "pd-ssd"
source_image = "ubuntu-os-cloud/ubuntu-2004-lts"
}
# Create the new template before destroying the old one.
lifecycle {
create_before_destroy = true
}
metadata = {
# Startup script rendered from ubuntu_startup.sh. Note that the template
# variable `size` receives the entry's disk_size (not its `size`, which is the
# group's target size), and `gcp_logging` injects the shell snippet that
# installs the Google logging agent.
startup-script = templatefile("${path.module}/ubuntu_startup.sh", {
vsts_token = secret_resource.vsts-token.value
vsts_account = "digitalasset"
vsts_pool = "ubuntu_20_04"
size = local.ubuntu.gcp[count.index].disk_size
gcp_logging = <<EOF
# Taken from https://cloud.google.com/logging/docs/agent/logging/installation
curl -sSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
curl -sSL https://dl.google.com/cloudagents/add-logging-agent-repo.sh | bash -s -- --also-install
EOF
assignment = local.ubuntu.gcp[count.index].assignment
nix = local.ubuntu.gcp[count.index].nix
})
# Shutdown script: as user vsts, from /home/vsts/agent, runs
# `./config.sh remove` with the token exported in the environment.
# NOTE(review): nonsensitive() unwraps the secret, so the token is embedded in
# this metadata string as plain text — confirm this is acceptable.
shutdown-script = nonsensitive("#!/usr/bin/env bash\nset -euo pipefail\ncd /home/vsts/agent\nsu vsts <<SHUTDOWN_AGENT\nexport VSTS_AGENT_INPUT_TOKEN='${secret_resource.vsts-token.value}'\n./config.sh remove --unattended --auth PAT\nSHUTDOWN_AGENT\n ")
}
network_interface {
network = "default"
// Ephemeral IP to get access to the Internet
access_config {}
}
# Instances run as the log-writer service account with the cloud-platform scope.
service_account {
email = "log-writer@da-dev-gcp-daml-language.iam.gserviceaccount.com"
scopes = ["cloud-platform"]
}
# Regular (non-preemptible) instances that are terminated on host maintenance
# and are not restarted automatically.
scheduling {
automatic_restart = false
on_host_maintenance = "TERMINATE"
preemptible = false
}
}

View File

@ -6,20 +6,6 @@ locals {
vsts_account = "digitalasset"
vsts_pool = "windows-pool"
windows = {
gcp = [
{
name = "ci-w1",
size = 0,
assignment = "default",
disk_size = 400,
},
{
name = "ci-w2"
size = 0,
assignment = "default",
disk_size = 400,
},
],
azure = [
{
name = "dw1",

View File

@ -1,100 +0,0 @@
# Copyright (c) 2023 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Regional managed instance groups for the Windows VSTS agents: one group per
# entry of local.windows.gcp, each backed by the instance template that sits
# at the same index.
resource "google_compute_region_instance_group_manager" "vsts-agent-windows" {
  provider = google-beta
  count    = length(local.windows.gcp)

  region      = "us-east1"
  name        = local.windows.gcp[count.index].name
  target_size = local.windows.gcp[count.index].size

  # The base name must stay short: Windows hostnames are limited to 12(?)
  # characters, and 5 of those are taken by the random postfix.
  base_instance_name = local.windows.gcp[count.index].name

  version {
    instance_template = google_compute_instance_template.vsts-agent-windows[count.index].self_link
    name              = local.windows.gcp[count.index].name
  }

  # Enable once the provider has been upgraded past 3.55:
  #distribution_policy_target_shape = "ANY"

  update_policy {
    type                         = "PROACTIVE"
    minimal_action               = "REPLACE"
    instance_redistribution_type = "NONE"

    # The minimum allowed value is the number of availability zones (3).
    max_surge_fixed = 3

    # Measured boot time: last serial console timestamp after boot minus VM
    # start, 09:54:28 - 09:45:55 = 513 seconds; rounded up.
    min_ready_sec = 520
  }
}
# Instance template for the Windows VSTS agents, one template per entry of
# local.windows.gcp. The instance group manager with the same resource name
# picks the template at the matching index.
resource "google_compute_instance_template" "vsts-agent-windows" {
count = length(local.windows.gcp)
# A name prefix (not a fixed name) is used so that a replacement template can
# exist alongside the one it supersedes; see create_before_destroy below.
name_prefix = "${local.windows.gcp[count.index].name}-"
machine_type = "c2-standard-8"
labels = local.machine-labels
# SSD persistent disk created from the Windows Server 2016 image, sized per entry.
disk {
disk_size_gb = local.windows.gcp[count.index].disk_size
disk_type = "pd-ssd"
# find the image name with `gcloud compute images list`
source_image = "windows-cloud/windows-2016"
}
# Drive D:\ for the agent work folder
# (second SSD disk of the same size, with no source image).
disk {
disk_size_gb = local.windows.gcp[count.index].disk_size
disk_type = "pd-ssd"
}
# Create the new template before destroying the old one.
lifecycle {
create_before_destroy = true
}
metadata = {
// Prepare the machine
# The script is rendered from windows_startup.ps1; `gcp_logging` injects the
# PowerShell snippet below and `azure_disk` is passed as an empty string.
# NOTE(review): the comment inside the snippet says "SumoLogic", but the
# commands download and install the Stackdriver logging agent — the in-script
# comment looks stale. It is left untouched here because editing it would
# change the rendered script.
windows-startup-script-ps1 = templatefile("${path.module}/windows_startup.ps1", {
vsts_token = nonsensitive(secret_resource.vsts-token.value)
vsts_account = "digitalasset"
vsts_pool = "windows-pool"
gcp_logging = <<EOF
# Redirect logs to SumoLogic
cd $env:UserProfile;
Invoke-WebRequest https://dl.google.com/cloudagents/windows/StackdriverLogging-v1-9.exe -OutFile StackdriverLogging-v1-9.exe;
.\StackdriverLogging-v1-9.exe /S /D="C:\Stackdriver\Logging\"
EOF
assignment = local.windows.gcp[count.index].assignment
azure_disk = ""
})
# Shutdown script: runs the agent's `config remove`, passing the token on the
# command line.
# NOTE(review): nonsensitive() unwraps the secret, so the token is embedded in
# this metadata string as plain text — confirm this is acceptable.
windows-shutdown-script-ps1 = nonsensitive("c://agent/config remove --unattended --auth PAT --token '${secret_resource.vsts-token.value}'")
}
network_interface {
network = "default"
// Ephemeral IP to get access to the Internet
access_config {}
}
# Instances run as the log-writer service account with the cloud-platform scope.
service_account {
scopes = ["cloud-platform"]
email = "log-writer@da-dev-gcp-daml-language.iam.gserviceaccount.com"
}
# Regular (non-preemptible) instances that are terminated on host maintenance
# and are not restarted automatically.
scheduling {
automatic_restart = false
on_host_maintenance = "TERMINATE"
preemptible = false
}
}