mirror of
https://github.com/digital-asset/daml.git
synced 2024-11-10 00:35:25 +03:00
ci: do not kill busy machines (#18151)
This commit is contained in:
parent
90fc42180c
commit
78c33feb2b
140
infra/azure.tf
140
infra/azure.tf
@ -99,59 +99,10 @@ az configure --defaults group=${azurerm_resource_group.daml-ci.name} > /root/log
|
||||
cat <<'CRON' > /root/daily-reset.sh
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "$(date -Is -u) start"
|
||||
|
||||
echo "$(date -Is -u) arg: $1"
|
||||
target="$(/root/get-targets.sh $1)"
|
||||
echo "$(date -Is -u) target: $target"
|
||||
|
||||
AZURE_PAT=${secret_resource.vsts-token.value}
|
||||
|
||||
echo "$(date -Is -u) Shutting down all machines"
|
||||
|
||||
for set in du1 du2 dw1 dw2; do
|
||||
echo "$(date -Is -u) - Setting scale set $set size to 0"
|
||||
az vmss scale -n $set --new-capacity 0 >/dev/null
|
||||
done
|
||||
|
||||
echo "$(date -Is -u) Waiting for scale sets to adapt"
|
||||
|
||||
sleep 300
|
||||
|
||||
echo "$(date -Is -u) Removing all nodes from Azure Pipelines"
|
||||
|
||||
for pool in 11 18; do
|
||||
agents=$(curl -s -u :$AZURE_PAT "https://dev.azure.com/digitalasset/_apis/distributedtask/pools/$pool/agents?api-version=7.0" | jq -c '[.value[] | {name,id}]')
|
||||
for idx in $(seq 0 $(echo "$agents" | jq 'length - 1')); do
|
||||
name=$(echo "$agents" | jq -r ".[$idx].name")
|
||||
id=$(echo "$agents" | jq -r ".[$idx].id")
|
||||
if [[ "$name" =~ d[uw][12]-.* ]]; then
|
||||
echo "$(date -Is -u) - Removing agent $name ($id)"
|
||||
curl -s -u :$AZURE_PAT -XDELETE "https://dev.azure.com/digitalasset/_apis/distributedtask/pools/$${pool}/agents/$${id}?api-version=7.0" &>/dev/null
|
||||
else
|
||||
echo "$(date -Is -u) - Leaving agent $name untouched"
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
echo "$(date -Is -u) Bringing scale sets back up"
|
||||
|
||||
for set in du1 du2 dw1 dw2; do
|
||||
size=$(echo "$target" | jq --arg name $set -r '.[$name]')
|
||||
echo "$(date -Is -u) - Setting scale set $set size to $size"
|
||||
az vmss scale -n $set --new-capacity $size >/dev/null
|
||||
done
|
||||
|
||||
echo "$(date -Is -u) end"
|
||||
CRON
|
||||
|
||||
chmod +x /root/daily-reset.sh
|
||||
|
||||
cat <<'GET_TARGETS' > /root/get-targets.sh
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
arg_target="$1"
|
||||
|
||||
month=$(date +%m)
|
||||
day_of_month=$(date +%d)
|
||||
@ -159,23 +110,102 @@ if [[ "$month" -eq 12 && "$day_of_month" -gt 22 ]]; then
|
||||
# We treat the days after December 22nd as weekend days.
|
||||
target='low'
|
||||
else
|
||||
target="$arg_target"
|
||||
target="$1"
|
||||
fi
|
||||
|
||||
case $target in
|
||||
high)
|
||||
echo '{"du1":10,"du2":0,"dw1":5,"dw2":0}'
|
||||
target='{"du1":10,"du2":0,"dw1":5,"dw2":0}'
|
||||
;;
|
||||
low)
|
||||
echo '{"du1":2,"du2":0,"dw1":1,"dw2":0}'
|
||||
target='{"du1":0,"du2":2,"dw1":0,"dw2":1}'
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: unexpected target '$target'" >&2
|
||||
;;
|
||||
esac
|
||||
GET_TARGETS
|
||||
|
||||
chmod +x /root/get-targets.sh
|
||||
echo "$(date -Is -u) target: $target"
|
||||
|
||||
AZURE_PAT=${secret_resource.vsts-token.value}
|
||||
|
||||
agent_is_busy() (
|
||||
pool_id=$1
|
||||
agent_id=$2
|
||||
curl --silent \
|
||||
--fail \
|
||||
-u :$AZURE_PAT \
|
||||
"https://dev.azure.com/digitalasset/_apis/distributedtask/pools/$pool_id/agents/$agent_id/?includeAssignedRequest=true&includeLastCompletedRequest=true&api-version=5.1" \
|
||||
| jq -e .assignedRequest.requestId \
|
||||
>/dev/null
|
||||
)
|
||||
|
||||
disable_agent() (
|
||||
pool_id=$1
|
||||
agent_id=$2
|
||||
curl --silent \
|
||||
--fail \
|
||||
-u :$AZURE_PAT \
|
||||
"https://dev.azure.com/digitalasset/_apis/distributedtask/pools/$pool_id/agents/$agent_id" \
|
||||
-X 'PATCH' \
|
||||
-H 'Content-Type: application/json' \
|
||||
--data-binary '{"id":'$agent_id',"enabled":false}' \
|
||||
>/dev/null
|
||||
)
|
||||
|
||||
remove_agent() (
|
||||
pool_id=$1
|
||||
agent_id=$2
|
||||
curl --silent \
|
||||
--fail \
|
||||
-u :$AZURE_PAT \
|
||||
-XDELETE \
|
||||
"https://dev.azure.com/digitalasset/_apis/distributedtask/pools/$pool_id/agents/$agent_id?api-version=7.0" \
|
||||
>/dev/null
|
||||
)
|
||||
|
||||
for platform in '{"name":"u","pool":18}' '{"name":"w","pool":11}'; do
|
||||
platform_name=$(echo "$platform" | jq -r .name)
|
||||
pool_id=$(echo "$platform" | jq -r .pool)
|
||||
agents=$(curl --silent \
|
||||
--fail \
|
||||
-u :$AZURE_PAT \
|
||||
"https://dev.azure.com/digitalasset/_apis/distributedtask/pools/$pool_id/agents?api-version=7.0" \
|
||||
| jq -c '[.value[] | {name,id}]')
|
||||
for scale_set in d$${platform_name}1 d$${platform_name}2; do
|
||||
(
|
||||
echo "$(date -Is -u) $scale_set: shutting down all machines"
|
||||
for idx in $(seq 0 $(echo "$agents" | jq 'length - 1')); do
|
||||
(
|
||||
agent_name=$(echo "$agents" | jq -r ".[$idx].name")
|
||||
agent_id=$(echo "$agents" | jq -r ".[$idx].id")
|
||||
if [[ "$${agent_name%-*}" = "$scale_set" ]]; then
|
||||
echo "$(date -Is -u) > $agent_name: disabling agent and waiting for job to finish."
|
||||
disable_agent $pool_id $agent_id
|
||||
while agent_is_busy $pool_id $agent_id; do
|
||||
sleep 30
|
||||
done
|
||||
echo "$(date -Is -u) > $agent_name: removing from Azure Pipelines."
|
||||
remove_agent $pool_id $agent_id
|
||||
echo "$(date -Is -u) > $agent_name: shutting down."
|
||||
instance_id=$((36#$${agent_name#*-}))
|
||||
az vmss delete-instances --resource-group daml-ci --name $scale_set --instance-ids $instance_id
|
||||
fi
|
||||
)
|
||||
done
|
||||
echo "$(date -Is -u) $scale_set: ensuring all machines are gone."
|
||||
az vmss scale --resource-group daml-ci --name $scale_set --new-capacity 0 >/dev/null
|
||||
echo "$(date -Is -u) $scale_set: create new machines."
|
||||
target_size=$(echo "$target" | jq --arg name $scale_set -r '.[$name]')
|
||||
az vmss scale --resource-group daml-ci --name $scale_set --new-capacity $target_size >/dev/null
|
||||
echo "$(date -Is -u) $scale_set: done."
|
||||
)
|
||||
done
|
||||
done
|
||||
echo "$(date -Is -u) end"
|
||||
CRON
|
||||
|
||||
chmod +x /root/daily-reset.sh
|
||||
|
||||
cat <<'CRONTAB' >> /etc/crontab
|
||||
30 5 * * 1-5 root /root/daily-reset.sh high >> /root/log 2>&1
|
||||
|
Loading…
Reference in New Issue
Block a user