abstreet/cloud/worker_script.sh
2021-09-17 18:29:23 -07:00

59 lines
2.3 KiB
Bash
Executable File

#!/bin/bash
# This script runs inside of GCE VMs created by start_batch_import.sh. It
# imports a bunch of cities, then uploads the results to a temporary
# subdirectory in S3.
# Fail fast: abort on any command error (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail), so a failed download that
# feeds a pipe is not silently ignored. Trace every command (-x) so the VM's
# log shows how far the job got.
set -euo pipefail
set -x
# Positional arguments:
#   $1 - EXPERIMENT_TAG: handed to the updater as --version when uploading
#   $2 - WORKER_NUM:     handed to the importer as --shard_num
#   $3 - NUM_WORKERS:    handed to the importer as --num_shards
# Default each to empty so that the check below, rather than `set -u`, reports
# missing arguments.
EXPERIMENT_TAG=${1:-}
WORKER_NUM=${2:-}
NUM_WORKERS=${3:-}
if [[ -z "$EXPERIMENT_TAG" || -z "$WORKER_NUM" || -z "$NUM_WORKERS" ]]; then
  # Diagnostics belong on stderr.
  echo "Missing args" >&2
  echo "Usage: $0 EXPERIMENT_TAG WORKER_NUM NUM_WORKERS" >&2
  exit 1
fi
# Install the AWS CLI from the official x86_64 Linux bundle.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as the zip; -sS hides the progress meter but still prints real errors.
curl -fsS "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
# -q keeps the trace log readable; -o overwrites without prompting, so a
# re-run in the same directory cannot block waiting for input.
unzip -q -o awscliv2.zip
sudo ./aws/install
# Everything below runs relative to the payload directory.
cd worker_payload
# Put the credentials in the right place
mv .aws ~/
# If we import without raw files, we'd wind up downloading fresh OSM data!
# Reuse what's in S3. But having a bunch of GCE VMs grab from S3 is expensive,
# so instead, sync from the GCS mirror that I manually update before each job.
gsutil -m cp -r gs://abstreet-importer/ .
# Only the dev/data/input subtree of the mirror is kept; the rest of the
# downloaded tree is discarded.
mv abstreet-importer/dev/data/input data/input
rm -rf abstreet-importer
# Decompress every downloaded .gz file, printing each name as it is found.
# `-exec ... +` is used instead of `-exec ... ';'` because with ';' find ignores
# gunzip's exit status and still exits 0, so a corrupt archive would be skipped
# silently even under `set -e`. With '+', any gunzip failure makes find exit
# non-zero and stops the script. -type f restricts the match to regular files.
find data/input -type f -name '*.gz' -print -exec gunzip -- '{}' +
# Set up Docker, for the elevation data
docker_repo_url=https://download.docker.com/linux/ubuntu
docker_keyring=/usr/share/keyrings/docker-archive-keyring.gpg
# Refresh the package index before the first install; on an image whose index
# is empty or stale, `apt-get install` cannot resolve the packages otherwise.
sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release
# Fetch Docker's signing key and store it in binary (dearmored) form.
curl -fsSL "${docker_repo_url}/gpg" | sudo gpg --dearmor -o "$docker_keyring"
# Resolve the release codename in its own assignment so that a failing
# lsb_release aborts the script instead of writing a broken sources entry.
release_codename=$(lsb_release -cs)
# Register Docker's apt repository, pinned to the key saved above.
echo "deb [arch=amd64 signed-by=${docker_keyring}] ${docker_repo_url} ${release_codename} stable" \
  | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# Pick up the package lists of the newly added repository.
sudo apt-get update
# Also sneak GDAL in there
sudo apt-get install -y docker-ce docker-ce-cli containerd.io libgdal-dev
# Now do the big import!
# NOTE(review): presumably these two Seattle files are removed so the import
# does not reuse them — confirm the reason with the importer's owner.
rm -fv data/input/us/seattle/raw_maps/huge_seattle.bin data/input/us/seattle/popdat.bin
# Run this as root so Docker works. We could add the current user to the group,
# but then we have to fiddle with the shell a weird way to pick up the change
# immediately.
# The expansions are quoted so an argument containing whitespace or glob
# characters is passed through as a single, literal word.
sudo ./target/release/cli importer -- --regen_all --shard_num="$WORKER_NUM" --num_shards="$NUM_WORKERS"
# Upload the results
./target/release/updater --inc_upload --version="$EXPERIMENT_TAG"
# Indicate this VM is done by deleting ourselves. We can't use suspend or stop
# with a local SSD, so just nuke ourselves instead.
# The metadata server reports the zone as a slash-separated path whose last
# component is the zone name. -f makes curl fail on an HTTP error rather than
# handing an error body to the parsing step below.
zone_path=$(curl -fsS -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/zone)
# Keep only the final path component.
ZONE=${zone_path##*/}
# --quiet disables gcloud's interactive confirmation, replacing `echo y |`.
gcloud compute instances delete "$HOSTNAME" --zone="$ZONE" --quiet