#!/bin/bash # This script packages up the importer as it exists in the current git repo, # creates a bunch of GCE VMs, and runs the importer there on all cities, using # static sharding. # # This process is only runnable by Dustin, due to current GCE/S3 permissions. # # Run from the repo's root dir: cloud/start_batch_import.sh set -e set -x EXPERIMENT_TAG=$1 if [ "$EXPERIMENT_TAG" == "" ]; then echo Missing args; exit 1; fi if [ "$2" != "gcs_sync_done" ]; then echo First go sync dev/data/input from S3 to GCS. https://console.cloud.google.com/transfer/cloud/jobs exit 1; fi NUM_WORKERS=10 ZONE=us-east1-b # See other options: https://cloud.google.com/compute/docs/machine-types # Particularly... e2-standard-2, n2-standard-2, c2-standard-4 SMALL_MACHINE_TYPE=e2-standard-2 LARGE_MACHINE_TYPE=c2-standard-4 # All of data/ is currently around 30GB DISK_SIZE=40GB # Compressing and checksumming gigantic files needs more IOPS # TODO But wait, e2-standard-2 doesn't support local PD?! DISK_TYPE=pd-ssd # Haha, using a project from college, my last traffic sim... PROJECT=aorta-routes function build_payload { # It's a faster workflow to copy the local binaries into the VMs, rather than # build them there. cargo build --release --bin updater # This is needed to regenerate popdat.bin for Seattle. It requires the # VMs to have a matching GDAL version. Since the VM runs the same # version of Ubuntu as I do locally, this works out fine. cargo build --release --manifest-path importer/Cargo.toml --features scenarios # Build our payload for the VMs # This mkdir deliberately fails if the directory is already there; it probably # means the last run broke somehow mkdir worker_payload mkdir -p worker_payload/target/release cp target/release/importer worker_payload/target/release/ cp target/release/updater worker_payload/target/release/ mkdir worker_payload/data cp data/MANIFEST.json worker_payload/data mkdir worker_payload/importer cp -Rv importer/config worker_payload/importer cp cloud/worker_script.sh worker_payload/ # Copy in AWS credentials! Obviously don't go making worker_payload/ public or # letting anybody into the VMs. # # Alternatively, I could just scp the files from the VMs back to my local # computer. But more than likely, GCE's upstream speed to S3 (even # cross-region) is better than Comcast. :) cp -Rv ~/.aws worker_payload/ zip -r worker_payload worker_payload } function create_vms { # Ideally we'd use the bulk API, but someone's not on top of those # gcloud integration tests... # https://issuetracker.google.com/issues/188462253 for ((i = 0; i < $NUM_WORKERS; i++)); do # The first shard always handles Seattle, which needs more than # 8GB of memory. Just give it really hefty hardware. if [ $i == 0 ]; then machine_type=$LARGE_MACHINE_TYPE; else machine_type=$SMALL_MACHINE_TYPE; fi gcloud compute \ --project=$PROJECT \ instances create "worker-$i" \ --zone=$ZONE \ --machine-type=$machine_type \ --boot-disk-size=$DISK_SIZE \ --boot-disk-type=$DISK_TYPE \ --image-family=ubuntu-2004-lts \ --image-project=ubuntu-os-cloud \ --scopes=compute-rw done # There's a funny history behind the whole "how do I wait for my VM to be # SSHable?" question... sleep 30s } function start_workers { for ((i = 0; i < $NUM_WORKERS; i++)); do gcloud compute scp \ --project=$PROJECT \ --zone=$ZONE \ worker_payload.zip \ worker-$i:~/worker_payload.zip gcloud compute ssh \ --project=$PROJECT \ --zone=$ZONE \ worker-$i \ --command="sudo apt-get -qq install -y unzip; unzip -q worker_payload.zip; ./worker_payload/worker_script.sh $EXPERIMENT_TAG $i $NUM_WORKERS 1> logs 2>&1 &" done } build_payload create_vms start_workers # To follow along with a worker: # > gcloud compute ssh worker-5 --command='tail -f logs' # # To see which workers are still running (or have failed): # > gcloud compute instances list