#!/bin/bash -uex
# Copyright (c) Facebook, Inc. and its affiliates.
set -o pipefail # Be sure to `|| :` commands that are allowed to fail.
#
# Future: port this to Python if you are making significant changes.
#
# Parse command-line arguments
build_timeout="" # Default to no time-out
print_usage() {
  echo "Usage: $0 [--build-timeout TIMEOUT_VAL] SAVE-CCACHE-TO-DIR"
  echo "SAVE-CCACHE-TO-DIR is required. An empty string discards the ccache."
}
while [[ $# -gt 0 ]]; do
  case "$1" in
    --build-timeout)
      shift
      build_timeout="$1"
      if [[ "$build_timeout" != "" ]] ; then
        timeout "$build_timeout" true  # fail early on invalid timeouts
      fi
      ;;
    -h|--help)
      print_usage
      exit
      ;;
    *)
      break
      ;;
  esac
  shift
done
# There is one required argument, but an empty string is allowed.
if [[ "$#" != 1 ]] ; then
print_usage
exit 1
fi
save_ccache_to_dir="$1"
if [[ "$save_ccache_to_dir" != "" ]] ; then
mkdir -p "$save_ccache_to_dir" # fail early if there's nowhere to save
else
echo "WARNING: Will not save /ccache from inside the Docker container"
fi
rand_guid() {
  echo "$(date +%s)_${RANDOM}_${RANDOM}_${RANDOM}_${RANDOM}"
}
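# NB: $id holds a full key=value Docker label ("fbcode_builder_image_id=<guid>"),
# so it can be passed verbatim to `docker build --label` and to the
# `docker images --filter label=` lookup further below.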
id=fbcode_builder_image_id=$(rand_guid)
logfile=$(mktemp)
echo "
Running build with timeout '$build_timeout', label $id, and log in $logfile
"
if [[ "$build_timeout" != "" ]] ; then
# Kill the container after $build_timeout. Using `/bin/timeout` would cause
# Docker to destroy the most recent container and lose its cache.
(
sleep "$build_timeout"
echo "Build timed out after $build_timeout" 1>&2
while true; do
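      # The newest " ---> Running in <id>" line in the build log names the
      # container Docker is currently running; FBCODE_BUILDER_EXIT is the
      # sentinel this script appends to the log once `docker build` has exited.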
      maybe_container=$(
        grep -E '^( ---> Running in [0-9a-f]+|FBCODE_BUILDER_EXIT)$' "$logfile" |
          tail -n 1 | awk '{print $NF}'
      )
      if [[ "$maybe_container" == "FBCODE_BUILDER_EXIT" ]] ; then
        echo "Time-out successfully terminated build" 1>&2
        break
      fi
      echo "Time-out: trying to kill $maybe_container" 1>&2
      # This kill may fail if we get unlucky; try again soon.
      docker kill "$maybe_container" || sleep 5
    done
  ) &
fi
build_exit_code=0
# `docker build` is allowed to fail, and `pipefail` means we must check the
# failure explicitly.
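# Note: `${PIPESTATUS[0]}` below is the exit code of `docker build` itself
# (index 1 would be `tee`'s), read on the first line of the `then` body.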
if ! docker build --label="$id" . 2>&1 | tee "$logfile" ; then
  build_exit_code="${PIPESTATUS[0]}"
  # NB: We are going to deliberately forge ahead even if `tee` failed.
  # If it did, we have a problem with tempfile creation, and all is sad.
  echo "Build failed with code $build_exit_code, trying to save ccache" 1>&2
fi
# Stop trying to kill the container.
echo $'\nFBCODE_BUILDER_EXIT' >> "$logfile"
if [[ "$save_ccache_to_dir" == "" ]] ; then
echo "Not inspecting Docker build, since saving the ccache wasn't requested."
exit "$build_exit_code"
fi
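# Look up the image that `docker build` tagged with our label; this comes
# back empty if the build failed before producing a final image.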
img=$(docker images --filter "label=$id" -a -q)
if [[ "$img" == "" ]] ; then
docker images -a
echo "In the above list, failed to find most recent image with $id" 1>&2
# Usually, the above `docker kill` will leave us with an up-to-the-second
# container, from which we can extract the cache. However, if that fails
# for any reason, this loop will instead grab the latest available image.
#
# It's possible for this log search to get confused due to the output of
# the build command itself, but since our builds aren't **trying** to
# break cache, we probably won't randomly hit an ID from another build.
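  # Walk the log newest-first (`tac`); `sed` rewrites " ---> Running in <id>"
  # to "container_<id>" and strips " ---> " from bare image IDs, so the loop
  # can tell containers (which need `docker commit`) apart from images.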
  img=$(
    grep -E '^ ---> (Running in [0-9a-f]+|[0-9a-f]+)$' "$logfile" | tac |
      sed 's/Running in /container_/;s/ ---> //;' | (
        while read -r x ; do
          # Both docker commands below print an image ID to stdout on
          # success, so we just need to know when to stop.
          if [[ "$x" =~ container_.* ]] ; then
            if docker commit "${x#container_}" ; then
              break
            fi
          elif docker inspect --type image -f '{{.Id}}' "$x" ; then
            break
          fi
        done
      )
  )
  if [[ "$img" == "" ]] ; then
    echo "Failed to find valid container or image ID in log $logfile" 1>&2
    exit 1
  fi
elif [[ "$(echo "$img" | wc -l)" != 1 ]] ; then
  # Shouldn't really happen, but be explicit if it does.
  echo "Multiple images with label $id, taking the latest of:"
  echo "$img"
  img=$(echo "$img" | head -n 1)
fi
container_name="fbcode_builder_container_$(rand_guid)"
echo "Starting $container_name from latest image of the build with $id --"
echo "$img"
# ccache collection must be done outside of the Docker build steps because
# we need to be able to kill it on timeout.
#
# This step grows the max cache size to slightly exceed the working set of
# a successful build. This simple design persists the max size in the cache
# directory itself (the env var CCACHE_MAXSIZE does not even work with
# older ccaches like the one on 14.04).
#
# Future: copy this script into the Docker image via Dockerfile.
(
  # By default, fbcode_builder creates an unsigned image, so the `docker
  # run` below would fail if DOCKER_CONTENT_TRUST were set. So we unset it
  # just for this one run.
  export DOCKER_CONTENT_TRUST=
  # CAUTION: The inner bash runs without -uex, so code accordingly.
  docker run --user root --name "$container_name" "$img" /bin/bash -c '
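    # The single-quoted inner script is closed and reopened around
    # "$build_exit_code" on the next line, so the outer shell splices the
    # numeric exit code in before the inner bash ever runs.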
    build_exit_code='"$build_exit_code"'
    # Might be useful when debugging whether the max cache size is too small.
    grep " Cleaning up cache directory " /tmp/ccache.log
    export CCACHE_DIR=/ccache
    ccache -s
    echo "Total bytes in /ccache:"
    total_bytes=$(du -sb /ccache | awk "{print \$1}")
    echo "$total_bytes"
    echo "Used bytes in /ccache:"
    used_bytes=$(
      du -sb $(find /ccache -type f -newermt @$(
        cat /FBCODE_BUILDER_CCACHE_START_TIME
      )) | awk "{t += \$1} END {print t}"
    )
    echo "$used_bytes"
    # Goal: set the max cache to 750MB over 125% of the usage of a
    # successful build. If this is too small, it takes too long to get a
    # cache fully warmed up. Plus, ccache cleans 100-200MB before reaching
    # the max cache size, so a large margin is essential to prevent misses.
    desired_mb=$(( 750 + used_bytes / 800000 ))  # 125% in decimal MB: 1e6/1.25
    if [[ "$build_exit_code" != "0" ]] ; then
      # For a bad build, disallow shrinking the max cache size. Instead of
      # the max cache size, we use on-disk size, which ccache keeps at least
      # 150MB under the actual max size, hence the 400MB safety margin.
      cur_max_mb=$(( 400 + total_bytes / 1000000 ))  # ccache uses decimal MB
      if [[ "$desired_mb" -le "$cur_max_mb" ]] ; then
        desired_mb=""
      fi
    fi
    if [[ "$desired_mb" != "" ]] ; then
      echo "Updating cache size to $desired_mb MB"
      ccache -M "${desired_mb}M"
      ccache -s
    fi
    # Subshell because `time` the binary may not be installed.
    if (time tar czf /ccache.tgz /ccache) ; then
      ls -l /ccache.tgz
    else
      # This `else` ensures we never overwrite the current cache with
      # partial data in case of error, even if somebody adds code below.
      rm /ccache.tgz
      exit 1
    fi
  '
)
echo "Updating $save_ccache_to_dir/ccache.tgz"
# This will not delete the existing cache if `docker run` didn't make one
docker cp "$container_name:/ccache.tgz" "$save_ccache_to_dir/"
# Future: it'd be nice if Travis allowed us to retry if the build timed out,
# since we'll make more progress thanks to the cache. As-is, we have to
# wait for the next commit to land.
echo "Build exited with code $build_exit_code"
exit "$build_exit_code"