Travis: Toolchain only depends on headers, not impls

When libstdc++ was added in 4977fd22b8, just calling
'make install' was the easiest way to install the headers. And the headers are all
that is needed for libstdc++ to determine the ABI. Since then, BuildIt.sh was
rewritten again and again, and somehow everyone just silently assumed that
libstdc++ also depends on libc.a and libm.a, because surely it does?

Turns out, it doesn't! This massively reduces the dependencies of libstdc++,
hopefully meaning that the Toolchain doesn't need to be rebuilt so often on Travis.

Furthermore, the old method of trying to determine the dependency tree with
bash/grep/etc. has finally broken anyway:

    https://travis-ci.com/github/SerenityOS/serenity/builds/179805569#L567

In summary, this should eliminate most of the Toolchain rebuilds on Travis,
and therefore make Travis build blazingly fast! :^)
This commit is contained in:
Ben Wiederhake 2020-08-15 01:11:58 +02:00 committed by Andreas Kling
parent 0df9ddf604
commit 5f724b6ca1
Notes: sideshowbarker 2024-07-19 03:36:41 +09:00
2 changed files with 33 additions and 90 deletions

View File

@ -19,10 +19,10 @@ MAKE="make"
MD5SUM="md5sum" MD5SUM="md5sum"
NPROC="nproc" NPROC="nproc"
# Each cache entry is 70 MB. 10 entries are 700 MiB. # Each cache entry is 70 MB. 5 entries are 350 MiB.
# It seems that Travis starts having trouble around a total # It seems that Travis starts having trouble around a total
# cache size of 9 GiB, so I think this is a good amount. # cache size of 9 GiB, so I think this is a good amount.
KEEP_CACHE_COUNT=10 KEEP_CACHE_COUNT=5
if command -v ginstall &>/dev/null; then if command -v ginstall &>/dev/null; then
INSTALL=ginstall INSTALL=ginstall
@ -76,19 +76,27 @@ GCC_BASE_URL="http://ftp.gnu.org/gnu/gcc"
pushd "$DIR" pushd "$DIR"
if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then
echo "Checking cached toolchain:" echo "Checking cached toolchain:"
# TODO: This is still overly pessimistic.
DEPS_CONFIG=" DEPS_CONFIG="\
uname=$(uname),TARGET=${TARGET}, uname=$(uname),TARGET=${TARGET},
BuildItHash=$($MD5SUM "$(basename "$0")"), BuildItHash=$($MD5SUM "$(basename "$0")"),
MAKE=${MAKE},MD5SUM=${MD5SUM},NPROC=${NPROC}, MAKE=${MAKE},MD5SUM=${MD5SUM},NPROC=${NPROC},
CC=${CC},CXX=${CXX},with_gmp=${with_gmp},LDFLAGS=${LDFLAGS}, CC=${CC},CXX=${CXX},with_gmp=${with_gmp},LDFLAGS=${LDFLAGS},
BINUTILS_VERSION=${BINUTILS_VERSION},BINUTILS_MD5SUM=${BINUTILS_MD5SUM}, BINUTILS_VERSION=${BINUTILS_VERSION},BINUTILS_MD5SUM=${BINUTILS_MD5SUM},
GCC_VERSION=${GCC_VERSION},GCC_MD5SUM=${GCC_MD5SUM}" GCC_VERSION=${GCC_VERSION},GCC_MD5SUM=${GCC_MD5SUM}"
echo "Config is:${DEPS_CONFIG}"
if ! DEPS_HASH=$("$DIR/ComputeDependenciesHash.sh" "$MD5SUM" <<<"${DEPS_CONFIG}"); then if ! DEPS_HASH=$("$DIR/ComputeDependenciesHash.sh" "$MD5SUM" <<<"${DEPS_CONFIG}"); then
# Make it stand out more
echo
echo
echo
echo
echo "Dependency hashing failed" echo "Dependency hashing failed"
echo "Will rebuild toolchain from scratch, and NOT SAVE THE RESULT." echo "Will rebuild toolchain from scratch, and NOT SAVE THE RESULT."
echo "Someone should look into this, but for now it'll work, albeit inefficient." echo "Someone should look into this, but for now it'll work, albeit inefficient."
echo
echo
echo
echo
# Should be empty anyway, but just to make sure: # Should be empty anyway, but just to make sure:
DEPS_HASH="" DEPS_HASH=""
elif [ -r "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then elif [ -r "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then
@ -238,12 +246,10 @@ pushd "$DIR/Build/"
echo "XXX install gcc and libgcc" echo "XXX install gcc and libgcc"
"$MAKE" install-gcc install-target-libgcc || exit 1 "$MAKE" install-gcc install-target-libgcc || exit 1
echo "XXX serenity libc and libm" echo "XXX serenity libc and libm headers"
mkdir -p "$BUILD" mkdir -p "$BUILD"
pushd "$BUILD" pushd "$BUILD"
CXXFLAGS="-DBUILDING_SERENITY_TOOLCHAIN" cmake .. mkdir -p Root/usr/include/
cmake --build . --target LibC
"$INSTALL" -D Libraries/LibC/libc.a Libraries/LibM/libm.a Root/usr/lib/
SRC_ROOT=$(realpath "$DIR"/..) SRC_ROOT=$(realpath "$DIR"/..)
FILES=$(find "$SRC_ROOT"/Libraries/LibC "$SRC_ROOT"/Libraries/LibM -name '*.h' -print) FILES=$(find "$SRC_ROOT"/Libraries/LibC "$SRC_ROOT"/Libraries/LibM -name '*.h' -print)
for header in $FILES; do for header in $FILES; do

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash #!/usr/bin/env bash
set -eu set -euo pipefail
# This file will need to be run in bash, for now. # This file will need to be run in bash, for now.
if [ $# -lt 1 ] ; then if [ $# -lt 1 ] ; then
@ -23,89 +23,26 @@ function finish {
} }
trap finish EXIT trap finish EXIT
# libstdc++ depends on libc and libm, so we pessimistically assume it depends # First, capture the caller's input.
# on *all* of their implementation and recursive dependencies. echo "$0: Configuration:" >&2
# Scan all files for potential dependencies. cat /dev/stdin | tee /dev/stderr > "${DEPLIST_FILE}"
# Thinking in graphs, this computes the edge list: # "$@" is the md5sum invocation.
cat <(find AK/ Libraries/ Services/ Kernel/ -name '*.h') \ "$@" Toolchain/ComputeDependenciesHash.sh | tee /dev/stderr >> "${DEPLIST_FILE}"
<(find Libraries/LibC/ Libraries/LibM/ -name '*.cpp' ! -name 'Test*.cpp' ) | \
xargs grep -F '#include ' | \
sed -r \
-e 's,^(.*/)([^/]+:)#include "(.*)",\1\2\1\3,' \
-e 's^#include <(Kernel/.*)>^\1^' \
-e 's^#include <(AK/.*)>^\1^' \
-e 's^#include <(Lib[A-Za-z]+/.*)>^Libraries/\1^' \
-e 's^#include <((bits|netinet|sys|arpa|net)/.*)>^Libraries/LibC/\1^' \
-e 's^#include <fd_set.h>^Libraries/LibC/fd_set.h^' \
-e 's^#include <([a-z]{3,10}(_numbers)?\.h)>^Libraries/LibC/\1^' \
-e 's^#include <([A-Z][a-z]+Server/.*)>^Services/\1^' \
-e 's^#include <(.*)>^UNRESOLVED_I/\1^' \
-e 's^#include "(.*)"^UNRESOLVED_L/\1^' > "${DEPLIST_FILE}"
# Some #include's cannot be resolved, like <chrono>. However, these are only
# a problem if they turn up as a transitive dependency of libc and libm.
# We will check for that when the time comes.
# The initial guess is pessimistic: *all* of libc and libm. # libstdc++ depends on the *headers* of libc, so we pessimistically assume it depends
FILE_LIST=$(find Libraries/LibC/ Libraries/LibM/ \( -name '*.cpp' -o -name '*.c' -o -name '*.h' \) ! -name 'Test*') # on *all* of them.
echo "$0: Exploring dependencies of libstdc++" >&2 # This list of files can be cut down considerably:
FILE_LIST_COMPLETE="n" # strace -ff -e trace=file "make install-target-libstdc++-v3" 2>&1 >/dev/null | perl -ne 's/^[^"]+"(([^\\"]|\\[\\"nt])*)".*/$1/ && print' | sort -u | grep -P 'serenity/Build/Root/usr/include/.*\.h$'
# In each iteration, we extend FILE_LIST by the dependencies not listed yet in # However, we don't want to risk breaking the build when we upgrade gcc in the future.
# FILE_LIST. Note that the results are always semantically the same, #
# but the order depends on the initial `find` runs. # If you want to further cut down the Toolchain rebuilds on Travis,
for _ in $(seq 10) ; do # one way would be to reduce this list somehow.
FILE_REGEX=$(echo "${FILE_LIST}" | sed -zr -e 's,\n$,,' -e 's,\.,\\.,g' -e 's,\n,|,g') cd Libraries/LibC/
FURTHER_FILE_LIST=$(grep -P "^(${FILE_REGEX}):" "${DEPLIST_FILE}" | grep -Pv ":(${FILE_REGEX})\$" | sed -re 's,^.*:(.*)$,\1,' | sort -u) find -name '*.h' | sort | xargs "$@" | tee /dev/stderr >> "${DEPLIST_FILE}"
if [ -n "${FURTHER_FILE_LIST}" ] ; then
# FILE_LIST should grow to a maximum of "number of all .cpp and .c and .h files",
# i.e. roughly 700 lines. This should be managable, even as the project grows.
FILE_LIST="${FILE_LIST}
${FURTHER_FILE_LIST}"
else
FILE_LIST_COMPLETE="y"
break
fi
done
FURTHER_FILE_LIST=""
FILE_REGEX=""
if [ "${FILE_LIST_COMPLETE}" != "y" ] ; then
# Dependency chains might grow very long. Also, if for some reason we fail
# to filter out the already listed files, the FILE_LIST would grow
# exponentially. Both of these unpleasant cases are handled by capping the
# iteration count to 10 and giving up:
echo "$0: Dependencies don't seem to converge, giving up." >&2
exit 1
fi
# Sort for reproducability, # The piping might hide non-zero exit-codes,
FILE_LIST=$(echo "${FILE_LIST}" | LC_ALL=C sort -u)
if grep -F 'UNRESOLVED' <<EOLIST >&2 ; then
${FILE_LIST}
EOLIST
echo "$0: Unresolved dependency, giving up."
exit 1
fi
echo "$0: Computing hashes" >&2
# "$@" is the md5sum invocation. The piping might hide non-zero exit-codes,
# but thankfully only the first command can reasonably fail. # but thankfully only the first command can reasonably fail.
# Also, abuse the deplist file as a temporary buffer.
cat /dev/stdin > "${DEPLIST_FILE}"
HASHES=$(xargs "$@" <<EOLIST
${FILE_LIST}
Toolchain/ComputeDependenciesHash.sh
${DEPLIST_FILE}
EOLIST
)
# Caller (probably BuildIt.sh) should inject it's own hash via stdin.
# Mask the temporary (= non-reproducable) name of the DEPLIST_FILE:
HASHES=$(echo "${HASHES}" | sed -re 's,/tmp/serenity_deps_........\.lst,CONFIG,')
echo "$0: Hashes are:" >&2
echo "${HASHES}" >&2
echo "$0: Toolchain hash:" >&2 echo "$0: Toolchain hash:" >&2
cat <<EOHASH | "$@" - | cut -f1 -d' ' | tee /dev/stderr "$@" "${DEPLIST_FILE}" | cut -f1 -d' ' | tee /dev/stderr
${HASHES}
EOHASH
echo "$0: Great success!" >&2 echo "$0: Great success!" >&2