add -flto=thin to mac aarch64 build cflags for ez 10% gain (#469)

Improves wall-clock time by roughly 5-10%.

I've run all tests, booted several fakeships and comets with this
installed, and all seems to work properly.

Mac x86 is coming, I just don't have access to an intel mac right now
[but soon...].
Linux will come later once I figure out some bugs.

The following is time to boot a fakeship from a brass pill:
```
Without LTO:
________________________________________________________
Executed in  180.46 secs    fish           external
usr time   17.83 secs    0.10 millis   17.83 secs
sys time    0.23 secs    1.86 millis    0.23 secs


With LTO:
________________________________________________________
Executed in  164.87 secs    fish           external
usr time   15.65 secs    0.12 millis   15.65 secs
sys time    0.19 secs    1.94 millis    0.19 secs
```
180.46 / 164.87 ≈ 1.09 (roughly a 1.09x wall-clock speedup)

---

~barter-simsum

## Brass pill boot:

x86 linux without lto: ~140s
x86 linux with    lto: ~128s

~ 8.6% improvement - note we may be able to squeeze a bit more out if we can
apply `-flto` to all dependencies and not just urbit binaries. This is
currently an issue for x86 linux though, due to some weird uninvestigated
behavior with libsigsegv.

## For those curious, the following hints at what was inlined

```
readelf -s ./bazel-bin/pkg/vere/urbit | grep lto_priv
  1233: 000000000042d0f0   361 FUNC    LOCAL  DEFAULT    2 _rebalance.lto_priv.1
  1237: 000000000042d6c0   435 FUNC    LOCAL  DEFAULT    2 _rebalance.lto_priv.0
 13002: 0000000000d24a00   200 OBJECT  GLOBAL HIDDEN    14 u3_Signal.lto_priv.0
 13358: 0000000000d36de0     8 OBJECT  GLOBAL HIDDEN    14 sec_u.lto_priv.0
 13429: 0000000000d24ae0    80 OBJECT  GLOBAL HIDDEN    14 u3V.lto_priv.0
 13785: 00000000004516e0   110 FUNC    GLOBAL HIDDEN     2 _tap_in.lto_priv.0
 14094: 0000000000d3aeb0     4 OBJECT  GLOBAL HIDDEN    14 sag_w.lto_priv.0
 14100: 0000000000469af0   971 FUNC    GLOBAL HIDDEN     2 _lord_stop.lto_priv.0
 14120: 00000000004581f0   393 FUNC    GLOBAL HIDDEN     2 _cj_nail.lto_priv.0
 14178: 0000000000456a30   450 FUNC    GLOBAL HIDDEN     2 _cj_fine.lto_priv.0
 14732: 0000000000463eb0    82 FUNC    GLOBAL HIDDEN     2 _box_free.lto_priv.0
 14849: 000000000044a320   658 FUNC    GLOBAL HIDDEN     2 _n_find.lto_priv.0
 15325: 0000000000478dd0  1487 FUNC    GLOBAL HIDDEN     2 _pier_init.lto_priv.0
 15413: 00000000004674a0   939 FUNC    GLOBAL HIDDEN     2 _ca_willoc.lto_priv.0
 15841: 0000000000477300   394 FUNC    GLOBAL HIDDEN     2 _dawn_fail.lto_priv.0
 16128: 00000000004449d0  1746 FUNC    GLOBAL HIDDEN     2 _cr_sing.lto_priv.0
 16214: 0000000000474c40   500 FUNC    GLOBAL HIDDEN     2 _ttyf_loja.lto_priv.0
 16277: 000000000078fe48     8 OBJECT  GLOBAL HIDDEN     5 ver_hos_c.lto_priv.0
 16887: 0000000000443ab0   460 FUNC    GLOBAL HIDDEN     2 _n_bam.lto_priv.0
 17346: 000000000045cc90   540 FUNC    GLOBAL HIDDEN     2 _cj_minx.lto_priv.0
 17897: 0000000000d249e8     8 OBJECT  GLOBAL HIDDEN    14 _file_u.lto_priv.0
 18325: 00000000004447a0    58 FUNC    GLOBAL HIDDEN     2 _n_feb.lto_priv.0
 18517: 00000000004453e0  3981 FUNC    GLOBAL HIDDEN     2 _n_comp.lto_priv.0
 18739: 0000000000d249f6     1 OBJECT  GLOBAL HIDDEN    14 _ct_lop_o.lto_priv.0
 18880: 000000000044fda0  1650 FUNC    GLOBAL HIDDEN     2 _find_home.lto_priv.0
 19091: 0000000000474e40   500 FUNC    GLOBAL HIDDEN     2 _ttyf_hija.lto_priv.0
 19508: 0000000000450420  1393 FUNC    GLOBAL HIDDEN     2 _pave_home.lto_priv.0
 19595: 000000000042a3d0   687 FUNC    GLOBAL HIDDEN     2 _in_uni.lto_priv.0
 19822: 0000000000430e20  1095 FUNC    GLOBAL HIDDEN     2 _block_rip.lto_priv.0
 20236: 0000000000457030   331 FUNC    GLOBAL HIDDEN     2 _cj_axis.lto_priv.0
 20306: 0000000000457180   514 FUNC    GLOBAL HIDDEN     2 _cj_gust.lto_priv.0
 20866: 0000000000456c00   667 FUNC    GLOBAL HIDDEN     2 _cj_cast.lto_priv.0
 21317: 0000000000446fe0 11412 FUNC    GLOBAL HIDDEN     2 _n_burn.lto_priv.0
 21430: 000000000045c820  1130 FUNC    GLOBAL HIDDEN     2 _cj_spot.lto_priv.0
```
This commit is contained in:
barter-simsum 2023-07-18 11:59:43 -04:00 committed by GitHub
commit 50d602ef8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 117 additions and 27 deletions

View File

@ -20,19 +20,21 @@ build --@io_bazel_rules_docker//transitions:enable=false
build --flag_alias=clang_version=//:clang_version
build --flag_alias=gcc_version=//:gcc_version
# Use optimized build by default. According to the bazel documentation, this
# corresponds to -O2 -DNDEBUG, but these are overridden in
# //bazel/common_settings.bzl:vere_library.
# https://bazel.build/docs/user-manual#build-semantics
build --compilation_mode=opt
# Don't include source level debug info on macOS. See
# https://github.com/urbit/urbit/issues/5561 and
# https://github.com/urbit/vere/issues/131.
build:linux --per_file_copt='pkg/.*@-g'
build:linux --host_copt='-g'
build --strip=never
# Use -O3 as the default optimization level.
build --per_file_copt='pkg/.*@-O3'
# Turn on optimization, CPU and memory debug for exec config, which we only use
# to run the fake ship tests. Also turn on extra snapshot validation.
build --host_copt='-O3'
# Turn on CPU and memory debug for exec config, which we only use to run the
# fake ship tests. Also turn on extra snapshot validation.
build --host_copt='-DU3_CPU_DEBUG'
build --host_copt='-DU3_MEMORY_DEBUG'
build --host_copt='-DC3DBG'
@ -45,12 +47,5 @@ build:mem_dbg --per_file_copt='pkg/.*@-DU3_MEMORY_DEBUG'
build:cpu_dbg --per_file_copt='pkg/.*@-DU3_CPU_DEBUG'
build:snp_dbg --per_file_copt='pkg/.*@-DU3_SNAPSHOT_VALIDATION'
# Enable maximum debug info and disable optimizations for debug config. It's
# important that these lines come after setting the default debug and
# optimization level flags above.
build:dbg --per_file_copt='pkg/.*@-O0'
build:dbg --per_file_copt='pkg/.*@-g3'
build:dbg --per_file_copt='pkg/.*@-DC3DBG'
# Any personal configuration should go in .user.bazelrc.
try-import %workspace%/.user.bazelrc

View File

@ -37,6 +37,38 @@ config_setting(
],
)
#
# CONFIGS DETAILING WHEN TO ENABLE CERTAIN FEATURES BY DEFAULT.
# CHANGES BEHAVIOR OF //bazel/common_settings.bzl:vere_library.
#
# Matches builds that target macOS with `--compilation_mode=opt`.
# //bazel/common_settings.bzl selects on this setting to add `-flto=thin` to
# the compile and link options of `vere_library` / `vere_binary` targets.
# NOTE(review): there is no CPU constraint here, so this matches every macOS
# architecture, not only aarch64 -- confirm that is intended.
config_setting(
name = "thinlto",
constraint_values = [
"@platforms//os:macos",
],
values = {
"compilation_mode": "opt"
}
)
# Matches builds that target Linux with `--compilation_mode=opt`.
# //bazel/common_settings.bzl selects on this setting to add `-flto` to the
# compile and link options of `vere_library` / `vere_binary` targets.
config_setting(
name = "lto",
constraint_values = [
"@platforms//os:linux",
],
values = {
"compilation_mode": "opt"
}
)
# Matches any build with `--compilation_mode=dbg`, regardless of platform.
# //bazel/common_settings.bzl selects on this setting to compile
# `vere_library` / `vere_binary` targets with `-O0 -g3 -DC3DBG` instead of
# `-O3`.
config_setting(
name = "debug",
values = {
"compilation_mode": "dbg"
}
)
#
# COMPILERS
#

View File

@ -60,9 +60,10 @@ bazel build :urbit
```
If you want a debug build, which changes the optimization level from `-O3` to
`-O0` and includes more debugging information, specify the `dbg` configuration:
`-O0` and includes more debugging information, specify `dbg` as the
`compilation_mode`:
```console
bazel build --config=dbg :urbit
bazel build --compilation_mode=dbg :urbit
```
Note that you cannot change the optimization level for third party
dependencies--those targets specified in `bazel/third_party`--from the command

View File

@ -10,3 +10,50 @@ string_flag = rule(
build_setting = config.string(flag = True),
doc = "A string-typed build setting that can be set on the command line",
)
def vere_library(copts = [], linkopts = [], **kwargs):
    """Wraps `native.cc_library` with the default vere compile/link options.

    The caller's options come first, followed by options chosen from the
    active configuration:
      - `-g` when targeting Linux;
      - `-O0 -g3 -DC3DBG` when `//:debug` matches, otherwise `-O3`;
      - `-flto` when `//:lto` matches, `-flto=thin` when `//:thinlto` matches.
    `-g` is always appended to the linker options.

    Args:
      copts: extra compiler options, placed before the defaults.
      linkopts: extra linker options, placed before the defaults.
      **kwargs: forwarded unchanged to `native.cc_library`.
    """
    native.cc_library(
        copts = copts + select({
            # Don't include source level debug info on macOS. See
            # https://github.com/urbit/urbit/issues/5561 and
            # https://github.com/urbit/vere/issues/131.
            #
            # This select must not also key on `//:debug`: on a Linux
            # `--compilation_mode=dbg` build both keys would match with
            # different values and neither specializes the other, which Bazel
            # rejects as an ambiguous match. It is placed *before* the debug
            # select so that the later `-g3` still takes effect in debug
            # builds.
            "@platforms//os:linux": ["-g"],
            "//conditions:default": [],
        }) + select({
            "//:debug": ["-O0", "-g3", "-DC3DBG"],
            "//conditions:default": ["-O3"],
        }) + select({
            "//:lto": ["-flto"],
            "//:thinlto": ["-flto=thin"],
            "//conditions:default": [],
        }),
        # LTO has to be requested at link time as well as at compile time.
        linkopts = linkopts + ["-g"] + select({
            "//:lto": ["-flto"],
            "//:thinlto": ["-flto=thin"],
            "//conditions:default": [],
        }),
        **kwargs,
    )
def vere_binary(copts = [], linkopts = [], **kwargs):
    """Wraps `native.cc_binary` with the default vere compile/link options.

    The caller's options come first, followed by options chosen from the
    active configuration:
      - `-g` when targeting Linux;
      - `-O0 -g3 -DC3DBG` when `//:debug` matches, otherwise `-O3`;
      - `-flto` when `//:lto` matches, `-flto=thin` when `//:thinlto` matches.
    `-g` is always appended to the linker options.

    Args:
      copts: extra compiler options, placed before the defaults.
      linkopts: extra linker options, placed before the defaults.
      **kwargs: forwarded unchanged to `native.cc_binary`.
    """
    native.cc_binary(
        copts = copts + select({
            # Don't include source level debug info on macOS. See
            # https://github.com/urbit/urbit/issues/5561 and
            # https://github.com/urbit/vere/issues/131.
            #
            # This select must not also key on `//:debug`: on a Linux
            # `--compilation_mode=dbg` build both keys would match with
            # different values and neither specializes the other, which Bazel
            # rejects as an ambiguous match. It is placed *before* the debug
            # select so that the later `-g3` still takes effect in debug
            # builds.
            "@platforms//os:linux": ["-g"],
            "//conditions:default": [],
        }) + select({
            "//:debug": ["-O0", "-g3", "-DC3DBG"],
            "//conditions:default": ["-O3"],
        }) + select({
            "//:lto": ["-flto"],
            "//:thinlto": ["-flto=thin"],
            "//conditions:default": [],
        }),
        # LTO has to be requested at link time as well as at compile time.
        linkopts = linkopts + ["-g"] + select({
            "//:lto": ["-flto"],
            "//:thinlto": ["-flto=thin"],
            "//conditions:default": [],
        }),
        **kwargs,
    )

View File

@ -50,7 +50,7 @@ _aarch64_gcc = "toolchain-gcc-linux-aarch64"
cc_toolchain_config(
name = "gcc-linux-aarch64-config",
ar = "{}/aarch64-linux-musl/bin/aarch64-linux-musl-ar".format(_install_prefix),
ar = "{}/aarch64-linux-musl/bin/aarch64-linux-musl-gcc-ar".format(_install_prefix),
cc = "{}/aarch64-linux-musl/bin/aarch64-linux-musl-gcc".format(_install_prefix),
cc_flags = [
"-static",
@ -104,7 +104,7 @@ _x86_64_gcc = "toolchain-gcc-linux-x86_64"
cc_toolchain_config(
name = "gcc-linux-x86_64-config",
ar = "{}/x86_64-linux-musl/bin/x86_64-linux-musl-ar".format(_install_prefix),
ar = "{}/x86_64-linux-musl/bin/x86_64-linux-musl-gcc-ar".format(_install_prefix),
cc = "{}/x86_64-linux-musl/bin/x86_64-linux-musl-gcc".format(_install_prefix),
cc_flags = [
"-static",
@ -279,7 +279,7 @@ cc_toolchain_config(
# NOTE: building with `libtool` does not work on macOS due to lack of
# support in the `configure_make` rule provided by `rules_foreign_cc`.
# Therefore, we require setting `ar` as the archiver tool on macOS.
ar = "/usr/bin/ar",
ar = "/usr/local/opt/llvm@15/bin/llvm-ar",
# By default, Bazel passes the `rcsD` flags to `ar`, but macOS's `ar`
# implementation doesn't support `D`. We remove it with this attribute
# and corresponding `ar_flags_feature` in `cfg.bzl`.
@ -288,7 +288,8 @@ cc_toolchain_config(
cc = "/usr/local/opt/llvm@15/bin/clang",
compiler = "clang",
compiler_version = "//:clang_version",
ld = "/usr/bin/ld",
ld = "/usr/local/opt/llvm@15/bin/llvm-lld",
nm = "/usr/local/opt/llvm@15/bin/llvm-nm",
sys_includes = [
"/usr/local/Cellar/llvm@15/15.0.7/lib/clang/15.0.7/include",
"/Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/usr/include",

View File

@ -2,7 +2,9 @@
# LIBRARIES
#
cc_library(
load("//bazel:common_settings.bzl", "vere_library")
vere_library(
name = "c3",
srcs = glob(
[

View File

@ -2,7 +2,9 @@
# LIBRARIES
#
cc_library(
load("//bazel:common_settings.bzl", "vere_library")
vere_library(
name = "ent",
srcs = ["ent.c"],
hdrs = ["ent.h"],

View File

@ -2,7 +2,9 @@
# LIBRARIES
#
cc_library(
load("//bazel:common_settings.bzl", "vere_library")
vere_library(
name = "noun",
srcs = glob(
[

View File

@ -2,7 +2,9 @@
# LIBRARIES
#
cc_library(
load("//bazel:common_settings.bzl", "vere_library")
vere_library(
name = "ur",
srcs = [
"bitstream.c",

View File

@ -2,7 +2,9 @@
# LIBRARIES
#
cc_library(
load("//bazel:common_settings.bzl", "vere_library")
vere_library(
name = "urcrypt",
srcs = glob(
[

View File

@ -1,4 +1,6 @@
cc_library(
load("//bazel:common_settings.bzl", "vere_library")
vere_library(
name = "ge-additions",
srcs = ["ge-additions.c"],
hdrs = ["ge-additions.h"],

View File

@ -2,6 +2,8 @@
# GENERATED FILES
#
load("//bazel:common_settings.bzl", "vere_library", "vere_binary")
# An approximation of `xxd -i` that runs on all platforms where Bash is
# present. Generates a `.h` file that declares the array and array length as
# `extern` global variables and a `.c` file containing the array and array
@ -86,7 +88,7 @@ genrule(
# LIBRARIES
#
cc_library(
vere_library(
name = "vere",
srcs = glob(
[
@ -138,7 +140,7 @@ cc_library(
# BINARIES
#
cc_binary(
vere_binary(
name = "urbit",
srcs = [
"main.c",