Upgrade emsdk to 3.1.8 (#414)

* Rework WASM compilation options

Necessary to work with newer versions of emscripten that are more picky about which option goes to the compiler, and which to the linker. Also took the opportunity to remove the need for the patching of the bergamot-translation-worker.js file, this can now easily be done through supported apis. Furthermore, I tried to downsize the generated javascript and wasm code a bit.

Initial estimates show that bergamot-translator compiled with emscripten 3.0.0 runs at about 3x the speed of 2.0.9 (when using embedded intgemm). Speed-up when using mozIntGemm is less dramatic.

* Updated marian-dev submodule
* Revert changes specific to patching external gemm modules for wasm
* Better Compilation and Link flags

 - Added "-O3" optimization flag for linking as well
 - "-g2" only for release and debug builds
 - "-g1" for release builds
 - Replaced deprecated "--bind" flag with "-lembind"
 - Removed redundant link flag

* Upgraded emsdk to 3.1.8
* Enclosed EXPORTED_FUNCTIONS values in a list
* Fixed the remaining 2.0.9 reference in circle ci build script
* Updated README

Co-authored-by: Jelmer van der Linde <jelmer@ikhoefgeen.nl>
This commit is contained in:
Abhishek Aggarwal 2022-04-20 01:39:32 +02:00 committed by GitHub
parent 98af5945c5
commit e34420647d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 61 additions and 25 deletions

View File

@ -2,7 +2,7 @@ version: 2.1
jobs:
build-with-wormhole:
docker:
- image: 'emscripten/emsdk:2.0.9'
- image: 'emscripten/emsdk:3.1.8'
resource_class: medium
working_directory: ~/checkout
@ -48,7 +48,7 @@ jobs:
build-without-wormhole:
docker:
- image: 'emscripten/emsdk:2.0.9'
- image: 'emscripten/emsdk:3.1.8'
resource_class: medium
working_directory: ~/checkout

View File

@ -11,7 +11,7 @@ name: "Build"
- '**'
env:
qt_version: "6.2.1" # only used by build-macos
emsdk_version: 2.0.9 # For use in emscripten build
emsdk_version: 3.1.8 # For use in emscripten build
ccache_basedir: ${{ github.workspace }}
ccache_dir: "${{ github.workspace }}/.ccache"
ccache_compilercheck: content

View File

@ -5,6 +5,7 @@ add_subdirectory(marian-dev EXCLUDE_FROM_ALL)
if(COMPILE_WASM)
# This is a bad way of adding compilation flags. Will be improved soon.
add_compile_options(${WASM_COMPILE_FLAGS})
add_link_options(${WASM_LINK_FLAGS})
endif(COMPILE_WASM)
add_subdirectory(ssplit-cpp EXCLUDE_FROM_ALL)

@ -1 +1 @@
Subproject commit 844800efccba6e670250caac1735ca2c8c8e508e
Subproject commit 199201eb89b2941afdadb14164e936d412f897ad

View File

@ -112,9 +112,55 @@ message(STATUS "Project name: ${PROJECT_NAME}")
message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
if(COMPILE_WASM)
# See https://github.com/emscripten-core/emscripten/blob/main/src/settings.js
set(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
list(APPEND WASM_COMPILE_FLAGS -O3 -g2 -fPIC -mssse3 -msimd128)
list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=0" "SHELL:-s DISABLE_EXCEPTION_CATCHING=1" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1")
list(APPEND WASM_COMPILE_FLAGS
-O3
# Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size
$<$<CONFIG:Release>:-g1>
# Relevant Debug info only for release with debug builds as this increases wasm binary size
$<$<CONFIG:RelWithDebInfo>:-g2>
-fPIC
-mssse3
-msimd128
# -fno-exceptions # Can't do that because spdlog uses exceptions
-sDISABLE_EXCEPTION_CATCHING=1
-sSTRICT=1
)
list(APPEND WASM_LINK_FLAGS
-O3
# Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size
$<$<CONFIG:Release>:-g1>
# Relevant Debug info only for release with debug builds as this increases wasm binary size
$<$<CONFIG:RelWithDebInfo>:-g2>
-lembind
# Save some code, and some speed
-sASSERTIONS=0
-sDISABLE_EXCEPTION_CATCHING=1
# the intgemm functions we call will be undefined since these are linked at
# runtime by our own javascript.
-sLLD_REPORT_UNDEFINED
-sERROR_ON_UNDEFINED_SYMBOLS=0
# Cause we can!
-sSTRICT=1
# You know we need it
-sALLOW_MEMORY_GROWTH=1
-sENVIRONMENT=web,worker
# No need to call main(), there's nothing there.
-sINVOKE_RUN=0
# No need for filesystem code in the generated Javascript
-sFILESYSTEM=0
# If you turn this on, it will mangle names which makes the dynamic linking hard.
-sDECLARE_ASM_MODULE_EXPORTS=0
# Export all of the intgemm functions in case we need to fall back to using the embedded intgemm
-sEXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback]
# Necessary for mozintgemm linking. This prepares the `wasmMemory` variable ahead of time as
# opposed to delegating that task to the wasm binary itself. This way we can link MozIntGEMM
# module to the same memory as the main bergamot-translator module.
-sIMPORTED_MEMORY=1
# Dynamic execution is either frowned upon or blocked inside browser extensions
-sDYNAMIC_EXECUTION=0
)
endif(COMPILE_WASM)
# Needs to be enabled before including the folder containing tests (src/tests)

View File

@ -23,8 +23,8 @@ Building on wasm requires Emscripten toolchain. It can be downloaded and install
* Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git`
* Enter the cloned directory: `cd emsdk`
* Install the lastest sdk tools: `./emsdk install 2.0.9`
* Activate the latest sdk tools: `./emsdk activate 2.0.9`
* Install the sdk: `./emsdk install 3.1.8`
* Activate the sdk: `./emsdk activate 3.1.8`
* Activate path variables: `source ./emsdk_env.sh`
#### <a name="Compile"></a> Compile

View File

@ -51,8 +51,8 @@ if [ "$EMSDK" == "" ]; then
fi
if [ "$EMSDK_UPDATE_REQUIRED" == "1" ]; then
cd emsdk
./emsdk install 2.0.9
./emsdk activate 2.0.9
./emsdk install 3.1.8
./emsdk activate 3.1.8
cd -
fi
source ./emsdk/emsdk_env.sh

View File

@ -31,6 +31,7 @@ if(COMPILE_WASM)
# Enable code that is required for generating JS bindings
target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS)
target_compile_options(bergamot-translator PRIVATE ${WASM_COMPILE_FLAGS})
target_link_options(bergamot-translator PRIVATE ${WASM_LINK_FLAGS})
endif(COMPILE_WASM)
if(ENABLE_CACHE_STATS)

View File

@ -14,27 +14,15 @@ target_include_directories(bergamot-translator-worker
PRIVATE ${CMAKE_SOURCE_DIR}/src/translator
PRIVATE ${CMAKE_SOURCE_DIR}
)
# This compile definition is required for generating binding code properly
target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS)
target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS})
set(LINKER_FLAGS "-g2 --bind -s ASSERTIONS=0 -s DISABLE_EXCEPTION_CATCHING=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1 -s EXPORTED_RUNTIME_METHODS=[addOnPreMain]")
# Avoid node.js-code in emscripten glue-code
set(LINKER_FLAGS "${LINKER_FLAGS} -s ENVIRONMENT=web,worker")
# Append version information in the Javascript artifact
set(LINKER_FLAGS "${LINKER_FLAGS} --extern-pre-js ${CMAKE_CURRENT_BINARY_DIR}/project_version.js")
# Allow importing undefined symbols dynamically
set(LINKER_FLAGS "${LINKER_FLAGS} -s ERROR_ON_UNDEFINED_SYMBOLS=0 -s DECLARE_ASM_MODULE_EXPORTS=0")
# Export all the functions of fallback implementation of GEMM for wasm target
set(LINKER_FLAGS "${LINKER_FLAGS} -s EXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback]")
target_link_options(bergamot-translator-worker PRIVATE ${WASM_LINK_FLAGS})
target_link_options(bergamot-translator-worker PRIVATE --extern-pre-js=${CMAKE_CURRENT_BINARY_DIR}/project_version.js)
set_target_properties(bergamot-translator-worker PROPERTIES
SUFFIX ".js"
LINK_FLAGS ${LINKER_FLAGS}
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
)