From e34420647ddc528dac47ec57bab13771bd05abef Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal <66322306+abhi-agg@users.noreply.github.com> Date: Wed, 20 Apr 2022 01:39:32 +0200 Subject: [PATCH] Upgrade emsdk to 3.1.8 (#414) * Rework WASM compilation options Necessary to work with newer versions of emscripten that are more picky about which option goes to the compiler, and which to the linker. Also took the opportunity to remove the need for the patching of the bergamot-translation-worker.js file, this can now easily be done through supported apis. Furthermore, I tried to downsize the generated javascript and wasm code a bit. Initial estimates show that bergamot-translator compiled with emscripten 3.0.0 runs at about 3x the speed of 2.0.9 (when using embedded intgemm). Speed-up when using mozIntGemm is less dramatic. * Updated marian-dev submodule * Revert changes specific to patching external gemm modules for wasm * Better Compilation and Link flags - Added "-O3" optimization flag for linking as well - "-g2" only for release and debug builds - "-g1" for release builds - Replaced deprecated "--bind" flag with "-lembind" - Removed redundant link flag * Upgraded emsdk to 3.1.8 * Enclosed EXPORTED_FUNCTIONS values in a list * Fixed the remaining 2.0.9 reference in circle ci build script * Updated README Co-authored-by: Jelmer van der Linde --- .circleci/config.yml | 4 +-- .github/workflows/build.yml | 2 +- 3rd_party/CMakeLists.txt | 1 + 3rd_party/marian-dev | 2 +- CMakeLists.txt | 50 +++++++++++++++++++++++++++++++++-- README.md | 4 +-- build-wasm.sh | 4 +-- src/translator/CMakeLists.txt | 1 + wasm/CMakeLists.txt | 18 +++---------- 9 files changed, 61 insertions(+), 25 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7275fd4..140e311 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2.1 jobs: build-with-wormhole: docker: - - image: 'emscripten/emsdk:2.0.9' + - image: 'emscripten/emsdk:3.1.8' 
resource_class: medium working_directory: ~/checkout @@ -48,7 +48,7 @@ jobs: build-without-wormhole: docker: - - image: 'emscripten/emsdk:2.0.9' + - image: 'emscripten/emsdk:3.1.8' resource_class: medium working_directory: ~/checkout diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2246b3c..da3c370 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ name: "Build" - '**' env: qt_version: "6.2.1" # only used by build-macos - emsdk_version: 2.0.9 # For use in emscripten build + emsdk_version: 3.1.8 # For use in emscripten build ccache_basedir: ${{ github.workspace }} ccache_dir: "${{ github.workspace }}/.ccache" ccache_compilercheck: content diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt index 72a49e8..1888d6d 100644 --- a/3rd_party/CMakeLists.txt +++ b/3rd_party/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(marian-dev EXCLUDE_FROM_ALL) if(COMPILE_WASM) # This is a bad way of adding compilation flags. Will be improved soon. 
add_compile_options(${WASM_COMPILE_FLAGS}) + add_link_options(${WASM_LINK_FLAGS}) endif(COMPILE_WASM) add_subdirectory(ssplit-cpp EXCLUDE_FROM_ALL) diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev index 844800e..199201e 160000 --- a/3rd_party/marian-dev +++ b/3rd_party/marian-dev @@ -1 +1 @@ -Subproject commit 844800efccba6e670250caac1735ca2c8c8e508e +Subproject commit 199201eb89b2941afdadb14164e936d412f897ad diff --git a/CMakeLists.txt b/CMakeLists.txt index f6e6af4..dc51acf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,9 +112,55 @@ message(STATUS "Project name: ${PROJECT_NAME}") message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}") if(COMPILE_WASM) + # See https://github.com/emscripten-core/emscripten/blob/main/src/settings.js set(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160") - list(APPEND WASM_COMPILE_FLAGS -O3 -g2 -fPIC -mssse3 -msimd128) - list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=0" "SHELL:-s DISABLE_EXCEPTION_CATCHING=1" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1") + list(APPEND WASM_COMPILE_FLAGS + -O3 + # Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size + $<$<CONFIG:Release>:-g1> + # Relevant Debug info only for release with debug builds as this increases wasm binary size + $<$<CONFIG:RelWithDebInfo>:-g2> + -fPIC + -mssse3 + -msimd128 + # -fno-exceptions # Can't do that because spdlog uses exceptions + -sDISABLE_EXCEPTION_CATCHING=1 + -sSTRICT=1 + ) + list(APPEND WASM_LINK_FLAGS + -O3 + # Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size + $<$<CONFIG:Release>:-g1> + # Relevant Debug info only for release with debug builds as this increases wasm binary size + $<$<CONFIG:RelWithDebInfo>:-g2> + -lembind + # Save some code, and some speed + -sASSERTIONS=0 + -sDISABLE_EXCEPTION_CATCHING=1 + # the intgemm functions we call will be undefined since these are linked at + # runtime by our own 
javascript. + -sLLD_REPORT_UNDEFINED + -sERROR_ON_UNDEFINED_SYMBOLS=0 + # Cause we can! + -sSTRICT=1 + # You know we need it + -sALLOW_MEMORY_GROWTH=1 + -sENVIRONMENT=web,worker + # No need to call main(), there's nothing there. + -sINVOKE_RUN=0 + # No need for filesystem code in the generated Javascript + -sFILESYSTEM=0 + # If you turn this on, it will mangle names which makes the dynamic linking hard. + -sDECLARE_ASM_MODULE_EXPORTS=0 + # Export all of the intgemm functions in case we need to fall back to using the embedded intgemm + -sEXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback] + # Necessary for mozintgemm linking. This prepares the `wasmMemory` variable ahead of time as + # opposed to delegating that task to the wasm binary itself. This way we can link MozIntGEMM + # module to the same memory as the main bergamot-translator module. + -sIMPORTED_MEMORY=1 + # Dynamic execution is either frowned upon or blocked inside browser extensions + -sDYNAMIC_EXECUTION=0 + ) endif(COMPILE_WASM) # Needs to be enabled before including the folder containing tests (src/tests) diff --git a/README.md b/README.md index 11f144c..b70c818 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,8 @@ Building on wasm requires Emscripten toolchain. 
It can be downloaded and install * Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git` * Enter the cloned directory: `cd emsdk` -* Install the lastest sdk tools: `./emsdk install 2.0.9` -* Activate the latest sdk tools: `./emsdk activate 2.0.9` +* Install the sdk: `./emsdk install 3.1.8` +* Activate the sdk: `./emsdk activate 3.1.8` * Activate path variables: `source ./emsdk_env.sh` #### Compile diff --git a/build-wasm.sh b/build-wasm.sh index adc6556..ff12013 100755 --- a/build-wasm.sh +++ b/build-wasm.sh @@ -51,8 +51,8 @@ if [ "$EMSDK" == "" ]; then fi if [ "$EMSDK_UPDATE_REQUIRED" == "1" ]; then cd emsdk - ./emsdk install 2.0.9 - ./emsdk activate 2.0.9 + ./emsdk install 3.1.8 + ./emsdk activate 3.1.8 cd - fi source ./emsdk/emsdk_env.sh diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index 2beb2e9..1d773b4 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -31,6 +31,7 @@ if(COMPILE_WASM) # Enable code that is required for generating JS bindings target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS) target_compile_options(bergamot-translator PRIVATE ${WASM_COMPILE_FLAGS}) + target_link_options(bergamot-translator PRIVATE ${WASM_LINK_FLAGS}) endif(COMPILE_WASM) if(ENABLE_CACHE_STATS) diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 92c9e16..ef8fd98 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -14,27 +14,15 @@ target_include_directories(bergamot-translator-worker PRIVATE ${CMAKE_SOURCE_DIR}/src/translator PRIVATE ${CMAKE_SOURCE_DIR} ) + # This compile definition is required for generating binding code properly target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS) target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS}) - -set(LINKER_FLAGS "-g2 --bind -s ASSERTIONS=0 -s DISABLE_EXCEPTION_CATCHING=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1 -s EXPORTED_RUNTIME_METHODS=[addOnPreMain]") - 
-# Avoid node.js-code in emscripten glue-code -set(LINKER_FLAGS "${LINKER_FLAGS} -s ENVIRONMENT=web,worker") - -# Append version information in the Javascript artifact -set(LINKER_FLAGS "${LINKER_FLAGS} --extern-pre-js ${CMAKE_CURRENT_BINARY_DIR}/project_version.js") - -# Allow importing undefined symbols dynamically -set(LINKER_FLAGS "${LINKER_FLAGS} -s ERROR_ON_UNDEFINED_SYMBOLS=0 -s DECLARE_ASM_MODULE_EXPORTS=0") - -# Export all the functions of fallback implementation of GEMM for wasm target -set(LINKER_FLAGS "${LINKER_FLAGS} -s EXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback]") +target_link_options(bergamot-translator-worker PRIVATE ${WASM_LINK_FLAGS}) +target_link_options(bergamot-translator-worker PRIVATE --extern-pre-js=${CMAKE_CURRENT_BINARY_DIR}/project_version.js) set_target_properties(bergamot-translator-worker PROPERTIES SUFFIX ".js" - LINK_FLAGS ${LINKER_FLAGS} RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} )