From d82e01eda4c40b3fe0a0693066e8b3d310c4e82d Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Sat, 1 May 2021 00:29:23 +0100 Subject: [PATCH] Full windows support with ssplit from browsermt, not a fork (#109) * Update marian-dev to the newest mac version * Attempt windows workflow * force workflow rerun * Separate id * Attempt 3 at github action * Marian dev submodule now compiles with apple clang * Updated ssplit version to something more recent * Attempt to fix compile on wasm * Do not compile subproject tests * Fix emscripten compilation on Mac * 99% on the way to windows compile * Try with a different generator * Build release not debug * Revert CMakeLists.txt hacks * Fix sse2 compilation failure * MSVC settings for WIN32 * Add nodefaultlib LIBCMT * Do not compile ssplit.cpp as it contains sys/mman.h * Revert ab56b9aa4f4360b0ab98d5806658d4302f31db1d * Update paths * Set the build type to release if not set previously * Attempt to build release with the windows workflow * Attempt 5 at VS studio release build * Attempt 6 at getting release build on MSVC generator * The windows build is debug at the moment... * fix ssplit for ubuntu 16.04 * Fix compilation with clang * Compile on ubuntu16.04 * Explain what is going on * Updated ssplit and workflow --- .github/workflows/windows.yml | 13 +++++++------ 3rd_party/ssplit-cpp | 2 +- CMakeLists.txt | 25 ++++++++++++++++++++++++- src/QualityScore.h | 1 + src/translator/definitions.h | 17 +++++++++++++++++ src/translator/sentence_splitter.h | 1 + 6 files changed, 51 insertions(+), 8 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 6c3a05f..00e9cfa 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -37,7 +37,7 @@ jobs: shell: powershell - name: Prepare vcpkg - uses: lukka/run-vcpkg@v4 + uses: lukka/run-vcpkg@v7.3 with: vcpkgArguments: protobuf pcre2 vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da @@ -45,22 +45,23 @@ jobs: vcpkgTriplet: x64-windows-static # Windows CPU only minimal build - - name: Build Debug + - name: Build Release # @TODO this is actually a debug build until the ninja generator gets fixed uses: lukka/run-cmake@v3 with: - buildDirectory: ${{ github.workspace }}/build/Debug + buildDirectory: ${{ github.workspace }}/build cmakeAppendedArgs: '-G Ninja - -DCMAKE_BUILD_TYPE="Debug" + -DCMAKE_BUILD_TYPE="Release" -DUSE_WASM_COMPATIBLE_SOURCE="OFF" -DUSE_STATIC_LIBS="TRUE"' cmakeListsOrSettingsJson: CMakeListsTxtAdvanced cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt useVcpkgToolchainFile: true + cmakeBuildType: Release - name: Print versions - working-directory: build/ + working-directory: build run: | - .\app\bergamot-translator-app.exe --version + .\app\service-cli.exe --version dir *.exe shell: cmd diff --git a/3rd_party/ssplit-cpp b/3rd_party/ssplit-cpp index dfefe34..8d338ed 160000 --- a/3rd_party/ssplit-cpp +++ b/3rd_party/ssplit-cpp @@ -1 +1 @@ -Subproject commit dfefe34218fe3aced70266994b6557f029fcbdde +Subproject commit 8d338ed5c77d22f8c86f60554596fa57bf5091e6 diff --git a/CMakeLists.txt b/CMakeLists.txt index 412b386..3fe03c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,28 @@ project(bergamot_translator CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# Note that with CMake MSVC build, the option CMAKE_BUILD_TYPE is automatically derived from the key +# 'configurationType' in CMakeSettings.json configurations +if(NOT CMAKE_BUILD_TYPE) + message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release") + set(CMAKE_BUILD_TYPE "Release") +endif() +#MSVC can't seem to pick up correct flags otherwise: +if(MSVC) + add_definitions(-DUSE_SSE2=1) # Supposed to fix something in the sse_mathfun.h but not sure it does + set(INTRINSICS "/arch:AVX2") # ARCH we're targetting on win32. @TODO variable + + set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG") + + # ignores warning LNK4049: locally defined symbol free imported - this comes from zlib + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT") + set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD") + set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental") +endif(MSVC) + include(CMakeDependentOption) # Project specific cmake options @@ -22,11 +44,12 @@ SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be package SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version") SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece") SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs") +SET(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests") if (USE_WASM_COMPATIBLE_SOURCE) SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") SET(USE_MKL OFF CACHE BOOL "Compile with MKL support") # # Setting the ssplit-cpp submodule specific cmake options for wasm - SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2") + SET(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2") endif() # Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html diff --git a/src/QualityScore.h b/src/QualityScore.h index 3ad6349..a6beb4e 100644 --- a/src/QualityScore.h +++ b/src/QualityScore.h @@ -8,6 +8,7 @@ #include #include +#include "translator/definitions.h" /* All possible Granularities for which Quality Scores can be returned for * translated text. */ diff --git a/src/translator/definitions.h b/src/translator/definitions.h index 73d8320..18b5fca 100644 --- a/src/translator/definitions.h +++ b/src/translator/definitions.h @@ -28,4 +28,21 @@ typedef AlignedVector AlignedMemory; } // namespace bergamot } // namespace marian +// @TODO at the moment the usage of string_view in this repository is a hot mess and a disaster waiting to happen. +// ssplit uses std::string_view if the compiler supports c++17, else falls back to c++11 and absl::string_view +// bergamot-translator uses, depending on the source file std::string_view (which will break if ssplit-cpp uses +// absl::string_view) and marian::string_view which is an export of (confusingly) the sentencepiece module that +// marian has. marian::string_view is our addition to the marian fork, which will make merging even nicer. Not. +// This is just an ugly patchwork that allos gcc5, our lowest targetted gcc to run. We don't actually try to run +// on older compilers. + +#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__) +#include +namespace std { + using string_view = std::experimental::string_view; +} // namespace std +#else +#include +#endif + #endif // SRC_BERGAMOT_DEFINITIONS_H_ diff --git a/src/translator/sentence_splitter.h b/src/translator/sentence_splitter.h index 5175176..1c4742e 100644 --- a/src/translator/sentence_splitter.h +++ b/src/translator/sentence_splitter.h @@ -4,6 +4,7 @@ #include "common/options.h" #include "data/types.h" #include "ssplit.h" +#include "definitions.h" #include namespace marian {