diff --git a/.github/workflows/native-custom_marian-mac.yml b/.github/workflows/native-custom_marian-mac.yml deleted file mode 100644 index 1aae7e5..0000000 --- a/.github/workflows/native-custom_marian-mac.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Native (Custom Marian) MacOS - -on: - push: - branches: [ main, ci-sandbox ] - pull_request: - branches: [ main, ci-sandbox ] - -jobs: - build-macos: - name: MacOS - runs-on: macos-10.15 - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Configure CMake - run: | - mkdir -p build - cd build - cmake .. - - - name: Compile - working-directory: build - run: make -j2 - - - name: Print versions - working-directory: build - run: | - ./app/bergamot-translator-app --version diff --git a/.github/workflows/native-custom_marian-ubuntu.yml b/.github/workflows/native-custom_marian-ubuntu.yml deleted file mode 100644 index f051871..0000000 --- a/.github/workflows/native-custom_marian-ubuntu.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Native (Custom Marian) Ubuntu - -on: - push: - branches: [ main, ci-sandbox ] - pull_request: - branches: [ main, ci-sandbox ] - -jobs: - build-macos: - name: Ubuntu - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Configure CMake - run: | - mkdir -p build - cd build - cmake .. 
- - - name: Compile - working-directory: build - run: make -j2 - - - name: Print versions - working-directory: build - run: | - ./app/bergamot-translator-app --version diff --git a/.github/workflows/native-full_marian-mac.yml b/.github/workflows/native-full_marian-mac.yml deleted file mode 100644 index 1928c5c..0000000 --- a/.github/workflows/native-full_marian-mac.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Native (Full Marian) MacOS - -on: - push: - branches: [ main, ci-sandbox ] - pull_request: - branches: [ main, ci-sandbox ] - -jobs: - build-macos: - name: MacOS CPU-only - runs-on: macos-10.15 - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Install dependencies - run: brew install openblas protobuf - - # Openblas location is exported explicitly because openblas is keg-only, - # which means it was not symlinked into /usr/local/. - # CMake cannot find BLAS on GitHub runners if Marian is being compiled - # statically, hence USE_STATIC_LIBS=off - - name: Configure CMake - run: | - export LDFLAGS="-L/usr/local/opt/openblas/lib" - export CPPFLAGS="-I/usr/local/opt/openblas/include" - mkdir -p build - cd build - cmake .. 
\ - -DCOMPILE_CPU=on \ - -DCOMPILE_CUDA=off \ - -DCOMPILE_EXAMPLES=on \ - -DCOMPILE_SERVER=on \ - -DCOMPILE_TESTS=on \ - -DUSE_FBGEMM=on \ - -DUSE_SENTENCEPIECE=on \ - -DUSE_STATIC_LIBS=off \ - -DUSE_WASM_COMPATIBLE_SOURCE=off - - - name: Compile - working-directory: build - run: make -j2 - - - name: Run unit tests - working-directory: build - run: make test - - - name: Print versions - working-directory: build - run: | - ./marian --version - ./marian-decoder --version - ./marian-scorer --version - ./spm_encode --version - diff --git a/.github/workflows/native-full_marian-ubuntu.yml b/.github/workflows/native-full_marian-ubuntu.yml deleted file mode 100644 index e414f64..0000000 --- a/.github/workflows/native-full_marian-ubuntu.yml +++ /dev/null @@ -1,120 +0,0 @@ -name: Native (Full Marian) Ubuntu - -on: - push: - branches: [ main, ci-test ] - pull_request: - branches: [ main, ci-test ] - -jobs: - build-ubuntu: - strategy: - matrix: - include: - # Ubuntu CPU-only build - - name: "Ubuntu CPU-only" - os: ubuntu-latest - cuda: "" - gcc: 8 - cpu: true - gpu: false - # GPU Builds are commented out, for bergamot-translator CI runs. 
- # Ubuntu GPU-only build - # - name: "Ubuntu GPU-only" - # os: ubuntu-latest - # cuda: "10.2" - # gcc: 7 - # cpu: false - # gpu: true - # Ubuntu 20.04 supports CUDA 11+ - #- name: "Ubuntu 20.04 CUDA 11.0 gcc-9" - #os: ubuntu-20.04 - #cuda: "11.0" - #gcc: 9 - #cpu: false - #gpu: true - # Ubuntu 18.04 supports CUDA 10.1+ - # - name: "Ubuntu 18.04 CUDA 10.2 gcc-8" - # os: ubuntu-18.04 - # cuda: "10.2" - # gcc: 8 - # cpu: true - # gpu: true - # Ubuntu 16.04 supports CUDA 8+ - # - name: "Ubuntu 16.04 CUDA 9.2 gcc-7" - # os: ubuntu-16.04 - # cuda: "9.2" - # gcc: 7 - # cpu: true - # gpu: true - - runs-on: ${{ matrix.os }} - name: ${{ matrix.name }} - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - # The following packages are already installed on GitHub-hosted runners: - # build-essential openssl libssl-dev - # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because - # it is installed together with libprotobuf-dev - - name: Install dependencies - run: sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev g++-8 - - # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html - - name: Install MKL - run: | - wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - - sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" - sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" - sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088 - if: matrix.cpu == true - - # The script simplifies installation of different versions of CUDA - - name: Install CUDA - run: ./3rd_party/marian-dev/scripts/ci/install_cuda_ubuntu.sh ${{ matrix.cuda }} - if: matrix.gpu == true - - # Boost is installed on GitHub-hosted runners in a non-standard location - # 
https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671 - - name: Configure CMake - run: | - mkdir -p build - cd build - CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \ - cmake .. \ - -DBoost_ARCHITECTURE=-x64 \ - -DCMAKE_BUILD_TYPE=Release \ - -DCOMPILE_CPU=${{ matrix.cpu }} \ - -DCOMPILE_CUDA=${{ matrix.gpu }} \ - -DCOMPILE_EXAMPLES=on \ - -DCOMPILE_SERVER=on \ - -DCOMPILE_TESTS=on \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-${{ matrix.cuda }} \ - -DUSE_FBGEMM=${{ matrix.cpu }} \ - -DUSE_SENTENCEPIECE=on \ - -DUSE_STATIC_LIBS=on \ - -DUSE_WASM_COMPATIBLE_SOURCE=off - - - name: Compile - working-directory: build - run: make -j2 - - - name: Run unit tests - working-directory: build - run: make test - # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds - if: matrix.gpu == false - - - name: Print versions - working-directory: build - run: | - ./marian --version - ./marian-decoder --version - ./marian-scorer --version - ./marian-server --version - ./spm_encode --version - diff --git a/.github/workflows/native-mac.yml b/.github/workflows/native-mac.yml new file mode 100644 index 0000000..8df203d --- /dev/null +++ b/.github/workflows/native-mac.yml @@ -0,0 +1,108 @@ +name: Native MacOS + +on: + push: + branches: [ main, ci-sandbox ] + pull_request: + branches: [ main, ci-sandbox ] + +jobs: + build-macos: + strategy: + fail-fast: false + matrix: + include: + - name: "full-marian" + os: macos-10.15 + test_tags: "" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "ON" + USE_WASM_COMPATIBLE_SOURCE: "OFF" + USE_FBGEMM: "OFF" + USE_STATIC_LIBS: "OFF" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + - name: "minimal-marian" + os: macos-10.15 + test_tags: "'#wasm'" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and compile tests fail + USE_WASM_COMPATIBLE_SOURCE: "ON" + USE_FBGEMM: "OFF" + # explicitly 
set due to requirement of minimal marian being used + # within WASM. This is some yaml ugliness, but issok. + USE_STATIC_LIBS: "ON" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install dependencies + run: | + brew update + brew install openblas protobuf coreutils + + # Openblas location is exported explicitly because openblas is keg-only, + # which means it was not symlinked into /usr/local/. + - name: Set BLAS Environment variables + run: | + echo "LDFLAGS=-L/usr/local/opt/openblas/lib" >> $GITHUB_ENV + echo "CPPFLAGS=-I/usr/local/opt/openblas/include" >> $GITHUB_ENV + if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF' + + # CMake cannot find BLAS on GitHub runners if Marian is being compiled + # statically, hence USE_STATIC_LIBS=off + - name: Configure CMake + run: | + mkdir -p build + cd build + cmake .. \ + -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\ + -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\ + -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \ + -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \ + -DUSE_STATIC_LIBS=${{ matrix.cmake.USE_STATIC_LIBS }} \ + -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \ + -DUSE_FBGEMM=${{ matrix.cmake.USE_FBGEMM }} + + - name: Compile + working-directory: build + run: make -j2 + + - name: Run unit tests + working-directory: build + run: make test + if: matrix.cmake.COMPILE_TESTS == 'ON' + + - name: Print versions + working-directory: build + run: | + ./app/bergamot-translator-app --version + + - name: Install regression-test framework (BRT) + working-directory: bergamot-translator-tests + run : make install + + - name: Run regression-tests (BRT) + working-directory: bergamot-translator-tests + run : MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }} + + - name: Upload regression-tests artifacts + uses: 
actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: brt-artifacts-${{ matrix.name }} + path: | + bergamot-translator-tests/**/*.expected + bergamot-translator-tests/**/*.log + bergamot-translator-tests/**/*.out diff --git a/.github/workflows/native-ubuntu.yml b/.github/workflows/native-ubuntu.yml new file mode 100644 index 0000000..dc8016b --- /dev/null +++ b/.github/workflows/native-ubuntu.yml @@ -0,0 +1,117 @@ +name: Native Ubuntu + +on: + push: + branches: [ main, ci-sandbox ] + pull_request: + branches: [ main, ci-sandbox ] + +jobs: + build-ubuntu: + strategy: + fail-fast: false + matrix: + include: + - name: "full-marian" + os: ubuntu-latest + gcc: 8 + cpu: 'ON' + gpu: 'OFF' + test_tags: "" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "ON" + USE_WASM_COMPATIBLE_SOURCE: "OFF" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + - name: "minimal-marian" + os: ubuntu-latest + gcc: 8 + cpu: 'ON' + gpu: 'OFF' + test_tags: "'#wasm'" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and COMPILE_TEST=ON fails. 
+ USE_WASM_COMPATIBLE_SOURCE: "ON" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + + runs-on: ${{ matrix.os }} + name: ${{ matrix.name }} + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + # The following packages are already installed on GitHub-hosted runners: + # build-essential openssl libssl-dev + # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because + # it is installed together with libprotobuf-dev + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + libgoogle-perftools-dev libprotobuf-dev protobuf-compiler \ + libboost-all-dev g++-${{ matrix.gcc }} + + # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html + - name: Install MKL + run: | + wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - + sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088 + if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF' + + # Boost is installed on GitHub-hosted runners in a non-standard location + # https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671 + - name: Configure CMake + run: | + mkdir -p build + cd build + CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \ + cmake .. 
\ + -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\ + -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\ + -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \ + -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \ + -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \ + + - name: Compile bergamot-translator + working-directory: build + run: make -j2 + + - name: Run unit tests + working-directory: build + run: make test + # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds + if: matrix.gpu == 'OFF' && matrix.cmake.COMPILE_TESTS == 'ON' + + - name: Print versions + working-directory: build + run: | + ./app/bergamot-translator-app --version + + + - name: Install regression-test framework (BRT) + working-directory: bergamot-translator-tests + run : make install + + - name: Run regression-tests (BRT) + working-directory: bergamot-translator-tests + run : MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }} + + - name: Upload regression-tests artifacts + uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: brt-artifacts-${{ matrix.name }} + path: | + bergamot-translator-tests/**/*.expected + bergamot-translator-tests/**/*.log + bergamot-translator-tests/**/*.out diff --git a/.github/workflows/wasm-custom_marian-mac.yml b/.github/workflows/wasm-custom_marian-mac.yml index 87141c7..c275f3c 100644 --- a/.github/workflows/wasm-custom_marian-mac.yml +++ b/.github/workflows/wasm-custom_marian-mac.yml @@ -40,9 +40,8 @@ jobs: - name: Check artifacts working-directory: build-wasm run: | - export WASM_ARTIFACTS_DIR=wasm - ls -all ${WASM_ARTIFACTS_DIR} - if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null + ls -all bergamot* + if ls bergamot*.wasm &>/dev/null && ls bergamot*.js &>/dev/null then echo "Artifacts Successfully Generated" else diff --git a/.github/workflows/wasm-custom_marian-ubuntu.yml b/.github/workflows/wasm-custom_marian-ubuntu.yml index 
d1364dc..4483546 100644 --- a/.github/workflows/wasm-custom_marian-ubuntu.yml +++ b/.github/workflows/wasm-custom_marian-ubuntu.yml @@ -2,9 +2,9 @@ name: WASM (Custom Marian) Ubuntu on: push: - branches: [ main ] + branches: [ main, ci-sandbox ] pull_request: - branches: [ main ] + branches: [ main, ci-sandbox ] jobs: build-wasm: @@ -40,9 +40,8 @@ jobs: - name: Check artifacts working-directory: build-wasm run: | - export WASM_ARTIFACTS_DIR=wasm - ls -all ${WASM_ARTIFACTS_DIR} - if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null + ls -all bergamot* + if ls bergamot*.wasm &>/dev/null && ls bergamot*.js &>/dev/null then echo "Artifacts Successfully Generated" else diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index fd1f21f..00e9cfa 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -17,12 +17,6 @@ jobs: # Windows CPU-only build - name: "Windows CPU-only" cuda: "" - gpu: false - # GPU Builds are commented out, for bergamot-translator CI runs. 
- # Windows CPU+GPU build - # - name: "Windows CPU+CUDA" - # cuda: "10.2" - # gpu: true runs-on: windows-2019 name: ${{ matrix.name }} @@ -42,89 +36,32 @@ jobs: echo "MKLROOT=${{ github.workspace }}\mkl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append shell: powershell - - name: Install CUDA - run: | - .\3rd_party\marian-dev\scripts\ci\install_cuda_windows.ps1 "10.2" - # Set CUDA_PATH environment variable so that CMake can find CUDA - echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - shell: powershell - if: matrix.gpu == true - - name: Prepare vcpkg - uses: lukka/run-vcpkg@v4 + uses: lukka/run-vcpkg@v7.3 with: - vcpkgArguments: protobuf - vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da + vcpkgArguments: protobuf pcre2 + vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da vcpkgDirectory: ${{ github.workspace }}/vcpkg/ vcpkgTriplet: x64-windows-static - # Windows CUDA builds use USE_NCCL=off due to compilation errors. 
- - name: Build Debug + # Windows CPU only minimal build + - name: Build Release # @TODO this is actually a debug build until the ninja generator gets fixed uses: lukka/run-cmake@v3 with: - buildDirectory: ${{ github.workspace }}/build/Debug + buildDirectory: ${{ github.workspace }}/build cmakeAppendedArgs: '-G Ninja - -DCMAKE_BUILD_TYPE="Debug" - -DOPENSSL_USE_STATIC_LIBS="TRUE" - -DOPENSSL_MSVC_STATIC_RT="TRUE" - -DCOMPILE_CPU="TRUE" - -DCOMPILE_CUDA="${{ matrix.gpu }}" - -DCOMPILE_SERVER="FALSE" - -DCOMPILE_TESTS="TRUE" - -DUSE_FBGEMM="TRUE" - -DUSE_MPI="FALSE" - -DUSE_NCCL="FALSE" - -DUSE_SENTENCEPIECE="TRUE" - -DUSE_STATIC_LIBS="TRUE"' - cmakeListsOrSettingsJson: CMakeListsTxtAdvanced - cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt - useVcpkgToolchainFile: true - # Building in Debug is sufficient for the all-in CPU+GPU compilation; - # its main purpose is to detect warnings that the Release build is not - # able to find sometimes. - if: matrix.gpu == true - - # Windows CUDA builds use USE_NCCL=off due to compilation errors - # Boost is pre-installed on Azure/GitHub-hosted Windows runners - # https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md#boost - # (not used yet) - - name: Build Release - uses: lukka/run-cmake@v3 - with: - buildDirectory: ${{ github.workspace }}/build/ - cmakeAppendedArgs: '-G Ninja - -DBOOST_ROOT="$(BOOST_ROOT_1_72_0)" - -DBOOST_INCLUDEDIR="$(BOOST_ROOT_1_72_0)/include" - -DBOOST_LIBRARYDIR="$(BOOST_ROOT_1_72_0)/lib" -DCMAKE_BUILD_TYPE="Release" - -DOPENSSL_USE_STATIC_LIBS="TRUE" - -DOPENSSL_MSVC_STATIC_RT="TRUE" - -DCOMPILE_CPU="TRUE" - -DCOMPILE_CUDA="${{ matrix.gpu }}" - -DCOMPILE_SERVER="FALSE" - -DCOMPILE_TESTS="TRUE" - -DUSE_FBGEMM="TRUE" - -DUSE_MPI="FALSE" - -DUSE_NCCL="FALSE" - -DUSE_SENTENCEPIECE="TRUE" + -DUSE_WASM_COMPATIBLE_SOURCE="OFF" -DUSE_STATIC_LIBS="TRUE"' cmakeListsOrSettingsJson: CMakeListsTxtAdvanced cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt 
useVcpkgToolchainFile: true + cmakeBuildType: Release - # Removing unit-tests, taken care of in browsermt/marian-dev - # - name: Run unit tests - # working-directory: build/ - # run: ctest - # # Not run in GPU builds because GitHub-hosted VMs do not have GPUs - # if: matrix.gpu == false - name: Print versions - working-directory: build/ + working-directory: build run: | - .\marian.exe --version - .\marian-decoder.exe --version - .\marian-scorer.exe --version + .\app\service-cli.exe --version dir *.exe shell: cmd diff --git a/.gitmodules b/.gitmodules index cc40735..8aa1014 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "3rd_party/ssplit-cpp"] path = 3rd_party/ssplit-cpp url = https://github.com/browsermt/ssplit-cpp +[submodule "bergamot-translator-tests"] + path = bergamot-translator-tests + url = https://github.com/browsermt/bergamot-translator-tests diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev index 0f0bcf9..94aeaa4 160000 --- a/3rd_party/marian-dev +++ b/3rd_party/marian-dev @@ -1 +1 @@ -Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172 +Subproject commit 94aeaa4616a0fb01ac95a23f0e74a214a94e7609 diff --git a/3rd_party/ssplit-cpp b/3rd_party/ssplit-cpp index dfefe34..8d338ed 160000 --- a/3rd_party/ssplit-cpp +++ b/3rd_party/ssplit-cpp @@ -1 +1 @@ -Subproject commit dfefe34218fe3aced70266994b6557f029fcbdde +Subproject commit 8d338ed5c77d22f8c86f60554596fa57bf5091e6 diff --git a/CMakeLists.txt b/CMakeLists.txt index 412b386..3fe03c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,28 @@ project(bergamot_translator CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# Note that with CMake MSVC build, the option CMAKE_BUILD_TYPE is automatically derived from the key +# 'configurationType' in CMakeSettings.json configurations +if(NOT CMAKE_BUILD_TYPE) + message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release") + set(CMAKE_BUILD_TYPE "Release") +endif() +#MSVC can't seem to pick up correct 
flags otherwise: +if(MSVC) + add_definitions(-DUSE_SSE2=1) # Supposed to fix something in the sse_mathfun.h but not sure it does + set(INTRINSICS "/arch:AVX2") # ARCH we're targeting on win32. @TODO variable + + set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG") + + # ignores warning LNK4049: locally defined symbol free imported - this comes from zlib + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT") + set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD") + set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental") +endif(MSVC) + include(CMakeDependentOption) # Project specific cmake options @@ -22,11 +44,12 @@ SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be package SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version") SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece") SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs") +SET(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests") if (USE_WASM_COMPATIBLE_SOURCE) SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") SET(USE_MKL OFF CACHE BOOL "Compile with MKL support") # # Setting the ssplit-cpp submodule specific cmake options for wasm - SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2") + SET(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2") endif() # Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html diff --git 
a/README.md b/README.md index a876ac6..9dd47c4 100644 --- a/README.md +++ b/README.md @@ -38,19 +38,18 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt. cd bergamot-translator ``` -3. Download files (only required if you want to package files in wasm binary) +3. Download files (only required if you want to perform inference using build artifacts) - This step is only required if you want to package files (e.g. models, vocabularies etc.) - into wasm binary. If you don't then just skip this step. + It packages the vocabulary files into wasm binary, which is required only if you want to perform inference. + The compilation commands will preload these files in Emscripten’s virtual file system. - The build preloads the files in Emscripten’s virtual file system. - - If you want to package bergamot project specific models, please follow these instructions: + If you want to package bergamot project specific files, please follow these instructions: ```bash - mkdir models git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models + mkdir models cp -rf bergamot-models/prod/* models gunzip models/*/* + find models \( -type f -name "model*" -or -type f -name "lex*" \) -delete ``` 4. Compile @@ -61,14 +60,14 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt. ``` 2. Compile the artefacts - * If you want to package files into wasm binary then execute following commands (Replace `FILES_TO_PACKAGE` with the path of the - directory containing the files to be packaged in wasm binary) + * If you want to package files into wasm binary then execute following commands (Replace `FILES_TO_PACKAGE` with the + directory containing all the files to be packaged) ```bash emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR=FILES_TO_PACKAGE ../ emmake make -j ``` - e.g. If you want to package bergamot project specific models (downloaded using step 3 above) then + e.g. 
If you want to package bergamot project specific files (downloaded using step 3 above) then replace `FILES_TO_PACKAGE` with `../models` * If you don't want to package any file into wasm binary then execute following commands: @@ -77,7 +76,7 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt. emmake make -j ``` - The wasm artifacts (.js and .wasm files) will be available in `wasm` folder of build directory ("build-wasm" in this case). + The wasm artifacts (.js and .wasm files) will be available in the build directory ("build-wasm" in this case). 3. Enable SIMD Wormhole via Wasm instantiation API in generated artifacts ```bash diff --git a/app/bergamot-translator-app-bytearray.cpp b/app/bergamot-translator-app-bytearray.cpp index 1fa5748..91353c0 100644 --- a/app/bergamot-translator-app-bytearray.cpp +++ b/app/bergamot-translator-app-bytearray.cpp @@ -7,9 +7,9 @@ #include -#include "TranslationModel.h" -#include "translator/parser.h" #include "translator/byte_array_util.h" +#include "translator/parser.h" +#include "translator/service.h" int main(int argc, char **argv) { @@ -20,19 +20,17 @@ int main(int argc, char **argv) { std::string config = options->asYamlString(); // Route the config string to construct marian model through TranslationModel - TranslationModel model(config, marian::bergamot::getModelMemoryFromConfig(options)); + marian::bergamot::Service model( + config, marian::bergamot::getModelMemoryFromConfig(options)); TranslationRequest translationRequest; std::vector texts; for (std::string line; std::getline(std::cin, line);) { - texts.emplace_back(line); + texts.emplace_back(line); } - auto results = model.translate(std::move(texts), translationRequest); - - // Resolve the future and get the actual result - //std::vector results = futureResults.get(); + auto results = model.translateMultiple(std::move(texts), translationRequest); for (auto &result : results) { std::cout << result.getTranslatedText() << std::endl; diff --git 
a/app/bergamot-translator-app.cpp b/app/bergamot-translator-app.cpp index 4fba00b..c487969 100644 --- a/app/bergamot-translator-app.cpp +++ b/app/bergamot-translator-app.cpp @@ -1,16 +1,17 @@ /* * main.cpp * - * An application which accepts line separated texts in stdin and returns translated ones in stdout. - * It is convenient for batch processing and can be used with tools like SacreBLEU. + * An application which accepts line separated texts in stdin and returns + * translated ones in stdout. It is convenient for batch processing and can be + * used with tools like SacreBLEU. * */ #include #include -#include "TranslationModel.h" #include "translator/parser.h" +#include "translator/service.h" int main(int argc, char **argv) { @@ -21,19 +22,16 @@ int main(int argc, char **argv) { std::string config = options->asYamlString(); // Route the config string to construct marian model through TranslationModel - TranslationModel model(config); + marian::bergamot::Service model(config); TranslationRequest translationRequest; std::vector texts; for (std::string line; std::getline(std::cin, line);) { - texts.emplace_back(line); + texts.emplace_back(line); } - auto results = model.translate(std::move(texts), translationRequest); - - // Resolve the future and get the actual result - //std::vector results = futureResults.get(); + auto results = model.translateMultiple(std::move(texts), translationRequest); for (auto &result : results) { std::cout << result.getTranslatedText() << std::endl; diff --git a/app/service-cli-bytearray.cpp b/app/service-cli-bytearray.cpp index f868d4d..d8c7059 100644 --- a/app/service-cli-bytearray.cpp +++ b/app/service-cli-bytearray.cpp @@ -27,8 +27,14 @@ int main(int argc, char *argv[]) { std::string input = std_input.str(); using marian::bergamot::Response; + marian::bergamot::ResponseOptions responseOptions; + responseOptions.qualityScores = true; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + // Wait on future 
until Response is complete - std::future responseFuture = service.translate(std::move(input)); + std::future responseFuture = + service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); diff --git a/app/service-cli.cpp b/app/service-cli.cpp index 6ed4d81..d7c72e6 100644 --- a/app/service-cli.cpp +++ b/app/service-cli.cpp @@ -8,6 +8,7 @@ #include "marian.h" #include "translator/parser.h" #include "translator/response.h" +#include "translator/response_options.h" #include "translator/service.h" int main(int argc, char *argv[]) { @@ -21,8 +22,14 @@ int main(int argc, char *argv[]) { std::string input = std_input.str(); using marian::bergamot::Response; + marian::bergamot::ResponseOptions responseOptions; + responseOptions.qualityScores = true; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + // Wait on future until Response is complete - std::future responseFuture = service.translate(std::move(input)); + std::future responseFuture = + service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); diff --git a/bergamot-translator-tests b/bergamot-translator-tests new file mode 160000 index 0000000..3771001 --- /dev/null +++ b/bergamot-translator-tests @@ -0,0 +1 @@ +Subproject commit 3771001720a8f01bba185ee5d5d908b7c266ef31 diff --git a/src/QualityScore.h b/src/QualityScore.h index 3ad6349..a6beb4e 100644 --- a/src/QualityScore.h +++ b/src/QualityScore.h @@ -8,6 +8,7 @@ #include #include +#include "translator/definitions.h" /* All possible Granularities for which Quality Scores can be returned for * translated text. */ diff --git a/src/TranslationModel.h b/src/TranslationModel.h deleted file mode 100644 index 4b1be23..0000000 --- a/src/TranslationModel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * TranslationModel.h - * - * Main interface for translation API. 
- */ - -#ifndef SRC_TRANSLATOR_TRANSLATIONMODEL_H_ -#define SRC_TRANSLATOR_TRANSLATIONMODEL_H_ - -#include -#include -#include - -// All 3rd party includes -#include "3rd_party/marian-dev/src/common/options.h" - -// All local project includes -#include "TranslationRequest.h" -#include "TranslationResult.h" -#include "translator/definitions.h" -#include "translator/service.h" - -/* A Translation model that translates a plain (without any markups and emojis) - * UTF-8 encoded text. This implementation supports translation from 1 source - * language to 1 target language. - */ -class TranslationModel { -public: - /* Construct the model using the model configuration options as yaml-formatted - * string - */ - /** - * @param config Marian yml config file in the form of a string - * @param model_memory optional byte array (aligned to 64!!!) that contains - * the bytes of a model.bin. - */ - TranslationModel(const std::string &config, - marian::bergamot::AlignedMemory modelMemory = marian::bergamot::AlignedMemory(), - marian::bergamot::AlignedMemory shortlistMemory = marian::bergamot::AlignedMemory()); - - ~TranslationModel(); - - /* This method performs translation on a list of UTF-8 encoded plain text - * (without any markups or emojis) and returns a list of results in the same - * order. The model supports translation from 1 source language to 1 target - * language. - * - * Each text entry can either be a word, a phrase, a sentence or a list of - * sentences. Additional information related to the translated text can be - * requested via TranslationRequest which is applied equally to each text - * entry. The translated text corresponding to each text entry and the - * additional information (as specified in the TranslationRequest) is - * encapsulated and returned in TranslationResult. - * - * The API splits each text entry into sentences internally, which are then - * translated independent of each other. 
The translated sentences are then - * joined back together and returned in TranslationResult. - * - * Please refer to the TranslationRequest class to find out what additional - * information can be requested. The alignment information can only be - * requested if the model supports it (check isAlignmentSupported() API). - * - * The texts argument will become empty after the execution of this API (each - * entry of texts list will be moved to its corresponding TranslationResult - * object). - */ - std::vector translate(std::vector &&texts, - TranslationRequest request); - - /* Check if the model can provide alignment information b/w original and - * translated text. */ - bool isAlignmentSupported() const; - -private: - // Model configuration options - std::shared_ptr configOptions_; // ORDER DEPENDECNY - marian::bergamot::Service service_; // ORDER DEPENDENCY -}; - -#endif /* SRC_TRANSLATOR_TRANSLATIONMODEL_H_ */ diff --git a/src/TranslationResult.h b/src/TranslationResult.h deleted file mode 100644 index 8c6c806..0000000 --- a/src/TranslationResult.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * TranslationResult.h - * - * The class that represents the result of TranslationModel::translate() - * API for each of its text entry and TranslationRequest. - */ - -#ifndef SRC_TRANSLATOR_TRANSLATIONRESULT_H_ -#define SRC_TRANSLATOR_TRANSLATIONRESULT_H_ - -#include -#include - -#include "QualityScore.h" - -/* This class represents the result of TranslationModel::translate() API - * for each of its text entry and TranslationRequest. 
- */ -class TranslationResult { -public: - typedef std::vector> - SentenceMappings; -#ifdef WASM_BINDINGS - TranslationResult(const std::string &original, const std::string &translation) - : originalText(original), translatedText(translation), - sentenceMappings() {} -#endif - TranslationResult(const std::string &original, const std::string &translation, - SentenceMappings &sentenceMappings) - : originalText(original), translatedText(translation), - sentenceMappings(sentenceMappings) {} - - TranslationResult(TranslationResult &&other) - : originalText(std::move(other.originalText)), - translatedText(std::move(other.translatedText)), - sentenceMappings(std::move(other.sentenceMappings)) {} - -#ifdef WASM_BINDINGS - TranslationResult(const TranslationResult &other) - : originalText(other.originalText), - translatedText(other.translatedText), - sentenceMappings(other.sentenceMappings) {} -#endif - - TranslationResult(std::string &&original, std::string &&translation, - SentenceMappings &&sentenceMappings) - : originalText(std::move(original)), - translatedText(std::move(translation)), - sentenceMappings(std::move(sentenceMappings)) {} - -#ifndef WASM_BINDINGS - TranslationResult &operator=(const TranslationResult &) = delete; -#else - TranslationResult &operator=(const TranslationResult &result) { - originalText = result.originalText; - translatedText = result.translatedText; - sentenceMappings = result.sentenceMappings; - return *this; - } -#endif - - /* Return the original text. */ - const std::string &getOriginalText() const { return originalText; } - - /* Return the translated text. */ - const std::string &getTranslatedText() const { return translatedText; } - - /* Return the Quality scores of the translated text. */ - const QualityScore &getQualityScore() const { return qualityScore; } - - /* Return the Sentence mappings (information regarding how individual - * sentences of originalText map to corresponding translated sentences in - * translatedText). 
- */ - const SentenceMappings &getSentenceMappings() const { - return sentenceMappings; - } - -private: - // Original text (in UTF-8 encoded format) that was supposed to be translated - std::string originalText; - - // Translation (in UTF-8 encoded format) of the originalText - std::string translatedText; - - // Quality score of the translated text at the granularity specified in - // TranslationRequest. It is an optional result (it will have no information - // if not requested in TranslationRequest) - QualityScore qualityScore; - - // Information regarding how individual sentences of originalText map to - // corresponding translated sentences in joined translated text - // (translatedText) An example of sentence mapping: - // originalText (contains 2 sentences) = "What is your name? - // My name is Abc." translatedText (contains 2 translated sentences) = - // "Was ist dein Name? Mein Name ist Abc." sentenceMappings = [ - // {"What is your name?", "Was ist dein Name?"}, // - // Pair(originalText[0],translatedText[0]) - // {"My name is Abc", "Mein Name ist Abc."} // - // Pair(originalText[1],translatedText[1]) - // ] - // - // It is an optional result (it will be empty if not requested in - // TranslationRequest). 
- SentenceMappings sentenceMappings; -}; - -#endif /* SRC_TRANSLATOR_TRANSLATIONRESULT_H_ */ diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index 3ddfa79..25ca916 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -1,13 +1,11 @@ add_library(bergamot-translator STATIC - TranslationModel.cpp byte_array_util.cpp text_processor.cpp sentence_splitter.cpp batch_translator.cpp - multifactor_priority.cpp request.cpp batcher.cpp - response.cpp + response_builder.cpp batch.cpp sentence_ranges.cpp service.cpp @@ -29,5 +27,5 @@ endif(COMPILE_WASM) target_link_libraries(bergamot-translator marian ssplit) target_include_directories(bergamot-translator - PUBLIC ${CMAKE_SOURCE_DIR} - PUBLIC ${CMAKE_SOURCE_DIR}/src) + PUBLIC ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/src) diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp deleted file mode 100644 index 06b04eb..0000000 --- a/src/translator/TranslationModel.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * TranslationModel.cpp - * - */ - -#include -#include - -// All local project includes -#include "TranslationModel.h" -#include "translator/parser.h" -#include "translator/service.h" - -TranslationModel::TranslationModel(const std::string &config, - marian::bergamot::AlignedMemory model_memory, - marian::bergamot::AlignedMemory lexical_memory) - : service_(config, std::move(model_memory), std::move(lexical_memory)) {} - -TranslationModel::~TranslationModel() {} - -std::vector -TranslationModel::translate(std::vector &&texts, - TranslationRequest request) { - // Implementing a non-async version first. Unpleasant, but should work. - std::promise> promise; - auto future = promise.get_future(); - - // This code, move into async? 
- std::vector translationResults; - for (auto &text : texts) { - // Collect future as marian::bergamot::TranslationResult - auto intermediate = service_.translate(std::move(text)); - intermediate.wait(); - auto marianResponse(std::move(intermediate.get())); - - TranslationResult::SentenceMappings sentenceMappings; - for (size_t idx = 0; idx < marianResponse.size(); idx++) { - marian::string_view src = marianResponse.source.sentence(idx); - marian::string_view tgt = marianResponse.target.sentence(idx); - sentenceMappings.emplace_back(std::string_view(src.data(), src.size()), - std::string_view(tgt.data(), tgt.size())); - } - - // In place construction. - translationResults.emplace_back( - std::move(marianResponse.source.text), // &&marianResponse.source_ - std::move(marianResponse.target.text), // &&marianResponse.translation_ - std::move(sentenceMappings) // &&sentenceMappings - ); - } - - return translationResults; -} - -bool TranslationModel::isAlignmentSupported() const { return false; } diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 19cbaf9..6b2425d 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) { std::vector batchVector; auto &sentences = batch.sentences(); + size_t batchSequenceNumber{0}; for (auto &sentence : sentences) { - data::SentenceTuple sentence_tuple(sentence.lineNumber()); + data::SentenceTuple sentence_tuple(batchSequenceNumber); Segment segment = sentence.getUnderlyingSegment(); sentence_tuple.push_back(segment); batchVector.push_back(sentence_tuple); + + ++batchSequenceNumber; } size_t batchSize = batchVector.size(); diff --git a/src/translator/definitions.h b/src/translator/definitions.h index 32998b9..18b5fca 100644 --- a/src/translator/definitions.h +++ b/src/translator/definitions.h @@ -22,10 +22,27 @@ template UPtr UNew(Args &&... 
args) { template UPtr UNew(UPtr p) { return UPtr(p); } -/// Shortcut to AlignedVector for byte arrays -typedef AlignedVector AlignedMemory; +/// Shortcut to AlignedVector for byte arrays +typedef AlignedVector AlignedMemory; } // namespace bergamot } // namespace marian +// @TODO at the moment the usage of string_view in this repository is a hot mess and a disaster waiting to happen. +// ssplit uses std::string_view if the compiler supports c++17, else falls back to c++11 and absl::string_view +// bergamot-translator uses, depending on the source file std::string_view (which will break if ssplit-cpp uses +// absl::string_view) and marian::string_view which is an export of (confusingly) the sentencepiece module that +// marian has. marian::string_view is our addition to the marian fork, which will make merging even nicer. Not. +// This is just an ugly patchwork that allos gcc5, our lowest targetted gcc to run. We don't actually try to run +// on older compilers. + +#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__) +#include +namespace std { + using string_view = std::experimental::string_view; +} // namespace std +#else +#include +#endif + #endif // SRC_BERGAMOT_DEFINITIONS_H_ diff --git a/src/translator/multifactor_priority.cpp b/src/translator/multifactor_priority.cpp deleted file mode 100644 index 0f93a81..0000000 --- a/src/translator/multifactor_priority.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "multifactor_priority.h" - -namespace marian { -namespace bergamot { - -} // namespace bergamot -} // namespace marian diff --git a/src/translator/multifactor_priority.h b/src/translator/multifactor_priority.h deleted file mode 100644 index 1e239f7..0000000 --- a/src/translator/multifactor_priority.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_ -#define SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_ - -#include "data/types.h" -#include "definitions.h" -#include "sys/time.h" - -namespace marian { -namespace bergamot { - -struct 
MultiFactorPriority { - int nice; /* user configurable priority, at a request */ - unsigned int Id; - /* What else should priority depend on? */ - double priority() { return Id; } -}; -} // namespace bergamot -} // namespace marian - -#endif // SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_ diff --git a/src/translator/parser.h b/src/translator/parser.h index fa4e7bb..207890c 100644 --- a/src/translator/parser.h +++ b/src/translator/parser.h @@ -31,7 +31,7 @@ inline marian::ConfigParser createConfigParser() { } inline std::shared_ptr -parseOptions(const std::string &config) { +parseOptions(const std::string &config, bool validate = true) { marian::Options options; // @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests @@ -58,8 +58,11 @@ parseOptions(const std::string &config) { options.parse(config); YAML::Node configCopy = options.cloneToYamlNode(); - marian::ConfigValidator validator(configCopy); - validator.validateOptions(marian::cli::mode::translation); + if (validate) { + // Perform validation on parsed options only when requested + marian::ConfigValidator validator(configCopy); + validator.validateOptions(marian::cli::mode::translation); + } return std::make_shared(options); } diff --git a/src/translator/pcqueue.h b/src/translator/pcqueue.h index f0b3541..d6f4582 100644 --- a/src/translator/pcqueue.h +++ b/src/translator/pcqueue.h @@ -10,12 +10,14 @@ #include #ifdef __APPLE__ -#include -#include #include #include +#include +#include #elif defined(__linux) #include +#elif defined(_WIN32) || defined(_WIN64) +#include #else #include #endif @@ -35,67 +37,107 @@ namespace bergamot { #ifdef __APPLE__ class Semaphore { -public: - explicit Semaphore(int value) : task_(mach_task_self()) { - ABORT_IF(KERN_SUCCESS != - semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value), - "Could not create semaphore"); - } - - ~Semaphore() { - if (KERN_SUCCESS != semaphore_destroy(task_, back_)) { - std::cerr << "Could not destroy semaphore" << std::endl; - abort(); + 
public: + explicit Semaphore(int value) : task_(mach_task_self()) { + ABORT_IF(KERN_SUCCESS != semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value), "Could not create semaphore"); } - } - void wait() { - ABORT_IF(KERN_SUCCESS != semaphore_wait(back_), - "Wait for semaphore failed"); - } + ~Semaphore() { + if (KERN_SUCCESS != semaphore_destroy(task_, back_)) { + std::cerr << "Could not destroy semaphore" << std::endl; + abort(); + } + } - void post() { - ABORT_IF(KERN_SUCCESS != semaphore_signal(back_), - "Could not post to semaphore"); - } + void wait() { + ABORT_IF(KERN_SUCCESS != semaphore_wait(back_), "Wait for semaphore failed"); + } -private: - semaphore_t back_; - task_t task_; + void post() { + ABORT_IF(KERN_SUCCESS != semaphore_signal(back_), "Could not post to semaphore"); + } + + private: + semaphore_t back_; + task_t task_; }; -inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); } +inline void WaitSemaphore(Semaphore &semaphore) { + semaphore.wait(); +} #elif defined(__linux) class Semaphore { -public: - explicit Semaphore(unsigned int value) { - ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore"); - } - - ~Semaphore() { - if (-1 == sem_destroy(&sem_)) { - std::cerr << "Could not destroy semaphore " << std::endl; - abort(); + public: + explicit Semaphore(unsigned int value) { + ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore"); } - } - void wait() { - while (UTIL_UNLIKELY(-1 == sem_wait(&sem_))) { - ABORT_IF(errno != EINTR, "Wait for semaphore failed"); + ~Semaphore() { + if (-1 == sem_destroy(&sem_)) { + std::cerr << "Could not destroy semaphore" << std::endl; + abort(); + } } - } - void post() { - ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore"); - } + void wait() { + while (-1 == sem_wait(&sem_)) { + ABORT_IF(errno != EINTR, "Wait for semaphore failed"); + } + } -private: - sem_t sem_; + void post() { + ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore"); + } + + private: + 
sem_t sem_; }; -inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); } +inline void WaitSemaphore(Semaphore &semaphore) { + semaphore.wait(); +} + +#elif defined(_WIN32) || defined(_WIN64) + +class Semaphore { + public: + explicit Semaphore(LONG value) : sem_(CreateSemaphoreA(NULL, value, 2147483647, NULL)) { + ABORT_IF(!sem_, "Could not CreateSemaphore {}", GetLastError()); + } + + ~Semaphore() { + CloseHandle(sem_); + } + + + void wait() { + while (true) { + switch (WaitForSingleObject(sem_, 0L)) { + case WAIT_OBJECT_0: + return; + case WAIT_ABANDONED: + ABORT("A semaphore can't be abandoned, confused by Windows"); + case WAIT_TIMEOUT: + continue; + case WAIT_FAILED: + ABORT("Waiting on Semaphore failed {}", GetLastError()); + } + } + } + + void post() { + ABORT_IF(!ReleaseSemaphore(sem_, 1, NULL), "Failed to release Semaphore {}", GetLastError()); + } + + private: + HANDLE sem_; +}; + +inline void WaitSemaphore(Semaphore &semaphore) { + semaphore.wait(); +} #else typedef boost::interprocess::interprocess_semaphore Semaphore; @@ -113,7 +155,7 @@ inline void WaitSemaphore(Semaphore &on) { } } -#endif // Apple +#endif // Cases for semaphore support /** * Producer consumer queue safe for multiple producers and multiple consumers. @@ -124,11 +166,13 @@ inline void WaitSemaphore(Semaphore &on) { * throw. */ template class PCQueue { -public: + public: explicit PCQueue(size_t size) - : empty_(size), used_(0), storage_(new T[size]), - end_(storage_.get() + size), produce_at_(storage_.get()), - consume_at_(storage_.get()) {} + : empty_(size), used_(0), + storage_(new T[size]), + end_(storage_.get() + size), + produce_at_(storage_.get()), + consume_at_(storage_.get()) {} // Add a value to the queue. 
void Produce(const T &val) { @@ -141,8 +185,7 @@ public: empty_.post(); throw; } - if (++produce_at_ == end_) - produce_at_ = storage_.get(); + if (++produce_at_ == end_) produce_at_ = storage_.get(); } used_.post(); } @@ -158,14 +201,14 @@ public: empty_.post(); throw; } - if (++produce_at_ == end_) - produce_at_ = storage_.get(); + if (++produce_at_ == end_) produce_at_ = storage_.get(); } used_.post(); } + // Consume a value, assigning it to out. - T &Consume(T &out) { + T& Consume(T &out) { WaitSemaphore(used_); { std::lock_guard consume_lock(consume_at_mutex_); @@ -175,15 +218,14 @@ public: used_.post(); throw; } - if (++consume_at_ == end_) - consume_at_ = storage_.get(); + if (++consume_at_ == end_) consume_at_ = storage_.get(); } empty_.post(); return out; } // Consume a value, swapping it to out. - T &ConsumeSwap(T &out) { + T& ConsumeSwap(T &out) { WaitSemaphore(used_); { std::lock_guard consume_lock(consume_at_mutex_); @@ -193,13 +235,13 @@ public: used_.post(); throw; } - if (++consume_at_ == end_) - consume_at_ = storage_.get(); + if (++consume_at_ == end_) consume_at_ = storage_.get(); } empty_.post(); return out; } + // Convenience version of Consume that copies the value to return. // The other version is faster. T Consume() { @@ -208,7 +250,7 @@ public: return ret; } -private: + private: // Number of empty spaces in storage_. Semaphore empty_; // Number of occupied spaces in storage_. 
@@ -234,63 +276,67 @@ template struct UnboundedPage { }; template class UnboundedSingleQueue { -public: - UnboundedSingleQueue() : valid_(0) { - SetFilling(new UnboundedPage()); - SetReading(filling_); - } - - void Produce(T &&val) { - if (filling_current_ == filling_end_) { - UnboundedPage *next = new UnboundedPage(); - filling_->next = next; - SetFilling(next); + public: + UnboundedSingleQueue() : valid_(0) { + SetFilling(new UnboundedPage()); + SetReading(filling_); } - *(filling_current_++) = std::move(val); - valid_.post(); - } - void Produce(const T &val) { Produce(T(val)); } - - T &Consume(T &out) { - WaitSemaphore(valid_); - if (reading_current_ == reading_end_) { - SetReading(reading_->next); + void Produce(T &&val) { + if (filling_current_ == filling_end_) { + UnboundedPage *next = new UnboundedPage(); + filling_->next = next; + SetFilling(next); + } + *(filling_current_++) = std::move(val); + valid_.post(); } - out = std::move(*(reading_current_++)); - return out; - } - // Warning: very much a no-guarantees race-condition-rich implementation! - // But sufficient for our specific purpose: The single thread that consumes - // is also the only one that checks Empty, and knows that it's racing. - bool Empty() const { return reading_current_ == filling_current_; } + void Produce(const T &val) { + Produce(T(val)); + } -private: - void SetFilling(UnboundedPage *to) { - filling_ = to; - filling_current_ = to->entries; - filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T); - } - void SetReading(UnboundedPage *to) { - reading_.reset(to); - reading_current_ = to->entries; - reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T); - } + T& Consume(T &out) { + WaitSemaphore(valid_); + if (reading_current_ == reading_end_) { + SetReading(reading_->next); + } + out = std::move(*(reading_current_++)); + return out; + } - Semaphore valid_; + // Warning: very much a no-guarantees race-condition-rich implementation! 
+ // But sufficient for our specific purpose: The single thread that consumes + // is also the only one that checks Empty, and knows that it's racing. + bool Empty() const { + return reading_current_ == filling_current_; + } - UnboundedPage *filling_; + private: + void SetFilling(UnboundedPage *to) { + filling_ = to; + filling_current_ = to->entries; + filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T); + } + void SetReading(UnboundedPage *to) { + reading_.reset(to); + reading_current_ = to->entries; + reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T); + } - std::unique_ptr> reading_; + Semaphore valid_; - T *filling_current_; - T *filling_end_; - T *reading_current_; - T *reading_end_; + UnboundedPage *filling_; - UnboundedSingleQueue(const UnboundedSingleQueue &) = delete; - UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete; + std::unique_ptr > reading_; + + T *filling_current_; + T *filling_end_; + T *reading_current_; + T *reading_end_; + + UnboundedSingleQueue(const UnboundedSingleQueue &) = delete; + UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete; }; } // namespace bergamot diff --git a/src/translator/request.cpp b/src/translator/request.cpp index b6d2438..8e46533 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -11,18 +11,24 @@ namespace marian { namespace bergamot { // ----------------------------------------------------------------- -Request::Request(size_t Id, size_t lineNumberBegin, - std::vector> &vocabs, AnnotatedText &&source, - Segments &&segments, std::promise responsePromise) - : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs), - source_(std::move(source)), segments_(std::move(segments)), - response_(std::move(responsePromise)) { +Request::Request(size_t Id, Segments &&segments, + ResponseBuilder &&responseBuilder) + : Id_(Id), segments_(std::move(segments)), + responseBuilder_(std::move(responseBuilder)) + +{ counter_ = 
segments_.size(); histories_.resize(segments_.size(), nullptr); + + // If there are no segments_, we are never able to trigger the responseBuilder + // calls from a different thread. However, in this case we want an empty valid + // response. + if (segments_.size() == 0) { + responseBuilder_(std::move(histories_)); + } } -size_t Request::lineNumberBegin() const { return lineNumberBegin_; } size_t Request::numSegments() const { return segments_.size(); } size_t Request::segmentTokens(size_t index) const { @@ -39,17 +45,10 @@ void Request::processHistory(size_t index, Ptr history) { // In case this is last request in, completeRequest is called, which sets the // value of the promise. if (--counter_ == 0) { - completeRequest(); + responseBuilder_(std::move(histories_)); } } -void Request::completeRequest() { - // Request no longer needs to hold the content, can transfer it to - // Response. - Response response(std::move(source_), std::move(histories_), *vocabs_); - response_.set_value(std::move(response)); -} - bool Request::operator<(const Request &b) const { // Among Requests, only sequence id is used for obtaining priority. return Id_ < b.Id_; @@ -64,10 +63,6 @@ size_t RequestSentence::numTokens() const { return (request_->segmentTokens(index_)); } -size_t RequestSentence::lineNumber() const { - return (request_->lineNumberBegin() + index_); -} - void RequestSentence::completeSentence(Ptr history) { // Relays completeSentence into request's processHistory, using index // information. diff --git a/src/translator/request.h b/src/translator/request.h index 605dea7..e2188cd 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -1,24 +1,9 @@ -// -// Defines: -// -// Request: holds the input text of a text, Segments (vector) which are -// to go to the batching mechanism and alignments between the processed -// segments and the input text (sourceTokenRanges). 
In addition, Request takes -// care of the barrier which fires when all the Segments in a request are done -// translating by the workers (BatchTranslator). -// TODO(jerinphilip): Extend Request with notions of Priority (sequence, -// user-given). -// -// RequestSentence: is a tuple of (index, Ptr). This provides the -// batching mechanism access to the segment within the request. The backref to -// Request allows event triggering the barrier upon completion of the last -// sentence by a worker. - #ifndef SRC_BERGAMOT_REQUEST_H_ #define SRC_BERGAMOT_REQUEST_H_ #include "definitions.h" #include "response.h" +#include "response_builder.h" #include "sentence_ranges.h" #include "common/logging.h" @@ -33,80 +18,96 @@ namespace marian { namespace bergamot { +/// A Request is an internal representation used to represent a request after +/// processed by TextProcessor into sentences constituted by marian::Words. +/// +/// The batching mechanism (Batcher) draws from multiple Requests and compiles +/// sentences into a batch. When a batch completes translation (at +/// BatchTranslator, intended in a different thread), backward propogation +/// happens through: +/// +/// ```cpp +/// Batch::completeBatch(...) +/// -> RequestSentence::completeSentence(..) +/// -> Request::processHistory(...) +/// ``` +/// +/// When all sentences in a Request are completed, responseBuilder is +/// triggered with the compiled Histories, to construct the Response +/// corresponding to the Request and set value of the promise which triggers the +/// future at client. class Request { public: - Request(size_t Id, size_t lineNumberBegin, - std::vector> &vocabs_, AnnotatedText &&source, - Segments &&segments, std::promise responsePromise); + /// Constructs an internal representation of the Request identified by Id, + /// processed Segments and accepts a callback (ResponseBuilder) which builds + /// the Response upon completion of the Request. 
+ /// + /// + /// @param [in] Id: Identifier assigned to Request by Service. + /// @param [in] segments: Each segment is a unit to be translated. + /// @param [in] responseBuilder: Callback function (of ResponseBuilder type) + /// to be triggered upon the completion of translation of all units in a + /// Request. + Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder); - // Obtain the count of tokens in the segment correponding to index. Used to - // insert sentence from multiple requests into the corresponding size bucket. + /// Obtain the count of tokens in the segment correponding to index. Used to + /// insert sentence from multiple requests into the corresponding size bucket. size_t segmentTokens(size_t index) const; - // Obtain number of segments in a request. + /// Obtain number of segments in a request. size_t numSegments() const; - size_t lineNumberBegin() const; - // Obtains segment corresponding to index to create a batch of segments among - // several requests. + /// Obtains segment corresponding to index to create a batch of segments + /// among several requests. Segment getSegment(size_t index) const; - // For notions of priority among requests, used to enable std::set in - // Batcher. + /// For notions of priority among requests, used to enable std::set in + /// Batcher. bool operator<(const Request &request) const; - // Processes a history obtained after translating in a heterogenous batch - // compiled from requests. + /// Processes a history obtained after translating in a heterogenous batch + /// compiled from requests. void processHistory(size_t index, Ptr history); - // On completion of last segment, sets value of the promise. - void completeRequest(); - private: size_t Id_; - size_t lineNumberBegin_; - // Multiple translation-workers can concurrently access the same Request. The - // following atomic atomically operates on the variable holding sentences - // remaining to be translated. 
+ /// Multiple translation-workers can concurrently access the same Request. The + /// following atomic atomically operates on the variable holding sentences + /// remaining to be translated. std::atomic counter_; - // source_ holds the source string to be translated. segments_ hold the - // sentences generated from source_ in vector. sourceRanges_ are - // string_views of the text corresponding to these words, pointing to - // sequences in source_. histories_ is a buffer which eventually stores the - // translations of each segment in the corresponding index. - AnnotatedText source_; + /// segments_ hold the sentences processed into Words which generated from + /// input string. Segments segments_; + + /// histories_ is a buffer which eventually stores the translations of each + /// segment in the corresponding index. std::vector> histories_; - // Members above are moved into newly constructed Response on completion - // of translation of all segments. The promise below is set to this Response - // value. future to this promise is made available to the user through - // Service. - std::promise response_; - - // Constructing Response requires the vocabs_ used to generate Request. - std::vector> *vocabs_; + /// Constructing Response requires the vocabs_ used to generate Request. + /// std::vector> *vocabs_; + ResponseBuilder responseBuilder_; }; +/// A RequestSentence provides a view to a sentence within a Request. Existence +/// of this class allows the sentences and associated information to be kept +/// within Request, while batching mechanism (Batcher) compiles Batch from +/// RequestSentence-s coming from different Requests. class RequestSentence { - // A RequestSentence provides a view to a sentence within a Request. Existence - // of this class allows the sentences and associated information to be kept - // within Request. public: RequestSentence(size_t, Ptr); + + /// Number of tokens in the segment this RequestSentence represents. 
Used to + /// order by length in batching. size_t numTokens() const; - // lineNumber in Request, used for matching marian-decoder. SentenceTuple - // requires lineNumber to be set for Corpus based batches. - size_t lineNumber() const; - - // Accessor to the segment represented by the RequestSentence. + /// Accessor to the segment represented by the RequestSentence. Segment getUnderlyingSegment() const; - // Forwards call to Request, checking for completion. + /// Forwards history to Request to set history corresponding to this + /// RequestSentence. void completeSentence(Ptr history); friend bool operator<(const RequestSentence &a, const RequestSentence &b); diff --git a/src/translator/response.cpp b/src/translator/response.cpp deleted file mode 100644 index e5bc38f..0000000 --- a/src/translator/response.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "response.h" -#include "common/logging.h" -#include "data/alignment.h" -#include "sentence_ranges.h" - -#include - -namespace marian { -namespace bergamot { - -Response::Response(AnnotatedText &&source, Histories &&histories, - std::vector> &vocabs) - : source(std::move(source)) { - // Reserving length at least as much as source_ seems like a reasonable thing - // to do to avoid reallocations. - target.text.reserve(source.text.size()); - - // In a first step, the decoded units (individual senteneces) are compiled - // into a huge string. This is done by computing indices first and appending - // to the string as each sentences are decoded. 
- std::vector> translationRanges; - std::vector sentenceBegins; - - size_t offset{0}; - bool first{true}; - - for (auto &history : histories) { - // TODO(jerin): Change hardcode of nBest = 1 - NBestList onebest = history->nBest(1); - - Result result = onebest[0]; // Expecting only one result; - Words words = std::get<0>(result); - auto targetVocab = vocabs.back(); - - std::string decoded; - std::vector targetMappings; - targetVocab->decodeWithByteRanges(words, decoded, targetMappings); - - if (first) { - first = false; - } else { - target.text += " "; - ++offset; - } - - sentenceBegins.push_back(translationRanges.size()); - target.text += decoded; - auto decodedStringBeginMarker = targetMappings.front().begin(); - for (auto &sview : targetMappings) { - size_t startIdx = offset + sview.begin() - decodedStringBeginMarker; - translationRanges.emplace_back(startIdx, startIdx + sview.size()); - } - - offset += decoded.size(); - - // Alignments - // TODO(jerinphilip): The following double conversion might not be - // necessary. Hard alignment can directly be exported, but this would mean - // WASM bindings for a structure deep within marian source. - auto hyp = std::get<1>(result); - auto softAlignment = hyp->tracebackAlignment(); - auto hardAlignment = data::ConvertSoftAlignToHardAlign( - softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a - // configurable parameter. - - Alignment unified_alignment; - for (auto &p : hardAlignment) { - unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob}); - } - - alignments.push_back(std::move(unified_alignment)); - - // Quality scores: Sequence level is obtained as normalized path scores. - // Word level using hypothesis traceback. These are most-likely logprobs. 
- auto normalizedPathScore = std::get<2>(result); - auto wordQualities = hyp->tracebackWordScores(); - wordQualities.pop_back(); - qualityScores.push_back((Quality){normalizedPathScore, wordQualities}); - } - - // Once we have the indices in translation (which might be resized a few - // times) ready, we can prepare and store the string_view as annotations - // instead. This is accomplished by iterating over available sentences using - // sentenceBegin and using addSentence(...) API from Annotation. - - for (size_t i = 1; i <= sentenceBegins.size(); i++) { - std::vector targetMappings; - size_t begin = sentenceBegins[i - 1]; - size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size() - : sentenceBegins[i]; - - for (size_t idx = begin; idx < safe_end; idx++) { - auto &p = translationRanges[idx]; - size_t begin_idx = p.first; - size_t end_idx = p.second; - - const char *data = &target.text[begin_idx]; - size_t size = end_idx - begin_idx; - targetMappings.emplace_back(data, size); - } - - target.addSentence(targetMappings); - } -} -} // namespace bergamot -} // namespace marian diff --git a/src/translator/response.h b/src/translator/response.h index 4f87b8d..0f7ecb5 100644 --- a/src/translator/response.h +++ b/src/translator/response.h @@ -40,34 +40,12 @@ struct Quality { /// AnnotatedText provides an API to access markings of (sub)-word and /// sentences boundaries, which are required to interpret Quality and /// Alignment (s) at the moment. -class Response { - -public: - /// - Response(AnnotatedText &&source, Histories &&histories, - std::vector> &vocabs); - - /// \cond HIDDEN_PUBLIC - // Move constructor. 
- Response(Response &&other) - : source(std::move(other.source)), target(std::move(other.target)), - alignments(std::move(other.alignments)), - qualityScores(std::move(other.qualityScores)){}; - - // The following copy bans are not stricitly required anymore since Annotation - // is composed of the ByteRange primitive (which was previously string_view - // and required to be bound to string), but makes movement efficient by - // banning these letting compiler complain about copies. - - Response(const Response &) = delete; - Response &operator=(const Response &) = delete; - - /// \endcond - - /// Number of sentences translated. The processing of a text of into sentences - /// are handled internally, and this information can be used to iterate - /// through meaningful units of translation for which alignment and quality - /// information are available. +struct Response { + /// Convenience function to obtain number of units translated. Same as + /// `.source.numSentences()` and `.target.numSentences().` The processing of a + /// text of into sentences are handled internally, and this information can be + /// used to iterate through meaningful units of translation for which + /// alignment and quality information are available. const size_t size() const { return source.numSentences(); } /// source text and annotations of (sub-)words and sentences. @@ -86,6 +64,10 @@ public: /// sparse matrix representation with indices corresponding /// to (sub-)words accessible through Annotation. 
std::vector alignments; + + const std::string &getOriginalText() const { return source.text; } + + const std::string &getTranslatedText() const { return target.text; } }; } // namespace bergamot } // namespace marian diff --git a/src/translator/response_builder.cpp b/src/translator/response_builder.cpp new file mode 100644 index 0000000..c624707 --- /dev/null +++ b/src/translator/response_builder.cpp @@ -0,0 +1,87 @@ +#include "response_builder.h" + +namespace marian { +namespace bergamot { + +void ResponseBuilder::buildQualityScores(Histories &histories, + Response &response) { + std::vector qualityScores; + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + auto hyp = std::get<1>(result); + // Quality scores: Sequence level is obtained as normalized path scores. + // Word level using hypothesis traceback. These are most-likely + // logprobs. + auto normalizedPathScore = std::get<2>(result); + auto wordQualities = hyp->tracebackWordScores(); + wordQualities.pop_back(); + response.qualityScores.push_back( + Quality{normalizedPathScore, wordQualities}); + } +} + +void ResponseBuilder::buildAlignments(Histories &histories, + Response &response) { + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + // Alignments + // TODO(jerinphilip): The following double conversion might not be + // necessary. Hard alignment can directly be exported, but this would + // mean WASM bindings for a structure deep within marian source. 
+ auto hyp = std::get<1>(result); + auto softAlignment = hyp->tracebackAlignment(); + auto threshold = responseOptions_.alignmentThreshold; + auto hardAlignment = + data::ConvertSoftAlignToHardAlign(softAlignment, threshold); + Alignment unified_alignment; + for (auto &p : hardAlignment) { + unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob}); + } + + response.alignments.push_back(std::move(unified_alignment)); + } +} + +void ResponseBuilder::buildTranslatedText(Histories &histories, + Response &response) { + // Reserving length at least as much as source_ seems like a reasonable + // thing to do to avoid reallocations. + response.target.text.reserve(response.source.text.size()); + + size_t offset{0}; + bool first{true}; + + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + auto targetVocab = vocabs_->back(); + + std::string decoded; + std::vector targetSentenceMappings; + targetVocab->decodeWithByteRanges(words, decoded, targetSentenceMappings); + + // delimiter can be used to fill in the blanks from source as well. + std::string delimiter; + if (first) { + first = false; + } else { + delimiter = " "; + } + + response.target.appendSentence(delimiter, decoded, targetSentenceMappings); + } +} + +} // namespace bergamot +} // namespace marian diff --git a/src/translator/response_builder.h b/src/translator/response_builder.h new file mode 100644 index 0000000..85caffb --- /dev/null +++ b/src/translator/response_builder.h @@ -0,0 +1,93 @@ +#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_ +#define SRC_BERGAMOT_RESPONSE_BUILDER_H_ + +#include "data/types.h" +#include "response.h" +#include "response_options.h" + +// For now we will work with this, to avoid complaints another structure is hard +// to operate with. 
+ +namespace marian { +namespace bergamot { + +/// ResponseBuilder is a callback functor. It is expected to be bound to a +/// Request after giving it the context of options, vocabs and promise to set. +/// It constructs the Response and its members based on options +/// (quality=on|off, alignments=on|off, mappings=on|off, splitmode=sentence | +/// paragraph). + +class ResponseBuilder { +public: + /// @param [in] responseOptions: ResponseOptions, indicating what to include + /// or not in the response and any additional configurable parameters. + /// @param [in] vocabs: marian vocab object (used in decoding) + /// @param [in] promise: promise to set with the constructed Response. + ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source, + std::vector> &vocabs, + std::promise &&promise) + : responseOptions_(responseOptions), source_(std::move(source)), + vocabs_(&vocabs), promise_(std::move(promise)) {} + + /// Constructs and sets the promise of a Response object from obtained + /// histories after translating. + /// @param [in] histories: Histories obtained after translating the Request + /// from which this functor is called. + void operator()(Histories &&histories) { + // TODO(jerinphilip) load ResponseOptions into options and turn build + // functions on or off. + // responseOptions_ currently gates only qualityScores and alignment below. + ABORT_IF(source_.numSentences() != histories.size(), + "Mismatch in source and translated sentences"); + Response response; + + // Move source_ into response. + response.source = std::move(source_); + + // Should be after source is set + buildTranslatedText(histories, response); + + // Should always be after buildTranslatedText + if (responseOptions_.qualityScores) { + buildQualityScores(histories, response); + } + + if (responseOptions_.alignment) { + buildAlignments(histories, response); + } + + // Once complete, set promise. 
+ promise_.set_value(std::move(response)); + } + +private: + /// Builds qualityScores from histories and writes to response. expects + /// buildTranslatedText to be run before to be able to obtain target text and + /// subword information. + /// @param histories [in] + /// @param response [out] + void buildQualityScores(Histories &histories, Response &response); + + /// Builds alignments from histories and writes onto response. + /// @param histories [in] + /// @param response [out] + void buildAlignments(Histories &histories, Response &response); + + /// Builds translated text and subword annotations and writes onto response. + /// @param histories [in] + /// @param response [out] + void buildTranslatedText(Histories &histories, Response &response); + + // Data members are context/curried args for the functor. + + ResponseOptions responseOptions_; + std::vector> *vocabs_; // vocabs are required for decoding + // and any source validation checks. + std::promise promise_; // To be set when callback triggered and + // after Response constructed. + AnnotatedText source_; +}; +} // namespace bergamot +} // namespace marian + +#endif // SRC_BERGAMOT_RESPONSE_BUILDER_H_ diff --git a/src/translator/response_options.h b/src/translator/response_options.h new file mode 100644 index 0000000..ed3cce3 --- /dev/null +++ b/src/translator/response_options.h @@ -0,0 +1,50 @@ +#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_ +#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_ +#include + +namespace marian { +namespace bergamot { + +enum ConcatStrategy { + /// Target text is constructed faithful to the source-text structure. + FAITHFUL, + + /// Target text is concatenated by a space. + SPACE +}; + +enum QualityScoreType { + /// Provide a free quality-score that comes with the machine-translation model + /// itself. + FREE, + + /// An expensive quality-score that runs additional computations to determine + /// quality of an output. 
+ EXPENSIVE +}; + +/// ResponseOptions dictate how to construct a Response for an input string of +/// text to be translated. +struct ResponseOptions { + bool qualityScores{false}; ///< Include quality-scores or not. + bool alignment{false}; ///< Include alignments or not. + + /// Whether to include sentenceMappings or not. Alignments require + /// sentenceMappings and are available irrespective of this option if + /// `alignment=true`. + bool sentenceMappings{false}; + + /// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse + /// matrix. Higher value implies stronger filtering leading to provision of + /// higher-confidence matches. `1.0f` gives argmax (not the full-dense + /// matrix). + float alignmentThreshold{0.2f}; + + QualityScoreType qualityScoreType{QualityScoreType::FREE}; + ConcatStrategy concatStrategy{ConcatStrategy::FAITHFUL}; +}; + +} // namespace bergamot +} // namespace marian + +#endif // SRC_BERGAMOT_RESPONSE_OPTIONS_H_ diff --git a/src/translator/sentence_ranges.cpp b/src/translator/sentence_ranges.cpp index aae9dd3..da9d3ee 100644 --- a/src/translator/sentence_ranges.cpp +++ b/src/translator/sentence_ranges.cpp @@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const { // the flatByteRange and non-empty sentence before this happened and // construct empty string-view equivalent ByteRange. 
ByteRange eos = flatByteRanges_[eosId - 1]; - sentenceByteRange = (ByteRange){eos.end, eos.end}; + sentenceByteRange = ByteRange{eos.end, eos.end}; } else { ByteRange bos = flatByteRanges_[bosId]; ByteRange eos = flatByteRanges_[eosId - 1]; - sentenceByteRange = (ByteRange){bos.begin, eos.end}; + sentenceByteRange = ByteRange{bos.begin, eos.end}; } return sentenceByteRange; } @@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const { return asStringView(sentenceAsByteRange); } +void AnnotatedText::appendSentence(std::string prefix, std::string &reference, + std::vector &wordRanges) { + text += prefix; + size_t offset = text.size(); // Get size before to do ByteRange arithmetic + text += reference; // Append reference to text + std::vector sentence; + for (auto &wordView : wordRanges) { + size_t thisWordBegin = offset + wordView.data() - &reference[0]; + sentence.push_back( + ByteRange{thisWordBegin, thisWordBegin + wordView.size()}); + } + annotation.addSentence(sentence); +} + void AnnotatedText::addSentence(std::vector &wordRanges) { addSentence(std::begin(wordRanges), std::end(wordRanges)); }; @@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector::iterator begin, std::vector sentence; for (auto p = begin; p != end; p++) { size_t begin_offset = p->data() - &text[0]; - sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()}); + sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()}); } annotation.addSentence(sentence); }; diff --git a/src/translator/sentence_ranges.h b/src/translator/sentence_ranges.h index b3986e3..f9c881e 100644 --- a/src/translator/sentence_ranges.h +++ b/src/translator/sentence_ranges.h @@ -64,7 +64,6 @@ public: sentenceEndIds_.push_back(0); } - /// Returns the number of sentences annotated in a text. size_t numSentences() const { return sentenceEndIds_.size() - 1; } /// Returns number of words in the sentence identified by `sentenceIdx`. 
@@ -125,10 +124,6 @@ public: /// constructor is disallowed). AnnotatedText(std::string &&text) : text(std::move(text)){}; - AnnotatedText(AnnotatedText &&annotatedBlob) - : text(std::move(annotatedBlob.text)), - annotation(std::move(annotatedBlob.annotation)) {} - /// Returns the number of sentences in the annotation structure. const size_t numSentences() const { return annotation.numSentences(); } @@ -137,6 +132,11 @@ public: return annotation.numWords(sentenceIdx); } + /// Appends a sentence to the existing text and transparently rebases + /// string_views + void appendSentence(std::string prefix, std::string &reference, + std::vector &wordRanges); + /// Adds a sentence, used to load from SentencePiece annotations conveniently. void addSentence(std::vector &wordRanges); diff --git a/src/translator/sentence_splitter.h b/src/translator/sentence_splitter.h index 5175176..1c4742e 100644 --- a/src/translator/sentence_splitter.h +++ b/src/translator/sentence_splitter.h @@ -4,6 +4,7 @@ #include "common/options.h" #include "data/types.h" #include "ssplit.h" +#include "definitions.h" #include namespace marian { diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 76bcba2..3d19f5e 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -28,8 +28,8 @@ loadVocabularies(marian::Ptr options) { namespace marian { namespace bergamot { -Service::Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory) - : requestId_(0), vocabs_(std::move(loadVocabularies(options))), +Service::Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory) + : requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))), text_processor_(vocabs_, options), batcher_(options), numWorkers_(options->get("cpu-threads")), modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory)) @@ -112,6 +112,44 @@ void Service::async_translate() { #endif // WASM_COMPATIBLE_SOURCE std::future 
Service::translate(std::string &&input) { + ResponseOptions responseOptions; // Hardcode responseOptions for now + return translate(std::move(input), responseOptions); +} + +std::vector +Service::translateMultiple(std::vector &&inputs, + TranslationRequest translationRequest) { + ResponseOptions responseOptions; + + // TODO(jerinphilip) Set options based on TranslationRequest, if and when it + // becomes non-dummy. + + // We queue the individual Requests so they get compiled at batches to be + // efficiently translated. + std::vector> responseFutures; + for (auto &input : inputs) { + std::future inputResponse = + queueRequest(std::move(input), responseOptions); + responseFutures.push_back(std::move(inputResponse)); + } + + // Dispatch is called once per request so compilation of sentences from + // multiple Requests happen. + dispatchTranslate(); + + // Now wait for all Requests to complete, the future to fire and return the + // compiled Responses, we can probably return the future, but WASM quirks(?). 
+ std::vector responses; + for (auto &future : responseFutures) { + future.wait(); + responses.push_back(std::move(future.get())); + } + + return responses; +} + +std::future Service::queueRequest(std::string &&input, + ResponseOptions responseOptions) { Segments segments; AnnotatedText source(std::move(input)); text_processor_.process(source, segments); @@ -119,17 +157,29 @@ std::future Service::translate(std::string &&input) { std::promise responsePromise; auto future = responsePromise.get_future(); - Ptr request = New( - requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source), - std::move(segments), std::move(responsePromise)); + ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_, + std::move(responsePromise)); + Ptr request = New(requestId_++, std::move(segments), + std::move(responseBuilder)); batcher_.addWholeRequest(request); + return future; +} + +std::future Service::translate(std::string &&input, + ResponseOptions responseOptions) { + std::future future = + queueRequest(std::move(input), responseOptions); + dispatchTranslate(); + return future; +} + +void Service::dispatchTranslate() { if (numWorkers_ == 0) { blocking_translate(); } else { async_translate(); } - return future; } Service::~Service() { diff --git a/src/translator/service.h b/src/translator/service.h index 72f6d92..288c649 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -1,10 +1,12 @@ #ifndef SRC_BERGAMOT_SERVICE_H_ #define SRC_BERGAMOT_SERVICE_H_ +#include "TranslationRequest.h" #include "batch_translator.h" #include "batcher.h" #include "data/types.h" #include "response.h" +#include "response_builder.h" #include "text_processor.h" #include "translator/parser.h" @@ -18,22 +20,45 @@ namespace marian { namespace bergamot { -/// Service exposes methods to translate an incoming blob of text to the -/// Consumer of bergamot API. 
+/// Service offers methods to create an asynchronous translation service that +/// translates a plain (without any markups and emojis) UTF-8 encoded text. +/// This implementation supports translation from 1 source language to 1 target +/// language. +/// +/// This is intended to be similar to the ones provided for training or +/// decoding in ML pipelines with the following additional capabilities: +/// +/// 1. Provision of a request -> response based translation flow unlike the +/// usual line-based translation or decoding provided in most ML frameworks. +/// 2. Internal handling of normalization etc which changes source text to +/// provide to client translation meta-information like alignments consistent +/// with the unnormalized input text. +/// 3. The API splits each text entry into sentences internally, which are then +/// translated independent of each other. The translated sentences are then +/// joined back together and returned in Response. +/// +/// Service exposes methods to instantiate the service from a string +/// configuration (which can cover most translators) and to translate an +/// incoming blob of text. +/// /// /// An example use of this API looks as follows: -/// +/// ```cpp /// options = ...; /// service = Service(options); /// std::string input_text = "Hello World"; /// std::future<Response> -/// response = service.translate(std::move(input_text)); -/// response.wait(); -/// Response result = response.get(); +/// responseFuture = service.translate(std::move(input_text)); +/// responseFuture.wait(); // Wait until translation has completed. +/// Response response = responseFuture.get(); /// -/// Optionally Service can be initialized by also passing model_memory for +/// // Do things with response. +/// ``` +/// +/// Optionally Service can be initialized by also passing model memory for /// purposes of efficiency (which defaults to nullpointer and then reads from /// file supplied through config). 
+/// class Service { public: @@ -41,9 +66,22 @@ public: /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes /// of a model.bin. Optional, defaults to nullptr when not used /// @param shortlistMemory byte array of shortlist (aligned to 64) - explicit Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory); + explicit Service(Ptr options, AlignedMemory modelMemory, + AlignedMemory shortlistMemory); - explicit Service(Ptr options) : Service(options, AlignedMemory(), AlignedMemory()){} + /// Construct Service purely from Options. This expects options which + /// marian-decoder expects to be set for loading model shortlist and + /// vocabularies from files in addition to parameters that set unset desired + /// features (e.g: alignments, quality-scores). + /// + /// This is equivalent to a call to: + /// ```cpp + /// Service(options, AlignedMemory(), AlignedMemory()) + /// ``` + /// wherein empty memory is passed and internal flow defaults to file-based + /// model, shortlist loading. + explicit Service(Ptr options) + : Service(options, AlignedMemory(), AlignedMemory()) {} /// Construct Service from a string configuration. /// @param [in] config string parsable as YAML expected to adhere with marian @@ -52,20 +90,66 @@ public: /// bytes of a model.bin. Optional. /// @param [in] shortlistMemory byte array of shortlist (aligned to 64) explicit Service(const std::string &config, - AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory()) - : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {} + AlignedMemory modelMemory = AlignedMemory(), + AlignedMemory shortlistMemory = AlignedMemory()) + : Service(parseOptions(config, /*validate=*/false), + std::move(modelMemory), std::move(shortlistMemory)) {} /// Explicit destructor to clean up after any threads initialized in /// asynchronous operation mode. 
~Service(); /// To stay efficient and to refer to the string for alignments, expects - /// ownership be moved through std::move(..) + /// ownership be moved through `std::move(..)` /// - /// @param [in] rvalue reference of string to be translated. - std::future translate(std::string &&input); + /// @param [in] source: rvalue reference of string to be translated. + std::future translate(std::string &&source); + + /// Translate an input, providing Options to construct Response. This is + /// useful when one has to set/unset alignments or quality in the Response to + /// save compute spent in constructing these objects. + /// + /// @param [in] source: rvalue reference of the string to be translated + /// @param [in] options: Options indicating whether or not to include + /// some member in the Response, also specify any additional configurable + /// parameters. + std::future translate(std::string &&source, + ResponseOptions options); + + /// Translate multiple text-blobs in a single *blocking* API call, providing + /// TranslationRequest which applies across all text-blobs dictating how to + /// construct Response. TranslationRequest can be used to enable/disable + /// additional information like quality-scores, alignments etc. + /// + /// All texts are combined to efficiently construct batches together providing + /// speedups compared to calling translate() independently on individual + /// text-blob. Note that there will be minor differences in output when + /// text-blobs are individually translated due to approximations but similar + /// quality nonetheless. If you have async/multithread capabilities, it is + /// recommended to work with futures and translate() API. + /// + /// @param [in] source: rvalue reference of the text-blobs to be translated + /// @param [in] translationRequest: TranslationRequest (Unified API) + /// indicating whether or not to include some member in the Response, also + /// specify any additional configurable parameters. 
+ + std::vector + translateMultiple(std::vector &&source, + TranslationRequest translationRequest); + + /// Returns if model is alignment capable or not. + bool isAlignmentSupported() const { + return options_->hasAndNotEmpty("alignment"); + } private: + /// Queue an input for translation. + std::future queueRequest(std::string &&input, + ResponseOptions responseOptions); + + /// Dispatch call to translate after inserting in queue + void dispatchTranslate(); + /// Build numTranslators number of translators with options from options void build_translators(Ptr options, size_t numTranslators); /// Initializes a blocking translator without using std::thread @@ -83,16 +167,21 @@ private: void async_translate(); /// Number of workers to launch. - size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) + size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) + + /// Options object holding the options Service was instantiated with. + Ptr options_; + /// Model memory to load model passed as bytes. - AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) + AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) /// Shortlist memory passed as bytes. - AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) + AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) /// Holds instances of batch translators, just one in case /// of single-threaded application, numWorkers_ in case of multithreaded /// setting. - std::vector translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) + std::vector + translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) /// Stores requestId of active request. Used to establish /// ordering among requests and logging/book-keeping. 
diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index c89e393..a785ba6 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -23,6 +23,7 @@ endif() set_target_properties(bergamot-translator-worker PROPERTIES SUFFIX ".js" LINK_FLAGS ${LINKER_FLAGS} - ) + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + ) target_link_libraries(bergamot-translator-worker bergamot-translator) diff --git a/wasm/README.md b/wasm/README.md index 23564b9..337ae1b 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,17 +1,53 @@ ## Using Bergamot Translator in JavaScript The example file `bergamot.html` in the folder `test_page` demonstrates how to use the bergamot translator in JavaScript via a `