From f1fc4f8041ae321d2304a115810e702b9ca6be4f Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Wed, 14 Apr 2021 13:53:35 +0100 Subject: [PATCH 01/19] Fix the target_include_directories (#98) --- src/translator/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index 3ddfa79..9bf7dca 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -29,5 +29,5 @@ endif(COMPILE_WASM) target_link_libraries(bergamot-translator marian ssplit) target_include_directories(bergamot-translator - PUBLIC ${CMAKE_SOURCE_DIR} - PUBLIC ${CMAKE_SOURCE_DIR}/src) + PUBLIC ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/src) From c00c263f8f8e1eb02ecbac3c59acdbe591f4fe0a Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Fri, 16 Apr 2021 11:58:53 +0100 Subject: [PATCH 02/19] Moving small tests to GitHub CI (#93) Adds regression-tests to the workflow for native minimal/custom marian and full builds. Co-authored-by: abhi-agg <66322306+abhi-agg@users.noreply.github.com> --- .../workflows/native-custom_marian-mac.yml | 33 ----- .../workflows/native-custom_marian-ubuntu.yml | 33 ----- .github/workflows/native-full_marian-mac.yml | 59 --------- .../workflows/native-full_marian-ubuntu.yml | 120 ------------------ .github/workflows/native-mac.yml | 108 ++++++++++++++++ .github/workflows/native-ubuntu.yml | 117 +++++++++++++++++ .../workflows/wasm-custom_marian-ubuntu.yml | 4 +- .gitmodules | 3 + bergamot-translator-tests | 1 + 9 files changed, 231 insertions(+), 247 deletions(-) delete mode 100644 .github/workflows/native-custom_marian-mac.yml delete mode 100644 .github/workflows/native-custom_marian-ubuntu.yml delete mode 100644 .github/workflows/native-full_marian-mac.yml delete mode 100644 .github/workflows/native-full_marian-ubuntu.yml create mode 100644 .github/workflows/native-mac.yml create mode 100644 .github/workflows/native-ubuntu.yml create mode 160000 bergamot-translator-tests diff --git a/.github/workflows/native-custom_marian-mac.yml b/.github/workflows/native-custom_marian-mac.yml deleted file mode 100644 index 1aae7e5..0000000 --- a/.github/workflows/native-custom_marian-mac.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Native (Custom Marian) MacOS - -on: - push: - branches: [ main, ci-sandbox ] - pull_request: - branches: [ main, ci-sandbox ] - -jobs: - build-macos: - name: MacOS - runs-on: macos-10.15 - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Configure CMake - run: | - mkdir -p build - cd build - cmake .. - - - name: Compile - working-directory: build - run: make -j2 - - - name: Print versions - working-directory: build - run: | - ./app/bergamot-translator-app --version diff --git a/.github/workflows/native-custom_marian-ubuntu.yml b/.github/workflows/native-custom_marian-ubuntu.yml deleted file mode 100644 index f051871..0000000 --- a/.github/workflows/native-custom_marian-ubuntu.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Native (Custom Marian) Ubuntu - -on: - push: - branches: [ main, ci-sandbox ] - pull_request: - branches: [ main, ci-sandbox ] - -jobs: - build-macos: - name: Ubuntu - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Configure CMake - run: | - mkdir -p build - cd build - cmake .. - - - name: Compile - working-directory: build - run: make -j2 - - - name: Print versions - working-directory: build - run: | - ./app/bergamot-translator-app --version diff --git a/.github/workflows/native-full_marian-mac.yml b/.github/workflows/native-full_marian-mac.yml deleted file mode 100644 index 1928c5c..0000000 --- a/.github/workflows/native-full_marian-mac.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Native (Full Marian) MacOS - -on: - push: - branches: [ main, ci-sandbox ] - pull_request: - branches: [ main, ci-sandbox ] - -jobs: - build-macos: - name: MacOS CPU-only - runs-on: macos-10.15 - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Install dependencies - run: brew install openblas protobuf - - # Openblas location is exported explicitly because openblas is keg-only, - # which means it was not symlinked into /usr/local/. - # CMake cannot find BLAS on GitHub runners if Marian is being compiled - # statically, hence USE_STATIC_LIBS=off - - name: Configure CMake - run: | - export LDFLAGS="-L/usr/local/opt/openblas/lib" - export CPPFLAGS="-I/usr/local/opt/openblas/include" - mkdir -p build - cd build - cmake .. \ - -DCOMPILE_CPU=on \ - -DCOMPILE_CUDA=off \ - -DCOMPILE_EXAMPLES=on \ - -DCOMPILE_SERVER=on \ - -DCOMPILE_TESTS=on \ - -DUSE_FBGEMM=on \ - -DUSE_SENTENCEPIECE=on \ - -DUSE_STATIC_LIBS=off \ - -DUSE_WASM_COMPATIBLE_SOURCE=off - - - name: Compile - working-directory: build - run: make -j2 - - - name: Run unit tests - working-directory: build - run: make test - - - name: Print versions - working-directory: build - run: | - ./marian --version - ./marian-decoder --version - ./marian-scorer --version - ./spm_encode --version - diff --git a/.github/workflows/native-full_marian-ubuntu.yml b/.github/workflows/native-full_marian-ubuntu.yml deleted file mode 100644 index e414f64..0000000 --- a/.github/workflows/native-full_marian-ubuntu.yml +++ /dev/null @@ -1,120 +0,0 @@ -name: Native (Full Marian) Ubuntu - -on: - push: - branches: [ main, ci-test ] - pull_request: - branches: [ main, ci-test ] - -jobs: - build-ubuntu: - strategy: - matrix: - include: - # Ubuntu CPU-only build - - name: "Ubuntu CPU-only" - os: ubuntu-latest - cuda: "" - gcc: 8 - cpu: true - gpu: false - # GPU Builds are commented out, for bergamot-translator CI runs. - # Ubuntu GPU-only build - # - name: "Ubuntu GPU-only" - # os: ubuntu-latest - # cuda: "10.2" - # gcc: 7 - # cpu: false - # gpu: true - # Ubuntu 20.04 supports CUDA 11+ - #- name: "Ubuntu 20.04 CUDA 11.0 gcc-9" - #os: ubuntu-20.04 - #cuda: "11.0" - #gcc: 9 - #cpu: false - #gpu: true - # Ubuntu 18.04 supports CUDA 10.1+ - # - name: "Ubuntu 18.04 CUDA 10.2 gcc-8" - # os: ubuntu-18.04 - # cuda: "10.2" - # gcc: 8 - # cpu: true - # gpu: true - # Ubuntu 16.04 supports CUDA 8+ - # - name: "Ubuntu 16.04 CUDA 9.2 gcc-7" - # os: ubuntu-16.04 - # cuda: "9.2" - # gcc: 7 - # cpu: true - # gpu: true - - runs-on: ${{ matrix.os }} - name: ${{ matrix.name }} - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - # The following packages are already installed on GitHub-hosted runners: - # build-essential openssl libssl-dev - # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because - # it is installed together with libprotobuf-dev - - name: Install dependencies - run: sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev g++-8 - - # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html - - name: Install MKL - run: | - wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - - sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" - sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" - sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088 - if: matrix.cpu == true - - # The script simplifies installation of different versions of CUDA - - name: Install CUDA - run: ./3rd_party/marian-dev/scripts/ci/install_cuda_ubuntu.sh ${{ matrix.cuda }} - if: matrix.gpu == true - - # Boost is installed on GitHub-hosted runners in a non-standard location - # https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671 - - name: Configure CMake - run: | - mkdir -p build - cd build - CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \ - cmake .. \ - -DBoost_ARCHITECTURE=-x64 \ - -DCMAKE_BUILD_TYPE=Release \ - -DCOMPILE_CPU=${{ matrix.cpu }} \ - -DCOMPILE_CUDA=${{ matrix.gpu }} \ - -DCOMPILE_EXAMPLES=on \ - -DCOMPILE_SERVER=on \ - -DCOMPILE_TESTS=on \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-${{ matrix.cuda }} \ - -DUSE_FBGEMM=${{ matrix.cpu }} \ - -DUSE_SENTENCEPIECE=on \ - -DUSE_STATIC_LIBS=on \ - -DUSE_WASM_COMPATIBLE_SOURCE=off - - - name: Compile - working-directory: build - run: make -j2 - - - name: Run unit tests - working-directory: build - run: make test - # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds - if: matrix.gpu == false - - - name: Print versions - working-directory: build - run: | - ./marian --version - ./marian-decoder --version - ./marian-scorer --version - ./marian-server --version - ./spm_encode --version - diff --git a/.github/workflows/native-mac.yml b/.github/workflows/native-mac.yml new file mode 100644 index 0000000..8df203d --- /dev/null +++ b/.github/workflows/native-mac.yml @@ -0,0 +1,108 @@ +name: Native MacOS + +on: + push: + branches: [ main, ci-sandbox ] + pull_request: + branches: [ main, ci-sandbox ] + +jobs: + build-macos: + strategy: + fail-fast: false + matrix: + include: + - name: "full-marian" + os: macos-10.15 + test_tags: "" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "ON" + USE_WASM_COMPATIBLE_SOURCE: "OFF" + USE_FBGEMM: "OFF" + USE_STATIC_LIBS: "OFF" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + - name: "minimal-marian" + os: macos-10.15 + test_tags: "'#wasm'" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and compile tests fail + USE_WASM_COMPATIBLE_SOURCE: "ON" + USE_FBGEMM: "OFF" + # explicitly set due to requirement of minimal marian being used + # within WASM. This is some yaml ugliness, but issok. + USE_STATIC_LIBS: "ON" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install dependencies + run: | + brew update + brew install openblas protobuf coreutils + + # Openblas location is exported explicitly because openblas is keg-only, + # which means it was not symlinked into /usr/local/. + - name: Set BLAS Environment variables + run: | + echo "LDFLAGS=-L/usr/local/opt/openblas/lib" >> $GITHUB_ENV + echo "CPPFLAGS=-I/usr/local/opt/openblas/include" >> $GITHUB_ENV + if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF' + + # CMake cannot find BLAS on GitHub runners if Marian is being compiled + # statically, hence USE_STATIC_LIBS=off + - name: Configure CMake + run: | + mkdir -p build + cd build + cmake .. \ + -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\ + -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\ + -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \ + -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \ + -DUSE_STATIC_LIBS=${{ matrix.cmake.USE_STATIC_LIBS }} \ + -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \ + -DUSE_FBGEMM=${{ matrix.cmake.USE_FBGEMM }} + + - name: Compile + working-directory: build + run: make -j2 + + - name: Run unit tests + working-directory: build + run: make test + if: matrix.cmake.COMPILE_TESTS == 'ON' + + - name: Print versions + working-directory: build + run: | + ./app/bergamot-translator-app --version + + - name: Install regression-test framework (BRT) + working-directory: bergamot-translator-tests + run : make install + + - name: Run regression-tests (BRT) + working-directory: bergamot-translator-tests + run : MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }} + + - name: Upload regression-tests artifacts + uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: brt-artifacts-${{ matrix.name }} + path: | + bergamot-translator-tests/**/*.expected + bergamot-translator-tests/**/*.log + bergamot-translator-tests/**/*.out diff --git a/.github/workflows/native-ubuntu.yml b/.github/workflows/native-ubuntu.yml new file mode 100644 index 0000000..dc8016b --- /dev/null +++ b/.github/workflows/native-ubuntu.yml @@ -0,0 +1,117 @@ +name: Native Ubuntu + +on: + push: + branches: [ main, ci-sandbox ] + pull_request: + branches: [ main, ci-sandbox ] + +jobs: + build-ubuntu: + strategy: + fail-fast: false + matrix: + include: + - name: "full-marian" + os: ubuntu-latest + gcc: 8 + cpu: 'ON' + gpu: 'OFF' + test_tags: "" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "ON" + USE_WASM_COMPATIBLE_SOURCE: "OFF" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + - name: "minimal-marian" + os: ubuntu-latest + gcc: 8 + cpu: 'ON' + gpu: 'OFF' + test_tags: "'#wasm'" + cmake: + CMAKE_BUILD_TYPE: "Release" + COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and COMPILE_TEST=ON fails. + USE_WASM_COMPATIBLE_SOURCE: "ON" + COMPILE_SERVER: "OFF" + COMPILE_EXAMPLES: "OFF" + + + runs-on: ${{ matrix.os }} + name: ${{ matrix.name }} + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + # The following packages are already installed on GitHub-hosted runners: + # build-essential openssl libssl-dev + # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because + # it is installed together with libprotobuf-dev + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + libgoogle-perftools-dev libprotobuf-dev protobuf-compiler \ + libboost-all-dev g++-${{ matrix.gcc }} + + # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html + - name: Install MKL + run: | + wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - + sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088 + if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF' + + # Boost is installed on GitHub-hosted runners in a non-standard location + # https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671 + - name: Configure CMake + run: | + mkdir -p build + cd build + CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \ + cmake .. \ + -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\ + -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\ + -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \ + -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \ + -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \ + + - name: Compile bergamot-translator + working-directory: build + run: make -j2 + + - name: Run unit tests + working-directory: build + run: make test + # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds + if: matrix.gpu == 'OFF' && matrix.cmake.COMPILE_TESTS == 'ON' + + - name: Print versions + working-directory: build + run: | + ./app/bergamot-translator-app --version + + + - name: Install regression-test framework (BRT) + working-directory: bergamot-translator-tests + run : make install + + - name: Run regression-tests (BRT) + working-directory: bergamot-translator-tests + run : MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }} + + - name: Upload regression-tests artifacts + uses: actions/upload-artifact@v2 + if: ${{ always() }} + with: + name: brt-artifacts-${{ matrix.name }} + path: | + bergamot-translator-tests/**/*.expected + bergamot-translator-tests/**/*.log + bergamot-translator-tests/**/*.out diff --git a/.github/workflows/wasm-custom_marian-ubuntu.yml b/.github/workflows/wasm-custom_marian-ubuntu.yml index d1364dc..7dfc839 100644 --- a/.github/workflows/wasm-custom_marian-ubuntu.yml +++ b/.github/workflows/wasm-custom_marian-ubuntu.yml @@ -2,9 +2,9 @@ name: WASM (Custom Marian) Ubuntu on: push: - branches: [ main ] + branches: [ main, ci-sandbox ] pull_request: - branches: [ main ] + branches: [ main, ci-sandbox ] jobs: build-wasm: diff --git a/.gitmodules b/.gitmodules index cc40735..8aa1014 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "3rd_party/ssplit-cpp"] path = 3rd_party/ssplit-cpp url = https://github.com/browsermt/ssplit-cpp +[submodule "bergamot-translator-tests"] + path = bergamot-translator-tests + url = https://github.com/browsermt/bergamot-translator-tests diff --git a/bergamot-translator-tests b/bergamot-translator-tests new file mode 160000 index 0000000..3771001 --- /dev/null +++ b/bergamot-translator-tests @@ -0,0 +1 @@ +Subproject commit 3771001720a8f01bba185ee5d5d908b7c266ef31 From 1184875cc9d75d56596736b0487968fdc7a35bb3 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 22 Apr 2021 16:01:39 +0100 Subject: [PATCH 03/19] Windows PCQueue support without Boost (#106) --- src/translator/pcqueue.h | 264 +++++++++++++++++++++++---------------- 1 file changed, 155 insertions(+), 109 deletions(-) diff --git a/src/translator/pcqueue.h b/src/translator/pcqueue.h index f0b3541..d6f4582 100644 --- a/src/translator/pcqueue.h +++ b/src/translator/pcqueue.h @@ -10,12 +10,14 @@ #include #ifdef __APPLE__ -#include -#include #include #include +#include +#include #elif defined(__linux) #include +#elif defined(_WIN32) || defined(_WIN64) +#include #else #include #endif @@ -35,67 +37,107 @@ namespace bergamot { #ifdef __APPLE__ class Semaphore { -public: - explicit Semaphore(int value) : task_(mach_task_self()) { - ABORT_IF(KERN_SUCCESS != - semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value), - "Could not create semaphore"); - } - - ~Semaphore() { - if (KERN_SUCCESS != semaphore_destroy(task_, back_)) { - std::cerr << "Could not destroy semaphore" << std::endl; - abort(); + public: + explicit Semaphore(int value) : task_(mach_task_self()) { + ABORT_IF(KERN_SUCCESS != semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value), "Could not create semaphore"); } - } - void wait() { - ABORT_IF(KERN_SUCCESS != semaphore_wait(back_), - "Wait for semaphore failed"); - } + ~Semaphore() { + if (KERN_SUCCESS != semaphore_destroy(task_, back_)) { + std::cerr << "Could not destroy semaphore" << std::endl; + abort(); + } + } - void post() { - ABORT_IF(KERN_SUCCESS != semaphore_signal(back_), - "Could not post to semaphore"); - } + void wait() { + ABORT_IF(KERN_SUCCESS != semaphore_wait(back_), "Wait for semaphore failed"); + } -private: - semaphore_t back_; - task_t task_; + void post() { + ABORT_IF(KERN_SUCCESS != semaphore_signal(back_), "Could not post to semaphore"); + } + + private: + semaphore_t back_; + task_t task_; }; -inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); } +inline void WaitSemaphore(Semaphore &semaphore) { + semaphore.wait(); +} #elif defined(__linux) class Semaphore { -public: - explicit Semaphore(unsigned int value) { - ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore"); - } - - ~Semaphore() { - if (-1 == sem_destroy(&sem_)) { - std::cerr << "Could not destroy semaphore " << std::endl; - abort(); + public: + explicit Semaphore(unsigned int value) { + ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore"); } - } - void wait() { - while (UTIL_UNLIKELY(-1 == sem_wait(&sem_))) { - ABORT_IF(errno != EINTR, "Wait for semaphore failed"); + ~Semaphore() { + if (-1 == sem_destroy(&sem_)) { + std::cerr << "Could not destroy semaphore" << std::endl; + abort(); + } } - } - void post() { - ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore"); - } + void wait() { + while (-1 == sem_wait(&sem_)) { + ABORT_IF(errno != EINTR, "Wait for semaphore failed"); + } + } -private: - sem_t sem_; + void post() { + ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore"); + } + + private: + sem_t sem_; }; -inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); } +inline void WaitSemaphore(Semaphore &semaphore) { + semaphore.wait(); +} + +#elif defined(_WIN32) || defined(_WIN64) + +class Semaphore { + public: + explicit Semaphore(LONG value) : sem_(CreateSemaphoreA(NULL, value, 2147483647, NULL)) { + ABORT_IF(!sem_, "Could not CreateSemaphore {}", GetLastError()); + } + + ~Semaphore() { + CloseHandle(sem_); + } + + + void wait() { + while (true) { + switch (WaitForSingleObject(sem_, 0L)) { + case WAIT_OBJECT_0: + return; + case WAIT_ABANDONED: + ABORT("A semaphore can't be abandoned, confused by Windows"); + case WAIT_TIMEOUT: + continue; + case WAIT_FAILED: + ABORT("Waiting on Semaphore failed {}", GetLastError()); + } + } + } + + void post() { + ABORT_IF(!ReleaseSemaphore(sem_, 1, NULL), "Failed to release Semaphore {}", GetLastError()); + } + + private: + HANDLE sem_; +}; + +inline void WaitSemaphore(Semaphore &semaphore) { + semaphore.wait(); +} #else typedef boost::interprocess::interprocess_semaphore Semaphore; @@ -113,7 +155,7 @@ inline void WaitSemaphore(Semaphore &on) { } } -#endif // Apple +#endif // Cases for semaphore support /** * Producer consumer queue safe for multiple producers and multiple consumers. @@ -124,11 +166,13 @@ inline void WaitSemaphore(Semaphore &on) { * throw. */ template class PCQueue { -public: + public: explicit PCQueue(size_t size) - : empty_(size), used_(0), storage_(new T[size]), - end_(storage_.get() + size), produce_at_(storage_.get()), - consume_at_(storage_.get()) {} + : empty_(size), used_(0), + storage_(new T[size]), + end_(storage_.get() + size), + produce_at_(storage_.get()), + consume_at_(storage_.get()) {} // Add a value to the queue. void Produce(const T &val) { @@ -141,8 +185,7 @@ public: empty_.post(); throw; } - if (++produce_at_ == end_) - produce_at_ = storage_.get(); + if (++produce_at_ == end_) produce_at_ = storage_.get(); } used_.post(); } @@ -158,14 +201,14 @@ public: empty_.post(); throw; } - if (++produce_at_ == end_) - produce_at_ = storage_.get(); + if (++produce_at_ == end_) produce_at_ = storage_.get(); } used_.post(); } + // Consume a value, assigning it to out. - T &Consume(T &out) { + T& Consume(T &out) { WaitSemaphore(used_); { std::lock_guard consume_lock(consume_at_mutex_); @@ -175,15 +218,14 @@ public: used_.post(); throw; } - if (++consume_at_ == end_) - consume_at_ = storage_.get(); + if (++consume_at_ == end_) consume_at_ = storage_.get(); } empty_.post(); return out; } // Consume a value, swapping it to out. - T &ConsumeSwap(T &out) { + T& ConsumeSwap(T &out) { WaitSemaphore(used_); { std::lock_guard consume_lock(consume_at_mutex_); @@ -193,13 +235,13 @@ public: used_.post(); throw; } - if (++consume_at_ == end_) - consume_at_ = storage_.get(); + if (++consume_at_ == end_) consume_at_ = storage_.get(); } empty_.post(); return out; } + // Convenience version of Consume that copies the value to return. // The other version is faster. T Consume() { @@ -208,7 +250,7 @@ public: return ret; } -private: + private: // Number of empty spaces in storage_. Semaphore empty_; // Number of occupied spaces in storage_. @@ -234,63 +276,67 @@ template struct UnboundedPage { }; template class UnboundedSingleQueue { -public: - UnboundedSingleQueue() : valid_(0) { - SetFilling(new UnboundedPage()); - SetReading(filling_); - } - - void Produce(T &&val) { - if (filling_current_ == filling_end_) { - UnboundedPage *next = new UnboundedPage(); - filling_->next = next; - SetFilling(next); + public: + UnboundedSingleQueue() : valid_(0) { + SetFilling(new UnboundedPage()); + SetReading(filling_); } - *(filling_current_++) = std::move(val); - valid_.post(); - } - void Produce(const T &val) { Produce(T(val)); } - - T &Consume(T &out) { - WaitSemaphore(valid_); - if (reading_current_ == reading_end_) { - SetReading(reading_->next); + void Produce(T &&val) { + if (filling_current_ == filling_end_) { + UnboundedPage *next = new UnboundedPage(); + filling_->next = next; + SetFilling(next); + } + *(filling_current_++) = std::move(val); + valid_.post(); } - out = std::move(*(reading_current_++)); - return out; - } - // Warning: very much a no-guarantees race-condition-rich implementation! - // But sufficient for our specific purpose: The single thread that consumes - // is also the only one that checks Empty, and knows that it's racing. - bool Empty() const { return reading_current_ == filling_current_; } + void Produce(const T &val) { + Produce(T(val)); + } -private: - void SetFilling(UnboundedPage *to) { - filling_ = to; - filling_current_ = to->entries; - filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T); - } - void SetReading(UnboundedPage *to) { - reading_.reset(to); - reading_current_ = to->entries; - reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T); - } + T& Consume(T &out) { + WaitSemaphore(valid_); + if (reading_current_ == reading_end_) { + SetReading(reading_->next); + } + out = std::move(*(reading_current_++)); + return out; + } - Semaphore valid_; + // Warning: very much a no-guarantees race-condition-rich implementation! + // But sufficient for our specific purpose: The single thread that consumes + // is also the only one that checks Empty, and knows that it's racing. + bool Empty() const { + return reading_current_ == filling_current_; + } - UnboundedPage *filling_; + private: + void SetFilling(UnboundedPage *to) { + filling_ = to; + filling_current_ = to->entries; + filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T); + } + void SetReading(UnboundedPage *to) { + reading_.reset(to); + reading_current_ = to->entries; + reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T); + } - std::unique_ptr> reading_; + Semaphore valid_; - T *filling_current_; - T *filling_end_; - T *reading_current_; - T *reading_end_; + UnboundedPage *filling_; - UnboundedSingleQueue(const UnboundedSingleQueue &) = delete; - UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete; + std::unique_ptr > reading_; + + T *filling_current_; + T *filling_end_; + T *reading_current_; + T *reading_end_; + + UnboundedSingleQueue(const UnboundedSingleQueue &) = delete; + UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete; }; } // namespace bergamot From fc6976ae297fe0fe17c9c7179201f915121783e7 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Thu, 22 Apr 2021 17:29:22 +0100 Subject: [PATCH 04/19] Remove dead code (#107) Co-authored-by: Kenneth Heafield --- src/translator/CMakeLists.txt | 1 - src/translator/multifactor_priority.cpp | 7 ------- src/translator/multifactor_priority.h | 20 -------------------- 3 files changed, 28 deletions(-) delete mode 100644 src/translator/multifactor_priority.cpp delete mode 100644 src/translator/multifactor_priority.h diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index 9bf7dca..cbb8369 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(bergamot-translator STATIC text_processor.cpp sentence_splitter.cpp batch_translator.cpp - multifactor_priority.cpp request.cpp batcher.cpp response.cpp diff --git a/src/translator/multifactor_priority.cpp b/src/translator/multifactor_priority.cpp deleted file mode 100644 index 0f93a81..0000000 --- a/src/translator/multifactor_priority.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "multifactor_priority.h" - -namespace marian { -namespace bergamot { - -} // namespace bergamot -} // namespace marian diff --git a/src/translator/multifactor_priority.h b/src/translator/multifactor_priority.h deleted file mode 100644 index 1e239f7..0000000 --- a/src/translator/multifactor_priority.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_ -#define SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_ - -#include "data/types.h" -#include "definitions.h" -#include "sys/time.h" - -namespace marian { -namespace bergamot { - -struct MultiFactorPriority { - int nice; /* user configurable priority, at a request */ - unsigned int Id; - /* What else should priority depend on? */ - double priority() { return Id; } -}; -} // namespace bergamot -} // namespace marian - -#endif // SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_ From 7d2e74f3c0bbcc9e3b153783c7e0db63550cf0bb Mon Sep 17 00:00:00 2001 From: abhi-agg <66322306+abhi-agg@users.noreply.github.com> Date: Mon, 26 Apr 2021 17:26:27 +0200 Subject: [PATCH 05/19] Changed underlying template parameter of AlignedMemory class (#111) - AlignedMemory is AlignedVector now instead of AlignedVector - This solves the issue of allocating 8x of the actual required memory for loading files as bytes --- src/translator/definitions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/translator/definitions.h b/src/translator/definitions.h index 32998b9..73d8320 100644 --- a/src/translator/definitions.h +++ b/src/translator/definitions.h @@ -22,8 +22,8 @@ template UPtr UNew(Args &&... args) { template UPtr UNew(UPtr p) { return UPtr(p); } -/// Shortcut to AlignedVector for byte arrays -typedef AlignedVector AlignedMemory; +/// Shortcut to AlignedVector for byte arrays +typedef AlignedVector AlignedMemory; } // namespace bergamot } // namespace marian From fdf9e66cef172cc240cb2c1ad1bbf78e964dcc2a Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Mon, 26 Apr 2021 18:59:20 +0100 Subject: [PATCH 06/19] Windows workflows and mac framework accelerate (#108) Windows still failing but getting closer --- .github/workflows/windows.yml | 74 +++-------------------------------- 3rd_party/marian-dev | 2 +- 2 files changed, 6 insertions(+), 70 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index fd1f21f..6c3a05f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -17,12 +17,6 @@ jobs: # Windows CPU-only build - name: "Windows CPU-only" cuda: "" - gpu: false - # GPU Builds are commented out, for bergamot-translator CI runs. - # Windows CPU+GPU build - # - name: "Windows CPU+CUDA" - # cuda: "10.2" - # gpu: true runs-on: windows-2019 name: ${{ matrix.name }} @@ -42,89 +36,31 @@ jobs: echo "MKLROOT=${{ github.workspace }}\mkl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append shell: powershell - - name: Install CUDA - run: | - .\3rd_party\marian-dev\scripts\ci\install_cuda_windows.ps1 "10.2" - # Set CUDA_PATH environment variable so that CMake can find CUDA - echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - shell: powershell - if: matrix.gpu == true - - name: Prepare vcpkg uses: lukka/run-vcpkg@v4 with: - vcpkgArguments: protobuf - vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da + vcpkgArguments: protobuf pcre2 + vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da vcpkgDirectory: ${{ github.workspace }}/vcpkg/ vcpkgTriplet: x64-windows-static - # Windows CUDA builds use USE_NCCL=off due to compilation errors. + # Windows CPU only minimal build - name: Build Debug uses: lukka/run-cmake@v3 with: buildDirectory: ${{ github.workspace }}/build/Debug cmakeAppendedArgs: '-G Ninja -DCMAKE_BUILD_TYPE="Debug" - -DOPENSSL_USE_STATIC_LIBS="TRUE" - -DOPENSSL_MSVC_STATIC_RT="TRUE" - -DCOMPILE_CPU="TRUE" - -DCOMPILE_CUDA="${{ matrix.gpu }}" - -DCOMPILE_SERVER="FALSE" - -DCOMPILE_TESTS="TRUE" - -DUSE_FBGEMM="TRUE" - -DUSE_MPI="FALSE" - -DUSE_NCCL="FALSE" - -DUSE_SENTENCEPIECE="TRUE" - -DUSE_STATIC_LIBS="TRUE"' - cmakeListsOrSettingsJson: CMakeListsTxtAdvanced - cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt - useVcpkgToolchainFile: true - # Building in Debug is sufficient for the all-in CPU+GPU compilation; - # its main purpose is to detect warnings that the Release build is not - # able to find sometimes. - if: matrix.gpu == true - - # Windows CUDA builds use USE_NCCL=off due to compilation errors - # Boost is pre-installed on Azure/GitHub-hosted Windows runners - # https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md#boost - # (not used yet) - - name: Build Release - uses: lukka/run-cmake@v3 - with: - buildDirectory: ${{ github.workspace }}/build/ - cmakeAppendedArgs: '-G Ninja - -DBOOST_ROOT="$(BOOST_ROOT_1_72_0)" - -DBOOST_INCLUDEDIR="$(BOOST_ROOT_1_72_0)/include" - -DBOOST_LIBRARYDIR="$(BOOST_ROOT_1_72_0)/lib" - -DCMAKE_BUILD_TYPE="Release" - -DOPENSSL_USE_STATIC_LIBS="TRUE" - -DOPENSSL_MSVC_STATIC_RT="TRUE" - -DCOMPILE_CPU="TRUE" - -DCOMPILE_CUDA="${{ matrix.gpu }}" - -DCOMPILE_SERVER="FALSE" - -DCOMPILE_TESTS="TRUE" - -DUSE_FBGEMM="TRUE" - -DUSE_MPI="FALSE" - -DUSE_NCCL="FALSE" - -DUSE_SENTENCEPIECE="TRUE" + -DUSE_WASM_COMPATIBLE_SOURCE="OFF" -DUSE_STATIC_LIBS="TRUE"' cmakeListsOrSettingsJson: CMakeListsTxtAdvanced cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt useVcpkgToolchainFile: true - # Removing unit-tests, taken care of in browsermt/marian-dev - # - name: Run unit tests - # working-directory: build/ - # run: ctest - # # Not run in GPU builds because GitHub-hosted VMs do not have GPUs - # if: matrix.gpu == false - name: Print versions working-directory: build/ run: | - .\marian.exe --version - .\marian-decoder.exe --version - .\marian-scorer.exe --version + .\app\bergamot-translator-app.exe --version dir *.exe shell: cmd diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev index 0f0bcf9..46a2218 160000 --- a/3rd_party/marian-dev +++ b/3rd_party/marian-dev @@ -1 +1 @@ -Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172 +Subproject commit 46a22187341ff51b3f11a8cb1edf51c995e583ca From fa2003e70d0baf88a5db6f51086ff899c7192e63 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 27 Apr 2021 15:56:39 +0100 Subject: [PATCH 07/19] Cleanup API: Refactor request on-complete transition (#80) --- app/service-cli-bytearray.cpp | 8 +- app/service-cli.cpp | 9 ++- src/translator/CMakeLists.txt | 2 +- src/translator/TranslationModel.cpp | 25 +++--- src/translator/batch_translator.cpp | 5 +- src/translator/request.cpp | 26 ++---- src/translator/request.h | 119 ++++++++++++++-------------- src/translator/response.cpp | 106 ------------------------- src/translator/response.h | 34 ++------ src/translator/response_builder.cpp | 87 ++++++++++++++++++++ src/translator/response_builder.h | 93 ++++++++++++++++++++++ src/translator/response_options.h | 50 ++++++++++++ src/translator/sentence_ranges.cpp | 20 ++++- src/translator/sentence_ranges.h | 10 +-- src/translator/service.cpp | 58 +++++++++++++- src/translator/service.h | 100 +++++++++++++++++++---- 16 files changed, 492 insertions(+), 260 deletions(-) delete mode 100644 src/translator/response.cpp create mode 100644 src/translator/response_builder.cpp create mode 100644 src/translator/response_builder.h create mode 100644 src/translator/response_options.h diff --git a/app/service-cli-bytearray.cpp b/app/service-cli-bytearray.cpp index f868d4d..d8c7059 100644 --- a/app/service-cli-bytearray.cpp +++ b/app/service-cli-bytearray.cpp @@ -27,8 +27,14 @@ int main(int argc, char *argv[]) { std::string input = std_input.str(); using marian::bergamot::Response; + marian::bergamot::ResponseOptions responseOptions; + responseOptions.qualityScores = true; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + // Wait on future until Response is complete - std::future responseFuture = service.translate(std::move(input)); + std::future responseFuture = + service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); diff --git a/app/service-cli.cpp b/app/service-cli.cpp index 6ed4d81..d7c72e6 100644 --- a/app/service-cli.cpp +++ b/app/service-cli.cpp @@ -8,6 +8,7 @@ #include "marian.h" #include "translator/parser.h" #include "translator/response.h" +#include "translator/response_options.h" #include "translator/service.h" int main(int argc, char *argv[]) { @@ -21,8 +22,14 @@ int main(int argc, char *argv[]) { std::string input = std_input.str(); using marian::bergamot::Response; + marian::bergamot::ResponseOptions responseOptions; + responseOptions.qualityScores = true; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + // Wait on future until Response is complete - std::future responseFuture = service.translate(std::move(input)); + std::future responseFuture = + service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index cbb8369..d7c8e3c 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -6,7 +6,7 @@ add_library(bergamot-translator STATIC batch_translator.cpp request.cpp batcher.cpp - response.cpp + response_builder.cpp batch.cpp sentence_ranges.cpp service.cpp diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp index 06b04eb..026a126 100644 --- a/src/translator/TranslationModel.cpp +++ b/src/translator/TranslationModel.cpp @@ -9,6 +9,7 @@ // All local project includes #include "TranslationModel.h" #include "translator/parser.h" +#include "translator/response.h" #include "translator/service.h" TranslationModel::TranslationModel(const std::string &config, @@ -21,31 +22,25 @@ TranslationModel::~TranslationModel() {} std::vector TranslationModel::translate(std::vector &&texts, TranslationRequest request) { - // Implementing a non-async version first. Unpleasant, but should work. - std::promise> promise; - auto future = promise.get_future(); // This code, move into async? std::vector translationResults; - for (auto &text : texts) { - // Collect future as marian::bergamot::TranslationResult - auto intermediate = service_.translate(std::move(text)); - intermediate.wait(); - auto marianResponse(std::move(intermediate.get())); - + std::vector responses = + service_.translateMultiple(std::move(texts), request); + for (auto &response : responses) { TranslationResult::SentenceMappings sentenceMappings; - for (size_t idx = 0; idx < marianResponse.size(); idx++) { - marian::string_view src = marianResponse.source.sentence(idx); - marian::string_view tgt = marianResponse.target.sentence(idx); + for (size_t idx = 0; idx < response.size(); idx++) { + marian::string_view src = response.source.sentence(idx); + marian::string_view tgt = response.target.sentence(idx); sentenceMappings.emplace_back(std::string_view(src.data(), src.size()), std::string_view(tgt.data(), tgt.size())); } // In place construction. translationResults.emplace_back( - std::move(marianResponse.source.text), // &&marianResponse.source_ - std::move(marianResponse.target.text), // &&marianResponse.translation_ - std::move(sentenceMappings) // &&sentenceMappings + std::move(response.source.text), // &&response.source_ + std::move(response.target.text), // &&response.translation_ + std::move(sentenceMappings) // &&sentenceMappings ); } diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 19cbaf9..6b2425d 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) { std::vector batchVector; auto &sentences = batch.sentences(); + size_t batchSequenceNumber{0}; for (auto &sentence : sentences) { - data::SentenceTuple sentence_tuple(sentence.lineNumber()); + data::SentenceTuple sentence_tuple(batchSequenceNumber); Segment segment = sentence.getUnderlyingSegment(); sentence_tuple.push_back(segment); batchVector.push_back(sentence_tuple); + + ++batchSequenceNumber; } size_t batchSize = batchVector.size(); diff --git a/src/translator/request.cpp b/src/translator/request.cpp index b6d2438..7e9b739 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -11,18 +11,17 @@ namespace marian { namespace bergamot { // ----------------------------------------------------------------- -Request::Request(size_t Id, size_t lineNumberBegin, - std::vector> &vocabs, AnnotatedText &&source, - Segments &&segments, std::promise responsePromise) - : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs), - source_(std::move(source)), segments_(std::move(segments)), - response_(std::move(responsePromise)) { +Request::Request(size_t Id, Segments &&segments, + ResponseBuilder &&responseBuilder) + : Id_(Id), segments_(std::move(segments)), + responseBuilder_(std::move(responseBuilder)) + +{ counter_ = segments_.size(); histories_.resize(segments_.size(), nullptr); } -size_t Request::lineNumberBegin() const { return lineNumberBegin_; } size_t Request::numSegments() const { return segments_.size(); } size_t Request::segmentTokens(size_t index) const { @@ -39,17 +38,10 @@ void Request::processHistory(size_t index, Ptr history) { // In case this is last request in, completeRequest is called, which sets the // value of the promise. if (--counter_ == 0) { - completeRequest(); + responseBuilder_(std::move(histories_)); } } -void Request::completeRequest() { - // Request no longer needs to hold the content, can transfer it to - // Response. - Response response(std::move(source_), std::move(histories_), *vocabs_); - response_.set_value(std::move(response)); -} - bool Request::operator<(const Request &b) const { // Among Requests, only sequence id is used for obtaining priority. return Id_ < b.Id_; @@ -64,10 +56,6 @@ size_t RequestSentence::numTokens() const { return (request_->segmentTokens(index_)); } -size_t RequestSentence::lineNumber() const { - return (request_->lineNumberBegin() + index_); -} - void RequestSentence::completeSentence(Ptr history) { // Relays completeSentence into request's processHistory, using index // information. diff --git a/src/translator/request.h b/src/translator/request.h index 605dea7..e2188cd 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -1,24 +1,9 @@ -// -// Defines: -// -// Request: holds the input text of a text, Segments (vector) which are -// to go to the batching mechanism and alignments between the processed -// segments and the input text (sourceTokenRanges). In addition, Request takes -// care of the barrier which fires when all the Segments in a request are done -// translating by the workers (BatchTranslator). -// TODO(jerinphilip): Extend Request with notions of Priority (sequence, -// user-given). -// -// RequestSentence: is a tuple of (index, Ptr). This provides the -// batching mechanism access to the segment within the request. The backref to -// Request allows event triggering the barrier upon completion of the last -// sentence by a worker. - #ifndef SRC_BERGAMOT_REQUEST_H_ #define SRC_BERGAMOT_REQUEST_H_ #include "definitions.h" #include "response.h" +#include "response_builder.h" #include "sentence_ranges.h" #include "common/logging.h" @@ -33,80 +18,96 @@ namespace marian { namespace bergamot { +/// A Request is an internal representation used to represent a request after +/// processed by TextProcessor into sentences constituted by marian::Words. +/// +/// The batching mechanism (Batcher) draws from multiple Requests and compiles +/// sentences into a batch. When a batch completes translation (at +/// BatchTranslator, intended in a different thread), backward propogation +/// happens through: +/// +/// ```cpp +/// Batch::completeBatch(...) +/// -> RequestSentence::completeSentence(..) +/// -> Request::processHistory(...) +/// ``` +/// +/// When all sentences in a Request are completed, responseBuilder is +/// triggered with the compiled Histories, to construct the Response +/// corresponding to the Request and set value of the promise which triggers the +/// future at client. class Request { public: - Request(size_t Id, size_t lineNumberBegin, - std::vector> &vocabs_, AnnotatedText &&source, - Segments &&segments, std::promise responsePromise); + /// Constructs an internal representation of the Request identified by Id, + /// processed Segments and accepts a callback (ResponseBuilder) which builds + /// the Response upon completion of the Request. + /// + /// + /// @param [in] Id: Identifier assigned to Request by Service. + /// @param [in] segments: Each segment is a unit to be translated. + /// @param [in] responseBuilder: Callback function (of ResponseBuilder type) + /// to be triggered upon the completion of translation of all units in a + /// Request. + Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder); - // Obtain the count of tokens in the segment correponding to index. Used to - // insert sentence from multiple requests into the corresponding size bucket. + /// Obtain the count of tokens in the segment correponding to index. Used to + /// insert sentence from multiple requests into the corresponding size bucket. size_t segmentTokens(size_t index) const; - // Obtain number of segments in a request. + /// Obtain number of segments in a request. size_t numSegments() const; - size_t lineNumberBegin() const; - // Obtains segment corresponding to index to create a batch of segments among - // several requests. + /// Obtains segment corresponding to index to create a batch of segments + /// among several requests. Segment getSegment(size_t index) const; - // For notions of priority among requests, used to enable std::set in - // Batcher. + /// For notions of priority among requests, used to enable std::set in + /// Batcher. bool operator<(const Request &request) const; - // Processes a history obtained after translating in a heterogenous batch - // compiled from requests. + /// Processes a history obtained after translating in a heterogenous batch + /// compiled from requests. void processHistory(size_t index, Ptr history); - // On completion of last segment, sets value of the promise. - void completeRequest(); - private: size_t Id_; - size_t lineNumberBegin_; - // Multiple translation-workers can concurrently access the same Request. The - // following atomic atomically operates on the variable holding sentences - // remaining to be translated. + /// Multiple translation-workers can concurrently access the same Request. The + /// following atomic atomically operates on the variable holding sentences + /// remaining to be translated. std::atomic counter_; - // source_ holds the source string to be translated. segments_ hold the - // sentences generated from source_ in vector. sourceRanges_ are - // string_views of the text corresponding to these words, pointing to - // sequences in source_. histories_ is a buffer which eventually stores the - // translations of each segment in the corresponding index. - AnnotatedText source_; + /// segments_ hold the sentences processed into Words which generated from + /// input string. Segments segments_; + + /// histories_ is a buffer which eventually stores the translations of each + /// segment in the corresponding index. std::vector> histories_; - // Members above are moved into newly constructed Response on completion - // of translation of all segments. The promise below is set to this Response - // value. future to this promise is made available to the user through - // Service. - std::promise response_; - - // Constructing Response requires the vocabs_ used to generate Request. - std::vector> *vocabs_; + /// Constructing Response requires the vocabs_ used to generate Request. + /// std::vector> *vocabs_; + ResponseBuilder responseBuilder_; }; +/// A RequestSentence provides a view to a sentence within a Request. Existence +/// of this class allows the sentences and associated information to be kept +/// within Request, while batching mechanism (Batcher) compiles Batch from +/// RequestSentence-s coming from different Requests. class RequestSentence { - // A RequestSentence provides a view to a sentence within a Request. Existence - // of this class allows the sentences and associated information to be kept - // within Request. public: RequestSentence(size_t, Ptr); + + /// Number of tokens in the segment this RequestSentence represents. Used to + /// order by length in batching. size_t numTokens() const; - // lineNumber in Request, used for matching marian-decoder. SentenceTuple - // requires lineNumber to be set for Corpus based batches. - size_t lineNumber() const; - - // Accessor to the segment represented by the RequestSentence. + /// Accessor to the segment represented by the RequestSentence. Segment getUnderlyingSegment() const; - // Forwards call to Request, checking for completion. + /// Forwards history to Request to set history corresponding to this + /// RequestSentence. void completeSentence(Ptr history); friend bool operator<(const RequestSentence &a, const RequestSentence &b); diff --git a/src/translator/response.cpp b/src/translator/response.cpp deleted file mode 100644 index e5bc38f..0000000 --- a/src/translator/response.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "response.h" -#include "common/logging.h" -#include "data/alignment.h" -#include "sentence_ranges.h" - -#include - -namespace marian { -namespace bergamot { - -Response::Response(AnnotatedText &&source, Histories &&histories, - std::vector> &vocabs) - : source(std::move(source)) { - // Reserving length at least as much as source_ seems like a reasonable thing - // to do to avoid reallocations. - target.text.reserve(source.text.size()); - - // In a first step, the decoded units (individual senteneces) are compiled - // into a huge string. This is done by computing indices first and appending - // to the string as each sentences are decoded. - std::vector> translationRanges; - std::vector sentenceBegins; - - size_t offset{0}; - bool first{true}; - - for (auto &history : histories) { - // TODO(jerin): Change hardcode of nBest = 1 - NBestList onebest = history->nBest(1); - - Result result = onebest[0]; // Expecting only one result; - Words words = std::get<0>(result); - auto targetVocab = vocabs.back(); - - std::string decoded; - std::vector targetMappings; - targetVocab->decodeWithByteRanges(words, decoded, targetMappings); - - if (first) { - first = false; - } else { - target.text += " "; - ++offset; - } - - sentenceBegins.push_back(translationRanges.size()); - target.text += decoded; - auto decodedStringBeginMarker = targetMappings.front().begin(); - for (auto &sview : targetMappings) { - size_t startIdx = offset + sview.begin() - decodedStringBeginMarker; - translationRanges.emplace_back(startIdx, startIdx + sview.size()); - } - - offset += decoded.size(); - - // Alignments - // TODO(jerinphilip): The following double conversion might not be - // necessary. Hard alignment can directly be exported, but this would mean - // WASM bindings for a structure deep within marian source. - auto hyp = std::get<1>(result); - auto softAlignment = hyp->tracebackAlignment(); - auto hardAlignment = data::ConvertSoftAlignToHardAlign( - softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a - // configurable parameter. - - Alignment unified_alignment; - for (auto &p : hardAlignment) { - unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob}); - } - - alignments.push_back(std::move(unified_alignment)); - - // Quality scores: Sequence level is obtained as normalized path scores. - // Word level using hypothesis traceback. These are most-likely logprobs. - auto normalizedPathScore = std::get<2>(result); - auto wordQualities = hyp->tracebackWordScores(); - wordQualities.pop_back(); - qualityScores.push_back((Quality){normalizedPathScore, wordQualities}); - } - - // Once we have the indices in translation (which might be resized a few - // times) ready, we can prepare and store the string_view as annotations - // instead. This is accomplished by iterating over available sentences using - // sentenceBegin and using addSentence(...) API from Annotation. - - for (size_t i = 1; i <= sentenceBegins.size(); i++) { - std::vector targetMappings; - size_t begin = sentenceBegins[i - 1]; - size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size() - : sentenceBegins[i]; - - for (size_t idx = begin; idx < safe_end; idx++) { - auto &p = translationRanges[idx]; - size_t begin_idx = p.first; - size_t end_idx = p.second; - - const char *data = &target.text[begin_idx]; - size_t size = end_idx - begin_idx; - targetMappings.emplace_back(data, size); - } - - target.addSentence(targetMappings); - } -} -} // namespace bergamot -} // namespace marian diff --git a/src/translator/response.h b/src/translator/response.h index 4f87b8d..3b1f48d 100644 --- a/src/translator/response.h +++ b/src/translator/response.h @@ -40,34 +40,12 @@ struct Quality { /// AnnotatedText provides an API to access markings of (sub)-word and /// sentences boundaries, which are required to interpret Quality and /// Alignment (s) at the moment. -class Response { - -public: - /// - Response(AnnotatedText &&source, Histories &&histories, - std::vector> &vocabs); - - /// \cond HIDDEN_PUBLIC - // Move constructor. - Response(Response &&other) - : source(std::move(other.source)), target(std::move(other.target)), - alignments(std::move(other.alignments)), - qualityScores(std::move(other.qualityScores)){}; - - // The following copy bans are not stricitly required anymore since Annotation - // is composed of the ByteRange primitive (which was previously string_view - // and required to be bound to string), but makes movement efficient by - // banning these letting compiler complain about copies. - - Response(const Response &) = delete; - Response &operator=(const Response &) = delete; - - /// \endcond - - /// Number of sentences translated. The processing of a text of into sentences - /// are handled internally, and this information can be used to iterate - /// through meaningful units of translation for which alignment and quality - /// information are available. +struct Response { + /// Convenience function to obtain number of units translated. Same as + /// `.source.numSentences()` and `.target.numSentences().` The processing of a + /// text of into sentences are handled internally, and this information can be + /// used to iterate through meaningful units of translation for which + /// alignment and quality information are available. const size_t size() const { return source.numSentences(); } /// source text and annotations of (sub-)words and sentences. diff --git a/src/translator/response_builder.cpp b/src/translator/response_builder.cpp new file mode 100644 index 0000000..c624707 --- /dev/null +++ b/src/translator/response_builder.cpp @@ -0,0 +1,87 @@ +#include "response_builder.h" + +namespace marian { +namespace bergamot { + +void ResponseBuilder::buildQualityScores(Histories &histories, + Response &response) { + std::vector qualityScores; + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + auto hyp = std::get<1>(result); + // Quality scores: Sequence level is obtained as normalized path scores. + // Word level using hypothesis traceback. These are most-likely + // logprobs. + auto normalizedPathScore = std::get<2>(result); + auto wordQualities = hyp->tracebackWordScores(); + wordQualities.pop_back(); + response.qualityScores.push_back( + Quality{normalizedPathScore, wordQualities}); + } +} + +void ResponseBuilder::buildAlignments(Histories &histories, + Response &response) { + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + // Alignments + // TODO(jerinphilip): The following double conversion might not be + // necessary. Hard alignment can directly be exported, but this would + // mean WASM bindings for a structure deep within marian source. + auto hyp = std::get<1>(result); + auto softAlignment = hyp->tracebackAlignment(); + auto threshold = responseOptions_.alignmentThreshold; + auto hardAlignment = + data::ConvertSoftAlignToHardAlign(softAlignment, threshold); + Alignment unified_alignment; + for (auto &p : hardAlignment) { + unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob}); + } + + response.alignments.push_back(std::move(unified_alignment)); + } +} + +void ResponseBuilder::buildTranslatedText(Histories &histories, + Response &response) { + // Reserving length at least as much as source_ seems like a reasonable + // thing to do to avoid reallocations. + response.target.text.reserve(response.source.text.size()); + + size_t offset{0}; + bool first{true}; + + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + auto targetVocab = vocabs_->back(); + + std::string decoded; + std::vector targetSentenceMappings; + targetVocab->decodeWithByteRanges(words, decoded, targetSentenceMappings); + + // delimiter can be used to fill in the blanks from source as well. + std::string delimiter; + if (first) { + first = false; + } else { + delimiter = " "; + } + + response.target.appendSentence(delimiter, decoded, targetSentenceMappings); + } +} + +} // namespace bergamot +} // namespace marian diff --git a/src/translator/response_builder.h b/src/translator/response_builder.h new file mode 100644 index 0000000..85caffb --- /dev/null +++ b/src/translator/response_builder.h @@ -0,0 +1,93 @@ +#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_ +#define SRC_BERGAMOT_RESPONSE_BUILDER_H_ + +#include "data/types.h" +#include "response.h" +#include "response_options.h" + +// For now we will work with this, to avoid complaints another structure is hard +// to operate with. + +namespace marian { +namespace bergamot { + +/// ResponseBuilder is a callback functor. It is expected to be bound to a +/// Request after giving it the context of options, vocabs and promise to set. +/// It constructs the Response and it's members based on options +/// (quality=on|off, alignments=on|off, mappings=on|off, splitmode=sentence | +/// paragraph). + +class ResponseBuilder { +public: + /// @param [in] responseOptions: ResponseOptions, indicating what to include + /// or not in the response and any additional configurable parameters. + /// @param [in] vocabs: marian vocab object (used in decoding) + /// @param [in] promise: promise to set with the constructed Response. + ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source, + std::vector> &vocabs, + std::promise &&promise) + : responseOptions_(responseOptions), source_(std::move(source)), + vocabs_(&vocabs), promise_(std::move(promise)) {} + + /// Constructs and sets the promise of a Response object from obtained + /// histories after translating. + /// @param [in] histories: Histories obtained after translating the Request + /// from which this functor is called. + void operator()(Histories &&histories) { + // TODO(jerinphilip) load ResponseOptions into options and turn build + // functions on or off. + // responseOptions_ is unused, but we can try something here. + ABORT_IF(source_.numSentences() != histories.size(), + "Mismatch in source and translated sentences"); + Response response; + + // Move source_ into response. + response.source = std::move(source_); + + // Should be after source is set + buildTranslatedText(histories, response); + + // Should always be after buildTranslatedText + if (responseOptions_.qualityScores) { + buildQualityScores(histories, response); + } + + if (responseOptions_.alignment) { + buildAlignments(histories, response); + } + + // Once complete, set promise. + promise_.set_value(std::move(response)); + } + +private: + /// Builds qualityScores from histories and writes to response. expects + /// buildTranslatedText to be run before to be able to obtain target text and + /// subword information. + /// @param histories [in] + /// @param response [out] + void buildQualityScores(Histories &histories, Response &response); + + /// Builds alignments from histories and writes onto response. + /// @param histories [in] + /// @param response [out] + void buildAlignments(Histories &histories, Response &response); + + /// Builds translated text and subword annotations and writes onto response. + /// @param histories [in] + /// @param response [out] + void buildTranslatedText(Histories &histories, Response &response); + + // Data members are context/curried args for the functor. + + ResponseOptions responseOptions_; + std::vector> *vocabs_; // vocabs are required for decoding + // and any source validation checks. + std::promise promise_; // To be set when callback triggered and + // after Response constructed. + AnnotatedText source_; +}; +} // namespace bergamot +} // namespace marian + +#endif // SRC_BERGAMOT_RESPONSE_BUILDER_H_ diff --git a/src/translator/response_options.h b/src/translator/response_options.h new file mode 100644 index 0000000..ed3cce3 --- /dev/null +++ b/src/translator/response_options.h @@ -0,0 +1,50 @@ +#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_ +#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_ +#include + +namespace marian { +namespace bergamot { + +enum ConcatStrategy { + /// Target text is constructed faithful to the source-text structure. + FAITHFUL, + + /// Target text is concatenated by a space. + SPACE +}; + +enum QualityScoreType { + /// Provide a free quality-score that comes with the machine-translation model + /// itself. + FREE, + + /// An expensive quality-score that runs additional computations to determine + /// quality of an output. + EXPENSIVE +}; + +/// ResponseOptions dictate how to construct a Response for an input string of +/// text to be translated. +struct ResponseOptions { + bool qualityScores{false}; ///< Include quality-scores or not. + bool alignment{false}; ///< Include alignments or not. + + /// Whether to include sentenceMappings or not. Alignments require + /// sentenceMappings and are available irrespective of this option if + /// `alignment=true`. + bool sentenceMappings{false}; + + /// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse + /// matrix. Higher value implies stronger filtering leading to provision of + /// higher-confidence matches. `1.0f` gives argmax (not the full-dense + /// matrix). + float alignmentThreshold{0.2f}; + + QualityScoreType qualityScoreType{QualityScoreType::FREE}; + ConcatStrategy concatStrategy{ConcatStrategy::FAITHFUL}; +}; + +} // namespace bergamot +} // namespace marian + +#endif // SRC_BERGAMOT_RESPONSE_OPTIONS_H_ diff --git a/src/translator/sentence_ranges.cpp b/src/translator/sentence_ranges.cpp index aae9dd3..da9d3ee 100644 --- a/src/translator/sentence_ranges.cpp +++ b/src/translator/sentence_ranges.cpp @@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const { // the flatByteRange and non-empty sentence before this happened and // construct empty string-view equivalent ByteRange. ByteRange eos = flatByteRanges_[eosId - 1]; - sentenceByteRange = (ByteRange){eos.end, eos.end}; + sentenceByteRange = ByteRange{eos.end, eos.end}; } else { ByteRange bos = flatByteRanges_[bosId]; ByteRange eos = flatByteRanges_[eosId - 1]; - sentenceByteRange = (ByteRange){bos.begin, eos.end}; + sentenceByteRange = ByteRange{bos.begin, eos.end}; } return sentenceByteRange; } @@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const { return asStringView(sentenceAsByteRange); } +void AnnotatedText::appendSentence(std::string prefix, std::string &reference, + std::vector &wordRanges) { + text += prefix; + size_t offset = text.size(); // Get size before to do ByteRange arithmetic + text += reference; // Append reference to text + std::vector sentence; + for (auto &wordView : wordRanges) { + size_t thisWordBegin = offset + wordView.data() - &reference[0]; + sentence.push_back( + ByteRange{thisWordBegin, thisWordBegin + wordView.size()}); + } + annotation.addSentence(sentence); +} + void AnnotatedText::addSentence(std::vector &wordRanges) { addSentence(std::begin(wordRanges), std::end(wordRanges)); }; @@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector::iterator begin, std::vector sentence; for (auto p = begin; p != end; p++) { size_t begin_offset = p->data() - &text[0]; - sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()}); + sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()}); } annotation.addSentence(sentence); }; diff --git a/src/translator/sentence_ranges.h b/src/translator/sentence_ranges.h index b3986e3..f9c881e 100644 --- a/src/translator/sentence_ranges.h +++ b/src/translator/sentence_ranges.h @@ -64,7 +64,6 @@ public: sentenceEndIds_.push_back(0); } - /// Returns the number of sentences annotated in a text. size_t numSentences() const { return sentenceEndIds_.size() - 1; } /// Returns number of words in the sentence identified by `sentenceIdx`. @@ -125,10 +124,6 @@ public: /// constructor is disallowed). AnnotatedText(std::string &&text) : text(std::move(text)){}; - AnnotatedText(AnnotatedText &&annotatedBlob) - : text(std::move(annotatedBlob.text)), - annotation(std::move(annotatedBlob.annotation)) {} - /// Returns the number of sentences in the annotation structure. const size_t numSentences() const { return annotation.numSentences(); } @@ -137,6 +132,11 @@ public: return annotation.numWords(sentenceIdx); } + /// Appends a sentence to the existing text and transparently rebases + /// string_views + void appendSentence(std::string prefix, std::string &reference, + std::vector &wordRanges); + /// Adds a sentence, used to load from SentencePiece annotations conveniently. void addSentence(std::vector &wordRanges); diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 76bcba2..f676797 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -112,6 +112,44 @@ void Service::async_translate() { #endif // WASM_COMPATIBLE_SOURCE std::future Service::translate(std::string &&input) { + ResponseOptions responseOptions; // Hardcode responseOptions for now + return translate(std::move(input), responseOptions); +} + +std::vector +Service::translateMultiple(std::vector &&inputs, + TranslationRequest translationRequest) { + ResponseOptions responseOptions; + + // TODO(jerinphilip) Set options based on TranslationRequest, if and when it + // becomes non-dummy. + + // We queue the individual Requests so they get compiled at batches to be + // efficiently translated. + std::vector> responseFutures; + for (auto &input : inputs) { + std::future inputResponse = + queueRequest(std::move(input), responseOptions); + responseFutures.push_back(std::move(inputResponse)); + } + + // Dispatch is called once per request so compilation of sentences from + // multiple Requests happen. + dispatchTranslate(); + + // Now wait for all Requests to complete, the future to fire and return the + // compiled Responses, we can probably return the future, but WASM quirks(?). + std::vector responses; + for (auto &future : responseFutures) { + future.wait(); + responses.push_back(std::move(future.get())); + } + + return responses; +} + +std::future Service::queueRequest(std::string &&input, + ResponseOptions responseOptions) { Segments segments; AnnotatedText source(std::move(input)); text_processor_.process(source, segments); @@ -119,17 +157,29 @@ std::future Service::translate(std::string &&input) { std::promise responsePromise; auto future = responsePromise.get_future(); - Ptr request = New( - requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source), - std::move(segments), std::move(responsePromise)); + ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_, + std::move(responsePromise)); + Ptr request = New(requestId_++, std::move(segments), + std::move(responseBuilder)); batcher_.addWholeRequest(request); + return future; +} + +std::future Service::translate(std::string &&input, + ResponseOptions responseOptions) { + std::future future = + queueRequest(std::move(input), responseOptions); + dispatchTranslate(); + return future; +} + +void Service::dispatchTranslate() { if (numWorkers_ == 0) { blocking_translate(); } else { async_translate(); } - return future; } Service::~Service() { diff --git a/src/translator/service.h b/src/translator/service.h index 72f6d92..476be28 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -1,10 +1,12 @@ #ifndef SRC_BERGAMOT_SERVICE_H_ #define SRC_BERGAMOT_SERVICE_H_ +#include "TranslationRequest.h" #include "batch_translator.h" #include "batcher.h" #include "data/types.h" #include "response.h" +#include "response_builder.h" #include "text_processor.h" #include "translator/parser.h" @@ -18,18 +20,33 @@ namespace marian { namespace bergamot { -/// Service exposes methods to translate an incoming blob of text to the -/// Consumer of bergamot API. +/// Service offers methods create an asynchronous translation service. This is +/// intended to be similar to the ones provided for training or decoding in ML +/// pipelines with the following additional capabilities: +/// +/// 1. Provision of a request -> response based translation flow unlike the +/// usual a line based translation or decoding provided in most ML frameworks. +/// 2. Internal handling of normalization etc which changes source text to +/// provide to client translation meta-information like alignments consistent +/// with the unnormalized input text. +/// +/// Service exposes methods to instantiate the service from a string +/// configuration (which can cover most translators) and to translate an +/// incoming blob of text. +/// /// /// An example use of this API looks as follows: -/// +/// ```cpp /// options = ...; /// service = Service(options); /// std::string input_text = "Hello World"; /// std::future -/// response = service.translate(std::move(input_text)); -/// response.wait(); -/// Response result = response.get(); +/// responseFuture = service.translate(std::move(input_text)); +/// responseFuture.wait(); // Wait until translation has completed. +/// Response response(std::move(response.get()); +/// +/// // Do things with response. +/// ``` /// /// Optionally Service can be initialized by also passing model_memory for /// purposes of efficiency (which defaults to nullpointer and then reads from @@ -41,9 +58,22 @@ public: /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes /// of a model.bin. Optional, defaults to nullptr when not used /// @param shortlistMemory byte array of shortlist (aligned to 64) - explicit Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory); + explicit Service(Ptr options, AlignedMemory modelMemory, + AlignedMemory shortlistMemory); - explicit Service(Ptr options) : Service(options, AlignedMemory(), AlignedMemory()){} + /// Construct Service purely from Options. This expects options which + /// marian-decoder expects to be set for loading model shortlist and + /// vocabularies from files in addition to parameters that set unset desired + /// features (e.g: alignments, quality-scores). + /// + /// This is equivalent to a call to: + /// ```cpp + /// Service(options, AlignedMemory(), AlignedMemory()) + /// ``` + /// wherein empty memory is passed and internal flow defaults to file-based + /// model, shortlist loading. + explicit Service(Ptr options) + : Service(options, AlignedMemory(), AlignedMemory()) {} /// Construct Service from a string configuration. /// @param [in] config string parsable as YAML expected to adhere with marian @@ -52,20 +82,55 @@ public: /// bytes of a model.bin. Optional. /// @param [in] shortlistMemory byte array of shortlist (aligned to 64) explicit Service(const std::string &config, - AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory()) - : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {} + AlignedMemory modelMemory = AlignedMemory(), + AlignedMemory shortlistMemory = AlignedMemory()) + : Service(parseOptions(config), std::move(modelMemory), + std::move(shortlistMemory)) {} /// Explicit destructor to clean up after any threads initialized in /// asynchronous operation mode. ~Service(); /// To stay efficient and to refer to the string for alignments, expects - /// ownership be moved through std::move(..) + /// ownership be moved through `std::move(..)` /// - /// @param [in] rvalue reference of string to be translated. - std::future translate(std::string &&input); + /// @param [in] source: rvalue reference of string to be translated. + std::future translate(std::string &&source); + + /// Translate an input, providing Options to construct Response. This is + /// useful when one has to set/unset alignments or quality in the Response to + /// save compute spent in constructing these objects. + /// + /// @param [in] source: rvalue reference of the string to be translated + /// @param [in] responseOptions: Options indicating whether or not to include + /// some member in the Response, also specify any additional configurable + /// parameters. + std::future translate(std::string &&source, + ResponseOptions options); + + /// Translate an input, providing TranslationRequest across all texts to + /// construct Response. Provides the browser with the ability to break texts + /// into multiple Request keeping gains from efficiently batching internally. + /// Also useful when one has to set/unset alignments or quality in the + /// Response to save compute spent in constructing these objects. + + /// @param [in] source: rvalue reference of the string to be translated + /// @param [in] translationRequest: TranslationRequest (Unified API) + /// indicating whether or not to include some member in the Response, also + /// specify any additional configurable parameters. + + std::vector + translateMultiple(std::vector &&source, + TranslationRequest translationRequest); private: + /// Queue an input for translation. + std::future queueRequest(std::string &&input, + ResponseOptions responseOptions); + + /// Dispatch call to translate after inserting in queue + void dispatchTranslate(); + /// Build numTranslators number of translators with options from options void build_translators(Ptr options, size_t numTranslators); /// Initializes a blocking translator without using std::thread @@ -83,16 +148,17 @@ private: void async_translate(); /// Number of workers to launch. - size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) + size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) /// Model memory to load model passed as bytes. - AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) + AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) /// Shortlist memory passed as bytes. - AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) + AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) /// Holds instances of batch translators, just one in case /// of single-threaded application, numWorkers_ in case of multithreaded /// setting. - std::vector translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) + std::vector + translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) /// Stores requestId of active request. Used to establish /// ordering among requests and logging/book-keeping. From 4be96a97d727b500b3ff4cc3f40a3434a3d5afd9 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 27 Apr 2021 15:04:23 +0000 Subject: [PATCH 08/19] Handle empty translation requests Fixes https://github.com/browsermt/bergamot-translator/issues/101. ResponseBuilder is called with empty histories to trigger a valid but mostly-empty response. --- src/translator/request.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/translator/request.cpp b/src/translator/request.cpp index 7e9b739..8e46533 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -20,6 +20,13 @@ Request::Request(size_t Id, Segments &&segments, counter_ = segments_.size(); histories_.resize(segments_.size(), nullptr); + + // If there are no segments_, we are never able to trigger the responseBuilder + // calls from a different thread. However, in this case we want an empty valid + // response. + if (segments_.size() == 0) { + responseBuilder_(std::move(histories_)); + } } size_t Request::numSegments() const { return segments_.size(); } From e5ec5bdd330137febd28369447073eaf00b374d9 Mon Sep 17 00:00:00 2001 From: abhi-agg <66322306+abhi-agg@users.noreply.github.com> Date: Thu, 29 Apr 2021 10:38:09 +0200 Subject: [PATCH 09/19] Control validating the config options via a boolean flag (#116) * Control validating the config options via a boolean flag - parseOptions() function now validates the parsed options based on the validate argument * Minor syntactic fix --- src/translator/parser.h | 9 ++++++--- src/translator/service.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/translator/parser.h b/src/translator/parser.h index fa4e7bb..207890c 100644 --- a/src/translator/parser.h +++ b/src/translator/parser.h @@ -31,7 +31,7 @@ inline marian::ConfigParser createConfigParser() { } inline std::shared_ptr -parseOptions(const std::string &config) { +parseOptions(const std::string &config, bool validate = true) { marian::Options options; // @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests @@ -58,8 +58,11 @@ parseOptions(const std::string &config) { options.parse(config); YAML::Node configCopy = options.cloneToYamlNode(); - marian::ConfigValidator validator(configCopy); - validator.validateOptions(marian::cli::mode::translation); + if (validate) { + // Perform validation on parsed options only when requested + marian::ConfigValidator validator(configCopy); + validator.validateOptions(marian::cli::mode::translation); + } return std::make_shared(options); } diff --git a/src/translator/service.h b/src/translator/service.h index 476be28..a731653 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -84,7 +84,7 @@ public: explicit Service(const std::string &config, AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory()) - : Service(parseOptions(config), std::move(modelMemory), + : Service(parseOptions(config, /*validate=*/false), std::move(modelMemory), std::move(shortlistMemory)) {} /// Explicit destructor to clean up after any threads initialized in From de0abfd795f0160f948f9b84ad550a9485561814 Mon Sep 17 00:00:00 2001 From: abhi-agg <66322306+abhi-agg@users.noreply.github.com> Date: Thu, 29 Apr 2021 12:04:04 +0200 Subject: [PATCH 10/19] JS bindings for loading model and shortlist files as bytes (#117) * Bindings to load model and shortlist files as bytes * Modified wasm test page for byte based loading of files * Updates wasm README for byte loading based usage of TranslationModel --- wasm/README.md | 19 ++++- wasm/bindings/TranslationModelBindings.cpp | 21 ++++- wasm/test_page/bergamot.html | 94 ++++++++++++++++------ 3 files changed, 104 insertions(+), 30 deletions(-) diff --git a/wasm/README.md b/wasm/README.md index 23564b9..e9ef132 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,9 +1,19 @@ ## Using Bergamot Translator in JavaScript The example file `bergamot.html` in the folder `test_page` demonstrates how to use the bergamot translator in JavaScript via a `