mirror of
https://github.com/browsermt/bergamot-translator.git
synced 2024-10-26 05:43:59 +03:00
Merge remote-tracking branch 'upstream/main' into main
- Sync with upstream (https://github.com/browsermt/bergamot-translator)
This commit is contained in:
commit
ec3a785d17
33
.github/workflows/native-custom_marian-mac.yml
vendored
33
.github/workflows/native-custom_marian-mac.yml
vendored
@ -1,33 +0,0 @@
|
|||||||
name: Native (Custom Marian) MacOS
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main, ci-sandbox ]
|
|
||||||
pull_request:
|
|
||||||
branches: [ main, ci-sandbox ]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-macos:
|
|
||||||
name: MacOS
|
|
||||||
runs-on: macos-10.15
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Configure CMake
|
|
||||||
run: |
|
|
||||||
mkdir -p build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
|
|
||||||
- name: Compile
|
|
||||||
working-directory: build
|
|
||||||
run: make -j2
|
|
||||||
|
|
||||||
- name: Print versions
|
|
||||||
working-directory: build
|
|
||||||
run: |
|
|
||||||
./app/bergamot-translator-app --version
|
|
@ -1,33 +0,0 @@
|
|||||||
name: Native (Custom Marian) Ubuntu
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main, ci-sandbox ]
|
|
||||||
pull_request:
|
|
||||||
branches: [ main, ci-sandbox ]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-macos:
|
|
||||||
name: Ubuntu
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Configure CMake
|
|
||||||
run: |
|
|
||||||
mkdir -p build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
|
|
||||||
- name: Compile
|
|
||||||
working-directory: build
|
|
||||||
run: make -j2
|
|
||||||
|
|
||||||
- name: Print versions
|
|
||||||
working-directory: build
|
|
||||||
run: |
|
|
||||||
./app/bergamot-translator-app --version
|
|
59
.github/workflows/native-full_marian-mac.yml
vendored
59
.github/workflows/native-full_marian-mac.yml
vendored
@ -1,59 +0,0 @@
|
|||||||
name: Native (Full Marian) MacOS
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main, ci-sandbox ]
|
|
||||||
pull_request:
|
|
||||||
branches: [ main, ci-sandbox ]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-macos:
|
|
||||||
name: MacOS CPU-only
|
|
||||||
runs-on: macos-10.15
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: brew install openblas protobuf
|
|
||||||
|
|
||||||
# Openblas location is exported explicitly because openblas is keg-only,
|
|
||||||
# which means it was not symlinked into /usr/local/.
|
|
||||||
# CMake cannot find BLAS on GitHub runners if Marian is being compiled
|
|
||||||
# statically, hence USE_STATIC_LIBS=off
|
|
||||||
- name: Configure CMake
|
|
||||||
run: |
|
|
||||||
export LDFLAGS="-L/usr/local/opt/openblas/lib"
|
|
||||||
export CPPFLAGS="-I/usr/local/opt/openblas/include"
|
|
||||||
mkdir -p build
|
|
||||||
cd build
|
|
||||||
cmake .. \
|
|
||||||
-DCOMPILE_CPU=on \
|
|
||||||
-DCOMPILE_CUDA=off \
|
|
||||||
-DCOMPILE_EXAMPLES=on \
|
|
||||||
-DCOMPILE_SERVER=on \
|
|
||||||
-DCOMPILE_TESTS=on \
|
|
||||||
-DUSE_FBGEMM=on \
|
|
||||||
-DUSE_SENTENCEPIECE=on \
|
|
||||||
-DUSE_STATIC_LIBS=off \
|
|
||||||
-DUSE_WASM_COMPATIBLE_SOURCE=off
|
|
||||||
|
|
||||||
- name: Compile
|
|
||||||
working-directory: build
|
|
||||||
run: make -j2
|
|
||||||
|
|
||||||
- name: Run unit tests
|
|
||||||
working-directory: build
|
|
||||||
run: make test
|
|
||||||
|
|
||||||
- name: Print versions
|
|
||||||
working-directory: build
|
|
||||||
run: |
|
|
||||||
./marian --version
|
|
||||||
./marian-decoder --version
|
|
||||||
./marian-scorer --version
|
|
||||||
./spm_encode --version
|
|
||||||
|
|
120
.github/workflows/native-full_marian-ubuntu.yml
vendored
120
.github/workflows/native-full_marian-ubuntu.yml
vendored
@ -1,120 +0,0 @@
|
|||||||
name: Native (Full Marian) Ubuntu
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main, ci-test ]
|
|
||||||
pull_request:
|
|
||||||
branches: [ main, ci-test ]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-ubuntu:
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
# Ubuntu CPU-only build
|
|
||||||
- name: "Ubuntu CPU-only"
|
|
||||||
os: ubuntu-latest
|
|
||||||
cuda: ""
|
|
||||||
gcc: 8
|
|
||||||
cpu: true
|
|
||||||
gpu: false
|
|
||||||
# GPU Builds are commented out, for bergamot-translator CI runs.
|
|
||||||
# Ubuntu GPU-only build
|
|
||||||
# - name: "Ubuntu GPU-only"
|
|
||||||
# os: ubuntu-latest
|
|
||||||
# cuda: "10.2"
|
|
||||||
# gcc: 7
|
|
||||||
# cpu: false
|
|
||||||
# gpu: true
|
|
||||||
# Ubuntu 20.04 supports CUDA 11+
|
|
||||||
#- name: "Ubuntu 20.04 CUDA 11.0 gcc-9"
|
|
||||||
#os: ubuntu-20.04
|
|
||||||
#cuda: "11.0"
|
|
||||||
#gcc: 9
|
|
||||||
#cpu: false
|
|
||||||
#gpu: true
|
|
||||||
# Ubuntu 18.04 supports CUDA 10.1+
|
|
||||||
# - name: "Ubuntu 18.04 CUDA 10.2 gcc-8"
|
|
||||||
# os: ubuntu-18.04
|
|
||||||
# cuda: "10.2"
|
|
||||||
# gcc: 8
|
|
||||||
# cpu: true
|
|
||||||
# gpu: true
|
|
||||||
# Ubuntu 16.04 supports CUDA 8+
|
|
||||||
# - name: "Ubuntu 16.04 CUDA 9.2 gcc-7"
|
|
||||||
# os: ubuntu-16.04
|
|
||||||
# cuda: "9.2"
|
|
||||||
# gcc: 7
|
|
||||||
# cpu: true
|
|
||||||
# gpu: true
|
|
||||||
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
name: ${{ matrix.name }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
|
|
||||||
# The following packages are already installed on GitHub-hosted runners:
|
|
||||||
# build-essential openssl libssl-dev
|
|
||||||
# No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because
|
|
||||||
# it is installed together with libprotobuf-dev
|
|
||||||
- name: Install dependencies
|
|
||||||
run: sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev g++-8
|
|
||||||
|
|
||||||
# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
|
|
||||||
- name: Install MKL
|
|
||||||
run: |
|
|
||||||
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
|
|
||||||
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
|
|
||||||
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
|
|
||||||
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
|
|
||||||
if: matrix.cpu == true
|
|
||||||
|
|
||||||
# The script simplifies installation of different versions of CUDA
|
|
||||||
- name: Install CUDA
|
|
||||||
run: ./3rd_party/marian-dev/scripts/ci/install_cuda_ubuntu.sh ${{ matrix.cuda }}
|
|
||||||
if: matrix.gpu == true
|
|
||||||
|
|
||||||
# Boost is installed on GitHub-hosted runners in a non-standard location
|
|
||||||
# https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671
|
|
||||||
- name: Configure CMake
|
|
||||||
run: |
|
|
||||||
mkdir -p build
|
|
||||||
cd build
|
|
||||||
CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \
|
|
||||||
cmake .. \
|
|
||||||
-DBoost_ARCHITECTURE=-x64 \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DCOMPILE_CPU=${{ matrix.cpu }} \
|
|
||||||
-DCOMPILE_CUDA=${{ matrix.gpu }} \
|
|
||||||
-DCOMPILE_EXAMPLES=on \
|
|
||||||
-DCOMPILE_SERVER=on \
|
|
||||||
-DCOMPILE_TESTS=on \
|
|
||||||
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-${{ matrix.cuda }} \
|
|
||||||
-DUSE_FBGEMM=${{ matrix.cpu }} \
|
|
||||||
-DUSE_SENTENCEPIECE=on \
|
|
||||||
-DUSE_STATIC_LIBS=on \
|
|
||||||
-DUSE_WASM_COMPATIBLE_SOURCE=off
|
|
||||||
|
|
||||||
- name: Compile
|
|
||||||
working-directory: build
|
|
||||||
run: make -j2
|
|
||||||
|
|
||||||
- name: Run unit tests
|
|
||||||
working-directory: build
|
|
||||||
run: make test
|
|
||||||
# GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
|
|
||||||
if: matrix.gpu == false
|
|
||||||
|
|
||||||
- name: Print versions
|
|
||||||
working-directory: build
|
|
||||||
run: |
|
|
||||||
./marian --version
|
|
||||||
./marian-decoder --version
|
|
||||||
./marian-scorer --version
|
|
||||||
./marian-server --version
|
|
||||||
./spm_encode --version
|
|
||||||
|
|
108
.github/workflows/native-mac.yml
vendored
Normal file
108
.github/workflows/native-mac.yml
vendored
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
name: Native MacOS
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ main, ci-sandbox ]
|
||||||
|
pull_request:
|
||||||
|
branches: [ main, ci-sandbox ]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-macos:
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- name: "full-marian"
|
||||||
|
os: macos-10.15
|
||||||
|
test_tags: ""
|
||||||
|
cmake:
|
||||||
|
CMAKE_BUILD_TYPE: "Release"
|
||||||
|
COMPILE_TESTS: "ON"
|
||||||
|
USE_WASM_COMPATIBLE_SOURCE: "OFF"
|
||||||
|
USE_FBGEMM: "OFF"
|
||||||
|
USE_STATIC_LIBS: "OFF"
|
||||||
|
COMPILE_SERVER: "OFF"
|
||||||
|
COMPILE_EXAMPLES: "OFF"
|
||||||
|
|
||||||
|
- name: "minimal-marian"
|
||||||
|
os: macos-10.15
|
||||||
|
test_tags: "'#wasm'"
|
||||||
|
cmake:
|
||||||
|
CMAKE_BUILD_TYPE: "Release"
|
||||||
|
COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and compile tests fail
|
||||||
|
USE_WASM_COMPATIBLE_SOURCE: "ON"
|
||||||
|
USE_FBGEMM: "OFF"
|
||||||
|
# explicitly set due to requirement of minimal marian being used
|
||||||
|
# within WASM. This is some YAML ugliness, but it is OK.
|
||||||
|
USE_STATIC_LIBS: "ON"
|
||||||
|
COMPILE_SERVER: "OFF"
|
||||||
|
COMPILE_EXAMPLES: "OFF"
|
||||||
|
|
||||||
|
name: ${{ matrix.name }}
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
brew update
|
||||||
|
brew install openblas protobuf coreutils
|
||||||
|
|
||||||
|
# Openblas location is exported explicitly because openblas is keg-only,
|
||||||
|
# which means it was not symlinked into /usr/local/.
|
||||||
|
- name: Set BLAS Environment variables
|
||||||
|
run: |
|
||||||
|
echo "LDFLAGS=-L/usr/local/opt/openblas/lib" >> $GITHUB_ENV
|
||||||
|
echo "CPPFLAGS=-I/usr/local/opt/openblas/include" >> $GITHUB_ENV
|
||||||
|
if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF'
|
||||||
|
|
||||||
|
# CMake cannot find BLAS on GitHub runners if Marian is being compiled
|
||||||
|
# statically, hence USE_STATIC_LIBS=off
|
||||||
|
- name: Configure CMake
|
||||||
|
run: |
|
||||||
|
mkdir -p build
|
||||||
|
cd build
|
||||||
|
cmake .. \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\
|
||||||
|
-DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\
|
||||||
|
-DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \
|
||||||
|
-DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \
|
||||||
|
-DUSE_STATIC_LIBS=${{ matrix.cmake.USE_STATIC_LIBS }} \
|
||||||
|
-DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \
|
||||||
|
-DUSE_FBGEMM=${{ matrix.cmake.USE_FBGEMM }}
|
||||||
|
|
||||||
|
- name: Compile
|
||||||
|
working-directory: build
|
||||||
|
run: make -j2
|
||||||
|
|
||||||
|
- name: Run unit tests
|
||||||
|
working-directory: build
|
||||||
|
run: make test
|
||||||
|
if: matrix.cmake.COMPILE_TESTS == 'ON'
|
||||||
|
|
||||||
|
- name: Print versions
|
||||||
|
working-directory: build
|
||||||
|
run: |
|
||||||
|
./app/bergamot-translator-app --version
|
||||||
|
|
||||||
|
- name: Install regression-test framework (BRT)
|
||||||
|
working-directory: bergamot-translator-tests
|
||||||
|
run : make install
|
||||||
|
|
||||||
|
- name: Run regression-tests (BRT)
|
||||||
|
working-directory: bergamot-translator-tests
|
||||||
|
run : MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }}
|
||||||
|
|
||||||
|
- name: Upload regression-tests artifacts
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: ${{ always() }}
|
||||||
|
with:
|
||||||
|
name: brt-artifacts-${{ matrix.name }}
|
||||||
|
path: |
|
||||||
|
bergamot-translator-tests/**/*.expected
|
||||||
|
bergamot-translator-tests/**/*.log
|
||||||
|
bergamot-translator-tests/**/*.out
|
117
.github/workflows/native-ubuntu.yml
vendored
Normal file
117
.github/workflows/native-ubuntu.yml
vendored
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
name: Native Ubuntu
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ main, ci-sandbox ]
|
||||||
|
pull_request:
|
||||||
|
branches: [ main, ci-sandbox ]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-ubuntu:
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- name: "full-marian"
|
||||||
|
os: ubuntu-latest
|
||||||
|
gcc: 8
|
||||||
|
cpu: 'ON'
|
||||||
|
gpu: 'OFF'
|
||||||
|
test_tags: ""
|
||||||
|
cmake:
|
||||||
|
CMAKE_BUILD_TYPE: "Release"
|
||||||
|
COMPILE_TESTS: "ON"
|
||||||
|
USE_WASM_COMPATIBLE_SOURCE: "OFF"
|
||||||
|
COMPILE_SERVER: "OFF"
|
||||||
|
COMPILE_EXAMPLES: "OFF"
|
||||||
|
|
||||||
|
- name: "minimal-marian"
|
||||||
|
os: ubuntu-latest
|
||||||
|
gcc: 8
|
||||||
|
cpu: 'ON'
|
||||||
|
gpu: 'OFF'
|
||||||
|
test_tags: "'#wasm'"
|
||||||
|
cmake:
|
||||||
|
CMAKE_BUILD_TYPE: "Release"
|
||||||
|
COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and COMPILE_TEST=ON fails.
|
||||||
|
USE_WASM_COMPATIBLE_SOURCE: "ON"
|
||||||
|
COMPILE_SERVER: "OFF"
|
||||||
|
COMPILE_EXAMPLES: "OFF"
|
||||||
|
|
||||||
|
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: ${{ matrix.name }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
|
||||||
|
# The following packages are already installed on GitHub-hosted runners:
|
||||||
|
# build-essential openssl libssl-dev
|
||||||
|
# No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because
|
||||||
|
# it is installed together with libprotobuf-dev
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y \
|
||||||
|
libgoogle-perftools-dev libprotobuf-dev protobuf-compiler \
|
||||||
|
libboost-all-dev g++-${{ matrix.gcc }}
|
||||||
|
|
||||||
|
# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
|
||||||
|
- name: Install MKL
|
||||||
|
run: |
|
||||||
|
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
|
||||||
|
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
|
||||||
|
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
|
||||||
|
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
|
||||||
|
if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF'
|
||||||
|
|
||||||
|
# Boost is installed on GitHub-hosted runners in a non-standard location
|
||||||
|
# https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671
|
||||||
|
- name: Configure CMake
|
||||||
|
run: |
|
||||||
|
mkdir -p build
|
||||||
|
cd build
|
||||||
|
CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \
|
||||||
|
cmake .. \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\
|
||||||
|
-DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\
|
||||||
|
-DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \
|
||||||
|
-DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \
|
||||||
|
-DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \
|
||||||
|
|
||||||
|
- name: Compile bergamot-translator
|
||||||
|
working-directory: build
|
||||||
|
run: make -j2
|
||||||
|
|
||||||
|
- name: Run unit tests
|
||||||
|
working-directory: build
|
||||||
|
run: make test
|
||||||
|
# GitHub-hosted VMs do not have GPUs, so unit tests cannot be run in CUDA builds
|
||||||
|
if: matrix.gpu == 'OFF' && matrix.cmake.COMPILE_TESTS == 'ON'
|
||||||
|
|
||||||
|
- name: Print versions
|
||||||
|
working-directory: build
|
||||||
|
run: |
|
||||||
|
./app/bergamot-translator-app --version
|
||||||
|
|
||||||
|
|
||||||
|
- name: Install regression-test framework (BRT)
|
||||||
|
working-directory: bergamot-translator-tests
|
||||||
|
run : make install
|
||||||
|
|
||||||
|
- name: Run regression-tests (BRT)
|
||||||
|
working-directory: bergamot-translator-tests
|
||||||
|
run : MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }}
|
||||||
|
|
||||||
|
- name: Upload regression-tests artifacts
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: ${{ always() }}
|
||||||
|
with:
|
||||||
|
name: brt-artifacts-${{ matrix.name }}
|
||||||
|
path: |
|
||||||
|
bergamot-translator-tests/**/*.expected
|
||||||
|
bergamot-translator-tests/**/*.log
|
||||||
|
bergamot-translator-tests/**/*.out
|
5
.github/workflows/wasm-custom_marian-mac.yml
vendored
5
.github/workflows/wasm-custom_marian-mac.yml
vendored
@ -40,9 +40,8 @@ jobs:
|
|||||||
- name: Check artifacts
|
- name: Check artifacts
|
||||||
working-directory: build-wasm
|
working-directory: build-wasm
|
||||||
run: |
|
run: |
|
||||||
export WASM_ARTIFACTS_DIR=wasm
|
ls -all bergamot*
|
||||||
ls -all ${WASM_ARTIFACTS_DIR}
|
if ls bergamot*.wasm &>/dev/null && ls bergamot*.js &>/dev/null
|
||||||
if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null
|
|
||||||
then
|
then
|
||||||
echo "Artifacts Successfully Generated"
|
echo "Artifacts Successfully Generated"
|
||||||
else
|
else
|
||||||
|
@ -2,9 +2,9 @@ name: WASM (Custom Marian) Ubuntu
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ main ]
|
branches: [ main, ci-sandbox ]
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ main ]
|
branches: [ main, ci-sandbox ]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-wasm:
|
build-wasm:
|
||||||
@ -40,9 +40,8 @@ jobs:
|
|||||||
- name: Check artifacts
|
- name: Check artifacts
|
||||||
working-directory: build-wasm
|
working-directory: build-wasm
|
||||||
run: |
|
run: |
|
||||||
export WASM_ARTIFACTS_DIR=wasm
|
ls -all bergamot*
|
||||||
ls -all ${WASM_ARTIFACTS_DIR}
|
if ls bergamot*.wasm &>/dev/null && ls bergamot*.js &>/dev/null
|
||||||
if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null
|
|
||||||
then
|
then
|
||||||
echo "Artifacts Successfully Generated"
|
echo "Artifacts Successfully Generated"
|
||||||
else
|
else
|
||||||
|
83
.github/workflows/windows.yml
vendored
83
.github/workflows/windows.yml
vendored
@ -17,12 +17,6 @@ jobs:
|
|||||||
# Windows CPU-only build
|
# Windows CPU-only build
|
||||||
- name: "Windows CPU-only"
|
- name: "Windows CPU-only"
|
||||||
cuda: ""
|
cuda: ""
|
||||||
gpu: false
|
|
||||||
# GPU Builds are commented out, for bergamot-translator CI runs.
|
|
||||||
# Windows CPU+GPU build
|
|
||||||
# - name: "Windows CPU+CUDA"
|
|
||||||
# cuda: "10.2"
|
|
||||||
# gpu: true
|
|
||||||
|
|
||||||
runs-on: windows-2019
|
runs-on: windows-2019
|
||||||
name: ${{ matrix.name }}
|
name: ${{ matrix.name }}
|
||||||
@ -42,89 +36,32 @@ jobs:
|
|||||||
echo "MKLROOT=${{ github.workspace }}\mkl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
echo "MKLROOT=${{ github.workspace }}\mkl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||||
shell: powershell
|
shell: powershell
|
||||||
|
|
||||||
- name: Install CUDA
|
|
||||||
run: |
|
|
||||||
.\3rd_party\marian-dev\scripts\ci\install_cuda_windows.ps1 "10.2"
|
|
||||||
# Set CUDA_PATH environment variable so that CMake can find CUDA
|
|
||||||
echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
|
||||||
echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
|
||||||
shell: powershell
|
|
||||||
if: matrix.gpu == true
|
|
||||||
|
|
||||||
- name: Prepare vcpkg
|
- name: Prepare vcpkg
|
||||||
uses: lukka/run-vcpkg@v4
|
uses: lukka/run-vcpkg@v7.3
|
||||||
with:
|
with:
|
||||||
vcpkgArguments: protobuf
|
vcpkgArguments: protobuf pcre2
|
||||||
vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da
|
vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da
|
||||||
vcpkgDirectory: ${{ github.workspace }}/vcpkg/
|
vcpkgDirectory: ${{ github.workspace }}/vcpkg/
|
||||||
vcpkgTriplet: x64-windows-static
|
vcpkgTriplet: x64-windows-static
|
||||||
|
|
||||||
# Windows CUDA builds use USE_NCCL=off due to compilation errors.
|
# Windows CPU only minimal build
|
||||||
- name: Build Debug
|
- name: Build Release # @TODO this is actually a debug build until the ninja generator gets fixed
|
||||||
uses: lukka/run-cmake@v3
|
uses: lukka/run-cmake@v3
|
||||||
with:
|
with:
|
||||||
buildDirectory: ${{ github.workspace }}/build/Debug
|
buildDirectory: ${{ github.workspace }}/build
|
||||||
cmakeAppendedArgs: '-G Ninja
|
cmakeAppendedArgs: '-G Ninja
|
||||||
-DCMAKE_BUILD_TYPE="Debug"
|
|
||||||
-DOPENSSL_USE_STATIC_LIBS="TRUE"
|
|
||||||
-DOPENSSL_MSVC_STATIC_RT="TRUE"
|
|
||||||
-DCOMPILE_CPU="TRUE"
|
|
||||||
-DCOMPILE_CUDA="${{ matrix.gpu }}"
|
|
||||||
-DCOMPILE_SERVER="FALSE"
|
|
||||||
-DCOMPILE_TESTS="TRUE"
|
|
||||||
-DUSE_FBGEMM="TRUE"
|
|
||||||
-DUSE_MPI="FALSE"
|
|
||||||
-DUSE_NCCL="FALSE"
|
|
||||||
-DUSE_SENTENCEPIECE="TRUE"
|
|
||||||
-DUSE_STATIC_LIBS="TRUE"'
|
|
||||||
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
|
|
||||||
cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
|
|
||||||
useVcpkgToolchainFile: true
|
|
||||||
# Building in Debug is sufficient for the all-in CPU+GPU compilation;
|
|
||||||
# its main purpose is to detect warnings that the Release build is not
|
|
||||||
# able to find sometimes.
|
|
||||||
if: matrix.gpu == true
|
|
||||||
|
|
||||||
# Windows CUDA builds use USE_NCCL=off due to compilation errors
|
|
||||||
# Boost is pre-installed on Azure/GitHub-hosted Windows runners
|
|
||||||
# https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md#boost
|
|
||||||
# (not used yet)
|
|
||||||
- name: Build Release
|
|
||||||
uses: lukka/run-cmake@v3
|
|
||||||
with:
|
|
||||||
buildDirectory: ${{ github.workspace }}/build/
|
|
||||||
cmakeAppendedArgs: '-G Ninja
|
|
||||||
-DBOOST_ROOT="$(BOOST_ROOT_1_72_0)"
|
|
||||||
-DBOOST_INCLUDEDIR="$(BOOST_ROOT_1_72_0)/include"
|
|
||||||
-DBOOST_LIBRARYDIR="$(BOOST_ROOT_1_72_0)/lib"
|
|
||||||
-DCMAKE_BUILD_TYPE="Release"
|
-DCMAKE_BUILD_TYPE="Release"
|
||||||
-DOPENSSL_USE_STATIC_LIBS="TRUE"
|
-DUSE_WASM_COMPATIBLE_SOURCE="OFF"
|
||||||
-DOPENSSL_MSVC_STATIC_RT="TRUE"
|
|
||||||
-DCOMPILE_CPU="TRUE"
|
|
||||||
-DCOMPILE_CUDA="${{ matrix.gpu }}"
|
|
||||||
-DCOMPILE_SERVER="FALSE"
|
|
||||||
-DCOMPILE_TESTS="TRUE"
|
|
||||||
-DUSE_FBGEMM="TRUE"
|
|
||||||
-DUSE_MPI="FALSE"
|
|
||||||
-DUSE_NCCL="FALSE"
|
|
||||||
-DUSE_SENTENCEPIECE="TRUE"
|
|
||||||
-DUSE_STATIC_LIBS="TRUE"'
|
-DUSE_STATIC_LIBS="TRUE"'
|
||||||
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
|
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
|
||||||
cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
|
cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
|
||||||
useVcpkgToolchainFile: true
|
useVcpkgToolchainFile: true
|
||||||
|
cmakeBuildType: Release
|
||||||
|
|
||||||
# Removing unit-tests, taken care of in browsermt/marian-dev
|
|
||||||
# - name: Run unit tests
|
|
||||||
# working-directory: build/
|
|
||||||
# run: ctest
|
|
||||||
# # Not run in GPU builds because GitHub-hosted VMs do not have GPUs
|
|
||||||
# if: matrix.gpu == false
|
|
||||||
|
|
||||||
- name: Print versions
|
- name: Print versions
|
||||||
working-directory: build/
|
working-directory: build
|
||||||
run: |
|
run: |
|
||||||
.\marian.exe --version
|
.\app\service-cli.exe --version
|
||||||
.\marian-decoder.exe --version
|
|
||||||
.\marian-scorer.exe --version
|
|
||||||
dir *.exe
|
dir *.exe
|
||||||
shell: cmd
|
shell: cmd
|
||||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -4,3 +4,6 @@
|
|||||||
[submodule "3rd_party/ssplit-cpp"]
|
[submodule "3rd_party/ssplit-cpp"]
|
||||||
path = 3rd_party/ssplit-cpp
|
path = 3rd_party/ssplit-cpp
|
||||||
url = https://github.com/browsermt/ssplit-cpp
|
url = https://github.com/browsermt/ssplit-cpp
|
||||||
|
[submodule "bergamot-translator-tests"]
|
||||||
|
path = bergamot-translator-tests
|
||||||
|
url = https://github.com/browsermt/bergamot-translator-tests
|
||||||
|
2
3rd_party/marian-dev
vendored
2
3rd_party/marian-dev
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172
|
Subproject commit 94aeaa4616a0fb01ac95a23f0e74a214a94e7609
|
2
3rd_party/ssplit-cpp
vendored
2
3rd_party/ssplit-cpp
vendored
@ -1 +1 @@
|
|||||||
Subproject commit dfefe34218fe3aced70266994b6557f029fcbdde
|
Subproject commit 8d338ed5c77d22f8c86f60554596fa57bf5091e6
|
@ -9,6 +9,28 @@ project(bergamot_translator CXX C)
|
|||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
# Note that with CMake MSVC build, the option CMAKE_BUILD_TYPE is automatically derived from the key
|
||||||
|
# 'configurationType' in CMakeSettings.json configurations
|
||||||
|
if(NOT CMAKE_BUILD_TYPE)
|
||||||
|
message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release")
|
||||||
|
set(CMAKE_BUILD_TYPE "Release")
|
||||||
|
endif()
|
||||||
|
#MSVC can't seem to pick up correct flags otherwise:
|
||||||
|
if(MSVC)
|
||||||
|
add_definitions(-DUSE_SSE2=1) # Supposed to fix something in the sse_mathfun.h but not sure it does
|
||||||
|
set(INTRINSICS "/arch:AVX2") # ARCH we're targetting on win32. @TODO variable
|
||||||
|
|
||||||
|
set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG")
|
||||||
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")
|
||||||
|
|
||||||
|
# ignores warning LNK4049: locally defined symbol free imported - this comes from zlib
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049")
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT")
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD")
|
||||||
|
set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
|
||||||
|
endif(MSVC)
|
||||||
|
|
||||||
include(CMakeDependentOption)
|
include(CMakeDependentOption)
|
||||||
|
|
||||||
# Project specific cmake options
|
# Project specific cmake options
|
||||||
@ -22,11 +44,12 @@ SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be package
|
|||||||
SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
|
SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
|
||||||
SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
|
SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
|
||||||
SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
|
SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
|
||||||
|
SET(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests")
|
||||||
if (USE_WASM_COMPATIBLE_SOURCE)
|
if (USE_WASM_COMPATIBLE_SOURCE)
|
||||||
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
|
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
|
||||||
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
|
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
|
||||||
# # Setting the ssplit-cpp submodule specific cmake options for wasm
|
# # Setting the ssplit-cpp submodule specific cmake options for wasm
|
||||||
SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
|
SET(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
|
# Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
|
||||||
|
21
README.md
21
README.md
@ -38,19 +38,18 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
|
|||||||
cd bergamot-translator
|
cd bergamot-translator
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Download files (only required if you want to package files in wasm binary)
|
3. Download files (only required if you want to perform inference using build artifacts)
|
||||||
|
|
||||||
This step is only required if you want to package files (e.g. models, vocabularies etc.)
|
This step packages the vocabulary files into the wasm binary, which is required only if you want to perform inference.
|
||||||
into wasm binary. If you don't then just skip this step.
|
The compilation commands will preload these files in Emscripten’s virtual file system.
|
||||||
|
|
||||||
The build preloads the files in Emscripten’s virtual file system.
|
If you want to package bergamot project specific files, please follow these instructions:
|
||||||
|
|
||||||
If you want to package bergamot project specific models, please follow these instructions:
|
|
||||||
```bash
|
```bash
|
||||||
mkdir models
|
|
||||||
git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models
|
git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models
|
||||||
|
mkdir models
|
||||||
cp -rf bergamot-models/prod/* models
|
cp -rf bergamot-models/prod/* models
|
||||||
gunzip models/*/*
|
gunzip models/*/*
|
||||||
|
find models \( -type f -name "model*" -or -type f -name "lex*" \) -delete
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Compile
|
4. Compile
|
||||||
@ -61,14 +60,14 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
|
|||||||
```
|
```
|
||||||
|
|
||||||
2. Compile the artefacts
|
2. Compile the artefacts
|
||||||
* If you want to package files into wasm binary then execute following commands (Replace `FILES_TO_PACKAGE` with the path of the
|
* If you want to package files into the wasm binary, then execute the following commands (replace `FILES_TO_PACKAGE` with the
|
||||||
directory containing the files to be packaged in wasm binary)
|
directory containing all the files to be packaged)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR=FILES_TO_PACKAGE ../
|
emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR=FILES_TO_PACKAGE ../
|
||||||
emmake make -j
|
emmake make -j
|
||||||
```
|
```
|
||||||
e.g. If you want to package bergamot project specific models (downloaded using step 3 above) then
|
e.g. If you want to package bergamot project specific files (downloaded using step 3 above) then
|
||||||
replace `FILES_TO_PACKAGE` with `../models`
|
replace `FILES_TO_PACKAGE` with `../models`
|
||||||
|
|
||||||
* If you don't want to package any file into wasm binary then execute following commands:
|
* If you don't want to package any file into wasm binary then execute following commands:
|
||||||
@ -77,7 +76,7 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
|
|||||||
emmake make -j
|
emmake make -j
|
||||||
```
|
```
|
||||||
|
|
||||||
The wasm artifacts (.js and .wasm files) will be available in `wasm` folder of build directory ("build-wasm" in this case).
|
The wasm artifacts (.js and .wasm files) will be available in the build directory ("build-wasm" in this case).
|
||||||
|
|
||||||
3. Enable SIMD Wormhole via Wasm instantiation API in generated artifacts
|
3. Enable SIMD Wormhole via Wasm instantiation API in generated artifacts
|
||||||
```bash
|
```bash
|
||||||
|
@ -7,9 +7,9 @@
|
|||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "TranslationModel.h"
|
|
||||||
#include "translator/parser.h"
|
|
||||||
#include "translator/byte_array_util.h"
|
#include "translator/byte_array_util.h"
|
||||||
|
#include "translator/parser.h"
|
||||||
|
#include "translator/service.h"
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
@ -20,19 +20,17 @@ int main(int argc, char **argv) {
|
|||||||
std::string config = options->asYamlString();
|
std::string config = options->asYamlString();
|
||||||
|
|
||||||
// Route the config string to construct marian model through TranslationModel
|
// Route the config string to construct marian model through TranslationModel
|
||||||
TranslationModel model(config, marian::bergamot::getModelMemoryFromConfig(options));
|
marian::bergamot::Service model(
|
||||||
|
config, marian::bergamot::getModelMemoryFromConfig(options));
|
||||||
|
|
||||||
TranslationRequest translationRequest;
|
TranslationRequest translationRequest;
|
||||||
std::vector<std::string> texts;
|
std::vector<std::string> texts;
|
||||||
|
|
||||||
for (std::string line; std::getline(std::cin, line);) {
|
for (std::string line; std::getline(std::cin, line);) {
|
||||||
texts.emplace_back(line);
|
texts.emplace_back(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto results = model.translate(std::move(texts), translationRequest);
|
auto results = model.translateMultiple(std::move(texts), translationRequest);
|
||||||
|
|
||||||
// Resolve the future and get the actual result
|
|
||||||
//std::vector<TranslationResult> results = futureResults.get();
|
|
||||||
|
|
||||||
for (auto &result : results) {
|
for (auto &result : results) {
|
||||||
std::cout << result.getTranslatedText() << std::endl;
|
std::cout << result.getTranslatedText() << std::endl;
|
||||||
|
@ -1,16 +1,17 @@
|
|||||||
/*
|
/*
|
||||||
* main.cpp
|
* main.cpp
|
||||||
*
|
*
|
||||||
* An application which accepts line separated texts in stdin and returns translated ones in stdout.
|
* An application which accepts line separated texts in stdin and returns
|
||||||
* It is convenient for batch processing and can be used with tools like SacreBLEU.
|
* translated ones in stdout. It is convenient for batch processing and can be
|
||||||
|
* used with tools like SacreBLEU.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "TranslationModel.h"
|
|
||||||
#include "translator/parser.h"
|
#include "translator/parser.h"
|
||||||
|
#include "translator/service.h"
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
@ -21,19 +22,16 @@ int main(int argc, char **argv) {
|
|||||||
std::string config = options->asYamlString();
|
std::string config = options->asYamlString();
|
||||||
|
|
||||||
// Route the config string to construct marian model through TranslationModel
|
// Route the config string to construct marian model through TranslationModel
|
||||||
TranslationModel model(config);
|
marian::bergamot::Service model(config);
|
||||||
|
|
||||||
TranslationRequest translationRequest;
|
TranslationRequest translationRequest;
|
||||||
std::vector<std::string> texts;
|
std::vector<std::string> texts;
|
||||||
|
|
||||||
for (std::string line; std::getline(std::cin, line);) {
|
for (std::string line; std::getline(std::cin, line);) {
|
||||||
texts.emplace_back(line);
|
texts.emplace_back(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto results = model.translate(std::move(texts), translationRequest);
|
auto results = model.translateMultiple(std::move(texts), translationRequest);
|
||||||
|
|
||||||
// Resolve the future and get the actual result
|
|
||||||
//std::vector<TranslationResult> results = futureResults.get();
|
|
||||||
|
|
||||||
for (auto &result : results) {
|
for (auto &result : results) {
|
||||||
std::cout << result.getTranslatedText() << std::endl;
|
std::cout << result.getTranslatedText() << std::endl;
|
||||||
|
@ -27,8 +27,14 @@ int main(int argc, char *argv[]) {
|
|||||||
std::string input = std_input.str();
|
std::string input = std_input.str();
|
||||||
using marian::bergamot::Response;
|
using marian::bergamot::Response;
|
||||||
|
|
||||||
|
marian::bergamot::ResponseOptions responseOptions;
|
||||||
|
responseOptions.qualityScores = true;
|
||||||
|
responseOptions.alignment = true;
|
||||||
|
responseOptions.alignmentThreshold = 0.2f;
|
||||||
|
|
||||||
// Wait on future until Response is complete
|
// Wait on future until Response is complete
|
||||||
std::future<Response> responseFuture = service.translate(std::move(input));
|
std::future<Response> responseFuture =
|
||||||
|
service.translate(std::move(input), responseOptions);
|
||||||
responseFuture.wait();
|
responseFuture.wait();
|
||||||
Response response = responseFuture.get();
|
Response response = responseFuture.get();
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include "marian.h"
|
#include "marian.h"
|
||||||
#include "translator/parser.h"
|
#include "translator/parser.h"
|
||||||
#include "translator/response.h"
|
#include "translator/response.h"
|
||||||
|
#include "translator/response_options.h"
|
||||||
#include "translator/service.h"
|
#include "translator/service.h"
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
@ -21,8 +22,14 @@ int main(int argc, char *argv[]) {
|
|||||||
std::string input = std_input.str();
|
std::string input = std_input.str();
|
||||||
using marian::bergamot::Response;
|
using marian::bergamot::Response;
|
||||||
|
|
||||||
|
marian::bergamot::ResponseOptions responseOptions;
|
||||||
|
responseOptions.qualityScores = true;
|
||||||
|
responseOptions.alignment = true;
|
||||||
|
responseOptions.alignmentThreshold = 0.2f;
|
||||||
|
|
||||||
// Wait on future until Response is complete
|
// Wait on future until Response is complete
|
||||||
std::future<Response> responseFuture = service.translate(std::move(input));
|
std::future<Response> responseFuture =
|
||||||
|
service.translate(std::move(input), responseOptions);
|
||||||
responseFuture.wait();
|
responseFuture.wait();
|
||||||
Response response = responseFuture.get();
|
Response response = responseFuture.get();
|
||||||
|
|
||||||
|
1
bergamot-translator-tests
Submodule
1
bergamot-translator-tests
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 3771001720a8f01bba185ee5d5d908b7c266ef31
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "translator/definitions.h"
|
||||||
|
|
||||||
/* All possible Granularities for which Quality Scores can be returned for
|
/* All possible Granularities for which Quality Scores can be returned for
|
||||||
* translated text. */
|
* translated text. */
|
||||||
|
@ -1,80 +0,0 @@
|
|||||||
/*
|
|
||||||
* TranslationModel.h
|
|
||||||
*
|
|
||||||
* Main interface for translation API.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef SRC_TRANSLATOR_TRANSLATIONMODEL_H_
|
|
||||||
#define SRC_TRANSLATOR_TRANSLATIONMODEL_H_
|
|
||||||
|
|
||||||
#include <future>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
// All 3rd party includes
|
|
||||||
#include "3rd_party/marian-dev/src/common/options.h"
|
|
||||||
|
|
||||||
// All local project includes
|
|
||||||
#include "TranslationRequest.h"
|
|
||||||
#include "TranslationResult.h"
|
|
||||||
#include "translator/definitions.h"
|
|
||||||
#include "translator/service.h"
|
|
||||||
|
|
||||||
/* A Translation model that translates a plain (without any markups and emojis)
|
|
||||||
* UTF-8 encoded text. This implementation supports translation from 1 source
|
|
||||||
* language to 1 target language.
|
|
||||||
*/
|
|
||||||
class TranslationModel {
|
|
||||||
public:
|
|
||||||
/* Construct the model using the model configuration options as yaml-formatted
|
|
||||||
* string
|
|
||||||
*/
|
|
||||||
/**
|
|
||||||
* @param config Marian yml config file in the form of a string
|
|
||||||
* @param model_memory optional byte array (aligned to 64!!!) that contains
|
|
||||||
* the bytes of a model.bin.
|
|
||||||
*/
|
|
||||||
TranslationModel(const std::string &config,
|
|
||||||
marian::bergamot::AlignedMemory modelMemory = marian::bergamot::AlignedMemory(),
|
|
||||||
marian::bergamot::AlignedMemory shortlistMemory = marian::bergamot::AlignedMemory());
|
|
||||||
|
|
||||||
~TranslationModel();
|
|
||||||
|
|
||||||
/* This method performs translation on a list of UTF-8 encoded plain text
|
|
||||||
* (without any markups or emojis) and returns a list of results in the same
|
|
||||||
* order. The model supports translation from 1 source language to 1 target
|
|
||||||
* language.
|
|
||||||
*
|
|
||||||
* Each text entry can either be a word, a phrase, a sentence or a list of
|
|
||||||
* sentences. Additional information related to the translated text can be
|
|
||||||
* requested via TranslationRequest which is applied equally to each text
|
|
||||||
* entry. The translated text corresponding to each text entry and the
|
|
||||||
* additional information (as specified in the TranslationRequest) is
|
|
||||||
* encapsulated and returned in TranslationResult.
|
|
||||||
*
|
|
||||||
* The API splits each text entry into sentences internally, which are then
|
|
||||||
* translated independent of each other. The translated sentences are then
|
|
||||||
* joined back together and returned in TranslationResult.
|
|
||||||
*
|
|
||||||
* Please refer to the TranslationRequest class to find out what additional
|
|
||||||
* information can be requested. The alignment information can only be
|
|
||||||
* requested if the model supports it (check isAlignmentSupported() API).
|
|
||||||
*
|
|
||||||
* The texts argument will become empty after the execution of this API (each
|
|
||||||
* entry of texts list will be moved to its corresponding TranslationResult
|
|
||||||
* object).
|
|
||||||
*/
|
|
||||||
std::vector<TranslationResult> translate(std::vector<std::string> &&texts,
|
|
||||||
TranslationRequest request);
|
|
||||||
|
|
||||||
/* Check if the model can provide alignment information b/w original and
|
|
||||||
* translated text. */
|
|
||||||
bool isAlignmentSupported() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Model configuration options
|
|
||||||
std::shared_ptr<marian::Options> configOptions_; // ORDER DEPENDECNY
|
|
||||||
marian::bergamot::Service service_; // ORDER DEPENDENCY
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* SRC_TRANSLATOR_TRANSLATIONMODEL_H_ */
|
|
@ -1,108 +0,0 @@
|
|||||||
/*
|
|
||||||
* TranslationResult.h
|
|
||||||
*
|
|
||||||
* The class that represents the result of TranslationModel::translate()
|
|
||||||
* API for each of its text entry and TranslationRequest.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef SRC_TRANSLATOR_TRANSLATIONRESULT_H_
|
|
||||||
#define SRC_TRANSLATOR_TRANSLATIONRESULT_H_
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "QualityScore.h"
|
|
||||||
|
|
||||||
/* This class represents the result of TranslationModel::translate() API
|
|
||||||
* for each of its text entry and TranslationRequest.
|
|
||||||
*/
|
|
||||||
class TranslationResult {
|
|
||||||
public:
|
|
||||||
typedef std::vector<std::pair<std::string_view, std::string_view>>
|
|
||||||
SentenceMappings;
|
|
||||||
#ifdef WASM_BINDINGS
|
|
||||||
TranslationResult(const std::string &original, const std::string &translation)
|
|
||||||
: originalText(original), translatedText(translation),
|
|
||||||
sentenceMappings() {}
|
|
||||||
#endif
|
|
||||||
TranslationResult(const std::string &original, const std::string &translation,
|
|
||||||
SentenceMappings &sentenceMappings)
|
|
||||||
: originalText(original), translatedText(translation),
|
|
||||||
sentenceMappings(sentenceMappings) {}
|
|
||||||
|
|
||||||
TranslationResult(TranslationResult &&other)
|
|
||||||
: originalText(std::move(other.originalText)),
|
|
||||||
translatedText(std::move(other.translatedText)),
|
|
||||||
sentenceMappings(std::move(other.sentenceMappings)) {}
|
|
||||||
|
|
||||||
#ifdef WASM_BINDINGS
|
|
||||||
TranslationResult(const TranslationResult &other)
|
|
||||||
: originalText(other.originalText),
|
|
||||||
translatedText(other.translatedText),
|
|
||||||
sentenceMappings(other.sentenceMappings) {}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
TranslationResult(std::string &&original, std::string &&translation,
|
|
||||||
SentenceMappings &&sentenceMappings)
|
|
||||||
: originalText(std::move(original)),
|
|
||||||
translatedText(std::move(translation)),
|
|
||||||
sentenceMappings(std::move(sentenceMappings)) {}
|
|
||||||
|
|
||||||
#ifndef WASM_BINDINGS
|
|
||||||
TranslationResult &operator=(const TranslationResult &) = delete;
|
|
||||||
#else
|
|
||||||
TranslationResult &operator=(const TranslationResult &result) {
|
|
||||||
originalText = result.originalText;
|
|
||||||
translatedText = result.translatedText;
|
|
||||||
sentenceMappings = result.sentenceMappings;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Return the original text. */
|
|
||||||
const std::string &getOriginalText() const { return originalText; }
|
|
||||||
|
|
||||||
/* Return the translated text. */
|
|
||||||
const std::string &getTranslatedText() const { return translatedText; }
|
|
||||||
|
|
||||||
/* Return the Quality scores of the translated text. */
|
|
||||||
const QualityScore &getQualityScore() const { return qualityScore; }
|
|
||||||
|
|
||||||
/* Return the Sentence mappings (information regarding how individual
|
|
||||||
* sentences of originalText map to corresponding translated sentences in
|
|
||||||
* translatedText).
|
|
||||||
*/
|
|
||||||
const SentenceMappings &getSentenceMappings() const {
|
|
||||||
return sentenceMappings;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Original text (in UTF-8 encoded format) that was supposed to be translated
|
|
||||||
std::string originalText;
|
|
||||||
|
|
||||||
// Translation (in UTF-8 encoded format) of the originalText
|
|
||||||
std::string translatedText;
|
|
||||||
|
|
||||||
// Quality score of the translated text at the granularity specified in
|
|
||||||
// TranslationRequest. It is an optional result (it will have no information
|
|
||||||
// if not requested in TranslationRequest)
|
|
||||||
QualityScore qualityScore;
|
|
||||||
|
|
||||||
// Information regarding how individual sentences of originalText map to
|
|
||||||
// corresponding translated sentences in joined translated text
|
|
||||||
// (translatedText) An example of sentence mapping:
|
|
||||||
// originalText (contains 2 sentences) = "What is your name?
|
|
||||||
// My name is Abc." translatedText (contains 2 translated sentences) =
|
|
||||||
// "Was ist dein Name? Mein Name ist Abc." sentenceMappings = [
|
|
||||||
// {"What is your name?", "Was ist dein Name?"}, //
|
|
||||||
// Pair(originalText[0],translatedText[0])
|
|
||||||
// {"My name is Abc", "Mein Name ist Abc."} //
|
|
||||||
// Pair(originalText[1],translatedText[1])
|
|
||||||
// ]
|
|
||||||
//
|
|
||||||
// It is an optional result (it will be empty if not requested in
|
|
||||||
// TranslationRequest).
|
|
||||||
SentenceMappings sentenceMappings;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* SRC_TRANSLATOR_TRANSLATIONRESULT_H_ */
|
|
@ -1,13 +1,11 @@
|
|||||||
add_library(bergamot-translator STATIC
|
add_library(bergamot-translator STATIC
|
||||||
TranslationModel.cpp
|
|
||||||
byte_array_util.cpp
|
byte_array_util.cpp
|
||||||
text_processor.cpp
|
text_processor.cpp
|
||||||
sentence_splitter.cpp
|
sentence_splitter.cpp
|
||||||
batch_translator.cpp
|
batch_translator.cpp
|
||||||
multifactor_priority.cpp
|
|
||||||
request.cpp
|
request.cpp
|
||||||
batcher.cpp
|
batcher.cpp
|
||||||
response.cpp
|
response_builder.cpp
|
||||||
batch.cpp
|
batch.cpp
|
||||||
sentence_ranges.cpp
|
sentence_ranges.cpp
|
||||||
service.cpp
|
service.cpp
|
||||||
@ -29,5 +27,5 @@ endif(COMPILE_WASM)
|
|||||||
target_link_libraries(bergamot-translator marian ssplit)
|
target_link_libraries(bergamot-translator marian ssplit)
|
||||||
|
|
||||||
target_include_directories(bergamot-translator
|
target_include_directories(bergamot-translator
|
||||||
PUBLIC ${CMAKE_SOURCE_DIR}
|
PUBLIC ${PROJECT_SOURCE_DIR}
|
||||||
PUBLIC ${CMAKE_SOURCE_DIR}/src)
|
${PROJECT_SOURCE_DIR}/src)
|
||||||
|
@ -1,55 +0,0 @@
|
|||||||
/*
|
|
||||||
* TranslationModel.cpp
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <future>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
// All local project includes
|
|
||||||
#include "TranslationModel.h"
|
|
||||||
#include "translator/parser.h"
|
|
||||||
#include "translator/service.h"
|
|
||||||
|
|
||||||
TranslationModel::TranslationModel(const std::string &config,
|
|
||||||
marian::bergamot::AlignedMemory model_memory,
|
|
||||||
marian::bergamot::AlignedMemory lexical_memory)
|
|
||||||
: service_(config, std::move(model_memory), std::move(lexical_memory)) {}
|
|
||||||
|
|
||||||
TranslationModel::~TranslationModel() {}
|
|
||||||
|
|
||||||
std::vector<TranslationResult>
|
|
||||||
TranslationModel::translate(std::vector<std::string> &&texts,
|
|
||||||
TranslationRequest request) {
|
|
||||||
// Implementing a non-async version first. Unpleasant, but should work.
|
|
||||||
std::promise<std::vector<TranslationResult>> promise;
|
|
||||||
auto future = promise.get_future();
|
|
||||||
|
|
||||||
// This code, move into async?
|
|
||||||
std::vector<TranslationResult> translationResults;
|
|
||||||
for (auto &text : texts) {
|
|
||||||
// Collect future as marian::bergamot::TranslationResult
|
|
||||||
auto intermediate = service_.translate(std::move(text));
|
|
||||||
intermediate.wait();
|
|
||||||
auto marianResponse(std::move(intermediate.get()));
|
|
||||||
|
|
||||||
TranslationResult::SentenceMappings sentenceMappings;
|
|
||||||
for (size_t idx = 0; idx < marianResponse.size(); idx++) {
|
|
||||||
marian::string_view src = marianResponse.source.sentence(idx);
|
|
||||||
marian::string_view tgt = marianResponse.target.sentence(idx);
|
|
||||||
sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
|
|
||||||
std::string_view(tgt.data(), tgt.size()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// In place construction.
|
|
||||||
translationResults.emplace_back(
|
|
||||||
std::move(marianResponse.source.text), // &&marianResponse.source_
|
|
||||||
std::move(marianResponse.target.text), // &&marianResponse.translation_
|
|
||||||
std::move(sentenceMappings) // &&sentenceMappings
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
return translationResults;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TranslationModel::isAlignmentSupported() const { return false; }
|
|
@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) {
|
|||||||
std::vector<data::SentenceTuple> batchVector;
|
std::vector<data::SentenceTuple> batchVector;
|
||||||
|
|
||||||
auto &sentences = batch.sentences();
|
auto &sentences = batch.sentences();
|
||||||
|
size_t batchSequenceNumber{0};
|
||||||
for (auto &sentence : sentences) {
|
for (auto &sentence : sentences) {
|
||||||
data::SentenceTuple sentence_tuple(sentence.lineNumber());
|
data::SentenceTuple sentence_tuple(batchSequenceNumber);
|
||||||
Segment segment = sentence.getUnderlyingSegment();
|
Segment segment = sentence.getUnderlyingSegment();
|
||||||
sentence_tuple.push_back(segment);
|
sentence_tuple.push_back(segment);
|
||||||
batchVector.push_back(sentence_tuple);
|
batchVector.push_back(sentence_tuple);
|
||||||
|
|
||||||
|
++batchSequenceNumber;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t batchSize = batchVector.size();
|
size_t batchSize = batchVector.size();
|
||||||
|
@ -22,10 +22,27 @@ template <class T, typename... Args> UPtr<T> UNew(Args &&... args) {
|
|||||||
|
|
||||||
template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }
|
template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }
|
||||||
|
|
||||||
/// Shortcut to AlignedVector<const void*> for byte arrays
|
/// Shortcut to AlignedVector<char> for byte arrays
|
||||||
typedef AlignedVector<const void*> AlignedMemory;
|
typedef AlignedVector<char> AlignedMemory;
|
||||||
|
|
||||||
} // namespace bergamot
|
} // namespace bergamot
|
||||||
} // namespace marian
|
} // namespace marian
|
||||||
|
|
||||||
|
// @TODO at the moment the usage of string_view in this repository is a hot mess and a disaster waiting to happen.
|
||||||
|
// ssplit uses std::string_view if the compiler supports c++17, else falls back to c++11 and absl::string_view
|
||||||
|
// bergamot-translator uses, depending on the source file, std::string_view (which will break if ssplit-cpp uses
|
||||||
|
// absl::string_view) and marian::string_view which is an export of (confusingly) the sentencepiece module that
|
||||||
|
// marian has. marian::string_view is our addition to the marian fork, which will make merging even nicer. Not.
|
||||||
|
// This is just an ugly patchwork that allows gcc5, our lowest targeted gcc, to run. We don't actually try to run
|
||||||
|
// on older compilers.
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__)
|
||||||
|
#include <experimental/string_view>
|
||||||
|
namespace std {
|
||||||
|
using string_view = std::experimental::string_view;
|
||||||
|
} // namespace std
|
||||||
|
#else
|
||||||
|
#include <string_view>
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // SRC_BERGAMOT_DEFINITIONS_H_
|
#endif // SRC_BERGAMOT_DEFINITIONS_H_
|
||||||
|
@ -1,7 +0,0 @@
|
|||||||
#include "multifactor_priority.h"
|
|
||||||
|
|
||||||
namespace marian {
|
|
||||||
namespace bergamot {
|
|
||||||
|
|
||||||
} // namespace bergamot
|
|
||||||
} // namespace marian
|
|
@ -1,20 +0,0 @@
|
|||||||
#ifndef SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_
|
|
||||||
#define SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_
|
|
||||||
|
|
||||||
#include "data/types.h"
|
|
||||||
#include "definitions.h"
|
|
||||||
#include "sys/time.h"
|
|
||||||
|
|
||||||
namespace marian {
|
|
||||||
namespace bergamot {
|
|
||||||
|
|
||||||
struct MultiFactorPriority {
|
|
||||||
int nice; /* user configurable priority, at a request */
|
|
||||||
unsigned int Id;
|
|
||||||
/* What else should priority depend on? */
|
|
||||||
double priority() { return Id; }
|
|
||||||
};
|
|
||||||
} // namespace bergamot
|
|
||||||
} // namespace marian
|
|
||||||
|
|
||||||
#endif // SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_
|
|
@ -31,7 +31,7 @@ inline marian::ConfigParser createConfigParser() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<marian::Options>
|
inline std::shared_ptr<marian::Options>
|
||||||
parseOptions(const std::string &config) {
|
parseOptions(const std::string &config, bool validate = true) {
|
||||||
marian::Options options;
|
marian::Options options;
|
||||||
|
|
||||||
// @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests
|
// @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests
|
||||||
@ -58,8 +58,11 @@ parseOptions(const std::string &config) {
|
|||||||
options.parse(config);
|
options.parse(config);
|
||||||
YAML::Node configCopy = options.cloneToYamlNode();
|
YAML::Node configCopy = options.cloneToYamlNode();
|
||||||
|
|
||||||
marian::ConfigValidator validator(configCopy);
|
if (validate) {
|
||||||
validator.validateOptions(marian::cli::mode::translation);
|
// Perform validation on parsed options only when requested
|
||||||
|
marian::ConfigValidator validator(configCopy);
|
||||||
|
validator.validateOptions(marian::cli::mode::translation);
|
||||||
|
}
|
||||||
|
|
||||||
return std::make_shared<marian::Options>(options);
|
return std::make_shared<marian::Options>(options);
|
||||||
}
|
}
|
||||||
|
@ -10,12 +10,14 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <mach/mach.h>
|
|
||||||
#include <mach/mach_traps.h>
|
|
||||||
#include <mach/semaphore.h>
|
#include <mach/semaphore.h>
|
||||||
#include <mach/task.h>
|
#include <mach/task.h>
|
||||||
|
#include <mach/mach_traps.h>
|
||||||
|
#include <mach/mach.h>
|
||||||
#elif defined(__linux)
|
#elif defined(__linux)
|
||||||
#include <semaphore.h>
|
#include <semaphore.h>
|
||||||
|
#elif defined(_WIN32) || defined(_WIN64)
|
||||||
|
#include <windows.h>
|
||||||
#else
|
#else
|
||||||
#include <boost/interprocess/sync/interprocess_semaphore.hpp>
|
#include <boost/interprocess/sync/interprocess_semaphore.hpp>
|
||||||
#endif
|
#endif
|
||||||
@ -35,67 +37,107 @@ namespace bergamot {
|
|||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
|
|
||||||
class Semaphore {
|
class Semaphore {
|
||||||
public:
|
public:
|
||||||
explicit Semaphore(int value) : task_(mach_task_self()) {
|
explicit Semaphore(int value) : task_(mach_task_self()) {
|
||||||
ABORT_IF(KERN_SUCCESS !=
|
ABORT_IF(KERN_SUCCESS != semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value), "Could not create semaphore");
|
||||||
semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value),
|
|
||||||
"Could not create semaphore");
|
|
||||||
}
|
|
||||||
|
|
||||||
~Semaphore() {
|
|
||||||
if (KERN_SUCCESS != semaphore_destroy(task_, back_)) {
|
|
||||||
std::cerr << "Could not destroy semaphore" << std::endl;
|
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void wait() {
|
~Semaphore() {
|
||||||
ABORT_IF(KERN_SUCCESS != semaphore_wait(back_),
|
if (KERN_SUCCESS != semaphore_destroy(task_, back_)) {
|
||||||
"Wait for semaphore failed");
|
std::cerr << "Could not destroy semaphore" << std::endl;
|
||||||
}
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void post() {
|
void wait() {
|
||||||
ABORT_IF(KERN_SUCCESS != semaphore_signal(back_),
|
ABORT_IF(KERN_SUCCESS != semaphore_wait(back_), "Wait for semaphore failed");
|
||||||
"Could not post to semaphore");
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
void post() {
|
||||||
semaphore_t back_;
|
ABORT_IF(KERN_SUCCESS != semaphore_signal(back_), "Could not post to semaphore");
|
||||||
task_t task_;
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
semaphore_t back_;
|
||||||
|
task_t task_;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); }
|
inline void WaitSemaphore(Semaphore &semaphore) {
|
||||||
|
semaphore.wait();
|
||||||
|
}
|
||||||
|
|
||||||
#elif defined(__linux)
|
#elif defined(__linux)
|
||||||
|
|
||||||
class Semaphore {
|
class Semaphore {
|
||||||
public:
|
public:
|
||||||
explicit Semaphore(unsigned int value) {
|
explicit Semaphore(unsigned int value) {
|
||||||
ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore");
|
ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore");
|
||||||
}
|
|
||||||
|
|
||||||
~Semaphore() {
|
|
||||||
if (-1 == sem_destroy(&sem_)) {
|
|
||||||
std::cerr << "Could not destroy semaphore " << std::endl;
|
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void wait() {
|
~Semaphore() {
|
||||||
while (UTIL_UNLIKELY(-1 == sem_wait(&sem_))) {
|
if (-1 == sem_destroy(&sem_)) {
|
||||||
ABORT_IF(errno != EINTR, "Wait for semaphore failed");
|
std::cerr << "Could not destroy semaphore" << std::endl;
|
||||||
|
abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void post() {
|
void wait() {
|
||||||
ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore");
|
while (-1 == sem_wait(&sem_)) {
|
||||||
}
|
ABORT_IF(errno != EINTR, "Wait for semaphore failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
void post() {
|
||||||
sem_t sem_;
|
ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore");
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
sem_t sem_;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); }
|
inline void WaitSemaphore(Semaphore &semaphore) {
|
||||||
|
semaphore.wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(_WIN32) || defined(_WIN64)
|
||||||
|
|
||||||
|
class Semaphore {
|
||||||
|
public:
|
||||||
|
explicit Semaphore(LONG value) : sem_(CreateSemaphoreA(NULL, value, 2147483647, NULL)) {
|
||||||
|
ABORT_IF(!sem_, "Could not CreateSemaphore {}", GetLastError());
|
||||||
|
}
|
||||||
|
|
||||||
|
~Semaphore() {
|
||||||
|
CloseHandle(sem_);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void wait() {
|
||||||
|
while (true) {
|
||||||
|
switch (WaitForSingleObject(sem_, 0L)) {
|
||||||
|
case WAIT_OBJECT_0:
|
||||||
|
return;
|
||||||
|
case WAIT_ABANDONED:
|
||||||
|
ABORT("A semaphore can't be abandoned, confused by Windows");
|
||||||
|
case WAIT_TIMEOUT:
|
||||||
|
continue;
|
||||||
|
case WAIT_FAILED:
|
||||||
|
ABORT("Waiting on Semaphore failed {}", GetLastError());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void post() {
|
||||||
|
ABORT_IF(!ReleaseSemaphore(sem_, 1, NULL), "Failed to release Semaphore {}", GetLastError());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
HANDLE sem_;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void WaitSemaphore(Semaphore &semaphore) {
|
||||||
|
semaphore.wait();
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
typedef boost::interprocess::interprocess_semaphore Semaphore;
|
typedef boost::interprocess::interprocess_semaphore Semaphore;
|
||||||
@ -113,7 +155,7 @@ inline void WaitSemaphore(Semaphore &on) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // Apple
|
#endif // Cases for semaphore support
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Producer consumer queue safe for multiple producers and multiple consumers.
|
* Producer consumer queue safe for multiple producers and multiple consumers.
|
||||||
@ -124,11 +166,13 @@ inline void WaitSemaphore(Semaphore &on) {
|
|||||||
* throw.
|
* throw.
|
||||||
*/
|
*/
|
||||||
template <class T> class PCQueue {
|
template <class T> class PCQueue {
|
||||||
public:
|
public:
|
||||||
explicit PCQueue(size_t size)
|
explicit PCQueue(size_t size)
|
||||||
: empty_(size), used_(0), storage_(new T[size]),
|
: empty_(size), used_(0),
|
||||||
end_(storage_.get() + size), produce_at_(storage_.get()),
|
storage_(new T[size]),
|
||||||
consume_at_(storage_.get()) {}
|
end_(storage_.get() + size),
|
||||||
|
produce_at_(storage_.get()),
|
||||||
|
consume_at_(storage_.get()) {}
|
||||||
|
|
||||||
// Add a value to the queue.
|
// Add a value to the queue.
|
||||||
void Produce(const T &val) {
|
void Produce(const T &val) {
|
||||||
@ -141,8 +185,7 @@ public:
|
|||||||
empty_.post();
|
empty_.post();
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
if (++produce_at_ == end_)
|
if (++produce_at_ == end_) produce_at_ = storage_.get();
|
||||||
produce_at_ = storage_.get();
|
|
||||||
}
|
}
|
||||||
used_.post();
|
used_.post();
|
||||||
}
|
}
|
||||||
@ -158,14 +201,14 @@ public:
|
|||||||
empty_.post();
|
empty_.post();
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
if (++produce_at_ == end_)
|
if (++produce_at_ == end_) produce_at_ = storage_.get();
|
||||||
produce_at_ = storage_.get();
|
|
||||||
}
|
}
|
||||||
used_.post();
|
used_.post();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Consume a value, assigning it to out.
|
// Consume a value, assigning it to out.
|
||||||
T &Consume(T &out) {
|
T& Consume(T &out) {
|
||||||
WaitSemaphore(used_);
|
WaitSemaphore(used_);
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
|
std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
|
||||||
@ -175,15 +218,14 @@ public:
|
|||||||
used_.post();
|
used_.post();
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
if (++consume_at_ == end_)
|
if (++consume_at_ == end_) consume_at_ = storage_.get();
|
||||||
consume_at_ = storage_.get();
|
|
||||||
}
|
}
|
||||||
empty_.post();
|
empty_.post();
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consume a value, swapping it to out.
|
// Consume a value, swapping it to out.
|
||||||
T &ConsumeSwap(T &out) {
|
T& ConsumeSwap(T &out) {
|
||||||
WaitSemaphore(used_);
|
WaitSemaphore(used_);
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
|
std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
|
||||||
@ -193,13 +235,13 @@ public:
|
|||||||
used_.post();
|
used_.post();
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
if (++consume_at_ == end_)
|
if (++consume_at_ == end_) consume_at_ = storage_.get();
|
||||||
consume_at_ = storage_.get();
|
|
||||||
}
|
}
|
||||||
empty_.post();
|
empty_.post();
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Convenience version of Consume that copies the value to return.
|
// Convenience version of Consume that copies the value to return.
|
||||||
// The other version is faster.
|
// The other version is faster.
|
||||||
T Consume() {
|
T Consume() {
|
||||||
@ -208,7 +250,7 @@ public:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Number of empty spaces in storage_.
|
// Number of empty spaces in storage_.
|
||||||
Semaphore empty_;
|
Semaphore empty_;
|
||||||
// Number of occupied spaces in storage_.
|
// Number of occupied spaces in storage_.
|
||||||
@ -234,63 +276,67 @@ template <class T> struct UnboundedPage {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class T> class UnboundedSingleQueue {
|
template <class T> class UnboundedSingleQueue {
|
||||||
public:
|
public:
|
||||||
UnboundedSingleQueue() : valid_(0) {
|
UnboundedSingleQueue() : valid_(0) {
|
||||||
SetFilling(new UnboundedPage<T>());
|
SetFilling(new UnboundedPage<T>());
|
||||||
SetReading(filling_);
|
SetReading(filling_);
|
||||||
}
|
|
||||||
|
|
||||||
void Produce(T &&val) {
|
|
||||||
if (filling_current_ == filling_end_) {
|
|
||||||
UnboundedPage<T> *next = new UnboundedPage<T>();
|
|
||||||
filling_->next = next;
|
|
||||||
SetFilling(next);
|
|
||||||
}
|
}
|
||||||
*(filling_current_++) = std::move(val);
|
|
||||||
valid_.post();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Produce(const T &val) { Produce(T(val)); }
|
void Produce(T &&val) {
|
||||||
|
if (filling_current_ == filling_end_) {
|
||||||
T &Consume(T &out) {
|
UnboundedPage<T> *next = new UnboundedPage<T>();
|
||||||
WaitSemaphore(valid_);
|
filling_->next = next;
|
||||||
if (reading_current_ == reading_end_) {
|
SetFilling(next);
|
||||||
SetReading(reading_->next);
|
}
|
||||||
|
*(filling_current_++) = std::move(val);
|
||||||
|
valid_.post();
|
||||||
}
|
}
|
||||||
out = std::move(*(reading_current_++));
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Warning: very much a no-guarantees race-condition-rich implementation!
|
void Produce(const T &val) {
|
||||||
// But sufficient for our specific purpose: The single thread that consumes
|
Produce(T(val));
|
||||||
// is also the only one that checks Empty, and knows that it's racing.
|
}
|
||||||
bool Empty() const { return reading_current_ == filling_current_; }
|
|
||||||
|
|
||||||
private:
|
T& Consume(T &out) {
|
||||||
void SetFilling(UnboundedPage<T> *to) {
|
WaitSemaphore(valid_);
|
||||||
filling_ = to;
|
if (reading_current_ == reading_end_) {
|
||||||
filling_current_ = to->entries;
|
SetReading(reading_->next);
|
||||||
filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T);
|
}
|
||||||
}
|
out = std::move(*(reading_current_++));
|
||||||
void SetReading(UnboundedPage<T> *to) {
|
return out;
|
||||||
reading_.reset(to);
|
}
|
||||||
reading_current_ = to->entries;
|
|
||||||
reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T);
|
|
||||||
}
|
|
||||||
|
|
||||||
Semaphore valid_;
|
// Warning: very much a no-guarantees race-condition-rich implementation!
|
||||||
|
// But sufficient for our specific purpose: The single thread that consumes
|
||||||
|
// is also the only one that checks Empty, and knows that it's racing.
|
||||||
|
bool Empty() const {
|
||||||
|
return reading_current_ == filling_current_;
|
||||||
|
}
|
||||||
|
|
||||||
UnboundedPage<T> *filling_;
|
private:
|
||||||
|
void SetFilling(UnboundedPage<T> *to) {
|
||||||
|
filling_ = to;
|
||||||
|
filling_current_ = to->entries;
|
||||||
|
filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T);
|
||||||
|
}
|
||||||
|
void SetReading(UnboundedPage<T> *to) {
|
||||||
|
reading_.reset(to);
|
||||||
|
reading_current_ = to->entries;
|
||||||
|
reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T);
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<UnboundedPage<T>> reading_;
|
Semaphore valid_;
|
||||||
|
|
||||||
T *filling_current_;
|
UnboundedPage<T> *filling_;
|
||||||
T *filling_end_;
|
|
||||||
T *reading_current_;
|
|
||||||
T *reading_end_;
|
|
||||||
|
|
||||||
UnboundedSingleQueue(const UnboundedSingleQueue &) = delete;
|
std::unique_ptr<UnboundedPage<T> > reading_;
|
||||||
UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete;
|
|
||||||
|
T *filling_current_;
|
||||||
|
T *filling_end_;
|
||||||
|
T *reading_current_;
|
||||||
|
T *reading_end_;
|
||||||
|
|
||||||
|
UnboundedSingleQueue(const UnboundedSingleQueue &) = delete;
|
||||||
|
UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace bergamot
|
} // namespace bergamot
|
||||||
|
@ -11,18 +11,24 @@ namespace marian {
|
|||||||
namespace bergamot {
|
namespace bergamot {
|
||||||
|
|
||||||
// -----------------------------------------------------------------
|
// -----------------------------------------------------------------
|
||||||
Request::Request(size_t Id, size_t lineNumberBegin,
|
Request::Request(size_t Id, Segments &&segments,
|
||||||
std::vector<Ptr<Vocab const>> &vocabs, AnnotatedText &&source,
|
ResponseBuilder &&responseBuilder)
|
||||||
Segments &&segments, std::promise<Response> responsePromise)
|
: Id_(Id), segments_(std::move(segments)),
|
||||||
: Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
|
responseBuilder_(std::move(responseBuilder))
|
||||||
source_(std::move(source)), segments_(std::move(segments)),
|
|
||||||
response_(std::move(responsePromise)) {
|
{
|
||||||
|
|
||||||
counter_ = segments_.size();
|
counter_ = segments_.size();
|
||||||
histories_.resize(segments_.size(), nullptr);
|
histories_.resize(segments_.size(), nullptr);
|
||||||
|
|
||||||
|
// If there are no segments_, we are never able to trigger the responseBuilder
|
||||||
|
// calls from a different thread. However, in this case we want an empty valid
|
||||||
|
// response.
|
||||||
|
if (segments_.size() == 0) {
|
||||||
|
responseBuilder_(std::move(histories_));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t Request::lineNumberBegin() const { return lineNumberBegin_; }
|
|
||||||
size_t Request::numSegments() const { return segments_.size(); }
|
size_t Request::numSegments() const { return segments_.size(); }
|
||||||
|
|
||||||
size_t Request::segmentTokens(size_t index) const {
|
size_t Request::segmentTokens(size_t index) const {
|
||||||
@ -39,17 +45,10 @@ void Request::processHistory(size_t index, Ptr<History> history) {
|
|||||||
// In case this is last request in, completeRequest is called, which sets the
|
// In case this is last request in, completeRequest is called, which sets the
|
||||||
// value of the promise.
|
// value of the promise.
|
||||||
if (--counter_ == 0) {
|
if (--counter_ == 0) {
|
||||||
completeRequest();
|
responseBuilder_(std::move(histories_));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Request::completeRequest() {
|
|
||||||
// Request no longer needs to hold the content, can transfer it to
|
|
||||||
// Response.
|
|
||||||
Response response(std::move(source_), std::move(histories_), *vocabs_);
|
|
||||||
response_.set_value(std::move(response));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Request::operator<(const Request &b) const {
|
bool Request::operator<(const Request &b) const {
|
||||||
// Among Requests, only sequence id is used for obtaining priority.
|
// Among Requests, only sequence id is used for obtaining priority.
|
||||||
return Id_ < b.Id_;
|
return Id_ < b.Id_;
|
||||||
@ -64,10 +63,6 @@ size_t RequestSentence::numTokens() const {
|
|||||||
return (request_->segmentTokens(index_));
|
return (request_->segmentTokens(index_));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t RequestSentence::lineNumber() const {
|
|
||||||
return (request_->lineNumberBegin() + index_);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RequestSentence::completeSentence(Ptr<History> history) {
|
void RequestSentence::completeSentence(Ptr<History> history) {
|
||||||
// Relays completeSentence into request's processHistory, using index
|
// Relays completeSentence into request's processHistory, using index
|
||||||
// information.
|
// information.
|
||||||
|
@ -1,24 +1,9 @@
|
|||||||
//
|
|
||||||
// Defines:
|
|
||||||
//
|
|
||||||
// Request: holds the input text of a text, Segments (vector<Words>) which are
|
|
||||||
// to go to the batching mechanism and alignments between the processed
|
|
||||||
// segments and the input text (sourceTokenRanges). In addition, Request takes
|
|
||||||
// care of the barrier which fires when all the Segments in a request are done
|
|
||||||
// translating by the workers (BatchTranslator).
|
|
||||||
// TODO(jerinphilip): Extend Request with notions of Priority (sequence,
|
|
||||||
// user-given).
|
|
||||||
//
|
|
||||||
// RequestSentence: is a tuple of (index, Ptr<Request>). This provides the
|
|
||||||
// batching mechanism access to the segment within the request. The backref to
|
|
||||||
// Request allows event triggering the barrier upon completion of the last
|
|
||||||
// sentence by a worker.
|
|
||||||
|
|
||||||
#ifndef SRC_BERGAMOT_REQUEST_H_
|
#ifndef SRC_BERGAMOT_REQUEST_H_
|
||||||
#define SRC_BERGAMOT_REQUEST_H_
|
#define SRC_BERGAMOT_REQUEST_H_
|
||||||
|
|
||||||
#include "definitions.h"
|
#include "definitions.h"
|
||||||
#include "response.h"
|
#include "response.h"
|
||||||
|
#include "response_builder.h"
|
||||||
#include "sentence_ranges.h"
|
#include "sentence_ranges.h"
|
||||||
|
|
||||||
#include "common/logging.h"
|
#include "common/logging.h"
|
||||||
@ -33,80 +18,96 @@
|
|||||||
namespace marian {
|
namespace marian {
|
||||||
namespace bergamot {
|
namespace bergamot {
|
||||||
|
|
||||||
|
/// A Request is an internal representation used to represent a request after
|
||||||
|
/// processed by TextProcessor into sentences constituted by marian::Words.
|
||||||
|
///
|
||||||
|
/// The batching mechanism (Batcher) draws from multiple Requests and compiles
|
||||||
|
/// sentences into a batch. When a batch completes translation (at
|
||||||
|
/// BatchTranslator, intended in a different thread), backward propogation
|
||||||
|
/// happens through:
|
||||||
|
///
|
||||||
|
/// ```cpp
|
||||||
|
/// Batch::completeBatch(...)
|
||||||
|
/// -> RequestSentence::completeSentence(..)
|
||||||
|
/// -> Request::processHistory(...)
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// When all sentences in a Request are completed, responseBuilder is
|
||||||
|
/// triggered with the compiled Histories, to construct the Response
|
||||||
|
/// corresponding to the Request and set value of the promise which triggers the
|
||||||
|
/// future at client.
|
||||||
class Request {
|
class Request {
|
||||||
public:
|
public:
|
||||||
Request(size_t Id, size_t lineNumberBegin,
|
/// Constructs an internal representation of the Request identified by Id,
|
||||||
std::vector<Ptr<Vocab const>> &vocabs_, AnnotatedText &&source,
|
/// processed Segments and accepts a callback (ResponseBuilder) which builds
|
||||||
Segments &&segments, std::promise<Response> responsePromise);
|
/// the Response upon completion of the Request.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// @param [in] Id: Identifier assigned to Request by Service.
|
||||||
|
/// @param [in] segments: Each segment is a unit to be translated.
|
||||||
|
/// @param [in] responseBuilder: Callback function (of ResponseBuilder type)
|
||||||
|
/// to be triggered upon the completion of translation of all units in a
|
||||||
|
/// Request.
|
||||||
|
Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder);
|
||||||
|
|
||||||
// Obtain the count of tokens in the segment correponding to index. Used to
|
/// Obtain the count of tokens in the segment correponding to index. Used to
|
||||||
// insert sentence from multiple requests into the corresponding size bucket.
|
/// insert sentence from multiple requests into the corresponding size bucket.
|
||||||
size_t segmentTokens(size_t index) const;
|
size_t segmentTokens(size_t index) const;
|
||||||
|
|
||||||
// Obtain number of segments in a request.
|
/// Obtain number of segments in a request.
|
||||||
size_t numSegments() const;
|
size_t numSegments() const;
|
||||||
size_t lineNumberBegin() const;
|
|
||||||
|
|
||||||
// Obtains segment corresponding to index to create a batch of segments among
|
/// Obtains segment corresponding to index to create a batch of segments
|
||||||
// several requests.
|
/// among several requests.
|
||||||
Segment getSegment(size_t index) const;
|
Segment getSegment(size_t index) const;
|
||||||
|
|
||||||
// For notions of priority among requests, used to enable std::set in
|
/// For notions of priority among requests, used to enable std::set in
|
||||||
// Batcher.
|
/// Batcher.
|
||||||
bool operator<(const Request &request) const;
|
bool operator<(const Request &request) const;
|
||||||
|
|
||||||
// Processes a history obtained after translating in a heterogenous batch
|
/// Processes a history obtained after translating in a heterogenous batch
|
||||||
// compiled from requests.
|
/// compiled from requests.
|
||||||
void processHistory(size_t index, Ptr<History> history);
|
void processHistory(size_t index, Ptr<History> history);
|
||||||
|
|
||||||
// On completion of last segment, sets value of the promise.
|
|
||||||
void completeRequest();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t Id_;
|
size_t Id_;
|
||||||
size_t lineNumberBegin_;
|
|
||||||
|
|
||||||
// Multiple translation-workers can concurrently access the same Request. The
|
/// Multiple translation-workers can concurrently access the same Request. The
|
||||||
// following atomic atomically operates on the variable holding sentences
|
/// following atomic atomically operates on the variable holding sentences
|
||||||
// remaining to be translated.
|
/// remaining to be translated.
|
||||||
std::atomic<int> counter_;
|
std::atomic<int> counter_;
|
||||||
|
|
||||||
// source_ holds the source string to be translated. segments_ hold the
|
/// segments_ hold the sentences processed into Words which generated from
|
||||||
// sentences generated from source_ in vector<Words>. sourceRanges_ are
|
/// input string.
|
||||||
// string_views of the text corresponding to these words, pointing to
|
|
||||||
// sequences in source_. histories_ is a buffer which eventually stores the
|
|
||||||
// translations of each segment in the corresponding index.
|
|
||||||
AnnotatedText source_;
|
|
||||||
Segments segments_;
|
Segments segments_;
|
||||||
|
|
||||||
|
/// histories_ is a buffer which eventually stores the translations of each
|
||||||
|
/// segment in the corresponding index.
|
||||||
std::vector<Ptr<History>> histories_;
|
std::vector<Ptr<History>> histories_;
|
||||||
|
|
||||||
// Members above are moved into newly constructed Response on completion
|
/// Constructing Response requires the vocabs_ used to generate Request.
|
||||||
// of translation of all segments. The promise below is set to this Response
|
/// std::vector<Ptr<Vocab const>> *vocabs_;
|
||||||
// value. future to this promise is made available to the user through
|
ResponseBuilder responseBuilder_;
|
||||||
// Service.
|
|
||||||
std::promise<Response> response_;
|
|
||||||
|
|
||||||
// Constructing Response requires the vocabs_ used to generate Request.
|
|
||||||
std::vector<Ptr<Vocab const>> *vocabs_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// A RequestSentence provides a view to a sentence within a Request. Existence
|
||||||
|
/// of this class allows the sentences and associated information to be kept
|
||||||
|
/// within Request, while batching mechanism (Batcher) compiles Batch from
|
||||||
|
/// RequestSentence-s coming from different Requests.
|
||||||
class RequestSentence {
|
class RequestSentence {
|
||||||
// A RequestSentence provides a view to a sentence within a Request. Existence
|
|
||||||
// of this class allows the sentences and associated information to be kept
|
|
||||||
// within Request.
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RequestSentence(size_t, Ptr<Request>);
|
RequestSentence(size_t, Ptr<Request>);
|
||||||
|
|
||||||
|
/// Number of tokens in the segment this RequestSentence represents. Used to
|
||||||
|
/// order by length in batching.
|
||||||
size_t numTokens() const;
|
size_t numTokens() const;
|
||||||
|
|
||||||
// lineNumber in Request, used for matching marian-decoder. SentenceTuple
|
/// Accessor to the segment represented by the RequestSentence.
|
||||||
// requires lineNumber to be set for Corpus based batches.
|
|
||||||
size_t lineNumber() const;
|
|
||||||
|
|
||||||
// Accessor to the segment represented by the RequestSentence.
|
|
||||||
Segment getUnderlyingSegment() const;
|
Segment getUnderlyingSegment() const;
|
||||||
|
|
||||||
// Forwards call to Request, checking for completion.
|
/// Forwards history to Request to set history corresponding to this
|
||||||
|
/// RequestSentence.
|
||||||
void completeSentence(Ptr<History> history);
|
void completeSentence(Ptr<History> history);
|
||||||
|
|
||||||
friend bool operator<(const RequestSentence &a, const RequestSentence &b);
|
friend bool operator<(const RequestSentence &a, const RequestSentence &b);
|
||||||
|
@ -1,106 +0,0 @@
|
|||||||
#include "response.h"
|
|
||||||
#include "common/logging.h"
|
|
||||||
#include "data/alignment.h"
|
|
||||||
#include "sentence_ranges.h"
|
|
||||||
|
|
||||||
#include <utility>
|
|
||||||
|
|
||||||
namespace marian {
|
|
||||||
namespace bergamot {
|
|
||||||
|
|
||||||
Response::Response(AnnotatedText &&source, Histories &&histories,
|
|
||||||
std::vector<Ptr<Vocab const>> &vocabs)
|
|
||||||
: source(std::move(source)) {
|
|
||||||
// Reserving length at least as much as source_ seems like a reasonable thing
|
|
||||||
// to do to avoid reallocations.
|
|
||||||
target.text.reserve(source.text.size());
|
|
||||||
|
|
||||||
// In a first step, the decoded units (individual senteneces) are compiled
|
|
||||||
// into a huge string. This is done by computing indices first and appending
|
|
||||||
// to the string as each sentences are decoded.
|
|
||||||
std::vector<std::pair<size_t, size_t>> translationRanges;
|
|
||||||
std::vector<size_t> sentenceBegins;
|
|
||||||
|
|
||||||
size_t offset{0};
|
|
||||||
bool first{true};
|
|
||||||
|
|
||||||
for (auto &history : histories) {
|
|
||||||
// TODO(jerin): Change hardcode of nBest = 1
|
|
||||||
NBestList onebest = history->nBest(1);
|
|
||||||
|
|
||||||
Result result = onebest[0]; // Expecting only one result;
|
|
||||||
Words words = std::get<0>(result);
|
|
||||||
auto targetVocab = vocabs.back();
|
|
||||||
|
|
||||||
std::string decoded;
|
|
||||||
std::vector<string_view> targetMappings;
|
|
||||||
targetVocab->decodeWithByteRanges(words, decoded, targetMappings);
|
|
||||||
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
} else {
|
|
||||||
target.text += " ";
|
|
||||||
++offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
sentenceBegins.push_back(translationRanges.size());
|
|
||||||
target.text += decoded;
|
|
||||||
auto decodedStringBeginMarker = targetMappings.front().begin();
|
|
||||||
for (auto &sview : targetMappings) {
|
|
||||||
size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
|
|
||||||
translationRanges.emplace_back(startIdx, startIdx + sview.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
offset += decoded.size();
|
|
||||||
|
|
||||||
// Alignments
|
|
||||||
// TODO(jerinphilip): The following double conversion might not be
|
|
||||||
// necessary. Hard alignment can directly be exported, but this would mean
|
|
||||||
// WASM bindings for a structure deep within marian source.
|
|
||||||
auto hyp = std::get<1>(result);
|
|
||||||
auto softAlignment = hyp->tracebackAlignment();
|
|
||||||
auto hardAlignment = data::ConvertSoftAlignToHardAlign(
|
|
||||||
softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
|
|
||||||
// configurable parameter.
|
|
||||||
|
|
||||||
Alignment unified_alignment;
|
|
||||||
for (auto &p : hardAlignment) {
|
|
||||||
unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob});
|
|
||||||
}
|
|
||||||
|
|
||||||
alignments.push_back(std::move(unified_alignment));
|
|
||||||
|
|
||||||
// Quality scores: Sequence level is obtained as normalized path scores.
|
|
||||||
// Word level using hypothesis traceback. These are most-likely logprobs.
|
|
||||||
auto normalizedPathScore = std::get<2>(result);
|
|
||||||
auto wordQualities = hyp->tracebackWordScores();
|
|
||||||
wordQualities.pop_back();
|
|
||||||
qualityScores.push_back((Quality){normalizedPathScore, wordQualities});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Once we have the indices in translation (which might be resized a few
|
|
||||||
// times) ready, we can prepare and store the string_view as annotations
|
|
||||||
// instead. This is accomplished by iterating over available sentences using
|
|
||||||
// sentenceBegin and using addSentence(...) API from Annotation.
|
|
||||||
|
|
||||||
for (size_t i = 1; i <= sentenceBegins.size(); i++) {
|
|
||||||
std::vector<string_view> targetMappings;
|
|
||||||
size_t begin = sentenceBegins[i - 1];
|
|
||||||
size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
|
|
||||||
: sentenceBegins[i];
|
|
||||||
|
|
||||||
for (size_t idx = begin; idx < safe_end; idx++) {
|
|
||||||
auto &p = translationRanges[idx];
|
|
||||||
size_t begin_idx = p.first;
|
|
||||||
size_t end_idx = p.second;
|
|
||||||
|
|
||||||
const char *data = &target.text[begin_idx];
|
|
||||||
size_t size = end_idx - begin_idx;
|
|
||||||
targetMappings.emplace_back(data, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
target.addSentence(targetMappings);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace bergamot
|
|
||||||
} // namespace marian
|
|
@ -40,34 +40,12 @@ struct Quality {
|
|||||||
/// AnnotatedText provides an API to access markings of (sub)-word and
|
/// AnnotatedText provides an API to access markings of (sub)-word and
|
||||||
/// sentences boundaries, which are required to interpret Quality and
|
/// sentences boundaries, which are required to interpret Quality and
|
||||||
/// Alignment (s) at the moment.
|
/// Alignment (s) at the moment.
|
||||||
class Response {
|
struct Response {
|
||||||
|
/// Convenience function to obtain number of units translated. Same as
|
||||||
public:
|
/// `.source.numSentences()` and `.target.numSentences().` The processing of a
|
||||||
///
|
/// text of into sentences are handled internally, and this information can be
|
||||||
Response(AnnotatedText &&source, Histories &&histories,
|
/// used to iterate through meaningful units of translation for which
|
||||||
std::vector<Ptr<Vocab const>> &vocabs);
|
/// alignment and quality information are available.
|
||||||
|
|
||||||
/// \cond HIDDEN_PUBLIC
|
|
||||||
// Move constructor.
|
|
||||||
Response(Response &&other)
|
|
||||||
: source(std::move(other.source)), target(std::move(other.target)),
|
|
||||||
alignments(std::move(other.alignments)),
|
|
||||||
qualityScores(std::move(other.qualityScores)){};
|
|
||||||
|
|
||||||
// The following copy bans are not stricitly required anymore since Annotation
|
|
||||||
// is composed of the ByteRange primitive (which was previously string_view
|
|
||||||
// and required to be bound to string), but makes movement efficient by
|
|
||||||
// banning these letting compiler complain about copies.
|
|
||||||
|
|
||||||
Response(const Response &) = delete;
|
|
||||||
Response &operator=(const Response &) = delete;
|
|
||||||
|
|
||||||
/// \endcond
|
|
||||||
|
|
||||||
/// Number of sentences translated. The processing of a text of into sentences
|
|
||||||
/// are handled internally, and this information can be used to iterate
|
|
||||||
/// through meaningful units of translation for which alignment and quality
|
|
||||||
/// information are available.
|
|
||||||
const size_t size() const { return source.numSentences(); }
|
const size_t size() const { return source.numSentences(); }
|
||||||
|
|
||||||
/// source text and annotations of (sub-)words and sentences.
|
/// source text and annotations of (sub-)words and sentences.
|
||||||
@ -86,6 +64,10 @@ public:
|
|||||||
/// sparse matrix representation with indices corresponding
|
/// sparse matrix representation with indices corresponding
|
||||||
/// to (sub-)words accessible through Annotation.
|
/// to (sub-)words accessible through Annotation.
|
||||||
std::vector<Alignment> alignments;
|
std::vector<Alignment> alignments;
|
||||||
|
|
||||||
|
const std::string &getOriginalText() const { return source.text; }
|
||||||
|
|
||||||
|
const std::string &getTranslatedText() const { return target.text; }
|
||||||
};
|
};
|
||||||
} // namespace bergamot
|
} // namespace bergamot
|
||||||
} // namespace marian
|
} // namespace marian
|
||||||
|
87
src/translator/response_builder.cpp
Normal file
87
src/translator/response_builder.cpp
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
#include "response_builder.h"
|
||||||
|
|
||||||
|
namespace marian {
|
||||||
|
namespace bergamot {
|
||||||
|
|
||||||
|
void ResponseBuilder::buildQualityScores(Histories &histories,
|
||||||
|
Response &response) {
|
||||||
|
std::vector<Quality> qualityScores;
|
||||||
|
for (auto &history : histories) {
|
||||||
|
// TODO(jerin): Change hardcode of nBest = 1
|
||||||
|
NBestList onebest = history->nBest(1);
|
||||||
|
|
||||||
|
Result result = onebest[0]; // Expecting only one result;
|
||||||
|
Words words = std::get<0>(result);
|
||||||
|
auto hyp = std::get<1>(result);
|
||||||
|
// Quality scores: Sequence level is obtained as normalized path scores.
|
||||||
|
// Word level using hypothesis traceback. These are most-likely
|
||||||
|
// logprobs.
|
||||||
|
auto normalizedPathScore = std::get<2>(result);
|
||||||
|
auto wordQualities = hyp->tracebackWordScores();
|
||||||
|
wordQualities.pop_back();
|
||||||
|
response.qualityScores.push_back(
|
||||||
|
Quality{normalizedPathScore, wordQualities});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResponseBuilder::buildAlignments(Histories &histories,
|
||||||
|
Response &response) {
|
||||||
|
for (auto &history : histories) {
|
||||||
|
// TODO(jerin): Change hardcode of nBest = 1
|
||||||
|
NBestList onebest = history->nBest(1);
|
||||||
|
|
||||||
|
Result result = onebest[0]; // Expecting only one result;
|
||||||
|
Words words = std::get<0>(result);
|
||||||
|
// Alignments
|
||||||
|
// TODO(jerinphilip): The following double conversion might not be
|
||||||
|
// necessary. Hard alignment can directly be exported, but this would
|
||||||
|
// mean WASM bindings for a structure deep within marian source.
|
||||||
|
auto hyp = std::get<1>(result);
|
||||||
|
auto softAlignment = hyp->tracebackAlignment();
|
||||||
|
auto threshold = responseOptions_.alignmentThreshold;
|
||||||
|
auto hardAlignment =
|
||||||
|
data::ConvertSoftAlignToHardAlign(softAlignment, threshold);
|
||||||
|
Alignment unified_alignment;
|
||||||
|
for (auto &p : hardAlignment) {
|
||||||
|
unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob});
|
||||||
|
}
|
||||||
|
|
||||||
|
response.alignments.push_back(std::move(unified_alignment));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResponseBuilder::buildTranslatedText(Histories &histories,
|
||||||
|
Response &response) {
|
||||||
|
// Reserving length at least as much as source_ seems like a reasonable
|
||||||
|
// thing to do to avoid reallocations.
|
||||||
|
response.target.text.reserve(response.source.text.size());
|
||||||
|
|
||||||
|
size_t offset{0};
|
||||||
|
bool first{true};
|
||||||
|
|
||||||
|
for (auto &history : histories) {
|
||||||
|
// TODO(jerin): Change hardcode of nBest = 1
|
||||||
|
NBestList onebest = history->nBest(1);
|
||||||
|
|
||||||
|
Result result = onebest[0]; // Expecting only one result;
|
||||||
|
Words words = std::get<0>(result);
|
||||||
|
auto targetVocab = vocabs_->back();
|
||||||
|
|
||||||
|
std::string decoded;
|
||||||
|
std::vector<string_view> targetSentenceMappings;
|
||||||
|
targetVocab->decodeWithByteRanges(words, decoded, targetSentenceMappings);
|
||||||
|
|
||||||
|
// delimiter can be used to fill in the blanks from source as well.
|
||||||
|
std::string delimiter;
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
delimiter = " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
response.target.appendSentence(delimiter, decoded, targetSentenceMappings);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace bergamot
|
||||||
|
} // namespace marian
|
93
src/translator/response_builder.h
Normal file
93
src/translator/response_builder.h
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_
|
||||||
|
#define SRC_BERGAMOT_RESPONSE_BUILDER_H_
|
||||||
|
|
||||||
|
#include "data/types.h"
|
||||||
|
#include "response.h"
|
||||||
|
#include "response_options.h"
|
||||||
|
|
||||||
|
// For now we will work with this, to avoid complaints another structure is hard
|
||||||
|
// to operate with.
|
||||||
|
|
||||||
|
namespace marian {
|
||||||
|
namespace bergamot {
|
||||||
|
|
||||||
|
/// ResponseBuilder is a callback functor. It is expected to be bound to a
|
||||||
|
/// Request after giving it the context of options, vocabs and promise to set.
|
||||||
|
/// It constructs the Response and it's members based on options
|
||||||
|
/// (quality=on|off, alignments=on|off, mappings=on|off, splitmode=sentence |
|
||||||
|
/// paragraph).
|
||||||
|
|
||||||
|
class ResponseBuilder {
|
||||||
|
public:
|
||||||
|
/// @param [in] responseOptions: ResponseOptions, indicating what to include
|
||||||
|
/// or not in the response and any additional configurable parameters.
|
||||||
|
/// @param [in] vocabs: marian vocab object (used in decoding)
|
||||||
|
/// @param [in] promise: promise to set with the constructed Response.
|
||||||
|
ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source,
|
||||||
|
std::vector<Ptr<Vocab const>> &vocabs,
|
||||||
|
std::promise<Response> &&promise)
|
||||||
|
: responseOptions_(responseOptions), source_(std::move(source)),
|
||||||
|
vocabs_(&vocabs), promise_(std::move(promise)) {}
|
||||||
|
|
||||||
|
/// Constructs and sets the promise of a Response object from obtained
|
||||||
|
/// histories after translating.
|
||||||
|
/// @param [in] histories: Histories obtained after translating the Request
|
||||||
|
/// from which this functor is called.
|
||||||
|
void operator()(Histories &&histories) {
|
||||||
|
// TODO(jerinphilip) load ResponseOptions into options and turn build
|
||||||
|
// functions on or off.
|
||||||
|
// responseOptions_ currently only gates quality scores and alignments below.
|
||||||
|
ABORT_IF(source_.numSentences() != histories.size(),
|
||||||
|
"Mismatch in source and translated sentences");
|
||||||
|
Response response;
|
||||||
|
|
||||||
|
// Move source_ into response.
|
||||||
|
response.source = std::move(source_);
|
||||||
|
|
||||||
|
// Should be after source is set
|
||||||
|
buildTranslatedText(histories, response);
|
||||||
|
|
||||||
|
// Should always be after buildTranslatedText
|
||||||
|
if (responseOptions_.qualityScores) {
|
||||||
|
buildQualityScores(histories, response);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (responseOptions_.alignment) {
|
||||||
|
buildAlignments(histories, response);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Once complete, set promise.
|
||||||
|
promise_.set_value(std::move(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Builds qualityScores from histories and writes to response. expects
|
||||||
|
/// buildTranslatedText to be run before to be able to obtain target text and
|
||||||
|
/// subword information.
|
||||||
|
/// @param histories [in]
|
||||||
|
/// @param response [out]
|
||||||
|
void buildQualityScores(Histories &histories, Response &response);
|
||||||
|
|
||||||
|
/// Builds alignments from histories and writes onto response.
|
||||||
|
/// @param histories [in]
|
||||||
|
/// @param response [out]
|
||||||
|
void buildAlignments(Histories &histories, Response &response);
|
||||||
|
|
||||||
|
/// Builds translated text and subword annotations and writes onto response.
|
||||||
|
/// @param histories [in]
|
||||||
|
/// @param response [out]
|
||||||
|
void buildTranslatedText(Histories &histories, Response &response);
|
||||||
|
|
||||||
|
// Data members are context/curried args for the functor.
|
||||||
|
|
||||||
|
ResponseOptions responseOptions_;
|
||||||
|
std::vector<Ptr<Vocab const>> *vocabs_; // vocabs are required for decoding
|
||||||
|
// and any source validation checks.
|
||||||
|
std::promise<Response> promise_; // To be set when callback triggered and
|
||||||
|
// after Response constructed.
|
||||||
|
AnnotatedText source_;
|
||||||
|
};
|
||||||
|
} // namespace bergamot
|
||||||
|
} // namespace marian
|
||||||
|
|
||||||
|
#endif // SRC_BERGAMOT_RESPONSE_BUILDER_H_
|
50
src/translator/response_options.h
Normal file
50
src/translator/response_options.h
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_
|
||||||
|
#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace marian {
|
||||||
|
namespace bergamot {
|
||||||
|
|
||||||
|
enum ConcatStrategy {
|
||||||
|
/// Target text is constructed faithful to the source-text structure.
|
||||||
|
FAITHFUL,
|
||||||
|
|
||||||
|
/// Target text is concatenated by a space.
|
||||||
|
SPACE
|
||||||
|
};
|
||||||
|
|
||||||
|
enum QualityScoreType {
|
||||||
|
/// Provide a free quality-score that comes with the machine-translation model
|
||||||
|
/// itself.
|
||||||
|
FREE,
|
||||||
|
|
||||||
|
/// An expensive quality-score that runs additional computations to determine
|
||||||
|
/// quality of an output.
|
||||||
|
EXPENSIVE
|
||||||
|
};
|
||||||
|
|
||||||
|
/// ResponseOptions dictate how to construct a Response for an input string of
|
||||||
|
/// text to be translated.
|
||||||
|
struct ResponseOptions {
|
||||||
|
bool qualityScores{false}; ///< Include quality-scores or not.
|
||||||
|
bool alignment{false}; ///< Include alignments or not.
|
||||||
|
|
||||||
|
/// Whether to include sentenceMappings or not. Alignments require
|
||||||
|
/// sentenceMappings and are available irrespective of this option if
|
||||||
|
/// `alignment=true`.
|
||||||
|
bool sentenceMappings{false};
|
||||||
|
|
||||||
|
/// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse
|
||||||
|
/// matrix. Higher value implies stronger filtering leading to provision of
|
||||||
|
/// higher-confidence matches. `1.0f` gives argmax (not the full-dense
|
||||||
|
/// matrix).
|
||||||
|
float alignmentThreshold{0.2f};
|
||||||
|
|
||||||
|
QualityScoreType qualityScoreType{QualityScoreType::FREE};
|
||||||
|
ConcatStrategy concatStrategy{ConcatStrategy::FAITHFUL};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace bergamot
|
||||||
|
} // namespace marian
|
||||||
|
|
||||||
|
#endif // SRC_BERGAMOT_RESPONSE_OPTIONS_H_
|
@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const {
|
|||||||
// the flatByteRange and non-empty sentence before this happened and
|
// the flatByteRange and non-empty sentence before this happened and
|
||||||
// construct empty string-view equivalent ByteRange.
|
// construct empty string-view equivalent ByteRange.
|
||||||
ByteRange eos = flatByteRanges_[eosId - 1];
|
ByteRange eos = flatByteRanges_[eosId - 1];
|
||||||
sentenceByteRange = (ByteRange){eos.end, eos.end};
|
sentenceByteRange = ByteRange{eos.end, eos.end};
|
||||||
} else {
|
} else {
|
||||||
ByteRange bos = flatByteRanges_[bosId];
|
ByteRange bos = flatByteRanges_[bosId];
|
||||||
ByteRange eos = flatByteRanges_[eosId - 1];
|
ByteRange eos = flatByteRanges_[eosId - 1];
|
||||||
sentenceByteRange = (ByteRange){bos.begin, eos.end};
|
sentenceByteRange = ByteRange{bos.begin, eos.end};
|
||||||
}
|
}
|
||||||
return sentenceByteRange;
|
return sentenceByteRange;
|
||||||
}
|
}
|
||||||
@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const {
|
|||||||
return asStringView(sentenceAsByteRange);
|
return asStringView(sentenceAsByteRange);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AnnotatedText::appendSentence(std::string prefix, std::string &reference,
|
||||||
|
std::vector<string_view> &wordRanges) {
|
||||||
|
text += prefix;
|
||||||
|
size_t offset = text.size(); // Get size before to do ByteRange arithmetic
|
||||||
|
text += reference; // Append reference to text
|
||||||
|
std::vector<ByteRange> sentence;
|
||||||
|
for (auto &wordView : wordRanges) {
|
||||||
|
size_t thisWordBegin = offset + wordView.data() - &reference[0];
|
||||||
|
sentence.push_back(
|
||||||
|
ByteRange{thisWordBegin, thisWordBegin + wordView.size()});
|
||||||
|
}
|
||||||
|
annotation.addSentence(sentence);
|
||||||
|
}
|
||||||
|
|
||||||
void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
|
void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
|
||||||
addSentence(std::begin(wordRanges), std::end(wordRanges));
|
addSentence(std::begin(wordRanges), std::end(wordRanges));
|
||||||
};
|
};
|
||||||
@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
|
|||||||
std::vector<ByteRange> sentence;
|
std::vector<ByteRange> sentence;
|
||||||
for (auto p = begin; p != end; p++) {
|
for (auto p = begin; p != end; p++) {
|
||||||
size_t begin_offset = p->data() - &text[0];
|
size_t begin_offset = p->data() - &text[0];
|
||||||
sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()});
|
sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()});
|
||||||
}
|
}
|
||||||
annotation.addSentence(sentence);
|
annotation.addSentence(sentence);
|
||||||
};
|
};
|
||||||
|
@ -64,7 +64,6 @@ public:
|
|||||||
sentenceEndIds_.push_back(0);
|
sentenceEndIds_.push_back(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of sentences annotated in a text.
|
|
||||||
size_t numSentences() const { return sentenceEndIds_.size() - 1; }
|
size_t numSentences() const { return sentenceEndIds_.size() - 1; }
|
||||||
|
|
||||||
/// Returns number of words in the sentence identified by `sentenceIdx`.
|
/// Returns number of words in the sentence identified by `sentenceIdx`.
|
||||||
@ -125,10 +124,6 @@ public:
|
|||||||
/// constructor is disallowed).
|
/// constructor is disallowed).
|
||||||
AnnotatedText(std::string &&text) : text(std::move(text)){};
|
AnnotatedText(std::string &&text) : text(std::move(text)){};
|
||||||
|
|
||||||
AnnotatedText(AnnotatedText &&annotatedBlob)
|
|
||||||
: text(std::move(annotatedBlob.text)),
|
|
||||||
annotation(std::move(annotatedBlob.annotation)) {}
|
|
||||||
|
|
||||||
/// Returns the number of sentences in the annotation structure.
|
/// Returns the number of sentences in the annotation structure.
|
||||||
const size_t numSentences() const { return annotation.numSentences(); }
|
const size_t numSentences() const { return annotation.numSentences(); }
|
||||||
|
|
||||||
@ -137,6 +132,11 @@ public:
|
|||||||
return annotation.numWords(sentenceIdx);
|
return annotation.numWords(sentenceIdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends a sentence to the existing text and transparently rebases
|
||||||
|
/// string_views
|
||||||
|
void appendSentence(std::string prefix, std::string &reference,
|
||||||
|
std::vector<string_view> &wordRanges);
|
||||||
|
|
||||||
/// Adds a sentence, used to load from SentencePiece annotations conveniently.
|
/// Adds a sentence, used to load from SentencePiece annotations conveniently.
|
||||||
void addSentence(std::vector<string_view> &wordRanges);
|
void addSentence(std::vector<string_view> &wordRanges);
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include "common/options.h"
|
#include "common/options.h"
|
||||||
#include "data/types.h"
|
#include "data/types.h"
|
||||||
#include "ssplit.h"
|
#include "ssplit.h"
|
||||||
|
#include "definitions.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace marian {
|
namespace marian {
|
||||||
|
@ -28,8 +28,8 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
|
|||||||
namespace marian {
|
namespace marian {
|
||||||
namespace bergamot {
|
namespace bergamot {
|
||||||
|
|
||||||
Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
|
Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
|
||||||
: requestId_(0), vocabs_(std::move(loadVocabularies(options))),
|
: requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))),
|
||||||
text_processor_(vocabs_, options), batcher_(options),
|
text_processor_(vocabs_, options), batcher_(options),
|
||||||
numWorkers_(options->get<int>("cpu-threads")),
|
numWorkers_(options->get<int>("cpu-threads")),
|
||||||
modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
|
modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
|
||||||
@ -112,6 +112,44 @@ void Service::async_translate() {
|
|||||||
#endif // WASM_COMPATIBLE_SOURCE
|
#endif // WASM_COMPATIBLE_SOURCE
|
||||||
|
|
||||||
std::future<Response> Service::translate(std::string &&input) {
|
std::future<Response> Service::translate(std::string &&input) {
|
||||||
|
ResponseOptions responseOptions; // Hardcode responseOptions for now
|
||||||
|
return translate(std::move(input), responseOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Response>
|
||||||
|
Service::translateMultiple(std::vector<std::string> &&inputs,
|
||||||
|
TranslationRequest translationRequest) {
|
||||||
|
ResponseOptions responseOptions;
|
||||||
|
|
||||||
|
// TODO(jerinphilip) Set options based on TranslationRequest, if and when it
|
||||||
|
// becomes non-dummy.
|
||||||
|
|
||||||
|
// We queue the individual Requests so they get compiled into batches to be
|
||||||
|
// efficiently translated.
|
||||||
|
std::vector<std::future<Response>> responseFutures;
|
||||||
|
for (auto &input : inputs) {
|
||||||
|
std::future<Response> inputResponse =
|
||||||
|
queueRequest(std::move(input), responseOptions);
|
||||||
|
responseFutures.push_back(std::move(inputResponse));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dispatch is called once for all queued requests so compilation of sentences from
|
||||||
|
// multiple Requests happen.
|
||||||
|
dispatchTranslate();
|
||||||
|
|
||||||
|
// Now wait for all Requests to complete, the future to fire and return the
|
||||||
|
// compiled Responses, we can probably return the future, but WASM quirks(?).
|
||||||
|
std::vector<Response> responses;
|
||||||
|
for (auto &future : responseFutures) {
|
||||||
|
future.wait();
|
||||||
|
responses.push_back(std::move(future.get()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return responses;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::future<Response> Service::queueRequest(std::string &&input,
|
||||||
|
ResponseOptions responseOptions) {
|
||||||
Segments segments;
|
Segments segments;
|
||||||
AnnotatedText source(std::move(input));
|
AnnotatedText source(std::move(input));
|
||||||
text_processor_.process(source, segments);
|
text_processor_.process(source, segments);
|
||||||
@ -119,17 +157,29 @@ std::future<Response> Service::translate(std::string &&input) {
|
|||||||
std::promise<Response> responsePromise;
|
std::promise<Response> responsePromise;
|
||||||
auto future = responsePromise.get_future();
|
auto future = responsePromise.get_future();
|
||||||
|
|
||||||
Ptr<Request> request = New<Request>(
|
ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_,
|
||||||
requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source),
|
std::move(responsePromise));
|
||||||
std::move(segments), std::move(responsePromise));
|
Ptr<Request> request = New<Request>(requestId_++, std::move(segments),
|
||||||
|
std::move(responseBuilder));
|
||||||
|
|
||||||
batcher_.addWholeRequest(request);
|
batcher_.addWholeRequest(request);
|
||||||
|
return future;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::future<Response> Service::translate(std::string &&input,
|
||||||
|
ResponseOptions responseOptions) {
|
||||||
|
std::future<Response> future =
|
||||||
|
queueRequest(std::move(input), responseOptions);
|
||||||
|
dispatchTranslate();
|
||||||
|
return future;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Service::dispatchTranslate() {
|
||||||
if (numWorkers_ == 0) {
|
if (numWorkers_ == 0) {
|
||||||
blocking_translate();
|
blocking_translate();
|
||||||
} else {
|
} else {
|
||||||
async_translate();
|
async_translate();
|
||||||
}
|
}
|
||||||
return future;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Service::~Service() {
|
Service::~Service() {
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
#ifndef SRC_BERGAMOT_SERVICE_H_
|
#ifndef SRC_BERGAMOT_SERVICE_H_
|
||||||
#define SRC_BERGAMOT_SERVICE_H_
|
#define SRC_BERGAMOT_SERVICE_H_
|
||||||
|
|
||||||
|
#include "TranslationRequest.h"
|
||||||
#include "batch_translator.h"
|
#include "batch_translator.h"
|
||||||
#include "batcher.h"
|
#include "batcher.h"
|
||||||
#include "data/types.h"
|
#include "data/types.h"
|
||||||
#include "response.h"
|
#include "response.h"
|
||||||
|
#include "response_builder.h"
|
||||||
#include "text_processor.h"
|
#include "text_processor.h"
|
||||||
#include "translator/parser.h"
|
#include "translator/parser.h"
|
||||||
|
|
||||||
@ -18,22 +20,45 @@
|
|||||||
namespace marian {
|
namespace marian {
|
||||||
namespace bergamot {
|
namespace bergamot {
|
||||||
|
|
||||||
/// Service exposes methods to translate an incoming blob of text to the
|
/// Service offers methods to create an asynchronous translation service that
|
||||||
/// Consumer of bergamot API.
|
/// translates a plain (without any markups and emojis) UTF-8 encoded text.
|
||||||
|
/// This implementation supports translation from 1 source language to 1 target
|
||||||
|
/// language.
|
||||||
|
///
|
||||||
|
/// This is intended to be similar to the ones provided for training or
|
||||||
|
/// decoding in ML pipelines with the following additional capabilities:
|
||||||
|
///
|
||||||
|
/// 1. Provision of a request -> response based translation flow unlike the
|
||||||
|
/// usual line-based translation or decoding provided in most ML frameworks.
|
||||||
|
/// 2. Internal handling of normalization etc which changes source text to
|
||||||
|
/// provide to client translation meta-information like alignments consistent
|
||||||
|
/// with the unnormalized input text.
|
||||||
|
/// 3. The API splits each text entry into sentences internally, which are then
|
||||||
|
/// translated independent of each other. The translated sentences are then
|
||||||
|
/// joined back together and returned in Response.
|
||||||
|
///
|
||||||
|
/// Service exposes methods to instantiate the service from a string
|
||||||
|
/// configuration (which can cover most translators) and to translate an
|
||||||
|
/// incoming blob of text.
|
||||||
|
///
|
||||||
///
|
///
|
||||||
/// An example use of this API looks as follows:
|
/// An example use of this API looks as follows:
|
||||||
///
|
/// ```cpp
|
||||||
/// options = ...;
|
/// options = ...;
|
||||||
/// service = Service(options);
|
/// service = Service(options);
|
||||||
/// std::string input_text = "Hello World";
|
/// std::string input_text = "Hello World";
|
||||||
/// std::future<Response>
|
/// std::future<Response>
|
||||||
/// response = service.translate(std::move(input_text));
|
/// responseFuture = service.translate(std::move(input_text));
|
||||||
/// response.wait();
|
/// responseFuture.wait(); // Wait until translation has completed.
|
||||||
/// Response result = response.get();
|
/// Response response(std::move(responseFuture.get()));
|
||||||
///
|
///
|
||||||
/// Optionally Service can be initialized by also passing model_memory for
|
/// // Do things with response.
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Optionally Service can be initialized by also passing model memory for
|
||||||
/// purposes of efficiency (which defaults to nullpointer and then reads from
|
/// purposes of efficiency (which defaults to nullpointer and then reads from
|
||||||
/// file supplied through config).
|
/// file supplied through config).
|
||||||
|
///
|
||||||
class Service {
|
class Service {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -41,9 +66,22 @@ public:
|
|||||||
/// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
|
/// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
|
||||||
/// of a model.bin. Optional, defaults to nullptr when not used
|
/// of a model.bin. Optional, defaults to nullptr when not used
|
||||||
/// @param shortlistMemory byte array of shortlist (aligned to 64)
|
/// @param shortlistMemory byte array of shortlist (aligned to 64)
|
||||||
explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);
|
explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
|
||||||
|
AlignedMemory shortlistMemory);
|
||||||
|
|
||||||
explicit Service(Ptr<Options> options) : Service(options, AlignedMemory(), AlignedMemory()){}
|
/// Construct Service purely from Options. This expects options which
|
||||||
|
/// marian-decoder expects to be set for loading model shortlist and
|
||||||
|
/// vocabularies from files in addition to parameters that set or unset desired
|
||||||
|
/// features (e.g: alignments, quality-scores).
|
||||||
|
///
|
||||||
|
/// This is equivalent to a call to:
|
||||||
|
/// ```cpp
|
||||||
|
/// Service(options, AlignedMemory(), AlignedMemory())
|
||||||
|
/// ```
|
||||||
|
/// wherein empty memory is passed and internal flow defaults to file-based
|
||||||
|
/// model, shortlist loading.
|
||||||
|
explicit Service(Ptr<Options> options)
|
||||||
|
: Service(options, AlignedMemory(), AlignedMemory()) {}
|
||||||
|
|
||||||
/// Construct Service from a string configuration.
|
/// Construct Service from a string configuration.
|
||||||
/// @param [in] config string parsable as YAML expected to adhere with marian
|
/// @param [in] config string parsable as YAML expected to adhere with marian
|
||||||
@ -52,20 +90,66 @@ public:
|
|||||||
/// bytes of a model.bin. Optional.
|
/// bytes of a model.bin. Optional.
|
||||||
/// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
|
/// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
|
||||||
explicit Service(const std::string &config,
|
explicit Service(const std::string &config,
|
||||||
AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory())
|
AlignedMemory modelMemory = AlignedMemory(),
|
||||||
: Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {}
|
AlignedMemory shortlistMemory = AlignedMemory())
|
||||||
|
: Service(parseOptions(config, /*validate=*/false),
|
||||||
|
std::move(modelMemory), std::move(shortlistMemory)) {}
|
||||||
|
|
||||||
/// Explicit destructor to clean up after any threads initialized in
|
/// Explicit destructor to clean up after any threads initialized in
|
||||||
/// asynchronous operation mode.
|
/// asynchronous operation mode.
|
||||||
~Service();
|
~Service();
|
||||||
|
|
||||||
/// To stay efficient and to refer to the string for alignments, expects
|
/// To stay efficient and to refer to the string for alignments, expects
|
||||||
/// ownership be moved through std::move(..)
|
/// ownership be moved through `std::move(..)`
|
||||||
///
|
///
|
||||||
/// @param [in] rvalue reference of string to be translated.
|
/// @param [in] source: rvalue reference of string to be translated.
|
||||||
std::future<Response> translate(std::string &&input);
|
std::future<Response> translate(std::string &&source);
|
||||||
|
|
||||||
|
/// Translate an input, providing Options to construct Response. This is
|
||||||
|
/// useful when one has to set/unset alignments or quality in the Response to
|
||||||
|
/// save compute spent in constructing these objects.
|
||||||
|
///
|
||||||
|
/// @param [in] source: rvalue reference of the string to be translated
|
||||||
|
/// @param [in] responseOptions: Options indicating whether or not to include
|
||||||
|
/// some member in the Response, also specify any additional configurable
|
||||||
|
/// parameters.
|
||||||
|
std::future<Response> translate(std::string &&source,
|
||||||
|
ResponseOptions options);
|
||||||
|
|
||||||
|
/// Translate multiple text-blobs in a single *blocking* API call, providing
|
||||||
|
/// TranslationRequest which applies across all text-blobs dictating how to
|
||||||
|
/// construct Response. TranslationRequest can be used to enable/disable
|
||||||
|
/// additional information like quality-scores, alignments etc.
|
||||||
|
///
|
||||||
|
/// All texts are combined to efficiently construct batches together providing
|
||||||
|
/// speedups compared to calling translate() independently on individual
|
||||||
|
/// text-blob. Note that there will be minor differences in output when
|
||||||
|
/// text-blobs are individually translated due to approximations but similar
|
||||||
|
/// quality nonetheless. If you have async/multithread capabilities, it is
|
||||||
|
/// recommended to work with futures and translate() API.
|
||||||
|
///
|
||||||
|
/// @param [in] source: rvalue reference of the vector of strings to be translated
|
||||||
|
/// @param [in] translationRequest: TranslationRequest (Unified API)
|
||||||
|
/// indicating whether or not to include some member in the Response, also
|
||||||
|
/// specify any additional configurable parameters.
|
||||||
|
|
||||||
|
std::vector<Response>
|
||||||
|
translateMultiple(std::vector<std::string> &&source,
|
||||||
|
TranslationRequest translationRequest);
|
||||||
|
|
||||||
|
/// Returns if model is alignment capable or not.
|
||||||
|
bool isAlignmentSupported() const {
|
||||||
|
return options_->hasAndNotEmpty("alignment");
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// Queue an input for translation.
|
||||||
|
std::future<Response> queueRequest(std::string &&input,
|
||||||
|
ResponseOptions responseOptions);
|
||||||
|
|
||||||
|
/// Dispatch call to translate after inserting in queue
|
||||||
|
void dispatchTranslate();
|
||||||
|
|
||||||
/// Build numTranslators number of translators with options from options
|
/// Build numTranslators number of translators with options from options
|
||||||
void build_translators(Ptr<Options> options, size_t numTranslators);
|
void build_translators(Ptr<Options> options, size_t numTranslators);
|
||||||
/// Initializes a blocking translator without using std::thread
|
/// Initializes a blocking translator without using std::thread
|
||||||
@ -83,16 +167,21 @@ private:
|
|||||||
void async_translate();
|
void async_translate();
|
||||||
|
|
||||||
/// Number of workers to launch.
|
/// Number of workers to launch.
|
||||||
size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
|
size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
|
||||||
|
|
||||||
|
/// Options object holding the options Service was instantiated with.
|
||||||
|
Ptr<Options> options_;
|
||||||
|
|
||||||
/// Model memory to load model passed as bytes.
|
/// Model memory to load model passed as bytes.
|
||||||
AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
|
AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
|
||||||
/// Shortlist memory passed as bytes.
|
/// Shortlist memory passed as bytes.
|
||||||
AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_)
|
AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_)
|
||||||
|
|
||||||
/// Holds instances of batch translators, just one in case
|
/// Holds instances of batch translators, just one in case
|
||||||
/// of single-threaded application, numWorkers_ in case of multithreaded
|
/// of single-threaded application, numWorkers_ in case of multithreaded
|
||||||
/// setting.
|
/// setting.
|
||||||
std::vector<BatchTranslator> translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
|
std::vector<BatchTranslator>
|
||||||
|
translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
|
||||||
|
|
||||||
/// Stores requestId of active request. Used to establish
|
/// Stores requestId of active request. Used to establish
|
||||||
/// ordering among requests and logging/book-keeping.
|
/// ordering among requests and logging/book-keeping.
|
||||||
|
@ -23,6 +23,7 @@ endif()
|
|||||||
set_target_properties(bergamot-translator-worker PROPERTIES
|
set_target_properties(bergamot-translator-worker PROPERTIES
|
||||||
SUFFIX ".js"
|
SUFFIX ".js"
|
||||||
LINK_FLAGS ${LINKER_FLAGS}
|
LINK_FLAGS ${LINKER_FLAGS}
|
||||||
)
|
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
target_link_libraries(bergamot-translator-worker bergamot-translator)
|
target_link_libraries(bergamot-translator-worker bergamot-translator)
|
||||||
|
@ -1,17 +1,53 @@
|
|||||||
## Using Bergamot Translator in JavaScript
|
## Using Bergamot Translator in JavaScript
|
||||||
The example file `bergamot.html` in the folder `test_page` demonstrates how to use the bergamot translator in JavaScript via a `<script>` tag.
|
The example file `bergamot.html` in the folder `test_page` demonstrates how to use the bergamot translator in JavaScript via a `<script>` tag.
|
||||||
|
|
||||||
Please note that everything below assumes that the [bergamot project specific model files](https://github.com/mozilla-applied-ml/bergamot-models) were packaged in wasm binary (using the compile instructions given in the top level README).
|
### <a name="Pre-requisite"></a> Pre-requisite: Download files required for translation
|
||||||
|
|
||||||
### Using JS APIs
|
Please note that the [Using JS APIs](#Using-JS-APIs) and [Demo](#Demo) sections below assume that the [bergamot project specific model files](https://github.com/mozilla-applied-ml/bergamot-models) are already downloaded and present in the `test_page` folder. If this has not been done, use the following instructions:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd test_page
|
||||||
|
mkdir models
|
||||||
|
git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models
|
||||||
|
cp -rf bergamot-models/prod/* models
|
||||||
|
gunzip models/*/*
|
||||||
|
```
|
||||||
|
|
||||||
|
### <a name="Using-JS-APIs"></a> Using JS APIs
|
||||||
|
|
||||||
```js
|
```js
|
||||||
// The model configuration as YAML formatted string. For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
|
// The model configuration as YAML formatted string. For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
|
||||||
// This example captures the most relevant options: model file, vocabulary files and shortlist file
|
// This example captures some of the most relevant options
|
||||||
const modelConfig = "{\"models\":[\"/esen/model.esen.npz\"],\"vocabs\":[\"/esen/vocab.esen.spm\",\"/esen/vocab.esen.spm\"],\"shortlist\":[\"/esen/lex.esen.s2t\"],\"beam-size\":1}";
|
const modelConfig = `vocabs:
|
||||||
|
- /esen/vocab.esen.spm
|
||||||
|
- /esen/vocab.esen.spm
|
||||||
|
beam-size: 1
|
||||||
|
normalize: 1.0
|
||||||
|
word-penalty: 0
|
||||||
|
max-length-break: 128
|
||||||
|
mini-batch-words: 1024
|
||||||
|
workspace: 128
|
||||||
|
max-length-factor: 2.0
|
||||||
|
skip-cost: true
|
||||||
|
cpu-threads: 0
|
||||||
|
quiet: true
|
||||||
|
quiet-translation: true
|
||||||
|
gemm-precision: int8shift
|
||||||
|
`;
|
||||||
|
|
||||||
|
// Download model and shortlist files and read them into buffers
|
||||||
|
const modelFile = `models/esen/model.esen.intgemm.alphas.bin`;
|
||||||
|
const shortlistFile = `models/esen/lex.50.50.esen.s2t.bin`;
|
||||||
|
const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]); // Please refer to bergamot.html in test_page folder for this function
|
||||||
|
const modelBuffer = downloadedBuffers[0];
|
||||||
|
const shortListBuffer = downloadedBuffers[1];
|
||||||
|
|
||||||
|
// Construct AlignedMemory instances from the buffers
|
||||||
|
var alignedModelMemory = constructAlignedMemoryFromBuffer(modelBuffer, 256); // Please refer to bergamot.html in test_page folder for this function
|
||||||
|
var alignedShortlistMemory = constructAlignedMemoryFromBuffer(shortListBuffer, 64); // Please refer to bergamot.html in test_page folder for this function
|
||||||
|
|
||||||
// Instantiate the TranslationModel
|
// Instantiate the TranslationModel
|
||||||
const model = new Module.TranslationModel(modelConfig);
|
const model = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory);
|
||||||
|
|
||||||
// Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector<string>)
|
// Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector<string>)
|
||||||
const request = new Module.TranslationRequest();
|
const request = new Module.TranslationRequest();
|
||||||
@ -34,13 +70,18 @@ request.delete();
|
|||||||
input.delete();
|
input.delete();
|
||||||
```
|
```
|
||||||
|
|
||||||
### Demo (see everything in action)
|
### <a name="Demo"></a> Demo (see everything in action)
|
||||||
|
|
||||||
|
* Make sure that you followed [Pre-requisite](#Pre-requisite) instructions before moving forward.
|
||||||
|
|
||||||
* Start the test webserver (ensure you have the latest nodejs installed)
|
* Start the test webserver (ensure you have the latest nodejs installed)
|
||||||
```bash
|
```bash
|
||||||
cd test_page
|
cd test_page
|
||||||
bash start_server.sh
|
bash start_server.sh ../../build-wasm
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Provide the folder containing the wasm artifacts as the first argument of `start_server.sh` script (`../../build-wasm` in this case).
|
||||||
|
|
||||||
* Open any of the browsers below
|
* Open any of the browsers below
|
||||||
* Firefox Nightly +87: make sure the following prefs are on (about:config)
|
* Firefox Nightly +87: make sure the following prefs are on (about:config)
|
||||||
```
|
```
|
||||||
|
@ -6,17 +6,40 @@
|
|||||||
|
|
||||||
#include <emscripten/bind.h>
|
#include <emscripten/bind.h>
|
||||||
|
|
||||||
#include "TranslationModel.h"
|
#include "response.h"
|
||||||
|
#include "service.h"
|
||||||
|
|
||||||
using namespace emscripten;
|
using namespace emscripten;
|
||||||
|
|
||||||
// Binding code
|
typedef marian::bergamot::Service TranslationModel;
|
||||||
|
typedef marian::bergamot::Response TranslationResult;
|
||||||
|
|
||||||
|
// Exposes the bytes held by `alignedMemory` to JavaScript as a typed-array view
// (char elements, alignedMemory.size() entries). Bound below as the
// "getByteArrayView" method of AlignedMemory.
// NOTE(review): typed_memory_view presumably aliases the buffer rather than
// copying it, so the view should not outlive `alignedMemory` - confirm.
val getByteArrayView(marian::bergamot::AlignedMemory& alignedMemory) {
|
||||||
|
return val(typed_memory_view(alignedMemory.size(), alignedMemory.as<char>()));
|
||||||
|
}
|
||||||
|
|
||||||
|
EMSCRIPTEN_BINDINGS(aligned_memory) {
|
||||||
|
class_<marian::bergamot::AlignedMemory>("AlignedMemory")
|
||||||
|
.constructor<std::size_t, std::size_t>()
|
||||||
|
.function("size", &marian::bergamot::AlignedMemory::size)
|
||||||
|
.function("getByteArrayView", &getByteArrayView)
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Factory registered as the embind constructor of TranslationModel.
//   config          - model configuration string forwarded to the constructor
//   modelMemory     - AlignedMemory holding the model bytes
//   shortlistMemory - AlignedMemory holding the shortlist bytes
// Both AlignedMemory arguments are passed on via std::move, so the objects the
// caller handed in are left in a moved-from state and should not be reused.
// Returns a heap-allocated TranslationModel (created with `new`).
// NOTE(review): neither pointer is checked for null before being dereferenced.
TranslationModel* TranslationModelFactory(const std::string &config,
|
||||||
|
marian::bergamot::AlignedMemory* modelMemory,
|
||||||
|
marian::bergamot::AlignedMemory* shortlistMemory) {
|
||||||
|
return new TranslationModel(config, std::move(*modelMemory), std::move(*shortlistMemory));
|
||||||
|
}
|
||||||
|
|
||||||
EMSCRIPTEN_BINDINGS(translation_model) {
|
EMSCRIPTEN_BINDINGS(translation_model) {
|
||||||
class_<TranslationModel>("TranslationModel")
|
class_<TranslationModel>("TranslationModel")
|
||||||
.constructor<std::string>()
|
.constructor(&TranslationModelFactory, allow_raw_pointers())
|
||||||
.function("translate", &TranslationModel::translate)
|
.function("translate", &TranslationModel::translateMultiple)
|
||||||
.function("isAlignmentSupported", &TranslationModel::isAlignmentSupported)
|
.function("isAlignmentSupported", &TranslationModel::isAlignmentSupported)
|
||||||
;
|
;
|
||||||
|
// ^ We redirect Service::translateMultiple to WASMBound::translate instead. Sane API is
|
||||||
|
// translate. If and when async comes, we can be done with this inconsistency.
|
||||||
|
|
||||||
register_vector<std::string>("VectorString");
|
register_vector<std::string>("VectorString");
|
||||||
register_vector<TranslationResult>("VectorTranslationResult");
|
register_vector<TranslationResult>("VectorTranslationResult");
|
||||||
|
@ -6,15 +6,16 @@
|
|||||||
#include <emscripten/bind.h>
|
#include <emscripten/bind.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "TranslationResult.h"
|
#include "response.h"
|
||||||
|
|
||||||
|
typedef marian::bergamot::Response TranslationResult;
|
||||||
|
|
||||||
using namespace emscripten;
|
using namespace emscripten;
|
||||||
|
|
||||||
// Binding code
|
// Binding code
|
||||||
EMSCRIPTEN_BINDINGS(translation_result) {
|
EMSCRIPTEN_BINDINGS(translation_result) {
|
||||||
class_<TranslationResult>("TranslationResult")
|
class_<TranslationResult>("TranslationResult")
|
||||||
.constructor<std::string, std::string, TranslationResult::SentenceMappings>()
|
.constructor<>()
|
||||||
.function("getOriginalText", &TranslationResult::getOriginalText)
|
.function("getOriginalText", &TranslationResult::getOriginalText)
|
||||||
.function("getTranslatedText", &TranslationResult::getTranslatedText)
|
.function("getTranslatedText", &TranslationResult::getTranslatedText);
|
||||||
;
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,36 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
usage="Patch wasm artifacts to enable wormhole via APIs that compile and instantiate wasm module.
|
||||||
|
|
||||||
echo "Patching wasm artifacts to enable wormhole via APIs that compile and instantiate wasm module"
|
Usage: $(basename "$0") [WASM_ARTIFACTS_FOLDER]
|
||||||
sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
|
|
||||||
sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
|
where:
|
||||||
sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
|
WASM_ARTIFACTS_FOLDER Folder containing wasm artifacts
|
||||||
|
(An optional argument, if unspecified the default is: current folder)"
|
||||||
|
|
||||||
|
if [ "$#" -gt 1 ]; then  # at most one (optional) argument is accepted
|
||||||
|
echo "Illegal number of parameters passed"
|
||||||
|
echo "$usage"
|
||||||
|
exit 1  # a bare `exit` would return echo's status (0) and hide the usage error
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Parse wasm artifacts folder if provided via script argument or set it to default
|
||||||
|
WASM_ARTIFACTS_FOLDER=$PWD  # default: current folder
|
||||||
|
if [ "$#" -eq 1 ]; then
|
||||||
|
if [ ! -e "$1" ]; then
|
||||||
|
echo "Error: Folder \"$1\" doesn't exist"
|
||||||
|
exit 1  # signal failure instead of returning echo's status (0)
|
||||||
|
fi
|
||||||
|
WASM_ARTIFACTS_FOLDER="$1"
|
||||||
|
fi
|
||||||
|
|
||||||
|
WASM_ARTIFACTS="$WASM_ARTIFACTS_FOLDER/bergamot-translator-worker.js"  # the file that gets patched
|
||||||
|
if [ ! -e "$WASM_ARTIFACTS" ]; then
|
||||||
|
echo "Error: Artifact \"$WASM_ARTIFACTS\" doesn't exist"
|
||||||
|
exit 1  # signal failure instead of returning echo's status (0)
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Patching \"$WASM_ARTIFACTS\" to enable wormhole via APIs that compile and instantiate wasm module"
|
||||||
|
sed -i.bak 's/WebAssembly.instantiateStreaming[[:space:]]*([[:space:]]*response[[:space:]]*,[[:space:]]*info[[:space:]]*)/WebAssembly.instantiateStreaming(response, info, {simdWormhole:true})/g' "$WASM_ARTIFACTS"  # path quoted so folders containing spaces work
|
||||||
|
sed -i.bak 's/WebAssembly.instantiate[[:space:]]*([[:space:]]*binary[[:space:]]*,[[:space:]]*info[[:space:]]*)/WebAssembly.instantiate(binary, info, {simdWormhole:true})/g' "$WASM_ARTIFACTS"
|
||||||
|
sed -i.bak 's/WebAssembly.Module[[:space:]]*([[:space:]]*bytes[[:space:]]*)/WebAssembly.Module(bytes, {simdWormhole:true})/g' "$WASM_ARTIFACTS"
|
||||||
echo "Done"
|
echo "Done"
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<link rel="icon" href="data:,">
|
<link rel="icon" href="data:,">
|
||||||
<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
||||||
</head>
|
</head>
|
||||||
<style>
|
<style>
|
||||||
body, html, div {
|
body, html, div {
|
||||||
@ -61,9 +61,27 @@ En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que r
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
|
// This function downloads file from a url and returns the array buffer
|
||||||
|
const downloadAsArrayBuffer = async(url) => {
|
||||||
|
const response = await fetch(url);
|
||||||
|
if (!response.ok) {
|
||||||
|
throw Error(`Downloading ${url} failed: HTTP ${response.status} - ${response.statusText}`);
|
||||||
|
}
|
||||||
|
return response.arrayBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
var model, request, input = undefined;
|
// This function constructs the AlignedMemory from the array buffer and the alignment size
|
||||||
const loadModel = (from, to) => {
|
function constructAlignedMemoryFromBuffer(buffer, alignmentSize) {
|
||||||
|
var byteArray = new Int8Array(buffer);
|
||||||
|
console.debug("byteArray size: ", byteArray.byteLength);
|
||||||
|
var alignedMemory = new Module.AlignedMemory(byteArray.byteLength, alignmentSize);
|
||||||
|
const alignedByteArrayView = alignedMemory.getByteArrayView();
|
||||||
|
alignedByteArrayView.set(byteArray);
|
||||||
|
return alignedMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
var translationModel, request, input = undefined;
|
||||||
|
const constructTranslationModel = async (from, to) => {
|
||||||
|
|
||||||
const languagePair = `${from}${to}`;
|
const languagePair = `${from}${to}`;
|
||||||
|
|
||||||
@ -72,11 +90,11 @@ En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que r
|
|||||||
|
|
||||||
// Set the Model Configuration as YAML formatted string.
|
// Set the Model Configuration as YAML formatted string.
|
||||||
// For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
|
// For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
|
||||||
const modelConfig = `models:
|
/*const modelConfig = `models:
|
||||||
- /${languagePair}/model.${languagePair}.intgemm.alphas.bin
|
- /${languagePair}/model.${languagePair}.intgemm.alphas.bin
|
||||||
vocabs:
|
vocabs:
|
||||||
- /${vocabLanguagePair}/vocab.${vocabLanguagePair}.spm
|
- /${languagePair}/vocab.${vocabLanguagePair}.spm
|
||||||
- /${vocabLanguagePair}/vocab.${vocabLanguagePair}.spm
|
- /${languagePair}/vocab.${vocabLanguagePair}.spm
|
||||||
beam-size: 1
|
beam-size: 1
|
||||||
normalize: 1.0
|
normalize: 1.0
|
||||||
word-penalty: 0
|
word-penalty: 0
|
||||||
@ -93,22 +111,54 @@ shortlist:
|
|||||||
- 50
|
- 50
|
||||||
- 50
|
- 50
|
||||||
`;
|
`;
|
||||||
/*
|
|
||||||
This config is not valid anymore in new APIs
|
|
||||||
mini-batch: 32
|
|
||||||
maxi-batch: 100
|
|
||||||
maxi-batch-sort: src
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
const modelConfigWithoutModelAndShortList = `vocabs:
|
||||||
|
- /${languagePair}/vocab.${vocabLanguagePair}.spm
|
||||||
|
- /${languagePair}/vocab.${vocabLanguagePair}.spm
|
||||||
|
beam-size: 1
|
||||||
|
normalize: 1.0
|
||||||
|
word-penalty: 0
|
||||||
|
max-length-break: 128
|
||||||
|
mini-batch-words: 1024
|
||||||
|
workspace: 128
|
||||||
|
max-length-factor: 2.0
|
||||||
|
skip-cost: true
|
||||||
|
cpu-threads: 0
|
||||||
|
quiet: true
|
||||||
|
quiet-translation: true
|
||||||
|
gemm-precision: int8shift
|
||||||
|
`;
|
||||||
|
|
||||||
// TODO: Use in model config when wormhole is enabled:
|
// TODO: Use in model config when wormhole is enabled:
|
||||||
// gemm-precision: int8shift
|
// gemm-precision: int8shift
|
||||||
// TODO: Use in model config when loading of binary models is supported and we use model.intgemm.alphas.bin:
|
// TODO: Use in model config when loading of binary models is supported and we use model.intgemm.alphas.bin:
|
||||||
// gemm-precision: int8shiftAlphaAll
|
// gemm-precision: int8shiftAlphaAll
|
||||||
|
|
||||||
console.debug("modelConfig: ", modelConfig);
|
const modelFile = `models/${languagePair}/model.${languagePair}.intgemm.alphas.bin`;
|
||||||
|
console.debug("modelFile: ", modelFile);
|
||||||
|
const shortlistFile = `models/${languagePair}/lex.50.50.${languagePair}.s2t.bin`;
|
||||||
|
console.debug("shortlistFile: ", shortlistFile);
|
||||||
|
|
||||||
// Instantiate the TranslationModel
|
try {
|
||||||
if (model) model.delete();
|
// Download the files as buffers from the given urls
|
||||||
model = new Module.TranslationModel(modelConfig);
|
let start = Date.now();
|
||||||
|
const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]);
|
||||||
|
const modelBuffer = downloadedBuffers[0];
|
||||||
|
const shortListBuffer = downloadedBuffers[1];
|
||||||
|
log(`${languagePair} file download took ${(Date.now() - start) / 1000} secs`);
|
||||||
|
|
||||||
|
// Construct AlignedMemory objects with downloaded buffers
|
||||||
|
var alignedModelMemory = constructAlignedMemoryFromBuffer(modelBuffer, 256);
|
||||||
|
var alignedShortlistMemory = constructAlignedMemoryFromBuffer(shortListBuffer, 64);
|
||||||
|
|
||||||
|
// Instantiate the TranslationModel
|
||||||
|
if (translationModel) translationModel.delete();
|
||||||
|
console.debug("Creating TranslationModel with config:", modelConfigWithoutModelAndShortList);
|
||||||
|
translationModel = new Module.TranslationModel(modelConfigWithoutModelAndShortList, alignedModelMemory, alignedShortlistMemory);
|
||||||
|
} catch (error) {
|
||||||
|
log(error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const translate = (paragraphs) => {
|
const translate = (paragraphs) => {
|
||||||
@ -127,16 +177,9 @@ maxi-batch-sort: src
|
|||||||
})
|
})
|
||||||
// Access input (just for debugging)
|
// Access input (just for debugging)
|
||||||
console.log('Input size=', input.size());
|
console.log('Input size=', input.size());
|
||||||
/*
|
|
||||||
for (let i = 0; i < input.size(); i++) {
|
|
||||||
console.log(' val:' + input.get(i));
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Translate the input; the result is a vector<TranslationResult>
|
// Translate the input; the result is a vector<TranslationResult>
|
||||||
let result = model.translate(input, request);
|
let result = translationModel.translate(input, request);
|
||||||
// Access original and translated text from each entry of vector<TranslationResult>
|
|
||||||
//console.log('Result size=', result.size(), ' - TimeDiff - ', (Date.now() - start)/1000);
|
|
||||||
const translatedParagraphs = [];
|
const translatedParagraphs = [];
|
||||||
for (let i = 0; i < result.size(); i++) {
|
for (let i = 0; i < result.size(); i++) {
|
||||||
translatedParagraphs.push(result.get(i).getTranslatedText());
|
translatedParagraphs.push(result.get(i).getTranslatedText());
|
||||||
@ -147,14 +190,16 @@ maxi-batch-sort: src
|
|||||||
return translatedParagraphs;
|
return translatedParagraphs;
|
||||||
}
|
}
|
||||||
|
|
||||||
document.querySelector("#load").addEventListener("click", () => {
|
document.querySelector("#load").addEventListener("click", async() => {
|
||||||
|
document.querySelector("#load").disabled = true;
|
||||||
const lang = document.querySelector('input[name="modellang"]:checked').value;
|
const lang = document.querySelector('input[name="modellang"]:checked').value;
|
||||||
const from = lang.substring(0, 2);
|
const from = lang.substring(0, 2);
|
||||||
const to = lang.substring(2, 4);
|
const to = lang.substring(2, 4);
|
||||||
let start = Date.now();
|
let start = Date.now();
|
||||||
loadModel(from, to)
|
await constructTranslationModel(from, to);
|
||||||
log(`model ${from}${to} loaded in ${(Date.now() - start) / 1000} secs`);
|
log(`translation model ${from}${to} construction took ${(Date.now() - start) / 1000} secs`);
|
||||||
//log('Model Alignment:', model.isAlignmentSupported());
|
document.querySelector("#load").disabled = false;
|
||||||
|
//log('Model Alignment:', translationModel.isAlignmentSupported());
|
||||||
});
|
});
|
||||||
|
|
||||||
const translateCall = () => {
|
const translateCall = () => {
|
||||||
|
@ -1,9 +1,30 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
echo "Start: Copying artifacts in local folder------"
|
|
||||||
cp ../../build-wasm/wasm/bergamot-translator-worker.data .
|
usage="Copy wasm artifacts from build directory and start httpserver
|
||||||
cp ../../build-wasm/wasm/bergamot-translator-worker.js .
|
|
||||||
cp ../../build-wasm/wasm/bergamot-translator-worker.wasm .
|
Usage: $(basename "$0") [WASM_ARTIFACTS_FOLDER]
|
||||||
cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js .
|
|
||||||
|
where:
|
||||||
|
WASM_ARTIFACTS_FOLDER Folder containing pre-built wasm artifacts"
|
||||||
|
|
||||||
|
if [ "$#" -ne 1 ]; then  # exactly one argument (the artifacts folder) is required
|
||||||
|
echo "Illegal number of parameters passed"
|
||||||
|
echo "$usage"
|
||||||
|
exit 1  # a bare `exit` would return echo's status (0) and hide the usage error
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if WASM_ARTIFACTS_FOLDER is valid or not
|
||||||
|
if [ ! -e "$1" ]; then
|
||||||
|
echo "Error: Folder \"$1\" doesn't exist"
|
||||||
|
exit 1  # signal failure instead of returning echo's status (0)
|
||||||
|
fi
|
||||||
|
|
||||||
|
WASM_ARTIFACTS="$1/bergamot-translator-worker.*"
|
||||||
|
for i in $WASM_ARTIFACTS; do  # unquoted on purpose so the glob pattern expands
|
||||||
|
[ -f "$i" ] || break  # not a regular file (e.g. the glob matched nothing): stop copying
|
||||||
|
cp "$i" .
|
||||||
|
echo "Copied \"$i\""
|
||||||
|
done
|
||||||
|
|
||||||
npm install
|
npm install
|
||||||
echo "Start httpserver"
|
echo "Start httpserver"
|
||||||
|
Loading…
Reference in New Issue
Block a user