diff --git a/.github/workflows/native-custom_marian-mac.yml b/.github/workflows/native-custom_marian-mac.yml
deleted file mode 100644
index 1aae7e5..0000000
--- a/.github/workflows/native-custom_marian-mac.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Native (Custom Marian) MacOS
-
-on:
-  push:
-    branches: [ main, ci-sandbox ]
-  pull_request:
-    branches: [ main, ci-sandbox ]
-
-jobs:
-  build-macos:
-    name: MacOS
-    runs-on: macos-10.15
-
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-
-    - name: Configure CMake
-      run: |
-        mkdir -p build
-        cd build
-        cmake ..
-
-    - name: Compile
-      working-directory: build
-      run: make -j2
-
-    - name: Print versions
-      working-directory: build
-      run: |
-        ./app/bergamot-translator-app --version
diff --git a/.github/workflows/native-custom_marian-ubuntu.yml b/.github/workflows/native-custom_marian-ubuntu.yml
deleted file mode 100644
index f051871..0000000
--- a/.github/workflows/native-custom_marian-ubuntu.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Native (Custom Marian) Ubuntu
-
-on:
-  push:
-    branches: [ main, ci-sandbox ]
-  pull_request:
-    branches: [ main, ci-sandbox ]
-
-jobs:
-  build-macos:
-    name: Ubuntu
-    runs-on: ubuntu-latest
-
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-
-    - name: Configure CMake
-      run: |
-        mkdir -p build
-        cd build
-        cmake ..
-
-    - name: Compile
-      working-directory: build
-      run: make -j2
-
-    - name: Print versions
-      working-directory: build
-      run: |
-        ./app/bergamot-translator-app --version
diff --git a/.github/workflows/native-full_marian-mac.yml b/.github/workflows/native-full_marian-mac.yml
deleted file mode 100644
index 1928c5c..0000000
--- a/.github/workflows/native-full_marian-mac.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: Native (Full Marian) MacOS
-
-on:
-  push:
-    branches: [ main, ci-sandbox ]
-  pull_request:
-    branches: [ main, ci-sandbox ]
-
-jobs:
-  build-macos:
-    name: MacOS CPU-only
-    runs-on: macos-10.15
-
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-
-    - name: Install dependencies
-      run: brew install openblas protobuf
-
-    # Openblas location is exported explicitly because openblas is keg-only,
-    # which means it was not symlinked into /usr/local/.
-    # CMake cannot find BLAS on GitHub runners if Marian is being compiled
-    # statically, hence USE_STATIC_LIBS=off
-    - name: Configure CMake
-      run: |
-        export LDFLAGS="-L/usr/local/opt/openblas/lib"
-        export CPPFLAGS="-I/usr/local/opt/openblas/include"
-        mkdir -p build
-        cd build
-        cmake .. \
-          -DCOMPILE_CPU=on \
-          -DCOMPILE_CUDA=off \
-          -DCOMPILE_EXAMPLES=on \
-          -DCOMPILE_SERVER=on \
-          -DCOMPILE_TESTS=on \
-          -DUSE_FBGEMM=on \
-          -DUSE_SENTENCEPIECE=on \
-          -DUSE_STATIC_LIBS=off \
-          -DUSE_WASM_COMPATIBLE_SOURCE=off
-
-    - name: Compile
-      working-directory: build
-      run: make -j2
-
-    - name: Run unit tests
-      working-directory: build
-      run: make test
-
-    - name: Print versions
-      working-directory: build
-      run: |
-        ./marian --version
-        ./marian-decoder --version
-        ./marian-scorer --version
-        ./spm_encode --version
-
diff --git a/.github/workflows/native-full_marian-ubuntu.yml b/.github/workflows/native-full_marian-ubuntu.yml
deleted file mode 100644
index e414f64..0000000
--- a/.github/workflows/native-full_marian-ubuntu.yml
+++ /dev/null
@@ -1,120 +0,0 @@
-name: Native (Full Marian) Ubuntu
-
-on:
-  push:
-    branches: [ main, ci-test ]
-  pull_request:
-    branches: [ main, ci-test ]
-
-jobs:
-  build-ubuntu:
-    strategy:
-      matrix:
-        include:
-          # Ubuntu CPU-only build
-          - name: "Ubuntu CPU-only"
-            os: ubuntu-latest
-            cuda: ""
-            gcc: 8
-            cpu: true
-            gpu: false
-          # GPU Builds are commented out, for bergamot-translator CI runs.
-          # Ubuntu GPU-only build
-          # - name: "Ubuntu GPU-only"
-          #   os: ubuntu-latest
-          #   cuda: "10.2"
-          #   gcc: 7
-          #   cpu: false
-          #   gpu: true
-          # Ubuntu 20.04 supports CUDA 11+
-          #- name: "Ubuntu 20.04 CUDA 11.0 gcc-9"
-            #os: ubuntu-20.04
-            #cuda: "11.0"
-            #gcc: 9
-            #cpu: false
-            #gpu: true
-          # Ubuntu 18.04 supports CUDA 10.1+
-          # - name: "Ubuntu 18.04 CUDA 10.2 gcc-8"
-          #   os: ubuntu-18.04
-          #   cuda: "10.2"
-          #   gcc: 8
-          #   cpu: true
-          #   gpu: true
-          # Ubuntu 16.04 supports CUDA 8+
-          # - name: "Ubuntu 16.04 CUDA 9.2 gcc-7"
-          #   os: ubuntu-16.04
-          #   cuda: "9.2"
-          #   gcc: 7
-          #   cpu: true
-          #   gpu: true
-
-    runs-on: ${{ matrix.os }}
-    name: ${{ matrix.name }}
-
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-
-    # The following packages are already installed on GitHub-hosted runners:
-    # build-essential openssl libssl-dev
-    # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because
-    # it is installed together with libprotobuf-dev
-    - name: Install dependencies
-      run: sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev g++-8
-
-    # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
-    - name: Install MKL
-      run: |
-        wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
-        sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
-        sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
-        sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
-      if: matrix.cpu == true
-
-    # The script simplifies installation of different versions of CUDA
-    - name: Install CUDA
-      run: ./3rd_party/marian-dev/scripts/ci/install_cuda_ubuntu.sh ${{ matrix.cuda }}
-      if: matrix.gpu == true
-
-    # Boost is installed on GitHub-hosted runners in a non-standard location
-    # https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671
-    - name: Configure CMake
-      run: |
-        mkdir -p build
-        cd build
-        CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \
-        cmake .. \
-          -DBoost_ARCHITECTURE=-x64 \
-          -DCMAKE_BUILD_TYPE=Release \
-          -DCOMPILE_CPU=${{ matrix.cpu }} \
-          -DCOMPILE_CUDA=${{ matrix.gpu }} \
-          -DCOMPILE_EXAMPLES=on \
-          -DCOMPILE_SERVER=on \
-          -DCOMPILE_TESTS=on \
-          -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-${{ matrix.cuda }} \
-          -DUSE_FBGEMM=${{ matrix.cpu }} \
-          -DUSE_SENTENCEPIECE=on \
-          -DUSE_STATIC_LIBS=on \
-          -DUSE_WASM_COMPATIBLE_SOURCE=off
-
-    - name: Compile
-      working-directory: build
-      run: make -j2
-
-    - name: Run unit tests
-      working-directory: build
-      run: make test
-      # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
-      if: matrix.gpu == false
-
-    - name: Print versions
-      working-directory: build
-      run: |
-        ./marian --version
-        ./marian-decoder --version
-        ./marian-scorer --version
-        ./marian-server --version
-        ./spm_encode --version
-
diff --git a/.github/workflows/native-mac.yml b/.github/workflows/native-mac.yml
new file mode 100644
index 0000000..8df203d
--- /dev/null
+++ b/.github/workflows/native-mac.yml
@@ -0,0 +1,108 @@
+name: Native MacOS
+
+on:
+  push:
+    branches: [ main, ci-sandbox ]
+  pull_request:
+    branches: [ main, ci-sandbox ]
+
+jobs:
+  build-macos:
+    strategy: 
+      fail-fast: false
+      matrix:
+        include:
+          - name: "full-marian"
+            os: macos-10.15
+            test_tags: ""
+            cmake: 
+              CMAKE_BUILD_TYPE: "Release"
+              COMPILE_TESTS: "ON"
+              USE_WASM_COMPATIBLE_SOURCE: "OFF"
+              USE_FBGEMM: "OFF"
+              USE_STATIC_LIBS: "OFF"
+              COMPILE_SERVER: "OFF"
+              COMPILE_EXAMPLES: "OFF"
+
+          - name: "minimal-marian"
+            os: macos-10.15
+            test_tags: "'#wasm'"
+            cmake: 
+              CMAKE_BUILD_TYPE: "Release"
+              COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and compile tests fail
+              USE_WASM_COMPATIBLE_SOURCE: "ON"
+              USE_FBGEMM: "OFF"
+              # explicitly set due to requirement of minimal marian being used
+              # within WASM. This is some yaml ugliness, but issok.
+              USE_STATIC_LIBS: "ON" 
+              COMPILE_SERVER: "OFF"
+              COMPILE_EXAMPLES: "OFF"
+        
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive
+
+    - name: Install dependencies
+      run: |
+          brew update
+          brew install openblas protobuf coreutils
+
+    # Openblas location is exported explicitly because openblas is keg-only,
+    # which means it was not symlinked into /usr/local/.
+    - name: Set BLAS Environment variables
+      run: |
+          echo "LDFLAGS=-L/usr/local/opt/openblas/lib" >> $GITHUB_ENV
+          echo "CPPFLAGS=-I/usr/local/opt/openblas/include" >> $GITHUB_ENV
+      if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF'
+
+    # CMake cannot find BLAS on GitHub runners if Marian is compiled statically,
+    # so the full-marian matrix entry sets USE_STATIC_LIBS to "OFF".
+    - name: Configure CMake
+      run: |
+        mkdir -p build
+        cd build
+        cmake .. \
+          -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }} \
+          -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }} \
+          -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \
+          -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \
+          -DUSE_STATIC_LIBS=${{ matrix.cmake.USE_STATIC_LIBS }} \
+          -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \
+          -DUSE_FBGEMM=${{ matrix.cmake.USE_FBGEMM }}
+
+    - name: Compile
+      working-directory: build
+      run: make -j2
+
+    - name: Run unit tests
+      working-directory: build
+      run: make test
+      if: matrix.cmake.COMPILE_TESTS == 'ON'
+
+    - name: Print versions
+      working-directory: build
+      run: |
+        ./app/bergamot-translator-app --version
+
+    - name: Install regression-test framework (BRT)
+      working-directory: bergamot-translator-tests
+      run: make install
+
+    - name: Run regression-tests (BRT)
+      working-directory: bergamot-translator-tests
+      run: MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }}
+
+    - name: Upload regression-tests artifacts
+      uses: actions/upload-artifact@v2
+      if: ${{ always() }}
+      with: 
+        name: brt-artifacts-${{ matrix.name }}
+        path: |
+            bergamot-translator-tests/**/*.expected
+            bergamot-translator-tests/**/*.log
+            bergamot-translator-tests/**/*.out
diff --git a/.github/workflows/native-ubuntu.yml b/.github/workflows/native-ubuntu.yml
new file mode 100644
index 0000000..dc8016b
--- /dev/null
+++ b/.github/workflows/native-ubuntu.yml
@@ -0,0 +1,117 @@
+name: Native Ubuntu
+
+on:
+  push:
+    branches: [ main, ci-sandbox ]
+  pull_request:
+    branches: [ main, ci-sandbox ]
+
+jobs:
+  build-ubuntu:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: "full-marian"
+            os: ubuntu-latest
+            gcc: 8
+            cpu: 'ON'
+            gpu: 'OFF'
+            test_tags: ""
+            cmake: 
+              CMAKE_BUILD_TYPE: "Release"
+              COMPILE_TESTS: "ON"
+              USE_WASM_COMPATIBLE_SOURCE: "OFF"
+              COMPILE_SERVER: "OFF"
+              COMPILE_EXAMPLES: "OFF"
+
+          - name: "minimal-marian"
+            os: ubuntu-latest
+            gcc: 8
+            cpu: 'ON'
+            gpu: 'OFF'
+            test_tags: "'#wasm'"
+            cmake:
+              CMAKE_BUILD_TYPE: "Release"
+              COMPILE_TESTS: "OFF" # Minimal marian has no sqlite support and COMPILE_TEST=ON fails.
+              USE_WASM_COMPATIBLE_SOURCE: "ON"
+              COMPILE_SERVER: "OFF"
+              COMPILE_EXAMPLES: "OFF"
+
+
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.name }}
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive
+
+    # The following packages are already installed on GitHub-hosted runners:
+    # build-essential openssl libssl-dev
+    # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because
+    # it is installed together with libprotobuf-dev
+    - name: Install dependencies
+      run: |
+        sudo apt-get update 
+        sudo apt-get install -y \
+            libgoogle-perftools-dev libprotobuf-dev protobuf-compiler  \
+            libboost-all-dev g++-${{ matrix.gcc }} 
+
+    # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
+    - name: Install MKL
+      run: |
+        wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
+        sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
+        sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
+        sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
+      if: matrix.cmake.USE_WASM_COMPATIBLE_SOURCE == 'OFF'
+
+    # Boost is installed on GitHub-hosted runners in a non-standard location
+    # https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671
+    - name: Configure CMake
+      run: |
+        mkdir -p build
+        cd build
+        CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \
+        cmake .. \
+          -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }} \
+          -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }} \
+          -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \
+          -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \
+          -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }}
+
+    - name: Compile bergamot-translator
+      working-directory: build
+      run: make -j2
+
+    - name: Run unit tests
+      working-directory: build
+      run: make test
+      # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
+      if: matrix.gpu == 'OFF' && matrix.cmake.COMPILE_TESTS == 'ON'
+
+    - name: Print versions
+      working-directory: build
+      run: |
+        ./app/bergamot-translator-app --version
+
+
+    - name: Install regression-test framework (BRT)
+      working-directory: bergamot-translator-tests
+      run: make install
+
+    - name: Run regression-tests (BRT)
+      working-directory: bergamot-translator-tests
+      run: MARIAN=../build ./run_brt.sh ${{ matrix.test_tags }}
+
+    - name: Upload regression-tests artifacts
+      uses: actions/upload-artifact@v2
+      if: ${{ always() }}
+      with: 
+        name: brt-artifacts-${{ matrix.name }}
+        path: |
+            bergamot-translator-tests/**/*.expected
+            bergamot-translator-tests/**/*.log
+            bergamot-translator-tests/**/*.out
diff --git a/.github/workflows/wasm-custom_marian-mac.yml b/.github/workflows/wasm-custom_marian-mac.yml
index 87141c7..c275f3c 100644
--- a/.github/workflows/wasm-custom_marian-mac.yml
+++ b/.github/workflows/wasm-custom_marian-mac.yml
@@ -40,9 +40,8 @@ jobs:
       - name: Check artifacts
         working-directory: build-wasm
         run: |
-          export WASM_ARTIFACTS_DIR=wasm
-          ls -all ${WASM_ARTIFACTS_DIR}
-          if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null
+          ls -all bergamot*
+          if ls bergamot*.wasm &>/dev/null && ls bergamot*.js &>/dev/null
           then
             echo "Artifacts Successfully Generated"
           else
diff --git a/.github/workflows/wasm-custom_marian-ubuntu.yml b/.github/workflows/wasm-custom_marian-ubuntu.yml
index d1364dc..4483546 100644
--- a/.github/workflows/wasm-custom_marian-ubuntu.yml
+++ b/.github/workflows/wasm-custom_marian-ubuntu.yml
@@ -2,9 +2,9 @@ name: WASM (Custom Marian) Ubuntu
 
 on:
   push:
-    branches: [ main ]
+    branches: [ main, ci-sandbox ]
   pull_request:
-    branches: [ main ]
+    branches: [ main, ci-sandbox ]
 
 jobs:
   build-wasm:
@@ -40,9 +40,8 @@ jobs:
       - name: Check artifacts
         working-directory: build-wasm
         run: |
-          export WASM_ARTIFACTS_DIR=wasm
-          ls -all ${WASM_ARTIFACTS_DIR}
-          if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null
+          ls -all bergamot*
+          if ls bergamot*.wasm &>/dev/null && ls bergamot*.js &>/dev/null
           then
             echo "Artifacts Successfully Generated"
           else
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index fd1f21f..00e9cfa 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -17,12 +17,6 @@ jobs:
           # Windows CPU-only build
           - name: "Windows CPU-only"
             cuda: ""
-            gpu: false
-          # GPU Builds are commented out, for bergamot-translator CI runs.
-          # Windows CPU+GPU build
-          # - name: "Windows CPU+CUDA"
-          #   cuda: "10.2"
-          #   gpu: true
 
     runs-on: windows-2019
     name: ${{ matrix.name }}
@@ -42,89 +36,32 @@ jobs:
         echo "MKLROOT=${{ github.workspace }}\mkl" | Out-File -FilePath $env:GITHUB_ENV  -Encoding utf8 -Append
       shell: powershell
 
-    - name: Install CUDA
-      run: |
-        .\3rd_party\marian-dev\scripts\ci\install_cuda_windows.ps1 "10.2"
-        # Set CUDA_PATH environment variable so that CMake can find CUDA
-        echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV  -Encoding utf8 -Append
-        echo "$env:CUDA_PATH/bin"       | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      shell: powershell
-      if: matrix.gpu == true
-
     - name: Prepare vcpkg
-      uses: lukka/run-vcpkg@v4
+      uses: lukka/run-vcpkg@v7.3
       with:
-        vcpkgArguments: protobuf
-        vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da
+        vcpkgArguments: protobuf pcre2
+        vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da 
         vcpkgDirectory: ${{ github.workspace }}/vcpkg/
         vcpkgTriplet: x64-windows-static
 
-    # Windows CUDA builds use USE_NCCL=off due to compilation errors.
-    - name: Build Debug
+    # Windows CPU only minimal build
+    - name: Build Release # @TODO this is actually a debug build until the ninja generator gets fixed
       uses: lukka/run-cmake@v3
       with:
-        buildDirectory: ${{ github.workspace }}/build/Debug
+        buildDirectory: ${{ github.workspace }}/build
         cmakeAppendedArgs: '-G Ninja
-          -DCMAKE_BUILD_TYPE="Debug"
-          -DOPENSSL_USE_STATIC_LIBS="TRUE"
-          -DOPENSSL_MSVC_STATIC_RT="TRUE"
-          -DCOMPILE_CPU="TRUE"
-          -DCOMPILE_CUDA="${{ matrix.gpu }}"
-          -DCOMPILE_SERVER="FALSE"
-          -DCOMPILE_TESTS="TRUE"
-          -DUSE_FBGEMM="TRUE"
-          -DUSE_MPI="FALSE"
-          -DUSE_NCCL="FALSE"
-          -DUSE_SENTENCEPIECE="TRUE"
-          -DUSE_STATIC_LIBS="TRUE"'
-        cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
-        cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
-        useVcpkgToolchainFile: true
-      # Building in Debug is sufficient for the all-in CPU+GPU compilation;
-      # its main purpose is to detect warnings that the Release build is not
-      # able to find sometimes.
-      if: matrix.gpu == true
-
-    # Windows CUDA builds use USE_NCCL=off due to compilation errors
-    # Boost is pre-installed on Azure/GitHub-hosted Windows runners
-    # https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md#boost
-    # (not used yet)
-    - name: Build Release
-      uses: lukka/run-cmake@v3
-      with:
-        buildDirectory: ${{ github.workspace }}/build/
-        cmakeAppendedArgs: '-G Ninja
-          -DBOOST_ROOT="$(BOOST_ROOT_1_72_0)"
-          -DBOOST_INCLUDEDIR="$(BOOST_ROOT_1_72_0)/include"
-          -DBOOST_LIBRARYDIR="$(BOOST_ROOT_1_72_0)/lib"
           -DCMAKE_BUILD_TYPE="Release"
-          -DOPENSSL_USE_STATIC_LIBS="TRUE"
-          -DOPENSSL_MSVC_STATIC_RT="TRUE"
-          -DCOMPILE_CPU="TRUE"
-          -DCOMPILE_CUDA="${{ matrix.gpu }}"
-          -DCOMPILE_SERVER="FALSE"
-          -DCOMPILE_TESTS="TRUE"
-          -DUSE_FBGEMM="TRUE"
-          -DUSE_MPI="FALSE"
-          -DUSE_NCCL="FALSE"
-          -DUSE_SENTENCEPIECE="TRUE"
+          -DUSE_WASM_COMPATIBLE_SOURCE="OFF"
           -DUSE_STATIC_LIBS="TRUE"'
         cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
         cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
         useVcpkgToolchainFile: true
+        cmakeBuildType: Release
 
-    # Removing unit-tests, taken care of in browsermt/marian-dev
-    # - name: Run unit tests
-    #   working-directory: build/
-    #   run: ctest
-    #   # Not run in GPU builds because GitHub-hosted VMs do not have GPUs
-    #   if: matrix.gpu == false
 
     - name: Print versions
-      working-directory: build/
+      working-directory: build
       run: |
-        .\marian.exe --version
-        .\marian-decoder.exe --version
-        .\marian-scorer.exe --version
+        .\app\service-cli.exe --version
         dir *.exe
       shell: cmd
diff --git a/.gitmodules b/.gitmodules
index cc40735..8aa1014 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "3rd_party/ssplit-cpp"]
 	path = 3rd_party/ssplit-cpp
 	url = https://github.com/browsermt/ssplit-cpp
+[submodule "bergamot-translator-tests"]
+	path = bergamot-translator-tests
+	url = https://github.com/browsermt/bergamot-translator-tests
diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev
index 0f0bcf9..94aeaa4 160000
--- a/3rd_party/marian-dev
+++ b/3rd_party/marian-dev
@@ -1 +1 @@
-Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172
+Subproject commit 94aeaa4616a0fb01ac95a23f0e74a214a94e7609
diff --git a/3rd_party/ssplit-cpp b/3rd_party/ssplit-cpp
index dfefe34..8d338ed 160000
--- a/3rd_party/ssplit-cpp
+++ b/3rd_party/ssplit-cpp
@@ -1 +1 @@
-Subproject commit dfefe34218fe3aced70266994b6557f029fcbdde
+Subproject commit 8d338ed5c77d22f8c86f60554596fa57bf5091e6
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 412b386..3fe03c9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,28 @@ project(bergamot_translator CXX C)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
+# Note that with CMake MSVC build, the option CMAKE_BUILD_TYPE is automatically derived from the key
+# 'configurationType' in CMakeSettings.json configurations
+if(NOT CMAKE_BUILD_TYPE)
+  message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release")
+  set(CMAKE_BUILD_TYPE "Release")
+endif()
+#MSVC can't seem to pick up correct flags otherwise:
+if(MSVC)
+  add_definitions(-DUSE_SSE2=1) # Supposed to fix something in the sse_mathfun.h but not sure it does
+  set(INTRINSICS "/arch:AVX2") # Architecture we're targeting on win32. @TODO make this a variable
+  
+  set(CMAKE_CXX_FLAGS           "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj")
+  set(CMAKE_CXX_FLAGS_RELEASE   "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG")
+  set(CMAKE_CXX_FLAGS_DEBUG     "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")
+
+  # ignores warning LNK4049: locally defined symbol free imported - this comes from zlib
+  set(CMAKE_EXE_LINKER_FLAGS         "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049")
+  set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT")
+  set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD")
+  set(CMAKE_STATIC_LINKER_FLAGS      "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
+endif(MSVC)
+
 include(CMakeDependentOption)
 
 # Project specific cmake options
@@ -22,11 +44,12 @@ SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be package
 SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
 SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
 SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
+SET(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests")
 if (USE_WASM_COMPATIBLE_SOURCE)
   SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
   SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
   # # Setting the ssplit-cpp submodule specific cmake options for wasm
-  SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
+  SET(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
 endif()
 
 # Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
diff --git a/README.md b/README.md
index a876ac6..9dd47c4 100644
--- a/README.md
+++ b/README.md
@@ -38,19 +38,18 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
     cd bergamot-translator
     ```
 
-3. Download files (only required if you want to package files in wasm binary)
+3. Download files (only required if you want to perform inference using build artifacts)
 
-    This step is only required if you want to package files (e.g. models, vocabularies etc.)
-    into wasm binary. If you don't then just skip this step.
+    This step packages the vocabulary files into the wasm binary, which is required only if you want to perform inference.
+    The compilation commands will preload these files in Emscripten’s virtual file system.
 
-    The build preloads the files in Emscripten’s virtual file system.
-
-    If you want to package bergamot project specific models, please follow these instructions:
+    If you want to package bergamot project specific files, please follow these instructions:
     ```bash
-    mkdir models
     git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models
+    mkdir models
     cp -rf bergamot-models/prod/* models
     gunzip models/*/*
+    find models \( -type f -name "model*" -or -type f -name "lex*" \) -delete
     ```
 
 4. Compile
@@ -61,14 +60,14 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
         ```
 
     2. Compile the artefacts
-        * If you want to package files into wasm binary then execute following commands (Replace `FILES_TO_PACKAGE` with the path of the
-        directory containing the files to be packaged in wasm binary)
+        * If you want to package files into the wasm binary, then execute the following commands (replace `FILES_TO_PACKAGE` with the path of the
+        directory containing all the files to be packaged)
 
             ```bash
             emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR=FILES_TO_PACKAGE ../
             emmake make -j
             ```
-            e.g. If you want to package bergamot project specific models (downloaded using step 3 above) then
+            e.g. If you want to package bergamot project specific files (downloaded using step 3 above) then
             replace `FILES_TO_PACKAGE` with `../models`
 
         * If you don't want to package any file into wasm binary then execute following commands:
@@ -77,7 +76,7 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
             emmake make -j
             ```
 
-        The wasm artifacts (.js and .wasm files) will be available in `wasm` folder of build directory ("build-wasm" in this case).
+        The wasm artifacts (.js and .wasm files) will be available in the build directory ("build-wasm" in this case).
 
     3. Enable SIMD Wormhole via Wasm instantiation API in generated artifacts
         ```bash
diff --git a/app/bergamot-translator-app-bytearray.cpp b/app/bergamot-translator-app-bytearray.cpp
index 1fa5748..91353c0 100644
--- a/app/bergamot-translator-app-bytearray.cpp
+++ b/app/bergamot-translator-app-bytearray.cpp
@@ -7,9 +7,9 @@
 
 #include <iostream>
 
-#include "TranslationModel.h"
-#include "translator/parser.h"
 #include "translator/byte_array_util.h"
+#include "translator/parser.h"
+#include "translator/service.h"
 
 int main(int argc, char **argv) {
 
@@ -20,19 +20,17 @@ int main(int argc, char **argv) {
   std::string config = options->asYamlString();
 
   // Route the config string to construct marian model through TranslationModel
-  TranslationModel model(config, marian::bergamot::getModelMemoryFromConfig(options));
+  marian::bergamot::Service model(
+      config, marian::bergamot::getModelMemoryFromConfig(options));
 
   TranslationRequest translationRequest;
   std::vector<std::string> texts;
 
   for (std::string line; std::getline(std::cin, line);) {
-        texts.emplace_back(line);
+    texts.emplace_back(line);
   }
 
-  auto results = model.translate(std::move(texts), translationRequest);
-
-  // Resolve the future and get the actual result
-  //std::vector<TranslationResult> results = futureResults.get();
+  auto results = model.translateMultiple(std::move(texts), translationRequest);
 
   for (auto &result : results) {
     std::cout << result.getTranslatedText() << std::endl;
diff --git a/app/bergamot-translator-app.cpp b/app/bergamot-translator-app.cpp
index 4fba00b..c487969 100644
--- a/app/bergamot-translator-app.cpp
+++ b/app/bergamot-translator-app.cpp
@@ -1,16 +1,17 @@
 /*
  * main.cpp
  *
- * An application which accepts line separated texts in stdin and returns translated ones in stdout.
- * It is convenient for batch processing and can be used with tools like SacreBLEU.
+ * An application which accepts line separated texts in stdin and returns
+ * translated ones in stdout. It is convenient for batch processing and can be
+ * used with tools like SacreBLEU.
  *
  */
 
 #include <iostream>
 #include <string>
 
-#include "TranslationModel.h"
 #include "translator/parser.h"
+#include "translator/service.h"
 
 int main(int argc, char **argv) {
 
@@ -21,19 +22,16 @@ int main(int argc, char **argv) {
   std::string config = options->asYamlString();
 
   // Route the config string to construct marian model through TranslationModel
-  TranslationModel model(config);
+  marian::bergamot::Service model(config);
 
   TranslationRequest translationRequest;
   std::vector<std::string> texts;
 
   for (std::string line; std::getline(std::cin, line);) {
-        texts.emplace_back(line);
+    texts.emplace_back(line);
   }
 
-  auto results = model.translate(std::move(texts), translationRequest);
-
-  // Resolve the future and get the actual result
-  //std::vector<TranslationResult> results = futureResults.get();
+  auto results = model.translateMultiple(std::move(texts), translationRequest);
 
   for (auto &result : results) {
     std::cout << result.getTranslatedText() << std::endl;
diff --git a/app/service-cli-bytearray.cpp b/app/service-cli-bytearray.cpp
index f868d4d..d8c7059 100644
--- a/app/service-cli-bytearray.cpp
+++ b/app/service-cli-bytearray.cpp
@@ -27,8 +27,14 @@ int main(int argc, char *argv[]) {
   std::string input = std_input.str();
   using marian::bergamot::Response;
 
+  marian::bergamot::ResponseOptions responseOptions;
+  responseOptions.qualityScores = true;
+  responseOptions.alignment = true;
+  responseOptions.alignmentThreshold = 0.2f;
+
   // Wait on future until Response is complete
-  std::future<Response> responseFuture = service.translate(std::move(input));
+  std::future<Response> responseFuture =
+      service.translate(std::move(input), responseOptions);
   responseFuture.wait();
   Response response = responseFuture.get();
 
diff --git a/app/service-cli.cpp b/app/service-cli.cpp
index 6ed4d81..d7c72e6 100644
--- a/app/service-cli.cpp
+++ b/app/service-cli.cpp
@@ -8,6 +8,7 @@
 #include "marian.h"
 #include "translator/parser.h"
 #include "translator/response.h"
+#include "translator/response_options.h"
 #include "translator/service.h"
 
 int main(int argc, char *argv[]) {
@@ -21,8 +22,14 @@ int main(int argc, char *argv[]) {
   std::string input = std_input.str();
   using marian::bergamot::Response;
 
+  marian::bergamot::ResponseOptions responseOptions;
+  responseOptions.qualityScores = true;
+  responseOptions.alignment = true;
+  responseOptions.alignmentThreshold = 0.2f;
+
   // Wait on future until Response is complete
-  std::future<Response> responseFuture = service.translate(std::move(input));
+  std::future<Response> responseFuture =
+      service.translate(std::move(input), responseOptions);
   responseFuture.wait();
   Response response = responseFuture.get();
 
diff --git a/bergamot-translator-tests b/bergamot-translator-tests
new file mode 160000
index 0000000..3771001
--- /dev/null
+++ b/bergamot-translator-tests
@@ -0,0 +1 @@
+Subproject commit 3771001720a8f01bba185ee5d5d908b7c266ef31
diff --git a/src/QualityScore.h b/src/QualityScore.h
index 3ad6349..a6beb4e 100644
--- a/src/QualityScore.h
+++ b/src/QualityScore.h
@@ -8,6 +8,7 @@
 
 #include <string>
 #include <vector>
+#include "translator/definitions.h"
 
 /* All possible Granularities for which Quality Scores can be returned for
  * translated text. */
diff --git a/src/TranslationModel.h b/src/TranslationModel.h
deleted file mode 100644
index 4b1be23..0000000
--- a/src/TranslationModel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * TranslationModel.h
- *
- * Main interface for translation API.
- */
-
-#ifndef SRC_TRANSLATOR_TRANSLATIONMODEL_H_
-#define SRC_TRANSLATOR_TRANSLATIONMODEL_H_
-
-#include <future>
-#include <string>
-#include <vector>
-
-// All 3rd party includes
-#include "3rd_party/marian-dev/src/common/options.h"
-
-// All local project includes
-#include "TranslationRequest.h"
-#include "TranslationResult.h"
-#include "translator/definitions.h"
-#include "translator/service.h"
-
-/* A Translation model that translates a plain (without any markups and emojis)
- * UTF-8 encoded text. This implementation supports translation from 1 source
- * language to 1 target language.
- */
-class TranslationModel {
-public:
-  /* Construct the model using the model configuration options as yaml-formatted
-   * string
-   */
-  /**
-   * @param config Marian yml config file in the form of a string
-   * @param model_memory optional byte array (aligned to 64!!!) that contains
-   * the bytes of a model.bin.
-   */
-  TranslationModel(const std::string &config,
-                   marian::bergamot::AlignedMemory modelMemory = marian::bergamot::AlignedMemory(),
-                   marian::bergamot::AlignedMemory shortlistMemory = marian::bergamot::AlignedMemory());
-
-  ~TranslationModel();
-
-  /* This method performs translation on a list of UTF-8 encoded plain text
-   * (without any markups or emojis) and returns a list of results in the same
-   * order. The model supports translation from 1 source language to 1 target
-   * language.
-   *
-   * Each text entry can either be a word, a phrase, a sentence or a list of
-   * sentences. Additional information related to the translated text can be
-   * requested via TranslationRequest which is applied equally to each text
-   * entry. The translated text corresponding to each text entry and the
-   * additional information (as specified in the TranslationRequest) is
-   * encapsulated and returned in TranslationResult.
-   *
-   * The API splits each text entry into sentences internally, which are then
-   * translated independent of each other. The translated sentences are then
-   * joined back together and returned in TranslationResult.
-   *
-   * Please refer to the TranslationRequest class to find out what additional
-   * information can be requested. The alignment information can only be
-   * requested if the model supports it (check isAlignmentSupported() API).
-   *
-   * The texts argument will become empty after the execution of this API (each
-   * entry of texts list will be moved to its corresponding TranslationResult
-   * object).
-   */
-  std::vector<TranslationResult> translate(std::vector<std::string> &&texts,
-                                           TranslationRequest request);
-
-  /* Check if the model can provide alignment information b/w original and
-   * translated text. */
-  bool isAlignmentSupported() const;
-
-private:
-  // Model configuration options
-  std::shared_ptr<marian::Options> configOptions_; // ORDER DEPENDECNY
-  marian::bergamot::Service service_;              // ORDER DEPENDENCY
-};
-
-#endif /* SRC_TRANSLATOR_TRANSLATIONMODEL_H_ */
diff --git a/src/TranslationResult.h b/src/TranslationResult.h
deleted file mode 100644
index 8c6c806..0000000
--- a/src/TranslationResult.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * TranslationResult.h
- *
- * The class that represents the result of TranslationModel::translate()
- * API for each of its text entry and TranslationRequest.
- */
-
-#ifndef SRC_TRANSLATOR_TRANSLATIONRESULT_H_
-#define SRC_TRANSLATOR_TRANSLATIONRESULT_H_
-
-#include <string>
-#include <vector>
-
-#include "QualityScore.h"
-
-/* This class represents the result of TranslationModel::translate() API
- * for each of its text entry and TranslationRequest.
- */
-class TranslationResult {
-public:
-  typedef std::vector<std::pair<std::string_view, std::string_view>>
-      SentenceMappings;
-#ifdef WASM_BINDINGS
-  TranslationResult(const std::string &original, const std::string &translation)
-      : originalText(original), translatedText(translation),
-        sentenceMappings() {}
-#endif
-  TranslationResult(const std::string &original, const std::string &translation,
-                    SentenceMappings &sentenceMappings)
-      : originalText(original), translatedText(translation),
-        sentenceMappings(sentenceMappings) {}
-
-  TranslationResult(TranslationResult &&other)
-      : originalText(std::move(other.originalText)),
-        translatedText(std::move(other.translatedText)),
-        sentenceMappings(std::move(other.sentenceMappings)) {}
-
-#ifdef WASM_BINDINGS
-  TranslationResult(const TranslationResult &other)
-      : originalText(other.originalText),
-        translatedText(other.translatedText),
-        sentenceMappings(other.sentenceMappings) {}
-#endif
-
-  TranslationResult(std::string &&original, std::string &&translation,
-                    SentenceMappings &&sentenceMappings)
-      : originalText(std::move(original)),
-        translatedText(std::move(translation)),
-        sentenceMappings(std::move(sentenceMappings)) {}
-
-#ifndef WASM_BINDINGS
-  TranslationResult &operator=(const TranslationResult &) = delete;
-#else
-  TranslationResult &operator=(const TranslationResult &result) {
-    originalText = result.originalText;
-    translatedText = result.translatedText;
-    sentenceMappings = result.sentenceMappings;
-    return *this;
-  }
-#endif
-
-  /* Return the original text. */
-  const std::string &getOriginalText() const { return originalText; }
-
-  /* Return the translated text. */
-  const std::string &getTranslatedText() const { return translatedText; }
-
-  /* Return the Quality scores of the translated text. */
-  const QualityScore &getQualityScore() const { return qualityScore; }
-
-  /* Return the Sentence mappings (information regarding how individual
-   * sentences of originalText map to corresponding translated sentences in
-   * translatedText).
-   */
-  const SentenceMappings &getSentenceMappings() const {
-    return sentenceMappings;
-  }
-
-private:
-  // Original text (in UTF-8 encoded format) that was supposed to be translated
-  std::string originalText;
-
-  // Translation (in UTF-8 encoded format) of the originalText
-  std::string translatedText;
-
-  // Quality score of the translated text at the granularity specified in
-  // TranslationRequest. It is an optional result (it will have no information
-  // if not requested in TranslationRequest)
-  QualityScore qualityScore;
-
-  // Information regarding how individual sentences of originalText map to
-  // corresponding translated sentences in joined translated text
-  // (translatedText) An example of sentence mapping:
-  //     originalText (contains 2 sentences)              = "What is your name?
-  //     My name is Abc." translatedText (contains 2 translated sentences) =
-  //     "Was ist dein Name? Mein Name ist Abc." sentenceMappings = [
-  //         {"What is your name?", "Was ist dein Name?"},  //
-  //         Pair(originalText[0],translatedText[0])
-  //         {"My name is Abc", "Mein Name ist Abc."}       //
-  //         Pair(originalText[1],translatedText[1])
-  //     ]
-  //
-  // It is an optional result (it will be empty if not requested in
-  // TranslationRequest).
-  SentenceMappings sentenceMappings;
-};
-
-#endif /* SRC_TRANSLATOR_TRANSLATIONRESULT_H_ */
diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt
index 3ddfa79..25ca916 100644
--- a/src/translator/CMakeLists.txt
+++ b/src/translator/CMakeLists.txt
@@ -1,13 +1,11 @@
 add_library(bergamot-translator STATIC
-    TranslationModel.cpp
     byte_array_util.cpp
     text_processor.cpp
     sentence_splitter.cpp
     batch_translator.cpp 
-    multifactor_priority.cpp 
     request.cpp 
     batcher.cpp
-    response.cpp
+    response_builder.cpp
     batch.cpp
     sentence_ranges.cpp
     service.cpp
@@ -29,5 +27,5 @@ endif(COMPILE_WASM)
 target_link_libraries(bergamot-translator marian ssplit)
 
 target_include_directories(bergamot-translator
-    PUBLIC ${CMAKE_SOURCE_DIR}
-    PUBLIC ${CMAKE_SOURCE_DIR}/src)
+    PUBLIC ${PROJECT_SOURCE_DIR}
+           ${PROJECT_SOURCE_DIR}/src)
diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp
deleted file mode 100644
index 06b04eb..0000000
--- a/src/translator/TranslationModel.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * TranslationModel.cpp
- *
- */
-
-#include <future>
-#include <vector>
-
-// All local project includes
-#include "TranslationModel.h"
-#include "translator/parser.h"
-#include "translator/service.h"
-
-TranslationModel::TranslationModel(const std::string &config,
-                                   marian::bergamot::AlignedMemory model_memory,
-                                   marian::bergamot::AlignedMemory lexical_memory)
-    : service_(config, std::move(model_memory), std::move(lexical_memory)) {}
-
-TranslationModel::~TranslationModel() {}
-
-std::vector<TranslationResult>
-TranslationModel::translate(std::vector<std::string> &&texts,
-                            TranslationRequest request) {
-  // Implementing a non-async version first. Unpleasant, but should work.
-  std::promise<std::vector<TranslationResult>> promise;
-  auto future = promise.get_future();
-
-  // This code, move into async?
-  std::vector<TranslationResult> translationResults;
-  for (auto &text : texts) {
-    // Collect future as marian::bergamot::TranslationResult
-    auto intermediate = service_.translate(std::move(text));
-    intermediate.wait();
-    auto marianResponse(std::move(intermediate.get()));
-
-    TranslationResult::SentenceMappings sentenceMappings;
-    for (size_t idx = 0; idx < marianResponse.size(); idx++) {
-      marian::string_view src = marianResponse.source.sentence(idx);
-      marian::string_view tgt = marianResponse.target.sentence(idx);
-      sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
-                                    std::string_view(tgt.data(), tgt.size()));
-    }
-
-    // In place construction.
-    translationResults.emplace_back(
-        std::move(marianResponse.source.text), // &&marianResponse.source_
-        std::move(marianResponse.target.text), // &&marianResponse.translation_
-        std::move(sentenceMappings)            // &&sentenceMappings
-    );
-  }
-
-  return translationResults;
-}
-
-bool TranslationModel::isAlignmentSupported() const { return false; }
diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp
index 19cbaf9..6b2425d 100644
--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) {
   std::vector<data::SentenceTuple> batchVector;
 
   auto &sentences = batch.sentences();
+  size_t batchSequenceNumber{0};
   for (auto &sentence : sentences) {
-    data::SentenceTuple sentence_tuple(sentence.lineNumber());
+    data::SentenceTuple sentence_tuple(batchSequenceNumber);
     Segment segment = sentence.getUnderlyingSegment();
     sentence_tuple.push_back(segment);
     batchVector.push_back(sentence_tuple);
+
+    ++batchSequenceNumber;
   }
 
   size_t batchSize = batchVector.size();
diff --git a/src/translator/definitions.h b/src/translator/definitions.h
index 32998b9..18b5fca 100644
--- a/src/translator/definitions.h
+++ b/src/translator/definitions.h
@@ -22,10 +22,27 @@ template <class T, typename... Args> UPtr<T> UNew(Args &&... args) {
 
 template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }
 
-/// Shortcut to AlignedVector<const void*> for byte arrays
-typedef AlignedVector<const void*> AlignedMemory;
+/// Shortcut to AlignedVector<char> for byte arrays
+typedef AlignedVector<char> AlignedMemory;
 
 } // namespace bergamot
 } // namespace marian
 
+// @TODO at the moment the usage of string_view in this repository is a hot mess and a disaster waiting to happen.
+// ssplit uses std::string_view if the compiler supports c++17, else falls back to c++11 and absl::string_view
+// bergamot-translator uses, depending on the source file std::string_view (which will break if ssplit-cpp uses
+// absl::string_view) and marian::string_view which is an export of (confusingly) the sentencepiece module that
+// marian has. marian::string_view is our addition to the marian fork, which will make merging even nicer. Not.
+// This is just an ugly patchwork that allows gcc5, our lowest targeted gcc to run. We don't actually try to run
+// on older compilers.
+
+#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__)
+#include <experimental/string_view>
+namespace std {
+  using string_view = std::experimental::string_view;
+} // namespace std
+#else
+#include <string_view>
+#endif
+
 #endif // SRC_BERGAMOT_DEFINITIONS_H_
diff --git a/src/translator/multifactor_priority.cpp b/src/translator/multifactor_priority.cpp
deleted file mode 100644
index 0f93a81..0000000
--- a/src/translator/multifactor_priority.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "multifactor_priority.h"
-
-namespace marian {
-namespace bergamot {
-
-}  // namespace bergamot
-}  // namespace marian
diff --git a/src/translator/multifactor_priority.h b/src/translator/multifactor_priority.h
deleted file mode 100644
index 1e239f7..0000000
--- a/src/translator/multifactor_priority.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_
-#define SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_
-
-#include "data/types.h"
-#include "definitions.h"
-#include "sys/time.h"
-
-namespace marian {
-namespace bergamot {
-
-struct MultiFactorPriority {
-  int nice; /* user configurable priority, at a request */
-  unsigned int Id;
-  /* What else should priority depend on? */
-  double priority() { return Id; }
-};
-} // namespace bergamot
-} // namespace marian
-
-#endif // SRC_BERGAMOT_MULTIFACTOR_PRIORITY_H_
diff --git a/src/translator/parser.h b/src/translator/parser.h
index fa4e7bb..207890c 100644
--- a/src/translator/parser.h
+++ b/src/translator/parser.h
@@ -31,7 +31,7 @@ inline marian::ConfigParser createConfigParser() {
 }
 
 inline std::shared_ptr<marian::Options>
-parseOptions(const std::string &config) {
+parseOptions(const std::string &config, bool validate = true) {
   marian::Options options;
 
   // @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests
@@ -58,8 +58,11 @@ parseOptions(const std::string &config) {
   options.parse(config);
   YAML::Node configCopy = options.cloneToYamlNode();
 
-  marian::ConfigValidator validator(configCopy);
-  validator.validateOptions(marian::cli::mode::translation);
+  if (validate) {
+    // Perform validation on parsed options only when requested
+    marian::ConfigValidator validator(configCopy);
+    validator.validateOptions(marian::cli::mode::translation);
+  }
 
   return std::make_shared<marian::Options>(options);
 }
diff --git a/src/translator/pcqueue.h b/src/translator/pcqueue.h
index f0b3541..d6f4582 100644
--- a/src/translator/pcqueue.h
+++ b/src/translator/pcqueue.h
@@ -10,12 +10,14 @@
 #include <mutex>
 
 #ifdef __APPLE__
-#include <mach/mach.h>
-#include <mach/mach_traps.h>
 #include <mach/semaphore.h>
 #include <mach/task.h>
+#include <mach/mach_traps.h>
+#include <mach/mach.h>
 #elif defined(__linux)
 #include <semaphore.h>
+#elif defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
 #else
 #include <boost/interprocess/sync/interprocess_semaphore.hpp>
 #endif
@@ -35,67 +37,107 @@ namespace bergamot {
 #ifdef __APPLE__
 
 class Semaphore {
-public:
-  explicit Semaphore(int value) : task_(mach_task_self()) {
-    ABORT_IF(KERN_SUCCESS !=
-                 semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value),
-             "Could not create semaphore");
-  }
-
-  ~Semaphore() {
-    if (KERN_SUCCESS != semaphore_destroy(task_, back_)) {
-      std::cerr << "Could not destroy semaphore" << std::endl;
-      abort();
+  public:
+    explicit Semaphore(int value) : task_(mach_task_self()) {
+      ABORT_IF(KERN_SUCCESS != semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value), "Could not create semaphore");
     }
-  }
 
-  void wait() {
-    ABORT_IF(KERN_SUCCESS != semaphore_wait(back_),
-             "Wait for semaphore failed");
-  }
+    ~Semaphore() {
+      if (KERN_SUCCESS != semaphore_destroy(task_, back_)) {
+        std::cerr << "Could not destroy semaphore" << std::endl;
+        abort();
+      }
+    }
 
-  void post() {
-    ABORT_IF(KERN_SUCCESS != semaphore_signal(back_),
-             "Could not post to semaphore");
-  }
+    void wait() {
+      ABORT_IF(KERN_SUCCESS != semaphore_wait(back_), "Wait for semaphore failed");
+    }
 
-private:
-  semaphore_t back_;
-  task_t task_;
+    void post() {
+      ABORT_IF(KERN_SUCCESS != semaphore_signal(back_), "Could not post to semaphore");
+    }
+
+  private:
+    semaphore_t back_;
+    task_t task_;
 };
 
-inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); }
+inline void WaitSemaphore(Semaphore &semaphore) {
+  semaphore.wait();
+}
 
 #elif defined(__linux)
 
 class Semaphore {
-public:
-  explicit Semaphore(unsigned int value) {
-    ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore");
-  }
-
-  ~Semaphore() {
-    if (-1 == sem_destroy(&sem_)) {
-      std::cerr << "Could not destroy semaphore " << std::endl;
-      abort();
+  public:
+    explicit Semaphore(unsigned int value) {
+      ABORT_IF(sem_init(&sem_, 0, value), "Could not create semaphore");
     }
-  }
 
-  void wait() {
-    while (UTIL_UNLIKELY(-1 == sem_wait(&sem_))) {
-      ABORT_IF(errno != EINTR, "Wait for semaphore failed");
+    ~Semaphore() {
+      if (-1 == sem_destroy(&sem_)) {
+        std::cerr << "Could not destroy semaphore" << std::endl;
+        abort();
+      }
     }
-  }
 
-  void post() {
-    ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore");
-  }
+    void wait() {
+      while (-1 == sem_wait(&sem_)) {
+        ABORT_IF(errno != EINTR, "Wait for semaphore failed");
+      }
+    }
 
-private:
-  sem_t sem_;
+    void post() {
+      ABORT_IF(-1 == sem_post(&sem_), "Could not post to semaphore");
+    }
+
+  private:
+    sem_t sem_;
 };
 
-inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); }
+inline void WaitSemaphore(Semaphore &semaphore) {
+  semaphore.wait();
+}
+
+#elif defined(_WIN32) || defined(_WIN64)
+
+class Semaphore {  // Counting semaphore backed by a Win32 semaphore object.
+  public:
+    explicit Semaphore(LONG value) : sem_(CreateSemaphoreA(NULL, value, 2147483647, NULL)) {  // max count: largest 32-bit LONG
+      ABORT_IF(!sem_, "Could not CreateSemaphore {}", GetLastError());
+    }
+
+    ~Semaphore() {
+      CloseHandle(sem_);
+    }
+
+    // Blocks until the semaphore is signalled; INFINITE sleeps in the kernel instead of polling with a zero timeout.
+    void wait() {
+      while (true) {
+        switch (WaitForSingleObject(sem_, INFINITE)) {
+          case WAIT_OBJECT_0:
+            return;
+          case WAIT_ABANDONED:
+            ABORT("A semaphore can't be abandoned, confused by Windows");
+          case WAIT_TIMEOUT:  // not expected with INFINITE; retry defensively
+            continue;
+          case WAIT_FAILED:
+            ABORT("Waiting on Semaphore failed {}", GetLastError());
+        }
+      }
+    }
+
+    void post() {  // Increments the semaphore count by one.
+      ABORT_IF(!ReleaseSemaphore(sem_, 1, NULL), "Failed to release Semaphore {}", GetLastError());
+    }
+
+  private:
+    HANDLE sem_;  // Semaphore handle created in the constructor and closed in the destructor.
+};
+
+inline void WaitSemaphore(Semaphore &semaphore) {
+  semaphore.wait();
+}
 
 #else
 typedef boost::interprocess::interprocess_semaphore Semaphore;
@@ -113,7 +155,7 @@ inline void WaitSemaphore(Semaphore &on) {
   }
 }
 
-#endif // Apple
+#endif // Cases for semaphore support
 
 /**
  * Producer consumer queue safe for multiple producers and multiple consumers.
@@ -124,11 +166,13 @@ inline void WaitSemaphore(Semaphore &on) {
  * throw.
  */
 template <class T> class PCQueue {
-public:
+ public:
   explicit PCQueue(size_t size)
-      : empty_(size), used_(0), storage_(new T[size]),
-        end_(storage_.get() + size), produce_at_(storage_.get()),
-        consume_at_(storage_.get()) {}
+   : empty_(size), used_(0),
+     storage_(new T[size]),
+     end_(storage_.get() + size),
+     produce_at_(storage_.get()),
+     consume_at_(storage_.get()) {}
 
   // Add a value to the queue.
   void Produce(const T &val) {
@@ -141,8 +185,7 @@ public:
         empty_.post();
         throw;
       }
-      if (++produce_at_ == end_)
-        produce_at_ = storage_.get();
+      if (++produce_at_ == end_) produce_at_ = storage_.get();
     }
     used_.post();
   }
@@ -158,14 +201,14 @@ public:
         empty_.post();
         throw;
       }
-      if (++produce_at_ == end_)
-        produce_at_ = storage_.get();
+      if (++produce_at_ == end_) produce_at_ = storage_.get();
     }
     used_.post();
   }
 
+
   // Consume a value, assigning it to out.
-  T &Consume(T &out) {
+  T& Consume(T &out) {
     WaitSemaphore(used_);
     {
       std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
@@ -175,15 +218,14 @@ public:
         used_.post();
         throw;
       }
-      if (++consume_at_ == end_)
-        consume_at_ = storage_.get();
+      if (++consume_at_ == end_) consume_at_ = storage_.get();
     }
     empty_.post();
     return out;
   }
 
   // Consume a value, swapping it to out.
-  T &ConsumeSwap(T &out) {
+  T& ConsumeSwap(T &out) {
     WaitSemaphore(used_);
     {
       std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
@@ -193,13 +235,13 @@ public:
         used_.post();
         throw;
       }
-      if (++consume_at_ == end_)
-        consume_at_ = storage_.get();
+      if (++consume_at_ == end_) consume_at_ = storage_.get();
     }
     empty_.post();
     return out;
   }
 
+
   // Convenience version of Consume that copies the value to return.
   // The other version is faster.
   T Consume() {
@@ -208,7 +250,7 @@ public:
     return ret;
   }
 
-private:
+ private:
   // Number of empty spaces in storage_.
   Semaphore empty_;
   // Number of occupied spaces in storage_.
@@ -234,63 +276,67 @@ template <class T> struct UnboundedPage {
 };
 
 template <class T> class UnboundedSingleQueue {
-public:
-  UnboundedSingleQueue() : valid_(0) {
-    SetFilling(new UnboundedPage<T>());
-    SetReading(filling_);
-  }
-
-  void Produce(T &&val) {
-    if (filling_current_ == filling_end_) {
-      UnboundedPage<T> *next = new UnboundedPage<T>();
-      filling_->next = next;
-      SetFilling(next);
+  public:
+    UnboundedSingleQueue() : valid_(0) {
+      SetFilling(new UnboundedPage<T>());
+      SetReading(filling_);
     }
-    *(filling_current_++) = std::move(val);
-    valid_.post();
-  }
 
-  void Produce(const T &val) { Produce(T(val)); }
-
-  T &Consume(T &out) {
-    WaitSemaphore(valid_);
-    if (reading_current_ == reading_end_) {
-      SetReading(reading_->next);
+    void Produce(T &&val) {
+      if (filling_current_ == filling_end_) {
+        UnboundedPage<T> *next = new UnboundedPage<T>();
+        filling_->next = next;
+        SetFilling(next);
+      }
+      *(filling_current_++) = std::move(val);
+      valid_.post();
     }
-    out = std::move(*(reading_current_++));
-    return out;
-  }
 
-  // Warning: very much a no-guarantees race-condition-rich implementation!
-  // But sufficient for our specific purpose: The single thread that consumes
-  // is also the only one that checks Empty, and knows that it's racing.
-  bool Empty() const { return reading_current_ == filling_current_; }
+    void Produce(const T &val) {
+      Produce(T(val));
+    }
 
-private:
-  void SetFilling(UnboundedPage<T> *to) {
-    filling_ = to;
-    filling_current_ = to->entries;
-    filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T);
-  }
-  void SetReading(UnboundedPage<T> *to) {
-    reading_.reset(to);
-    reading_current_ = to->entries;
-    reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T);
-  }
+    T& Consume(T &out) {
+      WaitSemaphore(valid_);
+      if (reading_current_ == reading_end_) {
+        SetReading(reading_->next);
+      }
+      out = std::move(*(reading_current_++));
+      return out;
+    }
 
-  Semaphore valid_;
+    // Warning: very much a no-guarantees race-condition-rich implementation!
+    // But sufficient for our specific purpose: The single thread that consumes
+    // is also the only one that checks Empty, and knows that it's racing.
+    bool Empty() const {
+      return reading_current_ == filling_current_;
+    }
 
-  UnboundedPage<T> *filling_;
+  private:
+    void SetFilling(UnboundedPage<T> *to) {
+      filling_ = to;
+      filling_current_ = to->entries;
+      filling_end_ = filling_current_ + sizeof(to->entries) / sizeof(T);
+    }
+    void SetReading(UnboundedPage<T> *to) {
+      reading_.reset(to);
+      reading_current_ = to->entries;
+      reading_end_ = reading_current_ + sizeof(to->entries) / sizeof(T);
+    }
 
-  std::unique_ptr<UnboundedPage<T>> reading_;
+    Semaphore valid_;
 
-  T *filling_current_;
-  T *filling_end_;
-  T *reading_current_;
-  T *reading_end_;
+    UnboundedPage<T> *filling_;
 
-  UnboundedSingleQueue(const UnboundedSingleQueue &) = delete;
-  UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete;
+    std::unique_ptr<UnboundedPage<T> > reading_;
+
+    T *filling_current_;
+    T *filling_end_;
+    T *reading_current_;
+    T *reading_end_;
+
+    UnboundedSingleQueue(const UnboundedSingleQueue &) = delete;
+    UnboundedSingleQueue &operator=(const UnboundedSingleQueue &) = delete;
 };
 
 } // namespace bergamot
diff --git a/src/translator/request.cpp b/src/translator/request.cpp
index b6d2438..8e46533 100644
--- a/src/translator/request.cpp
+++ b/src/translator/request.cpp
@@ -11,18 +11,24 @@ namespace marian {
 namespace bergamot {
 
 // -----------------------------------------------------------------
-Request::Request(size_t Id, size_t lineNumberBegin,
-                 std::vector<Ptr<Vocab const>> &vocabs, AnnotatedText &&source,
-                 Segments &&segments, std::promise<Response> responsePromise)
-    : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
-      source_(std::move(source)), segments_(std::move(segments)),
-      response_(std::move(responsePromise)) {
+Request::Request(size_t Id, Segments &&segments,
+                 ResponseBuilder &&responseBuilder)
+    : Id_(Id), segments_(std::move(segments)),
+      responseBuilder_(std::move(responseBuilder))
+
+{
 
   counter_ = segments_.size();
   histories_.resize(segments_.size(), nullptr);
+
+  // If there are no segments_, we are never able to trigger the responseBuilder
+  // calls from a different thread. However, in this case we want an empty valid
+  // response.
+  if (segments_.size() == 0) {
+    responseBuilder_(std::move(histories_));
+  }
 }
 
-size_t Request::lineNumberBegin() const { return lineNumberBegin_; }
 size_t Request::numSegments() const { return segments_.size(); }
 
 size_t Request::segmentTokens(size_t index) const {
@@ -39,17 +45,10 @@ void Request::processHistory(size_t index, Ptr<History> history) {
   // In case this is last request in, completeRequest is called, which sets the
   // value of the promise.
   if (--counter_ == 0) {
-    completeRequest();
+    responseBuilder_(std::move(histories_));
   }
 }
 
-void Request::completeRequest() {
-  // Request no longer needs to hold the content, can transfer it to
-  // Response.
-  Response response(std::move(source_), std::move(histories_), *vocabs_);
-  response_.set_value(std::move(response));
-}
-
 bool Request::operator<(const Request &b) const {
   // Among Requests, only sequence id is used for obtaining priority.
   return Id_ < b.Id_;
@@ -64,10 +63,6 @@ size_t RequestSentence::numTokens() const {
   return (request_->segmentTokens(index_));
 }
 
-size_t RequestSentence::lineNumber() const {
-  return (request_->lineNumberBegin() + index_);
-}
-
 void RequestSentence::completeSentence(Ptr<History> history) {
   // Relays completeSentence into request's processHistory, using index
   // information.
diff --git a/src/translator/request.h b/src/translator/request.h
index 605dea7..e2188cd 100644
--- a/src/translator/request.h
+++ b/src/translator/request.h
@@ -1,24 +1,9 @@
-//
-// Defines:
-//
-// Request: holds the input text of a text, Segments (vector<Words>) which are
-// to go to the batching mechanism and alignments between the processed
-// segments and the input text (sourceTokenRanges). In addition, Request takes
-// care of the barrier which fires when all the Segments in a request are done
-// translating by the workers (BatchTranslator).
-// TODO(jerinphilip):  Extend Request with notions of Priority (sequence,
-// user-given).
-//
-// RequestSentence: is a tuple of (index, Ptr<Request>). This provides the
-// batching mechanism access to the segment within the request. The backref to
-// Request allows event triggering the barrier upon completion of the last
-// sentence by a worker.
-
 #ifndef SRC_BERGAMOT_REQUEST_H_
 #define SRC_BERGAMOT_REQUEST_H_
 
 #include "definitions.h"
 #include "response.h"
+#include "response_builder.h"
 #include "sentence_ranges.h"
 
 #include "common/logging.h"
@@ -33,80 +18,96 @@
 namespace marian {
 namespace bergamot {
 
+/// A Request is the internal representation of a request after it has been
+/// processed by TextProcessor into sentences constituted by marian::Words.
+///
+/// The batching mechanism (Batcher) draws from multiple Requests and compiles
+/// sentences into a batch. When a batch completes translation (at
+/// BatchTranslator, intended in a different thread), backward propagation
+/// happens through:
+///
+/// ```cpp
+///   Batch::completeBatch(...)
+///       -> RequestSentence::completeSentence(..)
+///          -> Request::processHistory(...)
+/// ```
+///
+/// When all sentences in a Request are completed, responseBuilder is
+/// triggered with the compiled Histories, to construct the Response
+/// corresponding to the Request and set value of the promise which triggers the
+/// future at client.
 class Request {
 public:
-  Request(size_t Id, size_t lineNumberBegin,
-          std::vector<Ptr<Vocab const>> &vocabs_, AnnotatedText &&source,
-          Segments &&segments, std::promise<Response> responsePromise);
+  /// Constructs an internal representation of the Request identified by Id,
+  /// processed Segments and accepts a callback (ResponseBuilder) which builds
+  /// the Response upon completion of the Request.
+  ///
+  ///
+  /// @param [in] Id: Identifier assigned to Request by Service.
+  /// @param [in] segments: Each segment is a unit to be translated.
+  /// @param [in] responseBuilder: Callback function (of ResponseBuilder type)
+  /// to be triggered upon the completion of translation of all units in a
+  /// Request.
+  Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder);
 
-  // Obtain the count of tokens in the segment correponding to index. Used to
-  // insert sentence from multiple requests into the corresponding size bucket.
+  /// Obtain the count of tokens in the segment corresponding to index. Used to
+  /// insert sentence from multiple requests into the corresponding size bucket.
   size_t segmentTokens(size_t index) const;
 
-  // Obtain number of segments in a request.
+  /// Obtain number of segments in a request.
   size_t numSegments() const;
-  size_t lineNumberBegin() const;
 
-  // Obtains segment corresponding to index  to create a batch of segments among
-  // several requests.
+  /// Obtains segment corresponding to index  to create a batch of segments
+  /// among several requests.
   Segment getSegment(size_t index) const;
 
-  // For notions of priority among requests, used to enable std::set in
-  // Batcher.
+  /// For notions of priority among requests, used to enable std::set in
+  /// Batcher.
   bool operator<(const Request &request) const;
 
-  // Processes a history obtained after translating in a heterogenous batch
-  // compiled from requests.
+  /// Processes a history obtained after translating in a heterogenous batch
+  /// compiled from requests.
   void processHistory(size_t index, Ptr<History> history);
 
-  // On completion of last segment, sets value of the promise.
-  void completeRequest();
-
 private:
   size_t Id_;
-  size_t lineNumberBegin_;
 
-  // Multiple translation-workers can concurrently access the same Request. The
-  // following atomic atomically operates on the variable holding sentences
-  // remaining to be translated.
+  /// Multiple translation-workers can concurrently access the same Request. The
+  /// following atomic atomically operates on the variable holding sentences
+  /// remaining to be translated.
   std::atomic<int> counter_;
 
-  // source_ holds the source string to be translated. segments_ hold the
-  // sentences generated from source_ in vector<Words>. sourceRanges_ are
-  // string_views of the text corresponding to these words, pointing to
-  // sequences in source_. histories_ is a buffer which eventually stores the
-  // translations of each segment in the corresponding index.
-  AnnotatedText source_;
+  /// segments_ holds the sentences, processed into Words, which were generated
+  /// from the input string.
   Segments segments_;
+
+  /// histories_ is a buffer which eventually stores the translations of each
+  /// segment in the corresponding index.
   std::vector<Ptr<History>> histories_;
 
-  // Members above are moved into newly constructed Response on completion
-  // of translation of all segments. The promise below is set to this Response
-  // value. future to this promise is made available to the user through
-  // Service.
-  std::promise<Response> response_;
-
-  // Constructing Response requires the vocabs_ used to generate Request.
-  std::vector<Ptr<Vocab const>> *vocabs_;
+  /// Callback invoked with histories_ once every segment has been translated
+  /// (counter_ reaches zero); it builds the Response for this Request.
+  ResponseBuilder responseBuilder_;
 };
 
+/// A RequestSentence provides a view to a sentence within a Request. Existence
+/// of this class allows the sentences and associated information to be kept
+/// within Request, while batching mechanism (Batcher) compiles Batch from
+/// RequestSentence-s coming from different Requests.
 class RequestSentence {
-  // A RequestSentence provides a view to a sentence within a Request. Existence
-  // of this class allows the sentences and associated information to be kept
-  // within Request.
 
 public:
   RequestSentence(size_t, Ptr<Request>);
+
+  /// Number of tokens in the segment this RequestSentence represents. Used to
+  /// order by length in batching.
   size_t numTokens() const;
 
-  // lineNumber in Request, used for matching marian-decoder. SentenceTuple
-  // requires lineNumber to be set for Corpus based batches.
-  size_t lineNumber() const;
-
-  // Accessor to the segment represented by the RequestSentence.
+  /// Accessor to the segment represented by the RequestSentence.
   Segment getUnderlyingSegment() const;
 
-  // Forwards call to Request, checking for completion.
+  /// Forwards history to Request to set history corresponding to this
+  /// RequestSentence.
   void completeSentence(Ptr<History> history);
 
   friend bool operator<(const RequestSentence &a, const RequestSentence &b);
diff --git a/src/translator/response.cpp b/src/translator/response.cpp
deleted file mode 100644
index e5bc38f..0000000
--- a/src/translator/response.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-#include "response.h"
-#include "common/logging.h"
-#include "data/alignment.h"
-#include "sentence_ranges.h"
-
-#include <utility>
-
-namespace marian {
-namespace bergamot {
-
-Response::Response(AnnotatedText &&source, Histories &&histories,
-                   std::vector<Ptr<Vocab const>> &vocabs)
-    : source(std::move(source)) {
-  // Reserving length at least as much as source_ seems like a reasonable thing
-  // to do to avoid reallocations.
-  target.text.reserve(source.text.size());
-
-  // In a first step, the decoded units (individual senteneces) are compiled
-  // into a huge string. This is done by computing indices first and appending
-  // to the string as each sentences are decoded.
-  std::vector<std::pair<size_t, size_t>> translationRanges;
-  std::vector<size_t> sentenceBegins;
-
-  size_t offset{0};
-  bool first{true};
-
-  for (auto &history : histories) {
-    // TODO(jerin): Change hardcode of nBest = 1
-    NBestList onebest = history->nBest(1);
-
-    Result result = onebest[0]; // Expecting only one result;
-    Words words = std::get<0>(result);
-    auto targetVocab = vocabs.back();
-
-    std::string decoded;
-    std::vector<string_view> targetMappings;
-    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);
-
-    if (first) {
-      first = false;
-    } else {
-      target.text += " ";
-      ++offset;
-    }
-
-    sentenceBegins.push_back(translationRanges.size());
-    target.text += decoded;
-    auto decodedStringBeginMarker = targetMappings.front().begin();
-    for (auto &sview : targetMappings) {
-      size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
-      translationRanges.emplace_back(startIdx, startIdx + sview.size());
-    }
-
-    offset += decoded.size();
-
-    // Alignments
-    // TODO(jerinphilip): The following double conversion might not be
-    // necessary. Hard alignment can directly be exported, but this would mean
-    // WASM bindings for a structure deep within marian source.
-    auto hyp = std::get<1>(result);
-    auto softAlignment = hyp->tracebackAlignment();
-    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
-        softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
-                                            // configurable parameter.
-
-    Alignment unified_alignment;
-    for (auto &p : hardAlignment) {
-      unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob});
-    }
-
-    alignments.push_back(std::move(unified_alignment));
-
-    // Quality scores: Sequence level is obtained as normalized path scores.
-    // Word level using hypothesis traceback. These are most-likely logprobs.
-    auto normalizedPathScore = std::get<2>(result);
-    auto wordQualities = hyp->tracebackWordScores();
-    wordQualities.pop_back();
-    qualityScores.push_back((Quality){normalizedPathScore, wordQualities});
-  }
-
-  // Once we have the indices in translation (which might be resized a few
-  // times) ready, we can prepare and store the string_view as annotations
-  // instead. This is accomplished by iterating over available sentences using
-  // sentenceBegin and using addSentence(...) API from Annotation.
-
-  for (size_t i = 1; i <= sentenceBegins.size(); i++) {
-    std::vector<string_view> targetMappings;
-    size_t begin = sentenceBegins[i - 1];
-    size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
-                                                   : sentenceBegins[i];
-
-    for (size_t idx = begin; idx < safe_end; idx++) {
-      auto &p = translationRanges[idx];
-      size_t begin_idx = p.first;
-      size_t end_idx = p.second;
-
-      const char *data = &target.text[begin_idx];
-      size_t size = end_idx - begin_idx;
-      targetMappings.emplace_back(data, size);
-    }
-
-    target.addSentence(targetMappings);
-  }
-}
-} // namespace bergamot
-} // namespace marian
diff --git a/src/translator/response.h b/src/translator/response.h
index 4f87b8d..0f7ecb5 100644
--- a/src/translator/response.h
+++ b/src/translator/response.h
@@ -40,34 +40,12 @@ struct Quality {
 /// AnnotatedText provides an API to access markings of (sub)-word and
 /// sentences boundaries, which are required to interpret Quality and
 /// Alignment (s) at the moment.
-class Response {
-
-public:
-  ///
-  Response(AnnotatedText &&source, Histories &&histories,
-           std::vector<Ptr<Vocab const>> &vocabs);
-
-  /// \cond HIDDEN_PUBLIC
-  // Move constructor.
-  Response(Response &&other)
-      : source(std::move(other.source)), target(std::move(other.target)),
-        alignments(std::move(other.alignments)),
-        qualityScores(std::move(other.qualityScores)){};
-
-  // The following copy bans are not stricitly required anymore since Annotation
-  // is composed of the ByteRange primitive (which was previously string_view
-  // and required to be bound to string), but makes movement efficient by
-  // banning these letting compiler complain about copies.
-
-  Response(const Response &) = delete;
-  Response &operator=(const Response &) = delete;
-
-  /// \endcond
-
-  /// Number of sentences translated. The processing of a text of into sentences
-  /// are handled internally, and this information can be used to iterate
-  /// through meaningful units of translation for which alignment and quality
-  /// information are available.
+struct Response {
+  /// Convenience function to obtain the number of units translated. Same as
+  /// `.source.numSentences()` and `.target.numSentences()`. The splitting of a
+  /// text into sentences is handled internally, and this information can be
+  /// used to iterate through meaningful units of translation for which
+  /// alignment and quality information are available.
   const size_t size() const { return source.numSentences(); }
 
   /// source text and annotations of (sub-)words and sentences.
@@ -86,6 +64,10 @@ public:
   /// sparse matrix representation with indices corresponding
   /// to (sub-)words accessible through Annotation.
   std::vector<Alignment> alignments;
+
+  const std::string &getOriginalText() const { return source.text; }
+
+  const std::string &getTranslatedText() const { return target.text; }
 };
 } // namespace bergamot
 } // namespace marian
diff --git a/src/translator/response_builder.cpp b/src/translator/response_builder.cpp
new file mode 100644
index 0000000..c624707
--- /dev/null
+++ b/src/translator/response_builder.cpp
@@ -0,0 +1,87 @@
+#include "response_builder.h"
+
+namespace marian {
+namespace bergamot {
+
+void ResponseBuilder::buildQualityScores(Histories &histories,
+                                         Response &response) {
+  // Appends one Quality entry to response.qualityScores per history.
+  for (auto &history : histories) {
+    // TODO(jerin): Change hardcode of nBest = 1
+    NBestList onebest = history->nBest(1);
+
+    Result result = onebest[0]; // Expecting only one result;
+    // Only the hypothesis and the path score of the result are used here.
+    auto hyp = std::get<1>(result);
+    // Quality scores: Sequence level is obtained as normalized path scores.
+    // Word level using hypothesis traceback. These are most-likely
+    // logprobs.
+    auto normalizedPathScore = std::get<2>(result);
+    auto wordQualities = hyp->tracebackWordScores();
+    wordQualities.pop_back(); // NOTE(review): presumably drops the EOS score.
+    response.qualityScores.push_back(
+        Quality{normalizedPathScore, wordQualities});
+  }
+}
+
+void ResponseBuilder::buildAlignments(Histories &histories,
+                                      Response &response) {
+  // Appends one hard Alignment to response.alignments per history.
+  for (auto &history : histories) {
+    // TODO(jerin): Change hardcode of nBest = 1
+    NBestList onebest = history->nBest(1);
+
+    Result result = onebest[0]; // Expecting only one result;
+    // Alignments
+    // TODO(jerinphilip): The following double conversion might not be
+    // necessary. Hard alignment can directly be exported, but this would
+    // mean WASM bindings for a structure deep within marian source.
+    auto hyp = std::get<1>(result);
+    auto softAlignment = hyp->tracebackAlignment();
+    auto threshold = responseOptions_.alignmentThreshold;
+    auto hardAlignment =
+        data::ConvertSoftAlignToHardAlign(softAlignment, threshold);
+    Alignment unified_alignment;
+    for (auto &p : hardAlignment) {
+      unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob});
+    }
+
+    response.alignments.push_back(std::move(unified_alignment));
+  }
+}
+
+void ResponseBuilder::buildTranslatedText(Histories &histories,
+                                          Response &response) {
+  // Reserving length at least as much as source_ seems like a reasonable
+  // thing to do to avoid reallocations.
+  response.target.text.reserve(response.source.text.size());
+
+  // True until the first sentence is appended; later ones get a delimiter.
+  bool first{true};
+
+  for (auto &history : histories) {
+    // TODO(jerin): Change hardcode of nBest = 1
+    NBestList onebest = history->nBest(1);
+
+    Result result = onebest[0]; // Expecting only one result;
+    Words words = std::get<0>(result);
+    auto targetVocab = vocabs_->back();
+
+    std::string decoded;
+    std::vector<string_view> targetSentenceMappings;
+    targetVocab->decodeWithByteRanges(words, decoded, targetSentenceMappings);
+
+    // delimiter can be used to fill in the blanks from source as well.
+    std::string delimiter;
+    if (first) {
+      first = false;
+    } else {
+      delimiter = " ";
+    }
+
+    response.target.appendSentence(delimiter, decoded, targetSentenceMappings);
+  }
+}
+
+} // namespace bergamot
+} // namespace marian
diff --git a/src/translator/response_builder.h b/src/translator/response_builder.h
new file mode 100644
index 0000000..85caffb
--- /dev/null
+++ b/src/translator/response_builder.h
@@ -0,0 +1,93 @@
+#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_
+#define SRC_BERGAMOT_RESPONSE_BUILDER_H_
+
+#include "data/types.h"
+#include "response.h"
+#include "response_options.h"
+
+// For now we will work with this, to avoid complaints another structure is hard
+// to operate with.
+
+namespace marian {
+namespace bergamot {
+
+/// ResponseBuilder is a callback functor. It is expected to be bound to a
+/// Request after giving it the context of options, vocabs and promise to set.
+/// It constructs the Response and its members based on options
+/// (quality=on|off, alignments=on|off, mappings=on|off, splitmode=sentence |
+/// paragraph).
+
+class ResponseBuilder {
+public:
+  /// @param [in] responseOptions: ResponseOptions, indicating what to include
+  /// or not in the response and any additional configurable parameters.
+  /// @param [in] source: annotated source text, moved into the Response.
+  /// @param [in] vocabs: marian vocab object (used in decoding)
+  /// @param [in] promise: promise to set with the constructed Response.
+  ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source,
+                  std::vector<Ptr<Vocab const>> &vocabs,
+                  std::promise<Response> &&promise)
+      : responseOptions_(responseOptions), vocabs_(&vocabs),
+        promise_(std::move(promise)), source_(std::move(source)) {}
+
+  /// Constructs and sets the promise of a Response object from obtained
+  /// histories after translating.
+  /// @param [in] histories: Histories obtained after translating the Request
+  /// from which this functor is called.
+  void operator()(Histories &&histories) {
+    // TODO(jerinphilip): sentenceMappings, concatStrategy and qualityScoreType
+    // in responseOptions_ are not consulted here yet.
+    ABORT_IF(source_.numSentences() != histories.size(),
+             "Mismatch in source and translated sentences");
+    Response response;
+
+    // Move source_ into response.
+    response.source = std::move(source_);
+
+    // Should be after source is set
+    buildTranslatedText(histories, response);
+
+    // Should always be after buildTranslatedText
+    if (responseOptions_.qualityScores) {
+      buildQualityScores(histories, response);
+    }
+
+    if (responseOptions_.alignment) {
+      buildAlignments(histories, response);
+    }
+
+    // Once complete, set promise.
+    promise_.set_value(std::move(response));
+  }
+
+private:
+  /// Builds qualityScores from histories and writes to response. expects
+  /// buildTranslatedText to be run before to be able to obtain target text and
+  /// subword information.
+  /// @param histories [in]
+  /// @param response [out]
+  void buildQualityScores(Histories &histories, Response &response);
+
+  /// Builds alignments from histories and writes onto response.
+  /// @param histories [in]
+  /// @param response [out]
+  void buildAlignments(Histories &histories, Response &response);
+
+  /// Builds translated text and subword annotations and writes onto response.
+  /// @param histories [in]
+  /// @param response [out]
+  void buildTranslatedText(Histories &histories, Response &response);
+
+  // Data members are context/curried args for the functor. They are declared
+  // in the same order as the constructor's initializer list.
+  ResponseOptions responseOptions_;
+  std::vector<Ptr<Vocab const>> *vocabs_; // vocabs are required for decoding
+                                          // and any source validation checks.
+  std::promise<Response> promise_; //  To be set when callback triggered and
+                                   //  after Response constructed.
+  AnnotatedText source_;
+};
+} // namespace bergamot
+} // namespace marian
+
+#endif //  SRC_BERGAMOT_RESPONSE_BUILDER_H_
diff --git a/src/translator/response_options.h b/src/translator/response_options.h
new file mode 100644
index 0000000..ed3cce3
--- /dev/null
+++ b/src/translator/response_options.h
@@ -0,0 +1,50 @@
+#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_
+#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_
+#include <string>
+
+namespace marian {
+namespace bergamot {
+
+enum ConcatStrategy {
+  /// Target text is constructed faithful to the source-text  structure.
+  FAITHFUL,
+
+  /// Target text is concatenated by a space.
+  SPACE
+};
+
+enum QualityScoreType {
+  /// Provide a free quality-score that comes with the machine-translation model
+  /// itself.
+  FREE,
+
+  /// An expensive quality-score that runs additional computations to determine
+  /// quality of an output.
+  EXPENSIVE
+};
+
+/// ResponseOptions dictate how to construct a Response for an input string of
+/// text to be translated.
+struct ResponseOptions {
+  bool qualityScores{false}; ///< Include quality-scores or not.
+  bool alignment{false};     ///< Include alignments or not.
+
+  /// Whether to include sentenceMappings or not. Alignments require
+  /// sentenceMappings and are available irrespective of this option if
+  /// `alignment=true`.
+  bool sentenceMappings{false};
+
+  /// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse
+  /// matrix. Higher value implies stronger filtering leading to provision of
+  /// higher-confidence matches. `1.0f` gives argmax (not the full-dense
+  /// matrix).
+  float alignmentThreshold{0.2f};
+
+  QualityScoreType qualityScoreType{QualityScoreType::FREE}; ///< Kind of quality-score.
+  ConcatStrategy concatStrategy{ConcatStrategy::FAITHFUL}; ///< How target text is joined.
+};
+
+} // namespace bergamot
+} // namespace marian
+
+#endif //  SRC_BERGAMOT_RESPONSE_OPTIONS_H_
diff --git a/src/translator/sentence_ranges.cpp b/src/translator/sentence_ranges.cpp
index aae9dd3..da9d3ee 100644
--- a/src/translator/sentence_ranges.cpp
+++ b/src/translator/sentence_ranges.cpp
@@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const {
     // the flatByteRange and non-empty sentence before this happened and
     // construct empty string-view equivalent ByteRange.
     ByteRange eos = flatByteRanges_[eosId - 1];
-    sentenceByteRange = (ByteRange){eos.end, eos.end};
+    sentenceByteRange = ByteRange{eos.end, eos.end};
   } else {
     ByteRange bos = flatByteRanges_[bosId];
     ByteRange eos = flatByteRanges_[eosId - 1];
-    sentenceByteRange = (ByteRange){bos.begin, eos.end};
+    sentenceByteRange = ByteRange{bos.begin, eos.end};
   }
   return sentenceByteRange;
 }
@@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const {
   return asStringView(sentenceAsByteRange);
 }
 
+void AnnotatedText::appendSentence(std::string prefix, std::string &reference,
+                                   std::vector<string_view> &wordRanges) {
+  text += prefix;
+  size_t offset = text.size(); // Where the appended sentence begins in text.
+  text += reference;           // Append reference to text
+  std::vector<ByteRange> sentence;
+  for (auto &wordView : wordRanges) {
+    // Take the pointer difference first; wordView is expected to point into
+    // reference, and offset + pointer could step outside that buffer.
+    size_t begin = offset + (wordView.data() - &reference[0]);
+    sentence.push_back(ByteRange{begin, begin + wordView.size()});
+  }
+  annotation.addSentence(sentence);
+}
+
 void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
   addSentence(std::begin(wordRanges), std::end(wordRanges));
 };
@@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
   std::vector<ByteRange> sentence;
   for (auto p = begin; p != end; p++) {
     size_t begin_offset = p->data() - &text[0];
-    sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()});
+    sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()});
   }
   annotation.addSentence(sentence);
 };
diff --git a/src/translator/sentence_ranges.h b/src/translator/sentence_ranges.h
index b3986e3..f9c881e 100644
--- a/src/translator/sentence_ranges.h
+++ b/src/translator/sentence_ranges.h
@@ -64,7 +64,6 @@ public:
     sentenceEndIds_.push_back(0);
   }
 
-  /// Returns the number of sentences annotated in a text.
   size_t numSentences() const { return sentenceEndIds_.size() - 1; }
 
   /// Returns number of words in the sentence identified by `sentenceIdx`.
@@ -125,10 +124,6 @@ public:
   /// constructor is disallowed).
   AnnotatedText(std::string &&text) : text(std::move(text)){};
 
-  AnnotatedText(AnnotatedText &&annotatedBlob)
-      : text(std::move(annotatedBlob.text)),
-        annotation(std::move(annotatedBlob.annotation)) {}
-
   /// Returns the number of sentences in the annotation structure.
   const size_t numSentences() const { return annotation.numSentences(); }
 
@@ -137,6 +132,11 @@ public:
     return annotation.numWords(sentenceIdx);
   }
 
+  /// Appends a sentence to the existing text and transparently rebases
+  /// string_views
+  void appendSentence(std::string prefix, std::string &reference,
+                      std::vector<string_view> &wordRanges);
+
   /// Adds a sentence, used to load from SentencePiece annotations conveniently.
   void addSentence(std::vector<string_view> &wordRanges);
 
diff --git a/src/translator/sentence_splitter.h b/src/translator/sentence_splitter.h
index 5175176..1c4742e 100644
--- a/src/translator/sentence_splitter.h
+++ b/src/translator/sentence_splitter.h
@@ -4,6 +4,7 @@
 #include "common/options.h"
 #include "data/types.h"
 #include "ssplit.h"
+#include "definitions.h"
 #include <string>
 
 namespace marian {
diff --git a/src/translator/service.cpp b/src/translator/service.cpp
index 76bcba2..3d19f5e 100644
--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@@ -28,8 +28,8 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
 namespace marian {
 namespace bergamot {
 
-Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
-    : requestId_(0), vocabs_(std::move(loadVocabularies(options))),
+Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory) 
+    : requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))),
       text_processor_(vocabs_, options), batcher_(options),
       numWorkers_(options->get<int>("cpu-threads")),
       modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
@@ -112,6 +112,44 @@ void Service::async_translate() {
 #endif // WASM_COMPATIBLE_SOURCE
 
 std::future<Response> Service::translate(std::string &&input) {
+  // No per-call options yet: forward default-constructed ResponseOptions.
+  return translate(std::move(input), ResponseOptions());
+}
+
+std::vector<Response>
+Service::translateMultiple(std::vector<std::string> &&inputs,
+                           TranslationRequest translationRequest) {
+  ResponseOptions responseOptions;
+
+  // TODO(jerinphilip) Set options based on TranslationRequest, if and when it
+  // becomes non-dummy.
+
+  // We queue the individual Requests so they get compiled at batches to be
+  // efficiently translated.
+  std::vector<std::future<Response>> responseFutures;
+  for (auto &input : inputs) {
+    std::future<Response> inputResponse =
+        queueRequest(std::move(input), responseOptions);
+    responseFutures.push_back(std::move(inputResponse));
+  }
+
+  // Dispatch once, after every Request is queued, so that sentences from
+  // multiple Requests can be compiled into the same batches.
+  dispatchTranslate();
+
+  // Now wait for all Requests to complete, the future to fire and return the
+  // compiled Responses, we can probably return the future, but WASM quirks(?).
+  std::vector<Response> responses;
+  responses.reserve(responseFutures.size());
+  for (auto &future : responseFutures) {
+    responses.push_back(future.get()); // get() waits for the Response.
+  }
+
+  return responses;
+}
+
+std::future<Response> Service::queueRequest(std::string &&input,
+                                            ResponseOptions responseOptions) {
   Segments segments;
   AnnotatedText source(std::move(input));
   text_processor_.process(source, segments);
@@ -119,17 +157,29 @@ std::future<Response> Service::translate(std::string &&input) {
   std::promise<Response> responsePromise;
   auto future = responsePromise.get_future();
 
-  Ptr<Request> request = New<Request>(
-      requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source),
-      std::move(segments), std::move(responsePromise));
+  ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_,
+                                  std::move(responsePromise));
+  Ptr<Request> request = New<Request>(requestId_++, std::move(segments),
+                                      std::move(responseBuilder));
 
   batcher_.addWholeRequest(request);
+  return future;
+}
+
+std::future<Response> Service::translate(std::string &&input,
+                                         ResponseOptions responseOptions) {
+  // Queue the single request first, then trigger translation for it.
+  auto pending = queueRequest(std::move(input), responseOptions);
+  dispatchTranslate();
+  return pending;
+}
+
+void Service::dispatchTranslate() {
   if (numWorkers_ == 0) {
     blocking_translate();
   } else {
     async_translate();
   }
-  return future;
 }
 
 Service::~Service() {
diff --git a/src/translator/service.h b/src/translator/service.h
index 72f6d92..288c649 100644
--- a/src/translator/service.h
+++ b/src/translator/service.h
@@ -1,10 +1,12 @@
 #ifndef SRC_BERGAMOT_SERVICE_H_
 #define SRC_BERGAMOT_SERVICE_H_
 
+#include "TranslationRequest.h"
 #include "batch_translator.h"
 #include "batcher.h"
 #include "data/types.h"
 #include "response.h"
+#include "response_builder.h"
 #include "text_processor.h"
 #include "translator/parser.h"
 
@@ -18,22 +20,45 @@
 namespace marian {
 namespace bergamot {
 
-/// Service exposes methods to translate an incoming blob of text to the
-/// Consumer of bergamot API.
+/// Service offers methods to create an asynchronous translation service that
+/// translates a plain (without any markups and emojis)  UTF-8 encoded text.
+/// This implementation supports translation from 1 source language to 1 target
+/// language.
+///
+///  This is intended to be similar to the ones  provided for training or
+///  decoding in ML pipelines with the following  additional capabilities:
+///
+///  1. Provision of a request -> response based translation flow, unlike the
+///  usual line-based translation or decoding provided in most ML frameworks.
+///  2. Internal handling of normalization etc which changes source text to
+///  provide to client translation meta-information like alignments consistent
+///  with the unnormalized input text.
+///  3. The API splits each text entry into sentences internally, which are then
+///  translated independent of each other. The translated sentences are then
+///  joined back together and returned in Response.
+///
+/// Service exposes methods to instantiate the service from a string
+/// configuration (which can cover most translators) and to translate an
+/// incoming blob of text.
+///
 ///
 /// An example use of this API looks as follows:
-///
+/// ```cpp
 ///  options = ...;
 ///  service = Service(options);
 ///  std::string input_text = "Hello World";
 ///  std::future<Response>
-///      response = service.translate(std::move(input_text));
-///  response.wait();
-///  Response result = response.get();
+///      responseFuture = service.translate(std::move(input_text));
+///  responseFuture.wait(); // Wait until translation has completed.
+///  Response response = responseFuture.get();
 ///
-/// Optionally Service can be initialized by also passing model_memory for
+/// // Do things with response.
+/// ```
+///
+/// Optionally Service can be initialized by also passing model memory for
 /// purposes of efficiency (which defaults to nullpointer and then reads from
 /// file supplied through config).
+///
 class Service {
 
 public:
@@ -41,9 +66,22 @@ public:
   /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
   /// of a model.bin. Optional, defaults to nullptr when not used
   /// @param shortlistMemory byte array of shortlist (aligned to 64)
-  explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);
+  explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
+                   AlignedMemory shortlistMemory);
 
-  explicit Service(Ptr<Options> options) : Service(options, AlignedMemory(), AlignedMemory()){}
+  /// Construct Service purely from Options. This expects the options that
+  /// marian-decoder requires for loading the model, shortlist and
+  /// vocabularies from files, in addition to parameters that set or unset
+  /// desired features (e.g: alignments, quality-scores).
+  ///
+  /// This is equivalent to a call to:
+  /// ```cpp
+  ///    Service(options, AlignedMemory(),  AlignedMemory())
+  /// ```
+  /// wherein empty memory is passed and internal flow defaults to file-based
+  /// model, shortlist loading.
+  explicit Service(Ptr<Options> options)
+      : Service(options, AlignedMemory(), AlignedMemory()) {}
 
   /// Construct Service from a string configuration.
   /// @param [in] config string parsable as YAML expected to adhere with marian
@@ -52,20 +90,66 @@ public:
   /// bytes of a model.bin. Optional.
   /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
   explicit Service(const std::string &config,
-                   AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory())
-      : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {}
+                   AlignedMemory modelMemory = AlignedMemory(),
+                   AlignedMemory shortlistMemory = AlignedMemory())
+      : Service(parseOptions(config, /*validate=*/false),
+                std::move(modelMemory), std::move(shortlistMemory)) {}
 
   /// Explicit destructor to clean up after any threads initialized in
   /// asynchronous operation mode.
   ~Service();
 
   /// To stay efficient and to refer to the string for alignments, expects
-  /// ownership be moved through std::move(..)
+  /// ownership be moved through `std::move(..)`
   ///
-  ///  @param [in] rvalue reference of string to be translated.
-  std::future<Response> translate(std::string &&input);
+  ///  @param [in] source: rvalue reference of string to be translated.
+  std::future<Response> translate(std::string &&source);
+
+  /// Translate an input, providing Options to construct Response. This is
+  /// useful when one has to set/unset alignments or quality in the Response to
+  /// save compute spent in constructing these objects.
+  ///
+  /// @param [in] source: rvalue reference of the string to be translated
+  /// @param [in] options: ResponseOptions indicating whether or not to include
+  /// some member in the Response, also specify any additional configurable
+  /// parameters.
+  std::future<Response> translate(std::string &&source,
+                                  ResponseOptions options);
+
+  /// Translate multiple text-blobs in a single *blocking* API call, providing
+  /// TranslationRequest which applies across all text-blobs dictating how to
+  /// construct Response. TranslationRequest can be used to enable/disable
+  /// additional information like quality-scores, alignments etc.
+  ///
+  /// All texts are combined to efficiently construct batches together providing
+  /// speedups compared to calling translate() independently on individual
+  /// text-blob. Note that there will be minor differences in output when
+  /// text-blobs are individually translated due to approximations but similar
+  /// quality nonetheless. If you have async/multithread capabilities, it is
+  /// recommended to work with futures and translate() API.
+  ///
+  /// @param [in] source: rvalue reference of the vector of strings to translate
+  /// @param [in] translationRequest: TranslationRequest (Unified API)
+  /// indicating whether or not to include some member in the Response, also
+  /// specify any additional configurable parameters.
+
+  std::vector<Response>
+  translateMultiple(std::vector<std::string> &&source,
+                    TranslationRequest translationRequest);
+
+  /// Returns if model is alignment capable or not.
+  bool isAlignmentSupported() const {
+    return options_->hasAndNotEmpty("alignment");
+  }
 
 private:
+  /// Queue an input for translation.
+  std::future<Response> queueRequest(std::string &&input,
+                                     ResponseOptions responseOptions);
+
+  /// Dispatch call to translate after inserting in queue
+  void dispatchTranslate();
+
   /// Build numTranslators number of translators with options from options
   void build_translators(Ptr<Options> options, size_t numTranslators);
   /// Initializes a blocking translator without using std::thread
@@ -83,16 +167,21 @@ private:
   void async_translate();
 
   /// Number of workers to launch.
-  size_t numWorkers_;              // ORDER DEPENDENCY (pcqueue_)
+  size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
+
+  /// Options object holding the options Service was instantiated with.
+  Ptr<Options> options_;
+
   /// Model memory to load model passed as bytes.
-  AlignedMemory modelMemory_;      // ORDER DEPENDENCY (translators_)
+  AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
   /// Shortlist memory passed as bytes.
-  AlignedMemory shortlistMemory_;  // ORDER DEPENDENCY (translators_)
+  AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_)
 
   /// Holds instances of batch translators, just one in case
   /// of single-threaded application, numWorkers_ in case of multithreaded
   /// setting.
-  std::vector<BatchTranslator> translators_;  // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
+  std::vector<BatchTranslator>
+      translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
 
   /// Stores requestId of active request. Used to establish
   /// ordering among requests and logging/book-keeping.
diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt
index c89e393..a785ba6 100644
--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@@ -23,6 +23,7 @@ endif()
 set_target_properties(bergamot-translator-worker PROPERTIES
                         SUFFIX ".js"
                         LINK_FLAGS ${LINKER_FLAGS}
-                        )
+                        RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
+                      )
 
 target_link_libraries(bergamot-translator-worker bergamot-translator)
diff --git a/wasm/README.md b/wasm/README.md
index 23564b9..337ae1b 100644
--- a/wasm/README.md
+++ b/wasm/README.md
@@ -1,17 +1,53 @@
 ## Using Bergamot Translator in JavaScript
 The example file `bergamot.html` in the folder `test_page` demonstrates how to use the bergamot translator in JavaScript via a `<script>` tag.
 
-Please note that everything below assumes that the [bergamot project specific model files](https://github.com/mozilla-applied-ml/bergamot-models) were packaged in wasm binary (using the compile instructions given in the top level README).
+### <a name="Pre-requisite"></a> Pre-requisite: Download files required for translation
 
-### Using JS APIs
+Please note that the [Using JS APIs](#Using-JS-APIs) and [Demo](#Demo) sections below assume that the [bergamot project specific model files](https://github.com/mozilla-applied-ml/bergamot-models) are already downloaded and present in the `test_page/models` folder. If this is not done yet, use the following instructions to do so:
+
+```bash
+cd test_page
+mkdir models
+git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models
+cp -rf bergamot-models/prod/* models
+gunzip models/*/*
+```
+
+### <a name="Using-JS-APIs"></a> Using JS APIs
 
 ```js
 // The model configuration as YAML formatted string. For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
-// This example captures the most relevant options: model file, vocabulary files and shortlist file
-const modelConfig = "{\"models\":[\"/esen/model.esen.npz\"],\"vocabs\":[\"/esen/vocab.esen.spm\",\"/esen/vocab.esen.spm\"],\"shortlist\":[\"/esen/lex.esen.s2t\"],\"beam-size\":1}";
+// This example captures some of the most relevant options
+const modelConfig = `vocabs:
+  - /esen/vocab.esen.spm
+  - /esen/vocab.esen.spm
+beam-size: 1
+normalize: 1.0
+word-penalty: 0
+max-length-break: 128
+mini-batch-words: 1024
+workspace: 128
+max-length-factor: 2.0
+skip-cost: true
+cpu-threads: 0
+quiet: true
+quiet-translation: true
+gemm-precision: int8shift
+`;
+
+// Download model and shortlist files and read them into buffers
+const modelFile = `models/esen/model.esen.intgemm.alphas.bin`;
+const shortlistFile = `models/esen/lex.50.50.esen.s2t.bin`;
+const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]); // Please refer to bergamot.html in test_page folder for this function
+const modelBuffer = downloadedBuffers[0];
+const shortListBuffer = downloadedBuffers[1];
+
+// Construct AlignedMemory instances from the buffers
+var alignedModelMemory = constructAlignedMemoryFromBuffer(modelBuffer, 256); // Please refer to bergamot.html in test_page folder for this function
+var alignedShortlistMemory = constructAlignedMemoryFromBuffer(shortListBuffer, 64); // Please refer to bergamot.html in test_page folder for this function
 
 // Instantiate the TranslationModel
-const model = new Module.TranslationModel(modelConfig);
+const model = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory);
 
 // Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector<string>)
 const request = new Module.TranslationRequest();
@@ -34,13 +70,18 @@ request.delete();
 input.delete();
 ```
 
-### Demo (see everything in action)
+### <a name="Demo"></a> Demo (see everything in action)
+
+* Make sure that you followed [Pre-requisite](#Pre-requisite) instructions before moving forward.
 
 * Start the test webserver (ensure you have the latest nodejs installed)
     ```bash
     cd test_page
-    bash start_server.sh
+    bash start_server.sh ../../build-wasm
     ```
+
+    Provide the folder containing the wasm artifacts as the first argument of `start_server.sh` script (`../../build-wasm` in this case).
+
 * Open any of the browsers below
     * Firefox Nightly +87: make sure the following prefs are on (about:config)
         ```
diff --git a/wasm/bindings/TranslationModelBindings.cpp b/wasm/bindings/TranslationModelBindings.cpp
index 245416c..41b9c2e 100644
--- a/wasm/bindings/TranslationModelBindings.cpp
+++ b/wasm/bindings/TranslationModelBindings.cpp
@@ -6,17 +6,40 @@
 
 #include <emscripten/bind.h>
 
-#include "TranslationModel.h"
+#include "response.h"
+#include "service.h"
 
 using namespace emscripten;
 
-// Binding code
+typedef marian::bergamot::Service TranslationModel;
+typedef marian::bergamot::Response TranslationResult;
+
+val getByteArrayView(marian::bergamot::AlignedMemory& alignedMemory) {
+  return val(typed_memory_view(alignedMemory.size(), alignedMemory.as<char>()));
+}
+
+EMSCRIPTEN_BINDINGS(aligned_memory) {
+  class_<marian::bergamot::AlignedMemory>("AlignedMemory")
+    .constructor<std::size_t, std::size_t>()
+    .function("size", &marian::bergamot::AlignedMemory::size)
+	  .function("getByteArrayView", &getByteArrayView)
+    ;
+}
+
+TranslationModel* TranslationModelFactory(const std::string &config,
+                                          marian::bergamot::AlignedMemory* modelMemory,
+                                          marian::bergamot::AlignedMemory* shortlistMemory) {
+  return new TranslationModel(config, std::move(*modelMemory), std::move(*shortlistMemory));
+}
+
 EMSCRIPTEN_BINDINGS(translation_model) {
   class_<TranslationModel>("TranslationModel")
-    .constructor<std::string>()
-    .function("translate", &TranslationModel::translate)
+    .constructor(&TranslationModelFactory, allow_raw_pointers())
+    .function("translate", &TranslationModel::translateMultiple)
 	  .function("isAlignmentSupported", &TranslationModel::isAlignmentSupported)
     ;
+  // ^ We redirect Service::translateMultiple to WASMBound::translate instead. Sane API is
+  // translate. If and when async comes, we can be done with this inconsistency.
 
   register_vector<std::string>("VectorString");
   register_vector<TranslationResult>("VectorTranslationResult");
diff --git a/wasm/bindings/TranslationResultBindings.cpp b/wasm/bindings/TranslationResultBindings.cpp
index a3713a1..c1c0ca8 100644
--- a/wasm/bindings/TranslationResultBindings.cpp
+++ b/wasm/bindings/TranslationResultBindings.cpp
@@ -6,15 +6,16 @@
 #include <emscripten/bind.h>
 #include <vector>
 
-#include "TranslationResult.h"
+#include "response.h"
+
+typedef marian::bergamot::Response TranslationResult;
 
 using namespace emscripten;
 
 // Binding code
 EMSCRIPTEN_BINDINGS(translation_result) {
   class_<TranslationResult>("TranslationResult")
-    .constructor<std::string, std::string, TranslationResult::SentenceMappings>()
-	  .function("getOriginalText", &TranslationResult::getOriginalText)
-	  .function("getTranslatedText", &TranslationResult::getTranslatedText)
-    ;
+      .constructor<>()
+      .function("getOriginalText", &TranslationResult::getOriginalText)
+      .function("getTranslatedText", &TranslationResult::getTranslatedText);
 }
diff --git a/wasm/patch-artifacts-enable-wormhole.sh b/wasm/patch-artifacts-enable-wormhole.sh
index c16ba66..e39988b 100644
--- a/wasm/patch-artifacts-enable-wormhole.sh
+++ b/wasm/patch-artifacts-enable-wormhole.sh
@@ -1,7 +1,36 @@
 #!/bin/bash
+usage="Patch wasm artifacts to enable wormhole via APIs that compile and instantiate wasm module.
 
-echo "Patching wasm artifacts to enable wormhole via APIs that compile and instantiate wasm module"
-sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
+Usage: $(basename "$0") [WASM_ARTIFACTS_FOLDER]
+
+    where:
+    WASM_ARTIFACTS_FOLDER    Folder containing wasm artifacts
+                             (An optional argument, if unspecified the default is: current folder)"
+
+if [ "$#" -gt 1 ]; then
+    echo "Illegal number of parameters passed"
+    echo "$usage"
+    exit 1 # non-zero status so that callers/CI notice the failure
+fi
+
+# Use the folder given as the first argument, or the current folder when none is given
+WASM_ARTIFACTS_FOLDER="${1-$PWD}"
+if [ ! -d "$WASM_ARTIFACTS_FOLDER" ]; then
+    echo "Error: Folder \"$WASM_ARTIFACTS_FOLDER\" doesn't exist"
+    exit 1
+fi
+
+WASM_ARTIFACTS="$WASM_ARTIFACTS_FOLDER/bergamot-translator-worker.js"
+if [ ! -e "$WASM_ARTIFACTS" ]; then
+    echo "Error: Artifact \"$WASM_ARTIFACTS\" doesn't exist"
+    exit 1
+fi
+
+echo "Patching \"$WASM_ARTIFACTS\" to enable wormhole via APIs that compile and instantiate wasm module"
+# Apply all substitutions in a single sed run so that the one .bak backup keeps
+# the unpatched original (separate runs would each overwrite the backup).
+sed -i.bak \
+    -e 's/WebAssembly.instantiateStreaming[[:space:]]*([[:space:]]*response[[:space:]]*,[[:space:]]*info[[:space:]]*)/WebAssembly.instantiateStreaming(response, info, {simdWormhole:true})/g' \
+    -e 's/WebAssembly.instantiate[[:space:]]*([[:space:]]*binary[[:space:]]*,[[:space:]]*info[[:space:]]*)/WebAssembly.instantiate(binary, info, {simdWormhole:true})/g' \
+    -e 's/WebAssembly.Module[[:space:]]*([[:space:]]*bytes[[:space:]]*)/WebAssembly.Module(bytes, {simdWormhole:true})/g' "$WASM_ARTIFACTS"
 echo "Done"
diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html
index 4f1f2a0..95ae325 100644
--- a/wasm/test_page/bergamot.html
+++ b/wasm/test_page/bergamot.html
@@ -2,7 +2,7 @@
 <html>
 <head>
     <link rel="icon" href="data:,">
-    <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">
+    <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
 </head>
 <style>
     body, html, div {
@@ -61,9 +61,27 @@ En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que r
 </div>
 
 <script>
+  // Fetches `url` and resolves to the response body as an ArrayBuffer; throws an Error carrying the HTTP status when the response is not ok
+  const downloadAsArrayBuffer = async(url) => {
+    const response = await fetch(url);
+    if (!response.ok) {
+      throw Error(`Downloading ${url} failed: HTTP ${response.status} - ${response.statusText}`);
+    }
+    return response.arrayBuffer();
+  }
 
-  var model, request, input = undefined;
-  const loadModel = (from, to) => {
+  // Allocates a Module.AlignedMemory sized to `buffer` (passing `alignmentSize` as its second argument), copies the buffer's bytes into it and returns it
+  function constructAlignedMemoryFromBuffer(buffer, alignmentSize) {
+    var byteArray = new Int8Array(buffer);
+    console.debug("byteArray size: ", byteArray.byteLength);
+    var alignedMemory = new Module.AlignedMemory(byteArray.byteLength, alignmentSize);
+    const alignedByteArrayView = alignedMemory.getByteArrayView();
+    alignedByteArrayView.set(byteArray);
+    return alignedMemory;
+  }
+
+  var translationModel, request, input = undefined;
+  const constructTranslationModel = async (from, to) => {
 
     const languagePair = `${from}${to}`;
 
@@ -72,11 +90,11 @@ En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que r
 
     // Set the Model Configuration as YAML formatted string.
     // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
-    const modelConfig = `models:
+    /*const modelConfig = `models:
   - /${languagePair}/model.${languagePair}.intgemm.alphas.bin
 vocabs:
-  - /${vocabLanguagePair}/vocab.${vocabLanguagePair}.spm
-  - /${vocabLanguagePair}/vocab.${vocabLanguagePair}.spm
+  - /${languagePair}/vocab.${vocabLanguagePair}.spm
+  - /${languagePair}/vocab.${vocabLanguagePair}.spm
 beam-size: 1
 normalize: 1.0
 word-penalty: 0
@@ -93,22 +111,54 @@ shortlist:
     - 50
     - 50
 `;
-/*
-This config is not valid anymore in new APIs
-mini-batch: 32
-maxi-batch: 100
-maxi-batch-sort: src
 */
+
+const modelConfigWithoutModelAndShortList = `vocabs:
+  - /${languagePair}/vocab.${vocabLanguagePair}.spm
+  - /${languagePair}/vocab.${vocabLanguagePair}.spm
+beam-size: 1
+normalize: 1.0
+word-penalty: 0
+max-length-break: 128
+mini-batch-words: 1024
+workspace: 128
+max-length-factor: 2.0
+skip-cost: true
+cpu-threads: 0
+quiet: true
+quiet-translation: true
+gemm-precision: int8shift
+`;
+
 // TODO: Use in model config when wormhole is enabled:
 // gemm-precision: int8shift
 // TODO: Use in model config when loading of binary models is supported and we use model.intgemm.alphas.bin:
 // gemm-precision: int8shiftAlphaAll
 
-    console.debug("modelConfig: ", modelConfig);
+    const modelFile = `models/${languagePair}/model.${languagePair}.intgemm.alphas.bin`;
+    console.debug("modelFile: ", modelFile);
+    const shortlistFile = `models/${languagePair}/lex.50.50.${languagePair}.s2t.bin`;
+    console.debug("shortlistFile: ", shortlistFile);
 
-    // Instantiate the TranslationModel
-    if (model) model.delete();
-    model = new Module.TranslationModel(modelConfig);
+    try {
+      // Download the files as buffers from the given urls
+      let start = Date.now();
+      const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]);
+      const modelBuffer = downloadedBuffers[0];
+      const shortListBuffer = downloadedBuffers[1];
+      log(`${languagePair} file download took ${(Date.now() - start) / 1000} secs`);
+
+      // Construct AlignedMemory objects with downloaded buffers
+      var alignedModelMemory = constructAlignedMemoryFromBuffer(modelBuffer, 256);
+      var alignedShortlistMemory = constructAlignedMemoryFromBuffer(shortListBuffer, 64);
+
+      // Instantiate the TranslationModel
+      if (translationModel) translationModel.delete();
+      console.debug("Creating TranslationModel with config:", modelConfigWithoutModelAndShortList);
+      translationModel = new Module.TranslationModel(modelConfigWithoutModelAndShortList, alignedModelMemory, alignedShortlistMemory);
+    } catch (error) {
+      log(error);
+    }
   }
 
   const translate = (paragraphs) => {
@@ -127,16 +177,9 @@ maxi-batch-sort: src
     })
     // Access input (just for debugging)
     console.log('Input size=', input.size());
-    /*
-    for (let i = 0; i < input.size(); i++) {
-      console.log(' val:' + input.get(i));
-    }
-    */
 
     // Translate the input; the result is a vector<TranslationResult>
-    let result = model.translate(input, request);
-    // Access original and translated text from each entry of vector<TranslationResult>
-    //console.log('Result size=', result.size(), ' - TimeDiff - ', (Date.now() - start)/1000);
+    let result = translationModel.translate(input, request);
     const translatedParagraphs = [];
     for (let i = 0; i < result.size(); i++) {
       translatedParagraphs.push(result.get(i).getTranslatedText());
@@ -147,14 +190,16 @@ maxi-batch-sort: src
     return translatedParagraphs;
   }
 
-  document.querySelector("#load").addEventListener("click", () => {
+  document.querySelector("#load").addEventListener("click", async() => {
+    document.querySelector("#load").disabled = true;
     const lang = document.querySelector('input[name="modellang"]:checked').value;
     const from = lang.substring(0, 2);
     const to = lang.substring(2, 4);
     let start = Date.now();
-    loadModel(from, to)
-    log(`model ${from}${to} loaded in ${(Date.now() - start) / 1000} secs`);
-    //log('Model Alignment:', model.isAlignmentSupported());
+    await constructTranslationModel(from, to);
+    log(`translation model ${from}${to} construction took ${(Date.now() - start) / 1000} secs`);
+    document.querySelector("#load").disabled = false;
+    //log('Model Alignment:', translationModel.isAlignmentSupported());
   });
 
   const translateCall = () => {
diff --git a/wasm/test_page/start_server.sh b/wasm/test_page/start_server.sh
index b6fc2a6..9113646 100644
--- a/wasm/test_page/start_server.sh
+++ b/wasm/test_page/start_server.sh
@@ -1,9 +1,30 @@
 #!/bin/bash
-echo "Start: Copying artifacts in local folder------"
-cp ../../build-wasm/wasm/bergamot-translator-worker.data .
-cp ../../build-wasm/wasm/bergamot-translator-worker.js .
-cp ../../build-wasm/wasm/bergamot-translator-worker.wasm .
-cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js .
+
+usage="Copy wasm artifacts from build directory and start httpserver
+
+Usage: $(basename "$0") [WASM_ARTIFACTS_FOLDER]
+
+    where:
+    WASM_ARTIFACTS_FOLDER    Folder containing pre-built wasm artifacts"
+
+if [ "$#" -ne 1 ]; then
+    echo "Illegal number of parameters passed"
+    echo "$usage"
+    exit 1 # non-zero status so that callers/CI notice the failure
+fi
+
+# Check if WASM_ARTIFACTS_FOLDER is a valid folder or not
+if [ ! -d "$1" ]; then
+    echo "Error: Folder \"$1\" doesn't exist"
+    exit 1
+fi
+
+# Copy every wasm artifact; the glob is kept outside the quotes so that it still expands
+for i in "$1"/bergamot-translator-worker.*; do
+    [ -f "$i" ] || continue # an unmatched glob stays literal: skip anything that is not a file
+    cp "$i" .
+    echo "Copied \"$i\""
+done
 
 npm install
 echo "Start httpserver"