Wasm test page UI for translating b/w non-English language pairs (#231)

* Updated Wasm test page UI for translating b/w non-English language pairs
* Both "from" and "to" language dropdowns now allow non-English languages
This commit is contained in:
Abhishek Aggarwal 2021-10-19 14:40:54 +02:00 committed by GitHub
parent c7b626dfd0
commit a0cb1e4b3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 1452 additions and 522 deletions

View File

@ -1,95 +1,25 @@
# Using Bergamot Translator in JavaScript
Instructions in this document assume the current directory to be the
[wasm](https://github.com/browsermt/bergamot-translator/tree/main/wasm) folder within
the bergamot-translator source.
The example file `bergamot.html` in the folder `test_page` demonstrates how to
use the bergamot translator in JavaScript via a `<script>` tag.
## Pre-requisites
**Download files required for translation**
Please note that the [Using JS APIs](#using-js-apis) and [Demo](#demo) sections below assume that the [bergamot project specific model files](https://github.com/mozilla-applied-ml/bergamot-models) are already downloaded and present in the `test_page` folder. If this has not been done, use the following instructions to do so:
```bash
cd test_page
git clone --depth 1 --branch main --single-branch https://github.com/mozilla-applied-ml/bergamot-models
mkdir models
cp -rf bergamot-models/prod/* models
gunzip models/*/*
```
All the instructions below are meant to run from the current directory.
## Using JS APIs
```js
// The model configuration as YAML formatted string. For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
// This example captures some of the most relevant options
const modelConfig = `beam-size: 1
normalize: 1.0
word-penalty: 0
max-length-break: 128
mini-batch-words: 1024
workspace: 128
max-length-factor: 2.0
skip-cost: true
cpu-threads: 0
quiet: true
quiet-translation: true
gemm-precision: int8shift
`;
Please refer to the file `test_page/js/worker.js`, which demonstrates how to use the bergamot translator in JavaScript via a `<script>` tag.
// Download model, shortlist and vocabulary files and read them into buffers
const modelFile = `models/esen/model.esen.intgemm.alphas.bin`;
const shortlistFile = `models/esen/lex.50.50.esen.s2t.bin`;
const vocabFiles = [`models/${languagePair}/vocab.${vocabLanguagePair}.spm`,
`models/${languagePair}/vocab.${vocabLanguagePair}.spm`];
const uniqueVocabFiles = new Set(vocabFiles);
## Demo
// Please refer to bergamot.html in test_page folder for downloadAsArrayBuffer function
const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]);
const downloadedVocabBuffers = [];
for (let item of uniqueVocabFiles.values()) {
downloadedVocabBuffers.push(await downloadAsArrayBuffer(item));
}
* Download bergamot model files required for translation
const modelBuffer = downloadedBuffers[0];
const shortListBuffer = downloadedBuffers[1];
Use the following instructions to download the [model files](https://github.com/mozilla/firefox-translations-models/) (make sure that `git-lfs` is installed and initialized before running these instructions):
// Construct AlignedMemory instances from the buffers
var alignedModelMemory = constructAlignedMemoryFromBuffer(modelBuffer, 256); // Please refer to bergamot.html in test_page folder for this function
var alignedShortlistMemory = constructAlignedMemoryFromBuffer(shortListBuffer, 64); // Please refer to bergamot.html in test_page folder for this function
var alignedVocabsMemoryList = new Module.AlignedMemoryList;
downloadedVocabBuffers.forEach(item => alignedVocabsMemoryList.push_back(constructAlignedMemoryFromBuffer(item, 64)));
// Instantiate the Translation Service
const translationService = new Module.Service(modelConfig, alignedModelMemory, alignedShortlistMemory, alignedVocabsMemoryList);
// Instantiate the arguments of translate() API i.e. ResponseOptions and input (vector<string>)
const responseOptions = new Module.ResponseOptions();
const input = new Module.VectorString;
// Initialize the input
input.push_back("Hola"); input.push_back("Mundo");
// translate the input; the result is a vector<Response>
const result = translationService.translate(input, responseOptions);
// Print original and translated text from each entry of vector<Response>
for (let i = 0; i < result.size(); i++) {
console.log(' original=' + result.get(i).getOriginalText() + ', translation=' + result.get(i).getTranslatedText());
}
// Don't forget to clean up the instances
translationService.delete();
responseOptions.delete();
input.delete();
```
## Demo
* Make sure that you followed [Pre-requisites](#pre-requisites) instructions before moving forward.
```bash
cd test_page
git clone --depth 1 --branch main --single-branch https://github.com/mozilla/firefox-translations-models/
mkdir models
cp -rf firefox-translations-models/models/prod/* models
cp -rf firefox-translations-models/models/dev/* models
gunzip models/*/*
```
* Start the test webserver (ensure you have the latest nodejs installed)
```bash
@ -114,10 +44,10 @@ input.delete();
* Browse to the following page:
```
http://localhost:8000/bergamot.html
http://localhost:80
```
* Run some translations:
* Choose a model and press `Load Model`
* Type a sentence to be translated in the `From` textbox and press `Translate`
* See the results in the `To` and `Log` textboxes
* Perform translations:
* Choose the source and target languages using `From` and `To` dropdowns.
* Type a sentence to be translated in the `From` textbox.
* See the result in the `To` textbox.

View File

@ -1,6 +1,5 @@
require(__dirname + '/helper.js');
const http = require('http');
const https = require('https')
const express = require('express');
const app = express();
const server = http.createServer(app);
@ -8,11 +7,36 @@ const fs = require('fs');
const url = require('url');
const nocache = require('nocache');
const cors = require('cors');
const path = require('path');
// Optional command-line arguments: [port] [skipssl] [certpath].
// Each one falls back to its default when the argument is absent or empty.

// Port for the plain-HTTP listener.
let port = process.argv[2] || 8000;

// Compared loosely against 1 elsewhere in this file, so "1" turns HTTPS off.
let skipssl = process.argv[3] || 0;

// Directory the HTTPS listener reads its .pem files from.
let certpath = process.argv[4] || "/etc/letsencrypt";
app.use(cors())
app.use(nocache());
// Serves the test page at the site root.
// Non-secure requests are redirected to HTTPS unless skipssl is 1.
app.get('/', cors(), function(req, res) {
  if (!req.secure && skipssl != 1) {
    return res.redirect("https://" + req.headers.host + req.url);
  }
  // Set the Cross-Origin-* headers BEFORE handing the response to sendFile(),
  // so they never depend on the file being sent asynchronously.
  res.header('Cross-Origin-Embedder-Policy','require-corp');
  res.header('Cross-Origin-Opener-Policy','same-origin');
  res.header('Cross-Origin-Resource-Policy','same-origin');
  res.sendFile(path.join(__dirname, 'index.html'));
});
app.get('/*.*' , cors(), function(req, res) {
var options = url.parse(req.url, true);
var mime = Helper.getMime(options);
@ -34,5 +58,36 @@ function serveFile(res, pathName, mime) {
});
}
// Start the HTTPS listener on port 443 unless skipssl is 1 (or "1").
if (skipssl != 1) {
  // Key, certificate and chain are read synchronously from certpath at startup.
  const tlsOptions = {
    key: fs.readFileSync(`${certpath}/privkey.pem`),
    cert: fs.readFileSync(`${certpath}/cert.pem`),
    ca: fs.readFileSync(`${certpath}/chain.pem`),
  };
  const secureServer = https.createServer(tlsOptions, app);
  secureServer.listen(443, () => {
    console.log('Listening https port 443')
  });
}
/**
 * MIME-type helpers used when serving static files.
 */
const Helper = {
  // File extension (no dot, lower case) -> MIME type.
  types: {
    "wasm" : "application/wasm"
    , "js" : "application/javascript"
    , "html" : "text/html"
    , "htm" : "text/html"
    , "ico" : "image/vnd.microsoft.icon"
    , "css" : "text/css"
  },

  /**
   * Returns the MIME type for a parsed URL.
   * @param {{pathname: string}} u - object exposing the request path as `pathname`.
   * @returns {string} the registered type, or 'application/octet-stream' when
   *   the extension is missing or unknown.
   */
  getMime: function(u) {
    var ext = this.getExt(u.pathname).replace('.', '').toLowerCase();
    // Own-property check: a plain `this.types[ext]` lookup would also find
    // inherited members (e.g. "constructor", "__proto__") and return a
    // non-string value for such extensions.
    return Object.prototype.hasOwnProperty.call(this.types, ext)
      ? this.types[ext]
      : 'application/octet-stream';
  },

  /**
   * Returns everything from the last '.' in `path` (dot included),
   * or '' when the path contains no dot.
   * @param {string} path
   * @returns {string}
   */
  getExt: function(path) {
    var i = path.lastIndexOf('.');
    return (i < 0) ? '' : path.slice(i);
  }
};
server.listen(port);
console.log(`HTTP and BinaryJS server started on port ${port}`);
console.log(`HTTP and BinaryJS server started on port ${port}`);

View File

@ -1,66 +0,0 @@
<!doctype html>
<html>
<head>
<link rel="icon" href="data:,">
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
</head>
<style>
body, html, div {
margin-left: 1%;
margin-right: 1%;
margin-bottom: 1%;
margin-top: 1%;
padding-left: 1%;
padding-right: 1%;
padding-bottom: 1%;
padding-top: 1%;
}
textarea, #to, #from {
width: 100%;
max-width: 100%;
}
div {
float: left;
width: 80%;
}
</style>
<body>
<div id="divradios">
<label>Choose the model to use</label>
<input type="radio" name="modellang" value="enes"/><label>English to Spanish</label>
<input type="radio" name="modellang" value="esen" checked/><label>Spanish to English</label>
<input type="radio" name="modellang" value="eten" checked/><label>Estonian to English</label>
<input type="radio" name="modellang" value="enet" checked/><label>English to Estonian</label>
<input type="radio" name="modellang" value="ende" checked/><label>English to German</label>
<input type="button" id="load" value="Load Model"/>
</div>
<div id="divtranslation">
<label for="from">From</label>
<textarea id="from" name="from">
Una estrategia republicana para obstaculizar la reelección de Obama. Los dirigentes republicanos justificaron su política por la necesidad de luchar contra el fraude electoral.
Ahora bien, el Centro Brennan considera esto último un mito y afirma que el fraude electoral es menos frecuente en los Estados Unidos que el número de personas que mueren a causa de la caída de un rayo.
De hecho, los abogados republicanos no han encontrado más que 300 casos de fraude electoral en los Estados Unidos en diez años. Una cosa es cierta: esas nuevas disposiciones afectarán negativamente a la tasa de participación.
En ese sentido, estas medidas minarán en parte el sistema democrático americano. Al contrario de lo que ocurre en Canadá, los estados americanos son responsables de la organización de las elecciones federales en los Estados Unidos.
Y en esa misma línea una mayoría de los gobiernos americanos promulgaron, a partir de 2009, nuevas leyes que dificultaban el proceso de inscripción o de votación. Este fenómeno se ha extendido tras las elecciones de noviembre de 2010, que vieron el aumento de 675 nuevos representantes republicanos en 26 estados.
En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que restringían el ejercicio del derecho de voto en 41 estados.
</textarea>
<br><br>
<label for="to">To</label>
<textarea id="to" name="to" readonly></textarea>
<br><br>
<input type="button" id="translate" value="Translate"/>
</div>
<div id="divlog">
<label for="log">Log:</label><br>
<textarea id="log" name="log" rows="50" cols="75"></textarea>
</div>
<script src="bergamot.js"></script>
<script src="bergamot-translator-worker.js"></script>
</body>
</html>

View File

@ -1,54 +0,0 @@
// Translation worker; stays undefined when the browser has no Worker support.
var worker;
if (window.Worker) {
  worker = new Worker('worker.js');
  // Ask the worker to load the translator module right away.
  worker.postMessage(["load_module"]);
}
// Appends `message` plus a newline to the #log textarea.
const log = (message) => {
  const logBox = document.querySelector("#log");
  logBox.value = logBox.value + message + "\n";
};
// Translate when the "Translate" button is clicked.
document.querySelector("#translate").addEventListener("click", () => {
  translateCall();
});

// Translate when Enter is released inside the "from" textbox.
document.querySelector("#from").addEventListener('keyup', function(event) {
  // KeyboardEvent.keyCode is deprecated; `key` is its replacement.
  if (event.key === "Enter") {
    translateCall();
  }
});

// Ask the worker to load the model picked with the "modellang" radio buttons.
// The radio value is a four-letter pair: source code followed by target code.
document.querySelector("#load").addEventListener("click", () => {
  const loadButton = document.querySelector("#load");
  loadButton.disabled = true;
  const lang = document.querySelector('input[name="modellang"]:checked').value;
  const from = lang.substring(0, 2);
  const to = lang.substring(2, 4);
  worker.postMessage(["load_model", from, to]);
  loadButton.disabled = false;
});
// Sends the "from" textbox content, split on newlines, to the worker together
// with the language pair taken from the checked "modellang" radio button.
const translateCall = () => {
  const paragraphs = document.querySelector('#from').value.split("\n");
  const loadButton = document.querySelector("#load");
  loadButton.disabled = true;
  const pair = document.querySelector('input[name="modellang"]:checked').value;
  worker.postMessage(["translate", pair.substring(0, 2), pair.substring(2, 4), paragraphs]);
  loadButton.disabled = false;
};
// Dispatches replies from the worker; e.data[0] names the reply type.
worker.onmessage = function(e) {
  console.debug(`Message received from worker`);
  switch (e.data[0]) {
    case 'translated_result':
      // e.data[1]: translated paragraphs (may be missing), e.data[2]: log text.
      if (e.data[1]) {
        document.querySelector('#to').value = e.data[1].join("\n");
      }
      log(e.data[2]);
      break;
    case 'module_loaded':
    case 'model_loaded':
      log(e.data[1]);
      break;
  }
};

View File

@ -0,0 +1,99 @@
/* Stylesheet for the translation test page. */

* {
  box-sizing: border-box;
}

html,
body {
  height: 100%;
  margin: 0;
  font-size: 18px;
  /* Generic family added as the last-resort fallback. */
  font-family: Optima, Helvetica, Arial, sans-serif;
}

body {
  padding: 1rem;
}

/* Two text panels side by side with the swap button between them and the
   status line underneath. */
.app {
  padding: 1rem;
  display: grid;
  grid: "from swap to" 1fr "status status status" auto / 1fr auto 1fr;
  grid-gap: 1rem;
  overflow: hidden;
  min-height: 400px;
  max-width: 1024px;
  margin: 1em auto;
}

/* Narrow screens: stack the panels vertically. */
@media screen and (max-width: 640px) {
  .app {
    grid: "from from" auto "status swap" auto "to to" auto / 1fr;
  }
}

.panel {
  display: grid;
  grid-template-rows: auto 1fr;
  grid-gap: 1rem;
}

label {
  padding: 0 0.5em;
  display: flex;
  align-items: center;
}

.lang-select {
  padding: 0.25rem 0.5rem;
  margin-left: 1rem;
  background: #f4f4f4;
  font-size: 0.9rem;
  border: 1px solid #ccc;
  border-radius: 0.25rem;
  cursor: pointer;
}

.panel--from {
  grid-area: from;
}

.panel--to {
  grid-area: to;
}

.swap {
  align-self: center;
  grid-area: swap;
  font-size: 1.1rem;
}

#status {
  grid-area: status;
  text-align: center;
  align-self: center;
}

textarea {
  padding: 1rem;
  font-family: sans-serif;
  font-size: 1rem;
  resize: none;
  border-radius: 2px;
  border: 1px solid #ccc;
}

button {
  cursor: pointer;
  border: 1px solid #88c;
  border-radius: 4px;
  background: #eef;
  /* The earlier `padding: 0` was immediately overridden, so it is dropped. */
  padding: 0.25rem 0.5rem;
}

button:hover {
  background: #cce;
}

#output {
  background-color: #f4f4f4;
}

View File

@ -1,40 +0,0 @@
/*
* @author - Based of a file from Gist here: https://gist.github.com/1757658
*
* @modified - Mike Newell - it was on Gist so I figure I can use it
*
* @Description - Added support for a few more mime types including the new
* .ogv, .webm, and .mp4 file types for HTML5 video.
*
*/
/*
* @modified - Andre Natal - removed unused types for the purpose of this use
case
*/
// MIME-type helpers. Published as a global on purpose: this file declares no
// exports, so the name is made available through the global object. The
// global is now assigned explicitly instead of via an undeclared variable.
globalThis.Helper = {
  // File extension (no dot, lower case) -> MIME type.
  types: {
    "wasm" : "application/wasm"
    , "js" : "application/javascript"
    , "html" : "text/html"
    , "htm" : "text/html"
    , "ico" : "image/vnd.microsoft.icon",
  },

  // Returns the MIME type for a parsed URL (`u.pathname` is the request path),
  // or 'application/octet-stream' when the extension is missing or unknown.
  getMime: function(u) {
    var ext = this.getExt(u.pathname).replace('.', '').toLowerCase();
    // Own-property check so inherited members such as "constructor" are not
    // mistaken for registered extensions.
    return Object.prototype.hasOwnProperty.call(this.types, ext)
      ? this.types[ext]
      : 'application/octet-stream';
  },

  // Returns everything from the last '.' in `path` (dot included), or ''.
  getExt: function(path) {
    var i = path.lastIndexOf('.');
    return (i < 0) ? '' : path.slice(i);
  }
};

33
wasm/test_page/index.html Normal file
View File

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html>
<head>
<title>Mozilla Translations</title>
<link rel="stylesheet" href="css/index.css" />
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
<meta
name="viewport"
content="width=device-width, initial-scale=1.0, viewport-fit=cover"
/>
</head>
<body>
<div class="app">
<div class="panel panel--from">
<label>
From
<select id="lang-from" name="from" class="lang-select"></select>
</label>
<textarea id="input" name="input"></textarea>
</div>
<button class="swap" title="swap">↔️</button>
<div class="panel panel--to">
<label>
To
<select id="lang-to" name="to" class="lang-select"></select>
</label>
<textarea id="output" name="output" readonly></textarea>
</div>
<div class="footer" id="status"></div>
</div>
<script src="js/index.js"></script>
</body>
</html>

101
wasm/test_page/js/index.js Normal file
View File

@ -0,0 +1,101 @@
// Translation worker; stays undefined when window.Worker is unavailable.
let worker;
// Assigned when the worker answers with an "import_reply" message.
let modelRegistry;

// Shorthand for document.querySelector.
const $ = (selector) => document.querySelector(selector);

// Writes `message` into the #status element and returns the message.
const status = (message) => {
  $("#status").innerText = message;
  return message;
};

// The two language dropdowns.
const langFrom = $("#lang-from");
const langTo = $("#lang-to");

// Languages offered in both dropdowns, as [code, display name] pairs.
const langs = [
  ["en", "English"],
  ["it", "Italian"],
  ["pt", "Portuguese"],
  ["ru", "Russian"],
  ["cs", "Czech"],
  ["de", "German"],
  ["es", "Spanish"],
  ["et", "Estonian"],
];

if (window.Worker) {
  worker = new Worker("js/worker.js");
  // Ask the worker to import the translator module.
  worker.postMessage(["import"]);
}

// Re-translate on every key release inside the input box.
$("#input").addEventListener("keyup", () => {
  translateCall();
});
// Sends the current input text to the worker for translation.
// Nothing is sent when the input is empty or whitespace only.
const translateCall = () => {
  // A single space is appended to the text before it is checked and split.
  const text = `${document.querySelector("#input").value} `;
  if (text.trim().length === 0) {
    return;
  }
  // Mark the output as disabled while the request is in flight.
  $("#output").setAttribute("disabled", true);
  worker.postMessage(["translate", langFrom.value, langTo.value, text.split("\n")]);
};
// Handles replies posted by the worker. Every reply is an array whose first
// element is the reply type and whose second element is the payload.
worker.onmessage = function (e) {
  const replyType = e.data[0];
  const payload = e.data[1];
  if (replyType === "translate_reply") {
    if (payload) {
      document.querySelector("#output").value = payload.join("\n\n");
    }
    // Re-enable the output even when the payload is empty; otherwise the
    // "disabled" attribute set by translateCall() would stay on the textarea
    // whenever a translation produced no result.
    $("#output").removeAttribute("disabled");
  } else if (replyType === "load_model_reply" && payload) {
    // Show the load status, then translate whatever is in the input box.
    status(payload);
    translateCall();
  } else if (replyType === "import_reply" && payload) {
    // The payload of this reply is stored as the model registry.
    modelRegistry = payload;
    init();
  }
};
// Fill both dropdowns with one <option> per supported language.
// Options are created through the DOM instead of `innerHTML +=`, which
// re-parsed each select's whole content on every iteration and built markup
// from interpolated strings.
langs.forEach(([code, name]) => {
  langFrom.add(new Option(name, code));
  langTo.add(new Option(name, code));
});
// Requests the model for the selected language pair from the worker.
// When both dropdowns hold the same language no model is requested and the
// input text is copied to the output unchanged.
const loadModel = () => {
  const lngFrom = langFrom.value;
  const lngTo = langTo.value;
  if (lngFrom === lngTo) {
    const sameLanguageText = document.querySelector("#input").value;
    document.querySelector("#output").value = sameLanguageText;
    return;
  }
  status(`Installing model...`);
  console.log(`Loading model '${lngFrom}${lngTo}'`);
  worker.postMessage(["load_model", lngFrom, lngTo]);
};
// Reload the model whenever either dropdown changes.
for (const select of [langFrom, langTo]) {
  select.addEventListener("change", () => {
    loadModel();
  });
}

// Swap button: exchange the two selected languages, move the current
// translation into the input box, then load the model for the new pair.
$(".swap").addEventListener("click", () => {
  const previousFrom = langFrom.value;
  const previousTo = langTo.value;
  langFrom.value = previousTo;
  langTo.value = previousFrom;
  $("#input").value = $("#output").value;
  loadModel();
});
// Picks the initial language pair and loads its model.
function init() {
  // Use the primary subtag of navigator.language as the source language,
  // but only if it is one of the supported languages.
  const browserLang = navigator.language;
  if (browserLang) {
    const primaryTag = browserLang.split("-")[0];
    if (langs.some(([code]) => code === primaryTag)) {
      console.log("guessing input language is", primaryTag);
      langFrom.value = primaryTag;
    }
  }

  // Target language: the first supported language that differs from the source.
  const [firstOtherCode] = langs.find(([code]) => code !== langFrom.value);
  langTo.value = firstOtherCode;

  loadModel();
}

View File

@ -0,0 +1,328 @@
//const rootURL = "https://storage.googleapis.com/bergamot-models-sandbox/0.2.10";
const rootURL = "../models";
const modelRegistry = {
enit: {
vocab: {
name: "vocab.enit.spm",
size: 814128,
estimatedCompressedSize: 405338,
expectedSha256Hash:
"de8cbeb79e0139304bfa47e8559f2447016bf9906225a97d3df1baed4de8f3a3",
},
lex: {
name: "lex.50.50.enit.s2t.bin",
size: 4489920,
estimatedCompressedSize: 2409986,
expectedSha256Hash:
"bb1fad3b3f6a13ebce1698cf7f39ca736c4dea4525f3dab5e1a78436f07445e6",
},
model: {
name: "model.enit.intgemm.alphas.bin",
size: 17140836,
estimatedCompressedSize: 13283223,
expectedSha256Hash:
"a5ce3723f62ead92a0e0373b6df0ad8e3e6d22963adb1333984206e33b8b6c61",
},
},
enpt: {
vocab: {
name: "vocab.enpt.spm",
size: 812781,
estimatedCompressedSize: 406524,
expectedSha256Hash:
"633a3d782c79f7d5e4b94ab96848f47c2fdf8ba82dd99efd1742b8a696bbd0cc",
},
lex: {
name: "lex.50.50.enpt.s2t.bin",
size: 4472528,
estimatedCompressedSize: 2411984,
expectedSha256Hash:
"1e96599123d275afa37353dfe84677a4070f013494fbdc9c52a28445cc9bc38d",
},
model: {
name: "model.enpt.intgemm.alphas.bin",
size: 17140836,
estimatedCompressedSize: 13429592,
expectedSha256Hash:
"d968735704c75e33c2e183b9241f14c0b2a560d01d88a2728e5c0119a4d7fb22",
},
},
enru: {
vocab: {
name: "vocab.enru.spm",
size: 937157,
estimatedCompressedSize: 435776,
expectedSha256Hash:
"feca2d44f01b946c85faba3b15b5eb53344bec84cd14a1a4d4a82ddd774c5edd",
},
lex: {
name: "lex.50.50.enru.s2t.bin",
size: 3049096,
estimatedCompressedSize: 1579779,
expectedSha256Hash:
"7bd3e2c0a72286fe1f3da65c56c49a7cd77efa5f1d1a444e2a9e769480b96ff3",
},
model: {
name: "model.enru.intgemm.alphas.bin",
size: 17140836,
estimatedCompressedSize: 12853987,
expectedSha256Hash:
"4a45186a93b8a2dd9301c66a3b3dad580b1bcfa74aadda583ca383f9fe0dea93",
},
},
iten: {
vocab: {
name: "vocab.iten.spm",
size: 814151,
estimatedCompressedSize: 405416,
expectedSha256Hash:
"22d5ce6973be5360a921103acbe984a9bfca952a1f6c55c9cb5ef7de4fd58266",
},
lex: {
name: "lex.50.50.iten.s2t.bin",
size: 5238420,
estimatedCompressedSize: 2860178,
expectedSha256Hash:
"357d362373022b029ee9965975a133e6f36fdb0fed749202ff578365cf0111f8",
},
model: {
name: "model.iten.intgemm.alphas.bin",
size: 17140836,
estimatedCompressedSize: 13423308,
expectedSha256Hash:
"1fae546faeb9046f80b1b7e940b37b660974ce72902778181d6cd1c30b717f35",
},
},
pten: {
vocab: {
name: "vocab.pten.spm",
size: 812889,
estimatedCompressedSize: 406730,
expectedSha256Hash:
"8389979e3c965688b07aeb712a7e44406e5dcdb2b84087229d26fcc71448c4ed",
},
lex: {
name: "lex.50.50.pten.s2t.bin",
size: 5001420,
estimatedCompressedSize: 2733800,
expectedSha256Hash:
"212ed0ae44a6f920cd6d17ca02f0a523ba6c4b0ef5078ae310c20bc4c51484c5",
},
model: {
name: "model.pten.intgemm.alphas.bin",
size: 17140836,
estimatedCompressedSize: 13584764,
expectedSha256Hash:
"6c3b7af01772022a19712410c63342ba581468c2f1aac34d7488409c4043e697",
},
},
ruen: {
vocab: {
name: "vocab.ruen.spm",
size: 936576,
estimatedCompressedSize: 435801,
expectedSha256Hash:
"aaf9a325c0a988c507d0312cb6ba1a02bac7a370bcd879aedee626a40bfbda78",
},
lex: {
name: "lex.50.50.ruen.s2t.bin",
size: 5090836,
estimatedCompressedSize: 2684919,
expectedSha256Hash:
"e6667e22f5f86be4872e3768b7184727f5dd8c9f2ccfb0639baabcb1176f5d11",
},
model: {
name: "model.ruen.intgemm.alphas.bin",
size: 17140836,
estimatedCompressedSize: 13108893,
expectedSha256Hash:
"3b6a0305e3d232fadd54f5a765365b7b96ad6d8f2e818cba594b02fbd8fadb3d",
},
},
csen: {
vocab: {
name: "vocab.csen.spm",
size: 769763,
estimatedCompressedSize: 366392,
expectedSha256Hash:
"f71cc5d045e479607078e079884f44032f5a0b82547fb96eefa29cd1eb47c6f3",
},
lex: {
name: "lex.50.50.csen.s2t.bin",
size: 4535788,
estimatedCompressedSize: 2418488,
expectedSha256Hash:
"8228a3c3f7887759a62b7d7c674a7bef9b70161913f9b0939ab58f71186835c2",
},
model: {
name: "model.csen.intgemm.alphas.bin",
size: 17140756,
estimatedCompressedSize: 13045032,
expectedSha256Hash:
"5b16661e2864dc50b2f4091a16bdd4ec8d8283e04271e602159ba348df5d6e2d",
},
},
deen: {
vocab: {
name: "vocab.deen.spm",
size: 784269,
estimatedCompressedSize: 410738,
expectedSha256Hash:
"417668f2ed297970febafb5b079a9d5ebc4ed0b3550ac8386d67a90473a09bd7",
},
lex: {
name: "lex.50.50.deen.s2t.bin",
size: 5047568,
estimatedCompressedSize: 2657472,
expectedSha256Hash:
"2f7c0f7bbce97ae5b52454074a892ba7b7610fb98e3c5d341e4ca79f0850c4de",
},
model: {
name: "model.deen.intgemm.alphas.bin",
size: 17140837,
estimatedCompressedSize: 13091214,
expectedSha256Hash:
"dda44d87ab0d8ad3b3871122fd3ee385f37878183a8b4ec139cd909531ec5009",
},
},
encs: {
vocab: {
name: "vocab.csen.spm",
size: 769763,
estimatedCompressedSize: 366392,
expectedSha256Hash:
"f71cc5d045e479607078e079884f44032f5a0b82547fb96eefa29cd1eb47c6f3",
},
lex: {
name: "lex.50.50.encs.s2t.bin",
size: 3556124,
estimatedCompressedSize: 1913246,
expectedSha256Hash:
"e19c77231bf977988e31ff8db15fe79966b5170564bd3e10613f239e7f461d97",
},
model: {
name: "model.encs.intgemm.alphas.bin",
size: 17140756,
estimatedCompressedSize: 12630325,
expectedSha256Hash:
"9a2fe0588bd972accfc801e2f31c945de0557804a91666ae5ab43b94fb74ac4b",
},
},
ende: {
vocab: {
name: "vocab.deen.spm",
size: 797501,
estimatedCompressedSize: 412505,
expectedSha256Hash:
"bc8f8229933d8294c727f3eab12f6f064e7082b929f2d29494c8a1e619ba174c",
},
lex: {
name: "lex.50.50.ende.s2t.bin",
size: 3062492,
estimatedCompressedSize: 1575385,
expectedSha256Hash:
"764797d075f0642c0b079cce6547348d65fe4e92ac69fa6a8605cd8b53dacb3f",
},
model: {
name: "model.ende.intgemm.alphas.bin",
size: 17140498,
estimatedCompressedSize: 13207068,
expectedSha256Hash:
"f0946515c6645304f0706fa66a051c3b7b7c507f12d0c850f276c18165a10c14",
},
},
enes: {
vocab: {
name: "vocab.esen.spm",
size: 825463,
estimatedCompressedSize: 414566,
expectedSha256Hash:
"909b1eea1face0d7f90a474fe29a8c0fef8d104b6e41e65616f864c964ba8845",
},
lex: {
name: "lex.50.50.enes.s2t.bin",
size: 3347104,
estimatedCompressedSize: 1720700,
expectedSha256Hash:
"3a113d713dec3cf1d12bba5b138ae616e28bba4bbc7fe7fd39ba145e26b86d7f",
},
model: {
name: "model.enes.intgemm.alphas.bin",
size: 17140755,
estimatedCompressedSize: 12602853,
expectedSha256Hash:
"fa7460037a3163e03fe1d23602f964bff2331da6ee813637e092ddf37156ef53",
},
},
enet: {
vocab: {
name: "vocab.eten.spm",
size: 828426,
estimatedCompressedSize: 416995,
expectedSha256Hash:
"e3b66bc141f6123cd40746e2fb9b8ee4f89cbf324ab27d6bbf3782e52f15fa2d",
},
lex: {
name: "lex.50.50.enet.s2t.bin",
size: 2700780,
estimatedCompressedSize: 1336443,
expectedSha256Hash:
"3d1b40ff43ebef82cf98d416a88a1ea19eb325a85785eef102f59878a63a829d",
},
model: {
name: "model.enet.intgemm.alphas.bin",
size: 17140754,
estimatedCompressedSize: 12543318,
expectedSha256Hash:
"a28874a8b702a519a14dc71bcee726a5cb4b539eeaada2d06492f751469a1fd6",
},
},
esen: {
vocab: {
name: "vocab.esen.spm",
size: 825463,
estimatedCompressedSize: 414566,
expectedSha256Hash:
"909b1eea1face0d7f90a474fe29a8c0fef8d104b6e41e65616f864c964ba8845",
},
lex: {
name: "lex.50.50.esen.s2t.bin",
size: 3860888,
estimatedCompressedSize: 1978538,
expectedSha256Hash:
"f11a2c23ef85ab1fee1c412b908d69bc20d66fd59faa8f7da5a5f0347eddf969",
},
model: {
name: "model.esen.intgemm.alphas.bin",
size: 17140755,
estimatedCompressedSize: 13215960,
expectedSha256Hash:
"4b6b7f451094aaa447d012658af158ffc708fc8842dde2f871a58404f5457fe0",
},
},
eten: {
vocab: {
name: "vocab.eten.spm",
size: 828426,
estimatedCompressedSize: 416995,
expectedSha256Hash:
"e3b66bc141f6123cd40746e2fb9b8ee4f89cbf324ab27d6bbf3782e52f15fa2d",
},
lex: {
name: "lex.50.50.eten.s2t.bin",
size: 3974944,
estimatedCompressedSize: 1920655,
expectedSha256Hash:
"6992bedc590e60e610a28129c80746fe5f33144a4520e2c5508d87db14ca54f8",
},
model: {
name: "model.eten.intgemm.alphas.bin",
size: 17140754,
estimatedCompressedSize: 12222624,
expectedSha256Hash:
"aac98a2371e216ee2d4843cbe896c617f6687501e17225ac83482eba52fd0028",
},
},
};

298
wasm/test_page/js/worker.js Normal file
View File

@ -0,0 +1,298 @@
// State owned by the translation service (all start out undefined)
var translationService;
var responseOptions;
var input = undefined;

// Maps a language pair to its TranslationModel object
var languagePairToTranslationModels = new Map();

// Scripts pulled in through importScripts()
const BERGAMOT_TRANSLATOR_MODULE = "bergamot-translator-worker.js";
const MODEL_REGISTRY = "modelRegistry.js";

// string <-> utf-8 converters
const encoder = new TextEncoder();
const decoder = new TextDecoder();

// Timestamps used for the timing log messages
const start = Date.now();
let moduleLoadStart;
// Configuration object for the wasm module: two lifecycle hooks that log
// timings and, once the runtime is up, publish the model registry.
var Module = {
  preRun: [
    () => {
      const sinceStartSecs = (Date.now() - start) / 1000;
      log(`Time until Module.preRun: ${sinceStartSecs} secs`);
      moduleLoadStart = Date.now();
    },
  ],
  onRuntimeInitialized() {
    const sincePreRunSecs = (Date.now() - moduleLoadStart) / 1000;
    log(`Wasm Runtime initialized Successfully (preRun -> onRuntimeInitialized) in ${sincePreRunSecs} secs`);
    // Load the registry script and hand its contents to the main script.
    importScripts(MODEL_REGISTRY);
    postMessage([`import_reply`, modelRegistry]);
  },
};
// Single logging entry point of the worker; forwards to console.debug.
const log = (message) => {
  console.debug(message);
};
// Entry point for messages from the main script. Each message is an array
// whose first element is the command name:
//   ['import']                          - import the translator module script
//   ['load_model', from, to]            - build the service and the model(s)
//   ['translate', from, to, paragraphs] - translate the paragraphs
// 'load_model' and 'translate' answer with [`${command}_reply`, result];
// 'import' posts nothing from here, and unknown commands are ignored.
onmessage = async function(e) {
  const command = e.data[0];
  log(`Message '${command}' received from main script`);
  // Stays "" when a translation throws.
  let result = "";

  switch (command) {
    case 'import':
      importScripts(BERGAMOT_TRANSLATOR_MODULE);
      return;

    case 'load_model': {
      const startedAt = Date.now();
      const from = e.data[1];
      const to = e.data[2];
      try {
        await constructTranslationService();
        await constructTranslationModel(from, to);
        log(`Model '${from}${to}' successfully constructed. Time taken: ${(Date.now() - startedAt) / 1000} secs`);
        result = "Model successfully loaded";
      } catch (error) {
        log(`Model '${from}${to}' construction failed: '${error.message}'`);
        result = "Model loading failed";
      }
      break;
    }

    case 'translate': {
      const from = e.data[1];
      const to = e.data[2];
      const inputParagraphs = e.data[3];
      // Word count (space separated, empty tokens dropped) for the speed log.
      const inputWordCount = inputParagraphs.reduce(
        (count, paragraph) =>
          count + paragraph.trim().split(" ").filter(word => word.trim() !== "").length,
        0
      );
      const startedAt = Date.now();
      try {
        result = translate(from, to, inputParagraphs);
        const secs = (Date.now() - startedAt) / 1000;
        log(`Translation '${from}${to}' Successful. Speed: ${Math.round(inputWordCount / secs)} WPS (${inputWordCount} words in ${secs} secs)`);
      } catch (error) {
        log(`Error: ${error.message}`);
      }
      break;
    }

    default:
      return;
  }

  log(`'${command}' command done, Posting message back to main script`);
  postMessage([`${command}_reply`, result]);
};
// Instantiates the Translation Service once; later calls are no-ops.
// The service is stored in the shared `translationService` variable.
const constructTranslationService = async () => {
  if (translationService) {
    return;
  }
  const translationServiceConfig = {};
  // Serialize the config for the log: interpolating the object directly
  // would only print "[object Object]".
  log(`Creating Translation Service with config: ${JSON.stringify(translationServiceConfig)}`);
  translationService = new Module.BlockingService(translationServiceConfig);
  log(`Translation Service created successfully`);
};
// Builds the translation model(s) needed for the `from` -> `to` pair.
// All previously built models are destroyed first. When neither language is
// English, two models are built ('<from>en' and 'en<to>') so that English can
// serve as the pivot language.
const constructTranslationModel = async (from, to) => {
  // Release every existing model, then forget them.
  for (const [pair, model] of languagePairToTranslationModels) {
    log(`Destructing model '${pair}'`);
    model.delete();
  }
  languagePairToTranslationModels.clear();

  const involvesEnglish = from === 'en' || to === 'en';
  if (involvesEnglish) {
    log(`Constructing model '${from}${to}'`);
    await _constructTranslationModelInvolvingEnglish(from, to);
    return;
  }

  log(`Constructing model '${from}${to}' via pivoting: '${from}en' and 'en${to}'`);
  const pivotBuilds = [
    _constructTranslationModelInvolvingEnglish(from, 'en'),
    _constructTranslationModelInvolvingEnglish('en', to),
  ];
  await Promise.all(pivotBuilds);
};
// Translates `paragraphs` from the source to the target language and returns
// the result. When neither language is English the text is translated to
// English first and that intermediate result is translated to the target.
const translate = (from, to, paragraphs) => {
  const involvesEnglish = from === 'en' || to === 'en';
  if (involvesEnglish) {
    log(`Translating '${from}${to}'`);
    return _translateInvolvingEnglish(from, to, paragraphs);
  }

  log(`Translating '${from}${to}' via pivoting: '${from}en' -> 'en${to}'`);
  const englishParagraphs = _translateInvolvingEnglish(from, 'en', paragraphs);
  return _translateInvolvingEnglish('en', to, englishParagraphs);
};
// Fetches `url` and resolves with the response body as an ArrayBuffer.
// Rejects with an Error naming the url and HTTP status when the response
// is not ok.
const _downloadAsArrayBuffer = async (url) => {
  const response = await fetch(url);
  if (response.ok) {
    return response.arrayBuffer();
  }
  const { status, statusText } = response;
  throw new Error(`Downloading ${url} failed: HTTP ${status} - ${statusText}`);
};
// Creates a Module.AlignedMemory of the buffer's byte length with the given
// alignment size, copies the buffer's bytes into it and returns it.
const _prepareAlignedMemoryFromBuffer = async (buffer, alignmentSize) => {
  const sourceBytes = new Int8Array(buffer);
  const byteCount = sourceBytes.byteLength;
  log(`Constructing Aligned memory. Size: ${byteCount} bytes, Alignment: ${alignmentSize}`);
  const alignedMemory = new Module.AlignedMemory(byteCount, alignmentSize);
  log(`Aligned memory construction done`);
  // Copy the bytes through the view the aligned memory exposes.
  alignedMemory.getByteArrayView().set(sourceBytes);
  log(`Aligned memory initialized`);
  return alignedMemory;
};
/**
 * Builds the translation model for the `${from}${to}` language pair and stores
 * it in `languagePairToTranslationModels`.
 *
 * The model, shortlist and vocabulary file names are looked up in
 * `modelRegistry`, downloaded from `rootURL`, copied into aligned memory and
 * handed to `Module.TranslationModel` together with the YAML configuration below.
 *
 * @param {string} from - first half of the language-pair key
 * @param {string} to - second half of the language-pair key
 * @returns {Promise<void>} resolves once the model is stored in the map
 * @throws {Error} when `modelRegistry` lacks a model/lex/vocab entry for the
 *   pair, or when a download or the model construction fails
 */
const _constructTranslationModelInvolvingEnglish = async (from, to) => {
  const languagePair = `${from}${to}`;

  // Fail with a readable message instead of a TypeError on a missing registry entry.
  const registryEntry = modelRegistry[languagePair];
  if (!registryEntry || !registryEntry["model"] || !registryEntry["lex"] || !registryEntry["vocab"]) {
    throw new Error(`No model, shortlist and vocab files registered for language pair '${languagePair}'`);
  }

  /*Set the Model Configuration as YAML formatted string.
    For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
    Vocab files are re-used in both translation directions
    const vocabLanguagePair = from === "en" ? `${to}${from}` : languagePair;
    const modelConfig = `models:
      - /${languagePair}/model.${languagePair}.intgemm.alphas.bin
      vocabs:
      - /${languagePair}/vocab.${vocabLanguagePair}.spm
      - /${languagePair}/vocab.${vocabLanguagePair}.spm
      beam-size: 1
      normalize: 1.0
      word-penalty: 0
      max-length-break: 128
      mini-batch-words: 1024
      workspace: 128
      max-length-factor: 2.0
      skip-cost: true
      cpu-threads: 0
      quiet: true
      quiet-translation: true
      shortlist:
      - /${languagePair}/lex.${languagePair}.s2t
      - 50
      - 50
      `;
  */

  // TODO: gemm-precision: int8shiftAlphaAll (for the models that support this)
  // DONOT CHANGE THE SPACES BETWEEN EACH ENTRY OF CONFIG
  const modelConfig = `beam-size: 1
normalize: 1.0
word-penalty: 0
max-length-break: 128
mini-batch-words: 1024
workspace: 128
max-length-factor: 2.0
skip-cost: true
cpu-threads: 0
quiet: true
quiet-translation: true
gemm-precision: int8shiftAll
`;

  const pairURL = `${rootURL}/${languagePair}`;
  const modelFile = `${pairURL}/${registryEntry["model"].name}`;
  const shortlistFile = `${pairURL}/${registryEntry["lex"].name}`;
  // Both vocab slots point at the same registry file; the Set keeps it from
  // being downloaded twice.
  const vocabFiles = [`${pairURL}/${registryEntry["vocab"].name}`,
                      `${pairURL}/${registryEntry["vocab"].name}`];
  const uniqueVocabFiles = new Set(vocabFiles);
  log(`modelFile: ${modelFile}\nshortlistFile: ${shortlistFile}\nNo. of unique vocabs: ${uniqueVocabFiles.size}`);
  uniqueVocabFiles.forEach(item => log(`unique vocabFile: ${item}`));

  // Download every file concurrently; the vocab downloads do not depend on
  // the model or shortlist and need not wait for them.
  const start = Date.now();
  const [modelBuffer, shortListBuffer, ...downloadedVocabBuffers] = await Promise.all(
    [modelFile, shortlistFile, ...uniqueVocabFiles].map(url => _downloadAsArrayBuffer(url))
  );
  log(`Total Download time for all files of '${languagePair}': ${(Date.now() - start) / 1000} secs`);

  // Construct AlignedMemory objects with downloaded buffers
  const [alignedModelMemory, alignedShortlistMemory] = await Promise.all([
    _prepareAlignedMemoryFromBuffer(modelBuffer, 256),
    _prepareAlignedMemoryFromBuffer(shortListBuffer, 64)
  ]);
  const alignedVocabsMemoryList = new Module.AlignedMemoryList;
  for (const vocabBuffer of downloadedVocabBuffers) {
    alignedVocabsMemoryList.push_back(await _prepareAlignedMemoryFromBuffer(vocabBuffer, 64));
  }

  for (let vocabs = 0; vocabs < alignedVocabsMemoryList.size(); vocabs++) {
    log(`Aligned vocab memory${vocabs+1} size: ${alignedVocabsMemoryList.get(vocabs).size()}`);
  }
  log(`Aligned model memory size: ${alignedModelMemory.size()}`);
  log(`Aligned shortlist memory size: ${alignedShortlistMemory.size()}`);
  log(`Translation Model config: ${modelConfig}`);

  const translationModel = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory, alignedVocabsMemoryList);
  languagePairToTranslationModels.set(languagePair, translationModel);
};
/**
 * Translates paragraphs with the already loaded model of the `${from}${to}`
 * language pair.
 *
 * Each paragraph is trimmed; paragraphs that are empty after trimming are
 * skipped, so the result can be shorter than `paragraphs`.
 *
 * @param {string} from - first half of the language-pair key
 * @param {string} to - second half of the language-pair key
 * @param {string[]} paragraphs - texts to translate
 * @returns {string[]} translated text of every non-empty paragraph
 * @throws {Error} when no model for the language pair has been loaded
 */
const _translateInvolvingEnglish = (from, to, paragraphs) => {
  const languagePair = `${from}${to}`;
  if (!languagePairToTranslationModels.has(languagePair)) {
    throw new Error(`Please load translation model '${languagePair}' before translating`);
  }
  // Declared locally so that no global `translationModel` is created.
  const translationModel = languagePairToTranslationModels.get(languagePair);

  // Instantiate the arguments of translate() API i.e. ResponseOptions and input (vector<string>)
  const responseOptions = new Module.ResponseOptions();
  const input = new Module.VectorString;
  try {
    paragraphs.forEach(paragraph => {
      const trimmed = paragraph.trim();
      // prevent empty paragraph - it breaks the translation
      if (trimmed !== "") {
        input.push_back(trimmed);
      }
    });
    // Access input (just for debugging)
    log(`Input size: ${input.size()}`);

    // Translate the input, which is a vector<String>; the result is a vector<Response>
    const result = translationService.translate(translationModel, input, responseOptions);
    const translatedParagraphs = [];
    // Sentence-level results are extracted here but are not part of the return value.
    const translatedSentencesOfParagraphs = [];
    const sourceSentencesOfParagraphs = [];
    for (let i = 0; i < result.size(); i++) {
      // Fetch each response once and reuse it for all three extractions.
      const response = result.get(i);
      translatedParagraphs.push(response.getTranslatedText());
      translatedSentencesOfParagraphs.push(_getAllTranslatedSentencesOfParagraph(response));
      sourceSentencesOfParagraphs.push(_getAllSourceSentencesOfParagraph(response));
    }
    return translatedParagraphs;
  } finally {
    // Always release the Module-allocated arguments, even when translation throws.
    responseOptions.delete();
    input.delete();
  }
};
/**
 * Returns every translated sentence of a Response as a list of strings.
 * Each sentence is cut out of the translated text using the byte range the
 * Response reports for it.
 */
const _getAllTranslatedSentencesOfParagraph = (response) => {
  const translatedText = response.getTranslatedText();
  return Array.from({ length: response.size() }, (_, index) =>
    _getSentenceFromByteRange(translatedText, response.getTranslatedSentence(index))
  );
};
/**
 * Returns every source sentence of a Response as a list of strings.
 * Each sentence is cut out of the original text using the byte range the
 * Response reports for it.
 */
const _getAllSourceSentencesOfParagraph = (response) => {
  const originalText = response.getOriginalText();
  return Array.from({ length: response.size() }, (_, index) =>
    _getSentenceFromByteRange(originalText, response.getSourceSentence(index))
  );
};
/*
 * Cuts a sentence out of text (a string). byteRange carries the begin and end
 * indices of the sentence within the utf-8 encoded form of the text, so the
 * text is encoded, sliced by bytes and decoded back into a string.
 */
const _getSentenceFromByteRange = (text, byteRange) => {
  const encodedText = encoder.encode(text);
  const { begin, end } = byteRange;
  return decoder.decode(encodedText.subarray(begin, end));
};

View File

@ -1,6 +1,519 @@
{
"name": "test_page",
"lockfileVersion": 2,
"requires": true,
"lockfileVersion": 1,
"packages": {
"": {
"dependencies": {
"cors": "^2.8.5",
"express": "^4.17.1",
"nocache": "^2.1.0"
}
},
"node_modules/accepts": {
"version": "1.3.7",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
"integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==",
"dependencies": {
"mime-types": "~2.1.24",
"negotiator": "0.6.2"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/array-flatten": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
"integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI="
},
"node_modules/body-parser": {
"version": "1.19.0",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
"integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==",
"dependencies": {
"bytes": "3.1.0",
"content-type": "~1.0.4",
"debug": "2.6.9",
"depd": "~1.1.2",
"http-errors": "1.7.2",
"iconv-lite": "0.4.24",
"on-finished": "~2.3.0",
"qs": "6.7.0",
"raw-body": "2.4.0",
"type-is": "~1.6.17"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/bytes": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
"integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==",
"engines": {
"node": ">= 0.8"
}
},
"node_modules/content-disposition": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz",
"integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==",
"dependencies": {
"safe-buffer": "5.1.2"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/content-type": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
"integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/cookie": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz",
"integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/cookie-signature": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
"integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw="
},
"node_modules/cors": {
"version": "2.8.5",
"resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz",
"integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==",
"dependencies": {
"object-assign": "^4",
"vary": "^1"
},
"engines": {
"node": ">= 0.10"
}
},
"node_modules/debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"dependencies": {
"ms": "2.0.0"
}
},
"node_modules/depd": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
"integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/destroy": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
"integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
},
"node_modules/ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
},
"node_modules/encodeurl": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
"integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=",
"engines": {
"node": ">= 0.8"
}
},
"node_modules/escape-html": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
"integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg="
},
"node_modules/etag": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
"integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/express": {
"version": "4.17.1",
"resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
"integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==",
"dependencies": {
"accepts": "~1.3.7",
"array-flatten": "1.1.1",
"body-parser": "1.19.0",
"content-disposition": "0.5.3",
"content-type": "~1.0.4",
"cookie": "0.4.0",
"cookie-signature": "1.0.6",
"debug": "2.6.9",
"depd": "~1.1.2",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"finalhandler": "~1.1.2",
"fresh": "0.5.2",
"merge-descriptors": "1.0.1",
"methods": "~1.1.2",
"on-finished": "~2.3.0",
"parseurl": "~1.3.3",
"path-to-regexp": "0.1.7",
"proxy-addr": "~2.0.5",
"qs": "6.7.0",
"range-parser": "~1.2.1",
"safe-buffer": "5.1.2",
"send": "0.17.1",
"serve-static": "1.14.1",
"setprototypeof": "1.1.1",
"statuses": "~1.5.0",
"type-is": "~1.6.18",
"utils-merge": "1.0.1",
"vary": "~1.1.2"
},
"engines": {
"node": ">= 0.10.0"
}
},
"node_modules/finalhandler": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz",
"integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==",
"dependencies": {
"debug": "2.6.9",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"on-finished": "~2.3.0",
"parseurl": "~1.3.3",
"statuses": "~1.5.0",
"unpipe": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/forwarded": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz",
"integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/fresh": {
"version": "0.5.2",
"resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
"integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/http-errors": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
"integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==",
"dependencies": {
"depd": "~1.1.2",
"inherits": "2.0.3",
"setprototypeof": "1.1.1",
"statuses": ">= 1.5.0 < 2",
"toidentifier": "1.0.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/iconv-lite": {
"version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
"integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/inherits": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
},
"node_modules/ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
"integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
"engines": {
"node": ">= 0.10"
}
},
"node_modules/media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
"integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
"integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E="
},
"node_modules/methods": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
"integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
"integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==",
"bin": {
"mime": "cli.js"
},
"engines": {
"node": ">=4"
}
},
"node_modules/mime-db": {
"version": "1.45.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.45.0.tgz",
"integrity": "sha512-CkqLUxUk15hofLoLyljJSrukZi8mAtgd+yE5uO4tqRZsdsAJKv0O+rFMhVDRJgozy+yG6md5KwuXhD4ocIoP+w==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.28",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.28.tgz",
"integrity": "sha512-0TO2yJ5YHYr7M2zzT7gDU1tbwHxEUWBCLt0lscSNpcdAfFyJOVEpRYNS7EXVcTLNj/25QO8gulHC5JtTzSE2UQ==",
"dependencies": {
"mime-db": "1.45.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
},
"node_modules/negotiator": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
"integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/nocache": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/nocache/-/nocache-2.1.0.tgz",
"integrity": "sha512-0L9FvHG3nfnnmaEQPjT9xhfN4ISk0A8/2j4M37Np4mcDesJjHgEUfgPhdCyZuFI954tjokaIj/A3NdpFNdEh4Q==",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/on-finished": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",
"integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=",
"dependencies": {
"ee-first": "1.1.1"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/parseurl": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
"integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
"engines": {
"node": ">= 0.8"
}
},
"node_modules/path-to-regexp": {
"version": "0.1.7",
"resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
"integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w="
},
"node_modules/proxy-addr": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz",
"integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==",
"dependencies": {
"forwarded": "~0.1.2",
"ipaddr.js": "1.9.1"
},
"engines": {
"node": ">= 0.10"
}
},
"node_modules/qs": {
"version": "6.7.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
"integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==",
"engines": {
"node": ">=0.6"
}
},
"node_modules/range-parser": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
"integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/raw-body": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz",
"integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==",
"dependencies": {
"bytes": "3.1.0",
"http-errors": "1.7.2",
"iconv-lite": "0.4.24",
"unpipe": "1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
"node_modules/safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"node_modules/send": {
"version": "0.17.1",
"resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz",
"integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==",
"dependencies": {
"debug": "2.6.9",
"depd": "~1.1.2",
"destroy": "~1.0.4",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"fresh": "0.5.2",
"http-errors": "~1.7.2",
"mime": "1.6.0",
"ms": "2.1.1",
"on-finished": "~2.3.0",
"range-parser": "~1.2.1",
"statuses": "~1.5.0"
},
"engines": {
"node": ">= 0.8.0"
}
},
"node_modules/send/node_modules/ms": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
"integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
},
"node_modules/serve-static": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz",
"integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==",
"dependencies": {
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"parseurl": "~1.3.3",
"send": "0.17.1"
},
"engines": {
"node": ">= 0.8.0"
}
},
"node_modules/setprototypeof": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
"integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw=="
},
"node_modules/statuses": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
"integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/toidentifier": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
"integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==",
"engines": {
"node": ">=0.6"
}
},
"node_modules/type-is": {
"version": "1.6.18",
"resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
"integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
"dependencies": {
"media-typer": "0.3.0",
"mime-types": "~2.1.24"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/unpipe": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=",
"engines": {
"node": ">= 0.8"
}
},
"node_modules/utils-merge": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
"integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=",
"engines": {
"node": ">= 0.4.0"
}
},
"node_modules/vary": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
"integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=",
"engines": {
"node": ">= 0.8"
}
}
},
"dependencies": {
"accepts": {
"version": "1.3.7",

View File

@ -19,13 +19,13 @@ if [ ! -e "$1" ]; then
exit
fi
WASM_ARTIFACTS="$1/bergamot-translator-worker.*"
WASM_ARTIFACTS="$1/bergamot-translator-worker.js $1/bergamot-translator-worker.wasm"
# Copy every entry of WASM_ARTIFACTS into the current directory and into js/.
# WASM_ARTIFACTS is left unquoted on purpose so that it is split into entries.
for i in $WASM_ARTIFACTS; do
    # Stop at the first entry that is not a regular file.
    [ -f "$i" ] || break
    cp "$i" .
    cp "$i" js/.
    echo "Copied \"$i\""
done
npm install
echo "Start httpserver"
node bergamot-httpserver.js
node bergamot-httpserver.js 80 1 0

View File

@ -1,267 +0,0 @@
// Translation service instance; created on demand by constructTranslationService().
var translationService;
// Declared at worker scope; translate() works with its own locals of the same names.
var responseOptions;
var input = undefined;

// A map of language-pair to TranslationModel object
var translationModels = new Map();

// Script loaded through importScripts() by the 'load_module' command.
const BERGAMOT_TRANSLATOR_MODULE = "bergamot-translator-worker.js";

// string to utf-8 converter
const encoder = new TextEncoder();
// utf-8 to string converter
const decoder = new TextDecoder();

// Reference time for the "Time until Module.preRun" log entry.
const start = Date.now();
// Set in Module.preRun, read in Module.onRuntimeInitialized.
let moduleLoadStart;
// Module hooks that log how long the start-up phases took.
var Module = {
  preRun: [
    // Logs the time elapsed since `start` and marks the beginning of the load phase.
    () => {
      log(`Time until Module.preRun: ${(Date.now() - start) / 1000} secs`);
      moduleLoadStart = Date.now();
    }
  ],
  // Logs the time elapsed between preRun and the runtime being initialized.
  onRuntimeInitialized() {
    log(`Wasm Runtime initialized (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart) / 1000} secs`);
  }
};
// Single logging entry point of the worker: forwards the message to console.debug.
const log = function (message) {
  console.debug(message);
};
/**
 * Handles the commands sent by the main script. `e.data` is an array whose
 * first element is the command name:
 *  - ['load_module']                    loads the translator script, replies 'module_loaded'
 *  - ['load_model', from, to]           builds service and model, replies 'model_loaded'
 *  - ['translate', from, to, paragraphs] translates, replies 'translated_result'
 * Failures of 'load_model' and 'translate' are reported in the reply's status
 * text; the reply type stays the same. Other commands are only logged.
 */
onmessage = async function(e) {
  const command = e.data[0];
  log(`Message '${command}' received from main script`);

  // Logs the status text, then posts the reply to the main script.
  const reply = (status, message) => {
    log(status);
    log('Posting message back to main script');
    postMessage(message);
  };

  switch (command) {
    case 'load_module': {
      importScripts(BERGAMOT_TRANSLATOR_MODULE);
      const status = `Translator wasm module successfully loaded`;
      reply(status, ['module_loaded', status]);
      break;
    }
    case 'load_model': {
      const startedAt = Date.now();
      let status;
      try {
        await constructTranslationService();
        await constructTranslationModel(e.data[1], e.data[2]);
        status = `translation model '${e.data[1]}${e.data[2]}' successfully loaded; took ${(Date.now() - startedAt) / 1000} secs`;
      } catch (error) {
        status = `translation model '${e.data[1]}${e.data[2]}' loading failed: '${error.message}'`;
      }
      reply(status, ['model_loaded', status]);
      break;
    }
    case 'translate': {
      const from = e.data[1];
      const to = e.data[2];
      const inputParagraphs = e.data[3];
      // Words are the non-blank pieces left after splitting on single spaces.
      const inputWordCount = inputParagraphs.reduce(
        (total, sentence) => total + sentence.trim().split(" ").filter(word => word.trim() !== "").length,
        0
      );
      const startedAt = Date.now();
      let translatedParagraphs;
      let status;
      try {
        translatedParagraphs = translate(from, to, inputParagraphs);
        const secs = (Date.now() - startedAt) / 1000;
        status = `Translation '${from}${to}' Successful. Speed: ${Math.round(inputWordCount / secs)} Words per second (${inputWordCount} words in ${secs} secs)`;
      } catch (error) {
        status = `Error: ${error.message}`;
      }
      // translatedParagraphs stays undefined when the translation failed.
      reply(status, ['translated_result', translatedParagraphs, status]);
      break;
    }
  }
}
// Fetches `url` and resolves with the response body as an array buffer.
// A response that is not ok is turned into an Error carrying the HTTP status.
const downloadAsArrayBuffer = async(url) => {
  const response = await fetch(url);
  if (response.ok) {
    return response.arrayBuffer();
  }
  const { status, statusText } = response;
  throw Error(`Downloading ${url} failed: HTTP ${status} - ${statusText}`);
};
/**
 * Allocates a Module.AlignedMemory as large as `buffer` and copies the
 * buffer's bytes into it.
 *
 * @param {ArrayBuffer} buffer - bytes to copy (viewed through an Int8Array)
 * @param {number} alignmentSize - alignment handed to the AlignedMemory constructor
 * @returns {Promise<object>} the populated AlignedMemory instance
 */
const prepareAlignedMemoryFromBuffer = async (buffer, alignmentSize) => {
  const sourceBytes = new Int8Array(buffer);
  const { byteLength } = sourceBytes;
  log(`Constructing Aligned memory with size: ${byteLength} bytes with alignment: ${alignmentSize}`);
  const memory = new Module.AlignedMemory(byteLength, alignmentSize);
  log(`Aligned memory construction done`);
  // The byte-array view is written in place with the downloaded bytes.
  memory.getByteArrayView().set(sourceBytes);
  log(`Aligned memory initialized`);
  return memory;
};
/**
 * Creates the Module.BlockingService and stores it in `translationService`.
 * Does nothing when a service already exists.
 *
 * @returns {Promise<void>}
 */
const constructTranslationService = async () => {
  if (translationService) {
    return;
  }
  const translationServiceConfig = {};
  // Serialise the config for the log; interpolating the object itself prints "[object Object]".
  log(`Creating Translation Service with config: ${JSON.stringify(translationServiceConfig)}`);
  translationService = new Module.BlockingService(translationServiceConfig);
  log(`Translation Service created successfully`);
};
/**
 * Builds the translation model for the `${from}${to}` language pair and stores
 * it in `translationModels`. A model already stored for the pair is deleted
 * and replaced.
 *
 * The model, shortlist and vocabulary files are downloaded from
 * `models/<pair>/`, copied into aligned memory and handed to
 * `Module.TranslationModel` together with the YAML configuration below.
 *
 * @param {string} from - first half of the language-pair key
 * @param {string} to - second half of the language-pair key
 * @returns {Promise<void>} resolves once the model is stored in the map
 * @throws {Error} when a download or the model construction fails
 */
const constructTranslationModel = async (from, to) => {
  const languagePair = `${from}${to}`;
  if (translationModels.has(languagePair)) {
    // Destruct the old TranslationModel explicitly and Remove its entry from the map
    translationModels.get(languagePair).delete();
    translationModels.delete(languagePair);
  }

  // Vocab files are re-used in both translation directions
  const vocabLanguagePair = from === "en" ? `${to}${from}` : languagePair;

  // Set the Model Configuration as YAML formatted string.
  // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
  /*const modelConfig = `models:
    - /${languagePair}/model.${languagePair}.intgemm.alphas.bin
    vocabs:
    - /${languagePair}/vocab.${vocabLanguagePair}.spm
    - /${languagePair}/vocab.${vocabLanguagePair}.spm
    beam-size: 1
    normalize: 1.0
    word-penalty: 0
    max-length-break: 128
    mini-batch-words: 1024
    workspace: 128
    max-length-factor: 2.0
    skip-cost: true
    cpu-threads: 0
    quiet: true
    quiet-translation: true
    shortlist:
    - /${languagePair}/lex.${languagePair}.s2t
    - 50
    - 50
    `;
  */

  // TODO: gemm-precision: int8shiftAlphaAll (for the models that support this)
  // DONOT CHANGE THE SPACES BETWEEN EACH ENTRY OF CONFIG
  const modelConfig = `beam-size: 1
normalize: 1.0
word-penalty: 0
max-length-break: 128
mini-batch-words: 1024
workspace: 128
max-length-factor: 2.0
skip-cost: true
cpu-threads: 0
quiet: true
quiet-translation: true
gemm-precision: int8shift
`;

  const modelFile = `models/${languagePair}/model.${languagePair}.intgemm.alphas.bin`;
  const shortlistFile = `models/${languagePair}/lex.50.50.${languagePair}.s2t.bin`;
  // Both vocab slots point at the same file; the Set keeps it from being
  // downloaded twice.
  const vocabFiles = [`models/${languagePair}/vocab.${vocabLanguagePair}.spm`,
                      `models/${languagePair}/vocab.${vocabLanguagePair}.spm`];
  const uniqueVocabFiles = new Set(vocabFiles);
  log(`modelFile: ${modelFile}\nshortlistFile: ${shortlistFile}\nNo. of unique vocabs: ${uniqueVocabFiles.size}`);
  uniqueVocabFiles.forEach(item => log(`unique vocabFile: ${item}`));

  // Download every file concurrently; the vocab downloads do not depend on
  // the model or shortlist and need not wait for them.
  const start = Date.now();
  const [modelBuffer, shortListBuffer, ...downloadedVocabBuffers] = await Promise.all(
    [modelFile, shortlistFile, ...uniqueVocabFiles].map(url => downloadAsArrayBuffer(url))
  );
  log(`All files for ${languagePair} language pair took ${(Date.now() - start) / 1000} secs to download`);

  // Construct AlignedMemory objects with downloaded buffers
  const [alignedModelMemory, alignedShortlistMemory] = await Promise.all([
    prepareAlignedMemoryFromBuffer(modelBuffer, 256),
    prepareAlignedMemoryFromBuffer(shortListBuffer, 64)
  ]);
  const alignedVocabsMemoryList = new Module.AlignedMemoryList;
  for (const vocabBuffer of downloadedVocabBuffers) {
    alignedVocabsMemoryList.push_back(await prepareAlignedMemoryFromBuffer(vocabBuffer, 64));
  }

  log(`Aligned vocab memories: ${alignedVocabsMemoryList.get(0).size()}`);
  log(`Aligned model memory: ${alignedModelMemory.size()}`);
  log(`Aligned shortlist memory: ${alignedShortlistMemory.size()}`);
  log(`Creating Translation Model with config: ${modelConfig}`);

  const translationModel = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory, alignedVocabsMemoryList);
  translationModels.set(languagePair, translationModel);
};
/**
 * Translates paragraphs with the already loaded model of the `${from}${to}`
 * language pair.
 *
 * Each paragraph is trimmed; paragraphs that are empty after trimming are
 * skipped, so the result can be shorter than `paragraphs`.
 *
 * @param {string} from - first half of the language-pair key
 * @param {string} to - second half of the language-pair key
 * @param {string[]} paragraphs - texts to translate
 * @returns {string[]} translated text of every non-empty paragraph
 * @throws {Error} when no model for the language pair has been loaded
 */
const translate = (from, to, paragraphs) => {
  const languagePair = `${from}${to}`;
  if (!translationModels.has(languagePair)) {
    throw new Error(`Please load translation model '${languagePair}' before translating`);
  }
  // Declared locally so that no global `translationModel` is created.
  const translationModel = translationModels.get(languagePair);

  // Instantiate the arguments of translate() API i.e. ResponseOptions and input (vector<string>)
  const responseOptions = new Module.ResponseOptions();
  const input = new Module.VectorString;
  try {
    paragraphs.forEach(paragraph => {
      const trimmed = paragraph.trim();
      // prevent empty paragraph - it breaks the translation
      if (trimmed !== "") {
        input.push_back(trimmed);
      }
    });
    // Access input (just for debugging)
    log(`Input size: ${input.size()}`);

    // Translate the input, which is a vector<String>; the result is a vector<Response>
    const result = translationService.translate(translationModel, input, responseOptions);
    const translatedParagraphs = [];
    const translatedSentencesOfParagraphs = [];
    const sourceSentencesOfParagraphs = [];
    for (let i = 0; i < result.size(); i++) {
      // Fetch each response once and reuse it for all three extractions.
      const response = result.get(i);
      translatedParagraphs.push(response.getTranslatedText());
      translatedSentencesOfParagraphs.push(getAllTranslatedSentencesOfParagraph(response));
      sourceSentencesOfParagraphs.push(getAllSourceSentencesOfParagraph(response));
    }
    log({ translatedParagraphs });
    log({ translatedSentencesOfParagraphs });
    log({ sourceSentencesOfParagraphs });
    return translatedParagraphs;
  } finally {
    // Always release the Module-allocated arguments, even when translation throws.
    responseOptions.delete();
    input.delete();
  }
};
// Returns every translated sentence of the Response as a list of strings. Each
// sentence is cut out of the translated text using the byte range the Response
// reports for it.
const getAllTranslatedSentencesOfParagraph = (response) => {
  const translatedText = response.getTranslatedText();
  return Array.from({ length: response.size() }, (_, index) =>
    _getSentenceFromByteRange(translatedText, response.getTranslatedSentence(index))
  );
};
// Returns every source sentence of the Response as a list of strings. Each
// sentence is cut out of the original text using the byte range the Response
// reports for it.
const getAllSourceSentencesOfParagraph = (response) => {
  const originalText = response.getOriginalText();
  return Array.from({ length: response.size() }, (_, index) =>
    _getSentenceFromByteRange(originalText, response.getSourceSentence(index))
  );
};
// Cuts a sentence out of text (a string). byteRange carries the begin and end
// indices of the sentence within the utf-8 encoded form of the text, so the
// text is encoded, sliced by bytes and decoded back into a string.
const _getSentenceFromByteRange = (text, byteRange) => {
  const encodedText = encoder.encode(text);
  const { begin, end } = byteRange;
  return decoder.decode(encodedText.subarray(begin, end));
};