JS: Reuse model registry from firefox-translations-models for test page (#377)

* JS: Reuse model registry from the firefox-translations-models repo for test page

 - https://github.com/mozilla/firefox-translations-models/blob/main/registry.json
   is reused
 - Removed existing registry
This commit is contained in:
Abhishek Aggarwal 2022-03-14 18:05:22 +01:00 committed by GitHub
parent 22d6bc07e7
commit 2c0e65c2ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 335 deletions

View File

@ -16,6 +16,7 @@ Please refer to the file `test_page/js/worker.js` that demonstrates how to use t
cd test_page
git clone --depth 1 --branch main --single-branch https://github.com/mozilla/firefox-translations-models/
mkdir models
cp -rf firefox-translations-models/registry.json models
cp -rf firefox-translations-models/models/prod/* models
cp -rf firefox-translations-models/models/dev/* models
gunzip models/*/*

View File

@ -1,328 +0,0 @@
// Base location the model files are resolved against. A hosted alternative is
// "https://storage.googleapis.com/bergamot-models-sandbox/0.2.10"; the local
// "../models" directory is the one in effect.
const rootURL = "../models";

/**
 * Registry of the translation models, keyed by language pair (e.g. "enit").
 *
 * Every language pair maps to three file descriptors, in this order:
 * `vocab`, `lex` and `model`. Each descriptor carries the file `name`, its
 * `size`, its `estimatedCompressedSize` and its `expectedSha256Hash`
 * (sizes are presumably byte counts; confirm against the consumer).
 *
 * Note that some pairs list a vocab file name that belongs to the reverse
 * direction (e.g. "encs" lists "vocab.csen.spm"); the names are reproduced
 * here exactly as recorded.
 */
const modelRegistry = (() => {
  // Builds the descriptor object of a single file.
  const describeFile = (name, size, estimatedCompressedSize, expectedSha256Hash) => ({
    name,
    size,
    estimatedCompressedSize,
    expectedSha256Hash,
  });

  // One row per language pair: [pair, vocab, lex, model], where each file is
  // given as [name, size, estimatedCompressedSize, expectedSha256Hash].
  const rows = [
    ["enit",
      ["vocab.enit.spm", 814128, 405338, "de8cbeb79e0139304bfa47e8559f2447016bf9906225a97d3df1baed4de8f3a3"],
      ["lex.50.50.enit.s2t.bin", 4489920, 2409986, "bb1fad3b3f6a13ebce1698cf7f39ca736c4dea4525f3dab5e1a78436f07445e6"],
      ["model.enit.intgemm.alphas.bin", 17140836, 13283223, "a5ce3723f62ead92a0e0373b6df0ad8e3e6d22963adb1333984206e33b8b6c61"]],
    ["enpt",
      ["vocab.enpt.spm", 812781, 406524, "633a3d782c79f7d5e4b94ab96848f47c2fdf8ba82dd99efd1742b8a696bbd0cc"],
      ["lex.50.50.enpt.s2t.bin", 4472528, 2411984, "1e96599123d275afa37353dfe84677a4070f013494fbdc9c52a28445cc9bc38d"],
      ["model.enpt.intgemm.alphas.bin", 17140836, 13429592, "d968735704c75e33c2e183b9241f14c0b2a560d01d88a2728e5c0119a4d7fb22"]],
    ["enru",
      ["vocab.enru.spm", 937157, 435776, "feca2d44f01b946c85faba3b15b5eb53344bec84cd14a1a4d4a82ddd774c5edd"],
      ["lex.50.50.enru.s2t.bin", 3049096, 1579779, "7bd3e2c0a72286fe1f3da65c56c49a7cd77efa5f1d1a444e2a9e769480b96ff3"],
      ["model.enru.intgemm.alphas.bin", 17140836, 12853987, "4a45186a93b8a2dd9301c66a3b3dad580b1bcfa74aadda583ca383f9fe0dea93"]],
    ["iten",
      ["vocab.iten.spm", 814151, 405416, "22d5ce6973be5360a921103acbe984a9bfca952a1f6c55c9cb5ef7de4fd58266"],
      ["lex.50.50.iten.s2t.bin", 5238420, 2860178, "357d362373022b029ee9965975a133e6f36fdb0fed749202ff578365cf0111f8"],
      ["model.iten.intgemm.alphas.bin", 17140836, 13423308, "1fae546faeb9046f80b1b7e940b37b660974ce72902778181d6cd1c30b717f35"]],
    ["pten",
      ["vocab.pten.spm", 812889, 406730, "8389979e3c965688b07aeb712a7e44406e5dcdb2b84087229d26fcc71448c4ed"],
      ["lex.50.50.pten.s2t.bin", 5001420, 2733800, "212ed0ae44a6f920cd6d17ca02f0a523ba6c4b0ef5078ae310c20bc4c51484c5"],
      ["model.pten.intgemm.alphas.bin", 17140836, 13584764, "6c3b7af01772022a19712410c63342ba581468c2f1aac34d7488409c4043e697"]],
    ["ruen",
      ["vocab.ruen.spm", 936576, 435801, "aaf9a325c0a988c507d0312cb6ba1a02bac7a370bcd879aedee626a40bfbda78"],
      ["lex.50.50.ruen.s2t.bin", 5090836, 2684919, "e6667e22f5f86be4872e3768b7184727f5dd8c9f2ccfb0639baabcb1176f5d11"],
      ["model.ruen.intgemm.alphas.bin", 17140836, 13108893, "3b6a0305e3d232fadd54f5a765365b7b96ad6d8f2e818cba594b02fbd8fadb3d"]],
    ["csen",
      ["vocab.csen.spm", 769763, 366392, "f71cc5d045e479607078e079884f44032f5a0b82547fb96eefa29cd1eb47c6f3"],
      ["lex.50.50.csen.s2t.bin", 4535788, 2418488, "8228a3c3f7887759a62b7d7c674a7bef9b70161913f9b0939ab58f71186835c2"],
      ["model.csen.intgemm.alphas.bin", 17140756, 13045032, "5b16661e2864dc50b2f4091a16bdd4ec8d8283e04271e602159ba348df5d6e2d"]],
    ["deen",
      ["vocab.deen.spm", 784269, 410738, "417668f2ed297970febafb5b079a9d5ebc4ed0b3550ac8386d67a90473a09bd7"],
      ["lex.50.50.deen.s2t.bin", 5047568, 2657472, "2f7c0f7bbce97ae5b52454074a892ba7b7610fb98e3c5d341e4ca79f0850c4de"],
      ["model.deen.intgemm.alphas.bin", 17140837, 13091214, "dda44d87ab0d8ad3b3871122fd3ee385f37878183a8b4ec139cd909531ec5009"]],
    ["encs",
      ["vocab.csen.spm", 769763, 366392, "f71cc5d045e479607078e079884f44032f5a0b82547fb96eefa29cd1eb47c6f3"],
      ["lex.50.50.encs.s2t.bin", 3556124, 1913246, "e19c77231bf977988e31ff8db15fe79966b5170564bd3e10613f239e7f461d97"],
      ["model.encs.intgemm.alphas.bin", 17140756, 12630325, "9a2fe0588bd972accfc801e2f31c945de0557804a91666ae5ab43b94fb74ac4b"]],
    ["ende",
      ["vocab.deen.spm", 797501, 412505, "bc8f8229933d8294c727f3eab12f6f064e7082b929f2d29494c8a1e619ba174c"],
      ["lex.50.50.ende.s2t.bin", 3062492, 1575385, "764797d075f0642c0b079cce6547348d65fe4e92ac69fa6a8605cd8b53dacb3f"],
      ["model.ende.intgemm.alphas.bin", 17140498, 13207068, "f0946515c6645304f0706fa66a051c3b7b7c507f12d0c850f276c18165a10c14"]],
    ["enes",
      ["vocab.esen.spm", 825463, 414566, "909b1eea1face0d7f90a474fe29a8c0fef8d104b6e41e65616f864c964ba8845"],
      ["lex.50.50.enes.s2t.bin", 3347104, 1720700, "3a113d713dec3cf1d12bba5b138ae616e28bba4bbc7fe7fd39ba145e26b86d7f"],
      ["model.enes.intgemm.alphas.bin", 17140755, 12602853, "fa7460037a3163e03fe1d23602f964bff2331da6ee813637e092ddf37156ef53"]],
    ["enet",
      ["vocab.eten.spm", 828426, 416995, "e3b66bc141f6123cd40746e2fb9b8ee4f89cbf324ab27d6bbf3782e52f15fa2d"],
      ["lex.50.50.enet.s2t.bin", 2700780, 1336443, "3d1b40ff43ebef82cf98d416a88a1ea19eb325a85785eef102f59878a63a829d"],
      ["model.enet.intgemm.alphas.bin", 17140754, 12543318, "a28874a8b702a519a14dc71bcee726a5cb4b539eeaada2d06492f751469a1fd6"]],
    ["esen",
      ["vocab.esen.spm", 825463, 414566, "909b1eea1face0d7f90a474fe29a8c0fef8d104b6e41e65616f864c964ba8845"],
      ["lex.50.50.esen.s2t.bin", 3860888, 1978538, "f11a2c23ef85ab1fee1c412b908d69bc20d66fd59faa8f7da5a5f0347eddf969"],
      ["model.esen.intgemm.alphas.bin", 17140755, 13215960, "4b6b7f451094aaa447d012658af158ffc708fc8842dde2f871a58404f5457fe0"]],
    ["eten",
      ["vocab.eten.spm", 828426, 416995, "e3b66bc141f6123cd40746e2fb9b8ee4f89cbf324ab27d6bbf3782e52f15fa2d"],
      ["lex.50.50.eten.s2t.bin", 3974944, 1920655, "6992bedc590e60e610a28129c80746fe5f33144a4520e2c5508d87db14ca54f8"],
      ["model.eten.intgemm.alphas.bin", 17140754, 12222624, "aac98a2371e216ee2d4843cbe896c617f6687501e17225ac83482eba52fd0028"]],
  ];

  // Assemble the registry in row order so the key order matches the table.
  const registry = {};
  for (const [pair, vocab, lex, model] of rows) {
    registry[pair] = {
      vocab: describeFile(...vocab),
      lex: describeFile(...lex),
      model: describeFile(...model),
    };
  }
  return registry;
})();

View File

@ -1,11 +1,15 @@
// All variables specific to translation service
var translationService = undefined;
// Model registry
let modelRegistry = undefined;
// A map of language-pair to TranslationModel object
var languagePairToTranslationModels = new Map();
const BERGAMOT_TRANSLATOR_MODULE = "bergamot-translator-worker.js";
const MODEL_REGISTRY = "modelRegistry.js";
const MODEL_REGISTRY = "../models/registry.json";
const MODEL_ROOT_URL = "../models/";
const PIVOT_LANGUAGE = 'en';
const encoder = new TextEncoder(); // string to utf-8 converter
@ -18,9 +22,10 @@ var Module = {
log(`Time until Module.preRun: ${(Date.now() - start) / 1000} secs`);
moduleLoadStart = Date.now();
}],
onRuntimeInitialized: function() {
onRuntimeInitialized: async function() {
log(`Wasm Runtime initialized Successfully (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart) / 1000} secs`);
importScripts(MODEL_REGISTRY);
const response = await fetch(MODEL_REGISTRY);
modelRegistry = await response.json();
postMessage([`import_reply`, modelRegistry]);
}
};
@ -196,10 +201,10 @@ gemm-precision: int8shiftAlphaAll
alignment: soft
`;
const modelFile = `${rootURL}/${languagePair}/${modelRegistry[languagePair]["model"].name}`;
const shortlistFile = `${rootURL}/${languagePair}/${modelRegistry[languagePair]["lex"].name}`;
const vocabFiles = [`${rootURL}/${languagePair}/${modelRegistry[languagePair]["vocab"].name}`,
`${rootURL}/${languagePair}/${modelRegistry[languagePair]["vocab"].name}`];
const modelFile = `${MODEL_ROOT_URL}/${languagePair}/${modelRegistry[languagePair]["model"].name}`;
const shortlistFile = `${MODEL_ROOT_URL}/${languagePair}/${modelRegistry[languagePair]["lex"].name}`;
const vocabFiles = [`${MODEL_ROOT_URL}/${languagePair}/${modelRegistry[languagePair]["vocab"].name}`,
`${MODEL_ROOT_URL}/${languagePair}/${modelRegistry[languagePair]["vocab"].name}`];
const uniqueVocabFiles = new Set(vocabFiles);
log(`modelFile: ${modelFile}\nshortlistFile: ${shortlistFile}\nNo. of unique vocabs: ${uniqueVocabFiles.size}`);