Joerg Tiedemann 2020-01-20 23:37:40 +02:00
parent f97bc1895c
commit f32ddd06ce
26 changed files with 365 additions and 229 deletions

View File

@ -15,6 +15,47 @@
#
#--------------------------------------------------------------------
#
# Makefile.tasks ...... various common and specific tasks/experiments
# Makefile.generic .... generic targets (in form of suffixes to be added to other targets)
#
# Examples from Makefile.tasks:
#
# * submit a job to train a model in one specific translation direction
# (make data on CPU and then start a job on a GPU node with 4 GPUs)
# make SRCLANGS=en TRGLANGS=de unidirectional.submitcpu
#
# * submit jobs to train a model in both translation directions
# (make data on CPU, reverse the data, and start 2 jobs on GPU nodes with 4 GPUs each)
# make SRCLANGS=en TRGLANGS=de bilingual.submitcpu
#
# * same as bilingual but guess some HPC settings based on data size
# make SRCLANGS=en TRGLANGS=de bilingual-dynamic.submitcpu
#
# * submit jobs for all OPUS languages to the PIVOT language in both directions using bilingual-dynamic
# make PIVOT=en allopus2pivot # run the loop on the command line
# make PIVOT=en allopus2pivot.submitcpu # submit the same as a CPU-based job
# make all2en.submitcpu # short form of the same
#
# * submit jobs for all combinations of OPUS languages (this is huge!)
# (only if there is no train.submit in the workdir of the language pair)
# make PIVOT=en allopus.submitcpu
#
# * submit a job to train a multilingual model with the same languages on both sides
# make LANGS="en de fr" multilingual.submitcpu
#
#--------------------------------------------------------------------
# Some examples using generic extensions
#
# * submit a job to train en-ru with backtranslation data from backtranslate/
# make HPC_CORES=4 WALLTIME=24 SRCLANGS=en TRGLANGS=ru unidirectional-add-backtranslations.submitcpu
#
# * submit a job that evaluates all currently trained models:
# make eval-allmodels.submit
# make eval-allbilingual.submit # only bilingual models
# make eval-allmultilingual.submit # only multilingual models
#
#--------------------------------------------------------------------
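The *.submitcpu / *.submit-multigpu extensions used in the examples above are generic wrapper targets from Makefile.generic: they resubmit the wrapped target as an HPC batch job instead of running it locally. A minimal sketch of how such a suffix rule can be written, assuming a SLURM scheduler; HPC_CORES, HPC_MEM and WALLTIME are the Makefile.config variables, while the job-script name and sbatch options below are illustrative only, not the actual implementation in Makefile.generic:

    %.submitcpu:
    	# strip the .submitcpu suffix and write a tiny batch script that re-runs make for the wrapped target
    	echo '#!/bin/bash -l' > $@-job.sh
    	echo '${MAKE} -C ${CURDIR} ${MAKEOVERRIDES} $(@:.submitcpu=)' >> $@-job.sh
    	# hand the script to SLURM with the resources configured in Makefile.config
    	sbatch --cpus-per-task=${HPC_CORES} --mem=${HPC_MEM} --time=${WALLTIME}:00:00 $@-job.sh

With a rule like this, "make SRCLANGS=en TRGLANGS=de bilingual.submitcpu" queues a CPU job that simply runs "make SRCLANGS=en TRGLANGS=de bilingual" on the allocated node; the GPU wrappers would do the same while also requesting GPU resources.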
#
# general parameters / variables (see Makefile.config)
# SRCLANGS ............ set source language(s) (en)
# TRGLANGS ............ set target language(s) (de)
@ -254,8 +295,9 @@ ${WORKDIR}/${MODEL}.transformer.model${NR}.done: \
--valid-sets ${word 3,$^} ${word 4,$^} \
--valid-metrics perplexity \
--valid-mini-batch ${MARIAN_VALID_MINI_BATCH} \
--beam-size 12 --normalize 1 \
--log $(@:.model${NR}.done=.train${NR}.log) --valid-log $(@:.model${NR}.done=.valid${NR}.log) \
--beam-size 12 --normalize 1 --allow-unk \
--log $(@:.model${NR}.done=.train${NR}.log) \
--valid-log $(@:.model${NR}.done=.valid${NR}.log) \
--enc-depth 6 --dec-depth 6 \
--transformer-heads 8 \
--transformer-postprocess-emb d \
@ -304,8 +346,9 @@ ${WORKDIR}/${MODEL}.transformer-align.model${NR}.done: \
--valid-sets ${word 4,$^} ${word 5,$^} \
--valid-metrics perplexity \
--valid-mini-batch ${MARIAN_VALID_MINI_BATCH} \
--beam-size 12 --normalize 1 \
--log $(@:.model${NR}.done=.train${NR}.log) --valid-log $(@:.model${NR}.done=.valid${NR}.log) \
--beam-size 12 --normalize 1 --allow-unk \
--log $(@:.model${NR}.done=.train${NR}.log) \
--valid-log $(@:.model${NR}.done=.valid${NR}.log) \
--enc-depth 6 --dec-depth 6 \
--transformer-heads 8 \
--transformer-postprocess-emb d \

View File

@ -20,49 +20,6 @@ ifndef LANGS
endif
## run things with individual data sets only
%-fiskmo:
${MAKE} TRAINSET=fiskmo ${@:-fiskmo=}
%-opensubtitles:
${MAKE} TRAINSET=OpenSubtitles ${@:-opensubtitles=}
%-finlex:
${MAKE} TRAINSET=Finlex ${@:-finlex=}
## a batch of interesting models ....
## germanic to germanic
germanic:
${MAKE} LANGS="${GERMANIC}" HPC_DISK=1500 multilingual
scandinavian:
${MAKE} LANGS="${SCANDINAVIAN}" multilingual-medium
memad2en:
${MAKE} LANGS="${MEMAD_LANGS}" PIVOT=en all2pivot
fiet:
${MAKE} SRCLANGS=fi TRGLANGS=et bilingual-medium
icelandic:
${MAKE} SRCLANGS=is TRGLANGS=en bilingual
${MAKE} SRCLANGS=is TRGLANGS="da no nn nb sv" bilingual
${MAKE} SRCLANGS=is TRGLANGS=fi bilingual
enru-yandex:
${MAKE} DATASET=opus+yandex SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex data
${MAKE} DATASET=opus+yandex SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex reverse-data
${MAKE} DATASET=opus+yandex SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex \
WALLTIME=72 HPC_CORES=1 HPC_MEM=8g MARIAN_WORKSPACE=12000 train.submit-multigpu
${MAKE} DATASET=opus+yandex SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex \
WALLTIME=72 HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
unidirectional:
${MAKE} data
${MAKE} WALLTIME=72 HPC_MEM=4g HPC_CORES=1 train.submit-multigpu
@ -229,6 +186,53 @@ all2en:
## run things with individual data sets only
%-fiskmo:
${MAKE} TRAINSET=fiskmo ${@:-fiskmo=}
%-opensubtitles:
${MAKE} TRAINSET=OpenSubtitles ${@:-opensubtitles=}
%-finlex:
${MAKE} TRAINSET=Finlex ${@:-finlex=}
## a batch of interesting models ....
## germanic to germanic
germanic:
${MAKE} LANGS="${GERMANIC}" HPC_DISK=1500 multilingual
scandinavian:
${MAKE} LANGS="${SCANDINAVIAN}" multilingual-medium
memad2en:
${MAKE} LANGS="${MEMAD_LANGS}" PIVOT=en all2pivot
fiet:
${MAKE} SRCLANGS=fi TRGLANGS=et bilingual-medium
icelandic:
${MAKE} SRCLANGS=is TRGLANGS=en bilingual
${MAKE} SRCLANGS=is TRGLANGS="da no nn nb sv" bilingual
${MAKE} SRCLANGS=is TRGLANGS=fi bilingual
enru-yandex:
${MAKE} DATASET=opus+yandex SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex data
${MAKE} DATASET=opus+yandex SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex reverse-data
${MAKE} DATASET=opus+yandex SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex \
WALLTIME=72 HPC_CORES=1 HPC_MEM=8g MARIAN_WORKSPACE=12000 train.submit-multigpu
${MAKE} DATASET=opus+yandex SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex \
WALLTIME=72 HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
enit:
${MAKE} SRCLANGS=en TRGLANGS=it traindata-spm
${MAKE} SRCLANGS=en TRGLANGS=it devdata-spm
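The %-fiskmo, %-opensubtitles and %-finlex pattern rules above restrict TRAINSET to a single corpus and, like the generic extensions of the main Makefile, can be appended to any other target. A hypothetical invocation, with the language pair chosen for illustration only:

    # train a Finnish-Swedish bilingual model on the fiskmo corpus only;
    # the pattern rule expands this to roughly: make TRAINSET=fiskmo SRCLANGS=fi TRGLANGS=sv bilingual
    make SRCLANGS=fi TRGLANGS=sv bilingual-fiskmo

    # the corpus restriction should also compose with the generic submit wrappers
    make SRCLANGS=fi TRGLANGS=sv bilingual-fiskmo.submitcpu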

View File

@ -37,7 +37,7 @@ WIKILANGS = ${sort $(patsubst >%${WIKISOURCE}-,%,${shell grep -o '>..${WIKISOUR
LANGID = ${SRC}
WIKI_DIR = wiki/${LANGPAIR}
WIKI_DIR = wiki/${LANGID}
WIKI_TXT = ${WIKI_DIR}/${WIKISOURCE}.${LANGID}.${PART}.gz
WIKI_SRC = ${LANGPAIR}/${WIKISOURCE}.${PART}_${MODELNAME}.${LANGPAIR}.${SRC}.gz
WIKI_PRE = ${LANGPAIR}/${WIKISOURCE}.${PART}_${MODELNAME}.${LANGPAIR}.${SRC}.spm.gz
@ -67,7 +67,17 @@ UDPIPE_MODEL = ${notdir $(shell ${LOAD_MODULES} find ${UDPIPE_MODELS}/ -name "${
all: index.html
${MAKE} ${WIKI_SRC} ${WIKI_TRG}
all-wikis: index.html
WIKISOURCES = wiki wikibooks wikinews wikiquote wikisource
all-wikis:
for w in ${WIKISOURCES}; do \
${MAKE} WIKISOURCE=$$w prepare-data; \
${MAKE} WIKISOURCE=$$w HPC_CORES=1 WALLTIME=72 translate.submit; \
done
all-wikilangs: index.html
for l in ${WIKILANGS}; do \
${MAKE} LANGID=$$l extract-text; \
done

View File

@ -13,3 +13,18 @@
|-----------------------|-------|-------|
| Tatoeba.de.nl | 52.6 | 0.697 |
# opus-2020-01-20.zip
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/de-nl/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/de-nl/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/de-nl/opus-2020-01-20.eval.txt)
## Benchmarks
| testset | BLEU | chr-F |
|-----------------------|-------|-------|
| Tatoeba.de.nl | 52.8 | 0.699 |

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/ase-en/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/ase-en/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/ase-en/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/ase-en/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/ase-en/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/ase-en/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# wikimedia-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: wikimedia
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [wikimedia-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/bcl-en/wikimedia-2020-01-17.zip)
* test set translations: [wikimedia-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/bcl-en/wikimedia-2020-01-17.test.txt)
* test set scores: [wikimedia-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/bcl-en/wikimedia-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/bcl-en/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/bcl-en/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/bcl-en/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/bi-en/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/bi-en/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/bi-en/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/bi-en/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/bi-en/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/bi-en/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/efi-en/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/efi-en/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/efi-en/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/efi-en/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/efi-en/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/efi-en/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ase/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ase/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ase/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ase/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ase/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ase/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# wikimedia-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: wikimedia
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [wikimedia-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/wikimedia-2020-01-17.zip)
* test set translations: [wikimedia-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/wikimedia-2020-01-17.test.txt)
* test set scores: [wikimedia-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/wikimedia-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-bi/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bi/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bi/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-bi/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bi/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bi/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-efi/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-efi/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-efi/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-efi/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-efi/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-efi/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# Tatoeba-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: Tatoeba
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [Tatoeba-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-gil/Tatoeba-2020-01-17.zip)
* test set translations: [Tatoeba-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-gil/Tatoeba-2020-01-17.test.txt)
* test set scores: [Tatoeba-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-gil/Tatoeba-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-gil/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-gil/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-gil/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ho/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ho/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ho/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ho/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ho/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ho/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-kj/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-kj/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-kj/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-kj/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-kj/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-kj/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-luo/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-luo/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-luo/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-luo/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-luo/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-luo/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-mos/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-mos/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-mos/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-mos/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-mos/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-mos/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# Tatoeba-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: Tatoeba
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [Tatoeba-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-niu/Tatoeba-2020-01-17.zip)
* test set translations: [Tatoeba-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-niu/Tatoeba-2020-01-17.test.txt)
* test set scores: [Tatoeba-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-niu/Tatoeba-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-niu/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-niu/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-niu/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# Tatoeba-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: Tatoeba
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [Tatoeba-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-pag/Tatoeba-2020-01-17.zip)
* test set translations: [Tatoeba-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-pag/Tatoeba-2020-01-17.test.txt)
* test set scores: [Tatoeba-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-pag/Tatoeba-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-pag/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-pag/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-pag/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# QED-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: QED
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [QED-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-run/QED-2020-01-17.zip)
* test set translations: [QED-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-run/QED-2020-01-17.test.txt)
* test set scores: [QED-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-run/QED-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-run/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-run/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-run/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# wikimedia-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: wikimedia
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [wikimedia-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-srn/wikimedia-2020-01-17.zip)
* test set translations: [wikimedia-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-srn/wikimedia-2020-01-17.test.txt)
* test set scores: [wikimedia-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-srn/wikimedia-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-srn/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-srn/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-srn/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# wikimedia-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: wikimedia
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [wikimedia-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tn/wikimedia-2020-01-17.zip)
* test set translations: [wikimedia-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tn/wikimedia-2020-01-17.test.txt)
* test set scores: [wikimedia-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tn/wikimedia-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tn/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tn/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tn/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# Tatoeba-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: Tatoeba
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [Tatoeba-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/Tatoeba-2020-01-17.zip)
* test set translations: [Tatoeba-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/Tatoeba-2020-01-17.test.txt)
* test set scores: [Tatoeba-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/Tatoeba-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# Tatoeba-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: Tatoeba
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [Tatoeba-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ty/Tatoeba-2020-01-17.zip)
* test set translations: [Tatoeba-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ty/Tatoeba-2020-01-17.test.txt)
* test set scores: [Tatoeba-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ty/Tatoeba-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ty/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ty/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ty/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,11 +1,11 @@
# bible-uedin-2020-01-17.zip
# opus-2020-01-20.zip
* dataset: bible-uedin
* dataset: opus
* model: transformer-align
* pre-processing: normalization + SentencePiece
* download: [bible-uedin-2020-01-17.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-wal/bible-uedin-2020-01-17.zip)
* test set translations: [bible-uedin-2020-01-17.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-wal/bible-uedin-2020-01-17.test.txt)
* test set scores: [bible-uedin-2020-01-17.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-wal/bible-uedin-2020-01-17.eval.txt)
* download: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-wal/opus-2020-01-20.zip)
* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-wal/opus-2020-01-20.test.txt)
* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-wal/opus-2020-01-20.eval.txt)
## Benchmarks

View File

@ -1,6 +1,7 @@
ab de JW300.ab.de 1.4 0.148
ab en JW300.ab.en 2.9 0.144
ab en Tatoeba.ab.en 2.3 0.097
ab es JW300.ab.es 2.2 0.138
ab fi JW300.ab.fi 1.5 0.147
ab fr JW300.ab.fr 1.8 0.129
ab sv JW300.ab.sv 2.4 0.147
@ -45,6 +46,7 @@ ar sv GlobalVoices.ar.sv 12.9 0.386
as de JW300.as.de 1.1 0.176
ase de JW300.ase.de 27.2 0.478
ase en JW300.ase.en 99.5 0.997
ase es JW300.ase.es 31.7 0.498
ase fr JW300.ase.fr 37.8 0.553
as en JW300.as.en 1.7 0.137
as en Tatoeba.as.en 58.3 0.644
@ -107,6 +109,7 @@ bg sv JW300.bg.sv 29.1 0.494
bhw en JW300.bhw.en 7.7 0.235
bi de JW300.bi.de 15.9 0.355
bi en JW300.bi.en 30.3 0.458
bi es JW300.bi.es 21.1 0.388
bi fi JW300.bi.fi 0.6 0.124
bi fr JW300.bi.fr 21.5 0.382
bi sv JW300.bi.sv 22.7 0.403
@ -146,12 +149,14 @@ ca es Tatoeba.ca.es 74.9 0.863
ca fr Tatoeba.ca.fr 50.4 0.672
cak de JW300.cak.de 0.7 0.077
cak en JW300.cak.en 2.6 0.140
cak es JW300.cak.es 4.3 0.181
cak fi JW300.cak.fi 0.6 0.109
cak fr JW300.cak.fr 2.2 0.132
cak sv JW300.cak.sv 0.6 0.084
ca sv GlobalVoices.ca.sv 11.2 0.366
cat de JW300.cat.de 1.4 0.143
cat en JW300.cat.en 3.3 0.171
cat es JW300.cat.es 2.9 0.163
cat fi JW300.cat.fi 1.6 0.155
cat fr JW300.cat.fr 3.5 0.163
cat sv JW300.cat.sv 2.5 0.154
@ -340,8 +345,8 @@ de fr newstest2009.de.fr 25.6 0.539
de fr newstest2010.de.fr 29.2 0.573
de fr newstest2011.de.fr 27.2 0.554
de fr newstest2012.de.fr 27.7 0.554
de fr newstest2013.de.fr 29.5 0.560
de fr newstest2019-defr.de.fr 36.6 0.625
de fr newstest2013.de.fr 29.8 0.561
de fr newstest2019-defr.de.fr 36.9 0.625
de fr Tatoeba.de.fr 49.2 0.664
de fse JW300.de.fse 3.2 0.180
de gaa JW300.de.gaa 26.3 0.471
@ -755,13 +760,13 @@ en ro newsdev2016-enro.en.ro 30.8 0.592
en ro newstest2016-enro.en.ro 28.8 0.571
en ro Tatoeba.en.ro 45.3 0.670
en rsl JW300.en.rsl 3.2 0.156
en ru newstest2012.en.ru 23.7 0.436
en ru newstest2013.en.ru 19.4 0.413
en ru newstest2015-enru.en.ru 21.6 0.440
en ru newstest2016-enru.en.ru 21.0 0.432
en ru newstest2017-enru.en.ru 22.7 0.443
en ru newstest2018-enru.en.ru 19.2 0.413
en ru newstest2019-enru.en.ru 22.3 0.412
en ru newstest2012.en.ru 25.2 0.453
en ru newstest2013.en.ru 20.2 0.423
en ru newstest2015-enru.en.ru 23.2 0.461
en ru newstest2016-enru.en.ru 22.5 0.456
en ru newstest2017-enru.en.ru 24.5 0.468
en ru newstest2018-enru.en.ru 20.7 0.434
en ru newstest2019-enru.en.ru 23.5 0.430
en run JW300.en.run 34.2 0.591
en ru Tatoeba.en.ru 46.9 0.656
en rw JW300.en.rw 33.3 0.569
@ -821,11 +826,13 @@ eo es Tatoeba.eo.es 44.2 0.631
eo fi Tatoeba.eo.fi 13.9 0.325
eo fr Tatoeba.eo.fr 50.9 0.675
eo sv Tatoeba.eo.sv 25.7 0.439
es ab JW300.es.ab 0.9 0.112
es ach JW300.es.ach 3.9 0.175
es ada JW300.es.ada 7.1 0.206
es aed JW300.es.aed 89.2 0.915
es af JW300.es.af 36.6 0.564
es am GlobalVoices.es.am 0.7 0.125
es ase JW300.es.ase 31.5 0.488
es as JW300.es.as 0.6 0.095
es ay JW300.es.ay 9.8 0.325
es az_IR+az JW300.es.az 12.6 0.319
@ -837,11 +844,14 @@ es bcl JW300.es.bcl 37.1 0.586
es bem JW300.es.bem 19.9 0.440
es ber Tatoeba.es.ber 21.8 0.444
es bg GlobalVoices.es.bg 21.0 0.493
es bi JW300.es.bi 28.0 0.473
es bn GlobalVoices.es.bn 5.5 0.327
es bum JW300.es.bum 5.2 0.186
es bzs JW300.es.bzs 26.4 0.451
es cab JW300.es.cab 5.4 0.275
es cak JW300.es.cak 6.6 0.241
es ca Tatoeba.es.ca 68.9 0.832
es cat JW300.es.cat 3.5 0.175
es ceb JW300.es.ceb 33.9 0.564
es chk JW300.es.chk 17.1 0.378
es cjk JW300.es.cjk 3.5 0.214
@ -849,35 +859,54 @@ es crp bible-uedin.es.crp 4.5 0.202
es crs JW300.es.crs 26.4 0.453
es csg JW300.es.csg 91.2 0.937
es csl JW300.es.csl 2.7 0.052
es cs newssyscomb2009.es.cs 21.4 0.497
es cs news-test2008.es.cs 20.1 0.477
es cs newstest2009.es.cs 20.6 0.486
es cs newstest2010.es.cs 22.4 0.503
es cs newstest2011.es.cs 21.8 0.488
es cs newstest2012.es.cs 20.9 0.476
es cs newstest2013.es.cs 23.7 0.502
es csn JW300.es.csn 87.8 0.901
es cs Tatoeba.es.cs 46.4 0.655
es cv JW300.es.cv 1.8 0.136
es cy JW300.es.cy 5.2 0.211
es da Tatoeba.es.da 55.7 0.712
es de newssyscomb2009.es.de 21.1 0.523
es de news-test2008.es.de 20.4 0.510
es de newstest2009.es.de 20.8 0.514
es de newstest2010.es.de 24.2 0.538
es de newstest2011.es.de 22.4 0.520
es de newstest2012.es.de 23.2 0.523
es de newstest2013.es.de 24.7 0.539
es de Tatoeba.es.de 50.0 0.683
es dje bible-uedin.es.dje 4.7 0.227
es djk JW300.es.djk 4.7 0.182
es ecs JW300.es.ecs 17.2 0.426
es ee JW300.es.ee 25.6 0.470
es efi JW300.es.efi 24.6 0.452
es eo Tatoeba.es.eo 44.7 0.657
es es Tatoeba.es.es 51.7 0.688
es et JW300.es.et 20.7 0.466
es eu Tatoeba.es.eu 36.9 0.637
es fa GlobalVoices.es.fa 5.9 0.282
es fil GlobalVoices.es.fil 0.2 0.108
es fi Tatoeba.es.fi 43.5 0.662
es fj JW300.es.fj 24.8 0.472
es fon JW300.es.fon 5.3 0.163
es fr newssyscomb2009.es.fr 33.6 0.610
es fr news-test2008.es.fr 32.0 0.585
es fr newstest2009.es.fr 32.5 0.590
es fr newstest2010.es.fr 35.0 0.615
es fr newssyscomb2009.es.fr 33.9 0.611
es fr news-test2008.es.fr 31.9 0.585
es fr newstest2009.es.fr 32.5 0.591
es fr newstest2010.es.fr 35.1 0.616
es fr newstest2011.es.fr 33.9 0.607
es fr newstest2012.es.fr 32.4 0.602
es fr newstest2013.es.fr 32.1 0.593
es fr newstest2012.es.fr 32.7 0.603
es fr newstest2013.es.fr 32.3 0.594
es fr Tatoeba.es.fr 58.4 0.731
es fse JW300.es.fse 2.7 0.171
es gaa JW300.es.gaa 27.8 0.479
es gd bible-uedin.es.gd 0.0 0.095
es gil JW300.es.gil 23.8 0.470
es gl Tatoeba.es.gl 63.8 0.778
es guc JW300.es.guc 3.5 0.225
es gug JW300.es.gug 14.0 0.359
es gu JW300.es.gu 2.4 0.124
es guw JW300.es.guw 28.6 0.480
@ -886,6 +915,7 @@ es gym JW300.es.gym 5.5 0.267
es ha JW300.es.ha 20.6 0.421
es he Tatoeba.es.he 44.3 0.642
es hi GlobalVoices.es.hi 0.7 0.125
es hil JW300.es.hil 35.8 0.584
es hmn JW300.es.hmn 4.6 0.195
es ho JW300.es.ho 22.8 0.463
es hr JW300.es.hr 21.7 0.459
@ -902,6 +932,7 @@ es is JW300.es.is 22.3 0.418
es iso JW300.es.iso 22.4 0.396
es jap bible-uedin.es.jap 3.9 0.121
es ja Tatoeba.es.ja 1.1 0.352
es jp GlobalVoices.es.jp 0.1 0.022
es jsl JW300.es.jsl 3.5 0.071
es jv JW300.es.jv 5.1 0.199
es ka JW300.es.ka 10.8 0.279
@ -999,6 +1030,8 @@ es rar JW300.es.rar 6.6 0.255
es rnd JW300.es.rnd 14.6 0.347
es ro Tatoeba.es.ro 45.7 0.666
es rsl JW300.es.rsl 3.0 0.140
es ru newstest2012.es.ru 20.9 0.489
es ru newstest2013.es.ru 23.4 0.504
es run JW300.es.run 20.0 0.456
es ru Tatoeba.es.ru 47.0 0.657
es rw JW300.es.rw 22.6 0.472
@ -1084,28 +1117,28 @@ fa es GlobalVoices.fa.es 12.6 0.369
fa fi JW300.fa.fi 18.0 0.405
fa fr GlobalVoices.fa.fr 10.6 0.341
fa sv JW300.fa.sv 18.3 0.353
fi ab JW300.fi.ab 1.0 0.123
fi ab JW300.fi.ab 0.1 0.058
fi ach JW300.fi.ach 4.0 0.194
fi ada JW300.fi.ada 7.4 0.208
fi aed JW300.fi.aed 3.1 0.155
fi af JW300.fi.af 39.3 0.588
fi am JW300.fi.am 14.8 0.295
fi ar JW300.fi.ar 8.0 0.225
fi ase JW300.fi.ase 36.6 0.553
fi ase JW300.fi.ase 1.1 0.111
fi as JW300.fi.as 0.9 0.095
fi az_IR+az JW300.fi.az 16.8 0.385
fi ba JW300.fi.ba 2.1 0.134
fi ba JW300.fi.ba 0.1 0.034
fi bas JW300.fi.bas 4.6 0.182
fi bci JW300.fi.bci 10.9 0.266
fi bcl JW300.fi.bcl 38.4 0.604
fi bem JW300.fi.bem 21.4 0.465
fi bg JW300.fi.bg 27.8 0.502
fi bi JW300.fi.bi 3.6 0.202
fi bi JW300.fi.bi 1.3 0.174
fi bn JW300.fi.bn 3.5 0.178
fi bum JW300.fi.bum 5.1 0.184
fi bzs JW300.fi.bzs 27.2 0.459
fi cab JW300.fi.cab 2.6 0.189
fi cak JW300.fi.cak 1.8 0.154
fi cak JW300.fi.cak 1.5 0.151
fi cat JW300.fi.cat 3.1 0.160
fi ceb JW300.fi.ceb 35.1 0.581
fi chk JW300.fi.chk 17.8 0.394
@ -1116,7 +1149,7 @@ fi csg JW300.fi.csg 3.6 0.162
fi cs JW300.fi.cs 25.0 0.470
fi csl JW300.fi.csl 2.4 0.051
fi csn JW300.fi.csn 2.7 0.157
fi cv JW300.fi.cv 2.2 0.151
fi cv JW300.fi.cv 0.2 0.062
fi cy JW300.fi.cy 1.8 0.115
fi dhv JW300.fi.dhv 5.4 0.241
fi dje bible-uedin.fi.dje 4.7 0.219
@ -1224,9 +1257,10 @@ fi sl JW300.fi.sl 24.1 0.481
fi sm JW300.fi.sm 24.0 0.443
fi sn JW300.fi.sn 25.3 0.547
fi sq JW300.fi.sq 32.0 0.535
fi sr_ME+sr+srp bible-uedin.fi.srp 16.4 0.421
fi srn JW300.fi.srn 29.2 0.491
fi ssp JW300.fi.ssp 3.9 0.176
fi sv fiskmo_testset.fi.sv 27.0 0.599
fi sv fiskmo_testset.fi.sv 27.4 0.601
fi sv Tatoeba.fi.sv 55.0 0.706
fi sw JW300.fi.sw 29.9 0.548
fi tg_TJ+tg JW300.fi.tg 9.7 0.260
@ -1255,7 +1289,7 @@ fi zul+zu JW300.fi.zu 28.1 0.584
fj de JW300.fj.de 18.8 0.377
fj en JW300.fj.en 31.0 0.471
fj en Tatoeba.fj.en 79.7 0.835
fj fi JW300.fj.fi 20.1 0.421
fj fi JW300.fj.fi 0.3 0.061
fj fr JW300.fj.fr 24.0 0.407
fon de JW300.fon.de 1.7 0.150
fon en JW300.fon.en 4.7 0.204
@ -1298,26 +1332,26 @@ fr crs JW300.fr.crs 31.6 0.492
fr csg JW300.fr.csg 4.0 0.177
fr cs GlobalVoices.fr.cs 16.1 0.426
fr csl JW300.fr.csl 1.8 0.045
fr cs newssyscomb2009.fr.cs 19.3 0.476
fr cs news-test2008.fr.cs 17.9 0.461
fr cs newstest2009.fr.cs 18.0 0.464
fr cs newstest2010.fr.cs 18.4 0.466
fr cs newstest2011.fr.cs 19.4 0.471
fr cs newstest2012.fr.cs 18.4 0.456
fr cs newstest2013.fr.cs 19.6 0.463
fr cs newssyscomb2009.fr.cs 21.5 0.497
fr cs news-test2008.fr.cs 20.1 0.482
fr cs newstest2009.fr.cs 20.3 0.485
fr cs newstest2010.fr.cs 20.4 0.485
fr cs newstest2011.fr.cs 21.6 0.491
fr cs newstest2012.fr.cs 20.4 0.474
fr cs newstest2013.fr.cs 21.9 0.485
fr csn JW300.fr.csn 3.4 0.169
fr ctu JW300.fr.ctu 4.9 0.233
fr cv JW300.fr.cv 1.9 0.149
fr cy JW300.fr.cy 6.9 0.245
fr de euelections_dev2019.transformer-align.fr 26.4 0.571
fr de newssyscomb2009.fr.de 22.1 0.524
fr de euelections_dev2019.transformer-align.fr 26.6 0.571
fr de newssyscomb2009.fr.de 22.2 0.526
fr de news-test2008.fr.de 22.1 0.524
fr de newstest2009.fr.de 21.6 0.520
fr de newstest2009.fr.de 21.9 0.522
fr de newstest2010.fr.de 22.6 0.527
fr de newstest2011.fr.de 21.5 0.518
fr de newstest2012.fr.de 22.4 0.516
fr de newstest2013.fr.de 24.2 0.532
fr de newstest2019-frde.fr.de 27.9 0.595
fr de newstest2011.fr.de 21.7 0.519
fr de newstest2012.fr.de 22.8 0.516
fr de newstest2013.fr.de 24.3 0.534
fr de newstest2019-frde.fr.de 27.9 0.594
fr de Tatoeba.fr.de 49.1 0.676
fr dhv JW300.fr.dhv 7.5 0.259
fr dje bible-uedin.fr.dje 4.6 0.224
@ -1326,13 +1360,13 @@ fr ee JW300.fr.ee 26.3 0.466
fr efi JW300.fr.efi 26.9 0.462
fr el Tatoeba.fr.el 56.2 0.719
fr eo Tatoeba.fr.eo 52.0 0.695
fr es newssyscomb2009.fr.es 34.3 0.601
fr es news-test2008.fr.es 32.5 0.583
fr es newstest2009.fr.es 31.6 0.586
fr es newstest2010.fr.es 36.5 0.616
fr es newstest2011.fr.es 38.3 0.622
fr es newssyscomb2009.fr.es 34.5 0.602
fr es news-test2008.fr.es 32.6 0.583
fr es newstest2009.fr.es 31.7 0.587
fr es newstest2010.fr.es 36.4 0.616
fr es newstest2011.fr.es 38.4 0.623
fr es newstest2012.fr.es 38.1 0.619
fr es newstest2013.fr.es 34.0 0.587
fr es newstest2013.fr.es 34.2 0.588
fr es Tatoeba.fr.es 53.2 0.709
fr et JW300.fr.et 19.4 0.452
fr eu bible-uedin.fr.eu 0.9 0.249
@ -1434,7 +1468,10 @@ fr ngl JW300.fr.ngl 2.2 0.196
fr ngu JW300.fr.ngu 3.6 0.220
fr nia JW300.fr.nia 4.8 0.212
fr niu JW300.fr.niu 34.5 0.537
fr nr JW300.fr.nr 1.6 0.158
fr nso JW300.fr.nso 33.3 0.527
fr nya JW300.fr.nya 2.2 0.182
fr ny JW300.fr.ny 23.2 0.481
fr nyk JW300.fr.nyk 14.8 0.366
fr nyn JW300.fr.nyn 3.0 0.211
fr nzi JW300.fr.nzi 3.3 0.183
@ -1442,10 +1479,12 @@ fr oc Tatoeba.fr.oc 0.1 0.064
fr oke JW300.fr.oke 6.2 0.212
fr om JW300.fr.om 14.4 0.393
fr os JW300.fr.os 13.2 0.327
fr pag JW300.fr.pag 27.0 0.486
fr pa JW300.fr.pa 9.1 0.211
fr pap JW300.fr.pap 27.8 0.464
fr pck bible-uedin.fr.pck 3.6 0.208
fr pcm JW300.fr.pcm 4.3 0.183
fr pes bible-uedin.fr.pes 2.7 0.191
fr pis JW300.fr.pis 29.0 0.486
fr pl Tatoeba.fr.pl 40.7 0.625
fr plt bible-uedin.fr.plt 2.8 0.253
@ -1453,15 +1492,19 @@ fr pon JW300.fr.pon 23.9 0.458
fr prl JW300.fr.prl 4.0 0.182
fr pso JW300.fr.pso 3.3 0.154
fr pt_br+pt_BR+pt_PT+pt Tatoeba.fr.pt 42.8 0.640
fr que JW300.fr.que 6.3 0.248
fr qu JW300.fr.qu 7.6 0.261
fr quy JW300.fr.quy 8.4 0.287
fr quz JW300.fr.quz 8.7 0.269
fr qvi JW300.fr.qvi 1.7 0.179
fr rar JW300.fr.rar 6.2 0.250
fr rnd JW300.fr.rnd 21.8 0.431
fr ro Tatoeba.fr.ro 42.1 0.640
fr rsl JW300.fr.rsl 2.9 0.147
fr run JW300.fr.run 23.8 0.482
fr rw JW300.fr.rw 25.5 0.483
fr seh JW300.fr.seh 3.2 0.205
fr sg JW300.fr.sg 29.7 0.473
fr sid JW300.fr.sid 2.3 0.186
fr si JW300.fr.si 10.2 0.228
fr sk JW300.fr.sk 24.9 0.456
@ -1473,6 +1516,7 @@ fr sop JW300.fr.sop 2.8 0.211
fr sq GlobalVoices.fr.sq 15.9 0.432
fr srm JW300.fr.srm 7.7 0.225
fr srn JW300.fr.srn 27.4 0.459
fr ss JW300.fr.ss 14.1 0.383
fr ssp JW300.fr.ssp 4.1 0.178
fr st JW300.fr.st 34.6 0.540
fr swc JW300.fr.swc 28.2 0.499
@ -1483,6 +1527,7 @@ fr tdt JW300.fr.tdt 13.9 0.314
fr te JW300.fr.te 3.6 0.166
fr tg_TJ+tg JW300.fr.tg 8.9 0.242
fr th JW300.fr.th 19.4 0.342
fr ti JW300.fr.ti 17.8 0.292
fr tiv JW300.fr.tiv 23.5 0.406
fr tk JW300.fr.tk 10.5 0.275
fr tll JW300.fr.tll 24.6 0.467
@ -1492,6 +1537,7 @@ fr tog JW300.fr.tog 3.0 0.209
fr toj JW300.fr.toj 4.1 0.205
fr to JW300.fr.to 37.0 0.518
fr top JW300.fr.top 2.3 0.172
fr tpi JW300.fr.tpi 30.0 0.487
fr tsc JW300.fr.tsc 4.5 0.226
fr ts JW300.fr.ts 31.4 0.525
fr tt JW300.fr.tt 13.3 0.336
@ -1504,9 +1550,11 @@ fr ug Tatoeba.fr.ug 0.0 0.084
fr uk Tatoeba.fr.uk 39.4 0.581
fr urh JW300.fr.urh 4.6 0.194
fr ur_PK+ur GlobalVoices.fr.ur 0.2 0.117
fr ve JW300.fr.ve 26.3 0.481
fr vi_VN+vi Tatoeba.fr.vi 33.0 0.508
fr vmw JW300.fr.vmw 2.1 0.184
fr vsl JW300.fr.vsl 4.6 0.189
fr wal JW300.fr.wal 10.6 0.336
fr war JW300.fr.war 33.7 0.538
fr wes JW300.fr.wes 5.0 0.192
fr wls JW300.fr.wls 27.5 0.478
@ -1548,9 +1596,9 @@ gl en Tatoeba.gl.en 42.5 0.604
gl es Tatoeba.gl.es 68.7 0.802
guc de JW300.guc.de 1.5 0.122
guc en JW300.guc.en 2.4 0.132
guc fi JW300.guc.fi 1.1 0.125
guc fi JW300.guc.fi 0.3 0.077
guc fr JW300.guc.fr 2.0 0.135
guc sv JW300.guc.sv 1.7 0.126
guc sv JW300.guc.sv 0.1 0.055
gu de JW300.gu.de 2.6 0.185
gu en JW300.gu.en 7.7 0.200
gu en newsdev2019-engu.gu.en 2.7 0.220
@ -1598,7 +1646,7 @@ hi fr GlobalVoices.hi.fr 1.4 0.163
hil de JW300.hil.de 26.4 0.479
hil en JW300.hil.en 49.2 0.638
hil fr JW300.hil.fr 31.2 0.494
hil sv JW300.hil.sv 33.6 0.525
hil sv JW300.hil.sv 0.7 0.125
hi sv JW300.hi.sv 11.6 0.255
hmn en JW300.hmn.en 5.5 0.184
hmn es JW300.hmn.es 3.0 0.148
@ -1636,7 +1684,7 @@ ia es Tatoeba.ia.es 0.1 0.092
ia fr Tatoeba.ia.fr 0.2 0.112
iba en JW300.iba.en 9.8 0.286
iba es JW300.iba.es 4.7 0.181
iba fr JW300.iba.fr 5.2 0.203
iba fr JW300.iba.fr 0.1 0.074
ibg de JW300.ibg.de 2.2 0.159
ibg en JW300.ibg.en 5.0 0.206
ibg es JW300.ibg.es 3.4 0.169
@ -1844,9 +1892,9 @@ lg sv JW300.lg.sv 24.5 0.423
ln de JW300.ln.de 23.3 0.428
ln en JW300.ln.en 35.9 0.516
ln es JW300.ln.es 26.5 0.444
ln fi JW300.ln.fi 24.8 0.471
ln fi JW300.ln.fi 0.7 0.128
ln fr JW300.ln.fr 28.4 0.456
ln sv JW300.ln.sv 29.1 0.467
ln sv JW300.ln.sv 1.0 0.136
lo en JW300.lo.en 3.7 0.150
lo en Tatoeba.lo.en 1.4 0.104
lo es JW300.lo.es 3.5 0.150
@ -1889,7 +1937,7 @@ luo de JW300.luo.de 11.7 0.283
luo en JW300.luo.en 29.1 0.452
luo es JW300.luo.es 14.9 0.311
luo fi JW300.luo.fi 17.2 0.373
luo fr JW300.luo.fr 19.2 0.356
luo fr JW300.luo.fr 0.6 0.120
luo sv JW300.luo.sv 19.1 0.365
lus de JW300.lus.de 17.7 0.363
lus en JW300.lus.en 37.0 0.534
@ -1910,9 +1958,9 @@ mai en Tatoeba.mai.en 79.5 0.918
mam de JW300.mam.de 1.1 0.114
mam en JW300.mam.en 2.1 0.136
mam es JW300.mam.es 3.0 0.167
mam fi JW300.mam.fi 2.0 0.137
mam fr JW300.mam.fr 2.9 0.143
mam sv JW300.mam.sv 2.2 0.121
mam fi JW300.mam.fi 0.6 0.109
mam fr JW300.mam.fr 1.3 0.127
mam sv JW300.mam.sv 0.6 0.094
mau en JW300.mau.en 2.5 0.142
mau es JW300.mau.es 3.5 0.178
mau fr JW300.mau.fr 2.5 0.143
@ -1924,9 +1972,9 @@ mco sv JW300.mco.sv 1.4 0.129
mfe de JW300.mfe.de 18.5 0.376
mfe en JW300.mfe.en 39.9 0.552
mfe es JW300.mfe.es 24.0 0.418
mfe fi JW300.mfe.fi 23.5 0.450
mfe fr JW300.mfe.fr 30.3 0.482
mfe sv JW300.mfe.sv 27.8 0.464
mfe fi JW300.mfe.fi 2.2 0.160
mfe fr JW300.mfe.fr 2.7 0.177
mfe sv JW300.mfe.sv 2.2 0.169
mfs de JW300.mfs.de 2.2 0.147
mfs en JW300.mfs.en 3.8 0.172
mfs es JW300.mfs.es 88.9 0.910
@ -1946,8 +1994,8 @@ mg sv GlobalVoices.mg.sv 4.9 0.275
mh de JW300.mh.de 19.2 0.380
mh en JW300.mh.en 36.5 0.505
mh es JW300.mh.es 23.6 0.407
mh fr JW300.mh.fr 24.2 0.408
mh sv JW300.mh.sv 26.6 0.439
mh fr JW300.mh.fr 0.3 0.063
mh sv JW300.mh.sv 0.2 0.031
mi de bible-uedin.mi.de 3.7 0.221
mi en bible-uedin.mi.en 2.7 0.191
mi en Tatoeba.mi.en 6.9 0.239
@ -1974,7 +2022,7 @@ mn sv JW300.mn.sv 9.8 0.249
mos de JW300.mos.de 15.5 0.322
mos en JW300.mos.en 26.1 0.408
mos es JW300.mos.es 16.4 0.329
mos fr JW300.mos.fr 19.6 0.349
mos fr JW300.mos.fr 0.5 0.160
mos sv JW300.mos.sv 20.0 0.372
mr de JW300.mr.de 1.3 0.179
mr en Tatoeba.mr.en 38.2 0.515
@ -1997,9 +2045,9 @@ my fi JW300.my.fi 0.8 0.171
my fr GlobalVoices.my.fr 0.2 0.130
my sv JW300.my.sv 0.9 0.139
nba en JW300.nba.en 8.1 0.253
nch en JW300.nch.en 4.1 0.165
nch en JW300.nch.en 4.2 0.165
nch es JW300.nch.es 5.4 0.202
nch fr JW300.nch.fr 3.6 0.162
nch fr JW300.nch.fr 0.1 0.049
ncj de JW300.ncj.de 1.3 0.131
ncj en JW300.ncj.en 3.5 0.156
ncj es JW300.ncj.es 5.3 0.217
@ -2051,8 +2099,9 @@ nl sv GlobalVoices.nl.sv 25.0 0.518
nr de JW300.nr.de 1.6 0.145
nr en JW300.nr.en 3.8 0.190
nr es JW300.nr.es 2.6 0.150
nr fi JW300.nr.fi 1.7 0.161
nr sv JW300.nr.sv 2.2 0.160
nr fi JW300.nr.fi 0.1 0.052
nr fr JW300.nr.fr 0.3 0.041
nr sv JW300.nr.sv 0.2 0.068
nso de JW300.nso.de 24.7 0.461
nso en JW300.nso.en 48.6 0.634
nso es JW300.nso.es 29.5 0.485
@ -2063,12 +2112,14 @@ nya de JW300.nya.de 1.7 0.137
nya en JW300.nya.en 5.0 0.188
nya es JW300.nya.es 3.1 0.153
nya fi JW300.nya.fi 2.0 0.157
nya fr JW300.nya.fr 0.1 0.022
nya sv JW300.nya.sv 2.8 0.153
ny de JW300.ny.de 23.9 0.440
ny en JW300.ny.en 39.7 0.547
ny en Tatoeba.ny.en 44.2 0.562
ny es JW300.ny.es 27.9 0.457
ny fi JW300.ny.fi 25.1 0.479
ny fi JW300.ny.fi 0.4 0.073
ny fr JW300.ny.fr 0.4 0.085
nyk de JW300.nyk.de 1.5 0.138
nyk en JW300.nyk.en 27.3 0.423
nyk es JW300.nyk.es 2.9 0.165
@ -2081,7 +2132,7 @@ nyn es JW300.nyn.es 3.3 0.163
nyn fi JW300.nyn.fi 2.0 0.160
nyn fr JW300.nyn.fr 4.0 0.171
nyn sv JW300.nyn.sv 2.8 0.167
ny sv JW300.ny.sv 30.5 0.485
ny sv JW300.ny.sv 0.2 0.108
nyu en JW300.nyu.en 7.1 0.229
nzi de JW300.nzi.de 1.6 0.140
nzi en JW300.nzi.en 5.4 0.197
@ -2101,7 +2152,7 @@ om de JW300.om.de 10.8 0.273
om en JW300.om.en 27.3 0.448
om es JW300.om.es 13.4 0.296
om fr JW300.om.fr 17.0 0.338
om sv JW300.om.sv 16.1 0.330
om sv JW300.om.sv 0.6 0.142
or en Tatoeba.or.en 1.2 0.093
os de JW300.os.de 13.3 0.297
os en JW300.os.en 17.7 0.329
@ -2118,13 +2169,14 @@ pa fr JW300.pa.fr 14.3 0.278
pag de JW300.pag.de 22.8 0.435
pag en JW300.pag.en 42.4 0.580
pag es JW300.pag.es 27.9 0.459
pag fr JW300.pag.fr 0.2 0.043
pag sv JW300.pag.sv 29.8 0.492
pap de JW300.pap.de 25.0 0.466
pap en JW300.pap.en 47.3 0.634
pap en Tatoeba.pap.en 63.2 0.684
pap es JW300.pap.es 32.3 0.518
pap fr JW300.pap.fr 31.0 0.498
pap sv JW300.pap.sv 33.1 0.524
pap sv JW300.pap.sv 1.7 0.142
pa sv JW300.pa.sv 11.8 0.253
pck de bible-uedin.pck.de 3.4 0.207
pck en bible-uedin.pck.en 4.0 0.214
@ -2140,6 +2192,7 @@ pcm sv JW300.pcm.sv 4.1 0.168
pes de Tatoeba.pes.de 0.6 0.159
pes en Tatoeba.pes.en 0.7 0.147
pes es bible-uedin.pes.es 3.7 0.216
pes fr bible-uedin.pes.fr 0.6 0.126
pes sv bible-uedin.pes.sv 3.8 0.197
pis de JW300.pis.de 19.7 0.391
pis en JW300.pis.en 33.3 0.493
@ -2177,13 +2230,14 @@ qu de JW300.qu.de 5.7 0.200
que de JW300.que.de 2.4 0.147
que en JW300.que.en 8.0 0.216
que es JW300.que.es 14.7 0.310
que fr JW300.que.fr 0.6 0.126
qu en JW300.qu.en 8.6 0.218
qu en Tatoeba.qu.en 22.7 0.348
qu es JW300.qu.es 12.1 0.274
que sv JW300.que.sv 2.5 0.152
que sv JW300.que.sv 0.0 0.020
qu fr JW300.qu.fr 6.9 0.208
qug en JW300.qug.en 3.6 0.155
qu sv JW300.qu.sv 7.6 0.216
qu sv JW300.qu.sv 0.4 0.086
quy de JW300.quy.de 1.8 0.157
quy en JW300.quy.en 9.0 0.225
quy es JW300.quy.es 14.5 0.306
@ -2192,7 +2246,8 @@ quy sv JW300.quy.sv 2.4 0.159
quz de JW300.quz.de 6.2 0.204
quz en JW300.quz.en 8.9 0.222
quz es JW300.quz.es 13.4 0.285
quz sv JW300.quz.sv 7.6 0.213
quz fr JW300.quz.fr 0.5 0.116
quz sv JW300.quz.sv 0.2 0.102
qvi de JW300.qvi.de 1.6 0.130
qvi en JW300.qvi.en 3.1 0.145
qvi es JW300.qvi.es 3.4 0.156
@ -2217,7 +2272,7 @@ rsl fi JW300.rsl.fi 2.4 0.169
rsl fr JW300.rsl.fr 3.6 0.162
rsl sv JW300.rsl.sv 3.7 0.168
ru en newstest2012.ru.en 35.3 0.606
ru en newstest2013.ru.en 28.2 0.547
ru en newstest2013.ru.en 28.2 0.546
ru en newstest2014-ruen.ru.en 32.5 0.595
ru en newstest2015-enru.ru.en 30.7 0.569
ru en newstest2016-enru.ru.en 30.5 0.569
@ -2225,11 +2280,16 @@ ru en newstest2017-enru.ru.en 33.8 0.596
ru en newstest2018-enru.ru.en 30.0 0.569
ru en newstest2019-ruen.ru.en 32.0 0.581
ru en Tatoeba.ru.en 59.8 0.726
ru es newstest2012.ru.es 26.1 0.527
ru es newstest2013.ru.es 28.2 0.538
ru es Tatoeba.ru.es 49.4 0.675
ru fi Tatoeba.ru.fi 40.9 0.649
ru fr newstest2012.ru.fr 18.3 0.497
ru fr newstest2013.ru.fr 21.6 0.516
ru fr Tatoeba.ru.fr 51.1 0.666
run en JW300.run.en 42.7 0.583
run es JW300.run.es 26.9 0.452
run fr JW300.run.fr 0.6 0.102
run sv JW300.run.sv 30.1 0.484
rw en JW300.rw.en 37.3 0.530
rw en Tatoeba.rw.en 49.8 0.643
@ -2243,6 +2303,7 @@ seh fr JW300.seh.fr 3.7 0.167
seh sv JW300.seh.sv 3.0 0.166
sg en JW300.sg.en 32.0 0.477
sg es JW300.sg.es 21.3 0.385
sg fr JW300.sg.fr 24.9 0.420
sg sv JW300.sg.sv 25.3 0.428
sid en JW300.sid.en 4.2 0.176
sid es JW300.sid.es 2.9 0.158
@ -2267,7 +2328,7 @@ sl sv JW300.sl.sv 27.8 0.509
sm en JW300.sm.en 36.1 0.520
sm es JW300.sm.es 21.3 0.390
sm fr JW300.sm.fr 24.6 0.419
sm sv JW300.sm.sv 24.7 0.435
sm sv JW300.sm.sv 0.0 0.057
sn en JW300.sn.en 51.8 0.648
sn es JW300.sn.es 32.5 0.509
sn fr JW300.sn.fr 30.8 0.491
@ -2287,6 +2348,7 @@ sq fr GlobalVoices.sq.fr 19.4 0.464
sq sv JW300.sq.sv 36.2 0.559
srm en JW300.srm.en 7.8 0.250
srm es JW300.srm.es 4.5 0.182
sr_ME+sr+srp fi bible-uedin.sr_ME.fi 19.7 0.481
srm fr JW300.srm.fr 5.0 0.189
srn en JW300.srn.en 40.3 0.555
srn es JW300.srn.es 30.4 0.481
@ -2294,13 +2356,14 @@ srn fr JW300.srn.fr 28.9 0.462
srn sv JW300.srn.sv 32.2 0.500
ss en JW300.ss.en 30.9 0.478
ss es JW300.ss.es 11.3 0.269
ss fi JW300.ss.fi 1.5 0.170
ss fi JW300.ss.fi 0.2 0.084
ss fr JW300.ss.fr 1.1 0.140
ssp en JW300.ssp.en 4.3 0.174
ssp es JW300.ssp.es 89.7 0.930
ssp fi JW300.ssp.fi 2.4 0.160
ssp fr JW300.ssp.fr 4.3 0.179
ssp sv JW300.ssp.sv 3.6 0.161
ss sv JW300.ss.sv 2.3 0.160
ss sv JW300.ss.sv 0.2 0.096
st en JW300.st.en 45.7 0.609
st es JW300.st.es 31.3 0.499
st fi JW300.st.fi 28.8 0.520
@ -2559,7 +2622,8 @@ th fr JW300.th.fr 20.4 0.363
th sv JW300.th.sv 17.8 0.321
ti en JW300.ti.en 30.4 0.461
ti es JW300.ti.es 17.4 0.337
ti sv JW300.ti.sv 19.5 0.368
ti fr JW300.ti.fr 0.3 0.060
ti sv JW300.ti.sv 0.3 0.069
tiv en JW300.tiv.en 31.5 0.473
tiv es JW300.tiv.es 19.1 0.360
tiv fi JW300.tiv.fi 19.7 0.412