diff --git a/Makefile b/Makefile index a45f7f63..7a5ea054 100644 --- a/Makefile +++ b/Makefile @@ -220,6 +220,15 @@ finished: fi \ done +## only bilingual models +%-allmultilingual: + for l in ${ALL_MULTILINGUAL_MODELS}; do \ + if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \ + ${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \ + TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmultilingual=}; \ + fi \ + done + ## run something over all language pairs but make it possible to do it in parallel, for example ## - make dist-all-parallel @@ -555,10 +564,16 @@ endif %.compare: %.eval - paste -d "\n" ${TEST_SRC} ${TEST_TRG} ${<:.eval=} |\ + grep . ${TEST_SRC} > $@.1 + grep . ${TEST_TRG} > $@.2 + grep . ${<:.eval=} > $@.3 + paste -d "\n" $@.1 $@.2 $@.3 |\ sed -e "s/'/'/g" \ -e 's/"/"/g' \ -e 's/<//g' \ -e 's/&/&/g' |\ sed 'n;n;G;' > $@ + rm -f $@.1 $@.2 $@.3 + +# paste -d "\n" ${TEST_SRC} ${TEST_TRG} ${<:.eval=} |\ diff --git a/Makefile.config b/Makefile.config index 7c53838e..f1072cb8 100644 --- a/Makefile.config +++ b/Makefile.config @@ -45,8 +45,11 @@ OPUSCORPORA = ${patsubst %/latest/xml/${LANGPAIR}.xml.gz,%,\ ALL_LANG_PAIRS = ${shell ls ${WORKHOME} | grep -- '-' | grep -v old} -ALL_BILINGUAL_MODELS = ${shell ls ${WORKHOME} | grep -- '-' | grep -v old | grep -v -- '\+'} -ALL_MULTILINGUAL_MODELS = ${shell ls ${WORKHOME} | grep -- '-' | grep -v old | grep -- '\+'} +ALL_BILINGUAL_MODELS = ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep -v -- '\+'} +ALL_MULTILINGUAL_MODELS = ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep -- '\+'} + +# ALL_BILINGUAL_MODELS = ${shell ls ${WORKHOME} | grep -- '-' | grep -v old | grep -v -- '\+'} +# ALL_MULTILINGUAL_MODELS = ${shell ls ${WORKHOME} | grep -- '-' | grep -v old | grep -- '\+'} ## size of dev data, test data and BPE merge operations @@ -75,6 +78,8 @@ HELDOUTSIZE = ${DEVSIZE} ## dev/test data: default = Tatoeba otherwise, GlobalVoices, JW300, GNOME or bibl-uedin ## - check that data exist ## - check that there are at least 2 x DEVMINSIZE examples +## TODO: this does not work well for multilingual models! + ifneq ($(wildcard ${OPUSHOME}/Tatoeba/latest/moses/${LANGPAIR}.txt.zip),) ifeq ($(shell if (( `head -1 ${OPUSHOME}/Tatoeba/latest/info/${LANGPAIR}.txt.info` \ > $$((${DEVMINSIZE} + ${DEVMINSIZE})) )); then echo "ok"; fi),ok) diff --git a/evaluate/Makefile b/evaluate/Makefile index 299759d4..17aadf1d 100644 --- a/evaluate/Makefile +++ b/evaluate/Makefile @@ -8,20 +8,21 @@ SRC = en TRG = de LANGPAIR = ${SRC}-${TRG} -MODELHOME = ../models/${LANGPAIR} -MODELZIP = ${lastword ${sort ${wildcard ${MODELHOME}/*-20*.zip}}} -MODELNAME = ${patsubst %.zip,%,${notdir ${MODELZIP}}} -MODELSTORE = OPUS-MT-models -MODELURL = https://object.pouta.csc.fi/${MODELSTORE}/${LANGPAIR} +MODELHOME = ../models/${LANGPAIR} +MODELZIP = ${lastword ${sort ${wildcard ${MODELHOME}/*-20*.zip}}} +MODELNAME = ${patsubst %.zip,%,${notdir ${MODELZIP}}} +MODELSTORE = OPUS-MT-models +MODELURL = https://object.pouta.csc.fi/${MODELSTORE}/${LANGPAIR} -TESTSETDIR = ../testsets/${LANGPAIR} -TESTSETS = ${sort ${wildcard ${TESTSETDIR}/*.${SRC}.gz}} -TESTSET = ${lastword ${TESTSETS}} +TESTSETDIR = ../testsets/${LANGPAIR} +TESTSETS = ${sort ${wildcard ${TESTSETDIR}/*.${SRC}.gz}} +TESTSET = ${lastword ${TESTSETS}} -WORKDIR = ${PWD}/${LANGPAIR}/${MODELNAME} -TEST_PRE = ${WORKDIR}/${patsubst %.gz,%.pre,${notdir ${TESTSET}}} -TEST_TRANS = ${TEST_PRE}.${TRG} -TEST_EVAL = ${TEST_TRANS}.eval +WORKDIR = ${PWD}/${LANGPAIR}/${MODELNAME} +TEST_PRE = ${WORKDIR}/${patsubst %.gz,%.pre,${notdir ${TESTSET}}} +TEST_TRANS = ${TEST_PRE}.${TRG} +TEST_EVAL = ${TEST_TRANS}.eval +TEST_EVALNORM = ${TEST_TRANS}.eval-norm @@ -102,6 +103,16 @@ ${TEST_EVAL}: ${TEST_TRANS} cat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@ rm -f $@.ref +${TEST_EVALNORM}: ${TEST_TRANS} + zcat ${patsubst %.${SRC}.gz,%.${TRG}.gz,${TESTSET}} |\ + ${TOKENIZER}/replace-unicode-punctuation.perl |\ + ${TOKENIZER}/remove-non-printing-char.perl |\ + ${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\ + sed 's/ */ /g;s/^ *//g;s/ *$$//g' > $@.ref + cat $< | sacrebleu $@.ref > $@ + cat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@ + rm -f $@.ref + cleanup: rm -fr ${WORKDIR}/model diff --git a/finetune/Makefile b/finetune/Makefile index 377ae202..c1b9699e 100644 --- a/finetune/Makefile +++ b/finetune/Makefile @@ -91,6 +91,49 @@ goethe-defi: TEST_TRG=de-fi/goethe/test/goethe-institute-test1.fi \ all +goethe2-defi: + ${MAKE} SRC=de TRG=fi MODEL=goethe2 \ + TRAIN_SRC=de-fi/goethe/train/goethe-institute-train2.de \ + TRAIN_TRG=de-fi/goethe/train/goethe-institute-train2.fi \ + DEV_SRC=de-fi/goethe/dev/goethe-institute-dev2.de \ + DEV_TRG=de-fi/goethe/dev/goethe-institute-dev2.fi \ + TEST_SRC=de-fi/goethe/test/goethe-institute-test1.de \ + TEST_TRG=de-fi/goethe/test/goethe-institute-test1.fi \ + all + + +## without reference normalisation +goethe-other: + zcat de-fi/goethe/test/goethe-institute-test1.fi.gz > $@.ref + for s in systran yandex google; do \ + cat ${HOME}/research/GoetheInstitute/data/test_de_oaversetted_van_$$s.txt |\ + gzip -c > de-fi/goethe/test/goethe-institute-test1.de.$$s.de.gz; \ + zcat de-fi/goethe/test/goethe-institute-test1.de.$$s.de.gz | sacrebleu $@.ref \ + > de-fi/goethe/test/goethe-institute-test1.de.$$s.de.eval; \ + zcat de-fi/goethe/test/goethe-institute-test1.de.$$s.de.gz | sacrebleu --metrics=chrf --width=3 $@.ref \ + >> de-fi/goethe/test/goethe-institute-test1.de.$$s.de.eval; \ + done + rm -f $@.ref + +## with reference normalisation (should not do this) +goethe-other-norm: + zcat de-fi/goethe/test/goethe-institute-test1.fi.gz |\ + ${TOKENIZER}/replace-unicode-punctuation.perl |\ + ${TOKENIZER}/remove-non-printing-char.perl |\ + ${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\ + sed 's/ */ /g;s/^ *//g;s/ *$$//g' > $@.ref + for s in systran yandex google; do \ + cat ${HOME}/research/GoetheInstitute/data/test_de_oaversetted_van_$$s.txt |\ + ${TOKENIZER}/replace-unicode-punctuation.perl |\ + ${TOKENIZER}/remove-non-printing-char.perl |\ + ${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\ + sed 's/ */ /g;s/^ *//g;s/ *$$//g' | gzip -c > de-fi/goethe/test/goethe-institute-test1.de.$$s.de.gz; \ + zcat de-fi/goethe/test/goethe-institute-test1.de.$$s.de.gz | sacrebleu $@.ref \ + > de-fi/goethe/test/goethe-institute-test1.de.$$s.de.eval-norm; \ + zcat de-fi/goethe/test/goethe-institute-test1.de.$$s.de.gz | sacrebleu --metrics=chrf --width=3 $@.ref \ + >> de-fi/goethe/test/goethe-institute-test1.de.$$s.de.eval-norm; \ + done + rm -f $@.ref goethe-test: ${MAKE} SRC=de TRG=fi MODEL=goethe-test \ @@ -244,12 +287,25 @@ ${TEST_SRC}.baseline.${TRG}.gz: ${TEST_SRC}.pre.gz ${LANGPAIR}/${BASEMODELNAME}/ eval: ${TEST_SRC}.${TRG}.eval eval-baseline: ${TEST_SRC}.baseline.${TRG}.eval +## without reference normalisation ${TEST_SRC}.${TRG}.eval ${TEST_SRC}.baseline.${TRG}.eval: %.eval: %.gz ${TEST_TRG}.gz - zcat ${TEST_TRG} > $@.ref + zcat ${TEST_TRG}.gz > $@.ref zcat $< | sacrebleu $@.ref > $@ zcat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@ rm -f $@.ref +## with reference normalisation (should not do this) +${TEST_SRC}.${TRG}.eval-norm ${TEST_SRC}.baseline.${TRG}.eval-norm: %.eval-norm: %.gz ${TEST_TRG}.gz + zcat ${TEST_TRG}.gz |\ + ${TOKENIZER}/replace-unicode-punctuation.perl |\ + ${TOKENIZER}/remove-non-printing-char.perl |\ + ${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\ + sed 's/ */ /g;s/^ *//g;s/ *$$//g' > $@.ref + zcat $< | sacrebleu $@.ref > $@ + zcat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@ + rm -f $@.ref + + .PHONY: compare compare-baseline diff --git a/finetune/de-fi/goethe/test/goethe-institute-test1.de.fi.gz b/finetune/de-fi/goethe/test/goethe-institute-test1.de.fi.gz index aa490749..632ee7d4 100644 Binary files a/finetune/de-fi/goethe/test/goethe-institute-test1.de.fi.gz and b/finetune/de-fi/goethe/test/goethe-institute-test1.de.fi.gz differ diff --git a/work-spm/eval/scores.txt b/work-spm/eval/scores.txt index 3174afe5..1ff76462 100644 --- a/work-spm/eval/scores.txt +++ b/work-spm/eval/scores.txt @@ -4,25 +4,30 @@ ab fi JW300.ab.fi 1.5 0.147 ab fr JW300.ab.fr 1.8 0.129 ab sv JW300.ab.sv 2.4 0.147 ach en JW300.ach.en 5.4 0.207 +ach es JW300.ach.es 2.6 0.153 ach fi JW300.ach.fi 1.7 0.163 ach fr JW300.ach.fr 3.5 0.159 ach sv JW300.ach.sv 2.7 0.160 acu en bible-uedin.acu.en 3.8 0.202 ada en JW300.ada.en 4.3 0.182 +ada es JW300.ada.es 2.7 0.153 ada fi JW300.ada.fi 1.7 0.154 ada fr JW300.ada.fr 3.1 0.152 ada sv JW300.ada.sv 2.1 0.146 aed en JW300.aed.en 4.0 0.177 +aed es JW300.aed.es 89.1 0.915 aed fi JW300.aed.fi 2.2 0.163 aed fr JW300.aed.fr 3.5 0.165 aed sv JW300.aed.sv 3.3 0.170 af en Tatoeba.af.en 60.8 0.736 +af es JW300.af.es 35.7 0.554 af fi JW300.af.fi 32.3 0.576 af fr JW300.af.fr 35.3 0.543 af sv JW300.af.sv 40.4 0.599 agr en bible-uedin.agr.en 4.5 0.222 am en GlobalVoices.am.en 6.1 0.286 am en Tatoeba.am.en 63.8 0.744 +am es GlobalVoices.am.es 3.9 0.251 am fi JW300.am.fi 18.1 0.394 am fr GlobalVoices.am.fr 3.4 0.233 am sv JW300.am.sv 21.0 0.377 @@ -34,42 +39,52 @@ ase en JW300.ase.en 99.5 0.997 ase fr JW300.ase.fr 37.8 0.553 as en JW300.as.en 1.7 0.137 as en Tatoeba.as.en 58.3 0.644 +as es JW300.as.es 1.2 0.156 ase sv JW300.ase.sv 39.7 0.576 as fi JW300.as.fi 1.1 0.167 as fr JW300.as.fr 1.4 0.154 as sv JW300.as.sv 1.0 0.148 ast en Tatoeba.ast.en 81.4 0.858 ay en JW300.ay.en 7.2 0.202 +ay es JW300.ay.es 11.3 0.265 ay fi JW300.ay.fi 6.5 0.222 ay fr JW300.ay.fr 6.4 0.203 ay sv JW300.ay.sv 6.8 0.212 ba en JW300.ba.en 2.8 0.144 ba en Tatoeba.ba.en 0.8 0.134 +ba es JW300.ba.es 2.0 0.141 ba fi JW300.ba.fi 1.7 0.164 ba fr JW300.ba.fr 3.1 0.150 bas en JW300.bas.en 5.8 0.207 +bas es JW300.bas.es 4.0 0.175 bas fi JW300.bas.fi 2.4 0.174 bas fr JW300.bas.fr 3.9 0.177 bas sv JW300.bas.sv 3.9 0.174 ba sv JW300.ba.sv 2.2 0.139 bbc en JW300.bbc.en 6.7 0.204 +bbc es JW300.bbc.es 4.4 0.178 bbc fr JW300.bbc.fr 4.4 0.172 bci en JW300.bci.en 13.9 0.269 +bci es JW300.bci.es 5.9 0.223 bci fi JW300.bci.fi 5.8 0.242 bci fr JW300.bci.fr 6.9 0.216 bci sv JW300.bci.sv 7.6 0.235 bcl en JW300.bcl.en 56.8 0.705 +bcl es JW300.bcl.es 37.0 0.551 bcl fi JW300.bcl.fi 33.3 0.573 bcl fr JW300.bcl.fr 35.0 0.527 bcl sv JW300.bcl.sv 38.0 0.565 bem en JW300.bem.en 33.4 0.491 +bem es JW300.bem.es 22.8 0.403 bem fi JW300.bem.fi 22.8 0.439 bem fr JW300.bem.fr 25.0 0.417 bem sv JW300.bem.sv 25.6 0.434 ber en Tatoeba.ber.en 37.3 0.566 +ber es Tatoeba.ber.es 33.8 0.487 ber fr Tatoeba.ber.fr 60.2 0.754 bfi en JW300.bfi.en 20.0 0.423 bg en Tatoeba.bg.en 59.4 0.727 +bg es GlobalVoices.bg.es 24.5 0.526 bg fi JW300.bg.fi 23.7 0.505 bg fr GlobalVoices.bg.fr 20.9 0.480 bg sv JW300.bg.sv 29.1 0.494 @@ -79,6 +94,7 @@ bi fi JW300.bi.fi 0.6 0.124 bi fr JW300.bi.fr 21.5 0.382 bi sv JW300.bi.sv 22.7 0.403 bn en Tatoeba.bn.en 49.8 0.644 +bn es GlobalVoices.bn.es 12.7 0.372 bn fi JW300.bn.fi 5.5 0.214 bn fr GlobalVoices.bn.fr 8.4 0.311 bn sv GlobalVoices.bn.sv 2.3 0.171 @@ -89,18 +105,22 @@ bs en Tatoeba.bs.en 64.9 0.784 bsn en bible-uedin.bsn.en 1.2 0.117 btx en JW300.btx.en 7.0 0.236 bum en JW300.bum.en 4.6 0.182 +bum es JW300.bum.es 3.2 0.162 bum fi JW300.bum.fi 2.2 0.161 bum fr JW300.bum.fr 4.0 0.173 bum sv JW300.bum.sv 3.5 0.163 bzs en JW300.bzs.en 44.5 0.605 +bzs es JW300.bzs.es 28.1 0.464 bzs fi JW300.bzs.fi 24.7 0.464 bzs fr JW300.bzs.fr 30.0 0.479 bzs sv JW300.bzs.sv 30.7 0.489 cab en JW300.cab.en 3.0 0.154 +cab es JW300.cab.es 5.1 0.225 cab fi JW300.cab.fi 1.7 0.150 cab fr JW300.cab.fr 3.1 0.153 cab sv JW300.cab.sv 2.6 0.152 ca en Tatoeba.ca.en 51.4 0.678 +ca es Tatoeba.ca.es 74.9 0.863 ca fr Tatoeba.ca.fr 50.4 0.672 cak en JW300.cak.en 2.6 0.140 cak fi JW300.cak.fi 0.6 0.109 @@ -113,20 +133,28 @@ cat fr JW300.cat.fr 3.5 0.163 cat sv JW300.cat.sv 2.5 0.154 ceb en JW300.ceb.en 52.6 0.670 ceb en Tatoeba.ceb.en 59.5 0.704 +ceb es JW300.ceb.es 31.6 0.508 ceb fi JW300.ceb.fi 27.4 0.525 ceb fr JW300.ceb.fr 30.0 0.491 ceb sv JW300.ceb.sv 35.5 0.552 chk en JW300.chk.en 31.2 0.465 +chk es JW300.chk.es 20.8 0.374 chk fi JW300.chk.fi 19.4 0.395 chk fr JW300.chk.fr 22.4 0.387 chk sv JW300.chk.sv 23.6 0.406 cjk en JW300.cjk.en 6.8 0.226 +cjk es JW300.cjk.es 3.8 0.169 cjk fr JW300.cjk.fr 4.3 0.174 +cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh de Tatoeba.cmn.de 33.1 0.530 +cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh fi bible-uedin.cmn.fi 21.6 0.497 +cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh sv Tatoeba.cmn.sv 46.6 0.620 cnh en JW300.cnh.en 6.9 0.240 +crp es bible-uedin.crp.es 2.8 0.187 crp fi bible-uedin.crp.fi 2.0 0.181 crp fr bible-uedin.crp.fr 2.9 0.190 crp sv bible-uedin.crp.sv 3.1 0.184 crs en JW300.crs.en 42.9 0.589 +crs es JW300.crs.es 26.1 0.445 crs fi JW300.crs.fi 25.6 0.479 crs fr JW300.crs.fr 29.4 0.475 crs sv JW300.crs.sv 29.3 0.480 @@ -140,14 +168,17 @@ cs en Tatoeba.cs.en 58.0 0.721 cs fi JW300.cs.fi 25.5 0.523 cs fr GlobalVoices.cs.fr 21.0 0.488 csg en JW300.csg.en 4.6 0.183 +csg es JW300.csg.es 93.1 0.952 csg fi JW300.csg.fi 2.3 0.160 csg fr JW300.csg.fr 4.7 0.184 csg sv JW300.csg.sv 4.5 0.176 csl en JW300.csl.en 4.1 0.162 +csl es JW300.csl.es 3.1 0.141 csl fi JW300.csl.fi 2.5 0.152 csl fr JW300.csl.fr 3.0 0.156 csl sv JW300.csl.sv 3.3 0.142 csn en JW300.csn.en 3.8 0.172 +csn es JW300.csn.es 87.4 0.899 csn fi JW300.csn.fi 2.0 0.162 csn fr JW300.csn.fr 3.5 0.164 csn sv JW300.csn.sv 3.8 0.173 @@ -156,17 +187,24 @@ ctu en JW300.ctu.en 2.9 0.157 ctu fr JW300.ctu.fr 3.3 0.166 cv en JW300.cv.en 2.6 0.151 cv en Tatoeba.cv.en 0.3 0.102 +cv es JW300.cv.es 2.0 0.152 cv fi JW300.cv.fi 1.2 0.148 cv fr JW300.cv.fr 2.6 0.154 cv sv JW300.cv.sv 2.1 0.144 cy en Tatoeba.cy.en 33.0 0.525 +cy es JW300.cy.es 0.0 0.025 cy fi JW300.cy.fi 0.3 0.067 cy fr JW300.cy.fr 8.7 0.266 cy sv JW300.cy.sv 6.6 0.218 da en Tatoeba.da.en 63.6 0.769 +da es Tatoeba.da.es 53.7 0.715 da fi Tatoeba.da.fi 39.0 0.629 da+fo+is+no+nb+nn+sv da+fo+is+no+nb+nn+sv Tatoeba.da.sv 69.2 0.811 da fr Tatoeba.da.fr 62.2 0.751 +de az_IR+az JW300.de.az 13.4 0.342 +de be_tarask+be Tatoeba.de.be 3.1 0.106 +de cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh bible-uedin.de.zh 21.9 0.293 +de de Tatoeba.de.de 40.8 0.617 de en newssyscomb2009.de.en 28.6 0.553 de en news-test2008.de.en 27.6 0.547 de en newstest2009.de.en 26.9 0.544 @@ -181,6 +219,9 @@ de en newstest2017-ende.de.en 35.6 0.609 de en newstest2018-ende.de.en 43.8 0.667 de en newstest2019-deen.de.en 39.6 0.637 de en Tatoeba.de.en 55.1 0.704 +de es Tatoeba.de.es 48.5 0.676 +de fi goethe-institute-test1.de.fi 18.3 0.493 +de fi goethe-institute-test2.de.fi 18.0 0.494 de fi Tatoeba.de.fi 40.0 0.628 de fr euelections_dev2019.transformer-align.de 32.2 0.590 de fr newssyscomb2009.de.fr 26.8 0.553 @@ -192,18 +233,23 @@ de fr newstest2012.de.fr 27.7 0.554 de fr newstest2013.de.fr 29.5 0.560 de fr newstest2019-defr.de.fr 36.6 0.625 de fr Tatoeba.de.fr 49.2 0.664 +de+nl+fy+af+da+fo+is+no+nb+nn+sv de+nl+fy+af+da+fo+is+no+nb+nn+sv Tatoeba.de.sv 48.1 0.663 +de pt_br+pt_BR+pt_PT+pt Tatoeba.de.pt 35.2 0.577 dhv en JW300.dhv.en 4.7 0.190 dhv fi JW300.dhv.fi 1.2 0.149 dhv fr JW300.dhv.fr 3.2 0.157 dhv sv JW300.dhv.sv 2.5 0.160 dje en bible-uedin.dje.en 4.4 0.228 +dje es bible-uedin.dje.es 3.8 0.215 dje fi bible-uedin.dje.fi 3.0 0.211 dje fr bible-uedin.dje.fr 4.1 0.211 dje sv bible-uedin.dje.sv 4.4 0.214 djk en JW300.djk.en 4.3 0.177 djk fr JW300.djk.fr 3.0 0.156 +ecs es JW300.ecs.es 16.2 0.423 ee en JW300.ee.en 39.3 0.556 ee en Tatoeba.ee.en 21.2 0.569 +ee es JW300.ee.es 26.4 0.449 ee fi JW300.ee.fi 25.0 0.482 ee fr JW300.ee.fr 27.1 0.450 ee sv JW300.ee.sv 28.9 0.472 @@ -485,7 +531,15 @@ en ro newsdev2016-enro.en.ro 30.8 0.592 en ro newstest2016-enro.en.ro 28.8 0.571 en ro Tatoeba.en.ro 45.3 0.670 en rsl JW300.en.rsl 3.2 0.156 +en ru newstest2012.en.ru 23.7 0.436 +en ru newstest2013.en.ru 19.4 0.413 +en ru newstest2015-enru.en.ru 21.6 0.440 +en ru newstest2016-enru.en.ru 21.0 0.432 +en ru newstest2017-enru.en.ru 22.7 0.443 +en ru newstest2018-enru.en.ru 19.2 0.413 +en ru newstest2019-enru.en.ru 22.3 0.412 en run JW300.en.run 34.2 0.591 +en ru Tatoeba.en.ru 46.9 0.656 en rw JW300.en.rw 33.3 0.569 en rw Tatoeba.en.rw 13.8 0.503 en seh JW300.en.seh 5.6 0.260 @@ -538,10 +592,54 @@ en wal JW300.en.wal 20.2 0.471 en war Tatoeba.en.war 9.1 0.382 en xh JW300.en.xh 37.9 0.652 eo en Tatoeba.eo.en 54.8 0.694 +eo es Tatoeba.eo.es 44.2 0.631 eo fi Tatoeba.eo.fi 13.9 0.325 eo fr Tatoeba.eo.fr 50.9 0.675 eo sv Tatoeba.eo.sv 25.7 0.439 +es ach JW300.es.ach 3.9 0.175 +es ada JW300.es.ada 7.1 0.206 +es aed JW300.es.aed 89.2 0.915 +es af JW300.es.af 36.6 0.564 +es am GlobalVoices.es.am 0.7 0.125 +es as JW300.es.as 0.6 0.095 +es ay JW300.es.ay 9.8 0.325 +es az_IR+az JW300.es.az 12.6 0.319 +es ba JW300.es.ba 1.8 0.129 +es bas JW300.es.bas 4.5 0.167 +es bbc JW300.es.bbc 5.4 0.202 +es bci JW300.es.bci 6.7 0.213 +es bcl JW300.es.bcl 37.1 0.586 +es bem JW300.es.bem 19.9 0.440 +es ber Tatoeba.es.ber 21.8 0.444 +es bg GlobalVoices.es.bg 21.0 0.493 +es bn GlobalVoices.es.bn 5.5 0.327 +es bum JW300.es.bum 5.2 0.186 +es bzs JW300.es.bzs 26.4 0.451 +es cab JW300.es.cab 5.4 0.275 +es ca Tatoeba.es.ca 68.9 0.832 +es ceb JW300.es.ceb 33.9 0.564 +es chk JW300.es.chk 17.1 0.378 +es cjk JW300.es.cjk 3.5 0.214 +es crp bible-uedin.es.crp 4.5 0.202 +es crs JW300.es.crs 26.4 0.453 +es csg JW300.es.csg 91.2 0.937 +es csl JW300.es.csl 2.7 0.052 +es csn JW300.es.csn 87.8 0.901 +es cs Tatoeba.es.cs 46.4 0.655 +es cv JW300.es.cv 1.8 0.136 +es cy JW300.es.cy 5.2 0.211 +es da Tatoeba.es.da 55.7 0.712 +es de Tatoeba.es.de 50.0 0.683 +es dje bible-uedin.es.dje 4.7 0.227 +es ecs JW300.es.ecs 17.2 0.426 +es ee JW300.es.ee 25.6 0.470 +es eo Tatoeba.es.eo 44.7 0.657 +es et JW300.es.et 20.7 0.466 +es eu Tatoeba.es.eu 36.9 0.637 +es fa GlobalVoices.es.fa 5.9 0.282 +es fil GlobalVoices.es.fil 0.2 0.108 es fi Tatoeba.es.fi 43.5 0.662 +es fon JW300.es.fon 5.3 0.163 es fr newssyscomb2009.es.fr 33.6 0.610 es fr news-test2008.es.fr 32.0 0.585 es fr newstest2009.es.fr 32.5 0.590 @@ -550,17 +648,211 @@ es fr newstest2011.es.fr 33.9 0.607 es fr newstest2012.es.fr 32.4 0.602 es fr newstest2013.es.fr 32.1 0.593 es fr Tatoeba.es.fr 58.4 0.731 +es fse JW300.es.fse 2.7 0.171 +es gaa JW300.es.gaa 27.8 0.479 +es gd bible-uedin.es.gd 0.0 0.095 +es gil JW300.es.gil 23.8 0.470 +es gl Tatoeba.es.gl 63.8 0.778 +es gug JW300.es.gug 14.0 0.359 +es gu JW300.es.gu 2.4 0.124 +es guw JW300.es.guw 28.6 0.480 +es gv bible-uedin.es.gv 0.0 0.034 +es gym JW300.es.gym 5.5 0.267 +es ha JW300.es.ha 20.6 0.421 +es he Tatoeba.es.he 44.3 0.642 +es hi GlobalVoices.es.hi 0.7 0.125 +es hmn JW300.es.hmn 4.6 0.195 +es ho JW300.es.ho 22.8 0.463 +es hr JW300.es.hr 21.7 0.459 +es ht JW300.es.ht 23.3 0.407 +es hy JW300.es.hy 4.2 0.157 +es ia Tatoeba.es.ia 0.1 0.089 +es iba JW300.es.iba 5.3 0.218 +es ibg JW300.es.ibg 5.0 0.206 +es id GlobalVoices.es.id 21.1 0.516 +es ig JW300.es.ig 27.0 0.434 +es ilo JW300.es.ilo 31.0 0.544 +es ise JW300.es.ise 3.5 0.181 +es is JW300.es.is 22.3 0.418 +es iso JW300.es.iso 22.4 0.396 +es jap bible-uedin.es.jap 3.9 0.121 +es ja Tatoeba.es.ja 1.1 0.352 +es jsl JW300.es.jsl 3.5 0.071 +es jv JW300.es.jv 5.1 0.199 +es ka JW300.es.ka 10.8 0.279 +es kam JW300.es.kam 2.6 0.167 +es kbp JW300.es.kbp 4.3 0.164 +es kek JW300.es.kek 0.0 0.010 +es kg JW300.es.kg 25.6 0.488 +es ki JW300.es.ki 2.7 0.168 +es kj JW300.es.kj 3.8 0.210 +es kl JW300.es.kl 7.3 0.330 +es kmb JW300.es.kmb 4.5 0.198 +es km Tatoeba.es.km 0.1 0.072 +es kn JW300.es.kn 3.1 0.123 +es koo JW300.es.koo 2.9 0.193 +es ko Tatoeba.es.ko 5.8 0.274 +es kqn JW300.es.kqn 19.9 0.437 +es kri JW300.es.kri 6.3 0.194 +es kss JW300.es.kss 3.7 0.178 +es ksw JW300.es.ksw 3.9 0.170 +es kvk JW300.es.kvk 0.8 0.070 +es kwn JW300.es.kwn 10.0 0.293 +es kwy JW300.es.kwy 16.9 0.375 +es ky JW300.es.ky 10.8 0.274 +es la Tatoeba.es.la 0.2 0.092 +es lg JW300.es.lg 19.5 0.438 +es ln JW300.es.ln 27.1 0.508 +es lo JW300.es.lo 3.0 0.134 +es loz JW300.es.loz 28.6 0.493 +es lt JW300.es.lt 17.9 0.423 +es lua JW300.es.lua 23.4 0.473 +es lue JW300.es.lue 20.0 0.472 +es lu JW300.es.lu 20.0 0.432 +es lun JW300.es.lun 12.1 0.354 +es luo JW300.es.luo 13.5 0.330 +es lus JW300.es.lus 20.9 0.414 +es lv JW300.es.lv 17.2 0.398 +es mam JW300.es.mam 5.1 0.249 +es mau JW300.es.mau 5.3 0.254 +es mco JW300.es.mco 3.2 0.210 +es mfe JW300.es.mfe 19.5 0.391 +es mfs JW300.es.mfs 88.6 0.907 +es mg GlobalVoices.es.mg 18.3 0.532 +es mgr JW300.es.mgr 2.7 0.194 +es mh JW300.es.mh 19.5 0.388 +es mi bible-uedin.es.mi 6.5 0.271 +es mk GlobalVoices.es.mk 24.1 0.526 +es ml JW300.es.ml 5.4 0.214 +es mn JW300.es.mn 5.9 0.200 +es mos JW300.es.mos 17.8 0.323 +es mr JW300.es.mr 1.9 0.118 +es mt JW300.es.mt 28.1 0.460 +es my GlobalVoices.es.my 0.1 0.082 +es nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.es.no 31.6 0.523 +es nch JW300.es.nch 5.4 0.275 +es ncj JW300.es.ncj 5.2 0.280 +es ncx JW300.es.ncx 7.6 0.297 +es ndc JW300.es.ndc 2.7 0.191 +es nd JW300.es.nd 1.8 0.179 +es nds_nl+nds Tatoeba.es.nds 4.2 0.228 +es ne JW300.es.ne 3.7 0.156 +es ng JW300.es.ng 3.6 0.213 +es ngl JW300.es.ngl 2.3 0.186 +es ngu JW300.es.ngu 5.0 0.274 +es nia JW300.es.nia 4.4 0.202 +es niu JW300.es.niu 29.9 0.506 +es nl Tatoeba.es.nl 50.4 0.681 +es nr JW300.es.nr 1.8 0.156 +es nso JW300.es.nso 33.2 0.531 +es nya JW300.es.nya 2.8 0.186 +es ny JW300.es.ny 22.9 0.494 +es nyk JW300.es.nyk 2.1 0.188 +es nyn JW300.es.nyn 2.7 0.201 +es nzi JW300.es.nzi 3.0 0.183 +es oke JW300.es.oke 5.6 0.208 +es om JW300.es.om 9.6 0.320 +es os JW300.es.os 10.6 0.284 +es pag JW300.es.pag 25.3 0.478 +es pa JW300.es.pa 5.8 0.179 +es pap JW300.es.pap 28.2 0.486 +es pck bible-uedin.es.pck 3.8 0.216 +es pcm JW300.es.pcm 4.7 0.181 +es pes bible-uedin.es.pes 3.4 0.206 +es pis JW300.es.pis 27.1 0.484 +es pl Tatoeba.es.pl 44.0 0.646 +es plt bible-uedin.es.plt 3.1 0.268 +es pon JW300.es.pon 21.6 0.448 +es prl JW300.es.prl 92.2 0.950 +es pso JW300.es.pso 3.6 0.151 +es que JW300.es.que 11.8 0.377 +es qu JW300.es.qu 12.1 0.339 +es quy JW300.es.quy 13.6 0.399 +es quz JW300.es.quz 12.7 0.353 +es qvi JW300.es.qvi 1.9 0.203 +es rar JW300.es.rar 6.6 0.255 +es rnd JW300.es.rnd 14.6 0.347 +es ro Tatoeba.es.ro 45.4 0.663 +es rsl JW300.es.rsl 3.0 0.140 +es run JW300.es.run 20.0 0.456 +es ru Tatoeba.es.ru 43.8 0.631 +es rw JW300.es.rw 22.6 0.472 +es seh JW300.es.seh 3.1 0.191 +es sg JW300.es.sg 24.8 0.435 +es sid JW300.es.sid 2.1 0.176 +es si JW300.es.si 6.7 0.189 +es sl JW300.es.sl 19.4 0.438 +es sm JW300.es.sm 25.5 0.450 +es sn JW300.es.sn 23.6 0.528 +es so bible-uedin.es.so 1.7 0.192 +es sop JW300.es.sop 2.5 0.201 +es sq GlobalVoices.es.sq 19.1 0.478 +es srm JW300.es.srm 6.7 0.214 +es srn JW300.es.srn 28.7 0.487 +es ss JW300.es.ss 9.1 0.304 +es ssp JW300.es.ssp 17.1 0.435 +es st JW300.es.st 35.5 0.556 +es swc JW300.es.swc 26.0 0.490 +es sw GlobalVoices.es.sw 16.9 0.455 +es ta_LK+ta JW300.es.ta 1.7 0.119 +es tcf JW300.es.tcf 7.6 0.296 +es tdt JW300.es.tdt 12.5 0.299 +es te JW300.es.te 1.8 0.131 +es tg_TJ+tg JW300.es.tg 7.1 0.212 +es th JW300.es.th 17.0 0.334 +es ti JW300.es.ti 14.1 0.251 +es tiv JW300.es.tiv 19.0 0.368 +es tk JW300.es.tk 8.1 0.244 +es tll JW300.es.tll 20.7 0.434 +es tl_PH+tl Tatoeba.es.tl 25.5 0.543 +es tn JW300.es.tn 32.2 0.528 +es tog JW300.es.tog 3.1 0.205 +es toj JW300.es.toj 9.3 0.305 +es to JW300.es.to 35.7 0.510 +es top JW300.es.top 4.1 0.256 +es tpi JW300.es.tpi 27.0 0.472 +es tsc JW300.es.tsc 3.9 0.192 +es tt JW300.es.tt 10.7 0.281 +es tum JW300.es.tum 19.3 0.430 +es tvl JW300.es.tvl 28.3 0.464 +es tw JW300.es.tw 26.3 0.465 +es ty JW300.es.ty 37.3 0.544 +es tzh JW300.es.tzh 9.7 0.294 +es tzo JW300.es.tzo 22.6 0.469 +es uk Tatoeba.es.uk 38.6 0.573 +es umb JW300.es.umb 17.5 0.397 +es urh JW300.es.urh 4.7 0.188 +es ur_PK+ur GlobalVoices.es.ur 0.4 0.129 +es ve JW300.es.ve 21.7 0.440 +es vi_VN+vi Tatoeba.es.vi 31.1 0.486 +es vmw JW300.es.vmw 2.0 0.165 +es vsl JW300.es.vsl 18.0 0.441 +es wal JW300.es.wal 7.9 0.288 +es war JW300.es.war 31.7 0.530 +es wes JW300.es.wes 4.9 0.189 +es wls JW300.es.wls 22.9 0.437 +es xh JW300.es.xh 25.0 0.541 +es yao JW300.es.yao 3.0 0.182 +es yo JW300.es.yo 22.3 0.387 +es yua JW300.es.yua 23.6 0.471 +es zai JW300.es.zai 20.8 0.426 +es zlm JW300.es.zlm 3.8 0.200 +es zne JW300.es.zne 19.7 0.427 +es zul+zu JW300.es.zu 26.0 0.557 et en newsdev2018-enet.et.en 30.1 0.574 et en newstest2018-enet.et.en 30.3 0.581 et en Tatoeba.et.en 59.9 0.738 +et es JW300.et.es 27.2 0.490 et fi JW300.et.fi 26.6 0.546 et fr JW300.et.fr 26.2 0.484 et sv JW300.et.sv 28.9 0.513 eu en Tatoeba.eu.en 46.1 0.638 +eu es Tatoeba.eu.es 48.8 0.672 eu fi bible-uedin.eu.fi 0.6 0.121 eu fr bible-uedin.eu.fr 3.4 0.236 eu sv bible-uedin.eu.sv 0.6 0.121 fa en GlobalVoices.fa.en 14.3 0.399 +fa es GlobalVoices.fa.es 12.6 0.369 fa fi JW300.fa.fi 18.0 0.405 fa fr GlobalVoices.fa.fr 10.6 0.341 fa sv JW300.fa.sv 18.3 0.353 @@ -573,6 +865,7 @@ fi am JW300.fi.am 14.8 0.295 fi ar JW300.fi.ar 8.0 0.225 fi ase JW300.fi.ase 36.6 0.553 fi as JW300.fi.as 0.9 0.095 +fi az_IR+az JW300.fi.az 16.8 0.385 fi ba JW300.fi.ba 2.1 0.134 fi bas JW300.fi.bas 4.6 0.182 fi bci JW300.fi.bci 10.9 0.266 @@ -588,6 +881,7 @@ fi cak JW300.fi.cak 1.8 0.154 fi cat JW300.fi.cat 3.1 0.160 fi ceb JW300.fi.ceb 35.1 0.581 fi chk JW300.fi.chk 17.8 0.394 +fi cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh bible-uedin.fi.zh 23.4 0.326 fi crp bible-uedin.fi.crp 3.6 0.188 fi crs JW300.fi.crs 29.6 0.491 fi csg JW300.fi.csg 3.6 0.162 @@ -657,6 +951,7 @@ fi koo JW300.fi.koo 2.5 0.199 fi kqn JW300.fi.kqn 22.3 0.476 fi kwn JW300.fi.kwn 2.9 0.197 fil en GlobalVoices.fil.en 0.3 0.099 +fil es GlobalVoices.fil.es 0.2 0.085 fil fr GlobalVoices.fil.fr 0.2 0.093 fi lt JW300.fi.lt 18.6 0.439 fi lua JW300.fi.lua 26.6 0.505 @@ -672,6 +967,7 @@ fi mgr JW300.fi.mgr 2.6 0.197 fi mi bible-uedin.fi.mi 5.6 0.238 fi mt JW300.fi.mt 29.9 0.490 fi my JW300.fi.my 1.6 0.145 +fi nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.fi.no 34.2 0.545 fi ncj JW300.fi.ncj 2.5 0.199 fi nd JW300.fi.nd 1.8 0.188 fi ne JW300.fi.ne 3.3 0.161 @@ -688,6 +984,7 @@ fi nzi JW300.fi.nzi 3.5 0.183 fi oke JW300.fi.oke 5.3 0.213 fi pcm JW300.fi.pcm 4.3 0.179 fi pon JW300.fi.pon 23.7 0.475 +fi pt_br+pt_BR+pt_PT+pt JW300.fi.pt 30.5 0.531 fi ro JW300.fi.ro 27.0 0.490 fi rsl JW300.fi.rsl 3.2 0.151 fi run JW300.fi.run 23.2 0.498 @@ -702,8 +999,9 @@ fi sq JW300.fi.sq 32.0 0.535 fi srn JW300.fi.srn 29.2 0.491 fi ssp JW300.fi.ssp 3.9 0.176 fi sv fiskmo_testset.fi.sv 27.0 0.599 -fi sv Tatoeba.fi.sv 54.8 0.705 +fi sv Tatoeba.fi.sv 55.0 0.706 fi sw JW300.fi.sw 29.9 0.548 +fi tg_TJ+tg JW300.fi.tg 9.7 0.260 fi ti JW300.fi.ti 16.6 0.303 fi tn JW300.fi.tn 34.5 0.555 fi tog JW300.fi.tog 3.2 0.216 @@ -713,7 +1011,9 @@ fi tvl JW300.fi.tvl 33.6 0.517 fi uk JW300.fi.uk 23.3 0.445 fi umb JW300.fi.umb 19.8 0.426 fi urh JW300.fi.urh 4.5 0.192 +fi ur_PK+ur JW300.fi.ur 11.2 0.261 fi ve JW300.fi.ve 26.0 0.495 +fi vi_VN+vi JW300.fi.vi 38.8 0.520 fi vmw JW300.fi.vmw 2.2 0.183 fi vsl JW300.fi.vsl 4.5 0.178 fi wal JW300.fi.wal 10.1 0.343 @@ -723,11 +1023,13 @@ fi yap JW300.fi.yap 25.4 0.445 fi yua JW300.fi.yua 12.5 0.341 fi zai JW300.fi.zai 5.9 0.229 fi zne JW300.fi.zne 22.7 0.464 +fi zul+zu JW300.fi.zu 28.1 0.584 fj en JW300.fj.en 31.0 0.471 fj en Tatoeba.fj.en 79.7 0.835 fj fi JW300.fj.fi 20.1 0.421 fj fr JW300.fj.fr 24.0 0.407 fon en JW300.fon.en 4.7 0.204 +fon es JW300.fon.es 3.1 0.165 fon fi JW300.fon.fi 2.2 0.167 fon fr JW300.fon.fr 3.7 0.168 fon sv JW300.fon.sv 2.9 0.160 @@ -740,6 +1042,7 @@ fr am GlobalVoices.fr.am 0.5 0.108 fr ase JW300.fr.ase 38.5 0.545 fr as JW300.fr.as 1.5 0.127 fr ay JW300.fr.ay 6.4 0.256 +fr az_IR+az JW300.fr.az 14.5 0.355 fr ba JW300.fr.ba 2.1 0.144 fr bas JW300.fr.bas 4.8 0.181 fr bbc JW300.fr.bbc 5.0 0.198 @@ -747,7 +1050,7 @@ fr bci JW300.fr.bci 10.4 0.255 fr bcl JW300.fr.bcl 35.9 0.566 fr bem JW300.fr.bem 22.8 0.456 fr ber Tatoeba.fr.ber 37.2 0.641 -fr bg GlobalVoices.fr.bg 18.8 0.461 +fr bg GlobalVoices.fr.bg 18.7 0.459 fr bi JW300.fr.bi 28.4 0.464 fr bn GlobalVoices.fr.bn 3.5 0.266 fr br Tatoeba.fr.br 3.5 0.201 @@ -763,7 +1066,7 @@ fr cjk JW300.fr.cjk 3.1 0.214 fr crp bible-uedin.fr.crp 4.2 0.201 fr crs JW300.fr.crs 31.6 0.492 fr csg JW300.fr.csg 4.0 0.177 -fr cs GlobalVoices.fr.cs 15.9 0.425 +fr cs GlobalVoices.fr.cs 16.1 0.426 fr csl JW300.fr.csl 1.8 0.045 fr cs newssyscomb2009.fr.cs 19.3 0.476 fr cs news-test2008.fr.cs 17.9 0.461 @@ -791,7 +1094,7 @@ fr dje bible-uedin.fr.dje 4.6 0.224 fr djk JW300.fr.djk 4.4 0.187 fr ee JW300.fr.ee 26.3 0.466 fr efi JW300.fr.efi 26.9 0.462 -fr el Tatoeba.fr.el 54.7 0.709 +fr el Tatoeba.fr.el 56.2 0.719 fr eo Tatoeba.fr.eo 52.0 0.695 fr es newssyscomb2009.fr.es 34.3 0.601 fr es news-test2008.fr.es 32.5 0.583 @@ -801,7 +1104,7 @@ fr es newstest2011.fr.es 38.3 0.622 fr es newstest2012.fr.es 38.1 0.619 fr es newstest2013.fr.es 34.0 0.587 fr es Tatoeba.fr.es 53.2 0.709 -fr et JW300.fr.et 18.8 0.447 +fr et JW300.fr.et 19.4 0.452 fr eu bible-uedin.fr.eu 0.9 0.249 fr fa GlobalVoices.fr.fa 5.5 0.276 fr fil GlobalVoices.fr.fil 0.3 0.117 @@ -822,7 +1125,7 @@ fr hi GlobalVoices.fr.hi 2.5 0.161 fr hil JW300.fr.hil 34.7 0.559 fr hmn JW300.fr.hmn 5.1 0.212 fr ho JW300.fr.ho 25.4 0.480 -fr hr JW300.fr.hr 19.9 0.436 +fr hr JW300.fr.hr 20.7 0.442 fr ht JW300.fr.ht 29.2 0.461 fr hy JW300.fr.hy 7.8 0.225 fr ia Tatoeba.fr.ia 0.2 0.092 @@ -833,7 +1136,7 @@ fr ig JW300.fr.ig 29.0 0.445 fr ilo JW300.fr.ilo 30.6 0.528 fr iso JW300.fr.iso 26.7 0.429 fr jap bible-uedin.fr.jap 3.7 0.113 -fr ja Tatoeba.fr.ja 1.1 0.305 +fr ja Tatoeba.fr.ja 1.0 0.316 fr jp GlobalVoices.fr.jp 0.1 0.021 fr jsl JW300.fr.jsl 4.6 0.082 fr jv JW300.fr.jv 5.3 0.211 @@ -849,7 +1152,7 @@ fr kl JW300.fr.kl 13.8 0.408 fr kmb JW300.fr.kmb 4.5 0.212 fr km GlobalVoices.fr.km 0.0 0.002 fr kn JW300.fr.kn 5.3 0.159 -fr ko GlobalVoices.fr.ko 2.1 0.152 +fr ko GlobalVoices.fr.ko 2.2 0.154 fr koo JW300.fr.koo 2.6 0.209 fr kqn JW300.fr.kqn 23.3 0.469 fr kri JW300.fr.kri 6.7 0.199 @@ -864,12 +1167,14 @@ fr lg JW300.fr.lg 21.7 0.454 fr ln JW300.fr.ln 30.5 0.527 fr lo JW300.fr.lo 3.2 0.140 fr loz JW300.fr.loz 30.0 0.498 +fr lt JW300.fr.lt 17.2 0.406 fr lua JW300.fr.lua 27.3 0.496 fr lue JW300.fr.lue 23.1 0.485 fr lu JW300.fr.lu 25.5 0.471 fr lun JW300.fr.lun 18.1 0.432 fr luo JW300.fr.luo 17.7 0.380 fr lus JW300.fr.lus 25.5 0.455 +fr lv JW300.fr.lv 18.2 0.402 fr mam JW300.fr.mam 4.1 0.222 fr mau JW300.fr.mau 3.7 0.215 fr mco JW300.fr.mco 2.9 0.194 @@ -879,44 +1184,134 @@ fr mg GlobalVoices.fr.mg 15.7 0.501 fr mgr JW300.fr.mgr 2.9 0.199 fr mh JW300.fr.mh 21.7 0.399 fr mi bible-uedin.fr.mi 7.3 0.289 -fr mk GlobalVoices.fr.mk 18.9 0.471 +fr mk GlobalVoices.fr.mk 19.1 0.473 fr ml JW300.fr.ml 9.4 0.266 fr mn JW300.fr.mn 9.9 0.245 fr mos JW300.fr.mos 21.1 0.353 fr mr JW300.fr.mr 2.2 0.108 -fr mt JW300.fr.mt 25.7 0.442 +fr mt JW300.fr.mt 28.7 0.466 fr my GlobalVoices.fr.my 0.0 0.076 +fr nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.fr.no 29.7 0.494 fr nch JW300.fr.nch 3.6 0.221 fr ncj JW300.fr.ncj 2.7 0.200 fr ncx JW300.fr.ncx 3.3 0.214 fr ndc JW300.fr.ndc 2.6 0.200 fr nd JW300.fr.nd 1.9 0.185 +fr nds_nl+nds Tatoeba.fr.nds 3.0 0.212 fr ne JW300.fr.ne 3.7 0.157 fr ng JW300.fr.ng 2.8 0.194 fr ngl JW300.fr.ngl 2.2 0.196 fr ngu JW300.fr.ngu 3.6 0.220 fr nia JW300.fr.nia 4.8 0.212 fr niu JW300.fr.niu 34.5 0.537 +fr nso JW300.fr.nso 33.3 0.527 +fr nyk JW300.fr.nyk 14.8 0.366 +fr nyn JW300.fr.nyn 3.0 0.211 +fr nzi JW300.fr.nzi 3.3 0.183 +fr oc Tatoeba.fr.oc 0.1 0.064 +fr oke JW300.fr.oke 6.2 0.212 +fr om JW300.fr.om 14.4 0.393 +fr os JW300.fr.os 13.2 0.327 +fr pa JW300.fr.pa 9.1 0.211 +fr pap JW300.fr.pap 27.8 0.464 +fr pck bible-uedin.fr.pck 3.6 0.208 +fr pcm JW300.fr.pcm 4.3 0.183 +fr pis JW300.fr.pis 29.0 0.486 +fr pl Tatoeba.fr.pl 40.7 0.625 +fr plt bible-uedin.fr.plt 2.8 0.253 +fr pon JW300.fr.pon 23.9 0.458 +fr prl JW300.fr.prl 4.0 0.182 +fr pso JW300.fr.pso 3.3 0.154 +fr pt_br+pt_BR+pt_PT+pt Tatoeba.fr.pt 42.8 0.640 +fr qu JW300.fr.qu 7.6 0.261 +fr quy JW300.fr.quy 8.4 0.287 +fr qvi JW300.fr.qvi 1.7 0.179 +fr rar JW300.fr.rar 6.2 0.250 +fr rnd JW300.fr.rnd 21.8 0.431 +fr ro Tatoeba.fr.ro 42.1 0.640 +fr rsl JW300.fr.rsl 2.9 0.147 +fr rw JW300.fr.rw 25.5 0.483 +fr seh JW300.fr.seh 3.2 0.205 +fr sid JW300.fr.sid 2.3 0.186 +fr si JW300.fr.si 10.2 0.228 +fr sk JW300.fr.sk 24.9 0.456 +fr sl JW300.fr.sl 20.1 0.433 +fr sm JW300.fr.sm 28.8 0.474 +fr sn JW300.fr.sn 23.4 0.507 +fr so bible-uedin.fr.so 1.6 0.187 +fr sop JW300.fr.sop 2.8 0.211 +fr sq GlobalVoices.fr.sq 15.9 0.432 +fr srm JW300.fr.srm 7.7 0.225 +fr srn JW300.fr.srn 27.4 0.459 +fr ssp JW300.fr.ssp 4.1 0.178 +fr st JW300.fr.st 34.6 0.540 +fr swc JW300.fr.swc 28.2 0.499 +fr sw GlobalVoices.fr.sw 12.0 0.404 +fr ta_LK+ta JW300.fr.ta 2.5 0.116 +fr tcf JW300.fr.tcf 3.3 0.216 +fr tdt JW300.fr.tdt 13.9 0.314 +fr te JW300.fr.te 3.6 0.166 +fr tg_TJ+tg JW300.fr.tg 8.9 0.242 +fr th JW300.fr.th 19.4 0.342 +fr tiv JW300.fr.tiv 23.5 0.406 +fr tk JW300.fr.tk 10.5 0.275 +fr tll JW300.fr.tll 24.6 0.467 +fr tl_PH+tl Tatoeba.fr.tl 23.7 0.537 +fr tn JW300.fr.tn 33.1 0.525 +fr tog JW300.fr.tog 3.0 0.209 +fr toj JW300.fr.toj 4.1 0.205 +fr to JW300.fr.to 37.0 0.518 +fr top JW300.fr.top 2.3 0.172 +fr tsc JW300.fr.tsc 4.5 0.226 +fr ts JW300.fr.ts 31.4 0.525 +fr tt JW300.fr.tt 13.3 0.336 +fr tum JW300.fr.tum 23.0 0.458 +fr tvl JW300.fr.tvl 32.6 0.497 +fr tw JW300.fr.tw 27.9 0.469 +fr ty JW300.fr.ty 39.6 0.561 +fr tzo JW300.fr.tzo 9.5 0.299 +fr ug Tatoeba.fr.ug 0.0 0.084 +fr uk Tatoeba.fr.uk 39.4 0.581 +fr urh JW300.fr.urh 4.6 0.194 +fr ur_PK+ur GlobalVoices.fr.ur 0.2 0.117 +fr vi_VN+vi Tatoeba.fr.vi 33.0 0.508 +fr vmw JW300.fr.vmw 2.1 0.184 +fr vsl JW300.fr.vsl 4.6 0.189 +fr war JW300.fr.war 33.7 0.538 +fr wes JW300.fr.wes 5.0 0.192 +fr wls JW300.fr.wls 27.5 0.478 +fr xh JW300.fr.xh 25.1 0.523 +fr yao JW300.fr.yao 2.5 0.188 +fr yap JW300.fr.yap 25.8 0.434 +fr yo JW300.fr.yo 25.9 0.415 +fr zlm JW300.fr.zlm 3.8 0.203 +fr zne JW300.fr.zne 24.1 0.460 +fr zul+zu JW300.fr.zu 27.2 0.548 fse en JW300.fse.en 5.1 0.199 +fse es JW300.fse.es 4.1 0.173 fse fi JW300.fse.fi 90.2 0.943 fse fr JW300.fse.fr 4.9 0.186 fse sv JW300.fse.sv 5.4 0.194 fur en Tatoeba.fur.en 93.4 0.944 fy en Tatoeba.fy.en 0.2 0.053 gaa en JW300.gaa.en 41.0 0.567 +gaa es JW300.gaa.es 28.6 0.463 gaa fi JW300.gaa.fi 26.4 0.498 gaa fr JW300.gaa.fr 27.8 0.455 gaa sv JW300.gaa.sv 30.1 0.489 ga en Tatoeba.ga.en 50.1 0.671 gd en Tatoeba.gd.en 0.4 0.104 +gd es bible-uedin.gd.es 0.0 0.075 gd fi bible-uedin.gd.fi 0.1 0.044 gd fr bible-uedin.gd.fr 0.0 0.104 gd sv bible-uedin.gd.sv 0.0 0.037 gil en JW300.gil.en 36.0 0.522 +gil es JW300.gil.es 21.8 0.398 gil fi JW300.gil.fi 23.1 0.447 gil fr JW300.gil.fr 24.9 0.424 gil sv JW300.gil.sv 25.8 0.441 gl en Tatoeba.gl.en 42.5 0.604 +gl es Tatoeba.gl.es 68.7 0.802 guc en JW300.guc.en 2.4 0.132 guc fi JW300.guc.fi 1.1 0.125 guc fr JW300.guc.fr 2.0 0.135 @@ -961,7 +1356,7 @@ ho en JW300.ho.en 26.8 0.428 ho fr JW300.ho.fr 19.8 0.367 ho sv JW300.ho.sv 19.8 0.377 hr fi JW300.hr.fi 25.0 0.519 -hr fr JW300.hr.fr 25.4 0.478 +hr fr JW300.hr.fr 26.1 0.482 hr sv JW300.hr.sv 30.5 0.526 ht en JW300.ht.en 37.5 0.542 ht en Tatoeba.ht.en 57.0 0.689 @@ -970,7 +1365,7 @@ ht fr JW300.ht.fr 28.4 0.469 ht sv JW300.ht.sv 27.9 0.463 hu en Tatoeba.hu.en 52.9 0.683 hu fi Tatoeba.hu.fi 48.2 0.700 -hu fr Tatoeba.hu.fr 49.6 0.657 +hu fr Tatoeba.hu.fr 50.3 0.660 hy en Tatoeba.hy.en 29.5 0.466 hy fi JW300.hy.fi 13.4 0.338 hy fr JW300.hy.fr 17.7 0.346 @@ -1013,7 +1408,7 @@ it en newstest2009.it.en 34.0 0.594 it en Tatoeba.it.en 70.9 0.808 ja en Tatoeba.ja.en 41.7 0.589 ja fi Tatoeba.ja.fi 21.2 0.448 -ja fr Tatoeba.ja.fr 32.4 0.522 +ja fr Tatoeba.ja.fr 33.6 0.534 jap en bible-uedin.jap.en 52.6 0.703 jap fi bible-uedin.jap.fi 1.8 0.166 jap fr bible-uedin.jap.fr 1.9 0.162 @@ -1338,14 +1733,14 @@ ro sv JW300.ro.sv 31.2 0.529 rsl en JW300.rsl.en 4.2 0.175 rsl fi JW300.rsl.fi 2.4 0.169 rsl sv JW300.rsl.sv 3.7 0.168 -ru en newstest2012.ru.en 35.3 0.607 -ru en newstest2013.ru.en 28.4 0.548 -ru en newstest2014-ruen.ru.en 32.6 0.595 -ru en newstest2015-enru.ru.en 30.6 0.569 -ru en newstest2016-enru.ru.en 30.4 0.570 +ru en newstest2012.ru.en 35.3 0.606 +ru en newstest2013.ru.en 28.2 0.547 +ru en newstest2014-ruen.ru.en 32.5 0.595 +ru en newstest2015-enru.ru.en 30.7 0.569 +ru en newstest2016-enru.ru.en 30.5 0.569 ru en newstest2017-enru.ru.en 33.8 0.596 -ru en newstest2018-enru.ru.en 30.1 0.570 -ru en newstest2019-ruen.ru.en 32.2 0.582 +ru en newstest2018-enru.ru.en 30.0 0.569 +ru en newstest2019-ruen.ru.en 32.0 0.581 ru en Tatoeba.ru.en 60.1 0.727 ru fi Tatoeba.ru.fi 40.9 0.649 run en JW300.run.en 42.7 0.583 @@ -1402,6 +1797,7 @@ sv ar GlobalVoices.sv.ar 4.6 0.312 sv ase JW300.sv.ase 40.5 0.572 sv as JW300.sv.as 0.3 0.066 sv ay JW300.sv.ay 6.6 0.262 +sv az_IR+az JW300.sv.az 16.3 0.378 sv ba JW300.sv.ba 2.0 0.127 sv bas JW300.sv.bas 5.1 0.180 sv bci JW300.sv.bci 12.1 0.280 @@ -1418,6 +1814,7 @@ sv cak JW300.sv.cak 1.7 0.136 sv cat JW300.sv.cat 3.6 0.164 sv ceb JW300.sv.ceb 39.2 0.609 sv chk JW300.sv.chk 20.7 0.421 +sv cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh bible-uedin.sv.zh 24.2 0.342 sv crp bible-uedin.sv.crp 4.1 0.198 sv crs JW300.sv.crs 32.4 0.512 sv csg JW300.sv.csg 3.9 0.167 @@ -1513,6 +1910,7 @@ sv mos JW300.sv.mos 22.4 0.379 sv mr JW300.sv.mr 2.5 0.128 sv mt JW300.sv.mt 32.2 0.509 sv my JW300.sv.my 2.1 0.129 +sv nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.sv.no 39.3 0.590 sv ncj JW300.sv.ncj 2.5 0.189 sv nd JW300.sv.nd 2.2 0.191 sv ne JW300.sv.ne 3.5 0.142 @@ -1540,6 +1938,7 @@ sv plt bible-uedin.sv.plt 2.9 0.254 sv pon JW300.sv.pon 26.0 0.491 sv prl JW300.sv.prl 4.0 0.164 sv pso JW300.sv.pso 3.7 0.155 +sv pt_br+pt_BR+pt_PT+pt GlobalVoices.sv.pt 22.8 0.496 sv que JW300.sv.que 1.7 0.154 sv qu JW300.sv.qu 8.0 0.263 sv quy JW300.sv.quy 1.4 0.171 @@ -1569,13 +1968,16 @@ sv st JW300.sv.st 38.8 0.584 sv sv Tatoeba.sv.sv 49.2 0.741 sv swc JW300.sv.swc 30.1 0.536 sv sw GlobalVoices.sv.sw 10.1 0.390 +sv ta_LK+ta JW300.sv.ta 5.4 0.163 sv tdt JW300.sv.tdt 17.3 0.359 sv te JW300.sv.te 1.9 0.135 +sv tg_TJ+tg JW300.sv.tg 9.6 0.251 sv th JW300.sv.th 21.2 0.373 sv ti JW300.sv.ti 15.7 0.282 sv tiv JW300.sv.tiv 25.2 0.439 sv tk JW300.sv.tk 9.9 0.270 sv tll JW300.sv.tll 24.9 0.484 +sv tl_PH+tl JW300.sv.tl 39.0 0.612 sv tn JW300.sv.tn 36.3 0.561 sv tog JW300.sv.tog 3.4 0.211 sv toi JW300.sv.toi 23.2 0.512 @@ -1594,7 +1996,9 @@ sv tzo JW300.sv.tzo 10.7 0.311 sv uk JW300.sv.uk 24.0 0.447 sv umb JW300.sv.umb 20.4 0.431 sv urh JW300.sv.urh 5.1 0.196 +sv ur_PK+ur JW300.sv.ur 6.3 0.188 sv ve JW300.sv.ve 26.4 0.496 +sv vi_VN+vi JW300.sv.vi 38.8 0.516 sv vmw JW300.sv.vmw 2.5 0.186 sv vsl JW300.sv.vsl 4.4 0.169 sv wal JW300.sv.wal 10.4 0.340 @@ -1609,6 +2013,7 @@ sv yua JW300.sv.yua 14.3 0.363 sv zai JW300.sv.zai 7.9 0.251 sv zlm JW300.sv.zlm 4.5 0.232 sv zne JW300.sv.zne 23.8 0.474 +sv zul+zu JW300.sv.zu 31.1 0.603 swc en JW300.swc.en 41.1 0.569 swc fi JW300.swc.fi 26.0 0.489 swc sv JW300.swc.sv 30.7 0.495