all models = opus

This commit is contained in:
Joerg Tiedemann 2020-01-15 23:18:07 +02:00
parent f749bd7a87
commit 58690950b0
6 changed files with 528 additions and 36 deletions

View File

@ -220,6 +220,15 @@ finished:
fi \
done
## only multilingual models
## Pattern target: `make X-allmultilingual` runs goal X once for every entry
## of ALL_MULTILINGUAL_MODELS that has at least one file matching
## *.${PRE_SRC}-${PRE_TRG}.*.npz below ${WORKHOME}/<entry>.
## The entry name is split at '-' (field 1 = source side, field 2 = target
## side) and every '+' inside a side is turned into a space to form the
## SRCLANGS / TRGLANGS lists passed to the sub-make.
## Failures of individual sub-makes do not stop the loop.
%-allmultilingual:
	for l in ${ALL_MULTILINGUAL_MODELS}; do \
	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
	    ${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | tr '+' ' '`" \
	            TRGLANGS="`echo $$l | cut -f2 -d'-' | tr '+' ' '`" $*; \
	  fi \
	done
## run something over all language pairs but make it possible to do it in parallel, for example
## - make dist-all-parallel
@ -555,10 +564,16 @@ endif
## Build a file for manual side-by-side inspection of a test set.
## Non-empty lines of the source (${TEST_SRC}), the reference (${TEST_TRG})
## and the system output (the prerequisite without its .eval suffix) are
## interleaved line by line, a few XML entities are decoded, and an empty
## line is appended after every group of three lines.
## `&amp;` is decoded last so that text such as `&amp;lt;` is not decoded
## twice; the `&` in the replacement is escaped because an unescaped `&`
## stands for the matched text in sed.
## The $@.1 / $@.2 / $@.3 scratch files are removed at the end.
%.compare: %.eval
	grep . ${TEST_SRC} > $@.1
	grep . ${TEST_TRG} > $@.2
	grep . ${<:.eval=} > $@.3
	paste -d "\n" $@.1 $@.2 $@.3 |\
	sed -e "s/&apos;/'/g" \
	    -e 's/&quot;/"/g' \
	    -e 's/&lt;/</g' \
	    -e 's/&gt;/>/g' \
	    -e 's/&amp;/\&/g' |\
	sed 'n;n;G;' > $@
	rm -f $@.1 $@.2 $@.3
# paste -d "\n" ${TEST_SRC} ${TEST_TRG} ${<:.eval=} |\

View File

@ -45,8 +45,11 @@ OPUSCORPORA = ${patsubst %/latest/xml/${LANGPAIR}.xml.gz,%,\
## entries of ${WORKHOME} whose name contains '-' and does not contain "old"
ALL_LANG_PAIRS = ${shell ls ${WORKHOME} | grep -- '-' | grep -v old}

## the same list split in two: entries matched by grep '\+' are treated as
## multilingual models, all remaining entries as bilingual models
## (derived from ALL_LANG_PAIRS instead of repeating the ls/grep pipeline)
ALL_BILINGUAL_MODELS = ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep -v -- '\+'}
ALL_MULTILINGUAL_MODELS = ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep -- '\+'}
## size of dev data, test data and BPE merge operations
@ -75,6 +78,8 @@ HELDOUTSIZE = ${DEVSIZE}
## dev/test data: default = Tatoeba otherwise, GlobalVoices, JW300, GNOME or bibl-uedin
## - check that data exist
## - check that there are at least 2 x DEVMINSIZE examples
## TODO: this does not work well for multilingual models!
ifneq ($(wildcard ${OPUSHOME}/Tatoeba/latest/moses/${LANGPAIR}.txt.zip),)
ifeq ($(shell if (( `head -1 ${OPUSHOME}/Tatoeba/latest/info/${LANGPAIR}.txt.info` \
> $$((${DEVMINSIZE} + ${DEVMINSIZE})) )); then echo "ok"; fi),ok)

View File

@ -8,20 +8,21 @@ SRC = en
TRG = de
LANGPAIR = ${SRC}-${TRG}

## model package: the lexically last *-20*.zip found in MODELHOME
## (presumably the one with the most recent date stamp - confirm naming scheme)
MODELHOME = ../models/${LANGPAIR}
MODELZIP = ${lastword ${sort ${wildcard ${MODELHOME}/*-20*.zip}}}
MODELNAME = ${patsubst %.zip,%,${notdir ${MODELZIP}}}
MODELSTORE = OPUS-MT-models
MODELURL = https://object.pouta.csc.fi/${MODELSTORE}/${LANGPAIR}

## test data: all *.${SRC}.gz files in TESTSETDIR; the lexically last one
## is used as TESTSET
TESTSETDIR = ../testsets/${LANGPAIR}
TESTSETS = ${sort ${wildcard ${TESTSETDIR}/*.${SRC}.gz}}
TESTSET = ${lastword ${TESTSETS}}

## per-model work directory and the files derived from TESTSET:
## pre-processed input (.pre), translation (.pre.${TRG}) and the two
## evaluation reports (.eval and .eval-norm)
WORKDIR = ${PWD}/${LANGPAIR}/${MODELNAME}
TEST_PRE = ${WORKDIR}/${patsubst %.gz,%.pre,${notdir ${TESTSET}}}
TEST_TRANS = ${TEST_PRE}.${TRG}
TEST_EVAL = ${TEST_TRANS}.eval
TEST_EVALNORM = ${TEST_TRANS}.eval-norm
@ -102,6 +103,16 @@ ${TEST_EVAL}: ${TEST_TRANS}
cat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
rm -f $@.ref
## Evaluate the translation against a normalised copy of the reference.
## The reference is the ${TRG} counterpart of ${TESTSET}; it is passed
## through the three ${TOKENIZER} punctuation/character clean-up scripts,
## then runs of two or more spaces are squeezed to one and leading/trailing
## spaces are removed. The squeeze uses an explicit \{2,\} interval: a
## pattern that can match the empty string (' *') would insert a space
## between every character of the reference.
## The report holds the default sacrebleu output followed by the chrF
## output; the temporary $@.ref file is removed afterwards.
${TEST_EVALNORM}: ${TEST_TRANS}
	zcat ${patsubst %.${SRC}.gz,%.${TRG}.gz,${TESTSET}} |\
	${TOKENIZER}/replace-unicode-punctuation.perl |\
	${TOKENIZER}/remove-non-printing-char.perl |\
	${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\
	sed 's/ \{2,\}/ /g;s/^ *//g;s/ *$$//g' > $@.ref
	cat $< | sacrebleu $@.ref > $@
	cat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
	rm -f $@.ref
cleanup:
rm -fr ${WORKDIR}/model

View File

@ -91,6 +91,49 @@ goethe-defi:
TEST_TRG=de-fi/goethe/test/goethe-institute-test1.fi \
all
## Run goal `all` in a sub-make for de->fi with MODEL=goethe2, using the
## goethe-institute-train2 / goethe-institute-dev2 files for training and
## validation.
## NOTE(review): TEST_SRC/TEST_TRG point at goethe-institute-test1, not a
## "test2" file - confirm this is intended.
goethe2-defi:
${MAKE} SRC=de TRG=fi MODEL=goethe2 \
TRAIN_SRC=de-fi/goethe/train/goethe-institute-train2.de \
TRAIN_TRG=de-fi/goethe/train/goethe-institute-train2.fi \
DEV_SRC=de-fi/goethe/dev/goethe-institute-dev2.de \
DEV_TRG=de-fi/goethe/dev/goethe-institute-dev2.fi \
TEST_SRC=de-fi/goethe/test/goethe-institute-test1.de \
TEST_TRG=de-fi/goethe/test/goethe-institute-test1.fi \
all
## Score the systran, yandex and google outputs against the unmodified
## reference (no reference normalisation).
## For each system the text file below ${HOME}/research/GoetheInstitute/data
## is gzipped next to the test set and evaluated twice with sacrebleu:
## default metric first, chrF appended to the same .eval file.
## $@.ref is a temporary uncompressed copy of the reference.
goethe-other:
	zcat de-fi/goethe/test/goethe-institute-test1.fi.gz > $@.ref
	for s in systran yandex google; do \
	  out=de-fi/goethe/test/goethe-institute-test1.de.$$s.de; \
	  cat ${HOME}/research/GoetheInstitute/data/test_de_oaversetted_van_$$s.txt |\
	    gzip -c > $${out}.gz; \
	  zcat $${out}.gz | sacrebleu $@.ref > $${out}.eval; \
	  zcat $${out}.gz | sacrebleu --metrics=chrf --width=3 $@.ref >> $${out}.eval; \
	done
	rm -f $@.ref
## with reference normalisation (should not do this)
## Same evaluation as goethe-other, but both the reference and each system
## output first go through the ${TOKENIZER} clean-up scripts and a space
## clean-up (squeeze runs of two or more spaces, strip leading/trailing
## spaces). The squeeze uses an explicit \{2,\} interval: a pattern that can
## match the empty string (' *') would insert a space between every
## character. Reports are written to *.eval-norm.
## NOTE(review): normalize-punctuation is called with -l ${TRG} while the
## file names here are hard-coded to de-fi; confirm TRG is fi when this
## target runs.
goethe-other-norm:
	zcat de-fi/goethe/test/goethe-institute-test1.fi.gz |\
	${TOKENIZER}/replace-unicode-punctuation.perl |\
	${TOKENIZER}/remove-non-printing-char.perl |\
	${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\
	sed 's/ \{2,\}/ /g;s/^ *//g;s/ *$$//g' > $@.ref
	for s in systran yandex google; do \
	  out=de-fi/goethe/test/goethe-institute-test1.de.$$s.de; \
	  cat ${HOME}/research/GoetheInstitute/data/test_de_oaversetted_van_$$s.txt |\
	    ${TOKENIZER}/replace-unicode-punctuation.perl |\
	    ${TOKENIZER}/remove-non-printing-char.perl |\
	    ${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\
	    sed 's/ \{2,\}/ /g;s/^ *//g;s/ *$$//g' | gzip -c > $${out}.gz; \
	  zcat $${out}.gz | sacrebleu $@.ref > $${out}.eval-norm; \
	  zcat $${out}.gz | sacrebleu --metrics=chrf --width=3 $@.ref >> $${out}.eval-norm; \
	done
	rm -f $@.ref
goethe-test:
${MAKE} SRC=de TRG=fi MODEL=goethe-test \
@ -244,12 +287,25 @@ ${TEST_SRC}.baseline.${TRG}.gz: ${TEST_SRC}.pre.gz ${LANGPAIR}/${BASEMODELNAME}/
eval: ${TEST_SRC}.${TRG}.eval
eval-baseline: ${TEST_SRC}.baseline.${TRG}.eval
## without reference normalisation
## Static pattern rule for the fine-tuned and the baseline translation:
## X.eval is built from X.gz and the gzipped reference ${TEST_TRG}.gz.
## The reference is unpacked to a temporary $@.ref, the default sacrebleu
## output is written to $@ and the chrF output is appended.
${TEST_SRC}.${TRG}.eval ${TEST_SRC}.baseline.${TRG}.eval: %.eval: %.gz ${TEST_TRG}.gz
	zcat ${TEST_TRG}.gz > $@.ref
	zcat $< | sacrebleu $@.ref > $@
	zcat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
	rm -f $@.ref
## with reference normalisation (should not do this)
## Static pattern rule for the fine-tuned and the baseline translation:
## X.eval-norm is built from X.gz and the gzipped reference ${TEST_TRG}.gz.
## The reference goes through the ${TOKENIZER} clean-up scripts and a space
## clean-up (squeeze runs of two or more spaces, strip leading/trailing
## spaces) before scoring; the translation itself is scored as is.
## The squeeze uses an explicit \{2,\} interval: a pattern that can match
## the empty string (' *') would insert a space between every character.
${TEST_SRC}.${TRG}.eval-norm ${TEST_SRC}.baseline.${TRG}.eval-norm: %.eval-norm: %.gz ${TEST_TRG}.gz
	zcat ${TEST_TRG}.gz |\
	${TOKENIZER}/replace-unicode-punctuation.perl |\
	${TOKENIZER}/remove-non-printing-char.perl |\
	${TOKENIZER}/normalize-punctuation.perl -l ${TRG} |\
	sed 's/ \{2,\}/ /g;s/^ *//g;s/ *$$//g' > $@.ref
	zcat $< | sacrebleu $@.ref > $@
	zcat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
	rm -f $@.ref
.PHONY: compare compare-baseline

View File

@ -4,25 +4,30 @@ ab fi JW300.ab.fi 1.5 0.147
ab fr JW300.ab.fr 1.8 0.129
ab sv JW300.ab.sv 2.4 0.147
ach en JW300.ach.en 5.4 0.207
ach es JW300.ach.es 2.6 0.153
ach fi JW300.ach.fi 1.7 0.163
ach fr JW300.ach.fr 3.5 0.159
ach sv JW300.ach.sv 2.7 0.160
acu en bible-uedin.acu.en 3.8 0.202
ada en JW300.ada.en 4.3 0.182
ada es JW300.ada.es 2.7 0.153
ada fi JW300.ada.fi 1.7 0.154
ada fr JW300.ada.fr 3.1 0.152
ada sv JW300.ada.sv 2.1 0.146
aed en JW300.aed.en 4.0 0.177
aed es JW300.aed.es 89.1 0.915
aed fi JW300.aed.fi 2.2 0.163
aed fr JW300.aed.fr 3.5 0.165
aed sv JW300.aed.sv 3.3 0.170
af en Tatoeba.af.en 60.8 0.736
af es JW300.af.es 35.7 0.554
af fi JW300.af.fi 32.3 0.576
af fr JW300.af.fr 35.3 0.543
af sv JW300.af.sv 40.4 0.599
agr en bible-uedin.agr.en 4.5 0.222
am en GlobalVoices.am.en 6.1 0.286
am en Tatoeba.am.en 63.8 0.744
am es GlobalVoices.am.es 3.9 0.251
am fi JW300.am.fi 18.1 0.394
am fr GlobalVoices.am.fr 3.4 0.233
am sv JW300.am.sv 21.0 0.377
@ -34,42 +39,52 @@ ase en JW300.ase.en 99.5 0.997
ase fr JW300.ase.fr 37.8 0.553
as en JW300.as.en 1.7 0.137
as en Tatoeba.as.en 58.3 0.644
as es JW300.as.es 1.2 0.156
ase sv JW300.ase.sv 39.7 0.576
as fi JW300.as.fi 1.1 0.167
as fr JW300.as.fr 1.4 0.154
as sv JW300.as.sv 1.0 0.148
ast en Tatoeba.ast.en 81.4 0.858
ay en JW300.ay.en 7.2 0.202
ay es JW300.ay.es 11.3 0.265
ay fi JW300.ay.fi 6.5 0.222
ay fr JW300.ay.fr 6.4 0.203
ay sv JW300.ay.sv 6.8 0.212
ba en JW300.ba.en 2.8 0.144
ba en Tatoeba.ba.en 0.8 0.134
ba es JW300.ba.es 2.0 0.141
ba fi JW300.ba.fi 1.7 0.164
ba fr JW300.ba.fr 3.1 0.150
bas en JW300.bas.en 5.8 0.207
bas es JW300.bas.es 4.0 0.175
bas fi JW300.bas.fi 2.4 0.174
bas fr JW300.bas.fr 3.9 0.177
bas sv JW300.bas.sv 3.9 0.174
ba sv JW300.ba.sv 2.2 0.139
bbc en JW300.bbc.en 6.7 0.204
bbc es JW300.bbc.es 4.4 0.178
bbc fr JW300.bbc.fr 4.4 0.172
bci en JW300.bci.en 13.9 0.269
bci es JW300.bci.es 5.9 0.223
bci fi JW300.bci.fi 5.8 0.242
bci fr JW300.bci.fr 6.9 0.216
bci sv JW300.bci.sv 7.6 0.235
bcl en JW300.bcl.en 56.8 0.705
bcl es JW300.bcl.es 37.0 0.551
bcl fi JW300.bcl.fi 33.3 0.573
bcl fr JW300.bcl.fr 35.0 0.527
bcl sv JW300.bcl.sv 38.0 0.565
bem en JW300.bem.en 33.4 0.491
bem es JW300.bem.es 22.8 0.403
bem fi JW300.bem.fi 22.8 0.439
bem fr JW300.bem.fr 25.0 0.417
bem sv JW300.bem.sv 25.6 0.434
ber en Tatoeba.ber.en 37.3 0.566
ber es Tatoeba.ber.es 33.8 0.487
ber fr Tatoeba.ber.fr 60.2 0.754
bfi en JW300.bfi.en 20.0 0.423
bg en Tatoeba.bg.en 59.4 0.727
bg es GlobalVoices.bg.es 24.5 0.526
bg fi JW300.bg.fi 23.7 0.505
bg fr GlobalVoices.bg.fr 20.9 0.480
bg sv JW300.bg.sv 29.1 0.494
@ -79,6 +94,7 @@ bi fi JW300.bi.fi 0.6 0.124
bi fr JW300.bi.fr 21.5 0.382
bi sv JW300.bi.sv 22.7 0.403
bn en Tatoeba.bn.en 49.8 0.644
bn es GlobalVoices.bn.es 12.7 0.372
bn fi JW300.bn.fi 5.5 0.214
bn fr GlobalVoices.bn.fr 8.4 0.311
bn sv GlobalVoices.bn.sv 2.3 0.171
@ -89,18 +105,22 @@ bs en Tatoeba.bs.en 64.9 0.784
bsn en bible-uedin.bsn.en 1.2 0.117
btx en JW300.btx.en 7.0 0.236
bum en JW300.bum.en 4.6 0.182
bum es JW300.bum.es 3.2 0.162
bum fi JW300.bum.fi 2.2 0.161
bum fr JW300.bum.fr 4.0 0.173
bum sv JW300.bum.sv 3.5 0.163
bzs en JW300.bzs.en 44.5 0.605
bzs es JW300.bzs.es 28.1 0.464
bzs fi JW300.bzs.fi 24.7 0.464
bzs fr JW300.bzs.fr 30.0 0.479
bzs sv JW300.bzs.sv 30.7 0.489
cab en JW300.cab.en 3.0 0.154
cab es JW300.cab.es 5.1 0.225
cab fi JW300.cab.fi 1.7 0.150
cab fr JW300.cab.fr 3.1 0.153
cab sv JW300.cab.sv 2.6 0.152
ca en Tatoeba.ca.en 51.4 0.678
ca es Tatoeba.ca.es 74.9 0.863
ca fr Tatoeba.ca.fr 50.4 0.672
cak en JW300.cak.en 2.6 0.140
cak fi JW300.cak.fi 0.6 0.109
@ -113,20 +133,28 @@ cat fr JW300.cat.fr 3.5 0.163
cat sv JW300.cat.sv 2.5 0.154
ceb en JW300.ceb.en 52.6 0.670
ceb en Tatoeba.ceb.en 59.5 0.704
ceb es JW300.ceb.es 31.6 0.508
ceb fi JW300.ceb.fi 27.4 0.525
ceb fr JW300.ceb.fr 30.0 0.491
ceb sv JW300.ceb.sv 35.5 0.552
chk en JW300.chk.en 31.2 0.465
chk es JW300.chk.es 20.8 0.374
chk fi JW300.chk.fi 19.4 0.395
chk fr JW300.chk.fr 22.4 0.387
chk sv JW300.chk.sv 23.6 0.406
cjk en JW300.cjk.en 6.8 0.226
cjk es JW300.cjk.es 3.8 0.169
cjk fr JW300.cjk.fr 4.3 0.174
cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh de Tatoeba.cmn.de 33.1 0.530
cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh fi bible-uedin.cmn.fi 21.6 0.497
cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh sv Tatoeba.cmn.sv 46.6 0.620
cnh en JW300.cnh.en 6.9 0.240
crp es bible-uedin.crp.es 2.8 0.187
crp fi bible-uedin.crp.fi 2.0 0.181
crp fr bible-uedin.crp.fr 2.9 0.190
crp sv bible-uedin.crp.sv 3.1 0.184
crs en JW300.crs.en 42.9 0.589
crs es JW300.crs.es 26.1 0.445
crs fi JW300.crs.fi 25.6 0.479
crs fr JW300.crs.fr 29.4 0.475
crs sv JW300.crs.sv 29.3 0.480
@ -140,14 +168,17 @@ cs en Tatoeba.cs.en 58.0 0.721
cs fi JW300.cs.fi 25.5 0.523
cs fr GlobalVoices.cs.fr 21.0 0.488
csg en JW300.csg.en 4.6 0.183
csg es JW300.csg.es 93.1 0.952
csg fi JW300.csg.fi 2.3 0.160
csg fr JW300.csg.fr 4.7 0.184
csg sv JW300.csg.sv 4.5 0.176
csl en JW300.csl.en 4.1 0.162
csl es JW300.csl.es 3.1 0.141
csl fi JW300.csl.fi 2.5 0.152
csl fr JW300.csl.fr 3.0 0.156
csl sv JW300.csl.sv 3.3 0.142
csn en JW300.csn.en 3.8 0.172
csn es JW300.csn.es 87.4 0.899
csn fi JW300.csn.fi 2.0 0.162
csn fr JW300.csn.fr 3.5 0.164
csn sv JW300.csn.sv 3.8 0.173
@ -156,17 +187,24 @@ ctu en JW300.ctu.en 2.9 0.157
ctu fr JW300.ctu.fr 3.3 0.166
cv en JW300.cv.en 2.6 0.151
cv en Tatoeba.cv.en 0.3 0.102
cv es JW300.cv.es 2.0 0.152
cv fi JW300.cv.fi 1.2 0.148
cv fr JW300.cv.fr 2.6 0.154
cv sv JW300.cv.sv 2.1 0.144
cy en Tatoeba.cy.en 33.0 0.525
cy es JW300.cy.es 0.0 0.025
cy fi JW300.cy.fi 0.3 0.067
cy fr JW300.cy.fr 8.7 0.266
cy sv JW300.cy.sv 6.6 0.218
da en Tatoeba.da.en 63.6 0.769
da es Tatoeba.da.es 53.7 0.715
da fi Tatoeba.da.fi 39.0 0.629
da+fo+is+no+nb+nn+sv da+fo+is+no+nb+nn+sv Tatoeba.da.sv 69.2 0.811
da fr Tatoeba.da.fr 62.2 0.751
de az_IR+az JW300.de.az 13.4 0.342
de be_tarask+be Tatoeba.de.be 3.1 0.106
de cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh bible-uedin.de.zh 21.9 0.293
de de Tatoeba.de.de 40.8 0.617
de en newssyscomb2009.de.en 28.6 0.553
de en news-test2008.de.en 27.6 0.547
de en newstest2009.de.en 26.9 0.544
@ -181,6 +219,9 @@ de en newstest2017-ende.de.en 35.6 0.609
de en newstest2018-ende.de.en 43.8 0.667
de en newstest2019-deen.de.en 39.6 0.637
de en Tatoeba.de.en 55.1 0.704
de es Tatoeba.de.es 48.5 0.676
de fi goethe-institute-test1.de.fi 18.3 0.493
de fi goethe-institute-test2.de.fi 18.0 0.494
de fi Tatoeba.de.fi 40.0 0.628
de fr euelections_dev2019.transformer-align.de 32.2 0.590
de fr newssyscomb2009.de.fr 26.8 0.553
@ -192,18 +233,23 @@ de fr newstest2012.de.fr 27.7 0.554
de fr newstest2013.de.fr 29.5 0.560
de fr newstest2019-defr.de.fr 36.6 0.625
de fr Tatoeba.de.fr 49.2 0.664
de+nl+fy+af+da+fo+is+no+nb+nn+sv de+nl+fy+af+da+fo+is+no+nb+nn+sv Tatoeba.de.sv 48.1 0.663
de pt_br+pt_BR+pt_PT+pt Tatoeba.de.pt 35.2 0.577
dhv en JW300.dhv.en 4.7 0.190
dhv fi JW300.dhv.fi 1.2 0.149
dhv fr JW300.dhv.fr 3.2 0.157
dhv sv JW300.dhv.sv 2.5 0.160
dje en bible-uedin.dje.en 4.4 0.228
dje es bible-uedin.dje.es 3.8 0.215
dje fi bible-uedin.dje.fi 3.0 0.211
dje fr bible-uedin.dje.fr 4.1 0.211
dje sv bible-uedin.dje.sv 4.4 0.214
djk en JW300.djk.en 4.3 0.177
djk fr JW300.djk.fr 3.0 0.156
ecs es JW300.ecs.es 16.2 0.423
ee en JW300.ee.en 39.3 0.556
ee en Tatoeba.ee.en 21.2 0.569
ee es JW300.ee.es 26.4 0.449
ee fi JW300.ee.fi 25.0 0.482
ee fr JW300.ee.fr 27.1 0.450
ee sv JW300.ee.sv 28.9 0.472
@ -485,7 +531,15 @@ en ro newsdev2016-enro.en.ro 30.8 0.592
en ro newstest2016-enro.en.ro 28.8 0.571
en ro Tatoeba.en.ro 45.3 0.670
en rsl JW300.en.rsl 3.2 0.156
en ru newstest2012.en.ru 23.7 0.436
en ru newstest2013.en.ru 19.4 0.413
en ru newstest2015-enru.en.ru 21.6 0.440
en ru newstest2016-enru.en.ru 21.0 0.432
en ru newstest2017-enru.en.ru 22.7 0.443
en ru newstest2018-enru.en.ru 19.2 0.413
en ru newstest2019-enru.en.ru 22.3 0.412
en run JW300.en.run 34.2 0.591
en ru Tatoeba.en.ru 46.9 0.656
en rw JW300.en.rw 33.3 0.569
en rw Tatoeba.en.rw 13.8 0.503
en seh JW300.en.seh 5.6 0.260
@ -538,10 +592,54 @@ en wal JW300.en.wal 20.2 0.471
en war Tatoeba.en.war 9.1 0.382
en xh JW300.en.xh 37.9 0.652
eo en Tatoeba.eo.en 54.8 0.694
eo es Tatoeba.eo.es 44.2 0.631
eo fi Tatoeba.eo.fi 13.9 0.325
eo fr Tatoeba.eo.fr 50.9 0.675
eo sv Tatoeba.eo.sv 25.7 0.439
es ach JW300.es.ach 3.9 0.175
es ada JW300.es.ada 7.1 0.206
es aed JW300.es.aed 89.2 0.915
es af JW300.es.af 36.6 0.564
es am GlobalVoices.es.am 0.7 0.125
es as JW300.es.as 0.6 0.095
es ay JW300.es.ay 9.8 0.325
es az_IR+az JW300.es.az 12.6 0.319
es ba JW300.es.ba 1.8 0.129
es bas JW300.es.bas 4.5 0.167
es bbc JW300.es.bbc 5.4 0.202
es bci JW300.es.bci 6.7 0.213
es bcl JW300.es.bcl 37.1 0.586
es bem JW300.es.bem 19.9 0.440
es ber Tatoeba.es.ber 21.8 0.444
es bg GlobalVoices.es.bg 21.0 0.493
es bn GlobalVoices.es.bn 5.5 0.327
es bum JW300.es.bum 5.2 0.186
es bzs JW300.es.bzs 26.4 0.451
es cab JW300.es.cab 5.4 0.275
es ca Tatoeba.es.ca 68.9 0.832
es ceb JW300.es.ceb 33.9 0.564
es chk JW300.es.chk 17.1 0.378
es cjk JW300.es.cjk 3.5 0.214
es crp bible-uedin.es.crp 4.5 0.202
es crs JW300.es.crs 26.4 0.453
es csg JW300.es.csg 91.2 0.937
es csl JW300.es.csl 2.7 0.052
es csn JW300.es.csn 87.8 0.901
es cs Tatoeba.es.cs 46.4 0.655
es cv JW300.es.cv 1.8 0.136
es cy JW300.es.cy 5.2 0.211
es da Tatoeba.es.da 55.7 0.712
es de Tatoeba.es.de 50.0 0.683
es dje bible-uedin.es.dje 4.7 0.227
es ecs JW300.es.ecs 17.2 0.426
es ee JW300.es.ee 25.6 0.470
es eo Tatoeba.es.eo 44.7 0.657
es et JW300.es.et 20.7 0.466
es eu Tatoeba.es.eu 36.9 0.637
es fa GlobalVoices.es.fa 5.9 0.282
es fil GlobalVoices.es.fil 0.2 0.108
es fi Tatoeba.es.fi 43.5 0.662
es fon JW300.es.fon 5.3 0.163
es fr newssyscomb2009.es.fr 33.6 0.610
es fr news-test2008.es.fr 32.0 0.585
es fr newstest2009.es.fr 32.5 0.590
@ -550,17 +648,211 @@ es fr newstest2011.es.fr 33.9 0.607
es fr newstest2012.es.fr 32.4 0.602
es fr newstest2013.es.fr 32.1 0.593
es fr Tatoeba.es.fr 58.4 0.731
es fse JW300.es.fse 2.7 0.171
es gaa JW300.es.gaa 27.8 0.479
es gd bible-uedin.es.gd 0.0 0.095
es gil JW300.es.gil 23.8 0.470
es gl Tatoeba.es.gl 63.8 0.778
es gug JW300.es.gug 14.0 0.359
es gu JW300.es.gu 2.4 0.124
es guw JW300.es.guw 28.6 0.480
es gv bible-uedin.es.gv 0.0 0.034
es gym JW300.es.gym 5.5 0.267
es ha JW300.es.ha 20.6 0.421
es he Tatoeba.es.he 44.3 0.642
es hi GlobalVoices.es.hi 0.7 0.125
es hmn JW300.es.hmn 4.6 0.195
es ho JW300.es.ho 22.8 0.463
es hr JW300.es.hr 21.7 0.459
es ht JW300.es.ht 23.3 0.407
es hy JW300.es.hy 4.2 0.157
es ia Tatoeba.es.ia 0.1 0.089
es iba JW300.es.iba 5.3 0.218
es ibg JW300.es.ibg 5.0 0.206
es id GlobalVoices.es.id 21.1 0.516
es ig JW300.es.ig 27.0 0.434
es ilo JW300.es.ilo 31.0 0.544
es ise JW300.es.ise 3.5 0.181
es is JW300.es.is 22.3 0.418
es iso JW300.es.iso 22.4 0.396
es jap bible-uedin.es.jap 3.9 0.121
es ja Tatoeba.es.ja 1.1 0.352
es jsl JW300.es.jsl 3.5 0.071
es jv JW300.es.jv 5.1 0.199
es ka JW300.es.ka 10.8 0.279
es kam JW300.es.kam 2.6 0.167
es kbp JW300.es.kbp 4.3 0.164
es kek JW300.es.kek 0.0 0.010
es kg JW300.es.kg 25.6 0.488
es ki JW300.es.ki 2.7 0.168
es kj JW300.es.kj 3.8 0.210
es kl JW300.es.kl 7.3 0.330
es kmb JW300.es.kmb 4.5 0.198
es km Tatoeba.es.km 0.1 0.072
es kn JW300.es.kn 3.1 0.123
es koo JW300.es.koo 2.9 0.193
es ko Tatoeba.es.ko 5.8 0.274
es kqn JW300.es.kqn 19.9 0.437
es kri JW300.es.kri 6.3 0.194
es kss JW300.es.kss 3.7 0.178
es ksw JW300.es.ksw 3.9 0.170
es kvk JW300.es.kvk 0.8 0.070
es kwn JW300.es.kwn 10.0 0.293
es kwy JW300.es.kwy 16.9 0.375
es ky JW300.es.ky 10.8 0.274
es la Tatoeba.es.la 0.2 0.092
es lg JW300.es.lg 19.5 0.438
es ln JW300.es.ln 27.1 0.508
es lo JW300.es.lo 3.0 0.134
es loz JW300.es.loz 28.6 0.493
es lt JW300.es.lt 17.9 0.423
es lua JW300.es.lua 23.4 0.473
es lue JW300.es.lue 20.0 0.472
es lu JW300.es.lu 20.0 0.432
es lun JW300.es.lun 12.1 0.354
es luo JW300.es.luo 13.5 0.330
es lus JW300.es.lus 20.9 0.414
es lv JW300.es.lv 17.2 0.398
es mam JW300.es.mam 5.1 0.249
es mau JW300.es.mau 5.3 0.254
es mco JW300.es.mco 3.2 0.210
es mfe JW300.es.mfe 19.5 0.391
es mfs JW300.es.mfs 88.6 0.907
es mg GlobalVoices.es.mg 18.3 0.532
es mgr JW300.es.mgr 2.7 0.194
es mh JW300.es.mh 19.5 0.388
es mi bible-uedin.es.mi 6.5 0.271
es mk GlobalVoices.es.mk 24.1 0.526
es ml JW300.es.ml 5.4 0.214
es mn JW300.es.mn 5.9 0.200
es mos JW300.es.mos 17.8 0.323
es mr JW300.es.mr 1.9 0.118
es mt JW300.es.mt 28.1 0.460
es my GlobalVoices.es.my 0.1 0.082
es nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.es.no 31.6 0.523
es nch JW300.es.nch 5.4 0.275
es ncj JW300.es.ncj 5.2 0.280
es ncx JW300.es.ncx 7.6 0.297
es ndc JW300.es.ndc 2.7 0.191
es nd JW300.es.nd 1.8 0.179
es nds_nl+nds Tatoeba.es.nds 4.2 0.228
es ne JW300.es.ne 3.7 0.156
es ng JW300.es.ng 3.6 0.213
es ngl JW300.es.ngl 2.3 0.186
es ngu JW300.es.ngu 5.0 0.274
es nia JW300.es.nia 4.4 0.202
es niu JW300.es.niu 29.9 0.506
es nl Tatoeba.es.nl 50.4 0.681
es nr JW300.es.nr 1.8 0.156
es nso JW300.es.nso 33.2 0.531
es nya JW300.es.nya 2.8 0.186
es ny JW300.es.ny 22.9 0.494
es nyk JW300.es.nyk 2.1 0.188
es nyn JW300.es.nyn 2.7 0.201
es nzi JW300.es.nzi 3.0 0.183
es oke JW300.es.oke 5.6 0.208
es om JW300.es.om 9.6 0.320
es os JW300.es.os 10.6 0.284
es pag JW300.es.pag 25.3 0.478
es pa JW300.es.pa 5.8 0.179
es pap JW300.es.pap 28.2 0.486
es pck bible-uedin.es.pck 3.8 0.216
es pcm JW300.es.pcm 4.7 0.181
es pes bible-uedin.es.pes 3.4 0.206
es pis JW300.es.pis 27.1 0.484
es pl Tatoeba.es.pl 44.0 0.646
es plt bible-uedin.es.plt 3.1 0.268
es pon JW300.es.pon 21.6 0.448
es prl JW300.es.prl 92.2 0.950
es pso JW300.es.pso 3.6 0.151
es que JW300.es.que 11.8 0.377
es qu JW300.es.qu 12.1 0.339
es quy JW300.es.quy 13.6 0.399
es quz JW300.es.quz 12.7 0.353
es qvi JW300.es.qvi 1.9 0.203
es rar JW300.es.rar 6.6 0.255
es rnd JW300.es.rnd 14.6 0.347
es ro Tatoeba.es.ro 45.4 0.663
es rsl JW300.es.rsl 3.0 0.140
es run JW300.es.run 20.0 0.456
es ru Tatoeba.es.ru 43.8 0.631
es rw JW300.es.rw 22.6 0.472
es seh JW300.es.seh 3.1 0.191
es sg JW300.es.sg 24.8 0.435
es sid JW300.es.sid 2.1 0.176
es si JW300.es.si 6.7 0.189
es sl JW300.es.sl 19.4 0.438
es sm JW300.es.sm 25.5 0.450
es sn JW300.es.sn 23.6 0.528
es so bible-uedin.es.so 1.7 0.192
es sop JW300.es.sop 2.5 0.201
es sq GlobalVoices.es.sq 19.1 0.478
es srm JW300.es.srm 6.7 0.214
es srn JW300.es.srn 28.7 0.487
es ss JW300.es.ss 9.1 0.304
es ssp JW300.es.ssp 17.1 0.435
es st JW300.es.st 35.5 0.556
es swc JW300.es.swc 26.0 0.490
es sw GlobalVoices.es.sw 16.9 0.455
es ta_LK+ta JW300.es.ta 1.7 0.119
es tcf JW300.es.tcf 7.6 0.296
es tdt JW300.es.tdt 12.5 0.299
es te JW300.es.te 1.8 0.131
es tg_TJ+tg JW300.es.tg 7.1 0.212
es th JW300.es.th 17.0 0.334
es ti JW300.es.ti 14.1 0.251
es tiv JW300.es.tiv 19.0 0.368
es tk JW300.es.tk 8.1 0.244
es tll JW300.es.tll 20.7 0.434
es tl_PH+tl Tatoeba.es.tl 25.5 0.543
es tn JW300.es.tn 32.2 0.528
es tog JW300.es.tog 3.1 0.205
es toj JW300.es.toj 9.3 0.305
es to JW300.es.to 35.7 0.510
es top JW300.es.top 4.1 0.256
es tpi JW300.es.tpi 27.0 0.472
es tsc JW300.es.tsc 3.9 0.192
es tt JW300.es.tt 10.7 0.281
es tum JW300.es.tum 19.3 0.430
es tvl JW300.es.tvl 28.3 0.464
es tw JW300.es.tw 26.3 0.465
es ty JW300.es.ty 37.3 0.544
es tzh JW300.es.tzh 9.7 0.294
es tzo JW300.es.tzo 22.6 0.469
es uk Tatoeba.es.uk 38.6 0.573
es umb JW300.es.umb 17.5 0.397
es urh JW300.es.urh 4.7 0.188
es ur_PK+ur GlobalVoices.es.ur 0.4 0.129
es ve JW300.es.ve 21.7 0.440
es vi_VN+vi Tatoeba.es.vi 31.1 0.486
es vmw JW300.es.vmw 2.0 0.165
es vsl JW300.es.vsl 18.0 0.441
es wal JW300.es.wal 7.9 0.288
es war JW300.es.war 31.7 0.530
es wes JW300.es.wes 4.9 0.189
es wls JW300.es.wls 22.9 0.437
es xh JW300.es.xh 25.0 0.541
es yao JW300.es.yao 3.0 0.182
es yo JW300.es.yo 22.3 0.387
es yua JW300.es.yua 23.6 0.471
es zai JW300.es.zai 20.8 0.426
es zlm JW300.es.zlm 3.8 0.200
es zne JW300.es.zne 19.7 0.427
es zul+zu JW300.es.zu 26.0 0.557
et en newsdev2018-enet.et.en 30.1 0.574
et en newstest2018-enet.et.en 30.3 0.581
et en Tatoeba.et.en 59.9 0.738
et es JW300.et.es 27.2 0.490
et fi JW300.et.fi 26.6 0.546
et fr JW300.et.fr 26.2 0.484
et sv JW300.et.sv 28.9 0.513
eu en Tatoeba.eu.en 46.1 0.638
eu es Tatoeba.eu.es 48.8 0.672
eu fi bible-uedin.eu.fi 0.6 0.121
eu fr bible-uedin.eu.fr 3.4 0.236
eu sv bible-uedin.eu.sv 0.6 0.121
fa en GlobalVoices.fa.en 14.3 0.399
fa es GlobalVoices.fa.es 12.6 0.369
fa fi JW300.fa.fi 18.0 0.405
fa fr GlobalVoices.fa.fr 10.6 0.341
fa sv JW300.fa.sv 18.3 0.353
@ -573,6 +865,7 @@ fi am JW300.fi.am 14.8 0.295
fi ar JW300.fi.ar 8.0 0.225
fi ase JW300.fi.ase 36.6 0.553
fi as JW300.fi.as 0.9 0.095
fi az_IR+az JW300.fi.az 16.8 0.385
fi ba JW300.fi.ba 2.1 0.134
fi bas JW300.fi.bas 4.6 0.182
fi bci JW300.fi.bci 10.9 0.266
@ -588,6 +881,7 @@ fi cak JW300.fi.cak 1.8 0.154
fi cat JW300.fi.cat 3.1 0.160
fi ceb JW300.fi.ceb 35.1 0.581
fi chk JW300.fi.chk 17.8 0.394
fi cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh bible-uedin.fi.zh 23.4 0.326
fi crp bible-uedin.fi.crp 3.6 0.188
fi crs JW300.fi.crs 29.6 0.491
fi csg JW300.fi.csg 3.6 0.162
@ -657,6 +951,7 @@ fi koo JW300.fi.koo 2.5 0.199
fi kqn JW300.fi.kqn 22.3 0.476
fi kwn JW300.fi.kwn 2.9 0.197
fil en GlobalVoices.fil.en 0.3 0.099
fil es GlobalVoices.fil.es 0.2 0.085
fil fr GlobalVoices.fil.fr 0.2 0.093
fi lt JW300.fi.lt 18.6 0.439
fi lua JW300.fi.lua 26.6 0.505
@ -672,6 +967,7 @@ fi mgr JW300.fi.mgr 2.6 0.197
fi mi bible-uedin.fi.mi 5.6 0.238
fi mt JW300.fi.mt 29.9 0.490
fi my JW300.fi.my 1.6 0.145
fi nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.fi.no 34.2 0.545
fi ncj JW300.fi.ncj 2.5 0.199
fi nd JW300.fi.nd 1.8 0.188
fi ne JW300.fi.ne 3.3 0.161
@ -688,6 +984,7 @@ fi nzi JW300.fi.nzi 3.5 0.183
fi oke JW300.fi.oke 5.3 0.213
fi pcm JW300.fi.pcm 4.3 0.179
fi pon JW300.fi.pon 23.7 0.475
fi pt_br+pt_BR+pt_PT+pt JW300.fi.pt 30.5 0.531
fi ro JW300.fi.ro 27.0 0.490
fi rsl JW300.fi.rsl 3.2 0.151
fi run JW300.fi.run 23.2 0.498
@ -702,8 +999,9 @@ fi sq JW300.fi.sq 32.0 0.535
fi srn JW300.fi.srn 29.2 0.491
fi ssp JW300.fi.ssp 3.9 0.176
fi sv fiskmo_testset.fi.sv 27.0 0.599
fi sv Tatoeba.fi.sv 54.8 0.705
fi sv Tatoeba.fi.sv 55.0 0.706
fi sw JW300.fi.sw 29.9 0.548
fi tg_TJ+tg JW300.fi.tg 9.7 0.260
fi ti JW300.fi.ti 16.6 0.303
fi tn JW300.fi.tn 34.5 0.555
fi tog JW300.fi.tog 3.2 0.216
@ -713,7 +1011,9 @@ fi tvl JW300.fi.tvl 33.6 0.517
fi uk JW300.fi.uk 23.3 0.445
fi umb JW300.fi.umb 19.8 0.426
fi urh JW300.fi.urh 4.5 0.192
fi ur_PK+ur JW300.fi.ur 11.2 0.261
fi ve JW300.fi.ve 26.0 0.495
fi vi_VN+vi JW300.fi.vi 38.8 0.520
fi vmw JW300.fi.vmw 2.2 0.183
fi vsl JW300.fi.vsl 4.5 0.178
fi wal JW300.fi.wal 10.1 0.343
@ -723,11 +1023,13 @@ fi yap JW300.fi.yap 25.4 0.445
fi yua JW300.fi.yua 12.5 0.341
fi zai JW300.fi.zai 5.9 0.229
fi zne JW300.fi.zne 22.7 0.464
fi zul+zu JW300.fi.zu 28.1 0.584
fj en JW300.fj.en 31.0 0.471
fj en Tatoeba.fj.en 79.7 0.835
fj fi JW300.fj.fi 20.1 0.421
fj fr JW300.fj.fr 24.0 0.407
fon en JW300.fon.en 4.7 0.204
fon es JW300.fon.es 3.1 0.165
fon fi JW300.fon.fi 2.2 0.167
fon fr JW300.fon.fr 3.7 0.168
fon sv JW300.fon.sv 2.9 0.160
@ -740,6 +1042,7 @@ fr am GlobalVoices.fr.am 0.5 0.108
fr ase JW300.fr.ase 38.5 0.545
fr as JW300.fr.as 1.5 0.127
fr ay JW300.fr.ay 6.4 0.256
fr az_IR+az JW300.fr.az 14.5 0.355
fr ba JW300.fr.ba 2.1 0.144
fr bas JW300.fr.bas 4.8 0.181
fr bbc JW300.fr.bbc 5.0 0.198
@ -747,7 +1050,7 @@ fr bci JW300.fr.bci 10.4 0.255
fr bcl JW300.fr.bcl 35.9 0.566
fr bem JW300.fr.bem 22.8 0.456
fr ber Tatoeba.fr.ber 37.2 0.641
fr bg GlobalVoices.fr.bg 18.8 0.461
fr bg GlobalVoices.fr.bg 18.7 0.459
fr bi JW300.fr.bi 28.4 0.464
fr bn GlobalVoices.fr.bn 3.5 0.266
fr br Tatoeba.fr.br 3.5 0.201
@ -763,7 +1066,7 @@ fr cjk JW300.fr.cjk 3.1 0.214
fr crp bible-uedin.fr.crp 4.2 0.201
fr crs JW300.fr.crs 31.6 0.492
fr csg JW300.fr.csg 4.0 0.177
fr cs GlobalVoices.fr.cs 15.9 0.425
fr cs GlobalVoices.fr.cs 16.1 0.426
fr csl JW300.fr.csl 1.8 0.045
fr cs newssyscomb2009.fr.cs 19.3 0.476
fr cs news-test2008.fr.cs 17.9 0.461
@ -791,7 +1094,7 @@ fr dje bible-uedin.fr.dje 4.6 0.224
fr djk JW300.fr.djk 4.4 0.187
fr ee JW300.fr.ee 26.3 0.466
fr efi JW300.fr.efi 26.9 0.462
fr el Tatoeba.fr.el 54.7 0.709
fr el Tatoeba.fr.el 56.2 0.719
fr eo Tatoeba.fr.eo 52.0 0.695
fr es newssyscomb2009.fr.es 34.3 0.601
fr es news-test2008.fr.es 32.5 0.583
@ -801,7 +1104,7 @@ fr es newstest2011.fr.es 38.3 0.622
fr es newstest2012.fr.es 38.1 0.619
fr es newstest2013.fr.es 34.0 0.587
fr es Tatoeba.fr.es 53.2 0.709
fr et JW300.fr.et 18.8 0.447
fr et JW300.fr.et 19.4 0.452
fr eu bible-uedin.fr.eu 0.9 0.249
fr fa GlobalVoices.fr.fa 5.5 0.276
fr fil GlobalVoices.fr.fil 0.3 0.117
@ -822,7 +1125,7 @@ fr hi GlobalVoices.fr.hi 2.5 0.161
fr hil JW300.fr.hil 34.7 0.559
fr hmn JW300.fr.hmn 5.1 0.212
fr ho JW300.fr.ho 25.4 0.480
fr hr JW300.fr.hr 19.9 0.436
fr hr JW300.fr.hr 20.7 0.442
fr ht JW300.fr.ht 29.2 0.461
fr hy JW300.fr.hy 7.8 0.225
fr ia Tatoeba.fr.ia 0.2 0.092
@ -833,7 +1136,7 @@ fr ig JW300.fr.ig 29.0 0.445
fr ilo JW300.fr.ilo 30.6 0.528
fr iso JW300.fr.iso 26.7 0.429
fr jap bible-uedin.fr.jap 3.7 0.113
fr ja Tatoeba.fr.ja 1.1 0.305
fr ja Tatoeba.fr.ja 1.0 0.316
fr jp GlobalVoices.fr.jp 0.1 0.021
fr jsl JW300.fr.jsl 4.6 0.082
fr jv JW300.fr.jv 5.3 0.211
@ -849,7 +1152,7 @@ fr kl JW300.fr.kl 13.8 0.408
fr kmb JW300.fr.kmb 4.5 0.212
fr km GlobalVoices.fr.km 0.0 0.002
fr kn JW300.fr.kn 5.3 0.159
fr ko GlobalVoices.fr.ko 2.1 0.152
fr ko GlobalVoices.fr.ko 2.2 0.154
fr koo JW300.fr.koo 2.6 0.209
fr kqn JW300.fr.kqn 23.3 0.469
fr kri JW300.fr.kri 6.7 0.199
@ -864,12 +1167,14 @@ fr lg JW300.fr.lg 21.7 0.454
fr ln JW300.fr.ln 30.5 0.527
fr lo JW300.fr.lo 3.2 0.140
fr loz JW300.fr.loz 30.0 0.498
fr lt JW300.fr.lt 17.2 0.406
fr lua JW300.fr.lua 27.3 0.496
fr lue JW300.fr.lue 23.1 0.485
fr lu JW300.fr.lu 25.5 0.471
fr lun JW300.fr.lun 18.1 0.432
fr luo JW300.fr.luo 17.7 0.380
fr lus JW300.fr.lus 25.5 0.455
fr lv JW300.fr.lv 18.2 0.402
fr mam JW300.fr.mam 4.1 0.222
fr mau JW300.fr.mau 3.7 0.215
fr mco JW300.fr.mco 2.9 0.194
@ -879,44 +1184,134 @@ fr mg GlobalVoices.fr.mg 15.7 0.501
fr mgr JW300.fr.mgr 2.9 0.199
fr mh JW300.fr.mh 21.7 0.399
fr mi bible-uedin.fr.mi 7.3 0.289
fr mk GlobalVoices.fr.mk 18.9 0.471
fr mk GlobalVoices.fr.mk 19.1 0.473
fr ml JW300.fr.ml 9.4 0.266
fr mn JW300.fr.mn 9.9 0.245
fr mos JW300.fr.mos 21.1 0.353
fr mr JW300.fr.mr 2.2 0.108
fr mt JW300.fr.mt 25.7 0.442
fr mt JW300.fr.mt 28.7 0.466
fr my GlobalVoices.fr.my 0.0 0.076
fr nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.fr.no 29.7 0.494
fr nch JW300.fr.nch 3.6 0.221
fr ncj JW300.fr.ncj 2.7 0.200
fr ncx JW300.fr.ncx 3.3 0.214
fr ndc JW300.fr.ndc 2.6 0.200
fr nd JW300.fr.nd 1.9 0.185
fr nds_nl+nds Tatoeba.fr.nds 3.0 0.212
fr ne JW300.fr.ne 3.7 0.157
fr ng JW300.fr.ng 2.8 0.194
fr ngl JW300.fr.ngl 2.2 0.196
fr ngu JW300.fr.ngu 3.6 0.220
fr nia JW300.fr.nia 4.8 0.212
fr niu JW300.fr.niu 34.5 0.537
fr nso JW300.fr.nso 33.3 0.527
fr nyk JW300.fr.nyk 14.8 0.366
fr nyn JW300.fr.nyn 3.0 0.211
fr nzi JW300.fr.nzi 3.3 0.183
fr oc Tatoeba.fr.oc 0.1 0.064
fr oke JW300.fr.oke 6.2 0.212
fr om JW300.fr.om 14.4 0.393
fr os JW300.fr.os 13.2 0.327
fr pa JW300.fr.pa 9.1 0.211
fr pap JW300.fr.pap 27.8 0.464
fr pck bible-uedin.fr.pck 3.6 0.208
fr pcm JW300.fr.pcm 4.3 0.183
fr pis JW300.fr.pis 29.0 0.486
fr pl Tatoeba.fr.pl 40.7 0.625
fr plt bible-uedin.fr.plt 2.8 0.253
fr pon JW300.fr.pon 23.9 0.458
fr prl JW300.fr.prl 4.0 0.182
fr pso JW300.fr.pso 3.3 0.154
fr pt_br+pt_BR+pt_PT+pt Tatoeba.fr.pt 42.8 0.640
fr qu JW300.fr.qu 7.6 0.261
fr quy JW300.fr.quy 8.4 0.287
fr qvi JW300.fr.qvi 1.7 0.179
fr rar JW300.fr.rar 6.2 0.250
fr rnd JW300.fr.rnd 21.8 0.431
fr ro Tatoeba.fr.ro 42.1 0.640
fr rsl JW300.fr.rsl 2.9 0.147
fr rw JW300.fr.rw 25.5 0.483
fr seh JW300.fr.seh 3.2 0.205
fr sid JW300.fr.sid 2.3 0.186
fr si JW300.fr.si 10.2 0.228
fr sk JW300.fr.sk 24.9 0.456
fr sl JW300.fr.sl 20.1 0.433
fr sm JW300.fr.sm 28.8 0.474
fr sn JW300.fr.sn 23.4 0.507
fr so bible-uedin.fr.so 1.6 0.187
fr sop JW300.fr.sop 2.8 0.211
fr sq GlobalVoices.fr.sq 15.9 0.432
fr srm JW300.fr.srm 7.7 0.225
fr srn JW300.fr.srn 27.4 0.459
fr ssp JW300.fr.ssp 4.1 0.178
fr st JW300.fr.st 34.6 0.540
fr swc JW300.fr.swc 28.2 0.499
fr sw GlobalVoices.fr.sw 12.0 0.404
fr ta_LK+ta JW300.fr.ta 2.5 0.116
fr tcf JW300.fr.tcf 3.3 0.216
fr tdt JW300.fr.tdt 13.9 0.314
fr te JW300.fr.te 3.6 0.166
fr tg_TJ+tg JW300.fr.tg 8.9 0.242
fr th JW300.fr.th 19.4 0.342
fr tiv JW300.fr.tiv 23.5 0.406
fr tk JW300.fr.tk 10.5 0.275
fr tll JW300.fr.tll 24.6 0.467
fr tl_PH+tl Tatoeba.fr.tl 23.7 0.537
fr tn JW300.fr.tn 33.1 0.525
fr tog JW300.fr.tog 3.0 0.209
fr toj JW300.fr.toj 4.1 0.205
fr to JW300.fr.to 37.0 0.518
fr top JW300.fr.top 2.3 0.172
fr tsc JW300.fr.tsc 4.5 0.226
fr ts JW300.fr.ts 31.4 0.525
fr tt JW300.fr.tt 13.3 0.336
fr tum JW300.fr.tum 23.0 0.458
fr tvl JW300.fr.tvl 32.6 0.497
fr tw JW300.fr.tw 27.9 0.469
fr ty JW300.fr.ty 39.6 0.561
fr tzo JW300.fr.tzo 9.5 0.299
fr ug Tatoeba.fr.ug 0.0 0.084
fr uk Tatoeba.fr.uk 39.4 0.581
fr urh JW300.fr.urh 4.6 0.194
fr ur_PK+ur GlobalVoices.fr.ur 0.2 0.117
fr vi_VN+vi Tatoeba.fr.vi 33.0 0.508
fr vmw JW300.fr.vmw 2.1 0.184
fr vsl JW300.fr.vsl 4.6 0.189
fr war JW300.fr.war 33.7 0.538
fr wes JW300.fr.wes 5.0 0.192
fr wls JW300.fr.wls 27.5 0.478
fr xh JW300.fr.xh 25.1 0.523
fr yao JW300.fr.yao 2.5 0.188
fr yap JW300.fr.yap 25.8 0.434
fr yo JW300.fr.yo 25.9 0.415
fr zlm JW300.fr.zlm 3.8 0.203
fr zne JW300.fr.zne 24.1 0.460
fr zul+zu JW300.fr.zu 27.2 0.548
fse en JW300.fse.en 5.1 0.199
fse es JW300.fse.es 4.1 0.173
fse fi JW300.fse.fi 90.2 0.943
fse fr JW300.fse.fr 4.9 0.186
fse sv JW300.fse.sv 5.4 0.194
fur en Tatoeba.fur.en 93.4 0.944
fy en Tatoeba.fy.en 0.2 0.053
gaa en JW300.gaa.en 41.0 0.567
gaa es JW300.gaa.es 28.6 0.463
gaa fi JW300.gaa.fi 26.4 0.498
gaa fr JW300.gaa.fr 27.8 0.455
gaa sv JW300.gaa.sv 30.1 0.489
ga en Tatoeba.ga.en 50.1 0.671
gd en Tatoeba.gd.en 0.4 0.104
gd es bible-uedin.gd.es 0.0 0.075
gd fi bible-uedin.gd.fi 0.1 0.044
gd fr bible-uedin.gd.fr 0.0 0.104
gd sv bible-uedin.gd.sv 0.0 0.037
gil en JW300.gil.en 36.0 0.522
gil es JW300.gil.es 21.8 0.398
gil fi JW300.gil.fi 23.1 0.447
gil fr JW300.gil.fr 24.9 0.424
gil sv JW300.gil.sv 25.8 0.441
gl en Tatoeba.gl.en 42.5 0.604
gl es Tatoeba.gl.es 68.7 0.802
guc en JW300.guc.en 2.4 0.132
guc fi JW300.guc.fi 1.1 0.125
guc fr JW300.guc.fr 2.0 0.135
@ -961,7 +1356,7 @@ ho en JW300.ho.en 26.8 0.428
ho fr JW300.ho.fr 19.8 0.367
ho sv JW300.ho.sv 19.8 0.377
hr fi JW300.hr.fi 25.0 0.519
hr fr JW300.hr.fr 25.4 0.478
hr fr JW300.hr.fr 26.1 0.482
hr sv JW300.hr.sv 30.5 0.526
ht en JW300.ht.en 37.5 0.542
ht en Tatoeba.ht.en 57.0 0.689
@ -970,7 +1365,7 @@ ht fr JW300.ht.fr 28.4 0.469
ht sv JW300.ht.sv 27.9 0.463
hu en Tatoeba.hu.en 52.9 0.683
hu fi Tatoeba.hu.fi 48.2 0.700
hu fr Tatoeba.hu.fr 49.6 0.657
hu fr Tatoeba.hu.fr 50.3 0.660
hy en Tatoeba.hy.en 29.5 0.466
hy fi JW300.hy.fi 13.4 0.338
hy fr JW300.hy.fr 17.7 0.346
@ -1013,7 +1408,7 @@ it en newstest2009.it.en 34.0 0.594
it en Tatoeba.it.en 70.9 0.808
ja en Tatoeba.ja.en 41.7 0.589
ja fi Tatoeba.ja.fi 21.2 0.448
ja fr Tatoeba.ja.fr 32.4 0.522
ja fr Tatoeba.ja.fr 33.6 0.534
jap en bible-uedin.jap.en 52.6 0.703
jap fi bible-uedin.jap.fi 1.8 0.166
jap fr bible-uedin.jap.fr 1.9 0.162
@ -1338,14 +1733,14 @@ ro sv JW300.ro.sv 31.2 0.529
rsl en JW300.rsl.en 4.2 0.175
rsl fi JW300.rsl.fi 2.4 0.169
rsl sv JW300.rsl.sv 3.7 0.168
ru en newstest2012.ru.en 35.3 0.607
ru en newstest2013.ru.en 28.4 0.548
ru en newstest2014-ruen.ru.en 32.6 0.595
ru en newstest2015-enru.ru.en 30.6 0.569
ru en newstest2016-enru.ru.en 30.4 0.570
ru en newstest2012.ru.en 35.3 0.606
ru en newstest2013.ru.en 28.2 0.547
ru en newstest2014-ruen.ru.en 32.5 0.595
ru en newstest2015-enru.ru.en 30.7 0.569
ru en newstest2016-enru.ru.en 30.5 0.569
ru en newstest2017-enru.ru.en 33.8 0.596
ru en newstest2018-enru.ru.en 30.1 0.570
ru en newstest2019-ruen.ru.en 32.2 0.582
ru en newstest2018-enru.ru.en 30.0 0.569
ru en newstest2019-ruen.ru.en 32.0 0.581
ru en Tatoeba.ru.en 60.1 0.727
ru fi Tatoeba.ru.fi 40.9 0.649
run en JW300.run.en 42.7 0.583
@ -1402,6 +1797,7 @@ sv ar GlobalVoices.sv.ar 4.6 0.312
sv ase JW300.sv.ase 40.5 0.572
sv as JW300.sv.as 0.3 0.066
sv ay JW300.sv.ay 6.6 0.262
sv az_IR+az JW300.sv.az 16.3 0.378
sv ba JW300.sv.ba 2.0 0.127
sv bas JW300.sv.bas 5.1 0.180
sv bci JW300.sv.bci 12.1 0.280
@ -1418,6 +1814,7 @@ sv cak JW300.sv.cak 1.7 0.136
sv cat JW300.sv.cat 3.6 0.164
sv ceb JW300.sv.ceb 39.2 0.609
sv chk JW300.sv.chk 20.7 0.421
sv cmn+cn+yue+ze_zh+zh_cn+zh_CN+zh_HK+zh_tw+zh_TW+zh_yue+zhs+zht+zh bible-uedin.sv.zh 24.2 0.342
sv crp bible-uedin.sv.crp 4.1 0.198
sv crs JW300.sv.crs 32.4 0.512
sv csg JW300.sv.csg 3.9 0.167
@ -1513,6 +1910,7 @@ sv mos JW300.sv.mos 22.4 0.379
sv mr JW300.sv.mr 2.5 0.128
sv mt JW300.sv.mt 32.2 0.509
sv my JW300.sv.my 2.1 0.129
sv nb_NO+nb+nn_NO+nn+nog+no_nb+no JW300.sv.no 39.3 0.590
sv ncj JW300.sv.ncj 2.5 0.189
sv nd JW300.sv.nd 2.2 0.191
sv ne JW300.sv.ne 3.5 0.142
@ -1540,6 +1938,7 @@ sv plt bible-uedin.sv.plt 2.9 0.254
sv pon JW300.sv.pon 26.0 0.491
sv prl JW300.sv.prl 4.0 0.164
sv pso JW300.sv.pso 3.7 0.155
sv pt_br+pt_BR+pt_PT+pt GlobalVoices.sv.pt 22.8 0.496
sv que JW300.sv.que 1.7 0.154
sv qu JW300.sv.qu 8.0 0.263
sv quy JW300.sv.quy 1.4 0.171
@ -1569,13 +1968,16 @@ sv st JW300.sv.st 38.8 0.584
sv sv Tatoeba.sv.sv 49.2 0.741
sv swc JW300.sv.swc 30.1 0.536
sv sw GlobalVoices.sv.sw 10.1 0.390
sv ta_LK+ta JW300.sv.ta 5.4 0.163
sv tdt JW300.sv.tdt 17.3 0.359
sv te JW300.sv.te 1.9 0.135
sv tg_TJ+tg JW300.sv.tg 9.6 0.251
sv th JW300.sv.th 21.2 0.373
sv ti JW300.sv.ti 15.7 0.282
sv tiv JW300.sv.tiv 25.2 0.439
sv tk JW300.sv.tk 9.9 0.270
sv tll JW300.sv.tll 24.9 0.484
sv tl_PH+tl JW300.sv.tl 39.0 0.612
sv tn JW300.sv.tn 36.3 0.561
sv tog JW300.sv.tog 3.4 0.211
sv toi JW300.sv.toi 23.2 0.512
@ -1594,7 +1996,9 @@ sv tzo JW300.sv.tzo 10.7 0.311
sv uk JW300.sv.uk 24.0 0.447
sv umb JW300.sv.umb 20.4 0.431
sv urh JW300.sv.urh 5.1 0.196
sv ur_PK+ur JW300.sv.ur 6.3 0.188
sv ve JW300.sv.ve 26.4 0.496
sv vi_VN+vi JW300.sv.vi 38.8 0.516
sv vmw JW300.sv.vmw 2.5 0.186
sv vsl JW300.sv.vsl 4.4 0.169
sv wal JW300.sv.wal 10.4 0.340
@ -1609,6 +2013,7 @@ sv yua JW300.sv.yua 14.3 0.363
sv zai JW300.sv.zai 7.9 0.251
sv zlm JW300.sv.zlm 4.5 0.232
sv zne JW300.sv.zne 23.8 0.474
sv zul+zu JW300.sv.zu 31.1 0.603
swc en JW300.swc.en 41.1 0.569
swc fi JW300.swc.fi 26.0 0.489
swc sv JW300.swc.sv 30.7 0.495