mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-10-26 21:19:02 +03:00
latest models
This commit is contained in:
parent
44182291dc
commit
2805bf49e7
@ -55,7 +55,10 @@ best_dist_all:
|
||||
## (BLEU needs to be above MIN_BLEU_SCORE)
|
||||
## NEW: don't trust models tested with GNOME test sets!
|
||||
|
||||
best_dist:
|
||||
|
||||
## OLD version of finding the best model
|
||||
## --> this didn't properly look at different variants in the same folder
|
||||
best_dist_old:
|
||||
@m=0;\
|
||||
s=''; \
|
||||
echo "------------------------------------------------"; \
|
||||
@ -85,6 +88,54 @@ best_dist:
|
||||
fi
|
||||
|
||||
|
||||
## Find the best model for ${LANGPAIRSTR} and publish only that one.
##
##  - every work directory work-<d> (d in ${ALT_MODEL_DIR}) is scanned
##  - the evaluation set name is taken from the validation file
##    work-<d>/${LANGPAIRSTR}/val/*.trg; work directories whose evaluation
##    set is GNOME are ignored (those scores are not trusted)
##  - all model variants in a work directory are compared; an eval file is
##    expected to be named
##        <evalset>.<dataset>.<pre-src>-<pre-trg><digit>.<modeltype>.*.eval
##    and the BLEU score is the 3rd space-separated field of its 'BLEU+' line
##  - eval files without a BLEU score are skipped instead of being passed
##    to bc as an empty operand
##  - the winner is packaged with 'dist-<d>' only if its BLEU score is
##    above MIN_BLEU_SCORE
##
## NOTE: the recipe uses bash-style (( ... )) arithmetic tests and needs
##       bc and a find that supports -printf.
best_dist:
	@m=0;\
	s=''; \
	echo "------------------------------------------------"; \
	echo "search best model for ${LANGPAIRSTR}"; \
	for d in ${ALT_MODEL_DIR}; do \
	  e=`ls work-$$d/${LANGPAIRSTR}/val/*.trg | xargs basename | sed 's/\.trg//'`; \
	  echo "evaldata = $$e"; \
	  if [ "$$e" != "GNOME" ]; then \
	    I=`find work-$$d/${LANGPAIRSTR}/ -maxdepth 1 -name "$$e.*.eval" -printf "%f\n"`; \
	    for i in $$I; do \
	      x=`echo $$i | cut -f3 -d. | cut -f1 -d-`; \
	      y=`echo $$i | cut -f3 -d. | cut -f2 -d- | sed 's/[0-9]$$//'`; \
	      z=`echo $$i | cut -f2 -d.`; \
	      v=`echo $$i | cut -f4 -d.`; \
	      b=`grep 'BLEU+' work-$$d/${LANGPAIRSTR}/$$e.$$z.$$x-$$y[0-9].$$v.*.eval | cut -f3 -d' '`; \
	      if [ -z "$$b" ]; then \
	        echo "$$d/$$i has no BLEU score (skipped)!"; \
	      elif (( $$(echo "$$m-$$b < 0" |bc -l) )); then \
	        echo "$$d/$$i ($$b) is better than $$s ($$m)!"; \
	        m=$$b; \
	        E=$$i; \
	        s=$$d; \
	      else \
	        echo "$$d/$$i ($$b) is worse than $$s ($$m)!"; \
	      fi \
	    done; \
	  fi \
	done; \
	echo "------------------------------------------------"; \
	if [ "$$s" != "" ]; then \
	  if (( $$(echo "$$m > ${MIN_BLEU_SCORE}" |bc -l) )); then \
	    x=`echo $$E | cut -f3 -d. | cut -f1 -d-`; \
	    y=`echo $$E | cut -f3 -d. | cut -f2 -d- | sed 's/[0-9]$$//'`; \
	    z=`echo $$E | cut -f2 -d.`; \
	    v=`echo $$E | cut -f4 -d.`; \
	    ${MAKE} \
	      MODELSHOME=${PWD}/models \
	      PRE_SRC=$$x PRE_TRG=$$y \
	      DATASET=$$z \
	      MODELTYPE=$$v \
	      MODELS_URL=https://object.pouta.csc.fi/OPUS-MT-models dist-$$s; \
	  fi; \
	fi
|
||||
|
||||
|
||||
|
||||
|
||||
## make a package for distribution
|
||||
|
||||
@ -119,6 +170,9 @@ endif
|
||||
POSTPROCESS_SCRIPT = postprocess-${PREPROCESS_TYPE}.sh
|
||||
|
||||
|
||||
|
||||
## make the distribution package including test evaluation files and README
|
||||
|
||||
${DIST_PACKAGE}: ${MODEL_FINAL}
|
||||
ifneq (${SKIP_DIST_EVAL},1)
|
||||
@${MAKE} $(TEST_EVALUATION)
|
||||
@ -147,10 +201,10 @@ endif
|
||||
echo '## Benchmarks' >> ${WORKDIR}/README.md; \
|
||||
echo '' >> ${WORKDIR}/README.md; \
|
||||
cd ${WORKDIR}; \
|
||||
grep -H BLEU *k${NR}.*eval | \
|
||||
grep -H BLEU *.${DATASET}.${PRE_SRC}-${PRE_TRG}${NR}.${MODELTYPE}.*.eval | \
|
||||
tr '.' '/' | cut -f1,5,6 -d '/' | tr '/' "." > $@.1; \
|
||||
grep BLEU *k${NR}.*eval | cut -f3 -d ' ' > $@.2; \
|
||||
grep chrF *k${NR}.*eval | cut -f3 -d ' ' > $@.3; \
|
||||
grep BLEU *.${DATASET}.${PRE_SRC}-${PRE_TRG}${NR}.${MODELTYPE}.*.eval | cut -f3 -d ' ' > $@.2; \
|
||||
grep chrF *.${DATASET}.${PRE_SRC}-${PRE_TRG}${NR}.${MODELTYPE}.*.eval | cut -f3 -d ' ' > $@.3; \
|
||||
echo '| testset | BLEU | chr-F |' >> README.md; \
|
||||
echo '|-----------------------|-------|-------|' >> README.md; \
|
||||
paste $@.1 $@.2 $@.3 | sed "s/\t/ | /g;s/^/| /;s/$$/ |/" >> README.md; \
|
||||
|
@ -20,6 +20,7 @@ NR_GPUS = 1
|
||||
HPC_NODES = 1
|
||||
HPC_DISK = 500
|
||||
HPC_QUEUE = serial
|
||||
HPC_GPUQUEUE = gpu
|
||||
# HPC_MODULES = nlpl-opus python-env/3.4.1 efmaral moses
|
||||
# HPC_MODULES = nlpl-opus moses cuda-env marian python-3.5.3-ml
|
||||
HPC_MODULES = ${GPU_MODULES}
|
||||
|
@ -20,12 +20,56 @@
|
||||
## - make dist-allmodels
|
||||
## Run a target for all model variants of all language pairs,
## e.g. 'make dist-allmodels' runs 'dist' once per model.
##
## For every language pair l in ${ALL_LANG_PAIRS}, each file
## ${WORKHOME}/<l>/*.best-perplexity.npz (top level only) is treated as one
## model variant. Its name is split at '.' into
##     <dataset>.<pre-src>-<pre-trg>.<modeltype>. ...
## and these parts are handed to the sub-make as DATASET, PRE_SRC, PRE_TRG
## and MODELTYPE. SRCLANGS/TRGLANGS come from the language pair name
## (<src>-<trg>, with '+' separating several languages on one side).
## The stem of the pattern rule (the part before '-allmodels') is the
## target that is built for every variant.
%-allmodels:
	for l in ${ALL_LANG_PAIRS}; do \
	  s=`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`; \
	  t=`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`; \
	  m=`find ${WORKHOME}/$$l -maxdepth 1 -name '*.best-perplexity.npz' -printf "%f\n"`; \
	  for i in $$m; do \
	    d=`echo $$i | cut -f1 -d.`; \
	    x=`echo $$i | cut -f2 -d. | cut -f1 -d-`; \
	    y=`echo $$i | cut -f2 -d. | cut -f2 -d-`; \
	    v=`echo $$i | cut -f3 -d.`; \
	    echo "model = $$i"; \
	    echo "dataset = $$d"; \
	    echo "src-lang = $$s"; \
	    echo "trg-lang = $$t"; \
	    echo "pre-src = $$x"; \
	    echo "pre-trg = $$y"; \
	    echo "type = $$v"; \
	    ${MAKE} \
	      SRCLANGS="$$s" TRGLANGS="$$t" \
	      DATASET=$$d \
	      PRE_SRC=$$x PRE_TRG=$$y \
	      MODELTYPE=$$v ${@:-allmodels=}; \
	  done \
	done
|
||||
|
||||
## OLD: doesn't work for different model variants
|
||||
##
|
||||
# %-allmodels:
|
||||
# for l in ${ALL_LANG_PAIRS}; do \
|
||||
# if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
|
||||
# ${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
|
||||
# TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmodels=}; \
|
||||
# fi \
|
||||
# done
|
||||
|
||||
|
||||
## List all model variants found in ${WORKDIR}.
##
## Every *.best-perplexity.npz file directly in ${WORKDIR} is split at '.'
## into   <dataset>.<pre-src>-<pre-trg>.<type>. ...   and the parts are
## printed. pre-src is the part before the '-' of the second field,
## pre-trg the part after it.
listallmodels:
	@m=`find ${WORKDIR} -maxdepth 1 -name '*.best-perplexity.npz' -printf "%f\n"`; \
	for i in $$m; do \
	  d=`echo $$i | cut -f1 -d.`; \
	  s=`echo $$i | cut -f2 -d. | cut -f1 -d-`; \
	  t=`echo $$i | cut -f2 -d. | cut -f2 -d-`; \
	  v=`echo $$i | cut -f3 -d.`; \
	  echo "model = $$i"; \
	  echo "dataset = $$d"; \
	  echo "pre-src = $$s"; \
	  echo "pre-trg = $$t"; \
	  echo "type = $$v"; \
	done
|
||||
|
||||
|
||||
|
||||
## only bilingual models
|
||||
%-allbilingual:
|
||||
for l in ${ALL_BILINGUAL_MODELS}; do \
|
||||
|
@ -31,7 +31,7 @@ ifdef EMAIL
|
||||
endif
|
||||
echo '#SBATCH -n 1' >> $@
|
||||
echo '#SBATCH -N 1' >> $@
|
||||
echo '#SBATCH -p gpu' >> $@
|
||||
echo '#SBATCH -p ${HPC_GPUQUEUE}' >> $@
|
||||
ifeq (${shell hostname --domain},bullx)
|
||||
echo '#SBATCH --account=${CSCPROJECT}' >> $@
|
||||
echo '#SBATCH --gres=gpu:${GPU}:${NR_GPUS},nvme:${HPC_DISK}' >> $@
|
||||
|
@ -55,7 +55,7 @@ MODELINDEX = ${OBJECTSTORAGE}/${MODELCONTAINER}/index.txt
|
||||
# BASEMODELNAME = ${patsubst %.zip,%,${notdir ${BASEMODELZIP}}}
|
||||
|
||||
BASEMODELHOME = ${OBJECTSTORAGE}/${MODELCONTAINER}/${LANGPAIR}
|
||||
BASEMODELZIP = ${lastword ${sort ${notdir ${shell grep '${LANGPAIR}/opus-.*\.zip' model-index.txt}}}}
|
||||
BASEMODELZIP = ${lastword ${sort ${notdir ${shell grep '${LANGPAIR}/${DATASET}-.*\.zip' model-index.txt}}}}
|
||||
BASEMODELNAME = ${BASEMODELZIP:.zip=}
|
||||
|
||||
TUNED_MODEL = ${LANGPAIR}/${MODEL}/model/${BASEMODELNAME}_${MODEL}.transformer.model
|
||||
|
@ -54,3 +54,31 @@
|
||||
| newstest2019-deen.de.en | 39.6 | 0.637 |
|
||||
| Tatoeba.de.en | 55.1 | 0.704 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/de-en/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/de-en/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/de-en/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| newssyscomb2009.de.en | 29.4 | 0.557 |
|
||||
| news-test2008.de.en | 27.8 | 0.548 |
|
||||
| newstest2009.de.en | 26.8 | 0.543 |
|
||||
| newstest2010.de.en | 30.2 | 0.584 |
|
||||
| newstest2011.de.en | 27.4 | 0.556 |
|
||||
| newstest2012.de.en | 29.1 | 0.569 |
|
||||
| newstest2013.de.en | 32.1 | 0.583 |
|
||||
| newstest2014-deen.de.en | 34.0 | 0.600 |
|
||||
| newstest2015-ende.de.en | 34.2 | 0.599 |
|
||||
| newstest2016-ende.de.en | 40.4 | 0.649 |
|
||||
| newstest2017-ende.de.en | 35.7 | 0.610 |
|
||||
| newstest2018-ende.de.en | 43.7 | 0.667 |
|
||||
| newstest2019-deen.de.en | 40.1 | 0.642 |
|
||||
| Tatoeba.de.en | 55.4 | 0.707 |
|
||||
|
||||
|
@ -43,3 +43,18 @@
|
||||
|-----------------------|-------|-------|
|
||||
| JW300.en.bcl | 53.8 | 0.719 |
|
||||
|
||||
# opus+bt-2020-02-26.zip
|
||||
|
||||
* dataset: opus+bt
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus+bt-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/opus+bt-2020-02-26.zip)
|
||||
* test set translations: [opus+bt-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/opus+bt-2020-02-26.test.txt)
|
||||
* test set scores: [opus+bt-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-bcl/opus+bt-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| JW300.en.bcl | 54.3 | 0.722 |
|
||||
|
||||
|
@ -52,3 +52,30 @@
|
||||
| newstest2019-ende.en.de | 41.6 | 0.655 |
|
||||
| Tatoeba.en.de | 46.8 | 0.655 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-de/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-de/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-de/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| newssyscomb2009.en.de | 23.5 | 0.540 |
|
||||
| news-test2008.en.de | 23.5 | 0.529 |
|
||||
| newstest2009.en.de | 22.3 | 0.530 |
|
||||
| newstest2010.en.de | 24.9 | 0.544 |
|
||||
| newstest2011.en.de | 22.5 | 0.524 |
|
||||
| newstest2012.en.de | 23.0 | 0.525 |
|
||||
| newstest2013.en.de | 26.9 | 0.553 |
|
||||
| newstest2015-ende.en.de | 31.1 | 0.594 |
|
||||
| newstest2016-ende.en.de | 37.0 | 0.636 |
|
||||
| newstest2017-ende.en.de | 29.9 | 0.586 |
|
||||
| newstest2018-ende.en.de | 45.2 | 0.690 |
|
||||
| newstest2019-ende.en.de | 40.9 | 0.654 |
|
||||
| Tatoeba.en.de | 47.3 | 0.664 |
|
||||
|
||||
|
@ -67,3 +67,26 @@
|
||||
| newstestB2017-enfi.en.fi | 22.3 | 0.560 |
|
||||
| Tatoeba.en.fi | 41.1 | 0.645 |
|
||||
|
||||
# opus+bt-2020-02-26.zip
|
||||
|
||||
* dataset: opus+bt
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus+bt-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-fi/opus+bt-2020-02-26.zip)
|
||||
* test set translations: [opus+bt-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-fi/opus+bt-2020-02-26.test.txt)
|
||||
* test set scores: [opus+bt-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-fi/opus+bt-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| newsdev2015-enfi.en.fi | 23.5 | 0.572 |
|
||||
| newstest2015-enfi.en.fi | 25.7 | 0.586 |
|
||||
| newstest2016-enfi.en.fi | 26.4 | 0.594 |
|
||||
| newstest2017-enfi.en.fi | 29.8 | 0.624 |
|
||||
| newstest2018-enfi.en.fi | 19.8 | 0.545 |
|
||||
| newstest2019-enfi.en.fi | 23.7 | 0.562 |
|
||||
| newstestB2016-enfi.en.fi | 21.3 | 0.558 |
|
||||
| newstestB2017-enfi.en.fi | 24.9 | 0.584 |
|
||||
| Tatoeba.en.fi | 41.4 | 0.650 |
|
||||
|
||||
|
@ -22,3 +22,27 @@
|
||||
| newstest2013.en.fr | 24.0 | 0.525 |
|
||||
| Tatoeba.en.fr | 56.7 | 0.682 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-fr/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-fr/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-fr/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| newsdiscussdev2015-enfr.en.fr | 33.8 | 0.602 |
|
||||
| newsdiscusstest2015-enfr.en.fr | 40.0 | 0.643 |
|
||||
| newssyscomb2009.en.fr | 29.8 | 0.584 |
|
||||
| news-test2008.en.fr | 27.5 | 0.554 |
|
||||
| newstest2009.en.fr | 29.4 | 0.577 |
|
||||
| newstest2010.en.fr | 32.7 | 0.596 |
|
||||
| newstest2011.en.fr | 34.3 | 0.611 |
|
||||
| newstest2012.en.fr | 31.8 | 0.592 |
|
||||
| newstest2013.en.fr | 33.2 | 0.589 |
|
||||
| Tatoeba.en.fr | 50.5 | 0.672 |
|
||||
|
||||
|
@ -13,3 +13,18 @@
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.en.sv | 60.4 | 0.733 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-sv/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-sv/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-sv/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.en.sv | 60.1 | 0.736 |
|
||||
|
||||
|
15
models/en-tl/README.md
Normal file
15
models/en-tl/README.md
Normal file
@ -0,0 +1,15 @@
|
||||
# opus+bt-2020-02-26.zip
|
||||
|
||||
* dataset: opus+bt
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus+bt-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tl/opus+bt-2020-02-26.zip)
|
||||
* test set translations: [opus+bt-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tl/opus+bt-2020-02-26.test.txt)
|
||||
* test set scores: [opus+bt-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tl/opus+bt-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.en.tl | 26.6 | 0.577 |
|
||||
|
@ -28,3 +28,18 @@
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.fi.fr | 49.9 | 0.657 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/fi-fr/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/fi-fr/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/fi-fr/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.fi.fr | 50.7 | 0.670 |
|
||||
|
||||
|
@ -28,3 +28,18 @@
|
||||
|-----------------------|-------|-------|
|
||||
| JW300.fi.nl | 30.2 | 0.544 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/fi-nl/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/fi-nl/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/fi-nl/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| JW300.fi.nl | 30.5 | 0.557 |
|
||||
|
||||
|
@ -23,3 +23,28 @@
|
||||
| newstest2014-fren.fr.en | 35.4 | 0.607 |
|
||||
| Tatoeba.fr.en | 60.9 | 0.726 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/fr-en/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/fr-en/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/fr-en/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| newsdiscussdev2015-enfr.fr.en | 33.1 | 0.580 |
|
||||
| newsdiscusstest2015-enfr.fr.en | 38.7 | 0.614 |
|
||||
| newssyscomb2009.fr.en | 30.3 | 0.569 |
|
||||
| news-test2008.fr.en | 26.2 | 0.542 |
|
||||
| newstest2009.fr.en | 30.2 | 0.570 |
|
||||
| newstest2010.fr.en | 32.2 | 0.590 |
|
||||
| newstest2011.fr.en | 33.0 | 0.597 |
|
||||
| newstest2012.fr.en | 32.8 | 0.591 |
|
||||
| newstest2013.fr.en | 33.9 | 0.591 |
|
||||
| newstest2014-fren.fr.en | 37.8 | 0.633 |
|
||||
| Tatoeba.fr.en | 57.5 | 0.720 |
|
||||
|
||||
|
@ -13,3 +13,18 @@
|
||||
|-----------------------|-------|-------|
|
||||
| JW300.nl.fi | 28.1 | 0.553 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/nl-fi/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/nl-fi/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/nl-fi/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| JW300.nl.fi | 28.6 | 0.569 |
|
||||
|
||||
|
@ -90,3 +90,26 @@
|
||||
| newstest2019-ruen.ru.en | 31.5 | 0.577 |
|
||||
| Tatoeba.ru.en | 60.8 | 0.734 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/ru-en/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/ru-en/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/ru-en/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| newstest2012.ru.en | 34.8 | 0.603 |
|
||||
| newstest2013.ru.en | 27.9 | 0.545 |
|
||||
| newstest2014-ruen.ru.en | 31.9 | 0.591 |
|
||||
| newstest2015-enru.ru.en | 30.4 | 0.568 |
|
||||
| newstest2016-enru.ru.en | 30.1 | 0.565 |
|
||||
| newstest2017-enru.ru.en | 33.4 | 0.593 |
|
||||
| newstest2018-enru.ru.en | 29.6 | 0.565 |
|
||||
| newstest2019-ruen.ru.en | 31.4 | 0.576 |
|
||||
| Tatoeba.ru.en | 61.1 | 0.736 |
|
||||
|
||||
|
@ -43,3 +43,18 @@
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.sv.en | 65.3 | 0.769 |
|
||||
|
||||
# opus-2020-02-26.zip
|
||||
|
||||
* dataset: opus
|
||||
* model: transformer-align
|
||||
* pre-processing: normalization + SentencePiece
|
||||
* download: [opus-2020-02-26.zip](https://object.pouta.csc.fi/OPUS-MT-models/sv-en/opus-2020-02-26.zip)
|
||||
* test set translations: [opus-2020-02-26.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/sv-en/opus-2020-02-26.test.txt)
|
||||
* test set scores: [opus-2020-02-26.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/sv-en/opus-2020-02-26.eval.txt)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| testset | BLEU | chr-F |
|
||||
|-----------------------|-------|-------|
|
||||
| Tatoeba.sv.en | 64.5 | 0.763 |
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user