mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-10-26 21:19:02 +03:00
evaluate arbitrary models
This commit is contained in:
parent
5fd415e69b
commit
62af78cbd8
98
evaluate/Makefile
Normal file
98
evaluate/Makefile
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
|
||||||
|
include ../Makefile.env
|
||||||
|
include ../Makefile.config
|
||||||
|
include ../Makefile.slurm
|
||||||
|
|
||||||
|
|
||||||
|
## language pair to evaluate
## (defaults can be overridden on the command line: make SRC=xx TRG=yy ...)
SRC = en
TRG = de
LANGPAIR = ${SRC}-${TRG}

## model package to evaluate:
## MODELZIP is the last entry (in sorted order) of the local zip files that
## match *-20*.zip in MODELHOME; MODELNAME is its file name without .zip.
## MODELURL is the remote location used when the zip file is not found locally.
MODELHOME = ../models/${LANGPAIR}
MODELZIP = ${lastword ${sort ${wildcard ${MODELHOME}/*-20*.zip}}}
MODELNAME = ${patsubst %.zip,%,${notdir ${MODELZIP}}}
MODELURL = https://object.pouta.csc.fi/OPUS-MT-models/${LANGPAIR}

## test data:
## all gzipped source-language files in TESTSETDIR are candidate test sets;
## TESTSET defaults to the last one in sorted order
TESTSETDIR = ../testsets/${LANGPAIR}
TESTSETS = ${sort ${wildcard ${TESTSETDIR}/*.${SRC}.gz}}
TESTSET = ${lastword ${TESTSETS}}

## working directory and the files created in it
## (CURDIR is the directory make itself runs in; unlike the PWD environment
##  variable it is also correct when make is started with -C <dir>)
##   TEST_PRE   ... pre-processed test set (source side)
##   TEST_TRANS ... translation of TEST_PRE into TRG
##   TEST_EVAL  ... evaluation scores for TEST_TRANS
WORKDIR = ${CURDIR}/${LANGPAIR}/${MODELNAME}
TEST_PRE = ${WORKDIR}/${patsubst %.gz,%.pre,${notdir ${TESTSET}}}
TEST_TRANS = ${TEST_PRE}.${TRG}
TEST_EVAL = ${TEST_TRANS}.eval
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## default target:
##  1. unpack the model (prerequisite)
##  2. run the evaluation in a sub-make; a fresh make process re-reads this
##     makefile, so the parse-time checks for preprocess.sh / postprocess.sh
##     in the model directory see the unpacked files
##  3. remove the unpacked model again
.PHONY: all
all: ${WORKDIR}/model/decoder.yml
	${MAKE} ${TEST_EVAL}
	${MAKE} cleanup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## the unpacked model is treated as an intermediate file, i.e. make may
## remove decoder.yml again once the goals that required it are done
.INTERMEDIATE: ${WORKDIR}/model/decoder.yml

.PHONY: prepare-model
prepare-model: ${WORKDIR}/model/decoder.yml

## get the model package into ${WORKDIR}/model and unpack it:
## copy the local zip file if it exists, otherwise download it from MODELURL
## (the conditional is evaluated when the makefile is parsed)
${WORKDIR}/model/decoder.yml:
	@test -n "${MODELZIP}" || \
	{ echo "no model package found in ${MODELHOME} (set MODELZIP)" >&2; exit 1; }
	mkdir -p ${dir $@}
ifneq (${wildcard ${MODELZIP}},)
	cp ${MODELZIP} ${dir $@}
else
	cd ${dir $@} && wget ${MODELURL}/${notdir ${MODELZIP}}
endif
	cd ${dir $@} && unzip -u *.zip
|
||||||
|
|
||||||
|
|
||||||
|
## subword model of the unpacked model package: every file matching
## source.* except source.tcmodel
## NOTE: keep this recursively expanded (=); the files only exist after the
##       model package has been unpacked
SUBWORD_MODEL = ${filter-out ${WORKDIR}/model/source.tcmodel,${wildcard ${WORKDIR}/model/source.*}}

## pre-processing pipeline for the source side of the test set
## (used as a filter: reads stdin, writes stdout)
##  - if the model package provides preprocess.sh: call it with the source
##    language ID and the subword model
##  - otherwise: punctuation normalisation, tokenization, whitespace
##    clean-up and BPE segmentation
## NOTE: the conditional is evaluated when the makefile is parsed, so the
##       script is only picked up if the model is already unpacked then
##       (the `all` target unpacks first and evaluates in a sub-make)
ifneq (${wildcard ${WORKDIR}/model/preprocess.sh},)
  PREPROCESS = ${WORKDIR}/model/preprocess.sh ${SRC} ${SUBWORD_MODEL}
else
  ## sed: squeeze runs of spaces into one space (the pattern needs two
  ## spaces before the star, otherwise it also matches the empty string and
  ## puts a space between all characters), then strip leading/trailing spaces
  PREPROCESS = ${TOKENIZER}/replace-unicode-punctuation.perl |\
               ${TOKENIZER}/remove-non-printing-char.perl |\
               ${TOKENIZER}/normalize-punctuation.perl -l ${SRC} |\
               ${TOKENIZER}/tokenizer.perl -a -l ${SRC} |\
               sed 's/  */ /g;s/^ *//g;s/ *$$//g' |\
               python3 ${SNMTPATH}/apply_bpe.py -c ${SUBWORD_MODEL}
endif
|
||||||
|
|
||||||
|
## post-processing filter for the decoder output (stdin -> stdout):
## without a postprocess.sh in the unpacked model package, undo the
## subword segmentation markers (@@ and @-@) with sed and detokenize for
## the target language; with the script, simply run the script.
## The check happens while the makefile is parsed.
ifeq ($(wildcard $(WORKDIR)/model/postprocess.sh),)
  POSTPROCESS = sed 's/\@\@ //g;s/ \@\@//g;s/ \@\-\@ /-/g' |\
                $(TOKENIZER)/detokenizer.perl -l $(TRG)
else
  POSTPROCESS = $(WORKDIR)/model/postprocess.sh
endif
|
||||||
|
|
||||||
|
|
||||||
|
## pre-process the test set
.PHONY: prepare-data
prepare-data: ${TEST_PRE}

## unpack the gzipped source side of the test set and pipe it through the
## pre-processing pipeline; the unpacked model is a prerequisite because the
## pipeline reads files from ${WORKDIR}/model
${TEST_PRE}: ${WORKDIR}/%.pre: ${TESTSETDIR}/%.gz ${WORKDIR}/model/decoder.yml
	zcat $< | ${PREPROCESS} > $@
|
||||||
|
|
||||||
|
|
||||||
|
.PHONY: translate
translate: ${TEST_TRANS}

## translate test set
## - the decoder is started from inside the model directory (the directory
##   of the second prerequisite) because decoder.yml is given by a relative
##   path; `&&` makes sure nothing is run if that directory cannot be entered
## - the decoder output is post-processed and leading/trailing spaces are
##   stripped before it is written to the target file
${TEST_TRANS}: ${TEST_PRE} ${WORKDIR}/model/decoder.yml
	mkdir -p ${dir $@}
	cd ${dir ${word 2,$^}} && \
	${LOADMODS} && ${MARIAN}/marian-decoder -i $< \
		-c decoder.yml \
		-d ${MARIAN_GPUS} \
		${MARIAN_DECODER_FLAGS} |\
	${POSTPROCESS} |\
	sed 's/^ *//;s/ *$$//' > $@
|
||||||
|
|
||||||
|
|
||||||
|
## evaluate the translated test set
.PHONY: eval
eval: ${TEST_EVAL}

## score the translation with sacrebleu against the target side of the test
## set (same file name as TESTSET with the TRG instead of the SRC extension):
## the first call writes the default sacrebleu score, the second one appends
## chrF; the unpacked reference ($@.ref) is only a temporary file
${TEST_EVAL}: ${TEST_TRANS}
	zcat ${patsubst %.${SRC}.gz,%.${TRG}.gz,${TESTSET}} > $@.ref
	cat $< | sacrebleu $@.ref > $@
	cat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
	rm -f $@.ref
|
||||||
|
|
||||||
|
|
||||||
|
## remove the unpacked model from the working directory
## (pre-processed data, translations and scores are kept)
.PHONY: cleanup
cleanup:
	rm -fr ${WORKDIR}/model
|
@ -1,11 +1,26 @@
|
|||||||
#
|
#
|
||||||
# fine-tune an existing model
|
# fine-tune an existing model
|
||||||
|
# set SRC and TRG to source and target language IDs
|
||||||
|
# defaults: SRC=en TRG=de
|
||||||
#
|
#
|
||||||
# make news-tune-data ...... create tunig data from newstest sets
|
# make SRC=xx TRG=yy news-tune-data ...... create tuning data from newstest sets
|
||||||
# make all ................. tune and eval
|
# make SRC=xx TRG=yy all ................. tune and eval
|
||||||
|
#
|
||||||
|
# other targets:
|
||||||
|
#
|
||||||
|
# make news-enfi ......... make tuned model for en-fi News
|
||||||
|
# make goethe-defi ....... make model for Goethe Institute data
|
||||||
|
# make data .............. pre-process train/dev data
|
||||||
|
# make tune .............. fine-tune model
|
||||||
|
# make translate ......... translate test set with fine-tuned model
|
||||||
|
# make translate-baseline translate test set with baseline model
|
||||||
|
# make eval .............. evaluate test set translation (fine-tuned)
|
||||||
|
# make eval-baseline ..... evaluate test set translation (baseline)
|
||||||
|
# make compare ........... put together source, reference translation and system output
|
||||||
|
# make compare-baseline .. same as compare but with baseline translation
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# NOTE: this only works for SentencePiece models
|
# NOTE: all this only works for SentencePiece models
|
||||||
#
|
#
|
||||||
# TODO
|
# TODO
|
||||||
# - download base models from ObjectStorage
|
# - download base models from ObjectStorage
|
||||||
@ -51,7 +66,7 @@ MARIAN_EARLY_STOPPING = 5
|
|||||||
|
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
all: ${TEST_SRC}.${TRG}.compare
|
all: ${TEST_SRC}.${TRG}.compare ${TEST_SRC}.baseline.${TRG}.compare
|
||||||
|
|
||||||
|
|
||||||
.PHONY: news-enfi
|
.PHONY: news-enfi
|
||||||
@ -65,15 +80,15 @@ news-enfi:
|
|||||||
TEST_TRG=en-fi/news/test/newstest2019-enfi.fi \
|
TEST_TRG=en-fi/news/test/newstest2019-enfi.fi \
|
||||||
all
|
all
|
||||||
|
|
||||||
.PHONY: goethe-fide
|
.PHONY: goethe-defi
|
||||||
goethe-ende:
|
goethe-defi:
|
||||||
${MAKE} SRC=fi TRG=de MODEL=goethe \
|
${MAKE} SRC=de TRG=fi MODEL=goethe \
|
||||||
TRAIN_SRC=fi-de/goethe/train/goethe-institute-train.fi \
|
TRAIN_SRC=de-fi/goethe/train/goethe-institute-train.de \
|
||||||
TRAIN_TRG=fi-de/goethe/train/goethe-institute-train.de \
|
TRAIN_TRG=de-fi/goethe/train/goethe-institute-train.fi \
|
||||||
DEV_SRC=fi-de/goethe/dev/goethe-institute-dev1.fi \
|
DEV_SRC=de-fi/goethe/dev/goethe-institute-dev1.de \
|
||||||
DEV_TRG=fi-de/goethe/dev/goethe-institute-dev1.de \
|
DEV_TRG=de-fi/goethe/dev/goethe-institute-dev1.fi \
|
||||||
TEST_SRC=fi-de/goethe/test/goethe-institute-test1.fi \
|
TEST_SRC=de-fi/goethe/test/goethe-institute-test1.de \
|
||||||
TEST_TRG=fi-de/goethe/test/goethe-institute-test1.de \
|
TEST_TRG=de-fi/goethe/test/goethe-institute-test1.fi \
|
||||||
all
|
all
|
||||||
|
|
||||||
|
|
||||||
@ -196,11 +211,27 @@ ${TEST_SRC}.${TRG}.gz: ${TEST_SRC}.pre.gz ${TUNED_MODEL}.npz.best-perplexity.npz
|
|||||||
gzip -c > $@
|
gzip -c > $@
|
||||||
|
|
||||||
|
|
||||||
|
.PHONY: translate-baseline
|
||||||
|
translate-baseline: ${TEST_SRC}.baseline.${TRG}.gz
|
||||||
|
|
||||||
.PHONY: eval
|
## translate test set
|
||||||
|
${TEST_SRC}.baseline.${TRG}.gz: ${TEST_SRC}.pre.gz ${LANGPAIR}/${BASEMODELNAME}/decoder.yml
	mkdir -p ${dir $@}
## run the decoder from inside the base model directory (decoder.yml is given
## by a relative path, input and output are addressed via ${PWD});
## `&&` makes sure nothing is run if that directory cannot be entered.
## The first sed removes all spaces and turns the ▁ marker into a space,
## the second one strips leading/trailing spaces.
	cd ${LANGPAIR}/${BASEMODELNAME} && \
	${LOADMODS} && ${MARIAN}/marian-decoder -i ${PWD}/$< \
		-c decoder.yml \
		-d ${MARIAN_GPUS} \
		${MARIAN_DECODER_FLAGS} |\
	sed 's/ //g;s/▁/ /g' | sed 's/^ *//;s/ *$$//' |\
	gzip -c > ${PWD}/$@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.PHONY: eval eval-baseline
|
||||||
eval: ${TEST_SRC}.${TRG}.eval
|
eval: ${TEST_SRC}.${TRG}.eval
|
||||||
|
eval-baseline: ${TEST_SRC}.baseline.${TRG}.eval
|
||||||
|
|
||||||
${TEST_SRC}.${TRG}.eval: ${TEST_SRC}.${TRG}.gz ${TEST_TRG}.gz
|
${TEST_SRC}.${TRG}.eval ${TEST_SRC}.baseline.${TRG}.eval: %.eval: %.gz ${TEST_TRG}.gz
|
||||||
zcat ${TEST_TRG} > $@.ref
|
zcat ${TEST_TRG} > $@.ref
|
||||||
zcat $< | sacrebleu $@.ref > $@
|
zcat $< | sacrebleu $@.ref > $@
|
||||||
zcat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
|
zcat $< | sacrebleu --metrics=chrf --width=3 $@.ref >> $@
|
||||||
@ -208,10 +239,11 @@ ${TEST_SRC}.${TRG}.eval: ${TEST_SRC}.${TRG}.gz ${TEST_TRG}.gz
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
.PHONY: compare
|
.PHONY: compare compare-baseline
|
||||||
compare: ${TEST_SRC}.${TRG}.compare
|
compare: ${TEST_SRC}.${TRG}.compare
|
||||||
|
compare-baseline: ${TEST_SRC}.baseline.${TRG}.compare
|
||||||
|
|
||||||
${TEST_SRC}.${TRG}.compare: ${TEST_SRC}.${TRG}.eval
|
${TEST_SRC}.${TRG}.compare ${TEST_SRC}.baseline.${TRG}.compare: %.compare: %.eval
|
||||||
zcat ${TEST_SRC}.gz > $@.1
|
zcat ${TEST_SRC}.gz > $@.1
|
||||||
zcat ${TEST_TRG}.gz > $@.2
|
zcat ${TEST_TRG}.gz > $@.2
|
||||||
zcat ${<:.eval=.gz} > $@.3
|
zcat ${<:.eval=.gz} > $@.3
|
||||||
|
Loading…
Reference in New Issue
Block a user