mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-10-03 23:57:47 +03:00
comet scores
This commit is contained in:
parent
4f5bc5780f
commit
a3fd40003b
@ -1 +1 @@
|
||||
Subproject commit a04b403bc9ee3d2a50be716d28453fd1d3e45896
|
||||
Subproject commit 6c08afb24684b468635e0471f92efa3d6e3def82
|
@ -1 +1 @@
|
||||
Subproject commit c0247d60f9221255bdb11fbfe73d5b33336278ea
|
||||
Subproject commit 86f3589668521eef16a0a6e6435c531e91ec98ae
|
@ -246,6 +246,7 @@ DEVMINSIZE ?= 250
|
||||
OPUSREAD_ARGS =
|
||||
|
||||
|
||||
|
||||
##----------------------------------------------------------------------------
|
||||
## resources in OPUS
|
||||
##----------------------------------------------------------------------------
|
||||
@ -650,9 +651,6 @@ else
|
||||
endif
|
||||
|
||||
|
||||
## TODO: do we need to reduce workspace for decoding?
|
||||
# MARIAN_DECODER_WORKSPACE = $$((${MARIAN_WORKSPACE} / 2))
|
||||
MARIAN_DECODER_WORKSPACE = 10000
|
||||
|
||||
|
||||
## weights associated with training examples
|
||||
@ -684,6 +682,11 @@ MARIAN_MAXI_BATCH = 512
|
||||
# MARIAN_MAXI_BATCH = 2048
|
||||
|
||||
|
||||
## TODO: do we need to reduce workspace for decoding?
|
||||
# MARIAN_DECODER_WORKSPACE = $$((${MARIAN_WORKSPACE} / 2))
|
||||
MARIAN_DECODER_WORKSPACE = 10000
|
||||
|
||||
|
||||
ifeq ($(GPU_AVAILABLE),1)
|
||||
MARIAN_SCORER_FLAGS = -n1 -d ${MARIAN_GPUS} \
|
||||
--quiet-translation -w ${MARIAN_DECODER_WORKSPACE} \
|
||||
|
@ -121,7 +121,6 @@ TATOEBA_AVAILABLE_SUBSET_SRC = ${sort ${filter-out ${TRG},${subst -, ,${filter
|
||||
|
||||
|
||||
|
||||
|
||||
## all available language pairs
|
||||
## (download the file once and keep it here to get the language pairs in the release)
|
||||
TATOEBA_LANGPAIRS := ${shell if [ ! -e ${RELEASED_TATOEBA_DATA_FILE} ]; then \
|
||||
|
24
lib/dist.mk
24
lib/dist.mk
@ -78,6 +78,7 @@ ifeq (${wildcard $(TEST_EVALUATION)},)
|
||||
endif
|
||||
if [ -e $(TEST_EVALUATION) ]; then \
|
||||
if [ `grep BLEU $(TEST_EVALUATION) | cut -f3 -d ' ' | cut -f1 -d '.'` -ge ${MIN_BLEU_SCORE} ]; then \
|
||||
${MAKE} MODELSHOME=${RELEASEDIR} link-latest-model; \
|
||||
${MAKE} MODELSHOME=${RELEASEDIR} \
|
||||
MODELS_URL=https://object.pouta.csc.fi/${MODEL_CONTAINER} \
|
||||
dist; \
|
||||
@ -347,15 +348,17 @@ endif
|
||||
|
||||
|
||||
link-latest-model:
|
||||
if [ `ls ${patsubst %.zip,%_*,${DIST_PACKAGE}} 2>/dev/null | wc -l` -gt 0 ]; then \
|
||||
rm -f ${DIST_PACKAGE}; \
|
||||
cd ${dir ${DIST_PACKAGE}}; \
|
||||
ln -s `ls -t ${patsubst %.zip,%_*.zip,$(notdir ${DIST_PACKAGE})} | head -1` \
|
||||
${notdir ${DIST_PACKAGE}}; \
|
||||
if [ `ls ${patsubst %.yml,%_*.yml,${DIST_YML}} 2>/dev/null | wc -l` -gt 0 ]; then \
|
||||
rm -f ${DIST_YML}; \
|
||||
cd ${dir ${DIST_YML}}; \
|
||||
ln -s `ls -t $(patsubst %.yml,%_*.yml,$(notdir ${DIST_YML})) | head -1` $(notdir ${DIST_YML}); \
|
||||
if [ `ls $(patsubst %.zip,%_*.zip,$(notdir ${DIST_PACKAGE})) 2>/dev/null | wc -l` -gt 0 ]; then \
|
||||
rm -f $(notdir ${DIST_PACKAGE}); \
|
||||
ln -s `ls -t $(patsubst %.zip,%_*.zip,$(notdir ${DIST_PACKAGE})) | head -1` $(notdir ${DIST_PACKAGE}); \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
|
||||
|
||||
${DIST_PACKAGE}: ${MODEL_FINAL}
|
||||
ifneq (${SKIP_DIST_EVAL},1)
|
||||
@${MAKE} $(TEST_EVALUATION)
|
||||
@ -512,9 +515,14 @@ endif
|
||||
|
||||
.PHONY: upload
|
||||
upload:
|
||||
which a-put
|
||||
if [ -e models-links.tar ]; then \
|
||||
tar -xf models-links.tar; \
|
||||
rm -f models-links.tar; \
|
||||
fi
|
||||
find ${RELEASEDIR}/ -type l | tar -cf models-links.tar -T -
|
||||
find ${RELEASEDIR}/ -type l -delete
|
||||
cd ${RELEASEDIR} && swift upload ${MODEL_CONTAINER} --changed --skip-identical *
|
||||
-find ${RELEASEDIR}/ -type l -delete
|
||||
-cd ${RELEASEDIR} && swift upload ${MODEL_CONTAINER} --changed --skip-identical *
|
||||
tar -xf models-links.tar
|
||||
rm -f models-links.tar
|
||||
swift post ${MODEL_CONTAINER} --read-acl ".r:*"
|
||||
|
@ -124,6 +124,9 @@ EXTRACT_LEX ?= ${shell which extract_lex 2>/dev/null || echo ${TOOLSDIR}/extr
|
||||
MOSESSCRIPTS ?= ${TOOLSDIR}/moses-scripts/scripts
|
||||
TMX2MOSES ?= ${shell which tmx2moses 2>/dev/null || echo ${TOOLSDIR}/OpusTools-perl/scripts/convert/tmx2moses}
|
||||
|
||||
GET_ISO_CODE ?= ${ISO639} -m
|
||||
|
||||
|
||||
## marian-nmt binaries
|
||||
|
||||
MARIAN_TRAIN = ${MARIAN_HOME}marian
|
||||
|
2
lib/env/mahti.mk
vendored
2
lib/env/mahti.mk
vendored
@ -107,3 +107,5 @@ MARIAN_BUILD_OPTIONS = -DCUDNN=ON \
|
||||
|
||||
# LOAD_EXTRACTLEX_BUILD_ENV = cmake gcc/9.3.0 boost/1.68.0
|
||||
LOAD_EXTRACTLEX_BUILD_ENV = module load cmake boost
|
||||
|
||||
LOAD_COMET_ENV = module load python-data &&
|
||||
|
11
lib/env/puhti.mk
vendored
11
lib/env/puhti.mk
vendored
@ -46,8 +46,10 @@ else
|
||||
endif
|
||||
|
||||
|
||||
CPU_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 python-env
|
||||
GPU_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 python-env
|
||||
# CPU_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 python-env
|
||||
# GPU_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 python-env
|
||||
CPU_MODULES = perl python-data cuda intel-oneapi-mkl openmpi
|
||||
GPU_MODULES = perl python-data cuda intel-oneapi-mkl openmpi
|
||||
LOAD_CPU_ENV = module load ${CPU_MODULES} && module list
|
||||
LOAD_GPU_ENV = module load ${GPU_MODULES} && module list
|
||||
|
||||
@ -70,10 +72,10 @@ endif
|
||||
HPC_EXTRA1 = \#SBATCH --account=${CSCPROJECT}
|
||||
|
||||
|
||||
BUILD_MODULES = StdEnv python-env cmake perl/5.30.0
|
||||
BUILD_MODULES = StdEnv perl python-data cuda intel-oneapi-mkl openmpi cmake
|
||||
LOAD_BUILD_ENV = module purge && module load ${BUILD_MODULES} && module list
|
||||
|
||||
MARIAN_BUILD_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 cmake/3.18.2
|
||||
MARIAN_BUILD_MODULES = StdEnv perl python-data cuda intel-oneapi-mkl openmpi cmake
|
||||
LOAD_MARIAN_BUILD_ENV = module purge && module load ${MARIAN_BUILD_MODULES} && module list
|
||||
MARIAN_BUILD_OPTIONS = -DTcmalloc_INCLUDE_DIR=/appl/spack/install-tree/gcc-8.3.0/gperftools-2.7-5w7w2c/include \
|
||||
-DTcmalloc_LIBRARY=/appl/spack/install-tree/gcc-8.3.0/gperftools-2.7-5w7w2c/lib/libtcmalloc.so \
|
||||
@ -91,3 +93,4 @@ MARIAN_BUILD_OPTIONS = -DTcmalloc_INCLUDE_DIR=/appl/spack/install-tree/gcc-8.3.
|
||||
-DFBGEMM_STATIC=1
|
||||
|
||||
|
||||
LOAD_COMET_ENV = module load pytorch &&
|
||||
|
@ -438,28 +438,68 @@ elg-new-bigmodels4:
|
||||
done
|
||||
|
||||
|
||||
elg-new-bigmodels5:
|
||||
${MAKE} MODELTYPE=transformer-big MARIAN_EXTRA=--no-restore-corpus \
|
||||
SKIP_SAME_LANG=1 \
|
||||
DATA_SAMPLING_WEIGHT=0.5 \
|
||||
SRCLANGS="jpn kor zho" \
|
||||
TRGLANGS="jpn kor zho" tatoeba-job
|
||||
|
||||
|
||||
|
||||
elg-new-bigmodels-multieval:
|
||||
for l in zls zlw; do \
|
||||
-for l in ara deu fin fra gmq heb jpn por spa zho; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-sla2$${l}-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2sla-multieval; \
|
||||
done
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-sla2sla-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-sla2kor-multieval-separate-spm
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-kor2sla-multieval-separate-spm
|
||||
-for l in zls zlw; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2fin-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2deu-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-fin2$${l}-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-deu2$${l}-multieval; \
|
||||
done
|
||||
for l in bat gmq; do \
|
||||
-for l in bat gmq; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2deu-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-deu2$${l}-multieval; \
|
||||
done
|
||||
for l in bat cel gmq zle zls zlw; do \
|
||||
-for l in ara bat cel eus fas gmq gmw heb sqi tur vie zho zle zls zlw; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2itc-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-itc2$${l}-multieval; \
|
||||
done
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-cel2deu-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-deu2cel-multieval; \
|
||||
for l in bat cel zle zls zlw; do \
|
||||
-for l in ara bat cel eus fas heb sqi tur vie zho zle zls zlw; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2gmq-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-gmq2$${l}-multieval; \
|
||||
done
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-cel2deu-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-deu2cel-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-bat2bat-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-cel2cel-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-gmq2gmq-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-itc2itc-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-gmw2gmw-multieval
|
||||
|
||||
|
||||
|
||||
elg-sla-train:
|
||||
-for l in ara deu fin fra gmq heb jpn por spa zho; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-sla2$${l}-trainjob; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2sla-trainjob; \
|
||||
done
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-sla2sla-trainjob
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-sla2kor-trainjob-separate-spm
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-kor2sla-trainjob-separate-spm
|
||||
|
||||
elg-sla-multieval:
|
||||
-for l in ara deu fin fra gmq heb jpn por spa zho; do \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-sla2$${l}-multieval; \
|
||||
${MAKE} MODELTYPE=transformer-big tatoeba-$${l}2sla-multieval; \
|
||||
done
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-sla2sla-multieval
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-sla2kor-multieval-separate-spm
|
||||
-${MAKE} MODELTYPE=transformer-big tatoeba-kor2sla-multieval-separate-spm
|
||||
|
||||
|
||||
|
||||
|
@ -37,6 +37,8 @@ fetch-datasets fetch-tatoeba-datasets:
|
||||
for t in ${MACRO_TRGLANGS}; do \
|
||||
if [ `echo "$$s-$$t $$t-$$s" | egrep '${SKIP_LANGPAIRS}' | wc -l` -gt 0 ]; then \
|
||||
echo "!!!!!!!!!!! skip language pair $$s-$$t !!!!!!!!!!!!!!!!"; \
|
||||
elif [ `echo '${TATOEBA_LANGPAIRS}' | tr ' ' "\n" | egrep "$$s-$$t|$$t-$$s" | wc -l` -eq 0 ]; then \
|
||||
echo ".... no package released for $$s-$$t!"; \
|
||||
else \
|
||||
if [ "$$s" \< "$$t" ]; then \
|
||||
if [ ! -e ${TATOEBA_DATA}/${TATOEBA_TRAINSET}.$$s-$$t.clean.$$s.gz ]; then \
|
||||
|
@ -134,6 +134,7 @@ LEADERBOARD_DIR = ${REPOHOME}scores
|
||||
compare-bleu-score-table:
|
||||
@grep BLEU ${WORKHOME}/*/*.eval |\
|
||||
perl -pe 's#^${WORKHOME}/([^/]*)/([^\.]+)\.(.*?-.*?\.)?([^\.]+\.[^\.]+\.[^\.]+)\.([^\.]+)\.([^\.]+)\.eval:.*? = ([0-9\.]+) .*$$#$$5-$$6\t$$7\t$$2\t$$1\t$$4#' |\
|
||||
grep -v '^[a-z\-]*multi' |\
|
||||
perl -pe '@a=split(/\t/);if($$a[0]=~/multi/){$$a[0]=$$a[3];};$$_=join("\t",@a);' |\
|
||||
perl -pe '@a=split(/\t/);$$a[2]=lc($$a[2]);$$a[2]=~s/^(.*)\-[a-z]{4}$$/$$1/;$$a[2]=~s/^(.*)\-[a-z]{6}$$/$$1/;$$a[2]=~s/^(news.*)\-[a-z]{4}/$$1/;if (-e "${LEADERBOARD_DIR}/$$a[0]/$$a[2]/bleu-scores.txt"){$$b=`head -1 ${LEADERBOARD_DIR}/$$a[0]/$$a[2]/bleu-scores.txt | cut -f1`;$$b+=0;}else{$$b=0;}$$d=$$a[1]-$$b;splice(@a,2,0,$$b,$$d);$$_=join("\t",@a);' |\
|
||||
sort -k5,5 -k1,1 -k2,2nr
|
||||
@ -144,20 +145,19 @@ compare-bleu-scores:
|
||||
|
||||
print-improved-models:
|
||||
@make -s compare-bleu-scores |\
|
||||
grep -v ' 0.00' | grep -v ' -[0-9]'
|
||||
grep -v ' 0.00 [a-z]' | grep -v ' -[0-9]'
|
||||
|
||||
## print only those rows of the compare-bleu-scores output that contain
## a negative number (a '-' directly followed by a digit)
## the sub-make is started via ${MAKE} (not a literal 'make') to make sure
## that the same make binary and its flags are used for the recursion
print-decreased-models:
	@${MAKE} -s compare-bleu-scores |\
	grep ' -[0-9]'
|
||||
|
||||
|
||||
|
||||
|
||||
## compare BLEU scores for the current model
|
||||
|
||||
compare-model-bleu-score-table:
|
||||
@grep BLEU ${WORKDIR}/*.eval |\
|
||||
perl -pe 's#^${WORKHOME}/([^/]*)/([^\.]+)\.(.*?-.*?\.)?([^\.]+\.[^\.]+\.[^\.]+)\.([^\.]+)\.([^\.]+)\.eval:.*? = ([0-9\.]+) .*$$#$$5-$$6\t$$7\t$$2\t$$1\t$$4#' |\
|
||||
grep -v '^[a-z\-]*multi' |\
|
||||
perl -pe '@a=split(/\t/);if($$a[0]=~/multi/){$$a[0]=$$a[3];};$$_=join("\t",@a);' |\
|
||||
perl -pe '@a=split(/\t/);$$a[2]=lc($$a[2]);$$a[2]=~s/^(.*)\-[a-z]{4}$$/$$1/;$$a[2]=~s/^(.*)\-[a-z]{6}$$/$$1/;$$a[2]=~s/^(news.*)\-[a-z]{4}$$/$$1/;if (-e "${LEADERBOARD_DIR}/$$a[0]/$$a[2]/bleu-scores.txt"){$$b=`head -1 ${LEADERBOARD_DIR}/$$a[0]/$$a[2]/bleu-scores.txt | cut -f1`;$$b+=0;}else{$$b=0;}$$d=$$a[1]-$$b;splice(@a,2,0,$$b,$$d);$$_=join("\t",@a);' |\
|
||||
sort -k5,5 -k1,1 -k2,2nr
|
||||
|
@ -27,3 +27,24 @@ fix-config:
|
||||
rm -f decoder.yml
|
||||
|
||||
|
||||
|
||||
|
||||
SCOREFILES := ${wildcard */*.scores.txt}
|
||||
BLEUSCOREFILES := ${SCOREFILES:.scores.txt=.bleu-scores.txt}
|
||||
CHRFSCOREFILES := ${SCOREFILES:.scores.txt=.chrf-scores.txt}
|
||||
|
||||
## create the BLEU and chrF score files for all score files in SCOREFILES
## (the target is a command and not a file, hence it is declared phony)
.PHONY: create-score-files
create-score-files: ${BLEUSCOREFILES} ${CHRFSCOREFILES}
|
||||
|
||||
%.bleu-scores.txt: %.scores.txt
|
||||
cut -f1,2,4 $< | \
|
||||
sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
|
||||
sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
rev | uniq -f1 | rev > $@
|
||||
|
||||
|
||||
%.chrf-scores.txt: %.scores.txt
|
||||
cut -f1,2,3 $< |\
|
||||
sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
|
||||
sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
rev | uniq -f1 | rev > $@
|
||||
|
||||
|
@ -395,14 +395,14 @@ print-langgroups:
|
||||
|
||||
show-improved-models:
|
||||
make -s compare-bleu-score-table-tatoeba | \
|
||||
grep -v ' 0 ' | grep -v ' -[0-9]' | \
|
||||
grep -v ' 0 [a-z]' | grep -v ' -[0-9]' | \
|
||||
cut -f6 | sort -u | xargs
|
||||
|
||||
|
||||
|
||||
release-improved-models:
|
||||
for l in ${shell make -s compare-bleu-score-table-tatoeba | \
|
||||
grep -v ' 0 ' | grep -v ' -[0-9]' | \
|
||||
grep -v ' 0 [a-z]' | grep -v ' -[0-9]' | \
|
||||
cut -f6 | sort -u | xargs}; do \
|
||||
s=`echo "$$l" | cut -f1 -d-`; \
|
||||
t=`echo "$$l" | cut -f2 -d-`; \
|
||||
@ -412,7 +412,7 @@ release-improved-models:
|
||||
## release all models with improved scores even if they are not yet done
|
||||
release-all-improved-models:
|
||||
for l in ${shell make -s compare-bleu-score-table-tatoeba | \
|
||||
grep -v ' 0 ' | grep -v ' -[0-9]' | \
|
||||
grep -v ' 0 [a-z]' | grep -v ' -[0-9]' | \
|
||||
cut -f6 | sort -u | xargs}; do \
|
||||
s=`echo "$$l" | cut -f1 -d-`; \
|
||||
t=`echo "$$l" | cut -f2 -d-`; \
|
||||
@ -485,12 +485,37 @@ find-trglanggroup = $(call find-langgroup,$(lastword ${subst -, ,${subst 2, ,${1
|
||||
find-langgroup-pair = $(sort $(call find-srclanggroup,${1}) $(call find-trglanggroup,${1}) ${2})
|
||||
|
||||
|
||||
|
||||
## expand language groups to individual languages and language pairs
|
||||
|
||||
LANGGROUP_SRCLANGS := ${call find-srclanggroup,${SRCLANGGROUP},${PIVOT}}
|
||||
LANGGROUP_TRGLANGS := ${call find-srclanggroup,${TRGLANGGROUP},${PIVOT}}
|
||||
LANGGROUP_LANGPAIRS := $(foreach S,${LANGGROUP_SRCLANGS},$(foreach T,${LANGGROUP_TRGLANGS},${S}-${T}))
|
||||
|
||||
## remove non-supported language pairs (make pattern to skip those language pairs)
|
||||
## also remove combinations of the same language if SKIP_SAME_LANG is set to 1
|
||||
|
||||
## LANGGROUP_SKIP_LANGPAIRS ... language pairs that will not be used
## LANGGROUP_USE_LANGPAIRS .... language pairs that remain
##
## with SKIP_SAME_LANG=1: skip all pairs that are not in TATOEBA_LANGPAIRS
##                        and all pairs of identical languages (xxx-xxx)
## otherwise: skip only the pairs that are not in TATOEBA_LANGPAIRS

ifeq (${SKIP_SAME_LANG},1)
  LANGGROUP_SAMELANG_LANGPAIRS := $(foreach L,${LANGGROUP_SRCLANGS},${L}-${L})
  LANGGROUP_SKIP_LANGPAIRS     := $(strip ${LANGGROUP_SAMELANG_LANGPAIRS} \
					$(filter-out ${TATOEBA_LANGPAIRS},${LANGGROUP_LANGPAIRS}))
  LANGGROUP_USE_LANGPAIRS      := $(filter-out ${LANGGROUP_SKIP_LANGPAIRS},${LANGGROUP_LANGPAIRS})
else
  LANGGROUP_SKIP_LANGPAIRS     := $(filter-out ${TATOEBA_LANGPAIRS},${LANGGROUP_LANGPAIRS})
  LANGGROUP_USE_LANGPAIRS      := $(filter ${TATOEBA_LANGPAIRS},${LANGGROUP_LANGPAIRS})
endif

## join the skipped pairs with '|' to get a regex alternation
## strip the list first: a leading or trailing space (one of the joined
## lists is empty) would become a leading or trailing '|', i.e. an empty
## alternative that matches every language pair
LANGGROUP_SKIP_LANGPAIR_PATTERN := $(subst ${SPACE},|,$(strip ${LANGGROUP_SKIP_LANGPAIRS}))
|
||||
|
||||
|
||||
## print languages in this set
|
||||
tatoeba-%-langs:
|
||||
-( s=$(firstword $(subst 2, ,$(patsubst tatoeba-%-langs,%,$@))); \
|
||||
@( s=$(firstword $(subst 2, ,$(patsubst tatoeba-%-langs,%,$@))); \
|
||||
t=$(lastword $(subst 2, ,$(patsubst tatoeba-%-langs,%,$@))); \
|
||||
echo "${call find-srclanggroup,${patsubst tatoeba-%-langs,%,$@},${PIVOT}}"; \
|
||||
echo "${call find-trglanggroup,${patsubst tatoeba-%-langs,%,$@},${PIVOT}}"; )
|
||||
@echo 'use: ${LANGGROUP_USE_LANGPAIRS}'
|
||||
@echo 'skip: ${LANGGROUP_SKIP_LANGPAIR_PATTERN}'
|
||||
|
||||
|
||||
## shortcut to start a target only if certain language group limits are met
|
||||
## (maximum and minimum number of languages)
|
||||
|
@ -2,12 +2,20 @@
|
||||
# evaluate released Tatoeba MT models
|
||||
# with existing benchmarks (collected in OPUS-MT-testsets)
|
||||
#
|
||||
#
|
||||
#
|
||||
# comet-score:
|
||||
# on puhti: module load pytorch && comet-score
|
||||
# on mahti: module load python-data && comet-score
|
||||
|
||||
|
||||
|
||||
## set the home directory of the repository
|
||||
## this is to find the included makefiles
|
||||
## (important to have a trailing '/')
|
||||
|
||||
|
||||
|
||||
SHELL := bash
|
||||
PWD := ${shell pwd}
|
||||
REPOHOME := ${PWD}/../../
|
||||
@ -20,11 +28,19 @@ GPUJOB_HPC_MEM = 20g
|
||||
|
||||
|
||||
MODEL_STORAGE := https://object.pouta.csc.fi/Tatoeba-MT-models
|
||||
MODEL_DISTS := ${shell ${WGET} -q -O - ${MODEL_STORAGE}/index.txt | grep '.zip$$' | grep -v '.eval.zip$$'}
|
||||
ifndef MODEL_DISTS
|
||||
ifneq ($(wildcard models.missing),)
|
||||
MODEL_DISTS := $(shell cat models.missing)
|
||||
else
|
||||
MODEL_DISTS := ${shell ${WGET} -q -O - ${MODEL_STORAGE}/index.txt | grep '.zip$$' | grep -v '.eval.zip$$'}
|
||||
endif
|
||||
endif
|
||||
|
||||
MODEL_DIST = ${firstword ${MODEL_DISTS}}
|
||||
MODEL = ${MODEL_DIST:.zip=}
|
||||
MODEL_LANGPAIR = ${firstword ${subst /, ,${MODEL_DIST}}}
|
||||
MODEL_URL = ${MODEL_STORAGE}/${MODEL_DIST}
|
||||
MODEL_EVAL_URL = ${MODEL_URL:.zip=.eval.zip}
|
||||
|
||||
## directory with all test sets (submodule OPUS-MT-testsets)
|
||||
TESTSET_HOME := ${REPOHOME}OPUS-MT-testsets/testsets
|
||||
@ -41,9 +57,42 @@ MODEL_HOME = ${REPOHOME}tatoeba/models
|
||||
MODEL_DIR = ${MODEL_HOME}/${MODEL}
|
||||
MODEL_SCORES = ${MODEL_DIR}.scores.txt
|
||||
MODEL_EVALZIP = ${MODEL_DIR}.eval.zip
|
||||
|
||||
LEADERBOARD_DIR = ${REPOHOME}scores
|
||||
|
||||
|
||||
MODEL_BLEUSCORES = ${MODEL_DIR}.bleu-scores.txt
|
||||
MODEL_CHRFSCORES = ${MODEL_DIR}.chrf-scores.txt
|
||||
MODEL_COMETSCORES = ${MODEL_DIR}.comet-scores.txt
|
||||
|
||||
|
||||
## fix individual score files for all models in the index!
|
||||
|
||||
ALL_MODEL_BLEUSCORES = ${patsubst %.zip,%.bleu-scores,${MODEL_DISTS}}
|
||||
ALL_MODEL_CHRFSCORES = ${patsubst %.zip,%.chrf-scores,${MODEL_DISTS}}
|
||||
|
||||
## update the individual score files for all models in MODEL_DISTS
## by calling make recursively with one model at a time
## the targets below are only handles for the recursion: their recipes
## do not create a file named $@ and, therefore, they are declared phony
.PHONY: all-individual-scores ${ALL_MODEL_BLEUSCORES} ${ALL_MODEL_CHRFSCORES}
all-individual-scores: ${ALL_MODEL_BLEUSCORES} ${ALL_MODEL_CHRFSCORES}

## errors are ignored (leading '-') to continue with the next model
${ALL_MODEL_BLEUSCORES}:
	-${MAKE} MODEL_DISTS=${@:.bleu-scores=.zip} individual-scores
#	-${MAKE} MODEL_DISTS=${@:.bleu-scores=.zip} ${MODEL_HOME}/$@.txt

${ALL_MODEL_CHRFSCORES}:
	-${MAKE} MODEL_DISTS=${@:.chrf-scores=.zip} ${MODEL_HOME}/$@.txt
|
||||
|
||||
|
||||
|
||||
## MODEL_NOTEVALS ... all released models that do not have an evaluation file yet
|
||||
## MODEL_LOCAL ...... all model packages in the local release dir
|
||||
##
|
||||
## NEW: don't set those variables by default as this slows down other makefile calls
|
||||
|
||||
|
||||
# MODEL_NOTEVALS := $(shell ${WGET} -q -O - ${MODEL_STORAGE}/index.txt | grep '.zip$$' | \
|
||||
sed 's/\.eval\.zip/.zip/' | sort | uniq -c | sed 's/^ *//' | grep '^1 ' | cut -f2 -d' ')
|
||||
# MODEL_LOCAL := $(patsubst ${MODEL_HOME}/%,%,$(filter-out %.eval.zip,$(shell find ${MODEL_HOME}/ -type f -name '*.zip')))
|
||||
|
||||
|
||||
|
||||
## all zip files with benchmark results
|
||||
MODEL_EVALZIPS := ${patsubst %.zip,${MODEL_HOME}/%.eval.zip,${MODEL_DISTS}}
|
||||
|
||||
@ -59,38 +108,38 @@ all: ${MODEL_EVALZIPS}
|
||||
first: $(firstword ${MODEL_EVALZIPS})
|
||||
|
||||
|
||||
## check models that still need to be evaluated
|
||||
## (i.e. *.eval.zip does not exist)
|
||||
MODEL_EVALCHECK := ${patsubst %.zip,${MODEL_HOME}/%.eval.check,${MODEL_DISTS}}
|
||||
|
||||
## the special target is spelled .PHONY (any other name is an ordinary target)
.PHONY: print-eval-needed ${MODEL_EVALCHECK}
|
||||
print-eval-needed: ${MODEL_EVALCHECK}
|
||||
${MODEL_EVALCHECK}:
|
||||
@if [ ! -e $(@:.check=.zip) ]; then \
|
||||
echo "need to make $(@:.check=.zip)"; \
|
||||
fi
|
||||
print-model-list:
|
||||
@echo "${MODEL_DISTS}"
|
||||
@echo "number of models: ${words ${MODEL_DISTS}}"
|
||||
|
||||
#-------------------------------------------------
|
||||
## phony targets to evaluate only new models
|
||||
## or only models that exist locally
|
||||
## (no dependency on testset index)
|
||||
#-------------------------------------------------
|
||||
MODEL_EVALNEW := ${patsubst %.zip,${MODEL_HOME}/%.eval.new,${MODEL_DISTS}}
|
||||
|
||||
## the special target is spelled .PHONY (any other name is an ordinary target)
.PHONY: eval-new eval-new-models ${MODEL_EVALNEW}
|
||||
eval-new eval-new-models: ${MODEL_EVALNEW}
|
||||
${MODEL_EVALNEW}:
|
||||
@if [ ! -e $(@:.new=.zip) ]; then \
|
||||
${MAKE} MODEL_DIST=${patsubst ${MODEL_HOME}/%.eval.new,%.zip,$@} eval-model; \
|
||||
fi
|
||||
## check models that still need to be evaluated
|
||||
## (i.e. *.eval.zip does not exist)
|
||||
|
||||
## the special target is spelled .PHONY (any other name is an ordinary target)
.PHONY: print-eval-needed
|
||||
print-eval-needed:
|
||||
@echo "$(shell ${WGET} -q -O - ${MODEL_STORAGE}/index.txt | grep '.zip$$' | \
|
||||
sed 's/\.eval\.zip/.zip/' | sort | uniq -c | sed 's/^ *//' | grep '^1 ' | cut -f2 -d' ')" | \
|
||||
tr ' ' "\n"
|
||||
|
||||
## the special target is spelled .PHONY (any other name is an ordinary target)
.PHONY: eval-new eval-new-models
|
||||
eval-new eval-new-models:
|
||||
${MAKE} MODEL_DISTS="$(shell ${WGET} -q -O - ${MODEL_STORAGE}/index.txt | grep '.zip$$' | \
|
||||
sed 's/\.eval\.zip/.zip/' | sort | uniq -c | sed 's/^ *//' | grep '^1 ' | cut -f2 -d' ')" all
|
||||
|
||||
|
||||
## filter out all models that exist locally
|
||||
MODEL_LOCAL := ${filter ${patsubst %.zip,%.eval.new,$(wildcard ${MODEL_HOME}/*/*.zip)},${MODEL_EVALNEW}}
|
||||
eval-local: ${MODEL_LOCAL}
|
||||
.PHONY: print-eval-local
|
||||
print-eval-local:
|
||||
@echo ${MODEL_LOCAL} | tr ' ' "\n"
|
||||
@echo "$(patsubst ${MODEL_HOME}/%,%,$(filter-out %.eval.zip,$(shell find ${MODEL_HOME}/ -type f -name '*.zip')))" | tr ' ' "\n"
|
||||
|
||||
.PHONY: eval-local
|
||||
eval-local:
|
||||
${MAKE} MODEL_DISTS="$(patsubst ${MODEL_HOME}/%,%,$(filter-out %.eval.zip,$(shell find ${MODEL_HOME}/ -type f -name '*.zip')))" all
|
||||
|
||||
#-------------------------------------------------
|
||||
## create zip-files with all evaluation files
|
||||
@ -103,7 +152,7 @@ ${MODEL_EVALZIPS}: ${TESTSET_INDEX}
|
||||
mkdir -p ${@:.eval.zip=}; \
|
||||
unzip -d ${@:.eval.zip=} $@; \
|
||||
fi
|
||||
${MAKE} MODEL_DIST=${patsubst ${MODEL_HOME}/%.eval.zip,%.zip,$@} eval-model
|
||||
-${MAKE} MODEL_DISTS=${patsubst ${MODEL_HOME}/%.eval.zip,%.zip,$@} eval-model
|
||||
|
||||
|
||||
#-------------------------------------------------
|
||||
@ -118,13 +167,16 @@ eval-model: ${MODEL_SCORES}
|
||||
${MAKE} sort-leaderboards; \
|
||||
fi
|
||||
if [ -d ${MODEL_DIR} ]; then \
|
||||
cd ${MODEL_DIR} && zip ${MODEL_EVALZIP} *.eval *.compare; \
|
||||
cd ${MODEL_DIR} && zip ${MODEL_EVALZIP} *.*; \
|
||||
rm -f ${MODEL_DIR}/*.eval; \
|
||||
rm -f ${MODEL_DIR}/*.compare; \
|
||||
rm -f ${MODEL_DIR}/*.comet; \
|
||||
rm -f ${MODEL_DIR}.done; \
|
||||
rmdir ${MODEL_DIR}; \
|
||||
fi
|
||||
|
||||
# cd ${MODEL_DIR} && zip ${MODEL_EVALZIP} *.eval *.compare;
|
||||
|
||||
|
||||
## temporary directory with all benchmark results
|
||||
${MODEL_DIR}.done:
|
||||
@ -148,15 +200,46 @@ cleanup:
|
||||
|
||||
## fetch translation model
|
||||
.PHONY: fetch
|
||||
fetch: ${WORK_DIR}/model/decoder.yml
|
||||
fetch: ${WORK_DIR}/model/decoder.yml ${MODEL_DIR}
|
||||
|
||||
|
||||
## prepare the model evaluation file directory
|
||||
## fetch already existing evaluations
|
||||
## create the directory and unpack evaluation files that exist already:
##   1. from the local package ${MODEL_EVALZIP} (if it exists)
##   2. from the package that is fetched from ${MODEL_EVAL_URL}
## 'unzip -n' never overwrites existing files
## all steps after mkdir are best-effort (leading '-'):
## a model without any evaluation package is not an error
##
## - unzip only runs if changing into the directory succeeded (cd ... &&)
## - the download is tested with '-s' (exists and is not empty) because
##   writing with '-O' may leave an empty file behind if the download fails
##   NOTE(review): assumes that ${WGET} behaves like GNU wget -- confirm
## - the downloaded package is removed in any case
${MODEL_DIR}:
	mkdir -p $@
	-if [ -e ${MODEL_EVALZIP} ]; then \
	  cd $@ && unzip -n ${MODEL_EVALZIP}; \
	fi
	-${WGET} -q -O $@/eval.zip ${MODEL_EVAL_URL}
	-if [ -s $@/eval.zip ]; then \
	  cd $@ && unzip -n eval.zip; \
	fi
	rm -f $@/eval.zip
|
||||
|
||||
|
||||
## show where the model package would be taken from:
## the local file ${MODEL_HOME}/${MODEL_DIST} if it exists
## and the download URL otherwise
## (the target is a command and not a file, hence it is declared phony)
.PHONY: localmodel
localmodel:
	if [ -e ${MODEL_HOME}/${MODEL_DIST} ]; then \
	  echo "local model found: ${MODEL_HOME}/${MODEL_DIST}"; \
	else \
	  echo "${MODEL_URL}"; \
	fi
|
||||
|
||||
## fetch the model (either from local release dir or from the model storage)
|
||||
${WORK_DIR}/model/decoder.yml:
|
||||
mkdir -p ${dir $@}
|
||||
${WGET} -q -O ${dir $@}model.zip ${MODEL_URL}
|
||||
if [ -e ${MODEL_HOME}/${MODEL_DIST} ]; then \
|
||||
cp ${MODEL_HOME}/${MODEL_DIST} ${dir $@}model.zip; \
|
||||
else \
|
||||
${WGET} -q -O ${dir $@}model.zip ${MODEL_URL}; \
|
||||
fi
|
||||
unzip -d ${dir $@} ${dir $@}model.zip
|
||||
## fix an old problem with the pre-process script
|
||||
mv ${dir $@}preprocess.sh ${dir $@}preprocess-old.sh
|
||||
sed 's#perl -C -pe.*$$#perl -C -pe "s/(?!\\n)\\p{C}/ /g;" |#' \
|
||||
sed -e 's#perl -C -pe.*$$#perl -C -pe "s/(?!\\n)\\p{C}/ /g;" |#' \
|
||||
-e 's#/projappl/project_2001569#$${HOME}/projappl#' \
|
||||
-e 's#SPMENCODE=.*$$#SPMENCODE=`which spm_encode || echo "$${PWD}/tools/marian-dev/build/spm_encode"`#' \
|
||||
< ${dir $@}preprocess-old.sh > ${dir $@}preprocess.sh
|
||||
chmod +x ${dir $@}preprocess.sh
|
||||
|
||||
@ -194,6 +277,33 @@ TESTSETS = ${notdir ${basename ${wildcard ${TESTSET_DIR}/*.${SRC}}}}
|
||||
TESTSET = ${firstword ${TESTSETS}}
|
||||
|
||||
|
||||
|
||||
MODEL_EVAL_MISSING = $(patsubst %,%.missing,${ALL_LANGPAIRS})
|
||||
METRICS = bleu chrf comet
|
||||
|
||||
## collect the first tab-separated field of each line in all *.missing
## files below the current directory (sorted, without duplicates)
.PHONY: find-missing
find-missing: models.missing

## - the target itself matches '*.missing' as well and has to be excluded
##   from the search (otherwise the list would be read while it is written)
## - the list is written to a temporary file first to avoid that a
##   truncated list is left behind if the pipeline is interrupted
models.missing: ${MODEL_EVAL_MISSING}
	find . -name '*.missing' ! -path './$@' | xargs cat | cut -f1 | sort -u > $@.tmp
	mv -f $@.tmp $@
|
||||
|
||||
${MODEL_EVAL_MISSING}:
|
||||
if [ -e ${LEADERBOARD_DIR}/$(@:.missing=)/model-list.txt ]; then \
|
||||
for m in `grep 'Tatoeba-MT-models' ${LEADERBOARD_DIR}/$(@:.missing=)/model-list.txt`; do\
|
||||
for t in $(sort $(basename $(filter-out %.labels,$(notdir $(wildcard ${TESTSET_HOME}/$(@:.missing=)/*.*))))); do \
|
||||
for b in ${METRICS}; do \
|
||||
if [ ! -f ${LEADERBOARD_DIR}/$(@:.missing=)/$$t/$$b-scores.txt ]; then \
|
||||
echo "$$m $$t $$b" | sed 's#^.*MT-models/##' >> $@; \
|
||||
elif [ `grep "$$m" ${LEADERBOARD_DIR}/$(@:.missing=)/$$t/$$b-scores.txt | wc -l` -eq 0 ]; then \
|
||||
echo "$$m $$t $$b" | sed 's#^.*MT-models/##' >> $@; \
|
||||
fi \
|
||||
done \
|
||||
done \
|
||||
done \
|
||||
fi
|
||||
|
||||
# for t in `find ${LEADERBOARD_DIR}/$$l -mindepth 1 -maxdepth 1 -type d -printf " %f"`; do \
|
||||
|
||||
|
||||
## eval all language pairs
|
||||
.PHONY: eval-langpairs
|
||||
eval-langpairs:
|
||||
@ -205,9 +315,50 @@ eval-langpairs:
|
||||
.PHONY: eval-testsets
|
||||
eval-testsets:
|
||||
for t in ${TESTSETS}; do \
|
||||
${MAKE} TESTSET=$$t eval; \
|
||||
${MAKE} TESTSET=$$t eval comet-eval; \
|
||||
done
|
||||
|
||||
|
||||
|
||||
|
||||
## make score files for individual metrics
|
||||
## (more convenient to read and extend with new metrics)
|
||||
## TODO: make them by default and create proper dependencies
|
||||
|
||||
## make the BLEU, chrF and COMET score files of the current model
## (the target is a command and not a file, hence it is declared phony)
.PHONY: individual-scores
individual-scores: ${MODEL_BLEUSCORES} ${MODEL_CHRFSCORES} ${MODEL_COMETSCORES}
|
||||
|
||||
${MODEL_BLEUSCORES}: ${MODEL_SCORES}
|
||||
cut -f1,2,4 ${MODEL_SCORES} | \
|
||||
sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
|
||||
sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
rev | uniq -f1 | rev > $@
|
||||
|
||||
|
||||
${MODEL_CHRFSCORES}: ${MODEL_SCORES}
|
||||
cut -f1,2,3 ${MODEL_SCORES} |\
|
||||
sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
|
||||
sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
rev | uniq -f1 | rev > $@
|
||||
|
||||
|
||||
EVAL_FILES = ${wildcard ${MODEL_DIR}/*.eval}
|
||||
COMET_EVAL_FILES = ${wildcard ${MODEL_DIR}/*.comet}
|
||||
${MODEL_COMETSCORES}: ${COMET_EVAL_FILES}
|
||||
if [ -d ${MODEL_DIR} ]; then \
|
||||
mkdir -p $(dir $@); \
|
||||
grep -H COMET ${MODEL_DIR}/*eval | sort > $@.comet; \
|
||||
cut -f1 -d: $@.comet | rev | cut -f2 -d. | rev > $@.langs; \
|
||||
cut -f1 -d: $@.comet | rev | cut -f1 -d/ | cut -f3- -d. | rev > $@.testsets; \
|
||||
cat $@.comet | rev | cut -f1 -d' ' | rev > $@.comet-scores; \
|
||||
paste $@.langs $@.testsets $@.comet-scores >> $@; \
|
||||
cat $@ |\
|
||||
sed -e 's/\(news.*[0-9][0-9][0-9][0-9]\)-[a-z][a-z][a-z][a-z] /\1 /' | \
|
||||
sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
rev | uniq -f1 | rev > $@.sorted; \
|
||||
mv -f $@.sorted $@; \
|
||||
rm -f $@.comet $@.langs $@.testsets $@.comet-scores; \
|
||||
fi
|
||||
|
||||
#-------------------------------------------------
|
||||
# create input file for translation
|
||||
#-------------------------------------------------
|
||||
@ -224,6 +375,9 @@ else
|
||||
USE_TARGET_LABELS = 0
|
||||
endif
|
||||
|
||||
|
||||
## only look into the preprocessing script if it exists
## (the complete path has to be inside of the wildcard call; with only
##  ${WORK_DIR} inside, the expression is never empty and the test is always true)
ifneq (${wildcard ${WORK_DIR}/model/preprocess.sh},)
|
||||
|
||||
## double-check whether the preprocessing script
|
||||
## requires both language IDs or not
|
||||
ifeq (${shell grep 'source-langid target-langid' ${WORK_DIR}/model/preprocess.sh 2>/dev/null | wc -l},1)
|
||||
@ -237,6 +391,8 @@ else
|
||||
PREPROCESS = ${WORK_DIR}/model/preprocess.sh ${SRC} ${WORK_DIR}/model/source.spm
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
${WORK_DIR}/${TESTSET}.${LANGPAIR}.input: ${TESTSET_DIR}/${TESTSET}.${SRC}
|
||||
${PREPROCESS} < $< > $@
|
||||
@ -306,14 +462,71 @@ ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.eval:
|
||||
fi
|
||||
|
||||
|
||||
|
||||
## make the comet score
|
||||
|
||||
.PHONY: comet
|
||||
comet:
|
||||
${MAKE} fetch
|
||||
${MAKE} comet-langpairs
|
||||
${MAKE} ${MODEL_COMETSCORES}
|
||||
|
||||
|
||||
## short names for the COMET score file of the current model and for the
## *.registered counterparts of the COMET, BLEU and chrF score files
## (the names are commands and not files, hence they are declared phony)
.PHONY: comet-score-file comet-register-scores bleu-register-scores chrf-register-scores
comet-score-file:      ${MODEL_COMETSCORES}
comet-register-scores: ${MODEL_COMETSCORES:.txt=.registered}
bleu-register-scores:  ${MODEL_BLEUSCORES:.txt=.registered}
chrf-register-scores:  ${MODEL_CHRFSCORES:.txt=.registered}
|
||||
|
||||
|
||||
|
||||
## loop over all entries in LANGPAIRS and start one recursive make call
## per language pair that runs the target comet-testsets for that pair
.PHONY: comet-langpairs
comet-langpairs:
	for pair in $(LANGPAIRS); do \
	  $(MAKE) LANGPAIR=$$pair comet-testsets; \
	done
|
||||
|
||||
## loop over all entries in TESTSETS and start one recursive make call
## per test set that runs the target comet-eval for that test set
.PHONY: comet-testsets
comet-testsets:
	for testset in $(TESTSETS); do \
	  $(MAKE) TESTSET=$$testset comet-eval; \
	done
|
||||
|
||||
.PHONY: comet-eval
|
||||
comet-eval: ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.comet
|
||||
|
||||
ifneq (${GPU_AVAILABLE},1)
|
||||
COMET_PARAM += --gpus 0
|
||||
endif
|
||||
|
||||
## run comet-score for one test set and one language pair
##
## - the input is taken from the *.compare file next to the *.eval file:
##   every 4th line starting with line 1 / 2 / 3 is written to
##   $@.src / $@.ref / $@.hyp (GNU sed 'first~step' addresses)
## - fields 2 and 3 of the comet-score output are stored in the target
## - the last line of the target is appended to the *.eval file
##   with everything up to 'score:' replaced by 'COMET+default ='
## - appending to the prerequisite makes it newer than the target and
##   the target would be out-of-date again right away (and the expensive
##   scoring would be repeated with every call); the final touch avoids that
## - the temporary input files are removed in the end
${MODEL_DIR}/${TESTSET}.${LANGPAIR}.comet: ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.eval
	mkdir -p ${dir $@}
	sed -n '1~4p' $(<:.eval=.compare) > $@.src
	sed -n '2~4p' $(<:.eval=.compare) > $@.ref
	sed -n '3~4p' $(<:.eval=.compare) > $@.hyp
	${LOAD_COMET_ENV} comet-score ${COMET_PARAM} \
		-s $@.src -r $@.ref -t $@.hyp | cut -f2,3 > $@
	tail -1 $@ | sed 's/^.*score:/COMET+default =/' >> $<
	touch $@
	rm -f $@.src $@.ref $@.hyp
|
||||
|
||||
|
||||
#-------------------------------------------------
|
||||
# collect all scores in a file
|
||||
#-------------------------------------------------
|
||||
#
|
||||
# updating scores for models that already have some scores registered
|
||||
# - need to fetch eval file package
|
||||
# - avoid re-running things that are already done
|
||||
# - ingest the new evaluation scores
|
||||
#
|
||||
|
||||
.PHONY: scores
|
||||
scores: ${MODEL_SCORES}
|
||||
|
||||
${MODEL_SCORES}: ${TESTSET_INDEX}
|
||||
${MODEL_SCORES}: ${TESTSET_INDEX} ${MODEL_COMETSCORES}
|
||||
-if [ ! -e $@ ]; then \
|
||||
mkdir -p $(dir $@); \
|
||||
wget -qq -O $@ ${MODEL_STORAGE}/${MODEL}.scores.txt; \
|
||||
fi
|
||||
${MAKE} ${MODEL_DIR}.done
|
||||
if [ -d ${MODEL_DIR} ]; then \
|
||||
grep -H BLEU ${MODEL_DIR}/*eval | sort > $@.bleu; \
|
||||
@ -330,11 +543,17 @@ ${MODEL_SCORES}: ${TESTSET_INDEX}
|
||||
cat $@.bleu | rev | cut -f1 -d' ' | rev | cut -f1 -d')' > $@.nrwords; \
|
||||
paste $@.langs $@.testsets \
|
||||
$@.chrf-scores $@.bleu-scores \
|
||||
$@.urls $@.nrlines $@.nrwords > $@; \
|
||||
$@.urls $@.nrlines $@.nrwords >> $@; \
|
||||
cat $@ | \
|
||||
sed -e 's/\(news.*[0-9][0-9][0-9][0-9]\)-[a-z][a-z][a-z][a-z] /\1 /' | \
|
||||
sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
rev | uniq -f5 | rev | sort -u > $@.sorted; \
|
||||
mv -f $@.sorted $@; \
|
||||
rm -f $@.bleu $@.chrf $@.langs $@.testsets \
|
||||
$@.chrf-scores $@.bleu-scores \
|
||||
$@.urls $@.nrlines $@.nrwords; \
|
||||
fi
|
||||
${MAKE} individual-scores
|
||||
|
||||
|
||||
|
||||
@ -348,6 +567,11 @@ SCOREFILES := ${wildcard ${MODEL_HOME}/*/*.scores.txt}
|
||||
## ".registered" counterparts of the various score files
## (same file name with .txt replaced by .registered)
SCOREFILES_DONE     = $(patsubst %.txt,%.registered,${SCOREFILES})
SCOREFILE_DONE      = $(patsubst %.txt,%.registered,${MODEL_SCORES})

BLEUSCOREFILE_DONE  = $(patsubst %.txt,%.registered,${MODEL_BLEUSCORES})
CHRFSCOREFILE_DONE  = $(patsubst %.txt,%.registered,${MODEL_CHRFSCORES})
COMETSCOREFILE_DONE = $(patsubst %.txt,%.registered,${MODEL_COMETSCORES})
|
||||
|
||||
|
||||
## update all leader boards with all scores
|
||||
# first register every collected score file, then sort the leaderboards
# in a separate make run
update-leaderboards: $(SCOREFILES_DONE)
	$(MAKE) sort-leaderboards
|
||||
@ -355,13 +579,21 @@ update-leaderboards: ${SCOREFILES_DONE}
|
||||
## register the scores for the current model
|
||||
## (scores will be added to some temporary files sorted by language pair and benchmark)
|
||||
## NOTE: this removes langIDs from newstest sets to avoid confusion and duplicates
|
||||
## alternative (not active): register each metric from its own score file
# register-scores: ${BLEUSCOREFILE_DONE} ${CHRFSCOREFILE_DONE} ${COMETSCOREFILE_DONE}

register-scores: ${SCOREFILE_DONE} ${COMETSCOREFILE_DONE}
|
||||
|
||||
|
||||
## Static pattern rule: one "<name>.registered" file per "<name>.txt"
## score file.
## Every line of the score file is split at TABs. Columns 1 and 2 select
## the directory ${LEADERBOARD_DIR}/<col1>/<col2>, which is created if
## necessary; for column-2 values starting with "news" a hyphen followed
## by four lowercase letters is removed first. Column 4 is appended to
## bleu-scores.<score file path with "/" replaced by ".">.unsorted.txt and
## column 3 to the corresponding chrf-scores file, each followed by column 5.
## Finally the .registered file is touched.
## NOTE(review): columns look like langpair, test set, chrF, BLEU, model URL
## (inferred from how the score file is pasted together) - confirm.
${SCOREFILES_DONE}: %.registered: %.txt
|
||||
@echo "register scores from ${patsubst ${MODEL_HOME}/%,%,$<}"
|
||||
@cat $< | perl -e 'while (<>){ @a=split(/\t/); $$a[1]=~s/^(news.*)\-[a-z]{4}/$$1/; system "mkdir -p ${LEADERBOARD_DIR}/$$a[0]/$$a[1]"; open B,">>${LEADERBOARD_DIR}/$$a[0]/$$a[1]/bleu-scores.$(subst /,.,${patsubst ${MODEL_HOME}/%,%,$<}).unsorted.txt"; open C,">>${LEADERBOARD_DIR}/$$a[0]/$$a[1]/chrf-scores.$(subst /,.,${patsubst ${MODEL_HOME}/%,%,$<}).unsorted.txt"; print B "$$a[3]\t$$a[4]\n"; print C "$$a[2]\t$$a[4]\n"; close B; close C; }'
|
||||
touch $@
|
||||
|
||||
## Pattern rule for per-metric score files of the current model:
## ${MODEL_DIR}.<metric>-scores.txt --> ${MODEL_DIR}.<metric>-scores.registered
## Every line of the score file is chomped and split at TABs. Columns 1 and 2
## select the directory ${LEADERBOARD_DIR}/<col1>/<col2>, which is created if
## necessary. Column 3 followed by ${MODEL_URL} is appended to
## <metric>-scores.<score file path with "/" replaced by ".">.unsorted.txt
## in that directory. Finally the .registered file is touched.
## NOTE(review): presumably used for the bleu/chrf/comet score files
## (MODEL_BLEUSCORES, MODEL_CHRFSCORES, MODEL_COMETSCORES) - confirm that
## those variables follow the ${MODEL_DIR}.<metric>-scores.txt naming.
${MODEL_DIR}.%-scores.registered: ${MODEL_DIR}.%-scores.txt
|
||||
@echo "register scores from ${patsubst ${MODEL_HOME}/%,%,$<}"
|
||||
@cat $< | perl -e 'while (<>){ chomp; @a=split(/\t/);system "mkdir -p ${LEADERBOARD_DIR}/$$a[0]/$$a[1]"; open C,">>${LEADERBOARD_DIR}/$$a[0]/$$a[1]/$(patsubst ${MODEL_DIR}.%-scores.txt,%-scores,$<).$(subst /,.,${patsubst ${MODEL_HOME}/%,%,$<}).unsorted.txt"; print C "$$a[2]\t${MODEL_URL}\n"; close C; }'
|
||||
touch $@
|
||||
|
||||
##-------------------------------------------------------------------
|
||||
## UPDATE_SCORE_DIRS = directory that contains new scores
|
||||
@ -372,10 +604,14 @@ ${SCOREFILES_DONE}: %.registered: %.txt
|
||||
## directories with at least one *.unsorted.txt file, and the
## leaderboard files (one per metric) that live in those directories
UPDATE_SCORE_DIRS  := $(sort $(dir $(wildcard ${LEADERBOARD_DIR}/*/*/*.unsorted.txt)))
LEADERBOARDS_BLEU  := $(addsuffix bleu-scores.txt,${UPDATE_SCORE_DIRS})
LEADERBOARDS_CHRF  := $(addsuffix chrf-scores.txt,${UPDATE_SCORE_DIRS})
LEADERBOARDS_COMET := $(addsuffix comet-scores.txt,${UPDATE_SCORE_DIRS})
|
||||
|
||||
## sort all leaderboards for which we have new unsorted scores
|
||||
.PHONY: sort-leaderboards
.PHONY: sort-bleu-leaderboards sort-chrf-leaderboards sort-comet-leaderboards

## one target per metric ...
sort-bleu-leaderboards:  ${LEADERBOARDS_BLEU}
sort-chrf-leaderboards:  ${LEADERBOARDS_CHRF}
sort-comet-leaderboards: ${LEADERBOARDS_COMET}

## ... and one for all of them
sort-leaderboards: ${LEADERBOARDS_BLEU} ${LEADERBOARDS_CHRF} ${LEADERBOARDS_COMET}
|
||||
|
||||
${LEADERBOARDS_BLEU}: ${UPDATE_SCORE_DIRS}
|
||||
@echo "sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"
|
||||
@ -389,3 +625,9 @@ ${LEADERBOARDS_CHRF}: ${UPDATE_SCORE_DIRS}
|
||||
@rm -f $(dir $@)chrf-scores*.txt
|
||||
@mv $@.sorted $@
|
||||
|
||||
## Merge all comet-scores*.txt files of a leaderboard directory (the existing
## leaderboard plus newly registered *.unsorted.txt files) into one list:
## sorted by the numeric score in column 1 (highest first), with adjacent
## lines that are identical apart from the first field collapsed by uniq.
## The merged input files are removed and replaced by the sorted leaderboard.
##
## Lines are accepted if they start with a number, including negative ones:
## COMET scores can be below zero (depending on the COMET model used), and a
## plain '^[0-9]' filter would silently drop those entries.
${LEADERBOARDS_COMET}: ${UPDATE_SCORE_DIRS}
	@echo "sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"
	@cat $(dir $@)comet-scores*.txt | grep -E '^-?[0-9]' | sort -k1,1nr | uniq -f1 > $@.sorted
	@rm -f $(dir $@)comet-scores*.txt
	@mv $@.sorted $@
|
||||
|
||||
|
2
tools/jq
2
tools/jq
@ -1 +1 @@
|
||||
Subproject commit f9afa950e26f5d548d955f92e83e6b8e10cc8438
|
||||
Subproject commit cff5336ec71b6fee396a95bb0e4bea365e0cd1e8
|
@ -1 +1 @@
|
||||
Subproject commit 95720ae19fa21b1726787fb2db57535cafba84fa
|
||||
Subproject commit e27da623938b84f9abe600774af6fad4fd5f1dd6
|
Loading…
Reference in New Issue
Block a user