mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-08-16 07:40:39 +03:00
model evaluation
This commit is contained in:
parent
bebe532a26
commit
37e5d3cc33
@ -1 +1 @@
|
||||
Subproject commit 7d96d730f001e6f634856cf973f1cad83ed64a9d
|
||||
Subproject commit 7f01e63e1758c45067537f0e241b6cabb4e1c031
|
@ -8,6 +8,7 @@ include Makefile.def
|
||||
#-------------------------------------------------
|
||||
## make all evaluation zip-files
|
||||
#-------------------------------------------------
|
||||
|
||||
.PHONY: all
|
||||
all: ${MODEL_EVALZIPS}
|
||||
|
||||
@ -16,8 +17,6 @@ all: ${MODEL_EVALZIPS}
|
||||
.PHONY: first
|
||||
first: $(firstword ${MODEL_EVALZIPS})
|
||||
|
||||
|
||||
|
||||
## do things in reverse order
|
||||
## (just to start another job)
|
||||
|
||||
@ -28,13 +27,54 @@ MODEL_EVALZIPS_REVERSE = $(call reverse,${MODEL_EVALZIPS})
|
||||
|
||||
all-reverse: ${MODEL_EVALZIPS_REVERSE}
|
||||
|
||||
add-new-metrics:
|
||||
${MAKE} -j16 METRICS="spbleu chrf++" all
|
||||
|
||||
|
||||
## only do COMET scores
|
||||
|
||||
all-comet:
|
||||
make METRICS="comet" all
|
||||
|
||||
all-comet-reverse:
|
||||
make METRICS="comet" all-reverse
|
||||
|
||||
|
||||
##---------------------------------------------------
|
||||
## pack evaluation files if a model directory exists
|
||||
##---------------------------------------------------
|
||||
|
||||
MODEL_PACK_EVAL := ${patsubst %.zip,%.pack,${MODEL_DISTS}}
|
||||
|
||||
.PHONY: pack-all-model-scores
|
||||
pack-all-model-scores: ${MODEL_PACK_EVAL}
|
||||
|
||||
.PHONY: ${MODEL_PACK_EVAL}
|
||||
${MODEL_PACK_EVAL}:
|
||||
if [ -d ${MODEL_HOME}/$(@:.pack=) ]; then \
|
||||
${MAKE} MODEL_DISTS=$(@:.pack=.zip) pack-model-scores; \
|
||||
fi
|
||||
|
||||
|
||||
##------------------
|
||||
## register scores
|
||||
##------------------
|
||||
|
||||
# phony targets to register model scores
|
||||
MODEL_REGISTER := ${patsubst %.zip,%.register,${MODEL_DISTS}}
|
||||
|
||||
register-all-metrics:
|
||||
${MAKE} ${MODEL_REGISTER}
|
||||
|
||||
# only register selected metrics
|
||||
register-new-metrics:
|
||||
${MAKE} METRICS="spbleu chrf++" ${MODEL_REGISTER}
|
||||
|
||||
.PHONY: ${MODEL_REGISTER}
|
||||
${MODEL_REGISTER}:
|
||||
${MAKE} MODEL_DISTS=$(@:.register=.zip) fetch-model-scores
|
||||
${MAKE} MODEL_DISTS=$(@:.register=.zip) model-score-files
|
||||
${MAKE} -f Makefile.register MODEL_DISTS=$(@:.register=.zip) register-scores
|
||||
${MAKE} MODEL_DISTS=$(@:.register=.zip) pack-model-scores
|
||||
|
||||
|
||||
#-------------------------------------------------
|
||||
## phony targets to evaluate only new models
|
||||
@ -98,6 +138,14 @@ scores model-scores: ${MODEL_EVAL_SCORES}
|
||||
${MAKE} ${MODEL_EVAL_SCORES}; \
|
||||
fi
|
||||
|
||||
|
||||
## only create model score files from individual benchmark scores
|
||||
## but don't run new evaluations if benchmark scores do not exist yet
|
||||
|
||||
.PHONY: model-score-files
|
||||
model-score-files: ${MODEL_EVAL_SCORES}
|
||||
|
||||
|
||||
##-------------------------------------------------
|
||||
## evaluate the model with all benchmarks available
|
||||
## register the scores and update the leaderboard
|
||||
@ -109,15 +157,21 @@ eval-model: ${MODEL_EVAL_SCORES}
|
||||
${MAKE} model-scores
|
||||
if [ -e $< ]; then \
|
||||
${MAKE} -f Makefile.register register-scores; \
|
||||
${MAKE} -f Makefile.register sort-leaderboards; \
|
||||
fi
|
||||
${MAKE} pack-model-scores
|
||||
|
||||
|
||||
# delay this to avoid racing conditions in case several
|
||||
# updates run simultaneously
|
||||
#
|
||||
# ${MAKE} -f Makefile.register sort-leaderboards; \
|
||||
|
||||
|
||||
.PHONY: pack-model-scores
|
||||
pack-model-scores:
|
||||
if [ -d ${MODEL_DIR} ]; then \
|
||||
cd ${MODEL_DIR} && zip ${MODEL_EVALZIP} *.*; \
|
||||
rm -f ${MODEL_DIR}/*.*; \
|
||||
cd ${MODEL_DIR} && find . -name '*.*' | xargs zip ${MODEL_EVALZIP}; \
|
||||
find ${MODEL_DIR} -name '*.*' -delete; \
|
||||
rmdir ${MODEL_DIR}; \
|
||||
fi
|
||||
|
||||
@ -138,6 +192,12 @@ cleanup:
|
||||
.PHONY: fetch
|
||||
fetch: ${WORK_DIR}/model/decoder.yml ${MODEL_DIR}
|
||||
|
||||
.PHONY: fetch-model
|
||||
fetch-model: ${WORK_DIR}/model/decoder.yml
|
||||
|
||||
.PHONY: fetch-model-scores
|
||||
fetch-model-scores: ${MODEL_DIR}
|
||||
|
||||
|
||||
## prepare the model evaluation file directory
|
||||
## fetch already existing evaluations
|
||||
@ -356,11 +416,6 @@ ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.eval: ${INDIVIDUAL_EVAL_FILES}
|
||||
rev $@ | sort | uniq -f2 | rev > $@.uniq
|
||||
mv -f $@.uniq $@
|
||||
|
||||
# cat ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.bleu \
|
||||
# ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.chrf > $@
|
||||
# tail -1 ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.comet | \
|
||||
# sed 's/^.*score:/COMET+default =/' >> $@
|
||||
|
||||
|
||||
${MODEL_DIR}/${TESTSET}.${LANGPAIR}.compare:
|
||||
${MAKE} ${WORK_DIR}/${TESTSET}.${LANGPAIR}.output
|
||||
@ -574,22 +629,3 @@ ${MODEL_DIR}.comet-scores.txt: ${MODEL_SCORES}
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
||||
## OLD: extract BLEU and chrF scores from the combined score file
|
||||
|
||||
# ${MODEL_BLEUSCORES}: ${MODEL_SCORES}
|
||||
# cut -f1,2,4 ${MODEL_SCORES} | \
|
||||
# sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
|
||||
# sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
# rev | uniq -f1 | rev > $@
|
||||
|
||||
|
||||
# ${MODEL_CHRFSCORES}: ${MODEL_SCORES}
|
||||
# cut -f1,2,3 ${MODEL_SCORES} |\
|
||||
# sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
|
||||
# sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
|
||||
# rev | uniq -f1 | rev > $@
|
||||
|
||||
|
||||
|
||||
|
@ -36,39 +36,3 @@ ${SCOREFILES_DONE}: ${MODEL_DIR}.%-scores.registered: ${MODEL_DIR}.%-scores.txt
|
||||
@touch $@
|
||||
|
||||
|
||||
|
||||
##-------------------------------------------------------------------
|
||||
## UPDATE_SCORE_DIRS = directory that contains new scores
|
||||
## LEADERBOARDS = list of BLEU leader boards that need to be sorted
|
||||
##-------------------------------------------------------------------
|
||||
|
||||
UPDATE_SCORE_DIRS := $(sort $(dir ${wildcard ${LEADERBOARD_DIR}/*/*/*.unsorted.txt}))
|
||||
LEADERBOARDS := $(foreach m,${METRICS},$(patsubst %,%$(m)-scores.txt,${UPDATE_SCORE_DIRS}))
|
||||
|
||||
|
||||
## sort all leaderboards for which we have new unsorted scores
|
||||
sort-leaderboards: ${LEADERBOARDS}
|
||||
|
||||
${LEADERBOARDS}: ${UPDATE_SCORE_DIRS}
|
||||
@if [ -e $@ ]; then \
|
||||
if [ $(words $(wildcard ${@:.txt=}*.unsorted.txt)) -gt 0 ]; then \
|
||||
echo "merge and sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"; \
|
||||
sort -k2,2 -k1,1nr $@ > $@.old.txt; \
|
||||
cat $(wildcard ${@:.txt=}*.unsorted.txt) | \
|
||||
grep '^[0-9\-]' | sort -k2,2 -k1,1nr > $@.new.txt; \
|
||||
sort -m $@.new.txt $@.old.txt |\
|
||||
uniq -f1 | sort -k1,1nr -u > $@.sorted; \
|
||||
rm -f $@.old.txt $@.new.txt; \
|
||||
rm -f $(wildcard ${@:.txt=}*.unsorted.txt); \
|
||||
mv $@.sorted $@; \
|
||||
fi; \
|
||||
else \
|
||||
if [ $(words $(wildcard ${@:.txt=}*.txt)) -gt 0 ]; then \
|
||||
echo "merge and sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"; \
|
||||
cat $(wildcard ${@:.txt=}*.txt) | grep '^[0-9\-]' |\
|
||||
sort -k2,2 -k1,1nr | uniq -f1 | sort -k1,1nr -u > $@.sorted; \
|
||||
rm -f $(wildcard ${@:.txt=}*.txt); \
|
||||
mv $@.sorted $@; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
|
52
tools/model_info.py
Normal file
52
tools/model_info.py
Normal file
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
|
||||
DESC = "Prints keys and values from model.npz file."
|
||||
|
||||
non_parameter_keys = ["special:model.yml"]
|
||||
|
||||
|
||||
def main():
    """Load a Marian ``model.npz`` file and print its contents or a summary.

    With ``--key``, prints the value stored under that key and exits with
    status 1 if the key is missing.  Otherwise iterates over all parameter
    entries, printing each key and its shape (unless ``--summary``), and
    finishes with a one-line total of objects, parameters and file size.
    """
    args = parse_args()
    model = np.load(args.model)

    # Size of the model file on disk, in bytes (reported as MB below).
    file_size = os.path.getsize(args.model)

    if args.key:
        if args.key not in model:
            print("Key not found")
            # was: bare exit(1) — the builtin exit() is a site/REPL helper
            # and is not guaranteed to exist in all runtimes; sys.exit is.
            sys.exit(1)
        else:
            print(model[args.key])
    else:
        objects = 0
        parameters = 0
        # Skip metadata entries (e.g. the embedded YAML config) so only
        # real weight tensors are counted.
        for key in (k for k in model if k not in non_parameter_keys):
            objects += 1
            parameters += model[key].size
            if not args.summary:
                print(key, model[key].shape)

        # Summary: parameters in millions, file size rounded up to whole MB.
        parameters /= 1e6
        file_size = np.ceil(file_size / 1024**2)
        print(f"{args.model}: {objects} objects with a total of {parameters:.1f}M parameters; {file_size:.0f} MB")
|
||||
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line options for the model-info tool.

    Returns:
        argparse.Namespace with attributes ``model`` (required path to the
        ``model.npz`` file), ``key`` (optional key whose value is printed)
        and ``summary`` (bool, only show the summary line).
    """
    # was: description="" — use the module-level DESC so --help is informative.
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument("-m", "--model", metavar="model.npz",
                        help="model file", required=True)
    parser.add_argument("-k", "--key", help="print value for specific key")
    parser.add_argument("-s", "--summary", action="store_true",
                        help="only show summary")
    return parser.parse_args()
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user