model evaluation

This commit is contained in:
Joerg Tiedemann 2023-01-15 14:24:32 +02:00
parent bebe532a26
commit 37e5d3cc33
4 changed files with 120 additions and 68 deletions

@@ -1 +1 @@
-Subproject commit 7d96d730f001e6f634856cf973f1cad83ed64a9d
+Subproject commit 7f01e63e1758c45067537f0e241b6cabb4e1c031


@@ -8,6 +8,7 @@ include Makefile.def
#-------------------------------------------------
## make all evaluation zip-files
#-------------------------------------------------
.PHONY: all
all: ${MODEL_EVALZIPS}
@@ -16,8 +17,6 @@ all: ${MODEL_EVALZIPS}
.PHONY: first
first: $(firstword ${MODEL_EVALZIPS})
## do things in reverse order
## (just to start another job)
@@ -28,13 +27,54 @@ MODEL_EVALZIPS_REVERSE = $(call reverse,${MODEL_EVALZIPS})
all-reverse: ${MODEL_EVALZIPS_REVERSE}
add-new-metrics:
${MAKE} -j16 METRICS="spbleu chrf++" all
## only do COMET scores
all-comet:
	${MAKE} METRICS="comet" all
all-comet-reverse:
	${MAKE} METRICS="comet" all-reverse
##---------------------------------------------------
## pack evaluation files if a model directory exists
##---------------------------------------------------
MODEL_PACK_EVAL := ${patsubst %.zip,%.pack,${MODEL_DISTS}}
.PHONY: pack-all-model-scores
pack-all-model-scores: ${MODEL_PACK_EVAL}
.PHONY: ${MODEL_PACK_EVAL}
${MODEL_PACK_EVAL}:
if [ -d ${MODEL_HOME}/$(@:.pack=) ]; then \
${MAKE} MODEL_DISTS=$(@:.pack=.zip) pack-model-scores; \
fi
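As a worked example with a hypothetical distribution name: for MODEL_DISTS = eng-fin.zip, MODEL_PACK_EVAL contains the phony target eng-fin.pack; in the recipe, $(@:.pack=) strips the suffix to give the model directory name eng-fin, and $(@:.pack=.zip) restores the zip name that is passed on to pack-model-scores.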
##------------------
## register scores
##------------------
# phony targets to register model scores
MODEL_REGISTER := ${patsubst %.zip,%.register,${MODEL_DISTS}}
register-all-metrics:
${MAKE} ${MODEL_REGISTER}
# only register selected metrics
register-new-metrics:
${MAKE} METRICS="spbleu chrf++" ${MODEL_REGISTER}
.PHONY: ${MODEL_REGISTER}
${MODEL_REGISTER}:
${MAKE} MODEL_DISTS=$(@:.register=.zip) fetch-model-scores
${MAKE} MODEL_DISTS=$(@:.register=.zip) model-score-files
${MAKE} -f Makefile.register MODEL_DISTS=$(@:.register=.zip) register-scores
${MAKE} MODEL_DISTS=$(@:.register=.zip) pack-model-scores
#-------------------------------------------------
## phony targets to evaluate only new models
@@ -98,6 +138,14 @@ scores model-scores: ${MODEL_EVAL_SCORES}
${MAKE} ${MODEL_EVAL_SCORES}; \
fi
## only create model score files from individual benchmark scores
## but don't run new evaluations if benchmark scores do not exist yet
.PHONY: model-score-files
model-score-files: ${MODEL_EVAL_SCORES}
##-------------------------------------------------
## evaluate the model with all benchmarks available
## register the scores and update the leaderboard
@@ -109,15 +157,21 @@ eval-model: ${MODEL_EVAL_SCORES}
${MAKE} model-scores
if [ -e $< ]; then \
${MAKE} -f Makefile.register register-scores; \
${MAKE} -f Makefile.register sort-leaderboards; \
fi
${MAKE} pack-model-scores
# delay this to avoid racing conditions in case several
# updates run simultaneously
#
# ${MAKE} -f Makefile.register sort-leaderboards; \
.PHONY: pack-model-scores
pack-model-scores:
if [ -d ${MODEL_DIR} ]; then \
cd ${MODEL_DIR} && zip ${MODEL_EVALZIP} *.*; \
rm -f ${MODEL_DIR}/*.*; \
cd ${MODEL_DIR} && find . -name '*.*' | xargs zip ${MODEL_EVALZIP}; \
find ${MODEL_DIR} -name '*.*' -delete; \
rmdir ${MODEL_DIR}; \
fi
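For reference, a minimal Python sketch of the same pack step (the helper name is hypothetical, and it archives every file under the directory rather than only names matching '*.*'):

import os
import shutil
import zipfile

def pack_model_scores(model_dir: str, evalzip: str) -> None:
    with zipfile.ZipFile(evalzip, "a") as zf:  # append mode, like `zip`
        for root, _, files in os.walk(model_dir):
            for name in files:
                path = os.path.join(root, name)
                # store paths relative to model_dir, like `cd ... && zip`
                zf.write(path, os.path.relpath(path, model_dir))
    shutil.rmtree(model_dir)  # replaces the find -delete / rmdir pair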
@@ -138,6 +192,12 @@ cleanup:
.PHONY: fetch
fetch: ${WORK_DIR}/model/decoder.yml ${MODEL_DIR}
.PHONY: fetch-model
fetch-model: ${WORK_DIR}/model/decoder.yml
.PHONY: fetch-model-scores
fetch-model-scores: ${MODEL_DIR}
## prepare the model evaluation file directory
## fetch already existing evaluations
@@ -356,11 +416,6 @@ ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.eval: ${INDIVIDUAL_EVAL_FILES}
rev $@ | sort | uniq -f2 | rev > $@.uniq
mv -f $@.uniq $@
# cat ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.bleu \
# ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.chrf > $@
# tail -1 ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.comet | \
# sed 's/^.*score:/COMET+default =/' >> $@
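The rev/sort/uniq pipeline above deduplicates score lines on everything except their last two whitespace-separated fields: reversing each line makes uniq -f2 skip the trailing fields instead of the leading ones. A rough Python equivalent, assuming lines of the hypothetical form "BLEU = 35.4":

def dedup_scores(lines):
    kept, seen = [], set()
    for line in sorted(lines):           # note: the shell sorts reversed lines
        key = tuple(line.split()[:-2])   # drop the trailing "= <score>"
        if key not in seen:
            seen.add(key)
            kept.append(line)
    return kept

Because the shell version sorts the reversed lines, which duplicate survives may differ, but the set of kept keys is the same.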
${MODEL_DIR}/${TESTSET}.${LANGPAIR}.compare:
${MAKE} ${WORK_DIR}/${TESTSET}.${LANGPAIR}.output
@@ -574,22 +629,3 @@ ${MODEL_DIR}.comet-scores.txt: ${MODEL_SCORES}
fi
## OLD: extract BLEU and chrF scores from the combined score file
# ${MODEL_BLEUSCORES}: ${MODEL_SCORES}
# cut -f1,2,4 ${MODEL_SCORES} | \
# sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
# sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
# rev | uniq -f1 | rev > $@
# ${MODEL_CHRFSCORES}: ${MODEL_SCORES}
# cut -f1,2,3 ${MODEL_SCORES} |\
# sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
# sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
# rev | uniq -f1 | rev > $@


@@ -36,39 +36,3 @@ ${SCOREFILES_DONE}: ${MODEL_DIR}.%-scores.registered: ${MODEL_DIR}.%-scores.txt
@touch $@
##-------------------------------------------------------------------
## UPDATE_SCORE_DIRS = directories that contain new scores
## LEADERBOARDS      = list of leaderboards (one per metric) that need to be sorted
##-------------------------------------------------------------------
UPDATE_SCORE_DIRS := $(sort $(dir ${wildcard ${LEADERBOARD_DIR}/*/*/*.unsorted.txt}))
LEADERBOARDS := $(foreach m,${METRICS},$(patsubst %,%$(m)-scores.txt,${UPDATE_SCORE_DIRS}))
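With a hypothetical layout such as LEADERBOARD_DIR/fin-eng/flores200/bleu-scores.unsorted.txt, UPDATE_SCORE_DIRS picks up the directory LEADERBOARD_DIR/fin-eng/flores200/, and for each metric m in METRICS the file LEADERBOARD_DIR/fin-eng/flores200/m-scores.txt is added to LEADERBOARDS as a target to re-sort.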
## sort all leaderboards for which we have new unsorted scores
sort-leaderboards: ${LEADERBOARDS}
${LEADERBOARDS}: ${UPDATE_SCORE_DIRS}
@if [ -e $@ ]; then \
if [ $(words $(wildcard ${@:.txt=}*.unsorted.txt)) -gt 0 ]; then \
echo "merge and sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"; \
sort -k2,2 -k1,1nr $@ > $@.old.txt; \
cat $(wildcard ${@:.txt=}*.unsorted.txt) | \
grep '^[0-9\-]' | sort -k2,2 -k1,1nr > $@.new.txt; \
sort -m $@.new.txt $@.old.txt |\
uniq -f1 | sort -k1,1nr -u > $@.sorted; \
rm -f $@.old.txt $@.new.txt; \
rm -f $(wildcard ${@:.txt=}*.unsorted.txt); \
mv $@.sorted $@; \
fi; \
else \
if [ $(words $(wildcard ${@:.txt=}*.txt)) -gt 0 ]; then \
echo "merge and sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"; \
cat $(wildcard ${@:.txt=}*.txt) | grep '^[0-9\-]' |\
sort -k2,2 -k1,1nr | uniq -f1 | sort -k1,1nr -u > $@.sorted; \
rm -f $(wildcard ${@:.txt=}*.txt); \
mv $@.sorted $@; \
fi; \
fi
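A compact sketch of the merge logic in this recipe (the helper is hypothetical; it assumes leaderboard lines of the form "<score><TAB><model>", which is what sort -k2,2 -k1,1nr and uniq -f1 imply):

import re

def merge_leaderboard(old_lines, new_lines):
    best = {}  # model -> best score, like sort -k2,2 -k1,1nr | uniq -f1
    for line in old_lines + new_lines:
        if not re.match(r"[0-9-]", line):  # like grep '^[0-9\-]'
            continue
        fields = line.split()
        if len(fields) < 2:
            continue
        score, model = float(fields[0]), fields[1]
        if model not in best or score > best[model]:
            best[model] = score
    # highest score first, like the final sort -k1,1nr
    return [f"{s}\t{m}" for m, s in sorted(best.items(), key=lambda x: -x[1])]

The shell version additionally deduplicates on the score key (sort -u), which can drop ties between different models; the sketch keeps them.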

tools/model_info.py (new file)

@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import argparse
import os
import sys

import numpy as np

DESC = "Prints keys and values from model.npz file."

# keys that hold metadata rather than model parameters
NON_PARAMETER_KEYS = ["special:model.yml"]


def main():
    args = parse_args()
    model = np.load(args.model)
    file_size = os.path.getsize(args.model)

    if args.key:
        if args.key not in model:
            print("Key not found", file=sys.stderr)
            sys.exit(1)
        print(model[args.key])
    else:
        objects = 0
        parameters = 0
        for key in (k for k in model if k not in NON_PARAMETER_KEYS):
            objects += 1
            parameters += model[key].size
            if not args.summary:
                print(key, model[key].shape)

        # summary: parameter count in millions, file size in MB
        parameters /= 1e6
        file_size = np.ceil(file_size / 1024**2)
        print(f"{args.model}: {objects} objects with a total of "
              f"{parameters:.1f}M parameters; {file_size:.0f} MB")


def parse_args():
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument("-m", "--model", metavar="model.npz",
                        help="model file", required=True)
    parser.add_argument("-k", "--key", help="print value for specific key")
    parser.add_argument("-s", "--summary", action="store_true",
                        help="only show summary")
    return parser.parse_args()


if __name__ == "__main__":
    main()
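Typical invocations (the checkpoint and key names are hypothetical):

    python3 tools/model_info.py -m model.npz --summary
    python3 tools/model_info.py -m model.npz -k decoder_ff_logit_out_b

The first prints only the summary line assembled in main() (object count, total parameters in millions, file size in MB); the second prints the tensor stored under the given key.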