diff --git a/OPUS-MT-leaderboard b/OPUS-MT-leaderboard
index 7d96d730..7f01e63e 160000
--- a/OPUS-MT-leaderboard
+++ b/OPUS-MT-leaderboard
@@ -1 +1 @@
-Subproject commit 7d96d730f001e6f634856cf973f1cad83ed64a9d
+Subproject commit 7f01e63e1758c45067537f0e241b6cabb4e1c031
diff --git a/tatoeba/eval/Makefile b/tatoeba/eval/Makefile
index dae9f552..f4d3f078 100644
--- a/tatoeba/eval/Makefile
+++ b/tatoeba/eval/Makefile
@@ -8,6 +8,7 @@ include Makefile.def
 #-------------------------------------------------
 ## make all evaluation zip-files
 #-------------------------------------------------
+
 .PHONY: all
 all: ${MODEL_EVALZIPS}
 
@@ -16,8 +17,6 @@ all: ${MODEL_EVALZIPS}
 
 .PHONY: first
 first: $(firstword ${MODEL_EVALZIPS})
 
-
-
 ## do things in reverse order
 ## (just to start another job)
@@ -28,13 +27,54 @@ MODEL_EVALZIPS_REVERSE = $(call reverse,${MODEL_EVALZIPS})
 
 all-reverse: ${MODEL_EVALZIPS_REVERSE}
 
-add-new-metrics:
-	${MAKE} -j16 METRICS="spbleu chrf++" all
+
+## only do COMET scores
+
+all-comet:
+	make METRICS="comet" all
 
 all-comet-reverse:
 	make METRICS="comet" all-reverse
 
+
+##---------------------------------------------------
+## pack evaluation files if a model directory exists
+##---------------------------------------------------
+
+MODEL_PACK_EVAL := ${patsubst %.zip,%.pack,${MODEL_DISTS}}
+
+.PHONY: pack-all-model-scores
+pack-all-model-scores: ${MODEL_PACK_EVAL}
+
+.PHONY: ${MODEL_PACK_EVAL}
+${MODEL_PACK_EVAL}:
+	if [ -d ${MODEL_HOME}/$(@:.pack=) ]; then \
+	  ${MAKE} MODEL_DISTS=$(@:.pack=.zip) pack-model-scores; \
+	fi
+
+
+##------------------
+## register scores
+##------------------
+
+# phony targets to register model scores
+MODEL_REGISTER := ${patsubst %.zip,%.register,${MODEL_DISTS}}
+
+register-all-metrics:
+	${MAKE} ${MODEL_REGISTER}
+
+# only register selected metrics
+register-new-metrics:
+	${MAKE} METRICS="spbleu chrf++" ${MODEL_REGISTER}
+
+.PHONY: ${MODEL_REGISTER}
+${MODEL_REGISTER}:
+	${MAKE} MODEL_DISTS=$(@:.register=.zip) fetch-model-scores
+	${MAKE} MODEL_DISTS=$(@:.register=.zip) model-score-files
+	${MAKE} -f Makefile.register MODEL_DISTS=$(@:.register=.zip) register-scores
+	${MAKE} MODEL_DISTS=$(@:.register=.zip) pack-model-scores
+
 
 
 #-------------------------------------------------
 ## phony targets to evaluate only new models
@@ -98,6 +138,14 @@ scores model-scores: ${MODEL_EVAL_SCORES}
 	  ${MAKE} ${MODEL_EVAL_SCORES}; \
 	fi
 
+
+## only create model score files from individual benchmark scores
+## but don't run new evaluations if benchmark scores do not exist yet
+
+.PHONY: model-score-files
+model-score-files: ${MODEL_EVAL_SCORES}
+
+
 ##-------------------------------------------------
 ## evaluate the model with all benchmarks available
 ## register the scores and update the leaderboard
@@ -109,15 +157,21 @@ eval-model: ${MODEL_EVAL_SCORES}
 	${MAKE} model-scores
 	if [ -e $< ]; then \
 	  ${MAKE} -f Makefile.register register-scores; \
-	  ${MAKE} -f Makefile.register sort-leaderboards; \
 	fi
 	${MAKE} pack-model-scores
 
+
+# delay this to avoid race conditions in case several
+# updates run simultaneously
+#
+#	${MAKE} -f Makefile.register sort-leaderboards; \
+
+
 .PHONY: pack-model-scores
 pack-model-scores:
 	if [ -d ${MODEL_DIR} ]; then \
-	  cd ${MODEL_DIR} && zip ${MODEL_EVALZIP} *.*; \
-	  rm -f ${MODEL_DIR}/*.*; \
+	  cd ${MODEL_DIR} && find . -name '*.*' | xargs zip ${MODEL_EVALZIP}; \
+	  find ${MODEL_DIR} -name '*.*' -delete; \
 	  rmdir ${MODEL_DIR}; \
 	fi
 
@@ -138,6 +192,12 @@ cleanup:
 .PHONY: fetch
 fetch: ${WORK_DIR}/model/decoder.yml ${MODEL_DIR}
 
+.PHONY: fetch-model
+fetch-model: ${WORK_DIR}/model/decoder.yml
+
+.PHONY: fetch-model-scores
+fetch-model-scores: ${MODEL_DIR}
+
 
 ## prepare the model evaluation file directory
 ## fetch already existing evaluations
 
@@ -356,11 +416,6 @@ ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.eval: ${INDIVIDUAL_EVAL_FILES}
 	rev $@ | sort | uniq -f2 | rev > $@.uniq
 	mv -f $@.uniq $@
 
-#	cat ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.bleu \
-#	    ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.chrf > $@
-#	tail -1 ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.comet | \
-#	sed 's/^.*score:/COMET+default =/' >> $@
-
 
 ${MODEL_DIR}/${TESTSET}.${LANGPAIR}.compare:
 	${MAKE} ${WORK_DIR}/${TESTSET}.${LANGPAIR}.output
 
@@ -574,22 +629,3 @@ ${MODEL_DIR}.comet-scores.txt: ${MODEL_SCORES}
 	fi
 
 
-
-
-## OLD: extract BLEU and chrF scores from the combined score file
-
-# ${MODEL_BLEUSCORES}: ${MODEL_SCORES}
-#	cut -f1,2,4 ${MODEL_SCORES} | \
-#	sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
-#	sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
-#	rev | uniq -f1 | rev > $@
-
-
-# ${MODEL_CHRFSCORES}: ${MODEL_SCORES}
-#	cut -f1,2,3 ${MODEL_SCORES} |\
-#	sed 's/\(news.*[0-9][0-9][0-9][0-9]\)\-[a-z][a-z][a-z][a-z] /\1 /' |\
-#	sed -e 's/\(news.*2021\)\.[a-z][a-z]\-[a-z][a-z] /\1 /' |\
-#	rev | uniq -f1 | rev > $@
-
-
-
diff --git a/tatoeba/eval/Makefile.register b/tatoeba/eval/Makefile.register
index 32775bae..709d229f 100644
--- a/tatoeba/eval/Makefile.register
+++ b/tatoeba/eval/Makefile.register
@@ -36,39 +36,3 @@ ${SCOREFILES_DONE}:
 ${MODEL_DIR}.%-scores.registered: ${MODEL_DIR}.%-scores.txt
 	@touch $@
 
-
-##-------------------------------------------------------------------
-## UPDATE_SCORE_DIRS = directory that contains new scores
-## LEADERBOARDS = list of BLEU leader boards that need to be sorted
-##-------------------------------------------------------------------
-
-UPDATE_SCORE_DIRS := $(sort $(dir ${wildcard ${LEADERBOARD_DIR}/*/*/*.unsorted.txt}))
-LEADERBOARDS := $(foreach m,${METRICS},$(patsubst %,%$(m)-scores.txt,${UPDATE_SCORE_DIRS}))
-
-
-## sort all leaderboards for which we have new unsorted scores
-sort-leaderboards: ${LEADERBOARDS}
-
-${LEADERBOARDS}: ${UPDATE_SCORE_DIRS}
-	@if [ -e $@ ]; then \
-	  if [ $(words $(wildcard ${@:.txt=}*.unsorted.txt)) -gt 0 ]; then \
-	    echo "merge and sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"; \
-	    sort -k2,2 -k1,1nr $@ > $@.old.txt; \
-	    cat $(wildcard ${@:.txt=}*.unsorted.txt) | \
-	    grep '^[0-9\-]' | sort -k2,2 -k1,1nr > $@.new.txt; \
-	    sort -m $@.new.txt $@.old.txt |\
-	    uniq -f1 | sort -k1,1nr -u > $@.sorted; \
-	    rm -f $@.old.txt $@.new.txt; \
-	    rm -f $(wildcard ${@:.txt=}*.unsorted.txt); \
-	    mv $@.sorted $@; \
-	  fi; \
-	else \
-	  if [ $(words $(wildcard ${@:.txt=}*.txt)) -gt 0 ]; then \
-	    echo "merge and sort ${patsubst ${LEADERBOARD_DIR}/%,%,$@}"; \
-	    cat $(wildcard ${@:.txt=}*.txt) | grep '^[0-9\-]' |\
-	    sort -k2,2 -k1,1nr | uniq -f1 | sort -k1,1nr -u > $@.sorted; \
-	    rm -f $(wildcard ${@:.txt=}*.txt); \
-	    mv $@.sorted $@; \
-	  fi; \
-	fi
-
diff --git a/tools/model_info.py b/tools/model_info.py
new file mode 100644
index 00000000..2399305b
--- /dev/null
+++ b/tools/model_info.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import numpy as np
+import os
+
+
+DESC = "Prints keys and values from model.npz file."
+
+non_parameter_keys = ["special:model.yml"]  # embedded model config, not a weight tensor
+
+
+def main():
+    args = parse_args()
+    model = np.load(args.model)
+
+    file_size = os.path.getsize(args.model)
+
+    if args.key:
+        if args.key not in model:
+            print("Key not found", file=sys.stderr)
+            sys.exit(1)
+        else:
+            print(model[args.key])
+    else:
+        objects = 0
+        parameters = 0
+        for key in (k for k in model if k not in non_parameter_keys):
+            objects += 1
+            parameters += model[key].size
+            if not args.summary:
+                print(key, model[key].shape)
+
+        # Summary
+        parameters /= 1e6
+        file_size = np.ceil(file_size / 1024**2)
+        print(f"{args.model}: {objects} objects with a total of {parameters:.1f}M parameters; {file_size:.0f} MB")
+
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=DESC)
+    parser.add_argument("-m", "--model", metavar='model.npz', help="model file", required=True)
+    parser.add_argument("-k", "--key", help="print value for specific key")
+    parser.add_argument("-s", "--summary", action="store_true",
+                        help="only show summary")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    main()
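
The packing and registration targets added to tatoeba/eval/Makefile are driven by ${MODEL_DISTS} and ${METRICS} from Makefile.def. A minimal usage sketch, assuming the Makefile.def defaults and tatoeba/eval as the working directory (invocations only; results depend on the local model setup):

# re-pack evaluation files for every model that still has an unpacked directory
make pack-all-model-scores

# fetch existing scores, rebuild per-model score files, register them, and re-pack
make register-all-metrics

# the same, restricted to the spbleu and chrf++ leaderboards
make register-new-metrics

# COMET-only evaluation over all models, in forward or reverse order
make all-comet
make all-comet-reverse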
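The new fetch-model and fetch-model-scores targets split the prerequisites of the old fetch target, so that score registration only needs the already-packed evaluation files and not the model package itself. A sketch for a single model; example-model.zip is a hypothetical distribution name and would have to be (or be passed as) an entry of ${MODEL_DISTS}:

# fetch only the existing evaluation files for one model (hypothetical zip name)
make MODEL_DISTS=example-model.zip fetch-model-scores

# rebuild its score files from the unpacked benchmark scores, without new evaluations
make MODEL_DISTS=example-model.zip model-score-files

# or drive the full fetch / score-file / register / re-pack sequence via the .register pattern
make MODEL_DISTS=example-model.zip example-model.register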
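tools/model_info.py inspects a model.npz checkpoint with plain NumPy. A usage sketch; the file path and the key name below are illustrative only, not taken from this patch:

# list every tensor with its shape, followed by a one-line summary
python3 tools/model_info.py -m model.npz

# print only the summary (object count, parameters in millions, file size in MB)
python3 tools/model_info.py --model model.npz --summary

# dump a single tensor; 'Wemb' is just an example key, actual names depend on the model
python3 tools/model_info.py -m model.npz -k Wemb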