From f97bc1895c3f6f6e40ca90fb85f3036b3bead42a Mon Sep 17 00:00:00 2001
From: Joerg Tiedemann
Date: Mon, 20 Jan 2020 00:37:24 +0200
Subject: [PATCH] fixed model names

---
Review notes (free text in this position is not part of the commit):
 * Makefile: new target delete-broken-submit removes
   ${WORKHOME}/<langpair>/train.submit when no *.best-perplexity.npz
   file is found below that language-pair directory.
 * Makefile.dist: new target fix-model-names rewrites ${OLDDATASET} to
   ${DATASET} inside the decoder configs (keeping a .bak copy) and renames
   the matching train/model files; it is a no-op when both names are equal.
   Compared with the first version of this patch, the name substitution is
   applied to the file name only (not to the directory part of the path),
   and the compatibility symlink left in ${WORKDIR}/train points to the
   bare new file name, so it resolves next to the renamed file even when
   ${WORKDIR} is a relative path. The link is only created if mv succeeded.
   The blob id on the Makefile.dist "index" line predates this adjustment.
 * Makefile.generic: the %-allmodels / %-allbilingual / %-allmultilingual
   loops now require a *.best-perplexity.npz file instead of any *.npz;
   %-add-backtranslations also copies ${MODEL_VOCAB} to ${BT_MODEL_VOCAB}.
 * Makefile.slurm: the --exclude line is active again and names r18g08.
   NOTE(review): the node name looks site-specific -- confirm that it should
   be written for every job.
 * NOTE(review): the patch was rebuilt from a whitespace-collapsed copy;
   indentation/alignment inside context lines is a best guess, apply with
   `git apply --ignore-whitespace` if a hunk is rejected.

 Makefile         | 11 +++++++++++
 Makefile.dist    | 26 ++++++++++++++++++++++++++
 Makefile.generic |  8 +++++---
 Makefile.slurm   |  2 +-
 4 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 51eb2cba..5b179096 100644
--- a/Makefile
+++ b/Makefile
@@ -181,6 +181,17 @@ finished:
 	  echo "${WORKDIR}/${MODEL_BASENAME} unfinished"; \
 	fi
 
+## remove job files if no trained file exists
+delete-broken-submit:
+	for l in ${ALL_LANG_PAIRS}; do \
+	  if [ -e ${WORKHOME}/$$l/train.submit ]; then \
+	    if [ ! `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
+	      echo "rm -f ${WORKHOME}/$$l/train.submit"; \
+	      rm -f ${WORKHOME}/$$l/train.submit; \
+	    fi \
+	  fi \
+	done
+
 
 ## resume training on an existing model
 resume:
diff --git a/Makefile.dist b/Makefile.dist
index cb92aa56..0892df3e 100644
--- a/Makefile.dist
+++ b/Makefile.dist
@@ -380,3 +380,29 @@ link-old-models:
 	rm -f ${MODEL_TRANSLATE}.eval
 	rm -f ${MODEL_TRANSLATE}.compare
 
+
+ifneq (${DATASET},${OLDDATASET})
+  TRAINFILES = ${wildcard ${WORKDIR}/train/*${OLDDATASET}*.*}
+  MODELFILES = ${wildcard ${WORKDIR}/*${OLDDATASET}*.*}
+  DECODERFILES = ${wildcard ${WORKDIR}/*${OLDDATASET}*.decoder.yml}
+endif
+
+
+## fix model names from old style
+## where models trained on a single corpus got the name
+## of that corpus
+## Now: always use 'opus' as the name of the default dataset
+
+fix-model-names:
+ifneq (${DATASET},${OLDDATASET})
+	for f in ${DECODERFILES}; do \
+	  perl -i.bak -pe 's/${OLDDATASET}/${DATASET}/' $$f; \
+	done
+	for f in ${TRAINFILES}; do \
+	  n=`basename $$f | sed 's/${OLDDATASET}/${DATASET}/'`; \
+	  mv -f $$f `dirname $$f`/$$n && ln -s $$n $$f; \
+	done
+	for f in ${MODELFILES}; do \
+	  mv -f $$f `dirname $$f`/`basename $$f | sed 's/${OLDDATASET}/${DATASET}/'`; \
+	done
+endif
diff --git a/Makefile.generic b/Makefile.generic
index 52151f4a..d03f1dc4 100644
--- a/Makefile.generic
+++ b/Makefile.generic
@@ -20,7 +20,7 @@
 ## - make dist-allmodels
 %-allmodels:
 	for l in ${ALL_LANG_PAIRS}; do \
-	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
+	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
 	    ${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
 		    TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmodels=}; \
 	  fi \
@@ -29,7 +29,7 @@
 ## only bilingual models
 %-allbilingual:
 	for l in ${ALL_BILINGUAL_MODELS}; do \
-	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
+	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
 	    ${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
 		    TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allbilingual=}; \
 	  fi \
@@ -38,7 +38,7 @@
 ## only bilingual models
 %-allmultilingual:
 	for l in ${ALL_MULTILINGUAL_MODELS}; do \
-	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
+	  if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
 	    ${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
 		    TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmultilingual=}; \
 	  fi \
@@ -74,10 +74,12 @@
 BT_MODEL = ${MODEL_SUBDIR}opus+bt${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}
 BT_MODEL_BASE = ${BT_MODEL}.${MODELTYPE}.model${NR}
 BT_MODEL_START = ${WORKDIR}/${BT_MODEL_BASE}.npz
+BT_MODEL_VOCAB = ${WORKDIR}/${BT_MODEL}.vocab.${MODEL_VOCABTYPE}
 
 %-add-backtranslations:
 ifneq (${wildcard ${MODEL_FINAL}},)
 	cp ${MODEL_FINAL} ${BT_MODEL_START}
+	cp ${MODEL_VOCAB} ${BT_MODEL_VOCAB}
 endif
 	${MAKE} DATASET=opus+bt \
 		CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \
diff --git a/Makefile.slurm b/Makefile.slurm
index 8a6ad3af..3b0ff36b 100644
--- a/Makefile.slurm
+++ b/Makefile.slurm
@@ -24,7 +24,7 @@ endif
 	echo '#SBATCH -o ${DATASET}-${@:.submit=}.out.%j' >> $@
 	echo '#SBATCH -e ${DATASET}-${@:.submit=}.err.%j' >> $@
 	echo '#SBATCH --mem=${HPC_MEM}' >> $@
-#	echo '#SBATCH --exclude=r18g05' >> $@
+	echo '#SBATCH --exclude=r18g08' >> $@
 ifdef EMAIL
 	echo '#SBATCH --mail-type=END' >> $@
 	echo '#SBATCH --mail-user=${EMAIL}' >> $@