fixed model names

This commit is contained in:
Joerg Tiedemann 2020-01-20 00:37:24 +02:00
parent 2887762198
commit f97bc1895c
4 changed files with 43 additions and 4 deletions

View File

@ -181,6 +181,17 @@ finished:
echo "${WORKDIR}/${MODEL_BASENAME} unfinished"; \
fi
## remove job files if no trained file exists
delete-broken-submit:
for l in ${ALL_LANG_PAIRS}; do \
if [ -e ${WORKHOME}/$$l/train.submit ]; then \
if [ ! `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
echo "rm -f ${WORKHOME}/$$l/train.submit"; \
rm -f ${WORKHOME}/$$l/train.submit; \
fi \
fi \
done
## resume training on an existing model
resume:

View File

@ -380,3 +380,29 @@ link-old-models:
rm -f ${MODEL_TRANSLATE}.eval
rm -f ${MODEL_TRANSLATE}.compare
ifneq (${DATASET},${OLDDATASET})
TRAINFILES = ${wildcard ${WORKDIR}/train/*${OLDDATASET}*.*}
MODELFILES = ${wildcard ${WORKDIR}/*${OLDDATASET}*.*}
DECODERFILES = ${wildcard ${WORKDIR}/*${OLDDATASET}*.decoder.yml}
endif
## fix model names from old style
## where models trained on a single corpus got the name
## of that corpus
## Now: always use 'opus' as the name of the default dataset
fix-model-names:
ifneq (${DATASET},${OLDDATASET})
for f in ${DECODERFILES}; do \
perl -i.bak -pe 's/${OLDDATASET}/${DATASET}/' $$f; \
done
for f in ${TRAINFILES}; do \
mv -f $$f `echo $$f | sed 's/${OLDDATASET}/${DATASET}/'`; \
ln -s `echo $$f | sed 's/${OLDDATASET}/${DATASET}/'` $$f; \
done
for f in ${MODELFILES}; do \
mv -f $$f `echo $$f | sed 's/${OLDDATASET}/${DATASET}/'`; \
done
endif

View File

@ -20,7 +20,7 @@
## - make dist-allmodels
%-allmodels:
for l in ${ALL_LANG_PAIRS}; do \
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmodels=}; \
fi \
@ -29,7 +29,7 @@
## only bilingual models
%-allbilingual:
for l in ${ALL_BILINGUAL_MODELS}; do \
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allbilingual=}; \
fi \
@ -38,7 +38,7 @@
## only bilingual models
%-allmultilingual:
for l in ${ALL_MULTILINGUAL_MODELS}; do \
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmultilingual=}; \
fi \
@ -74,10 +74,12 @@
BT_MODEL = ${MODEL_SUBDIR}opus+bt${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}
BT_MODEL_BASE = ${BT_MODEL}.${MODELTYPE}.model${NR}
BT_MODEL_START = ${WORKDIR}/${BT_MODEL_BASE}.npz
BT_MODEL_VOCAB = ${WORKDIR}/${BT_MODEL}.vocab.${MODEL_VOCABTYPE}
%-add-backtranslations:
ifneq (${wildcard ${MODEL_FINAL}},)
cp ${MODEL_FINAL} ${BT_MODEL_START}
cp ${MODEL_VOCAB} ${BT_MODEL_VOCAB}
endif
${MAKE} DATASET=opus+bt \
CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \

View File

@ -24,7 +24,7 @@ endif
echo '#SBATCH -o ${DATASET}-${@:.submit=}.out.%j' >> $@
echo '#SBATCH -e ${DATASET}-${@:.submit=}.err.%j' >> $@
echo '#SBATCH --mem=${HPC_MEM}' >> $@
# echo '#SBATCH --exclude=r18g05' >> $@
echo '#SBATCH --exclude=r18g08' >> $@
ifdef EMAIL
echo '#SBATCH --mail-type=END' >> $@
echo '#SBATCH --mail-user=${EMAIL}' >> $@