mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2025-01-06 01:37:00 +03:00
fixed model names
This commit is contained in:
parent
2887762198
commit
f97bc1895c
11
Makefile
11
Makefile
@ -181,6 +181,17 @@ finished:
|
||||
echo "${WORKDIR}/${MODEL_BASENAME} unfinished"; \
|
||||
fi
|
||||
|
||||
## remove job files if no trained file exists
##
## Walks over all language pairs in ALL_LANG_PAIRS. If the work directory
## WORKHOME/<pair> holds a train.submit file but `find` locates no file
## matching *.PRE_SRC-PRE_TRG.*.best-perplexity.npz anywhere below it,
## the train.submit file is deleted; every removal is echoed first.
##
## The target is a command and never creates a file of its own name,
## so it is declared phony to keep a stray file from disabling it.
.PHONY: delete-broken-submit
delete-broken-submit:
	for l in ${ALL_LANG_PAIRS}; do \
	  if [ -e "${WORKHOME}/$$l/train.submit" ]; then \
	    if [ `find "${WORKHOME}/$$l" -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -eq 0 ]; then \
	      echo "rm -f ${WORKHOME}/$$l/train.submit"; \
	      rm -f "${WORKHOME}/$$l/train.submit"; \
	    fi; \
	  fi; \
	done
|
||||
|
||||
|
||||
## resume training on an existing model
|
||||
resume:
|
||||
|
@ -380,3 +380,29 @@ link-old-models:
|
||||
rm -f ${MODEL_TRANSLATE}.eval
|
||||
rm -f ${MODEL_TRANSLATE}.compare
|
||||
|
||||
|
||||
## Files in the work directory that still carry the old dataset name.
## The lists are only defined when the dataset name really changed;
## they stay recursively expanded, so the globs are evaluated on use.
ifneq (${DATASET},${OLDDATASET})
  DECODERFILES = $(wildcard ${WORKDIR}/*${OLDDATASET}*.decoder.yml)
  MODELFILES   = $(wildcard ${WORKDIR}/*${OLDDATASET}*.*)
  TRAINFILES   = $(wildcard ${WORKDIR}/train/*${OLDDATASET}*.*)
endif
|
||||
|
||||
|
||||
## fix model names from old style
## where models trained on a single corpus got the name
## of that corpus
## Now: always use 'opus' as the name of the default dataset
##
## Only active when DATASET differs from OLDDATASET. Steps:
##  1. in every file of DECODERFILES replace the first occurrence of
##     OLDDATASET on each line by DATASET (perl keeps a .bak copy);
##     \Q...\E makes perl treat the old name literally, so characters
##     such as '+' or '.' in a dataset name are not regex operators
##  2. rename every file of TRAINFILES and leave a symlink under the
##     old name that points to the renamed file
##  3. rename every file of MODELFILES
##
## Only the file name is rewritten, never the directory part of the
## path, so a work directory that happens to contain the old dataset
## name is left alone. The symlink target is the bare new file name
## (relative to the link's own directory), which resolves correctly
## for relative as well as absolute WORKDIR values.
## Entries of TRAINFILES that are already symlinks are skipped:
## they are the links left by an earlier run, and moving them would
## overwrite the renamed data file with a link to itself.
.PHONY: fix-model-names
fix-model-names:
ifneq (${DATASET},${OLDDATASET})
	@test -n '${OLDDATASET}' || { echo 'fix-model-names: OLDDATASET is empty' >&2; exit 1; }
	for f in ${DECODERFILES}; do \
	  perl -i.bak -pe 's/\Q${OLDDATASET}\E/${DATASET}/' "$$f"; \
	done
	for f in ${TRAINFILES}; do \
	  if [ -L "$$f" ]; then continue; fi; \
	  dir=`dirname "$$f"`; \
	  new=`basename "$$f" | sed 's/${OLDDATASET}/${DATASET}/'`; \
	  mv -f "$$f" "$$dir/$$new"; \
	  ln -s "$$new" "$$f"; \
	done
	for f in ${MODELFILES}; do \
	  dir=`dirname "$$f"`; \
	  new=`basename "$$f" | sed 's/${OLDDATASET}/${DATASET}/'`; \
	  mv -f "$$f" "$$dir/$$new"; \
	done
endif
|
||||
|
@ -20,7 +20,7 @@
|
||||
## - make dist-allmodels
|
||||
%-allmodels:
|
||||
for l in ${ALL_LANG_PAIRS}; do \
|
||||
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
|
||||
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
|
||||
${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
|
||||
TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmodels=}; \
|
||||
fi \
|
||||
@ -29,7 +29,7 @@
|
||||
## only bilingual models
|
||||
%-allbilingual:
|
||||
for l in ${ALL_BILINGUAL_MODELS}; do \
|
||||
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
|
||||
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
|
||||
${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
|
||||
TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allbilingual=}; \
|
||||
fi \
|
||||
@ -38,7 +38,7 @@
|
||||
## only multilingual models
|
||||
%-allmultilingual:
|
||||
for l in ${ALL_MULTILINGUAL_MODELS}; do \
|
||||
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.npz' | wc -l` -gt 0 ]; then \
|
||||
if [ `find ${WORKHOME}/$$l -name '*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz' | wc -l` -gt 0 ]; then \
|
||||
${MAKE} SRCLANGS="`echo $$l | cut -f1 -d'-' | sed 's/\\+/ /g'`" \
|
||||
TRGLANGS="`echo $$l | cut -f2 -d'-' | sed 's/\\+/ /g'`" ${@:-allmultilingual=}; \
|
||||
fi \
|
||||
@ -74,10 +74,12 @@
|
||||
## File names for the 'opus+bt' model:
##   BT_MODEL        model stem (sub directory, data set name, size, pre-processing)
##   BT_MODEL_BASE   stem plus model type and model number
##   BT_MODEL_START  .npz file in WORKDIR that receives a copy of MODEL_FINAL
##   BT_MODEL_VOCAB  vocabulary file in WORKDIR that receives a copy of MODEL_VOCAB
BT_MODEL       = $(MODEL_SUBDIR)opus+bt$(TRAINSIZE).$(PRE_SRC)-$(PRE_TRG)
BT_MODEL_BASE  = $(BT_MODEL).$(MODELTYPE).model$(NR)
BT_MODEL_START = $(WORKDIR)/$(BT_MODEL_BASE).npz
BT_MODEL_VOCAB = $(WORKDIR)/$(BT_MODEL).vocab.$(MODEL_VOCABTYPE)
|
||||
|
||||
%-add-backtranslations:
|
||||
ifneq (${wildcard ${MODEL_FINAL}},)
|
||||
cp ${MODEL_FINAL} ${BT_MODEL_START}
|
||||
cp ${MODEL_VOCAB} ${BT_MODEL_VOCAB}
|
||||
endif
|
||||
${MAKE} DATASET=opus+bt \
|
||||
CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \
|
||||
|
@ -24,7 +24,7 @@ endif
|
||||
echo '#SBATCH -o ${DATASET}-${@:.submit=}.out.%j' >> $@
|
||||
echo '#SBATCH -e ${DATASET}-${@:.submit=}.err.%j' >> $@
|
||||
echo '#SBATCH --mem=${HPC_MEM}' >> $@
|
||||
# echo '#SBATCH --exclude=r18g05' >> $@
|
||||
echo '#SBATCH --exclude=r18g08' >> $@
|
||||
ifdef EMAIL
|
||||
echo '#SBATCH --mail-type=END' >> $@
|
||||
echo '#SBATCH --mail-user=${EMAIL}' >> $@
|
||||
|
Loading…
Reference in New Issue
Block a user