mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-09-17 15:17:22 +03:00
Merge branch 'master' of github.com:Helsinki-NLP/OPUS-MT-train
This commit is contained in:
commit
0e7b3e173a
19
Makefile
19
Makefile
@ -378,3 +378,22 @@ train-and-start-bt-jobs: ${WORKDIR}/${MODEL}.${MODELTYPE}.model${NR}.done
|
|||||||
${MAKE} local-dist
|
${MAKE} local-dist
|
||||||
${MAKE} -C backtranslate MODELHOME=${MODELDIR} translate-all-wikis-jobs
|
${MAKE} -C backtranslate MODELHOME=${MODELDIR} translate-all-wikis-jobs
|
||||||
|
|
||||||
|
|
||||||
|
## all released model packages below models-tatoeba/ and, for each of them,
## the name of the pseudo-target that runs the vocabulary check on it
ALL_RELEASED_MODELS = ${wildcard models-tatoeba/*/*.zip}
ALL_VOCABS_FIXED    = ${patsubst %.zip,%.fixed-vocab,${ALL_RELEASED_MODELS}}

## check the vocabulary file of every released model package
## (declared phony: this is a command, not a file that is created)
.PHONY: fix-released-vocabs
fix-released-vocabs: ${ALL_VOCABS_FIXED}
|
||||||
|
|
||||||
|
## check the vocabulary file(s) inside a released model package
##
## - every archive member whose name ends in 'vocab.yml' is extracted into
##   the current directory and passed to scripts/fix_vocab.py
## - if a file <vocab>.bak exists afterwards, the recipe takes this as the
##   sign that the vocabulary was changed and puts both files back into the
##   zip archive
## - the extracted files are always removed again
##
## NOTE: the recipe does not create the target file, i.e. the check runs
##       every time the target is requested
## NOTE: files are extracted into the current directory; running this for
##       several packages in parallel may clash on identical file names

%.fixed-vocab: %.zip
	@for v in $$(unzip -l $< | grep 'vocab.yml$$' | sed 's/^.* //'); do \
	  unzip -o $< "$$v"; \
	  python3 scripts/fix_vocab.py "$$v"; \
	  if [ -e "$$v.bak" ]; then \
	    echo "update $$v in $<"; \
	    zip $< "$$v" "$$v.bak"; \
	  else \
	    echo "vocab $$v is fine in $<"; \
	  fi; \
	  rm -f "$$v" "$$v.bak"; \
	done
|
||||||
|
41
lib/allas.mk
41
lib/allas.mk
@ -20,6 +20,9 @@ WORK_DESTDIR ?= ${WORKHOME}
|
|||||||
WORK_CONTAINER ?= OPUS-MT-train_${notdir ${WORKHOME}}-${WHOAMI}
|
WORK_CONTAINER ?= OPUS-MT-train_${notdir ${WORKHOME}}-${WHOAMI}
|
||||||
WORK_CONTAINER_JT ?= OPUS-MT-train_${notdir ${WORKHOME}}-tiedeman
|
WORK_CONTAINER_JT ?= OPUS-MT-train_${notdir ${WORKHOME}}-tiedeman
|
||||||
|
|
||||||
|
## base URL of the object storage service
## NOTE(review): not referenced in the part of this file visible here -- confirm where it is used
ALLAS_STORAGE_URL = https://object.pouta.csc.fi/
|
||||||
|
|
||||||
|
|
||||||
## store workdir on allas
|
## store workdir on allas
|
||||||
store:
|
store:
|
||||||
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
|
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
|
||||||
@ -43,3 +46,41 @@ fetch-data:
|
|||||||
mkdir -p ${WORK_DESTDIR}
|
mkdir -p ${WORK_DESTDIR}
|
||||||
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar
|
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## generic recipe for storing work data and removing it from the file system
## DANGEROUS --- this really deletes the data!
## NOTE: makes container also world-readable (see swift post command)
##   --> this makes it easier to fetch things without login credentials
##   --> should not store sensitive data here!
##
## - only acts on targets whose first path component starts with "work"
##   (followed by '-' or '/'); for all other targets the recipe does nothing
## - the container name is derived from the directory of the target
##   ('/' replaced by '-') and ${WHOAMI}
## - all steps are chained with '&&': the local data is only removed
##   if the upload succeeded, and the marker file is only created after that
## - a successful run creates <name>.stored and removes <name>.fetched

%.stored: %
	if [ "$(firstword $(subst -, ,$(subst /, ,$@)))" = "work" ]; then \
	  b=OPUS-MT-train_$(subst /,-,$(dir $@))${WHOAMI}; \
	  cd $(dir $@) && \
	  a-put -b $$b --nc --follow-links --override $(notdir $<) && \
	  rm -fr $(notdir $<) && \
	  touch $(notdir $@) && \
	  rm -f $(notdir $(@:.stored=.fetched)) && \
	  swift post $$b --read-acl ".r:*"; \
	fi
|
||||||
|
|
||||||
|
|
||||||
|
## TODO: fetch with wget instead of using a-commands
## fetch work data from allas
##
## - only acts on targets whose first path component starts with "work"
##   (followed by '-' or '/'); for all other targets the recipe does nothing
## - downloads <name>.tar from the container that is derived from the
##   directory of the target ('/' replaced by '-') and ${WHOAMI}
## - all steps are chained with '&&': the marker file <name>.fetched is only
##   created (and <name>.stored only removed) if the download succeeded

%.fetched:
	if [ "$(firstword $(subst -, ,$(subst /, ,$@)))" = "work" ]; then \
	  mkdir -p $(dir $@) && \
	  cd $(dir $@) && \
	  a-get OPUS-MT-train_$(subst /,-,$(dir $@))${WHOAMI}/$(notdir $(@:.fetched=.tar)) && \
	  touch $(notdir $@) && \
	  rm -f $(notdir $(@:.fetched=.stored)); \
	fi
|
||||||
|
|
||||||
|
## alternative way of fetching work data for one language pair:
## download ${LANGPAIRSTR}.tar from the container that belongs to the
## work directory of the target (directory name with '/' replaced by '-',
## followed by ${WHOAMI})
## requires setting SRCLANGS and TRGLANGS (or LANGPAIRSTR directly)

work-%/${LANGPAIRSTR}:
	mkdir -p $(@D)
	cd $(@D) && a-get OPUS-MT-train_$(subst /,-,$(@D)/)${WHOAMI}/${LANGPAIRSTR}.tar
|
||||||
|
|
||||||
|
|
||||||
|
## entries in work-tatoeba/ whose name starts with d, g-r or t-z,
## each one mapped to its .stored target
UPLOAD_MODELS = $(addsuffix .stored,$(wildcard work-tatoeba/[dg-rt-z]*))

## request the .stored target of all selected work directories
upload-workfiles: ${UPLOAD_MODELS}
|
||||||
|
@ -281,8 +281,6 @@ TUNE_GPUJOB_SUBMIT ?=
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## existing projects in WORKHOME
|
## existing projects in WORKHOME
|
||||||
ALL_LANG_PAIRS := ${shell ls ${WORKHOME} | grep -- '-' | grep -v old}
|
ALL_LANG_PAIRS := ${shell ls ${WORKHOME} | grep -- '-' | grep -v old}
|
||||||
ALL_BILINGUAL_MODELS := ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep -v -- '\+'}
|
ALL_BILINGUAL_MODELS := ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep -v -- '\+'}
|
||||||
@ -293,6 +291,8 @@ ALL_MULTILINGUAL_MODELS := ${shell echo '${ALL_LANG_PAIRS}' | tr ' ' "\n" | grep
|
|||||||
## pre-processing and vocabulary
|
## pre-processing and vocabulary
|
||||||
##----------------------------------------------------------------------------
|
##----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## type of subword segmentation (bpe|spm)
|
||||||
|
## model size (NOTE: BPESIZE is also used for sentencepiece!)
|
||||||
SUBWORDS ?= spm
|
SUBWORDS ?= spm
|
||||||
BPESIZE ?= 32000
|
BPESIZE ?= 32000
|
||||||
SRCBPESIZE ?= ${BPESIZE}
|
SRCBPESIZE ?= ${BPESIZE}
|
||||||
@ -306,10 +306,12 @@ BPETRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.bpe${TRGBPESIZE:000=}k-mode
|
|||||||
SPMSRCMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.src.spm${SRCBPESIZE:000=}k-model
|
SPMSRCMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.src.spm${SRCBPESIZE:000=}k-model
|
||||||
SPMTRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.spm${TRGBPESIZE:000=}k-model
|
SPMTRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.spm${TRGBPESIZE:000=}k-model
|
||||||
|
|
||||||
|
## don't delete BPE/sentencepiece models!
|
||||||
.PRECIOUS: ${BPESRCMODEL} ${BPETRGMODEL}
|
.PRECIOUS: ${BPESRCMODEL} ${BPETRGMODEL}
|
||||||
.PRECIOUS: ${SPMSRCMODEL} ${SPMTRGMODEL}
|
.PRECIOUS: ${SPMSRCMODEL} ${SPMTRGMODEL}
|
||||||
|
|
||||||
|
## size of the joined vocabulary
|
||||||
|
## TODO: adding 1,000 heuristically to cover language labels is a bit ad-hoc
|
||||||
VOCABSIZE ?= $$((${SRCBPESIZE} + ${TRGBPESIZE} + 1000))
|
VOCABSIZE ?= $$((${SRCBPESIZE} + ${TRGBPESIZE} + 1000))
|
||||||
|
|
||||||
## for document-level models
|
## for document-level models
|
||||||
@ -353,7 +355,7 @@ WORKDIR = ${WORKHOME}/${LANGPAIRSTR}
|
|||||||
MODELDIR = ${WORKHOME}/models/${LANGPAIRSTR}
|
MODELDIR = ${WORKHOME}/models/${LANGPAIRSTR}
|
||||||
SPMDIR = ${WORKHOME}/SentencePieceModels
|
SPMDIR = ${WORKHOME}/SentencePieceModels
|
||||||
|
|
||||||
## data sets
|
## train data sets (word alignment for the guided alignment option)
|
||||||
TRAIN_BASE = ${WORKDIR}/train/${DATASET}
|
TRAIN_BASE = ${WORKDIR}/train/${DATASET}
|
||||||
TRAIN_SRC = ${TRAIN_BASE}.src
|
TRAIN_SRC = ${TRAIN_BASE}.src
|
||||||
TRAIN_TRG = ${TRAIN_BASE}.trg
|
TRAIN_TRG = ${TRAIN_BASE}.trg
|
||||||
@ -364,7 +366,7 @@ LOCAL_TRAIN_SRC = ${TMPDIR}/${LANGPAIRSTR}/train/${DATASET}.src
|
|||||||
LOCAL_TRAIN_TRG = ${TMPDIR}/${LANGPAIRSTR}/train/${DATASET}.trg
|
LOCAL_TRAIN_TRG = ${TMPDIR}/${LANGPAIRSTR}/train/${DATASET}.trg
|
||||||
LOCAL_MONO_DATA = ${TMPDIR}/${LANGSTR}/train/${DATASET}.mono
|
LOCAL_MONO_DATA = ${TMPDIR}/${LANGSTR}/train/${DATASET}.mono
|
||||||
|
|
||||||
|
## dev and test data
|
||||||
DEV_SRC ?= ${WORKDIR}/val/${DEVSET_NAME}.src
|
DEV_SRC ?= ${WORKDIR}/val/${DEVSET_NAME}.src
|
||||||
DEV_TRG ?= ${WORKDIR}/val/${DEVSET_NAME}.trg
|
DEV_TRG ?= ${WORKDIR}/val/${DEVSET_NAME}.trg
|
||||||
|
|
||||||
@ -372,8 +374,15 @@ TEST_SRC ?= ${WORKDIR}/test/${TESTSET_NAME}.src
|
|||||||
TEST_TRG ?= ${WORKDIR}/test/${TESTSET_NAME}.trg
|
TEST_TRG ?= ${WORKDIR}/test/${TESTSET_NAME}.trg
|
||||||
|
|
||||||
|
|
||||||
|
## model basename and optional sub-dir
|
||||||
|
|
||||||
MODEL_SUBDIR =
|
MODEL_SUBDIR =
|
||||||
MODEL = ${MODEL_SUBDIR}${DATASET}${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}
|
MODEL = ${MODEL_SUBDIR}${DATASET}${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}
|
||||||
|
|
||||||
|
|
||||||
|
## supported model types
|
||||||
|
## configuration for each type is in lib/train.mk
|
||||||
|
|
||||||
MODELTYPES = transformer \
|
MODELTYPES = transformer \
|
||||||
transformer-big \
|
transformer-big \
|
||||||
transformer-align \
|
transformer-align \
|
||||||
|
@ -372,6 +372,12 @@ endif
|
|||||||
${@:-pivot=}
|
${@:-pivot=}
|
||||||
|
|
||||||
|
|
||||||
|
## <goal>-big-align: run <goal> in a sub-make with the model type set to
## transformer-big-align and ${MODEL_FINAL} passed on as PRE_TRAINED_MODEL
%-big-align:
	${MAKE} MODELTYPE=transformer-big-align \
		PRE_TRAINED_MODEL=${MODEL_FINAL} \
		$(patsubst %-big-align,%,$@)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## run a multigpu job (2 or 4 GPUs)
|
## run a multigpu job (2 or 4 GPUs)
|
||||||
|
@ -74,6 +74,37 @@ afreng-bt-tiny:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## afr -> eng: run the all-job-tatoeba goal with Tatoeba release v2020-07-28
## (sub-make is called via ${MAKE} to pass on options like -n and -j)
.PHONY: afreng
afreng:
	${MAKE} TATOEBA_VERSION=v2020-07-28 \
		SRCLANGS=afr TRGLANGS=eng \
	all-job-tatoeba
|
||||||
|
|
||||||
|
|
||||||
|
## afr -> eng with model type transformer-small-align:
## run the all-job-tatoeba goal with Tatoeba release v2020-07-28
## (sub-make is called via ${MAKE} to pass on options like -n and -j)
.PHONY: afreng-small
afreng-small:
	${MAKE} TATOEBA_VERSION=v2020-07-28 \
		BT_CONTINUE_EXISTING=0 \
		SRCLANGS=afr TRGLANGS=eng \
		MODELTYPE=transformer-small-align \
		MARIAN_WORKSPACE=10000 \
	all-job-tatoeba
|
||||||
|
|
||||||
|
## afr -> eng with model type transformer-tiny-align:
## run the all-job-tatoeba goal with Tatoeba release v2020-07-28
## (sub-make is called via ${MAKE} to pass on options like -n and -j)
.PHONY: afreng-tiny
afreng-tiny:
	${MAKE} TATOEBA_VERSION=v2020-07-28 \
		BT_CONTINUE_EXISTING=0 \
		SRCLANGS=afr TRGLANGS=eng \
		MODELTYPE=transformer-tiny-align \
		MARIAN_WORKSPACE=10000 \
	all-job-tatoeba
|
||||||
|
|
||||||
|
## afr -> eng with model type transformer-small-align:
## run the eval-tatoeba goal with the same settings as afreng-small
## (sub-make is called via ${MAKE} to pass on options like -n and -j)
.PHONY: afreng-small-eval
afreng-small-eval:
	${MAKE} TATOEBA_VERSION=v2020-07-28 \
		BT_CONTINUE_EXISTING=0 \
		SRCLANGS=afr TRGLANGS=eng \
		MODELTYPE=transformer-small-align \
		MARIAN_WORKSPACE=10000 \
	eval-tatoeba
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
102
lib/train.mk
102
lib/train.mk
@ -6,35 +6,26 @@
|
|||||||
#------------------------------------------------------------------------
|
#------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## extract vocabulary from sentence piece model (source side):
## first column of the .vocab file that belongs to the model;
## the target language labels are appended if USE_TARGET_LABELS is 1
##
## the list is assembled in a temporary file and moved into place at the end
## to avoid that a failed run leaves a truncated file that looks up-to-date

${WORKDIR}/${MODEL}.src.vocab: ${SPMSRCMODEL}
	mkdir -p ${dir $@}
	cut -f1 < $<.vocab > $@.tmp
ifeq (${USE_TARGET_LABELS},1)
	echo "${TARGET_LABELS}" | tr ' ' "\n" >> $@.tmp
endif
	mv -f $@.tmp $@
|
||||||
|
|
||||||
|
## extract vocabulary from sentence piece model (target side):
## first column of the .vocab file that belongs to the model
## (written to a temporary file first and then moved into place)
${WORKDIR}/${MODEL}.trg.vocab: ${SPMTRGMODEL}
	mkdir -p ${dir $@}
	cut -f1 < $<.vocab > $@.tmp
	mv -f $@.tmp $@
|
||||||
|
|
||||||
|
|
||||||
ifeq (${SUBWORDS},spm)
|
ifeq (${SUBWORDS},spm)
|
||||||
|
|
||||||
## make vocabulary from the source and target language specific
|
## make vocabulary from the source and target language specific
|
||||||
## sentence piece models (concatenate and yamlify)
|
## sentence piece models (concatenate and yamlify)
|
||||||
## TODO: verify that this becomes valid YAML!
|
|
||||||
|
|
||||||
${MODEL_VOCAB}: ${SPMSRCMODEL} ${SPMTRGMODEL}
|
${WORKDIR}/${MODEL}.vocab.yml: ${WORKDIR}/${MODEL}.src.vocab ${WORKDIR}/${MODEL}.trg.vocab
|
||||||
ifneq (${MODEL_LATEST_VOCAB},)
|
cat $^ | sort -u | scripts/vocab2yaml.py > $@
|
||||||
ifneq (${MODEL_LATEST_VOCAB},${MODEL_VOCAB})
|
|
||||||
cp ${MODEL_LATEST_VOCAB} ${MODEL_VOCAB}
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
cut -f1 < ${word 1,$^}.vocab > ${@:.vocab.yml=.src.vocab}
|
|
||||||
cut -f1 < ${word 2,$^}.vocab > ${@:.vocab.yml=.trg.vocab}
|
|
||||||
ifeq (${USE_TARGET_LABELS},1)
|
|
||||||
echo "${TARGET_LABELS}" | tr ' ' "\n" >> ${@:.vocab.yml=.src.vocab}
|
|
||||||
endif
|
|
||||||
cat ${@:.vocab.yml=.src.vocab} ${@:.vocab.yml=.trg.vocab} | \
|
|
||||||
sort -u | scripts/vocab2yaml.py > $@
|
|
||||||
|
|
||||||
## old buggy style ...
|
|
||||||
# cat ${@:.vocab.yml=.src.vocab} ${@:.vocab.yml=.trg.vocab} | \
|
|
||||||
# sort -u | nl -v 0 | sed 's/^ *//'> $@.numbered
|
|
||||||
# cut -f1 $@.numbered > $@.ids
|
|
||||||
# cut -f2 $@.numbered | sed 's/\\/\\\\/g;s/\"/\\\"/g;s/^\(.*\)$$/"\1"/;s/$$/:/'> $@.tokens
|
|
||||||
# paste -d ' ' $@.tokens $@.ids > $@
|
|
||||||
# rm -f $@.tokens $@.ids $@.numbered
|
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
@ -42,12 +33,12 @@ else
|
|||||||
## - no new vocabulary is created if the file already exists!
|
## - no new vocabulary is created if the file already exists!
|
||||||
## - need to delete the file if you want to create a new one!
|
## - need to delete the file if you want to create a new one!
|
||||||
|
|
||||||
${MODEL_VOCAB}: ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
|
${WORKDIR}/${MODEL}.vocab.yml: ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
|
||||||
${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz
|
${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz
|
||||||
ifeq ($(wildcard ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB}),)
|
ifeq ($(wildcard $@),)
|
||||||
ifneq (${MODEL_LATEST_VOCAB},)
|
ifneq ($(wildcard ${MODEL_LATEST_VOCAB}),)
|
||||||
ifneq (${MODEL_LATEST_VOCAB},${MODEL_VOCAB})
|
ifneq (${MODEL_LATEST_VOCAB},$@)
|
||||||
cp ${MODEL_LATEST_VOCAB} ${MODEL_VOCAB}
|
cp ${MODEL_LATEST_VOCAB} $@
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
mkdir -p ${dir $@}
|
mkdir -p ${dir $@}
|
||||||
@ -59,35 +50,12 @@ else
|
|||||||
@echo "WARNING! Delete the file if you want to start from scratch!"
|
@echo "WARNING! Delete the file if you want to start from scratch!"
|
||||||
touch $@
|
touch $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## if USE_SPM_VOCAB is set:
|
|
||||||
## get separate source and target language vocabularies
|
|
||||||
## from the two individual sentence piece models
|
|
||||||
|
|
||||||
ifeq ($(USE_SPM_VOCAB),1)
|
|
||||||
${MODEL_SRCVOCAB}: ${SPMSRCMODEL}
|
|
||||||
cut -f1 < $<.vocab > $@
|
|
||||||
ifeq (${USE_TARGET_LABELS},1)
|
|
||||||
echo "${TARGET_LABELS}" | tr ' ' "\n" >> $@
|
|
||||||
endif
|
|
||||||
|
|
||||||
${MODEL_TRGVOCAB}: ${SPMTRGMODEL}
|
|
||||||
cut -f1 < $<.vocab > $@
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print-latest:
|
print-latest:
|
||||||
ifneq (${wildcard ${MODEL_LATEST}},)
|
@echo "latest model: ${MODEL_LATEST}"
|
||||||
ifeq (${wildcard ${MODEL_START}},)
|
@echo "start model: ${MODEL_START}"
|
||||||
@echo "cp ${MODEL_LATEST} ${MODEL_START}"
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -100,15 +68,12 @@ endif
|
|||||||
MARIAN_MODELS_DONE = ${patsubst %,${WORKDIR}/${MODEL}.%.model${NR}.done,${MODELTYPES}}
|
MARIAN_MODELS_DONE = ${patsubst %,${WORKDIR}/${MODEL}.%.model${NR}.done,${MODELTYPES}}
|
||||||
|
|
||||||
MARIAN_TRAIN_PREREQS = ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
|
MARIAN_TRAIN_PREREQS = ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
|
||||||
${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz
|
${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz \
|
||||||
|
$(sort ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB})
|
||||||
|
|
||||||
|
|
||||||
## define validation and early-stopping parameters
|
## define validation and early-stopping parameters
|
||||||
## as well as pre-requisites for training the model
|
## as well as pre-requisites for training the model
|
||||||
##
|
|
||||||
## NEW: take away dependency on ${MODEL_VOCAB}
|
|
||||||
## (will be created by marian if it does not exist)
|
|
||||||
## TODO: should we create the dependency again?
|
|
||||||
|
|
||||||
ifndef SKIP_VALIDATION
|
ifndef SKIP_VALIDATION
|
||||||
MARIAN_TRAIN_PREREQS += ${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
MARIAN_TRAIN_PREREQS += ${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
||||||
@ -137,9 +102,15 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
## initialise the weights from a pre-trained model:
## the option is only added if PRE_TRAINED_MODEL names an existing file

ifneq ($(wildcard ${PRE_TRAINED_MODEL}),)
  MARIAN_EXTRA += --pretrained-model ${PRE_TRAINED_MODEL}
endif
|
||||||
|
|
||||||
|
|
||||||
## dependencies and extra parameters
|
## dependencies and extra parameters
|
||||||
## for models with guided alignment
|
## for different models and guided alignment
|
||||||
|
|
||||||
ifeq (${MODELTYPE},transformer-align)
|
ifeq (${MODELTYPE},transformer-align)
|
||||||
MARIAN_TRAIN_PREREQS += ${TRAIN_ALG}
|
MARIAN_TRAIN_PREREQS += ${TRAIN_ALG}
|
||||||
@ -176,7 +147,6 @@ endif
|
|||||||
ifeq (${MODELTYPE},transformer-big-align)
|
ifeq (${MODELTYPE},transformer-big-align)
|
||||||
MARIAN_ENC_DEPTH = 12
|
MARIAN_ENC_DEPTH = 12
|
||||||
MARIAN_ATT_HEADS = 16
|
MARIAN_ATT_HEADS = 16
|
||||||
MARIAN_DIM_EMB = 1024
|
|
||||||
MARIAN_TRAIN_PREREQS += ${TRAIN_ALG}
|
MARIAN_TRAIN_PREREQS += ${TRAIN_ALG}
|
||||||
MARIAN_EXTRA += --guided-alignment ${TRAIN_ALG}
|
MARIAN_EXTRA += --guided-alignment ${TRAIN_ALG}
|
||||||
GPUJOB_HPC_MEM = 16g
|
GPUJOB_HPC_MEM = 16g
|
||||||
@ -185,10 +155,10 @@ endif
|
|||||||
ifeq (${MODELTYPE},transformer-big)
|
ifeq (${MODELTYPE},transformer-big)
|
||||||
MARIAN_ENC_DEPTH = 12
|
MARIAN_ENC_DEPTH = 12
|
||||||
MARIAN_ATT_HEADS = 16
|
MARIAN_ATT_HEADS = 16
|
||||||
MARIAN_DIM_EMB = 1024
|
|
||||||
GPUJOB_HPC_MEM = 16g
|
GPUJOB_HPC_MEM = 16g
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# MARIAN_DIM_EMB = 1024
|
||||||
|
|
||||||
|
|
||||||
## finally: recipe for training transformer model
|
## finally: recipe for training transformer model
|
||||||
@ -200,19 +170,13 @@ ${MARIAN_MODELS_DONE}: ${MARIAN_TRAIN_PREREQS}
|
|||||||
## (check lib/config.mk to see how the latest model is found)
|
## (check lib/config.mk to see how the latest model is found)
|
||||||
##--------------------------------------------------------------------
|
##--------------------------------------------------------------------
|
||||||
ifeq (${wildcard ${MODEL_START}},)
|
ifeq (${wildcard ${MODEL_START}},)
|
||||||
ifneq (${MODEL_LATEST},)
|
ifneq (${wildcard ${MODEL_LATEST}},)
|
||||||
ifneq (${MODEL_LATEST_VOCAB},)
|
|
||||||
ifneq (${MODEL_LATEST_VOCAB},${MODEL_VOCAB})
|
|
||||||
cp ${MODEL_LATEST_VOCAB} ${MODEL_VOCAB}
|
|
||||||
endif
|
|
||||||
ifneq (${MODEL_LATEST},${MODEL_START})
|
ifneq (${MODEL_LATEST},${MODEL_START})
|
||||||
cp ${MODEL_LATEST} ${MODEL_START}
|
cp ${MODEL_LATEST} ${MODEL_START}
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
##--------------------------------------------------------------------
|
##--------------------------------------------------------------------
|
||||||
${MAKE} ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB}
|
|
||||||
${LOADMODS} && ${MARIAN_TRAIN} ${MARIAN_EXTRA} \
|
${LOADMODS} && ${MARIAN_TRAIN} ${MARIAN_EXTRA} \
|
||||||
${MARIAN_STOP_CRITERIA} \
|
${MARIAN_STOP_CRITERIA} \
|
||||||
--model $(@:.done=.npz) \
|
--model $(@:.done=.npz) \
|
||||||
|
@ -10,7 +10,7 @@ filename = sys.argv[1]
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
input = open(filename, 'r')
|
input = open(filename, 'r')
|
||||||
yaml.load(input)
|
yaml.safe_load(input)
|
||||||
except:
|
except:
|
||||||
print('YAML file is broken - try to fix it!')
|
print('YAML file is broken - try to fix it!')
|
||||||
print(f'copy {filename} to {filename}.bak')
|
print(f'copy {filename} to {filename}.bak')
|
||||||
|
Loading…
Reference in New Issue
Block a user