mirror of https://github.com/Helsinki-NLP/OPUS-MT-train.git (synced 2025-01-07 10:24:54 +03:00)
plain text vocab files from spm models
This commit is contained in: parent 24e92de56a, commit c2798e9758
Makefile (4 changed lines)
@@ -323,7 +323,7 @@ train-and-eval-job:

data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz \
	${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
	${MAKE} ${TEST_SRC}.${PRE_SRC} ${TEST_TRG}
	${MAKE} ${MODEL_VOCAB}
	${MAKE} ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB}
ifeq (${MODELTYPE},transformer-align)
	${MAKE} ${TRAIN_ALG}
endif

@@ -344,7 +344,7 @@ wordalign: ${TRAIN_ALG}

## other model types

vocab: ${MODEL_VOCAB}
vocab: ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB}
train: ${WORKDIR}/${MODEL}.${MODELTYPE}.model${NR}.done
translate: ${WORKDIR}/${TESTSET_NAME}.${MODEL}${NR}.${MODELTYPE}.${SRC}.${TRG}
eval: ${WORKDIR}/${TESTSET_NAME}.${MODEL}${NR}.${MODELTYPE}.${SRC}.${TRG}.eval
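A usage sketch of the changed vocab target (SRCLANGS/TRGLANGS are assumptions about the surrounding Makefile, not part of this diff):

	# hypothetical invocation for a de-en model
	make SRCLANGS=de TRGLANGS=en vocab    # now builds ${MODEL_SRCVOCAB} and ${MODEL_TRGVOCAB}
	make SRCLANGS=de TRGLANGS=en train    # the train target itself is unchanged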
@@ -219,7 +219,7 @@ DEVSET ?= ${firstword ${foreach c,${POTENTIAL_DEVSETS},${filter ${c},${BIGGER_B

## increase dev/test sets for Tatoeba (very short sentences!)
ifeq (${DEVSET},Tatoeba)
DEVSIZE = 5000
DEVSIZE = 5000
TESTSIZE = 5000
endif

@@ -254,19 +254,18 @@ TRGBPESIZE ?= ${BPESIZE}

BPEMODELNAME ?= opus

.PRECIOUS: ${BPESRCMODEL} ${BPETRGMODEL}
# BPESRCMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.src.bpe${SRCBPESIZE:000=}k-model
# BPETRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.bpe${TRGBPESIZE:000=}k-model
BPESRCMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.src.${SRCBPESIZE:000=}k-model.bpe
BPETRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.${TRGBPESIZE:000=}k-model.bpe

.PRECIOUS: ${SPMSRCMODEL} ${SPMTRGMODEL}
# SPMSRCMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.src.spm${SRCBPESIZE:000=}k-model
# SPMTRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.spm${TRGBPESIZE:000=}k-model
SPMSRCMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.src.${SRCBPESIZE:000=}k-model.spm
SPMTRGMODEL ?= ${WORKDIR}/train/${BPEMODELNAME}.trg.${TRGBPESIZE:000=}k-model.spm

.PRECIOUS: ${BPESRCMODEL} ${BPETRGMODEL}
.PRECIOUS: ${SPMSRCMODEL} ${SPMTRGMODEL}

VOCABSIZE ?= $$((${SRCBPESIZE} + ${TRGBPESIZE} + 1000))
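A worked example of the VOCABSIZE formula (the 32k subword sizes are illustrative, not taken from this diff):

	# shell equivalent of the make-level arithmetic above
	SRCBPESIZE=32000; TRGBPESIZE=32000
	echo $((SRCBPESIZE + TRGBPESIZE + 1000))    # 65000, later used as --max-size for ${MARIAN_VOCAB}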
@@ -341,25 +340,29 @@ MODEL = ${MODEL_SUBDIR}${DATASET}${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}

MODELTYPE = transformer
NR = 1

MODEL_BASENAME = ${MODEL}.${MODELTYPE}.model${NR}
MODEL_VALIDLOG = ${MODEL}.${MODELTYPE}.valid${NR}.log
MODEL_TRAINLOG = ${MODEL}.${MODELTYPE}.train${NR}.log
MODEL_START = ${WORKDIR}/${MODEL_BASENAME}.npz
MODEL_FINAL = ${WORKDIR}/${MODEL_BASENAME}.npz.best-perplexity.npz
MODEL_DECODER = ${MODEL_FINAL}.decoder.yml
MODEL_BASENAME = ${MODEL}.${MODELTYPE}.model${NR}
MODEL_VALIDLOG = ${MODEL}.${MODELTYPE}.valid${NR}.log
MODEL_TRAINLOG = ${MODEL}.${MODELTYPE}.train${NR}.log
MODEL_START = ${WORKDIR}/${MODEL_BASENAME}.npz
MODEL_FINAL = ${WORKDIR}/${MODEL_BASENAME}.npz.best-perplexity.npz
MODEL_DECODER = ${MODEL_FINAL}.decoder.yml

ifeq (${MODELTYPE},transformer-spm)
MODEL_VOCABTYPE = spm
MODEL_VOCAB = ${WORKDIR}/${MODEL}.vocab.${MODEL_VOCABTYPE}
MODEL_SRCVOCAB = ${SPMSRCMODEL}
MODEL_TRGVOCAB = ${SPMTRGMODEL}
PRE_SRC = plain
PRE_TRG = plain
# MODEL_SRCVOCAB = ${MODEL_VOCAB}
# MODEL_TRGVOCAB = ${MODEL_VOCAB}
## for sentence-piece models: get plain text vocabularies
## for others: extract vocabulary from training data with MarianNMT
## backwards compatibility: if there is already a vocab-file then use it

ifeq (${SUBWORDS},spm)
ifneq ($(wildcard ${WORKDIR}/${MODEL}.vocab.yml),)
MODEL_VOCAB = ${WORKDIR}/${MODEL}.vocab.yml
MODEL_SRCVOCAB = ${MODEL_VOCAB}
MODEL_TRGVOCAB = ${MODEL_VOCAB}
else
MODEL_VOCABTYPE = yml
MODEL_VOCAB = ${WORKDIR}/${MODEL}.vocab.${MODEL_VOCABTYPE}
MODEL_VOCAB = ${WORKDIR}/${MODEL}.vocab
MODEL_SRCVOCAB = ${WORKDIR}/${MODEL}.src.vocab
MODEL_TRGVOCAB = ${WORKDIR}/${MODEL}.trg.vocab
endif
else
MODEL_VOCAB = ${WORKDIR}/${MODEL}.vocab.yml
MODEL_SRCVOCAB = ${MODEL_VOCAB}
MODEL_TRGVOCAB = ${MODEL_VOCAB}
endif
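To spell out what the block above selects (comment-only sketch; the concrete values depend on ${WORKDIR} and ${MODEL}):

	# SUBWORDS = spm and no legacy ${MODEL}.vocab.yml present:
	#   MODEL_VOCAB    -> ${WORKDIR}/${MODEL}.vocab
	#   MODEL_SRCVOCAB -> ${WORKDIR}/${MODEL}.src.vocab   (plain text, one token per line)
	#   MODEL_TRGVOCAB -> ${WORKDIR}/${MODEL}.trg.vocab
	# legacy vocab.yml found, or SUBWORDS is not spm:
	#   MODEL_VOCAB = MODEL_SRCVOCAB = MODEL_TRGVOCAB -> ${WORKDIR}/${MODEL}.vocab.yml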
@@ -367,13 +370,11 @@ endif


## latest model with the same pre-processing but any data or modeltype
ifdef CONTINUE_EXISTING
MODEL_LATEST = $(firstword ${shell ls -t ${WORKDIR}/*.${PRE_SRC}-${PRE_TRG}.*.best-perplexity.npz 2>/dev/null})
MODEL_LATEST_VOCAB = $(shell echo "${MODEL_LATEST}" | \
	sed 's|\.${PRE_SRC}-${PRE_TRG}\..*$$|.${PRE_SRC}-${PRE_TRG}.vocab.${MODEL_VOCABTYPE}|')
	sed 's|\.${PRE_SRC}-${PRE_TRG}\..*$$|.${PRE_SRC}-${PRE_TRG}.vocab.yml|')
endif
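A quick check of what the rewritten sed expression yields (the file name and the spm32k-spm32k pre-processing tag are made up for illustration):

	echo "work/de-en/opus.spm32k-spm32k.transformer.model1.npz.best-perplexity.npz" \
	  | sed 's|\.spm32k-spm32k\..*$|.spm32k-spm32k.vocab.yml|'
	# -> work/de-en/opus.spm32k-spm32k.vocab.yml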
@@ -398,12 +399,15 @@ MARIAN_MAXI_BATCH = 500

MARIAN_DROPOUT = 0.1
MARIAN_MAX_LENGTH = 500

MARIAN_DECODER_GPU = -b 12 -n1 -d ${MARIAN_GPUS} --mini-batch 8 --maxi-batch 32 --maxi-batch-sort src \
MARIAN_DECODER_GPU = -b 12 -n1 -d ${MARIAN_GPUS} \
	--mini-batch 8 --maxi-batch 32 --maxi-batch-sort src \
	--max-length ${MARIAN_MAX_LENGTH} --max-length-crop
MARIAN_DECODER_CPU = -b 12 -n1 --cpu-threads ${HPC_CORES} --mini-batch 8 --maxi-batch 32 --maxi-batch-sort src \
MARIAN_DECODER_CPU = -b 12 -n1 --cpu-threads ${HPC_CORES} \
	--mini-batch 8 --maxi-batch 32 --maxi-batch-sort src \
	--max-length ${MARIAN_MAX_LENGTH} --max-length-crop
MARIAN_DECODER_FLAGS = ${MARIAN_DECODER_GPU}
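These flag bundles are handed to marian-decoder elsewhere in the build; a minimal hand-run equivalent might look like the following (model, vocabulary and test file names are assumptions):

	# GPU decoding with the flags defined above (illustrative paths)
	marian-decoder -m model.npz -v vocab.yml vocab.yml \
	    -b 12 -n1 -d 0 \
	    --mini-batch 8 --maxi-batch 32 --maxi-batch-sort src \
	    --max-length 500 --max-length-crop \
	    < test.src > test.out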

## TODO: currently marianNMT crashes with workspace > 26000
ifeq (${GPU},p100)
MARIAN_WORKSPACE = 13000
lib/dist.mk (10 changed lines)
@@ -152,14 +152,6 @@ ifneq ("$(wildcard ${BPESRCMODEL})","")

PREPROCESS_SRCMODEL = ${BPESRCMODEL}
PREPROCESS_TRGMODEL = ${BPETRGMODEL}
PREPROCESS_DESCRIPTION = normalization + tokenization + BPE (${PRE_SRC},${PRE_TRG})
else ifeq (${MODELTYPE},transformer-spm)
PREPROCESS_TYPE = txt
SUBWORD_TYPE = spm
RELEASE_SRCVOCAB = source.spm
RELEASE_TRGVOCAB = target.spm
PREPROCESS_SRCMODEL = ${SPMSRCMODEL}
PREPROCESS_TRGMODEL = ${SPMTRGMODEL}
PREPROCESS_DESCRIPTION = normalization + in-build SentencePiece (${PRE_SRC},${PRE_TRG})
else
PREPROCESS_TYPE = spm
SUBWORD_TYPE = spm

@@ -468,7 +460,7 @@ LASTTRG = ${lastword ${TRGLANGS}}

MODEL_OLD = ${MODEL_SUBDIR}${DATASET}${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}.${LASTSRC}${LASTTRG}
MODEL_OLD_BASENAME = ${MODEL_OLD}.${MODELTYPE}.model${NR}
MODEL_OLD_FINAL = ${WORKDIR}/${MODEL_OLD_BASENAME}.npz.best-perplexity.npz
MODEL_OLD_VOCAB = ${WORKDIR}/${MODEL_OLD}.vocab.${MODEL_VOCABTYPE}
MODEL_OLD_VOCAB = ${WORKDIR}/${MODEL_OLD}.vocab.yml
MODEL_OLD_DECODER = ${MODEL_OLD_FINAL}.decoder.yml
MODEL_TRANSLATE = ${WORKDIR}/${TESTSET_NAME}.${MODEL}${NR}.${MODELTYPE}.${SRC}.${TRG}
MODEL_OLD_TRANSLATE = ${WORKDIR}/${TESTSET_NAME}.${MODEL_OLD}${NR}.${MODELTYPE}.${SRC}.${TRG}
@@ -221,7 +221,7 @@ endif

PHONY: install-prerequisites install-prereq install-requirements
install-prerequisites install-prereq install-requirements:
	${PIP} install --user -r requirements.txt
	${MAKE} install-perl-modules:
	${MAKE} install-perl-modules
	${MAKE} ${PREREQ_TOOLS}

.PHONY: install-perl-modules

@@ -285,5 +285,5 @@ ${TOOLSDIR}/protobuf/bin/protoc:

${TOOLSDIR}/eflomal/eflomal:
	${MAKE} -C ${dir $@} all
	cd ${dir $@} && python3 setup.py install
	cd ${dir $@} && python3 setup.py install --user
#	python3 setup.py install --install-dir ${HOME}/.local
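The switch to --user installs the eflomal Python bindings into the per-user site-packages instead of system-wide; to see where that lands (purely illustrative):

	python3 -c "import site; print(site.getusersitepackages())"
	# e.g. /home/<user>/.local/lib/python3.8/site-packages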

@@ -245,7 +245,7 @@ listallmodels:

BT_MODEL = ${MODEL_SUBDIR}${DATASET}+bt${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}
BT_MODEL_BASE = ${BT_MODEL}.${MODELTYPE}.model${NR}
BT_MODEL_START = ${WORKDIR}/${BT_MODEL_BASE}.npz
BT_MODEL_VOCAB = ${WORKDIR}/${BT_MODEL}.vocab.${MODEL_VOCABTYPE}
BT_MODEL_VOCAB = ${WORKDIR}/${BT_MODEL}.vocab.yml

# %-add-backtranslations:
%-bt:

@@ -268,7 +268,7 @@ endif

PIVOT_MODEL = ${MODEL_SUBDIR}${DATASET}+pivot${TRAINSIZE}.${PRE_SRC}-${PRE_TRG}
PIVOT_MODEL_BASE = ${PIVOT_MODEL}.${MODELTYPE}.model${NR}
PIVOT_MODEL_START = ${WORKDIR}/${PIVOT_MODEL_BASE}.npz
PIVOT_MODEL_VOCAB = ${WORKDIR}/${PIVOT_MODEL}.vocab.${MODEL_VOCABTYPE}
PIVOT_MODEL_VOCAB = ${WORKDIR}/${PIVOT_MODEL}.vocab.yml

%-pivot:
ifneq (${wildcard ${MODEL_FINAL}},)
lib/train.mk (179 changed lines)
@@ -1,8 +1,6 @@

# -*-makefile-*-


## resume training on an existing model
resume:
	if [ -e ${WORKDIR}/${MODEL}.${MODELTYPE}.model${NR}.npz.best-perplexity.npz ]; then \

@@ -15,10 +13,9 @@ resume:

#------------------------------------------------------------------------
# training MarianNMT models
# vocabulary
#------------------------------------------------------------------------

## make vocabulary
## - no new vocabulary is created if the file already exists!
## - need to delete the file if you want to create a new one!

@@ -29,11 +26,9 @@ ifeq ($(wildcard ${MODEL_VOCAB}),)

ifneq (${MODEL_LATEST_VOCAB},)
	cp ${MODEL_LATEST_VOCAB} ${MODEL_VOCAB}
else
ifneq (${MODEL_VOCABTYPE},spm)
	mkdir -p ${dir $@}
	${LOADMODS} && ${ZCAT} $^ | ${MARIAN_VOCAB} --max-size ${VOCABSIZE} > $@
endif
endif
else
	@echo "$@ already exists!"
	@echo "WARNING! No new vocabulary is created even though the data has changed!"

@@ -42,6 +37,16 @@ else

endif


## get vocabulary from sentence piece model
ifeq (${SUBWORDS},spm)
${MODEL_SRCVOCAB}: ${SPMSRCMODEL}
	cut -f1 < $<.vocab > $@

${MODEL_TRGVOCAB}: ${SPMTRGMODEL}
	cut -f1 < $<.vocab > $@
endif
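The two rules above rely on the .vocab file that SentencePiece training writes next to the model: one token and its score per line, tab-separated, so cut -f1 keeps just the tokens. A small sketch with made-up file names (the default special tokens come first):

	# opus.src.32k-model.spm.vocab has the format: token<TAB>score
	cut -f1 < opus.src.32k-model.spm.vocab > opus.spm32k-spm32k.src.vocab
	head -3 opus.spm32k-spm32k.src.vocab
	# <unk>
	# <s>
	# </s>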

print-latest:
ifneq (${wildcard ${MODEL_LATEST}},)
ifeq (${wildcard ${MODEL_START}},)
@@ -50,14 +55,32 @@ endif

endif


#------------------------------------------------------------------------
# training MarianNMT models
#------------------------------------------------------------------------

## NEW: take away dependency on ${MODEL_VOCAB}
## (will be created by marian if it does not exist)


## possible model variants
MARIAN_MODELS_DONE = ${WORKDIR}/${MODEL}.transformer.model${NR}.done \
		${WORKDIR}/${MODEL}.transformer-align.model${NR}.done

MARIAN_TRAIN_PREREQS = ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
		${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz \
		${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}

## dependencies and extra parameters
ifeq (${MODELTYPE},transformer-align)
MARIAN_TRAIN_PREREQS += ${TRAIN_ALG}
MARIAN_EXTRA += --guided-alignment ${TRAIN_ALG}
endif
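Prerequisites and extra flags are now assembled per model type; the variant is selected with MODELTYPE when calling the targets shown at the top of this diff (language settings omitted for brevity):

	# default transformer model
	make train
	# alignment-guided variant: adds ${TRAIN_ALG} as a prerequisite and
	# --guided-alignment to MARIAN_EXTRA, as defined above
	make MODELTYPE=transformer-align train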

## train transformer model
${WORKDIR}/${MODEL}.transformer.model${NR}.done ${WORKDIR}/${MODEL}.transformer-spm.model${NR}.done: \
	${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
	${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz \
	${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
${MARIAN_MODELS_DONE}: ${MARIAN_TRAIN_PREREQS}
	mkdir -p ${dir $@}
	##--------------------------------------------------------------------
	## in case we want to continue training from the latest existing model
@@ -108,139 +131,3 @@ endif

	--tempdir ${TMPDIR} \
	--exponential-smoothing
	touch $@



## NEW: take away dependency on ${MODEL_VOCAB}

## train transformer model with guided alignment
${WORKDIR}/${MODEL}.transformer-align.model${NR}.done: \
	${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
	${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz \
	${TRAIN_ALG} \
	${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
	mkdir -p ${dir $@}
	##--------------------------------------------------------------------
	## in case we want to continue training from the latest existing model
	## (check lib/config.mk to see how the latest model is found)
	##--------------------------------------------------------------------
ifeq (${wildcard ${MODEL_START}},)
ifneq (${MODEL_LATEST},)
ifneq (${MODEL_LATEST_VOCAB},)
	cp ${MODEL_LATEST_VOCAB} ${MODEL_VOCAB}
	cp ${MODEL_LATEST} ${MODEL_START}
endif
endif
endif
	##--------------------------------------------------------------------
	${LOADMODS} && ${MARIAN_TRAIN} ${MARIAN_EXTRA} \
	--model $(@:.done=.npz) \
	--type transformer \
	--train-sets ${word 1,$^} ${word 2,$^} ${MARIAN_TRAIN_WEIGHTS} \
	--max-length 500 \
	--vocabs ${MODEL_VOCAB} ${MODEL_VOCAB} \
	--mini-batch-fit \
	-w ${MARIAN_WORKSPACE} \
	--maxi-batch ${MARIAN_MAXI_BATCH} \
	--early-stopping ${MARIAN_EARLY_STOPPING} \
	--valid-freq ${MARIAN_VALID_FREQ} \
	--save-freq ${MARIAN_SAVE_FREQ} \
	--disp-freq ${MARIAN_DISP_FREQ} \
	--valid-sets ${word 4,$^} ${word 5,$^} \
	--valid-metrics perplexity \
	--valid-mini-batch ${MARIAN_VALID_MINI_BATCH} \
	--beam-size 12 --normalize 1 --allow-unk \
	--log $(@:.model${NR}.done=.train${NR}.log) \
	--valid-log $(@:.model${NR}.done=.valid${NR}.log) \
	--enc-depth 6 --dec-depth 6 \
	--transformer-heads 8 \
	--transformer-postprocess-emb d \
	--transformer-postprocess dan \
	--transformer-dropout ${MARIAN_DROPOUT} \
	--label-smoothing 0.1 \
	--learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report \
	--optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
	--tied-embeddings-all \
	--overwrite --keep-best \
	--devices ${MARIAN_GPUS} \
	--sync-sgd --seed ${SEED} \
	--sqlite \
	--tempdir ${TMPDIR} \
	--exponential-smoothing \
	--guided-alignment ${word 3,$^}
	touch $@



# ${TRAIN_SRC}.clean${TRAINSIZE}.gz: ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz
# ${ZCAT} $< | sed 's/ //g;s/▁/ /g' | sed 's/^ *//;s/ *$$//' | \
# sed 's/\@\@ //g;s/ \@\@//g;s/ \@\-\@ /-/g' | ${GZIP} -c > $@

# ${TRAIN_TRG}.clean${TRAINSIZE}.gz: ${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz
# ${ZCAT} $< | sed 's/ //g;s/▁/ /g' | sed 's/^ *//;s/ *$$//' | \
# sed 's/\@\@ //g;s/ \@\@//g;s/ \@\-\@ /-/g' | ${GZIP} -c > $@


# ## train transformer model
# ${WORKDIR}/${MODEL}.transformer-spm.model${NR}.done: \
# ${TRAIN_SRC}.clean.${PRE_SRC}${TRAINSIZE}.gz \
# ${TRAIN_TRG}.clean.${PRE_TRG}${TRAINSIZE}.gz \
# ${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
# mkdir -p ${dir $@}
# ##--------------------------------------------------------------------
# ## in case we want to continue training from the latest existing model
# ## (check lib/config.mk to see how the latest model is found)
# ##--------------------------------------------------------------------
# ifeq (${wildcard ${MODEL_START}},)
# ifneq (${MODEL_LATEST},)
# ifneq (${MODEL_LATEST_VOCAB},)
# cp ${MODEL_LATEST_VOCAB} ${MODEL_VOCAB}
# cp ${MODEL_LATEST} ${MODEL_START}
# endif
# endif
# endif
# ##--------------------------------------------------------------------
# ${MAKE} ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB}
# ${LOADMODS} && ${MARIAN_TRAIN} ${MARIAN_EXTRA} \
# --model $(@:.done=.npz) \
# --type transformer \
# --train-sets ${word 1,$^} ${word 2,$^} ${MARIAN_TRAIN_WEIGHTS} \
# --max-length 500 \
# --vocabs ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB} \
# --mini-batch-fit \
# -w ${MARIAN_WORKSPACE} \
# --maxi-batch ${MARIAN_MAXI_BATCH} \
# --early-stopping ${MARIAN_EARLY_STOPPING} \
# --valid-freq ${MARIAN_VALID_FREQ} \
# --save-freq ${MARIAN_SAVE_FREQ} \
# --disp-freq ${MARIAN_DISP_FREQ} \
# --valid-sets ${word 3,$^} ${word 4,$^} \
# --valid-metrics perplexity \
# --valid-mini-batch ${MARIAN_VALID_MINI_BATCH} \
# --beam-size 12 --normalize 1 --allow-unk \
# --log $(@:.model${NR}.done=.train${NR}.log) \
# --valid-log $(@:.model${NR}.done=.valid${NR}.log) \
# --enc-depth 6 --dec-depth 6 \
# --transformer-heads 8 \
# --transformer-postprocess-emb d \
# --transformer-postprocess dan \
# --transformer-dropout ${MARIAN_DROPOUT} \
# --label-smoothing 0.1 \
# --learn-rate 0.0003 --lr-warmup 16000 --lr-decay-inv-sqrt 16000 --lr-report \
# --optimizer-params 0.9 0.98 1e-09 --clip-norm 5 \
# --tied-embeddings-all \
# --overwrite --keep-best \
# --devices ${MARIAN_GPUS} \
# --sync-sgd --seed ${SEED} \
# --sqlite \
# --tempdir ${TMPDIR} \
# --exponential-smoothing
# touch $@

@@ -3,3 +3,4 @@ iso-639

opustools
subword-nmt
sacrebleu
Cython