mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-09-11 20:27:19 +03:00
skip word alignment if not necessary
This commit is contained in:
parent
d08fdd4040
commit
44182291dc
4
Makefile
4
Makefile
@ -145,8 +145,10 @@ include Makefile.slurm
|
||||
data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz \
|
||||
${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
||||
${MAKE} ${TEST_SRC}.${PRE_SRC} ${TEST_TRG}
|
||||
${MAKE} ${TRAIN_ALG}
|
||||
${MAKE} ${MODEL_VOCAB}
|
||||
ifeq (${MODELTYPE},transformer-align)
|
||||
${MAKE} ${TRAIN_ALG}
|
||||
endif
|
||||
|
||||
|
||||
traindata: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz
|
||||
|
@ -137,9 +137,9 @@ EXTRA_TRAINSET =
|
||||
|
||||
## TESTSET= DEVSET, TRAINSET = OPUS - WMT-News,DEVSET.TESTSET
|
||||
TESTSET = ${DEVSET}
|
||||
TRAINSET = $(filter-out WMT-News ${DEVSET} ${TESTSET},${OPUSCORPORA} ${EXTRA_TRAINSET})
|
||||
TRAINSET = $(filter-out WMT-News MPC1 ${DEVSET} ${TESTSET},${OPUSCORPORA} ${EXTRA_TRAINSET})
|
||||
TUNESET = OpenSubtitles
|
||||
MONOSET = $(filter-out WMT-News ${DEVSET} ${TESTSET},${OPUSMONOCORPORA} ${EXTRA_TRAINSET})
|
||||
MONOSET = $(filter-out WMT-News MPC1 ${DEVSET} ${TESTSET},${OPUSMONOCORPORA} ${EXTRA_TRAINSET})
|
||||
|
||||
## 1 = use remaining data from dev/test data for training
|
||||
USE_REST_DEVDATA = 1
|
||||
|
@ -85,7 +85,7 @@ ifeq (${wildcard ${BT_MODEL_START}},)
|
||||
endif
|
||||
endif
|
||||
rm -f ${WORKHOME}/${LANGPAIRSTR}/train.submit
|
||||
${MAKE} DATASET=opus+bt \
|
||||
${MAKE} DATASET=${DATASET}+bt \
|
||||
CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \
|
||||
CLEAN_TRAIN_TRG="${CLEAN_TRAIN_TRG} ${BACKTRANS_TRG}" \
|
||||
MARIAN_EARLY_STOPPING=15 \
|
||||
|
@ -286,6 +286,11 @@ enru-yandex:
|
||||
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex \
|
||||
WALLTIME=72 HPC_CORES=1 HPC_MEM=4g MARIAN_EARLY_STOPPING=15 train.submit-multigpu
|
||||
|
||||
enru-yandex-bt:
|
||||
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex data-bt
|
||||
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex \
|
||||
WALLTIME=72 HPC_CORES=1 HPC_MEM=8g MARIAN_WORKSPACE=12000 MARIAN_EARLY_STOPPING=15 train-bt.submit-multigpu
|
||||
|
||||
|
||||
enit:
|
||||
${MAKE} SRCLANGS=en TRGLANGS=it traindata-spm
|
||||
|
Loading…
Reference in New Issue
Block a user