mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-09-11 20:27:19 +03:00
skip word alignment if not necessary
This commit is contained in:
parent
d08fdd4040
commit
44182291dc
4
Makefile
4
Makefile
@ -145,8 +145,10 @@ include Makefile.slurm
|
|||||||
data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz \
|
data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz \
|
||||||
${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
||||||
${MAKE} ${TEST_SRC}.${PRE_SRC} ${TEST_TRG}
|
${MAKE} ${TEST_SRC}.${PRE_SRC} ${TEST_TRG}
|
||||||
${MAKE} ${TRAIN_ALG}
|
|
||||||
${MAKE} ${MODEL_VOCAB}
|
${MAKE} ${MODEL_VOCAB}
|
||||||
|
ifeq (${MODELTYPE},transformer-align)
|
||||||
|
${MAKE} ${TRAIN_ALG}
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
traindata: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz
|
traindata: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz
|
||||||
|
@ -137,9 +137,9 @@ EXTRA_TRAINSET =
|
|||||||
|
|
||||||
## TESTSET= DEVSET, TRAINSET = OPUS - WMT-News,DEVSET.TESTSET
|
## TESTSET= DEVSET, TRAINSET = OPUS - WMT-News,DEVSET.TESTSET
|
||||||
TESTSET = ${DEVSET}
|
TESTSET = ${DEVSET}
|
||||||
TRAINSET = $(filter-out WMT-News ${DEVSET} ${TESTSET},${OPUSCORPORA} ${EXTRA_TRAINSET})
|
TRAINSET = $(filter-out WMT-News MPC1 ${DEVSET} ${TESTSET},${OPUSCORPORA} ${EXTRA_TRAINSET})
|
||||||
TUNESET = OpenSubtitles
|
TUNESET = OpenSubtitles
|
||||||
MONOSET = $(filter-out WMT-News ${DEVSET} ${TESTSET},${OPUSMONOCORPORA} ${EXTRA_TRAINSET})
|
MONOSET = $(filter-out WMT-News MPC1 ${DEVSET} ${TESTSET},${OPUSMONOCORPORA} ${EXTRA_TRAINSET})
|
||||||
|
|
||||||
## 1 = use remaining data from dev/test data for training
|
## 1 = use remaining data from dev/test data for training
|
||||||
USE_REST_DEVDATA = 1
|
USE_REST_DEVDATA = 1
|
||||||
|
@ -85,7 +85,7 @@ ifeq (${wildcard ${BT_MODEL_START}},)
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
rm -f ${WORKHOME}/${LANGPAIRSTR}/train.submit
|
rm -f ${WORKHOME}/${LANGPAIRSTR}/train.submit
|
||||||
${MAKE} DATASET=opus+bt \
|
${MAKE} DATASET=${DATASET}+bt \
|
||||||
CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \
|
CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \
|
||||||
CLEAN_TRAIN_TRG="${CLEAN_TRAIN_TRG} ${BACKTRANS_TRG}" \
|
CLEAN_TRAIN_TRG="${CLEAN_TRAIN_TRG} ${BACKTRANS_TRG}" \
|
||||||
MARIAN_EARLY_STOPPING=15 \
|
MARIAN_EARLY_STOPPING=15 \
|
||||||
|
@ -286,6 +286,11 @@ enru-yandex:
|
|||||||
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex \
|
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex \
|
||||||
WALLTIME=72 HPC_CORES=1 HPC_MEM=4g MARIAN_EARLY_STOPPING=15 train.submit-multigpu
|
WALLTIME=72 HPC_CORES=1 HPC_MEM=4g MARIAN_EARLY_STOPPING=15 train.submit-multigpu
|
||||||
|
|
||||||
|
enru-yandex-bt:
|
||||||
|
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex data-bt
|
||||||
|
${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex \
|
||||||
|
WALLTIME=72 HPC_CORES=1 HPC_MEM=8g MARIAN_WORKSPACE=12000 MARIAN_EARLY_STOPPING=15 train-bt.submit-multigpu
|
||||||
|
|
||||||
|
|
||||||
enit:
|
enit:
|
||||||
${MAKE} SRCLANGS=en TRGLANGS=it traindata-spm
|
${MAKE} SRCLANGS=en TRGLANGS=it traindata-spm
|
||||||
|
Loading…
Reference in New Issue
Block a user