diff --git a/Makefile b/Makefile
index 429fe183..37e12abb 100644
--- a/Makefile
+++ b/Makefile
@@ -145,8 +145,10 @@ include Makefile.slurm
 
 data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz \
 	${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
 	${MAKE} ${TEST_SRC}.${PRE_SRC} ${TEST_TRG}
-	${MAKE} ${TRAIN_ALG}
 	${MAKE} ${MODEL_VOCAB}
+ifeq (${MODELTYPE},transformer-align)
+	${MAKE} ${TRAIN_ALG}
+endif
 
 traindata: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz
diff --git a/Makefile.config b/Makefile.config
index 8475fecb..44443b4b 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -137,9 +137,9 @@ EXTRA_TRAINSET =
 
 ## TESTSET= DEVSET, TRAINSET = OPUS - WMT-News,DEVSET.TESTSET
 TESTSET = ${DEVSET}
-TRAINSET = $(filter-out WMT-News ${DEVSET} ${TESTSET},${OPUSCORPORA} ${EXTRA_TRAINSET})
+TRAINSET = $(filter-out WMT-News MPC1 ${DEVSET} ${TESTSET},${OPUSCORPORA} ${EXTRA_TRAINSET})
 TUNESET = OpenSubtitles
-MONOSET = $(filter-out WMT-News ${DEVSET} ${TESTSET},${OPUSMONOCORPORA} ${EXTRA_TRAINSET})
+MONOSET = $(filter-out WMT-News MPC1 ${DEVSET} ${TESTSET},${OPUSMONOCORPORA} ${EXTRA_TRAINSET})
 
 ## 1 = use remaining data from dev/test data for training
 USE_REST_DEVDATA = 1
diff --git a/Makefile.generic b/Makefile.generic
index ce00879a..8ef71338 100644
--- a/Makefile.generic
+++ b/Makefile.generic
@@ -85,7 +85,7 @@ ifeq (${wildcard ${BT_MODEL_START}},)
 endif
 endif
 	rm -f ${WORKHOME}/${LANGPAIRSTR}/train.submit
-	${MAKE} DATASET=opus+bt \
+	${MAKE} DATASET=${DATASET}+bt \
 		CLEAN_TRAIN_SRC="${CLEAN_TRAIN_SRC} ${BACKTRANS_SRC}" \
 		CLEAN_TRAIN_TRG="${CLEAN_TRAIN_TRG} ${BACKTRANS_TRG}" \
 		MARIAN_EARLY_STOPPING=15 \
diff --git a/Makefile.tasks b/Makefile.tasks
index bc5ab3e0..5472dfda 100644
--- a/Makefile.tasks
+++ b/Makefile.tasks
@@ -286,6 +286,11 @@ enru-yandex:
 	${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex \
 		WALLTIME=72 HPC_CORES=1 HPC_MEM=4g MARIAN_EARLY_STOPPING=15 train.submit-multigpu
 
+enru-yandex-bt:
+	${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex data-bt
+	${MAKE} DATASET=opus+yandex MODELTYPE=transformer SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex \
+		WALLTIME=72 HPC_CORES=1 HPC_MEM=8g MARIAN_WORKSPACE=12000 MARIAN_EARLY_STOPPING=15 train-bt.submit-multigpu
+
 enit:
 	${MAKE} SRCLANGS=en TRGLANGS=it traindata-spm