mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-10-05 16:47:21 +03:00
fixed multithreading issues with data recipe
This commit is contained in:
parent
fc8c2b33c0
commit
72e1bcb7ec
4
Makefile
4
Makefile
@ -326,8 +326,8 @@ train-and-eval-job:
|
||||
#------------------------------------------------------------------------
|
||||
|
||||
.PHONY: data
|
||||
data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz \
|
||||
${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
||||
data: ${TRAIN_SRC}.clean.${PRE_SRC}.gz ${TRAIN_TRG}.clean.${PRE_TRG}.gz
|
||||
${MAKE} ${DEV_SRC}.${PRE_SRC} ${DEV_TRG}.${PRE_TRG}
|
||||
${MAKE} ${TEST_SRC}.${PRE_SRC} ${TEST_TRG}
|
||||
${MAKE} ${MODEL_SRCVOCAB} ${MODEL_TRGVOCAB}
|
||||
ifeq ($(filter align,${subst -, ,${MODELTYPE}}),align)
|
||||
|
@ -315,7 +315,24 @@ PRE_TRG = ${SUBWORDS}${TRGBPESIZE:000=}k
|
||||
## default name of the data set (and the model)
|
||||
##-------------------------------------
|
||||
|
||||
DATASET ?= opus
|
||||
TRAINSET_NAME ?= opus
|
||||
DATASET ?= ${TRAINSET_NAME}
|
||||
|
||||
## dev and test data come from one specific data set
|
||||
## if we have a bilingual model
|
||||
|
||||
ifeq (${words ${SRCLANGS}},1)
|
||||
ifeq (${words ${TRGLANGS}},1)
|
||||
DEVSET_NAME ?= ${DEVSET}
|
||||
TESTSET_NAME ?= ${TESTSET}
|
||||
endif
|
||||
endif
|
||||
|
||||
## otherwise we give them a generic name
|
||||
|
||||
DEVSET_NAME ?= opus-dev
|
||||
TESTSET_NAME ?= opus-test
|
||||
|
||||
|
||||
## DATADIR = directory where the train/dev/test data are
|
||||
## WORKDIR = directory used for training
|
||||
@ -336,20 +353,6 @@ LOCAL_TRAIN_SRC = ${TMPDIR}/${LANGPAIRSTR}/train/${DATASET}.src
|
||||
LOCAL_TRAIN_TRG = ${TMPDIR}/${LANGPAIRSTR}/train/${DATASET}.trg
|
||||
LOCAL_MONO_DATA = ${TMPDIR}/${LANGSTR}/train/${DATASET}.mono
|
||||
|
||||
## dev and test data come from one specific data set
|
||||
## if we have a bilingual model
|
||||
|
||||
ifeq (${words ${SRCLANGS}},1)
|
||||
ifeq (${words ${TRGLANGS}},1)
|
||||
DEVSET_NAME ?= ${DEVSET}
|
||||
TESTSET_NAME ?= ${TESTSET}
|
||||
endif
|
||||
endif
|
||||
|
||||
## otherwise we give them a generic name
|
||||
|
||||
DEVSET_NAME ?= opus-dev
|
||||
TESTSET_NAME ?= opus-test
|
||||
|
||||
DEV_SRC ?= ${WORKDIR}/val/${DEVSET_NAME}.src
|
||||
DEV_TRG ?= ${WORKDIR}/val/${DEVSET_NAME}.trg
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,7 +10,7 @@
|
||||
tmpsrc=`mktemp`
|
||||
tmptrg=`mktemp`
|
||||
tmplang=`mktemp`
|
||||
|
||||
column=0
|
||||
|
||||
if [ "$1" == "kor" ] || [ "$1" == "ko" ]; then
|
||||
column=1
|
||||
|
Loading…
Reference in New Issue
Block a user