mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-11-30 12:32:24 +03:00
fixed includes in backtranslate/evaluate/finetune makefiles
This commit is contained in:
parent
c703bb4c2b
commit
d4b71e0261
@ -4,9 +4,9 @@
|
||||
# only works with sentencepiece models!
|
||||
#
|
||||
|
||||
include ../Makefile.env
|
||||
include ../Makefile.config
|
||||
include ../Makefile.slurm
|
||||
include ../lib/env.mk
|
||||
include ../lib/config.mk
|
||||
include ../lib/slurm.mk
|
||||
|
||||
SRC = af
|
||||
TRG = en
|
||||
|
@ -1,7 +1,7 @@
|
||||
|
||||
include ../Makefile.env
|
||||
include ../Makefile.config
|
||||
include ../Makefile.slurm
|
||||
include ../lib/env.mk
|
||||
include ../lib/config.mk
|
||||
include ../lib/slurm.mk
|
||||
|
||||
|
||||
SRC = en
|
||||
|
@ -41,9 +41,10 @@
|
||||
# --> need to adjust preprocess-scripts for those models
|
||||
#
|
||||
|
||||
include ../Makefile.env
|
||||
include ../Makefile.config
|
||||
include ../Makefile.slurm
|
||||
include ../lib/env.mk
|
||||
include ../lib/config.mk
|
||||
include ../lib/slurm.mk
|
||||
|
||||
|
||||
MODEL = news
|
||||
|
||||
|
15
lib/data.mk
15
lib/data.mk
@ -481,7 +481,10 @@ ${DEV_SRC}.shuffled.gz:
|
||||
fi \
|
||||
done \
|
||||
done
|
||||
paste ${DEV_SRC} ${DEV_TRG} | shuf | gzip -c > $@
|
||||
paste ${DEV_SRC} ${DEV_TRG} | ${SHUFFLE} | gzip -c > $@
|
||||
|
||||
# paste ${DEV_SRC} ${DEV_TRG} | shuf | gzip -c > $@
|
||||
|
||||
|
||||
|
||||
## if we have less than twice the amount of DEVMINSIZE in the data set
|
||||
@ -628,7 +631,7 @@ ifneq (${TESTSET},${DEVSET})
|
||||
done \
|
||||
done; \
|
||||
if [ ${TESTSIZE} -lt `cat $@ | wc -l` ]; then \
|
||||
paste ${TEST_SRC} ${TEST_TRG} | shuf | gzip -c > $@.shuffled.gz; \
|
||||
paste ${TEST_SRC} ${TEST_TRG} | ${SHUFFLE} | gzip -c > $@.shuffled.gz; \
|
||||
zcat $@.shuffled.gz | cut -f1 | tail -${TESTSIZE} > ${TEST_SRC}; \
|
||||
zcat $@.shuffled.gz | cut -f2 | tail -${TESTSIZE} > ${TEST_TRG}; \
|
||||
echo "testset = top ${TESTSIZE} lines of $@.shuffled!" >> ${dir $@}/README; \
|
||||
@ -1042,9 +1045,9 @@ ${SPMSRCMODEL}: ${LOCAL_TRAIN_SRC}
|
||||
ifeq ($(wildcard ${SPMSRCMODEL}),)
|
||||
mkdir -p ${dir $@}
|
||||
ifeq ($(TRGLANGS),${firstword ${TRGLANGS}})
|
||||
grep . $< | shuf > $<.text
|
||||
grep . $< | ${SHUFFLE} > $<.text
|
||||
else
|
||||
cut -f2- -d ' ' $< | grep . | shuf > $<.text
|
||||
cut -f2- -d ' ' $< | grep . | ${SHUFFLE} > $<.text
|
||||
endif
|
||||
${MAKE} ${LOCAL_TRAIN_SRC}.charfreq
|
||||
if [ `cat ${LOCAL_TRAIN_SRC}.charfreq | wc -l` -gt 1000 ]; then \
|
||||
@ -1073,7 +1076,7 @@ endif
|
||||
${SPMTRGMODEL}: ${LOCAL_TRAIN_TRG}
|
||||
ifeq ($(wildcard ${SPMTRGMODEL}),)
|
||||
mkdir -p ${dir $@}
|
||||
grep . $< | shuf > $<.text
|
||||
grep . $< | ${SHUFFLE} > $<.text
|
||||
${MAKE} ${LOCAL_TRAIN_TRG}.charfreq
|
||||
if [ `cat ${LOCAL_TRAIN_TRG}.charfreq | wc -l` -gt 1000 ]; then \
|
||||
${SPM_HOME}/spm_train ${SPMEXTRA} \
|
||||
@ -1157,7 +1160,7 @@ endif
|
||||
${SPMMODEL}: ${LOCAL_MONO_DATA}.${PRE}
|
||||
ifeq ($(wildcard ${SPMMODEL}),)
|
||||
mkdir -p ${dir $@}
|
||||
grep . $< | shuf > $<.text
|
||||
grep . $< | ${SHUFFLE} > $<.text
|
||||
${MAKE} ${LOCAL_MONO_DATA}.${PRE}.charfreq
|
||||
if [ `cat ${LOCAL_MONO_DATA}.${PRE}.charfreq | wc -l` -gt 1000 ]; then \
|
||||
${SPM_HOME}/spm_train ${SPMEXTRA} \
|
||||
|
Loading…
Reference in New Issue
Block a user