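Fixed include paths in the backtranslate/evaluate/finetune makefiles (../Makefile.{env,config,slurm} -> ../lib/{env,config,slurm}.mk) and replaced direct `shuf` calls with ${SHUFFLE}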

This commit is contained in:
Joerg Tiedemann 2020-05-07 22:51:31 +03:00
parent c703bb4c2b
commit d4b71e0261
4 changed files with 19 additions and 15 deletions

View File

@ -4,9 +4,9 @@
# only works with sentencepiece models!
#
include ../Makefile.env
include ../Makefile.config
include ../Makefile.slurm
include ../lib/env.mk
include ../lib/config.mk
include ../lib/slurm.mk
SRC = af
TRG = en

View File

@ -1,7 +1,7 @@
include ../Makefile.env
include ../Makefile.config
include ../Makefile.slurm
include ../lib/env.mk
include ../lib/config.mk
include ../lib/slurm.mk
SRC = en

View File

@ -41,9 +41,10 @@
# --> need to adjust preprocess-scripts for those models
#
include ../Makefile.env
include ../Makefile.config
include ../Makefile.slurm
include ../lib/env.mk
include ../lib/config.mk
include ../lib/slurm.mk
MODEL = news

View File

@ -481,7 +481,10 @@ ${DEV_SRC}.shuffled.gz:
fi \
done \
done
paste ${DEV_SRC} ${DEV_TRG} | shuf | gzip -c > $@
paste ${DEV_SRC} ${DEV_TRG} | ${SHUFFLE} | gzip -c > $@
# paste ${DEV_SRC} ${DEV_TRG} | shuf | gzip -c > $@
## if we have less than twice the amount of DEVMINSIZE in the data set
@ -628,7 +631,7 @@ ifneq (${TESTSET},${DEVSET})
done \
done; \
if [ ${TESTSIZE} -lt `cat $@ | wc -l` ]; then \
paste ${TEST_SRC} ${TEST_TRG} | shuf | gzip -c > $@.shuffled.gz; \
paste ${TEST_SRC} ${TEST_TRG} | ${SHUFFLE} | gzip -c > $@.shuffled.gz; \
zcat $@.shuffled.gz | cut -f1 | tail -${TESTSIZE} > ${TEST_SRC}; \
zcat $@.shuffled.gz | cut -f2 | tail -${TESTSIZE} > ${TEST_TRG}; \
echo "testset = top ${TESTSIZE} lines of $@.shuffled!" >> ${dir $@}/README; \
@ -1042,9 +1045,9 @@ ${SPMSRCMODEL}: ${LOCAL_TRAIN_SRC}
ifeq ($(wildcard ${SPMSRCMODEL}),)
mkdir -p ${dir $@}
ifeq ($(TRGLANGS),${firstword ${TRGLANGS}})
grep . $< | shuf > $<.text
grep . $< | ${SHUFFLE} > $<.text
else
cut -f2- -d ' ' $< | grep . | shuf > $<.text
cut -f2- -d ' ' $< | grep . | ${SHUFFLE} > $<.text
endif
${MAKE} ${LOCAL_TRAIN_SRC}.charfreq
if [ `cat ${LOCAL_TRAIN_SRC}.charfreq | wc -l` -gt 1000 ]; then \
@ -1073,7 +1076,7 @@ endif
${SPMTRGMODEL}: ${LOCAL_TRAIN_TRG}
ifeq ($(wildcard ${SPMTRGMODEL}),)
mkdir -p ${dir $@}
grep . $< | shuf > $<.text
grep . $< | ${SHUFFLE} > $<.text
${MAKE} ${LOCAL_TRAIN_TRG}.charfreq
if [ `cat ${LOCAL_TRAIN_TRG}.charfreq | wc -l` -gt 1000 ]; then \
${SPM_HOME}/spm_train ${SPMEXTRA} \
@ -1157,7 +1160,7 @@ endif
${SPMMODEL}: ${LOCAL_MONO_DATA}.${PRE}
ifeq ($(wildcard ${SPMMODEL}),)
mkdir -p ${dir $@}
grep . $< | shuf > $<.text
grep . $< | ${SHUFFLE} > $<.text
${MAKE} ${LOCAL_MONO_DATA}.${PRE}.charfreq
if [ `cat ${LOCAL_MONO_DATA}.${PRE}.charfreq | wc -l` -gt 1000 ]; then \
${SPM_HOME}/spm_train ${SPMEXTRA} \