diff --git a/lib/data.mk b/lib/data.mk index 11d40305..6c279a16 100644 --- a/lib/data.mk +++ b/lib/data.mk @@ -48,10 +48,10 @@ ifdef DATA_SAMPLING_WEIGHT ifneq (${wildcard ${WORKDIR}/train/size_per_language_pair.txt},) ifdef MAX_DATA_SIZE FIT_DATA_SIZE = ${shell ${REPOHOME}scripts/data-sample-sizes.pl -w ${DATA_SAMPLING_WEIGHT} -m ${MAX_DATA_SIZE} \ - ${WORKDIR}/train/size_per_language_pair.txt | grep '^${SORTED_LANGPAIR} ' | cut -f2} + ${WORKDIR}/train/size_per_language_pair.txt | grep '^${LANGPAIR} ' | cut -f2} else FIT_DATA_SIZE = ${shell ${REPOHOME}scripts/data-sample-sizes.pl -w ${DATA_SAMPLING_WEIGHT} \ - ${WORKDIR}/train/size_per_language_pair.txt | grep '^${SORTED_LANGPAIR} ' | cut -f2} + ${WORKDIR}/train/size_per_language_pair.txt | grep '^${LANGPAIR} ' | cut -f2} endif endif endif @@ -59,7 +59,7 @@ endif print-data-sampling-size: ${REPOHOME}scripts/data-sample-sizes.pl -w 0.3 \ - ${WORKDIR}/train/size_per_language_pair.txt | grep '^${SORTED_LANGPAIR} ' | cut -f2 + ${WORKDIR}/train/size_per_language_pair.txt | grep '^${LANGPAIR} ' | cut -f2 @echo "sample size for ${LANGPAIR}: ${FIT_DATA_SIZE}" print-data-sampling-sizes: diff --git a/lib/projects/elg.mk b/lib/projects/elg.mk index 79aa2ecb..cafb70f3 100644 --- a/lib/projects/elg.mk +++ b/lib/projects/elg.mk @@ -399,7 +399,6 @@ elg-new-bigmodels: ${MAKE} MARIAN_EXTRA=--no-restore-corpus MODELTYPE=transformer-big tatoeba-vie2deu-trainjob elg-new-bigmodels1: - rm -f work/deu-fin/train/*.gz work/deu-fin/train/size_per_language_pair.txt ${MAKE} MARIAN_EXTRA=--no-restore-corpus MODELTYPE=transformer-big tatoeba-deu2fin-trainjob-bt for l in spa fra por ita tur ara zho zls zlw; do \ rm -f work/$${l}-fin/train/*.gz work/$${l}-fin/train/size_per_language_pair.txt; \ @@ -416,10 +415,13 @@ elg-new-bigmodels1: elg-new-bigmodels2: ${MAKE} MARIAN_EXTRA=--no-restore-corpus MODELTYPE=transformer-big tatoeba-fin2deu-trainjob-bt for l in spa fra por ita tur ara zho zls zlw; do \ + rm -f work/fin-$${l}/train/*.gz work/fin-$${l}/train/size_per_language_pair.txt; \ + rm -f work/deu-$${l}/train/*.gz work/deu-$${l}/train/size_per_language_pair.txt; \ ${MAKE} MARIAN_EXTRA=--no-restore-corpus MODELTYPE=transformer-big DATA_SAMPLING_WEIGHT=0.3 tatoeba-fin2$$l-trainjob; \ ${MAKE} MARIAN_EXTRA=--no-restore-corpus MODELTYPE=transformer-big DATA_SAMPLING_WEIGHT=0.3 tatoeba-deu2$$l-trainjob; \ done for l in bat gmq heb vie; do \ + rm -f work/deu-$${l}/train/*.gz work/deu-$${l}/train/size_per_language_pair.txt; \ ${MAKE} MARIAN_EXTRA=--no-restore-corpus MODELTYPE=transformer-big DATA_SAMPLING_WEIGHT=0.3 tatoeba-deu2$$l-trainjob; \ done