fixed bug in release target

This commit is contained in:
Joerg Tiedemann 2020-10-04 00:10:11 +03:00
parent 4cc192da15
commit 40a6b5ab6b
4 changed files with 64 additions and 5 deletions

View File

@ -1,4 +1,17 @@
# more efficient parallelisation
from Bergamot:
https://github.com/browsermt/students/blob/master/train-student/alignment/generate-alignment-and-shortlist.sh
```
# Subword segmentation with SentencePiece.
test -s $DIR/corpus.spm.$SRC || cat $CORPUS_SRC | pigz -dc | parallel --no-notice --pipe -k -j16 --block 50M "$MARIAN/spm_encode --model $VOCAB" > $DIR/corpus.spm.$SRC
test -s $DIR/corpus.spm.$TRG || cat $CORPUS_TRG | pigz -dc | parallel --no-notice --pipe -k -j16 --block 50M "$MARIAN/spm_encode --model $VOCAB" > $DIR/corpus.spm.$TRG
```
# related projects
* https://browser.mt (bergamot project)

View File

@ -38,7 +38,7 @@ local-dist:
# Release gate: run the `dist` goal in a sub-make only when the BLEU score
# found in $(TEST_EVALUATION) reaches ${MIN_BLEU_SCORE}.
# The score is taken from the line(s) matching "BLEU": third space-separated
# field, truncated to the integer part before the first '.'.
global-dist release:
	if [ `grep BLEU $(TEST_EVALUATION) | cut -f3 -d ' ' | cut -f1 -d '.'` -ge ${MIN_BLEU_SCORE} ]; then \
	  ${MAKE} MODELSHOME=${PWD}/models \
	          MODELS_URL=https://object.pouta.csc.fi/${MODEL_CONTAINER} \
	          dist; \
	fi
# NOTE: every line of the shell `if` except the last must end in a backslash;
# a MODELS_URL line without one splits the recipe inside the open `if`,
# leaving two shell fragments that are each a syntax error.
@ -235,7 +235,7 @@ endif
-e 's/maxi-batch: [0-9]*$$/maxi-batch: 1/' \
-e 's/relative-paths: false/relative-paths: true/' \
< ${MODEL_DECODER} > ${WORKDIR}/decoder.yml
@cd ${WORKDIR} && zip ${notdir $@} \
cd ${WORKDIR} && zip ${notdir $@} \
README.md LICENSE \
${notdir ${MODEL_FINAL}} \
${notdir ${MODEL_SRCVOCAB}} \

View File

@ -18,6 +18,28 @@ memad-multi-subs:
DEVSET=OpenSubtitles TRAINSET= MODELTYPE=transformer \
WALLTIME=72 HPC_MEM=8g HPC_CORES=1 train.submit-multigpu
# Run the goals `eval`, `eval-testsets` and `release` one after another,
# each in its own sub-make and each with the same variable overrides
# (MEMAD_LANGS on both source and target side, same-language pairs skipped,
# OpenSubtitles as dev set, empty TRAINSET, transformer model type).
memad-multi-subs-dist:
	$(MAKE) \
	    SRCLANGS="$(MEMAD_LANGS)" \
	    TRGLANGS="$(MEMAD_LANGS)" \
	    SKIP_LANGPAIRS="de-de|en-en|fi-fi|fr-fr|nl-nl|sv-sv" \
	    DEVSET=OpenSubtitles \
	    TRAINSET= \
	    MODELTYPE=transformer \
	    WALLTIME=72 HPC_MEM=8g HPC_CORES=1 \
	    eval
	$(MAKE) \
	    SRCLANGS="$(MEMAD_LANGS)" \
	    TRGLANGS="$(MEMAD_LANGS)" \
	    SKIP_LANGPAIRS="de-de|en-en|fi-fi|fr-fr|nl-nl|sv-sv" \
	    DEVSET=OpenSubtitles \
	    TRAINSET= \
	    MODELTYPE=transformer \
	    WALLTIME=72 HPC_MEM=8g HPC_CORES=1 \
	    eval-testsets
	$(MAKE) \
	    SRCLANGS="$(MEMAD_LANGS)" \
	    TRGLANGS="$(MEMAD_LANGS)" \
	    SKIP_LANGPAIRS="de-de|en-en|fi-fi|fr-fr|nl-nl|sv-sv" \
	    DEVSET=OpenSubtitles \
	    TRAINSET= \
	    MODELTYPE=transformer \
	    WALLTIME=72 HPC_MEM=8g HPC_CORES=1 \
	    release
# Run only the `release` goal in a sub-make, with MEMAD_LANGS on both the
# source and target side, same-language pairs skipped, OpenSubtitles as
# dev set, an empty TRAINSET and the transformer model type.
memad-multi-subs-release:
	$(MAKE) \
	    SRCLANGS="$(MEMAD_LANGS)" \
	    TRGLANGS="$(MEMAD_LANGS)" \
	    SKIP_LANGPAIRS="de-de|en-en|fi-fi|fr-fr|nl-nl|sv-sv" \
	    DEVSET=OpenSubtitles \
	    TRAINSET= \
	    MODELTYPE=transformer \
	    WALLTIME=72 HPC_MEM=8g HPC_CORES=1 \
	    release
memad-multi-train:
${MAKE} SRCLANGS="${MEMAD_LANGS}" TRGLANGS="${MEMAD_LANGS}" MODELTYPE=transformer data
${MAKE} SRCLANGS="${MEMAD_LANGS}" TRGLANGS="${MEMAD_LANGS}" MODELTYPE=transformer \
@ -29,6 +51,30 @@ memad-multi-train:
${@:-memad-multi=}
# Aggregate target with no recipe of its own: requesting it builds the four
# memad-multiparallel-* directories listed as prerequisites.
memad-multiparallel: memad-multiparallel-basic memad-multiparallel-all \
                     memad-multiparallel-intra memad-multiparallel-intra-all
# Create a directory named after the target and run opus2multi inside it on
# the OpenSubtitles `latest/xml` tree for the languages en de fi fr nl sv.
memad-multiparallel-basic:
	# -p: do not fail when the directory already exists (e.g. a forced
	# rebuild with `make -B` after an earlier, interrupted run).
	mkdir -p $@
	cd $@ && opus2multi /projappl/nlpl/data/OPUS/OpenSubtitles/latest/xml en de fi fr nl sv
# Create a directory named after the target and run opus2multi inside it on
# the OpenSubtitles `latest/all` tree for the languages en de fi fr nl sv.
memad-multiparallel-all:
	# -p: do not fail when the directory already exists (e.g. a forced
	# rebuild with `make -B` after an earlier, interrupted run).
	mkdir -p $@
	cd $@ && opus2multi /projappl/nlpl/data/OPUS/OpenSubtitles/latest/all en de fi fr nl sv
# Create a directory named after the target and run opus2multi inside it on
# the OpenSubtitles `latest/xml` tree for en de fi fr nl sv, additionally
# passing the en-en.xml.gz file through the -i option.
# NOTE(review): -i presumably supplies intra-lingual (en-en) links — confirm
# against the opus2multi documentation.
memad-multiparallel-intra:
	# -p: do not fail when the directory already exists (e.g. `make -B`).
	mkdir -p $@
	# Keep a space before the trailing backslash: the shell splices the two
	# lines together, and without it the -i file and the corpus directory
	# can end up glued into a single argument.
	cd $@ && opus2multi -i /projappl/nlpl/data/OPUS/OpenSubtitles/latest/xml/en-en.xml.gz \
		/projappl/nlpl/data/OPUS/OpenSubtitles/latest/xml en de fi fr nl sv
# Create a directory named after the target and run opus2multi inside it on
# the OpenSubtitles `latest/all` tree for en de fi fr nl sv, additionally
# passing the en-en.xml.gz file (from the `latest/xml` tree) through -i.
# NOTE(review): -i presumably supplies intra-lingual (en-en) links — confirm
# against the opus2multi documentation.
memad-multiparallel-intra-all:
	# -p: do not fail when the directory already exists (e.g. `make -B`).
	mkdir -p $@
	# Keep a space before the trailing backslash: the shell splices the two
	# lines together, and without it the -i file and the corpus directory
	# can end up glued into a single argument.
	cd $@ && opus2multi -i /projappl/nlpl/data/OPUS/OpenSubtitles/latest/xml/en-en.xml.gz \
		/projappl/nlpl/data/OPUS/OpenSubtitles/latest/all en de fi fr nl sv
# Run the `all2pivot` goal in a sub-make with LANGS set to MEMAD_LANGS and
# `en` as the PIVOT.
memad2en:
	$(MAKE) LANGS="$(MEMAD_LANGS)" PIVOT=en all2pivot

View File

@ -352,9 +352,9 @@ tatoeba-eng2group-dist:
# For every language group g in OPUS_LANG_GROUPS that has at least one
# *.npz file somewhere under ${TATOEBA_WORK}/g-g, run that group's
# tatoeba-g2g-eval, -evalall and -dist goals with PIVOT=eng.
# Groups without any *.npz file are skipped silently.
tatoeba-langgroup-dist:
	for g in ${OPUS_LANG_GROUPS}; do \
	  if [ `find ${TATOEBA_WORK}/$$g-$$g -name '*.npz' | wc -l` -gt 0 ]; then \
	    ${MAKE} MODELTYPE=transformer PIVOT=eng tatoeba-$${g}2$${g}-eval; \
	    ${MAKE} MODELTYPE=transformer PIVOT=eng tatoeba-$${g}2$${g}-evalall; \
	    ${MAKE} MODELTYPE=transformer PIVOT=eng tatoeba-$${g}2$${g}-dist; \
	  fi; \
	done
# NOTE: the three sub-makes are chained with `;`, so a failing eval does not
# stop the following evalall/dist for the same group.