Merge branch 'puhti' of github.com:Helsinki-NLP/OPUS-MT-train into puhti

This commit is contained in:
Joerg Tiedemann 2022-03-07 15:55:18 +02:00
commit 07ca3914ff
12 changed files with 40 additions and 10 deletions

3
lib/env/mahti.mk vendored
View File

@ -8,7 +8,8 @@ DATA_PREPARE_HPCPARAMS = CPUJOB_HPC_CORES=2 CPUJOB_HPC_MEM=16g
# CPUJOB_HPC_* settings bundled under DATA_ALIGN_HPCPARAMS
# (128 cores, 20 jobs, 128g memory).
DATA_ALIGN_HPCPARAMS = CPUJOB_HPC_CORES=128 CPUJOB_HPC_JOBS=20 CPUJOB_HPC_MEM=128g

# CSC project identifier. The former value is kept as a comment only: an
# active assignment of it would be dead, because the assignment below
# overrides it before anything can read it.
# CSCPROJECT = project_2002688
CSCPROJECT = project_2005625

# Absolute path of the "work" directory below ${PWD}, resolved with realpath.
# Recursively expanded (=), so the shell call runs on every expansion.
WORKHOME = ${shell realpath ${PWD}/work}
OPUSHOME = /projappl/nlpl/data/OPUS
HPC_QUEUE = medium

View File

@ -63,6 +63,8 @@ quantize-student:
quantize-alphas-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
# Run the two quantization targets for the fine-tuned student model:
# first quantize-tuned-*, then quantize-tuned-alphas-*, both with
# FT_SELECTED=${STUDENT_CEFILTER}, HPC_MEM=20g and WALLTIME=2.
# ${MAKE} (not a literal "make") is used so that command-line flags and
# the jobserver are passed on to the sub-make.
quantize-finetuned-student:
	${MAKE} FT_SELECTED=${STUDENT_CEFILTER} HPC_MEM=20g WALLTIME=2 \
		quantize-tuned-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
	${MAKE} FT_SELECTED=${STUDENT_CEFILTER} HPC_MEM=20g WALLTIME=2 \
		quantize-tuned-alphas-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
@ -73,6 +75,11 @@ test-quantized-student:
test-intgemm8-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba \
test-intgemm8-shortlist-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
# Run the intgemm8tunedalpha test targets (with and without "shortlist" in
# the target name) in a single sub-make call, using
# FT_SELECTED=${STUDENT_CEFILTER}, HPC_MEM=20g and WALLTIME=2.
# ${MAKE} (not a literal "make") is used so that command-line flags and
# the jobserver are passed on to the sub-make.
test-quantized-finetuned-student:
	${MAKE} FT_SELECTED=${STUDENT_CEFILTER} HPC_MEM=20g WALLTIME=2 \
		test-intgemm8tunedalpha-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba \
		test-intgemm8tunedalpha-shortlist-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
test-quantized-all-student:
make FT_SELECTED=${STUDENT_CEFILTER} HPC_MEM=20g WALLTIME=2 \
test-intgemm8-all-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba \

View File

@ -37,6 +37,28 @@ ELG_EU_SELECTED_BIG = gmq zle zls zlw spa fra deu
# "cat oci"
# Run train-tiny11-student with SRCLANGS=ukr and TRGLANGS=eng.
# Uses ${MAKE} so flags such as -n and -j reach the sub-make.
ukreng-train-student:
	${MAKE} SRCLANGS=ukr TRGLANGS=eng train-tiny11-student
# Run train-tiny11-student with SRCLANGS=eng and TRGLANGS=ukr.
# Uses ${MAKE} so flags such as -n and -j reach the sub-make.
engukr-train-student:
	${MAKE} SRCLANGS=eng TRGLANGS=ukr train-tiny11-student
# Run test-tiny11-student for both directions: first ukr->eng, then
# eng->ukr (despite the target name, both language directions are run).
# Uses ${MAKE} so flags such as -n and -j reach the sub-makes.
ukreng-test-student:
	${MAKE} SRCLANGS=ukr TRGLANGS=eng test-tiny11-student
	${MAKE} SRCLANGS=eng TRGLANGS=ukr test-tiny11-student
# ukreng-quantize-student:
# make SRCLANGS=ukr TRGLANGS=eng quantize-tiny11-student
# make SRCLANGS=ukr TRGLANGS=eng quantize-finetuned-tiny11-student
# make SRCLANGS=ukr TRGLANGS=eng test-quantized-tiny11-student
# make SRCLANGS=ukr TRGLANGS=eng test-quantized-finetuned-tiny11-student
# For SRCLANGS=eng and TRGLANGS=ukr, run quantize-tiny11-student and then
# test-quantized-tiny11-student.
# Uses ${MAKE} so flags such as -n and -j reach the sub-makes.
engukr-quantize-student:
	${MAKE} SRCLANGS=eng TRGLANGS=ukr quantize-tiny11-student
	${MAKE} SRCLANGS=eng TRGLANGS=ukr test-quantized-tiny11-student
elg-eval:
${MAKE} elg-eval-tfbig

View File

@ -60,7 +60,7 @@ intgemm8tuned: ${MODEL_INTGEMM8TUNED}
## Produce <stem>.intgemm8tuned.alphas.bin from <stem>.finetune-alphas.npz
## by calling the converter with --gemm-type intgemm8; the prerequisite is
## passed via -f and the target via -t.
%.intgemm8tuned.alphas.bin: %.finetune-alphas.npz
	$(BROWSERMT_CONVERT) --gemm-type intgemm8 -f $< -t $@
## Build <stem>.finetune-alphas.npz by running extract_stats.py on all
## prerequisites ($^: <stem>.finetune-quantmults, then
## <stem>.intgemm8tuned.npz), with the target ($@) as the last argument.
## Only the header naming %.finetune-quantmults is kept; the recipe-less
## duplicate header that named %.fine-quantmults has been removed.
%.finetune-alphas.npz: %.finetune-quantmults %.intgemm8tuned.npz
	${BROWSERMT_HOME}/marian-dev/scripts/alphas/extract_stats.py $^ $@
## NOTE: need to run this on CPU and with one core only!

@ -1 +1 @@
Subproject commit b1edd540dd0b728cd34ed0330f6ba2435dc441de
Subproject commit e2d30a81e2aba5cb6af2c45369433e4a295aa52c

@ -1 +1 @@
Subproject commit 4dbce00415a11537059cd6727eedb7a9afe7dc95
Subproject commit 303f8f4f44fb1681b9edc7527d1715ac12b71a68

@ -1 +1 @@
Subproject commit 9e1bb7131d224ead58f168df89d32fc218a19161
Subproject commit 6575f72aac445e42ff490852161e066588208bc3

@ -1 +1 @@
Subproject commit 80052e5275ae8c45b20411eecdd49c945a64a412
Subproject commit a9f97e9e61a910a374a5d768244e8ad63f407d3e

@ -1 +1 @@
Subproject commit f88ded2ba8fc9c452717674542fcaef231b3f3e9
Subproject commit 601c9ac9807b5ffcbed298952435d9a17d954575

@ -1 +1 @@
Subproject commit b6da942b9ca15eb9149837f07b2b3b6ff21d9845
Subproject commit cb8a432c91a1dbaee896cd1ad90be62e5d82d452

@ -1 +1 @@
Subproject commit 823c880e4bfc4fce5359b8ea87cc14fcf8a60dc7
Subproject commit 7bae758b2eac35168790ad9b617b668d541b44f2

@ -1 +1 @@
Subproject commit 6b0a8b0c2614c9af687d2bdb1851db89fa1cbf38
Subproject commit c5d520cf528dee5c137a852a7afe6d39b5c3fb67