mirror of https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-10-03 23:57:47 +03:00

24x12 transformer model added

commit 7174c98af4 (parent fd13234eb2)
@@ -1 +1 @@
-Subproject commit 899f4b6c0abc66013d0546d36a6681f69e40bcbb
+Subproject commit 50ac071d6d3c85efc2aa7ab379ba1863c7322d5d
@@ -64,6 +64,8 @@ MODELTYPES = transformer \
     transformer-base-align \
     transformer-big \
     transformer-big-align \
+    transformer-24x12 \
+    transformer-24x12-align \
     transformer-small \
     transformer-small-align \
     transformer-tiny \
@@ -105,6 +105,7 @@ export TMPWORKDIR
 SCRIPTDIR ?= ${REPOHOME}scripts
 TOOLSDIR  ?= ${REPOHOME}tools
 
+MONITOR   ?= ${shell which monitor 2>/dev/null || echo ${TOOLSDIR}/monitor}
 ISO639    ?= ${shell which iso639 2>/dev/null || echo 'perl ${TOOLSDIR}/LanguageCodes/ISO-639-3/bin/iso639'}
 PIGZ      ?= ${shell which pigz 2>/dev/null || echo ${TOOLSDIR}/pigz/pigz}
 TERASHUF  ?= ${shell which terashuf 2>/dev/null || echo ${TOOLSDIR}/terashuf/terashuf}
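The new MONITOR variable follows the same which-with-fallback idiom as the neighbouring tool variables; in plain shell the lookup amounts to this (a sketch, relative path assumed):

    MONITOR=$(which monitor 2>/dev/null || echo tools/monitor)
    "$MONITOR" echo hello   # uses an installed monitor if one is on PATH, else the bundled script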
@@ -287,7 +287,7 @@ PIVOT_LANG ?= ${DEFAULT_PIVOT_LANG}
 FT_SELECTED ?= 95
 
 %-ftbest:
-    @for s in ${SRCLANGS}; do \
+    @-for s in ${SRCLANGS}; do \
       for t in ${TRGLANGS}; do \
         if [ -e ${FORWARDTRANS_HOME}/$$s-$$t/latest ]; then \
           if [ `ls ${FORWARDTRANS_HOME}/$$s-$$t/latest/ | grep "best${FT_SELECTED}.gz" | wc -l` -eq 0 ]; then \
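The added `-` recipe prefix tells make to keep going when the shell loop exits non-zero instead of aborting the target. A minimal standalone demo (hypothetical file name):

    printf 'demo:\n\t@-false\n\t@echo still running\n' > /tmp/ignore-demo.mk
    make -f /tmp/ignore-demo.mk
    # make reports "Error 1 (ignored)" for the failing line and still prints "still running"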
@@ -304,7 +304,7 @@ FT_SELECTED ?= 95
     ${@:-ftbest=}
 
 %-ftrawbest:
-    @for s in ${SRCLANGS}; do \
+    @-for s in ${SRCLANGS}; do \
       for t in ${TRGLANGS}; do \
         if [ -e ${FORWARDTRANS_HOME}/$$s-$$t/latest ]; then \
           ${MAKE} -C ${FORWARDTRANS_HOME} SRC=$$s TRG=$$t \
@@ -38,32 +38,43 @@ fineng-test-student:
 
 
-euseng-train-tinystudent:
-    make SRCLANGS=eus TRGLANGS=eng train-tiny11-student
-
-swaeng-train-tinystudent:
-    make SRCLANGS=swa TRGLANGS=eng train-tiny11-student
-
-glgeng-train-tinystudent:
-    make SRCLANGS=glg TRGLANGS=eng train-tiny11-student
-
-euseng-train-smallstudent:
-    make SRCLANGS=eus TRGLANGS=eng train-small-student
-
-swaeng-train-smallstudent:
-    make SRCLANGS=swa TRGLANGS=eng train-small-student
-
-glgeng-train-smallstudent:
-    make SRCLANGS=glg TRGLANGS=eng train-small-student
+HPLTLANGS = eus swa glg zho
+
+hplt-train-tinystudents:
+    for l in ${HPLTLANGS}; do \
+      make SRCLANGS=$$l TRGLANGS=eng train-tiny11-student; \
+      make SRCLANGS=$$l TRGLANGS=eng train-small-student; \
+    done
+#     make HPC_MEM=32g GPUJOB_HPC_MEM=32g SRCLANGS=$$l TRGLANGS=eng train-base-student; \
+
+hplt-quantize-students:
+    for l in ${HPLTLANGS}; do \
+      make SRCLANGS=$$l TRGLANGS=eng quantize-tiny11-student; \
+      make SRCLANGS=$$l TRGLANGS=eng quantize-small-student; \
+    done
+
+hplt-test-quantized-students:
+    for l in ${HPLTLANGS}; do \
+      make SRCLANGS=$$l TRGLANGS=eng test-quantized-tiny11-student; \
+      make SRCLANGS=$$l TRGLANGS=eng test-quantized-small-student; \
+    done
+
+hplt-release-students:
+    for l in ${HPLTLANGS}; do \
+      make SRCLANGS=$$l TRGLANGS=eng release-tiny11-student; \
+      make SRCLANGS=$$l TRGLANGS=eng release-small-student; \
+    done
 
 
 ## generic recipes for training and testing student models
 
 data-student:
     make ${STUDENT_HPCPARAMS} FT_SELECTED=${STUDENT_CEFILTER} \
         data-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
 
 train-student:
     make ${STUDENT_HPCPARAMS} FT_SELECTED=${STUDENT_CEFILTER} \
         all-job-${STUDENT_DATA}-${STUDENT_VOCAB}-tatoeba
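The new batch targets chain into a natural pipeline over all HPLTLANGS; an intended usage sketch based on the target names above:

    make hplt-train-tinystudents        # train tiny11 and small students for eus/swa/glg/zho -> eng
    make hplt-quantize-students         # quantize both student variants
    make hplt-test-quantized-students   # evaluate the quantized models
    make hplt-release-students          # package the releases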
@@ -79,6 +79,42 @@ roa2eng:
 
 
+fin2eng-extended:
+    ${MAKE} MODELTYPE=transformer-big CONTINUE_EXISTING=1 DATASET=${DATASET}+news tatoeba-fin2eng-trainjob-bt
+
+eng2fin-extended:
+    ${MAKE} MODELTYPE=transformer-big CONTINUE_EXISTING=1 DATASET=${DATASET}+news tatoeba-eng2fin-trainjob-bt
+
+swe2fin-extended:
+    ${MAKE} MODELTYPE=transformer-big CONTINUE_EXISTING=1 DATASET=${DATASET}+news tatoeba-swe2fin-trainjob-bt-pbt
+
+fin2swe-extended:
+    ${MAKE} MODELTYPE=transformer-big CONTINUE_EXISTING=1 tatoeba-fin2swe-trainjob-bt-pbt
+
+
+fin2eng-24x12:
+    ${MAKE} MODELTYPE=transformer-24x12 DATASET=${DATASET}+news \
+        GPUJOB_HPC_CORES=4 GPUJOB_HPC_MEM=32g \
+        GPUJOB_SUBMIT=-gpu0123 \
+        MARIAN_WORKSPACE=15000 tatoeba-fin2eng-trainjob-bt
+
+fin2swe-24x12:
+    ${MAKE} MODELTYPE=transformer-24x12 \
+        GPUJOB_HPC_CORES=4 GPUJOB_HPC_MEM=32g \
+        GPUJOB_SUBMIT=-gpu0123 \
+        MARIAN_WORKSPACE=15000 tatoeba-fin2swe-trainjob-bt-pbt
+
+
 elg-release-models:
     make MODELTYPE=transformer-big release-all-improved-models-bt
     make MODELTYPE=transformer-big release-all-improved-models
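Kicking off the deep model is then a single target; the resource settings can be overridden on the command line if the local GPUs have less memory (a hedged example, values from the recipes above):

    make fin2eng-24x12                          # 24-layer encoder / 12-layer decoder run with news-augmented data
    make MARIAN_WORKSPACE=10000 fin2swe-24x12   # same job with a smaller marian workspace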
@@ -1,6 +1,4 @@
-
-
 
 #-------------------------------------------------------------------
 # important secondary langs in Finland
 #-------------------------------------------------------------------
16 lib/train.mk
@@ -150,6 +150,7 @@ ifneq ($(subst -align,,${MODELTYPE}),${MODELTYPE})
   MARIAN_EXTRA += --guided-alignment ${TRAIN_ALG}
 endif
 
+
 ifeq ($(subst -align,,${MODELTYPE}),transformer-tiny)
   MARIAN_ENC_DEPTH = 3
   MARIAN_DEC_DEPTH = 2
@@ -211,6 +212,17 @@ ifeq ($(subst -align,,${MODELTYPE}),transformer-big)
 endif
 
 
+ifeq ($(subst -align,,${MODELTYPE}),transformer-24x12)
+  MARIAN_ENC_DEPTH = 24
+  MARIAN_DEC_DEPTH = 12
+  MARIAN_ATT_HEADS = 8
+  MARIAN_DIM_EMB = 2024
+  MARIAN_EXTRA += --optimizer-delay 2 --fp16
+  GPUJOB_HPC_MEM = 32g
+endif
+
+
+
 ##------------------------------------------------
 ## set training parameters
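Assuming the usual mapping of MARIAN_* variables onto marian flags elsewhere in lib/train.mk, this block should translate into a command line roughly like the following (a sketch, not a complete invocation):

    # fragment of the assembled training command:
    marian --enc-depth 24 --dec-depth 12 \
           --transformer-heads 8 --dim-emb 2024 \
           --optimizer-delay 2 --fp16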
@@ -280,7 +292,7 @@ endif
 ## load anyway before calling make? It is already set in the
 ## SLURM scripts ...
 ##--------------------------------------------------------------------
-    ${LOAD_ENV} && ${MARIAN_TRAIN} \
+    ${LOAD_ENV} && ${MONITOR} ${MARIAN_TRAIN} \
         ${MARIAN_TRAINING_PARAMETER} \
         ${MARIAN_EXTRA} \
         ${MARIAN_STOP_CRITERIA} \
@@ -298,7 +310,7 @@ endif
         --shuffle ${MARIAN_SHUFFLE} \
         --sharding ${MARIAN_SHARDING} \
         --overwrite \
-        --keep-best
+        --keep-best 2>>$(@:.done=.log) 1>&2
     touch $@
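In plain shell terms the added redirection does the following (hypothetical command and file names; the make substitution $(@:.done=.log) rewrites a target foo.done into foo.log):

    train_command 2>>foo.log 1>&2
    # fd 2 is opened in append mode on foo.log first,
    # then fd 1 is duplicated onto fd 2, so stdout and stderr both land in the log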
@@ -3,4 +3,4 @@
 # USAGE postprocess.sh < input > output
 #
 
-sed 's/ //g;s/▁/ /g'
+sed 's/ //g;s/▁/ /g;s/^ *//'
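The effect of the added s/^ *// on SentencePiece detokenization: the first token starts with ▁, so the old pipeline always left a leading space on the line. For example:

    echo '▁Hello ▁world' | sed 's/ //g;s/▁/ /g'          # -> " Hello world" (leading space)
    echo '▁Hello ▁world' | sed 's/ //g;s/▁/ /g;s/^ *//'  # -> "Hello world"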
@@ -98,6 +98,11 @@ MODELNAME := ${patsubst %.zip,%,${notdir ${MODELZIP}}}
 MULTI_TARGET_MODEL := ${shell ${WGET} -qq -O - ${MODELINFO} | grep 'use-target-labels' | wc -l}
 ifneq (${MULTI_TARGET_MODEL},0)
   TARGET_LANG_LABEL := ${shell ${WGET} -qq -O - ${MODELINFO} | grep -o '>>${TRG}.*<<' | head -1}
+  ifeq (${TARGET_LANG_LABEL},)
+    ifneq ($(wildcard ${LANGPAIR}/${MODELNAME}/*.vocab.yml),)
+      TARGET_LANG_LABEL := $(shell grep -o '>>${TRG}.*<<' $(wildcard ${LANGPAIR}/${MODELNAME}/*.vocab.yml) | head -1)
+    endif
+  endif
 endif
 
 RELEASED_BITEXTS := $(patsubst %.tar,%,${shell ${WGET} -qq -O - ${TATOEBA_GITRAW}/Wiki.md | \
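The new fallback repeats the same label grep against a local vocab file when the remote model info yields nothing; on its own the grep behaves like this (hedged example with a made-up label line):

    echo 'labels: >>eng<< >>fin<<' | grep -o '>>eng.*<<' | head -1
    # -> >>eng<< >>fin<<   (.* is greedy, so the match runs to the last '<<' on the line)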
@@ -137,6 +142,7 @@ all: prepare
     ${MAKE} translate-all-parts
     ${MAKE} score-translations
+    ${MAKE} sort-scored-translations
     ${MAKE} extract-best-translations
 
 
 .PHONY: mtmodel
@@ -192,6 +198,12 @@ else
   REV_TRG_PREPROCESS_ARGS = ${SRC} ${REV_LANGPAIR}/${REV_MODELNAME}/target.spm
 endif
 
+print-reverse-modelinfo:
+    @echo ${REV_MODELNAME}
+    @echo ${REV_MODELZIP}
+    @echo ${MODELINFO}
+    @echo "multi-target model: ${REV_MULTI_TARGET_MODEL}"
+
 
 ## score translations with reverse translation model
 ## normalize scores (see https://github.com/browsermt/students)
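A quick way to inspect which reverse model will be used for scoring (assuming SRC and TRG are set the usual way for this Makefile):

    make SRC=fin TRG=eng print-reverse-modelinfo
    # prints the reverse model name and zip URL, the model info URL,
    # and the multi-target flag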
@@ -42,7 +42,8 @@ BT_WORK_CONTAINER = project-Tatoeba-MT-bt
 
 ## split size in nr-of-lines
 ## default part to be selected = aa
 SPLIT_SIZE ?= 1000000
+MAX_NR_OF_PARTS ?= 50
 
 ## maximum input length (number sentence piece segments)
 ## maximum number of sentences to be translated (top N lines)
@@ -97,8 +98,8 @@ BITEXT_LATEST_TRG = ${OUTPUT_DIR}/latest/Tatoeba-train.${PIVOT}-${SRC}-${TRG}
 BITEXT_LATEST_README = ${OUTPUT_DIR}/latest/README.md
 
 
-## all parts of the bitext
-PARTS = $(subst .,,${suffix ${basename ${wildcard ${BITEXT_PRE:${PART}.gz=}??.gz}}})
+## all parts of the bitext (restricted to a specific max number of parts)
+PARTS = $(wordlist 1,${MAX_NR_OF_PARTS},$(subst .,,${suffix ${basename ${wildcard ${BITEXT_PRE:${PART}.gz=}??.gz}}}))
 ALL_BITEXT_LATEST_SRC = ${patsubst %,${OUTPUT_DIR}/latest/Tatoeba-train.${PIVOT}-${SRC}-${TRG}.%.${SRC}.gz,${PARTS}}
 ALL_BITEXT_LATEST_TRG = ${patsubst %,${OUTPUT_DIR}/latest/Tatoeba-train.${PIVOT}-${SRC}-${TRG}.%.${TRG}.gz,${PARTS}}
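$(wordlist) is what enforces the new cap on the number of parts; a tiny self-contained check (hypothetical file, with 3 standing in for MAX_NR_OF_PARTS):

    printf 'PARTS := aa ab ac ad ae af\ndemo:\n\t@echo $(wordlist 1,3,$(PARTS))\n' > /tmp/parts-demo.mk
    make -f /tmp/parts-demo.mk    # prints: aa ab ac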
@@ -140,7 +141,7 @@ ${MODELDIR}/decoder.yml:
 ifneq (${MODELZIP},)
     mkdir -p ${dir $@}
     ${WGET} -O ${dir $@}/model.zip ${MODELZIP}
-    cd ${dir $@} && unzip model.zip
+    cd ${dir $@} && unzip -n model.zip
     rm -f ${dir $@}/model.zip
     mv ${dir $@}/preprocess.sh ${dir $@}/preprocess-old.sh
     sed 's#perl -C -pe.*$$#perl -C -pe "s/(?!\\n)\\p{C}/ /g;" |#' \
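The -n flag makes unzip skip files that already exist, so presumably re-running this recipe will not clobber the patched preprocess.sh created below; compare:

    unzip model.zip      # prompts before replacing existing files
    unzip -n model.zip   # never overwrites, keeps what is already unpacked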
52 tools/monitor (new executable file)
@@ -0,0 +1,52 @@
+#!/usr/bin/bash
+
+
+energy_counter() {
+    python3 << END
+import sys
+from pynvml import (
+    nvmlInit, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
+    nvmlDeviceGetTotalEnergyConsumption, nvmlShutdown
+)
+
+nvmlInit()
+
+deviceCount = nvmlDeviceGetCount()
+for i in range(deviceCount):
+    handle = nvmlDeviceGetHandleByIndex(i)
+    energy = nvmlDeviceGetTotalEnergyConsumption(handle)
+    print(f"    energy counter GPU {i}: {energy} mJ", file=sys.stderr)
+
+nvmlShutdown()
+END
+}
+
+
+COMMAND=$@
+
+TIME=$(which time || echo "time")
+NVIDIA_GPU_QUERY=timestamp,name,pci.bus_id,driver_version,pstate,pcie.link.gen.max,pcie.link.gen.current,temperature.gpu,utilization.gpu,utilization.memory,power.draw,memory.total,memory.free,memory.used
+
+tmpfile=$(mktemp)
+
+if command -v nvidia-smi &> /dev/null
+then
+    nvidia-smi --query-gpu=${NVIDIA_GPU_QUERY} --format=csv -l 1 > ${tmpfile}.gpu &
+    echo " - energy-consumption counter (start): " >&2
+    energy_counter
+fi
+
+${TIME} -v -o ${tmpfile} "$@"
+echo " - resources used according to time:" >&2
+cat ${tmpfile} >&2
+rm -f ${tmpfile}
+
+if command -v nvidia-smi &> /dev/null
+then
+    kill %1
+    echo " - energy-consumption counter (end): " >&2
+    energy_counter
+    echo " - GPU utilization:" >&2
+    cat ${tmpfile}.gpu >&2
+    rm -f ${tmpfile}.gpu
+fi
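The wrapper takes the command to run as its arguments, so any job can be instrumented (pynvml must be installed for the energy counters to report anything):

    tools/monitor sleep 5
    # stderr receives the time(1) resource report and, when nvidia-smi is
    # available, NVML energy counters at start/end plus a 1-second-interval GPU CSV log

In lib/train.mk it is spliced in front of the training binary as ${MONITOR} ${MARIAN_TRAIN} ... (see the hunk above).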