changes on puhti

This commit is contained in:
Joerg Tiedemann 2022-02-02 21:10:06 +02:00
parent 7de13c11f0
commit 60d509f6d8
4 changed files with 39 additions and 13 deletions

10
lib/env/mahti.mk vendored
View File

@ -4,6 +4,10 @@
#
# VAR=value settings for CPU data jobs on this host. The list is written so
# that it can be placed on a sub-make command line as variable overrides.
DATAJOB_HPCPARAMS = CPUJOB_HPC_CORES=128 CPUJOB_HPC_MEM=128g CPUJOB_HPC_JOBS=20
# The combined data+training job uses the same settings as the data job.
ALLJOB_HPCPARAMS = ${DATAJOB_HPCPARAMS}
# Project ID that ends up in the SLURM "--account=" directive; exactly one
# assignment is active, the alternatives are kept commented out.
# CSCPROJECT = project_2003288
CSCPROJECT = project_2002688
# CSCPROJECT = project_2003093
@ -44,10 +48,16 @@ GPU_MODULES = gcc/10.3.0 cuda/11.4.2 cudnn/8.0.4.30-11.0-linux-x64 openblas/0.
# Shell commands that load the module lists for CPU and GPU jobs.
LOAD_CPU_ENV = module load ${CPU_MODULES}
LOAD_GPU_ENV = module load ${GPU_MODULES}
# When HPC_DISK is non-empty, the GPU --gres request gets an additional
# "nvme:<value>" entry next to the gpu:<type>:<count> entry.
ifneq (${HPC_DISK},)
HPC_GPU_ALLOCATION = --gres=gpu:${GPU}:${NR_GPUS},nvme:${HPC_DISK}
endif
# Same for the GPU-job-specific variable. This conditional comes last, so
# when both variables are set GPUJOB_HPC_DISK overrides HPC_DISK.
ifneq (${GPUJOB_HPC_DISK},)
HPC_GPU_ALLOCATION = --gres=gpu:${GPU}:${NR_GPUS},nvme:${GPUJOB_HPC_DISK}
endif
## extra SLURM directives (up to 5 variables)
# "\#" escapes the hash so that make does not start a comment here.
HPC_EXTRA1 = \#SBATCH --account=${CSCPROJECT}

28
lib/env/puhti.mk vendored
View File

@ -4,9 +4,12 @@
#
# VAR=value settings for CPU data jobs on this host. The list is written so
# that it can be placed on a sub-make command line as variable overrides.
DATAJOB_HPCPARAMS = CPUJOB_HPC_CORES=4 CPUJOB_HPC_MEM=64g CPUJOB_HPC_JOBS=2 CPUJOB_HPC_DISK=500
# The combined data+training job uses the same settings as the data job.
ALLJOB_HPCPARAMS = ${DATAJOB_HPCPARAMS}
# Project ID that ends up in the SLURM "--account=" directive.
# NOTE(review): CSCPROJECT is assigned twice in this span (project_2003288,
# then project_2002688). The last assignment wins in make, so only
# project_2002688 takes effect -- confirm the first line is meant to be gone.
CSCPROJECT = project_2003288
# CSCPROJECT = project_2002688
# CSCPROJECT = project_2003288
CSCPROJECT = project_2002688
# CSCPROJECT = project_2000309
# CSCPROJECT = project_2002982
# Resolved path of ./work below the directory given by ${PWD}.
# Recursive "=" means realpath is re-run every time WORKHOME is expanded.
WORKHOME = ${shell realpath ${PWD}/work}
@ -15,16 +18,19 @@ OPUSHOME = /projappl/nlpl/data/OPUS
# Locations of external tools, all rooted at ${APPLHOME}.
MOSESHOME = ${APPLHOME}/mosesdecoder
MOSESSCRIPTS = ${MOSESHOME}/scripts
EFLOMAL_HOME = ${APPLHOME}/eflomal/
# Two Marian builds are referenced here: marian/build and marian-dev/build.
# NOTE(review): MARIAN_HOME and MARIAN are each assigned twice in this span.
# The last assignment wins in make, so the marian-dev/build values are the
# ones in effect -- confirm the marian/build lines are meant to be inactive.
# MARIAN_HOME = ${APPLHOME}/marian-dev/build/
# MARIAN = ${APPLHOME}/marian-dev/build
MARIAN_HOME = ${APPLHOME}/marian/build/
MARIAN = ${APPLHOME}/marian/build
MARIAN_HOME = ${APPLHOME}/marian-dev/build/
MARIAN = ${APPLHOME}/marian-dev/build
# MARIAN_HOME = ${APPLHOME}/marian/build/
# MARIAN = ${APPLHOME}/marian/build
# SPM_HOME points at the same directory as MARIAN_HOME.
SPM_HOME = ${MARIAN_HOME}
# GPU type used in the --gres request, and the queue name for jobs.
GPU = v100
HPC_QUEUE = small
# Prepend ${APPLHOME}/bin to PATH and export it to all recipe shells.
export PATH := ${APPLHOME}/bin:${PATH}
# Default scratch location; "?=" keeps any value that is already set.
LOCAL_SCRATCH ?= /scratch/${CSCPROJECT}
# CPU and GPU jobs load the same module list on this host.
CPU_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 python-env
GPU_MODULES = gcc/8.3.0 cuda/10.1.168 cudnn/7.6.1.34-10.1 intel-mkl/2019.0.4 python-env
# Load the CPU modules and then print the loaded module list.
LOAD_CPU_ENV = module load ${CPU_MODULES} && module list
@ -35,6 +41,16 @@ ifneq (${HPC_DISK},)
HPC_CPU_EXTRA1 = \#SBATCH --gres=nvme:${HPC_DISK}
endif
# A non-empty GPUJOB_HPC_DISK adds an "nvme:<value>" entry to the GPU --gres
# request next to the gpu:<type>:<count> entry.
ifneq (${GPUJOB_HPC_DISK},)
HPC_GPU_ALLOCATION = --gres=gpu:${GPU}:${NR_GPUS},nvme:${GPUJOB_HPC_DISK}
endif
# A non-empty CPUJOB_HPC_DISK sets the nvme --gres directive for CPU jobs and
# appends HPC_DISK=<value> to MAKEARGS (presumably forwarded to sub-makes --
# verify where MAKEARGS is consumed).
ifneq (${CPUJOB_HPC_DISK},)
HPC_CPU_EXTRA1 = \#SBATCH --gres=nvme:${CPUJOB_HPC_DISK}
MAKEARGS += HPC_DISK=${CPUJOB_HPC_DISK}
endif
## extra SLURM directives (up to 3 numbered variables)
# "\#" escapes the hash so that make does not start a comment here.
HPC_EXTRA1 = \#SBATCH --account=${CSCPROJECT}

View File

@ -169,11 +169,11 @@ all-job:
# Recipe body: runs "${MAKE} -s data-done", keeps the output line containing
# 'data sets', and compares it with "all data sets exist".
#   - equal:     submit train-and-eval.submit${GPUJOB_SUBMIT}
#   - otherwise: submit data-and-train-job.submitcpu
# The *_HPCPARAMS lists are placed on the sub-make command line as
# variable overrides.
# NOTE(review): "==" inside "[ ]" is not POSIX; the comparison relies on the
# recipe shell accepting it -- confirm SHELL is bash.
@if [ "`${MAKE} -s data-done 2>/dev/null | grep 'data sets'`" == "all data sets exist" ]; then \
echo "........ all data files exist already!"; \
echo "........ submit a job for training the model!"; \
${MAKE} train-and-eval.submit${GPUJOB_SUBMIT}; \
${MAKE} ${TRAINJOB_HPCPARAMS} train-and-eval.submit${GPUJOB_SUBMIT}; \
else \
echo "........ submit a CPU job for making data files first!"; \
echo "........ submit training job later!"; \
${MAKE} data-and-train-job.submitcpu; \
${MAKE} ${ALLJOB_HPCPARAMS} data-and-train-job.submitcpu; \
fi
@ -189,24 +189,24 @@ all-job:
# data-and-train-job:
# - build the data and submit the training (+ evaluation) job.
#   The ifdef/ifndef tests on SLURM_JOBID are evaluated when the makefile is
#   parsed, so exactly one of the two submission variants ends up in the recipe.
#   All sub-makes go through ${MAKE} (never literal "make") so that
#   command-line flags, -n and the jobserver are passed on.
data-and-train-job:
ifdef SLURM_JOBID
# SLURM_JOBID is set: submit the training job before building the data and
# pass "-d afterok:<this job id>" in SBATCH_ARGS (presumably handed on to
# sbatch so the training job waits for this job -- verify in the submit rules).
echo "submit training job after data creation job (${SLURM_JOBID})"
${MAKE} SBATCH_ARGS="-d afterok:${SLURM_JOBID}" train-and-eval.submit${GPUJOB_SUBMIT}
${MAKE} ${TRAINJOB_HPCPARAMS} SBATCH_ARGS="-d afterok:${SLURM_JOBID}" train-and-eval.submit${GPUJOB_SUBMIT}
endif
${MAKE} data
ifndef SLURM_JOBID
# SLURM_JOBID is not set: submit the training job only after "data" is done.
${MAKE} train-and-eval.submit${GPUJOB_SUBMIT}
${MAKE} ${TRAINJOB_HPCPARAMS} train-and-eval.submit${GPUJOB_SUBMIT}
endif
# train-job:
# - create/submit a job for training only (no evaluation!)
#   The TRAINJOB_HPCPARAMS list is placed on the sub-make command line as
#   variable overrides; GPUJOB_SUBMIT is appended to the ".submit" target name.
.PHONY: train-job
train-job:
${MAKE} train.submit${GPUJOB_SUBMIT}
${MAKE} ${TRAINJOB_HPCPARAMS} train.submit${GPUJOB_SUBMIT}
# train-and-eval-job:
# - create/submit a job for training (+ evaluation)
#   The TRAINJOB_HPCPARAMS list is placed on the sub-make command line as
#   variable overrides; GPUJOB_SUBMIT is appended to the ".submit" target name.
.PHONY: train-and-eval-job
train-and-eval-job:
${MAKE} train-and-eval.submit${GPUJOB_SUBMIT}
${MAKE} ${TRAINJOB_HPCPARAMS} train-and-eval.submit${GPUJOB_SUBMIT}

View File

@ -1708,7 +1708,7 @@ KEEP_LANGIDS = bos_Cyrl cmn cnr cnr_Latn csb diq dnj dty fas fqs ful fur
syr syr_Syrc tgk_Latn thy tlh tmh toi tuk_Cyrl urd_Deva xal_Latn yid_Latn zho zlm
# Language IDs to skip: every entry of TRAIN_ONLY_LANGIDS that is not listed
# in KEEP_LANGIDS, plus the explicit IDs and patterns below. The entries are
# later joined into a regular expression, so "." in an entry (for example
# "eng_....", "..._Zinh") matches any single character and "[ab]" / "[a-x]"
# are character classes.
SKIP_LANGIDS = ${filter-out ${KEEP_LANGIDS},${TRAIN_ONLY_LANGIDS}} \
ang ara_Latn arq_Latn apc_Latn bul_Latn ell_Latn eng_Tibt \
eng_Zinh heb_Latn hun_Zinh nob_Hebr rus_Latn \
eng_Zinh eng_.... heb_Latn hun_Zinh nob_Hebr rus_Latn \
..._Qa[ab][a-x] ..._Zinh ..._Zmth ..._Zsym ..._Zxxx ..._Zyyy ..._Zzzz
# Anchored alternation "^\(id1\|id2\|...\)$" built by replacing each
# ${SPACE} in SKIP_LANGIDS with "\|"; "$$" yields a literal "$".
# SPACE is not defined in this span -- presumably a single-space variable
# defined elsewhere; verify.
SKIP_LANGIDS_PATTERN = ^\(${subst ${SPACE},\|,${SKIP_LANGIDS}}\)$$