2020-05-03 20:27:55 +03:00
|
|
|
# -*-makefile-*-
|
|
|
|
#
|
|
|
|
# settings of the environment
|
|
|
|
# - essential tools and their paths
|
|
|
|
# - system-specific settings
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
|
|
## Environment modules that sbatch scripts load:
## one list for CPU jobs, one for GPU jobs.

CPU_MODULES = gcc/6.2.0 mkl
GPU_MODULES = cuda-env/8 mkl

## alternative GPU module list (disabled):
# GPU_MODULES = python-env/3.5.3-ml cuda-env/8 mkl
|
|
|
|
|
|
|
|
|
|
|
|
## Job-specific defaults; overwrite any of them where necessary.
##   HPC_EXTRA: additional SBATCH commands

NR_GPUS      = 1
HPC_NODES    = 1
HPC_DISK     = 500
HPC_QUEUE    = serial
HPC_GPUQUEUE = gpu

## alternative module sets (disabled):
# HPC_MODULES = nlpl-opus python-env/3.4.1 efmaral moses
# HPC_MODULES = nlpl-opus moses cuda-env marian python-3.5.3-ml
HPC_MODULES  = $(GPU_MODULES)
HPC_EXTRA    =

## generic resource requests; the HPC_* variants are derived from these
## NOTE(review): WALLTIME is presumably in hours -- confirm against the sbatch templates
MEM      = 4g
THREADS  = 1
WALLTIME = 72
|
|
|
|
|
|
|
|
|
|
|
|
## Variables with the HPC prefix.
## Each one is assigned with ?=, so a value that is already defined
## (command line, environment, earlier makefile) is left untouched.

HPC_TIME       ?= $(WALLTIME):00
HPC_CORES      ?= $(THREADS)
HPC_MEM        ?= $(MEM)
GPUJOB_HPC_MEM ?= 4g
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## GPU type and device defaults
# GPU = k80
GPU    = p100
DEVICE = cuda

## shell commands that load the CPU / GPU module lists
LOADCPU = module load $(CPU_MODULES)
LOADGPU = module load $(GPU_MODULES)
|
|
|
|
|
|
|
|
## Host-specific locations of tools and data.
## Branches are tried in order; if none matches, nothing is set here.

ifneq ($(filter dx6-ibs-p2 dx7-nkiel-4gpu,$(shell hostname)),)
  # hosts dx6-ibs-p2 and dx7-nkiel-4gpu: identical settings, tools under /opt/tools
  APPLHOME    = /opt/tools
  WORKHOME    = $(shell realpath $(PWD)/work)
  OPUSHOME    = tiedeman@taito.csc.fi:/proj/nlpl/data/OPUS/
  MOSESHOME   = $(APPLHOME)/mosesdecoder
  MARIAN_HOME = $(APPLHOME)/marian/build/
  MARIAN      = $(APPLHOME)/marian/build
  LOADMODS    = echo "nothing to load"
else ifneq ($(wildcard /wrk/tiedeman/research),)
  # any host on which /wrk/tiedeman/research exists
  APPLHOME    = /proj/memad/tools
  WORKHOME    = /wrk/tiedeman/research/Opus-MT/work
  OPUSHOME    = /proj/nlpl/data/OPUS
  MOSESHOME   = /proj/nlpl/software/moses/4.0-65c75ff/moses
  MARIAN_HOME = $(HOME)/appl_taito/tools/marian/build-gpu/
  MARIAN      = $(HOME)/appl_taito/tools/marian/build-gpu
  MARIANCPU   = $(HOME)/appl_taito/tools/marian/build-cpu
  LOADMODS    = $(LOADGPU)
else ifeq ($(shell hostname --domain 2>/dev/null),bullx)
  # hosts whose DNS domain is "bullx"
  CSCPROJECT   = project_2002688
  WORKHOME     = $(shell realpath $(PWD)/work)
  APPLHOME     = /projappl/project_2001194
  OPUSHOME     = /projappl/nlpl/data/OPUS
  MOSESHOME    = $(APPLHOME)/mosesdecoder
  EFLOMAL_HOME = $(APPLHOME)/eflomal/
  MARIAN_HOME  = $(APPLHOME)/marian-dev/build/
  MARIAN       = $(APPLHOME)/marian-dev/build
  MARIANCPU    = $(APPLHOME)/marian-dev/build
  SPM_HOME     = $(MARIAN_HOME)
  GPU          = v100
  GPU_MODULES  = python-env
  CPU_MODULES  = python-env
  LOADMODS     = echo "nothing to load"
  HPC_QUEUE    = small
  # put the project's own bin directory first on PATH for all recipes
  export PATH := $(APPLHOME)/bin:$(PATH)
endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Point TMPDIR at LOCAL_SCRATCH whenever LOCAL_SCRATCH is defined.
ifdef LOCAL_SCRATCH
  TMPDIR = $(LOCAL_SCRATCH)
endif
|
|
|
|
|
|
|
|
|
2020-08-28 15:51:37 +03:00
|
|
|
## marian-nmt binaries
## MARIAN_HOME is prepended without a separator, so it has to end in "/".

MARIAN_TRAIN   = $(MARIAN_HOME)marian
MARIAN_DECODER = $(MARIAN_HOME)marian-decoder
MARIAN_VOCAB   = $(MARIAN_HOME)marian-vocab
|
|
|
|
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
## other tools and their locations

SCRIPTDIR    = $(PWD)/scripts

## word alignment tools; both *_HOME prefixes are prepended without a
## separator and therefore need a trailing "/"
## NOTE(review): FASTALIGN_HOME is not set anywhere in this part of the file -- confirm it is defined elsewhere
WORDALIGN    = $(EFLOMAL_HOME)align.py
ATOOLS       = $(FASTALIGN_HOME)atools

## Moses scripts and subword-nmt
MOSESSCRIPTS = $(MOSESHOME)/scripts
TOKENIZER    = $(MOSESSCRIPTS)/tokenizer
SNMTPATH     = $(APPLHOME)/subword-nmt/subword_nmt
|
|
|
|
|
|
|
|
## SentencePiece binaries
## SPM_HOME is prepended without a separator, so it has to end in "/".

SPM_TRAIN  = $(SPM_HOME)spm_train
SPM_ENCODE = $(SPM_HOME)spm_encode
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
|
|
|
|
## Sorting and shuffling.
##
## SORT is kept recursively expanded so that it always reflects the current
## values of TMPDIR and THREADS.
## NOTE(review): if TMPDIR is empty, `sort -T` swallows the next option as
## its directory argument -- confirm TMPDIR always has a value.
SORT = sort -T ${TMPDIR} --parallel=${THREADS}

## SHUFFLE: use terashuf when it is on PATH, otherwise fall back to
## `sort --random-sort`.
## The lookup uses := so that `which` runs once while the makefile is read
## instead of once per expansion of ${SHUFFLE}.
## The fallback stays recursively expanded because it refers to ${SORT}.
SHUFFLE := ${shell which terashuf 2>/dev/null}
ifeq (${SHUFFLE},)
  SHUFFLE = ${SORT} --random-sort
endif
|
2020-06-03 15:39:18 +03:00
|
|
|
## Compression tool: use pigz when it is on PATH, otherwise plain gzip.
## The fallback needs an explicit emptiness test: after the := assignment
## GZIP is defined even when `which` printed nothing, so a following
## `GZIP ?= gzip` would never take effect and GZIP would stay empty.
GZIP := ${shell which pigz 2>/dev/null}
ifeq (${GZIP},)
  GZIP := gzip
endif

## ZCAT decompresses to stdout; the trailing "<" makes the file name that
## follows ${ZCAT} in a recipe the command's standard input.
ZCAT = ${GZIP} -cd <
|
2020-08-26 14:31:50 +03:00
|
|
|
|
|
|
|
|
2020-08-28 15:51:37 +03:00
|
|
|
# TODO: possibly obsolete -- delete the settings below?
MULTEVALHOME = $(APPLHOME)/multeval
|
|
|
|
|
|
|
|
|
2020-08-26 14:31:50 +03:00
|
|
|
|
|
|
|
|
|
|
|
## install pre-requisites
|
|
|
|
## TODO:
|
|
|
|
## * terashuf (https://github.com/alexandres/terashuf.git)
|
|
|
|
## * OpusTools-perl (https://github.com/Helsinki-NLP/OpusTools-perl)
|
|
|
|
## * marian-nmt
|
|
|
|
|
|
|
|
|
|
|
|
## pip executable: use pip3 when it is on PATH, otherwise plain pip.
## The fallback needs an explicit emptiness test: after the := assignment
## PIP is defined even when `which` printed nothing, so a following
## `PIP ?= pip` would never take effect and PIP would stay empty.
PIP := ${shell which pip3 2>/dev/null}
ifeq (${PIP},)
  PIP := pip
endif
|
|
|
|
|
|
|
|
## Install the Python packages listed in requirements.txt with
## `pip install --user`. All three target names run the same recipe.
##
## The special target must be spelled .PHONY (with the leading dot).
## A plain `PHONY:` only declares an ordinary target called PHONY that
## depends on the install targets, and it leaves them non-phony.
.PHONY: install-prerequisites install-prereq install-requirements
install-prerequisites install-prereq install-requirements:
	${PIP} install --user -r requirements.txt
|
|
|
|
|