2020-05-03 20:27:55 +03:00
|
|
|
# -*-makefile-*-
|
|
|
|
#
|
|
|
|
# settings of the environment
|
|
|
|
# - essential tools and their paths
|
|
|
|
# - system-specific settings
|
|
|
|
#
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# Use bash (not the default /bin/sh) for recipes and $(shell ...) calls.
SHELL := /bin/bash
|
2020-05-03 20:27:55 +03:00
|
|
|
|
2020-09-09 23:21:07 +03:00
|
|
|
|
|
|
|
## setup local Perl environment
|
|
|
|
## better install local::lib and put this into your .bashrc:
|
|
|
|
##
|
|
|
|
## eval "$(perl -I$HOME/perl5/lib/perl5 -Mlocal::lib)"
|
|
|
|
|
|
|
|
# Local Perl installation below ${HOME}/perl5 (local::lib layout).
# Previously inherited values of PERL5LIB and PERL_LOCAL_LIB_ROOT are
# appended after a ':' only if they are non-empty; this is the make
# equivalent of the shell idiom ${VAR:+:${VAR}} and avoids both a stray
# closing brace and a dangling ':' in the exported value.
export PATH := ${HOME}/perl5/bin:${PATH}
export PERL5LIB := ${HOME}/perl5/lib/perl5$(if ${PERL5LIB},:${PERL5LIB})
export PERL_LOCAL_LIB_ROOT := ${HOME}/perl5$(if ${PERL_LOCAL_LIB_ROOT},:${PERL_LOCAL_LIB_ROOT})
export PERL_MB_OPT := --install_base "${HOME}/perl5"
export PERL_MM_OPT := INSTALL_BASE=${HOME}/perl5
|
|
|
|
|
|
|
|
|
2020-05-03 20:27:55 +03:00
|
|
|
## Environment modules to be loaded in sbatch scripts
## (one list for CPU jobs, one for GPU jobs).

CPU_MODULES = gcc/6.2.0 mkl
GPU_MODULES = cuda-env/8 mkl
# alternative GPU module set:
# GPU_MODULES = python-env/3.5.3-ml cuda-env/8 mkl
|
|
|
|
|
|
|
|
|
|
|
|
# Job-specific defaults (overwrite if necessary).
# HPC_EXTRA may hold additional SBATCH commands.

NR_GPUS      = 1
HPC_NODES    = 1
HPC_DISK     = 500
HPC_QUEUE    = serial
HPC_GPUQUEUE = gpu

# Earlier module sets, kept for reference:
# HPC_MODULES = nlpl-opus python-env/3.4.1 efmaral moses
# HPC_MODULES = nlpl-opus moses cuda-env marian python-3.5.3-ml
HPC_MODULES  = $(GPU_MODULES)
HPC_EXTRA    =

# Generic resource requests; the HPC_* variables are derived from these.
MEM      = 4g
THREADS  = 1
WALLTIME = 72
|
|
|
|
|
|
|
|
|
|
|
|
## HPC-prefixed variables: default to the generic settings unless the
## caller has already defined them.

HPC_TIME  ?= $(WALLTIME):00
HPC_CORES ?= $(THREADS)
HPC_MEM   ?= $(MEM)

# Memory default for GPU jobs.
GPUJOB_HPC_MEM ?= 4g
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Default GPU type and device; host-specific sections may override GPU.
# GPU = k80
GPU    = p100
DEVICE = cuda

# Commands for loading environment modules. The defaults only print a
# message; host-specific sections may replace them with `module load`.
LOADCPU  = echo "nothing to load"
LOADGPU  = echo "nothing to load"
LOADMODS = echo "nothing to load"

# Default working directory.
WORKHOME = $(PWD)/work
|
|
|
|
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
# Host-specific settings. The first branch whose condition holds overrides
# the generic defaults; if none matches, the defaults stay in effect.

# --- host dx6-ibs-p2 ---------------------------------------------------
ifeq ($(shell hostname),dx6-ibs-p2)
  GPU      = pascal
  APPLHOME = /opt/tools
  WORKHOME = $(shell realpath $(PWD)/work)
  # OPUSHOME = tiedeman@taito.csc.fi:/proj/nlpl/data/OPUS/
  # MOSESHOME = ${APPLHOME}/mosesdecoder
  # MOSESSCRIPTS = ${MOSESHOME}/scripts
  # MARIAN_HOME = ${APPLHOME}/marian/build/
  # MARIAN = ${APPLHOME}/marian/build
  # SUBWORD_HOME = ${APPLHOME}/subword-nmt/subword_nmt

# --- host dx7-nkiel-4gpu -----------------------------------------------
else ifeq ($(shell hostname),dx7-nkiel-4gpu)
  GPU      = pascal
  APPLHOME = /opt/tools
  WORKHOME = $(shell realpath $(PWD)/work)
  MARIAN_BUILD_OPTIONS += -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-9.2
  # further build options that are currently disabled:
  #   -DPROTOBUF_LIBRARY=/usr/lib/x86_64-linux-gnu/libprotobuf.so.9
  #   -DPROTOBUF_INCLUDE_DIR=/usr/include/google/protobuf
  #   -DPROTOBUF_PROTOC_EXECUTABLE=${PWD}/tools/protobuf/src/protoc
  # OPUSHOME = tiedeman@taito.csc.fi:/proj/nlpl/data/OPUS/
  # MOSESHOME = ${APPLHOME}/mosesdecoder
  # MOSESSCRIPTS = ${MOSESHOME}/scripts
  # MARIAN_HOME = ${APPLHOME}/marian/build/
  # MARIAN = ${APPLHOME}/marian/build
  # SUBWORD_HOME = ${APPLHOME}/subword-nmt/subword_nmt

# --- any host on which /wrk/tiedeman/research exists -------------------
else ifneq ($(wildcard /wrk/tiedeman/research),)
  APPLHOME     = /proj/memad/tools
  WORKHOME     = /wrk/tiedeman/research/Opus-MT/work
  OPUSHOME     = /proj/nlpl/data/OPUS
  MOSESHOME    = /proj/nlpl/software/moses/4.0-65c75ff/moses
  MOSESSCRIPTS = $(MOSESHOME)/scripts
  MARIAN_HOME  = $(HOME)/appl_taito/tools/marian/build-gpu/
  MARIAN       = $(HOME)/appl_taito/tools/marian/build-gpu
  LOADCPU      = module load $(CPU_MODULES)
  LOADGPU      = module load $(GPU_MODULES)
  LOADMODS     = $(LOADGPU)

# --- hosts whose `hostname --domain` reports "bullx" -------------------
else ifeq ($(shell hostname --domain 2>/dev/null),bullx)
  CSCPROJECT   = project_2002688
  WORKHOME     = $(shell realpath $(PWD)/work)
  APPLHOME     = /projappl/project_2001194
  OPUSHOME     = /projappl/nlpl/data/OPUS
  MOSESHOME    = $(APPLHOME)/mosesdecoder
  MOSESSCRIPTS = $(MOSESHOME)/scripts
  EFLOMAL_HOME = $(APPLHOME)/eflomal/
  MARIAN_HOME  = $(APPLHOME)/marian-dev/build/
  MARIAN       = $(APPLHOME)/marian-dev/build
  SPM_HOME     = $(MARIAN_HOME)
  GPU          = v100
  GPU_MODULES  = python-env
  CPU_MODULES  = python-env
  HPC_QUEUE    = small
  LOADCPU      = module load $(CPU_MODULES)
  LOADGPU      = module load $(GPU_MODULES)
  export PATH := $(APPLHOME)/bin:$(PATH)
endif
|
|
|
|
|
|
|
|
|
|
|
|
# Temporary directory: LOCAL_SCRATCH takes precedence when it is set;
# otherwise an already defined TMPDIR is kept, with /tmp as the fallback.
ifdef LOCAL_SCRATCH
  TMPDIR = $(LOCAL_SCRATCH)
endif

TMPDIR ?= /tmp
|
|
|
|
|
2020-05-03 20:27:55 +03:00
|
|
|
|
2020-09-02 15:52:34 +03:00
|
|
|
## Tools and their locations.
##
## Every variable uses ?= so that values set earlier (host-specific section,
## environment, command line) win. Tool lookups prefer an executable found
## via `which` and otherwise point to the copy expected below TOOLSDIR.

SCRIPTDIR ?= $(PWD)/scripts
TOOLSDIR  ?= $(PWD)/tools

ISO639   ?= $(shell which iso639 2>/dev/null || echo 'perl $(TOOLSDIR)/LanguageCodes/ISO-639-3/bin/iso639')
PIGZ     ?= $(shell which pigz 2>/dev/null || echo $(TOOLSDIR)/pigz/pigz)
TERASHUF ?= $(shell which terashuf 2>/dev/null || echo $(TOOLSDIR)/terashuf/terashuf)
JQ       ?= $(shell which jq 2>/dev/null || echo $(TOOLSDIR)/jq/jq)
PROTOC   ?= $(shell which protoc 2>/dev/null || echo $(TOOLSDIR)/protobuf/bin/protoc)

# marian; the directory holding it also serves as the SentencePiece home
MARIAN      ?= $(shell which marian 2>/dev/null || echo $(TOOLSDIR)/marian-dev/build/marian)
MARIAN_HOME ?= $(dir $(MARIAN))
SPM_HOME    ?= $(dir $(MARIAN))

# fast_align and its atools helper (same directory)
FASTALIGN      ?= $(shell which fast_align 2>/dev/null || echo $(TOOLSDIR)/fast_align/build/fast_align)
FASTALIGN_HOME ?= $(dir $(FASTALIGN))
ATOOLS         ?= $(FASTALIGN_HOME)atools

# eflomal word aligner
EFLOMAL      ?= $(shell which eflomal 2>/dev/null || echo $(TOOLSDIR)/eflomal/eflomal)
EFLOMAL_HOME ?= $(dir $(EFLOMAL))
WORDALIGN    ?= $(EFLOMAL_HOME)align.py
# NOTE: EFLOMAL is already defined by the ?= above, so this has no effect.
EFLOMAL      ?= $(EFLOMAL_HOME)eflomal

MOSESSCRIPTS ?= $(TOOLSDIR)/moses-scripts/scripts
TMX2MOSES    ?= $(shell which tmx2moses 2>/dev/null || echo $(TOOLSDIR)/OpusTools-perl/scripts/convert/tmx2moses)
|
2020-09-02 15:52:34 +03:00
|
|
|
|
2020-08-28 15:51:37 +03:00
|
|
|
## marian-nmt executables. They are appended to MARIAN_HOME without a
## separator, so MARIAN_HOME is expected to end in '/'.

MARIAN_TRAIN   = $(MARIAN_HOME)marian
MARIAN_DECODER = $(MARIAN_HOME)marian-decoder
MARIAN_VOCAB   = $(MARIAN_HOME)marian-vocab

# tokenizer directory inside the Moses scripts
TOKENIZER = $(MOSESSCRIPTS)/tokenizer
|
2020-09-04 15:34:20 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## BPE (subword-nmt)
##
## If a `subword-nmt` executable is on the PATH, its sub-commands are used;
## otherwise the python scripts in SUBWORD_HOME are called directly.
## BPE_LEARN / BPE_APPLY can still be overridden from outside (?=).

SUBWORD_BPE  ?= ${shell which subword-nmt 2>/dev/null || echo ${TOOLSDIR}/subword-nmt/subword_nmt/subword_nmt.py}
SUBWORD_HOME ?= ${dir ${SUBWORD_BPE}}

# stderr is discarded (as in all other tool lookups) so that a missing
# executable does not print noise every time the makefile is parsed
ifeq (${shell which subword-nmt 2>/dev/null},)
  BPE_LEARN ?= python3 ${SUBWORD_HOME}/learn_bpe.py
  BPE_APPLY ?= python3 ${SUBWORD_HOME}/apply_bpe.py
else
  BPE_LEARN ?= ${SUBWORD_BPE} learn-bpe
  BPE_APPLY ?= ${SUBWORD_BPE} apply-bpe
endif
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
## SentencePiece executables (SPM_HOME is expected to end in '/').

SPM_TRAIN  = $(SPM_HOME)spm_train
SPM_ENCODE = $(SPM_HOME)spm_encode
|
2020-05-03 20:27:55 +03:00
|
|
|
|
|
|
|
|
2020-09-02 15:52:34 +03:00
|
|
|
# Generic command-line helpers; all are expanded once (:=).

# sort using TMPDIR for temporary files and THREADS parallel workers
SORT    := sort -T $(TMPDIR) --parallel=$(THREADS)

# terashuf if it can be found, otherwise a random sort
SHUFFLE := $(shell which $(TERASHUF) 2>/dev/null || echo "$(SORT) --random-sort")

# pigz if it can be found, otherwise plain gzip
GZIP    := $(shell which $(PIGZ) 2>/dev/null || echo gzip)
GZCAT   := $(GZIP) -cd
ZCAT    := gzip -cd

UNIQ    := $(SORT) -u
|
2020-09-02 15:52:34 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
2020-08-26 14:31:50 +03:00
|
|
|
|
|
|
|
|
2020-08-28 15:51:37 +03:00
|
|
|
# TODO: delete those?
# Location of multeval below APPLHOME.
MULTEVALHOME = $(APPLHOME)/multeval
|
|
|
|
|
|
|
|
|
2020-08-26 14:31:50 +03:00
|
|
|
|
|
|
|
|
|
|
|
## install pre-requisites
|
|
|
|
## TODO:
|
|
|
|
## * terashuf (https://github.com/alexandres/terashuf.git)
|
|
|
|
## * OpusTools-perl (https://github.com/Helsinki-NLP/OpusTools-perl)
|
|
|
|
## * marian-nmt
|
|
|
|
|
|
|
|
|
2020-09-04 15:34:20 +03:00
|
|
|
# Tools that install-prerequisites asks make to build. Only the last word
# of ISO639 is used because the value may start with an interpreter
# ("perl <script>").
PREREQ_TOOLS := $(lastword $(ISO639)) $(ATOOLS) $(PIGZ) $(TERASHUF) $(JQ) $(MARIAN) $(EFLOMAL)

# Perl modules checked (and installed if missing) by install-perl-modules.
PREREQ_PERL  := ISO::639::3 ISO::639::5 OPUS::Tools XML::Parser

# Package installers: prefer pip3 / cpanm, fall back to pip / cpan.
PIP  := $(shell which pip3 2>/dev/null || echo pip)
CPAN := $(shell which cpanm 2>/dev/null || echo cpan)
|
|
|
|
|
|
|
|
# Decide whether marian is compiled with CUDA support.
# CUDA is switched off if
#   (a) no nvidia-smi executable can be found, or
#   (b) nvidia-smi runs but its output contains the word "failed".
# `grep -c` prints the number of matching lines as a plain integer, so any
# non-zero count (not only exactly 1) disables CUDA, and the comparison does
# not depend on the whitespace padding that `wc -l` adds on some systems.
NVIDIA_SMI := ${shell which nvidia-smi 2>/dev/null}

ifeq ($(wildcard ${NVIDIA_SMI}),)
  MARIAN_BUILD_OPTIONS += -DCOMPILE_CUDA=off
else ifneq (${shell "${NVIDIA_SMI}" | grep -c failed},0)
  MARIAN_BUILD_OPTIONS += -DCOMPILE_CUDA=off
endif
|
|
|
|
|
2020-08-26 14:31:50 +03:00
|
|
|
|
|
|
|
# Install everything the pipeline needs: python packages listed in
# requirements.txt, the required perl modules, and the tools listed in
# PREREQ_TOOLS. The three names are aliases for the same recipe.
# Declared with the special target .PHONY (leading dot) so that files with
# these names cannot make the targets look up to date.
.PHONY: install-prerequisites install-prereq install-requirements
install-prerequisites install-prereq install-requirements:
	${PIP} install --user -r requirements.txt
	${MAKE} install-perl-modules
	${MAKE} ${PREREQ_TOOLS}
|
|
|
|
|
2020-09-12 12:01:02 +03:00
|
|
|
# For every module in PREREQ_PERL: try to load it with perl and call the
# CPAN client only if loading fails.
.PHONY: install-perl-modules
install-perl-modules:
	for p in $(PREREQ_PERL); do \
	  if ! perl -e "use $$p;"; then \
	    $(CPAN) -i $$p; \
	  fi; \
	done
|
2020-09-02 15:52:34 +03:00
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# The iso639 script is provided by building the LanguageCodes package.
# The goal passed to the sub-make must be spelled exactly like the target of
# the rule that builds it (i.e. with the ${TOOLSDIR} prefix); make matches
# target names literally, so a relative tools/... path would not be found.
${TOOLSDIR}/LanguageCodes/ISO-639-3/bin/iso639:
	${MAKE} ${TOOLSDIR}/LanguageCodes/ISO-639-5/lib/ISO/639/5.pm
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# Build the LanguageCodes package inside TOOLSDIR (not a hard-coded tools/
# directory, so that an overridden TOOLSDIR is respected).
${TOOLSDIR}/LanguageCodes/ISO-639-5/lib/ISO/639/5.pm:
	${MAKE} -C ${TOOLSDIR}/LanguageCodes all
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# fast_align/atools: create the build directory, configure it with cmake
# (sources are one level up) and compile there.
${TOOLSDIR}/fast_align/build/atools:
	mkdir -p $(dir $@)
	cd $(dir $@) && cmake ..
	$(MAKE) -C $(dir $@)
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# pigz: run make in the directory of the target.
${TOOLSDIR}/pigz/pigz:
	$(MAKE) -C $(dir $@)
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# terashuf: run make in the directory of the target.
${TOOLSDIR}/terashuf/terashuf:
	$(MAKE) -C $(dir $@)
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# jq: fetch its git submodules, regenerate the autotools files, configure
# with the bundled oniguruma and build. Each recipe line runs in its own
# shell, hence the repeated `cd`.
${TOOLSDIR}/jq/jq:
	cd $(dir $@) && git submodule update --init
	cd $(dir $@) && autoreconf -fi
	cd $(dir $@) && ./configure --with-oniguruma=builtin
	$(MAKE) -C $(dir $@) all
|
2020-09-02 15:52:34 +03:00
|
|
|
|
2020-09-02 16:35:35 +03:00
|
|
|
## For Mac users:
|
|
|
|
## - install protobuf: sudo port install protobuf3-cpp
|
|
|
|
## - install MKL (especially for cpu use):
|
|
|
|
## file:///opt/intel/documentation_2020/en/mkl/ps2020/get_started.htm
|
2020-08-26 14:31:50 +03:00
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# marian: needs protoc first; configure in the build directory with
# SentencePiece enabled plus the options collected in MARIAN_BUILD_OPTIONS,
# then compile with -j (no job limit given).
${TOOLSDIR}/marian-dev/build/marian: $(PROTOC)
	mkdir -p $(dir $@)
	cd $(dir $@) && cmake -DUSE_SENTENCEPIECE=on $(MARIAN_BUILD_OPTIONS) ..
	$(MAKE) -C $(dir $@) -j
|
2020-09-03 22:04:44 +03:00
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# protobuf compiler, built from source inside TOOLSDIR.
#  - all paths use ${TOOLSDIR} so that an overridden TOOLSDIR is respected
#  - the repository is cloned only if it is not there yet, so the rule can
#    be re-run after a failed build
#  - `make install` is required: a plain build does not create bin/protoc
#    under the configured prefix, which is the file this rule promises
${TOOLSDIR}/protobuf/bin/protoc:
	mkdir -p ${TOOLSDIR}
	test -d ${TOOLSDIR}/protobuf || git clone https://github.com/protocolbuffers/protobuf.git ${TOOLSDIR}/protobuf
	cd ${TOOLSDIR}/protobuf && git submodule update --init --recursive
	cd ${TOOLSDIR}/protobuf && ./autogen.sh
	cd ${TOOLSDIR}/protobuf && ./configure --prefix=${TOOLSDIR}/protobuf
	${MAKE} -C ${TOOLSDIR}/protobuf
	${MAKE} -C ${TOOLSDIR}/protobuf install
|
2020-09-03 22:04:44 +03:00
|
|
|
|
|
|
|
## for Mac users: use gcc to compile eflomal
|
|
|
|
##
|
|
|
|
## sudo port install gcc10
|
|
|
|
## gcc-mp-10 -Ofast -march=native -Wall --std=gnu99 -Wno-unused-function -g -fopenmp -c eflomal.c
|
|
|
|
## gcc-mp-10 -lm -lgomp -fopenmp eflomal.o -o eflomal
|
|
|
|
##
|
|
|
|
## sudo port install llvm-devel py-cython py-numpy
|
|
|
|
## sudo port select --set python python38
|
|
|
|
## sudo port select --set python3 python38
|
|
|
|
## sudo port select --set cython cython38
|
|
|
|
## cd tools/eflomal
|
|
|
|
## sudo env python3 setup.py install
|
|
|
|
|
2020-09-05 00:16:22 +03:00
|
|
|
# eflomal: compile in the directory of the target, then install the python
# package for the current user.
${TOOLSDIR}/eflomal/eflomal:
	$(MAKE) -C $(dir $@) all
	cd $(dir $@) && python3 setup.py install --user
# alternative install command (disabled):
#	python3 setup.py install --install-dir ${HOME}/.local
|