# OPUS-MT-train/Makefile.tasks
# Joerg Tiedemann f32ddd06ce allwikis
# 2020-01-20 23:37:40 +02:00
#
# 490 lines
# 20 KiB
# Makefile

# -*-makefile-*-
#
# pre-defined tasks that we might want to run
#
# Language groups used by the task targets in this file.
MEMAD_LANGS = de en fi fr nl sv
# GERMANIC = en de nl fy af da fo is no nb nn sv
GERMANIC = de nl fy af da fo is no nb nn sv
WESTGERMANIC = de nl af fy
SCANDINAVIAN = da fo is no nb nn sv
ROMANCE = ca es fr gl it la oc pt_br pt ro
FINNO_UGRIC = fi et hu

# Pivot language for the *2pivot targets.
PIVOT = en

# LANGS falls back to the MeMAD language set when it is unset or empty.
ifndef LANGS
LANGS = ${MEMAD_LANGS}
endif

# None of the task targets in this file produces a file of its own name.
# Declaring them phony keeps them runnable even if a file or directory
# with the same name happens to exist in the working directory.
.PHONY: unidirectional bilingual bilingual-medium bilingual-small \
	multilingual multilingual-medium all2pivot bilingual-dynamic \
	allopus2pivot allopus all2en \
	germanic scandinavian memad2en fiet icelandic enru-yandex enit \
	memad-fiensv memad250-fiensv memad-fiensv_doc memad-fiensv_more \
	memad doclevel fiensv_bpe fiensv_spm fifr_spm fifr_doc fide_spm \
	memad_spm memad_doc memad_docalign enfisv en-fiet \
	memad-multi memad-multi2 memad-multi3 memad-fi \
	nordic romance westgermanic germanic-romance
# One direction only: run the `data` goal, then `train.submit-multigpu`
# with WALLTIME=72, HPC_MEM=4g and HPC_CORES=1.
unidirectional:
	$(MAKE) data
	$(MAKE) HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit-multigpu
# Both directions of the current language pair: data + train.submit-multigpu,
# then reverse-data + train.submit-multigpu again with the same resources.
bilingual:
	$(MAKE) data
	$(MAKE) HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit-multigpu
	$(MAKE) reverse-data
	$(MAKE) HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit-multigpu
# Like `bilingual`, but uses the `train.submit` goal with
# MARIAN_WORKSPACE=10000 and MARIAN_VALID_FREQ=5000 for both passes.
bilingual-medium:
	$(MAKE) data
	$(MAKE) MARIAN_WORKSPACE=10000 MARIAN_VALID_FREQ=5000 \
	        HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit
	$(MAKE) reverse-data
	$(MAKE) MARIAN_WORKSPACE=10000 MARIAN_VALID_FREQ=5000 \
	        HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit
# Like `bilingual-medium`, but with MARIAN_WORKSPACE=5000 and
# MARIAN_VALID_FREQ=2500 for both passes.
bilingual-small:
	$(MAKE) data
	$(MAKE) MARIAN_VALID_FREQ=2500 MARIAN_WORKSPACE=5000 \
	        HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit
	$(MAKE) reverse-data
	$(MAKE) MARIAN_VALID_FREQ=2500 MARIAN_WORKSPACE=5000 \
	        HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train.submit
# One many-to-many run: the whole LANGS list is passed as both SRCLANGS and
# TRGLANGS, first to the `data` goal and then to `train.submit-multigpu`.
multilingual:
	$(MAKE) TRGLANGS="$(LANGS)" SRCLANGS="$(LANGS)" data
	$(MAKE) TRGLANGS="$(LANGS)" SRCLANGS="$(LANGS)" \
	        HPC_MEM=4g HPC_CORES=1 WALLTIME=72 train.submit-multigpu
# Same as `multilingual`, with MARIAN_WORKSPACE=10000 and
# MARIAN_VALID_FREQ=5000 added to the training call.
multilingual-medium:
	$(MAKE) TRGLANGS="$(LANGS)" SRCLANGS="$(LANGS)" data
	$(MAKE) TRGLANGS="$(LANGS)" SRCLANGS="$(LANGS)" \
	        MARIAN_WORKSPACE=10000 MARIAN_VALID_FREQ=5000 \
	        HPC_MEM=4g HPC_CORES=1 WALLTIME=72 train.submit-multigpu
# For every language in LANGS except PIVOT: prepare and train lang->PIVOT,
# then run reverse-data and train PIVOT->lang.
all2pivot:
	for lang in $(filter-out $(PIVOT),$(LANGS)); do \
	  $(MAKE) TRGLANGS="$(PIVOT)" SRCLANGS="$$lang" data; \
	  $(MAKE) TRGLANGS="$(PIVOT)" SRCLANGS="$$lang" HPC_MEM=4g HPC_CORES=1 train.submit-multigpu; \
	  $(MAKE) TRGLANGS="$(PIVOT)" SRCLANGS="$$lang" reverse-data; \
	  $(MAKE) TRGLANGS="$$lang" SRCLANGS="$(PIVOT)" HPC_MEM=4g HPC_CORES=1 train.submit-multigpu; \
	done
# Train one language pair (and its reverse) with settings picked by the
# size of the training data.
#
# Does nothing when ${WORKHOME}/${LANGSTR}/train.submit already exists.
# Otherwise runs the `data` goal, counts the lines of
# ${WORKHOME}/${LANGSTR}/train/*.src.clean.${PRE_SRC}.gz and then:
#   more than 10,000,000 lines: train.submit-multigpu with HPC_MEM=8g
#   more than  1,000,000 lines: train.submit with MARIAN_VALID_FREQ=2500
#   more than    100,000 lines: train.submit with MARIAN_WORKSPACE=5000,
#                               MARIAN_VALID_MINI_BATCH=8,
#                               MARIAN_EARLY_STOPPING=5
#   more than     10,000 lines: train.submit with MARIAN_WORKSPACE=3500,
#                               MARIAN_VALID_MINI_BATCH=4, MARIAN_DROPOUT=0.5,
#                               MARIAN_EARLY_STOPPING=5
#   otherwise                 : only prints that the pair is too small
# In every training branch the reverse direction is prepared with
# reverse-data-spm and trained with the same settings, unless SRCLANGS and
# TRGLANGS are the same string.
#
# The line count is taken once and kept in a shell variable, so the
# compressed training data is not decompressed again for each threshold.
bilingual-dynamic:
	if [ ! -e "${WORKHOME}/${LANGSTR}/train.submit" ]; then \
	  ${MAKE} data; \
	  trainsize=`zcat ${WORKHOME}/${LANGSTR}/train/*.src.clean.${PRE_SRC}.gz | wc -l`; \
	  if [ $$trainsize -gt 10000000 ]; then \
	    echo "${LANGSTR} bigger than 10 million"; \
	    ${MAKE} HPC_CORES=1 HPC_MEM=8g train.submit-multigpu; \
	    if [ "${SRCLANGS}" != "${TRGLANGS}" ]; then \
	      ${MAKE} reverse-data-spm; \
	      ${MAKE} TRGLANGS="${SRCLANGS}" SRCLANGS='${TRGLANGS}' HPC_CORES=1 HPC_MEM=8g train.submit-multigpu; \
	    fi; \
	  elif [ $$trainsize -gt 1000000 ]; then \
	    echo "${LANGSTR} bigger than 1 million"; \
	    ${MAKE} \
	      MARIAN_VALID_FREQ=2500 \
	      HPC_CORES=1 HPC_MEM=4g train.submit; \
	    if [ "${SRCLANGS}" != "${TRGLANGS}" ]; then \
	      ${MAKE} reverse-data-spm; \
	      ${MAKE} TRGLANGS="${SRCLANGS}" SRCLANGS='${TRGLANGS}' \
	        MARIAN_VALID_FREQ=2500 \
	        HPC_CORES=1 HPC_MEM=4g train.submit; \
	    fi; \
	  elif [ $$trainsize -gt 100000 ]; then \
	    echo "${LANGSTR} bigger than 100k"; \
	    ${MAKE} \
	      MARIAN_VALID_FREQ=1000 \
	      MARIAN_WORKSPACE=5000 \
	      MARIAN_VALID_MINI_BATCH=8 \
	      MARIAN_EARLY_STOPPING=5 \
	      HPC_CORES=1 HPC_MEM=4g train.submit; \
	    if [ "${SRCLANGS}" != "${TRGLANGS}" ]; then \
	      ${MAKE} reverse-data-spm; \
	      ${MAKE} TRGLANGS="${SRCLANGS}" SRCLANGS='${TRGLANGS}' \
	        MARIAN_VALID_FREQ=1000 \
	        MARIAN_WORKSPACE=5000 \
	        MARIAN_VALID_MINI_BATCH=8 \
	        MARIAN_EARLY_STOPPING=5 \
	        HPC_CORES=1 HPC_MEM=4g train.submit; \
	    fi; \
	  elif [ $$trainsize -gt 10000 ]; then \
	    echo "${LANGSTR} bigger than 10k"; \
	    ${MAKE} \
	      MARIAN_WORKSPACE=3500 \
	      MARIAN_VALID_MINI_BATCH=4 \
	      MARIAN_DROPOUT=0.5 \
	      MARIAN_VALID_FREQ=1000 \
	      MARIAN_EARLY_STOPPING=5 \
	      HPC_CORES=1 HPC_MEM=4g train.submit; \
	    if [ "${SRCLANGS}" != "${TRGLANGS}" ]; then \
	      ${MAKE} reverse-data-spm; \
	      ${MAKE} TRGLANGS="${SRCLANGS}" SRCLANGS='${TRGLANGS}' \
	        MARIAN_WORKSPACE=3500 \
	        MARIAN_VALID_MINI_BATCH=4 \
	        MARIAN_DROPOUT=0.5 \
	        MARIAN_VALID_FREQ=1000 \
	        MARIAN_EARLY_STOPPING=5 \
	        HPC_CORES=1 HPC_MEM=4g train.submit; \
	    fi; \
	  else \
	    echo "${LANGSTR} too small"; \
	  fi; \
	fi
# iso639 = aa ab ae af ak am an ar as av ay az ba be bg bh bi bm bn bo br bs ca ce ch cn co cr cs cu cv cy da de dv dz ee el en eo es et eu fa ff fi fj fo fr fy ga gd gl gn gr gu gv ha hb he hi ho hr ht hu hy hz ia id ie ig ik io is it iu ja jp jv ka kg ki kj kk kl km kn ko kr ks ku kv kw ky la lb lg li ln lo lt lu lv me mg mh mi mk ml mn mo mr ms mt my na nb nd ne ng nl nn no nr nv ny oc oj om or os pa pi pl po ps pt qu rm rn ro ru rw ry sa sc sd se sg sh si sk sl sm sn so sq sr ss st su sv sw ta tc te tg th ti tk tl tn to tr ts tt tw ty ua ug uk ur uz ve vi vo wa wo xh yi yo za zh zu
# NO_MEMAD = ${filter-out fi sv de fr nl,${iso639}}
#"de_AT de_CH de_DE de"
#"en_AU en_CA en_GB en_NZ en_US en_ZA en"
#"it_IT it"
#"es_AR es_CL es_CO es_CR es_DO es_EC es_ES es_GT es_HN es_MX es_NI es_PA es_PE es_PR es_SV es_UY es_VE es"
#"eu_ES eu"
#"hi_IN hi"
#"fr_BE fr_CA fr_FR fr"
#"fa_AF fa_IR fa"
#"ar_SY ar_TN ar"
#"bn_IN bn"
#da_DK
#bg_BG
#nb_NO
#nl_BE nl_NL
#tr_TR
### ze_en - English subtitles in Chinese movies
# Language IDs iterated over by allopus2pivot and allopus.
# A double-quoted group (e.g. "pt_br pt_BR pt_PT pt") reaches the shell `for`
# loops as ONE item, so all IDs in the group are handed to a single sub-make.
# The list is split with backslash continuations; make joins the pieces
# with single spaces, so the value stays one whitespace-separated list.
OPUSLANGS = fi sv fr es de ar he \
	"cmn cn yue ze_zh zh_cn zh_CN zh_HK zh_tw zh_TW zh_yue zhs zht zh" \
	"pt_br pt_BR pt_PT pt" \
	aa ab ace ach acm acu ada ady aeb aed ae afb afh af agr aha aii ain ajg aka ake akl ak aln alt alz amh ami amu am ang an aoc aoz apc ara arc arh arn arq ary arz ase asf ast as ati atj avk av awa aym ay azb "az_IR az" \
	bal bam ban bar bas ba bbc bbj bci bcl bem ber "be_tarask be" bfi bg bho bhw bh bin bi bjn bm bn bnt bo bpy brx br bsn bs btg bts btx bua bug bum bvl bvy bxr byn byv bzj bzs \
	cab cac cak cat cay ca "cbk_zam cbk" cce cdo ceb ce chf chj chk cho chq chr chw chy ch cjk cjp cjy ckb ckt cku cmo cnh cni cop co "crh_latn crh" crp crs cr csb cse csf csg csl csn csr cs cto ctu cuk cu cv cycl cyo cy \
	daf da dga dhv dik din diq dje djk dng dop dsb dtp dty dua dv dws dyu dz \
	ecs ee efi egl el eml enm eo esn et eu ewo ext \
	fan fat fa fcs ff fil fj fkv fon foo fo frm frp frr fse fsl fuc ful fur fuv fy \
	gaa gag gan ga gbi gbm gcf gcr gd gil glk gl gn gom gor gos got grc gr gsg gsm gss gsw guc gug gum gur guw gu gv gxx gym \
	hai hak hau haw ha haz hb hch hds hif hi hil him hmn hne hnj hoc ho hrx hr hsb hsh hsn ht hup hus hu hyw hy hz \
	ia iba ibg ibo id ie ig ike ik ilo inh inl ins io iro ise ish iso is it iu izh \
	jak jam jap ja jbo jdt jiv jmx jp jsl jv \
	kaa kab kac kam kar kau ka kbd kbh kbp kea kek kg kha kik kin ki kjh kj kk kl kmb kmr km kn koi kok kon koo ko kpv kqn krc kri krl kr ksh kss ksw ks kum ku kvk kv kwn kwy kw kxi ky kzj \
	lad lam la lbe lb ldn lez lfn lg lij lin liv li lkt lld lmo ln lou lo loz lrc lsp ltg lt lua lue lun luo lus luy lu lv lzh lzz \
	mad mai mam map_bms mau max maz mco mcp mdf men me mfe mfs mgm mgr mg mhr mh mic min miq mi mk mlg ml mnc mni mnw mn moh mos mo mrj mrq mr "ms_MY ms" mt mus mvv mwl mww mxv myv my mzn mzy \
	nah nan nap na nba "nb_NO nb nn_NO nn nog no_nb no" nch nci ncj ncs ncx ndc "nds_nl nds" nd new ne ngl ngt ngu ng nhg nhk nhn nia nij niu nlv nl nnh non nov npi nqo nrm nr nso nst nv nya nyk nyn nyu ny nzi \
	oar oc ojb oj oke olo om orm orv or osx os ota ote otk \
	pag pam pan pap pau pa pbb pcd pck pcm pdc pdt pes pfl pid pih pis pi plt pl pms pmy pnb pnt pon pot po ppk ppl prg prl prs pso psp psr ps pys \
	quc que qug qus quw quy qu quz qvi qvz qya \
	rap rar rcf rif rmn rms rmy rm rnd rn rom ro rsl rue run rup ru rw ry \
	sah sat sa sbs scn sco sc sd seh se sfs sfw sgn sgs sg shi shn shs shy sh sid simple si sjn sk sl sma sml sm sna sn som son sop sot so sqk sq "sr_ME sr srp" srm srn ssp ss stq st sux su svk swa swc swg swh sw sxn syr szl \
	"ta_LK ta" tcf tcy tc tdt tdx tet te "tg_TJ tg" thv th tig tir tiv ti tkl tk tlh tll "tl_PH tl" tly tmh tmp tmw tn tob tog toh toi toj toki top to tpi tpw trv tr tsc tss ts tsz ttj tt tum tvl tw tyv ty tzh tzl tzo \
	udm ug uk umb urh "ur_PK ur" usp uz \
	vec vep ve "vi_VN vi" vls vmw vo vro vsl \
	wae wal war wa wba wes wls wlv wol wo wuu \
	xal xho xh xmf xpe \
	yao yap yaq ybb yi yor yo yua \
	zab zai zam za zdj zea zib zlm zne zpa zpg zsl zsm "zul zu" zza
# Run bilingual-dynamic for every OPUSLANGS entry paired with PIVOT.
# TRGLANGS is set to PIVOT explicitly (as all2pivot does), so a PIVOT other
# than the sub-make's default target language is actually honoured.
# NOTE(review): filter-out works on whitespace-separated words, so an ID that
# ends a quoted group (e.g. `pt"`) is not removed when it equals PIVOT.
allopus2pivot:
	for l in ${filter-out ${PIVOT},${OPUSLANGS}}; do \
	  ${MAKE} WALLTIME=72 SRCLANGS="$$l" TRGLANGS="${PIVOT}" bilingual-dynamic; \
	done
## this looks dangerous ....
# Runs bilingual-dynamic for every ordered pair of OPUSLANGS entries whose
# ${WORKHOME}/<src>-<trg>/train.submit does not exist yet. The command is
# echoed first; the echo now names TRGLANGS for the target side so it matches
# the command that is actually executed.
allopus:
	for s in ${OPUSLANGS}; do \
	  for t in ${OPUSLANGS}; do \
	    if [ ! -e "${WORKHOME}/$$s-$$t/train.submit" ]; then \
	      echo "${MAKE} WALLTIME=72 SRCLANGS=\"$$s\" TRGLANGS=\"$$t\" bilingual-dynamic"; \
	      ${MAKE} WALLTIME=72 SRCLANGS="$$s" TRGLANGS="$$t" bilingual-dynamic; \
	    fi \
	  done \
	done
# Shortcut: allopus2pivot with PIVOT fixed to en.
all2en:
	$(MAKE) PIVOT=en allopus2pivot
## restrict a goal to a single training set:
## `make <goal>-fiskmo` re-invokes make for <goal> with TRAINSET=fiskmo,
## and likewise for -opensubtitles (OpenSubtitles) and -finlex (Finlex)
%-fiskmo:
	$(MAKE) TRAINSET=fiskmo $(patsubst %-fiskmo,%,$@)

%-opensubtitles:
	$(MAKE) TRAINSET=OpenSubtitles $(patsubst %-opensubtitles,%,$@)

%-finlex:
	$(MAKE) TRAINSET=Finlex $(patsubst %-finlex,%,$@)
## a collection of predefined model runs
## germanic <-> germanic (multilingual goal over the GERMANIC list)
germanic:
	$(MAKE) HPC_DISK=1500 LANGS="$(GERMANIC)" multilingual
# multilingual-medium over the SCANDINAVIAN list
scandinavian:
	$(MAKE) LANGS="$(SCANDINAVIAN)" multilingual-medium
# all2pivot over MEMAD_LANGS with en as the pivot
memad2en:
	$(MAKE) PIVOT=en LANGS="$(MEMAD_LANGS)" all2pivot
# bilingual-medium for fi (source) and et (target)
fiet:
	$(MAKE) TRGLANGS=et SRCLANGS=fi bilingual-medium
# Three `bilingual` runs with is as source: to en, to the group
# "da no nn nb sv" (one multi-target run), and to fi.
icelandic:
	$(MAKE) TRGLANGS=en SRCLANGS=is bilingual
	$(MAKE) TRGLANGS="da no nn nb sv" SRCLANGS=is bilingual
	$(MAKE) TRGLANGS=fi SRCLANGS=is bilingual
# en<->ru with DATASET=opus+yandex and EXTRA_TRAINSET=yandex:
# data and reverse-data are built from the ru->en side, then en->ru is
# trained (HPC_MEM=8g, MARIAN_WORKSPACE=12000) followed by ru->en (HPC_MEM=4g).
enru-yandex:
	$(MAKE) SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex DATASET=opus+yandex data
	$(MAKE) SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex DATASET=opus+yandex reverse-data
	$(MAKE) SRCLANGS=en TRGLANGS=ru EXTRA_TRAINSET=yandex DATASET=opus+yandex \
	        MARIAN_WORKSPACE=12000 HPC_MEM=8g HPC_CORES=1 WALLTIME=72 train.submit-multigpu
	$(MAKE) SRCLANGS=ru TRGLANGS=en EXTRA_TRAINSET=yandex DATASET=opus+yandex \
	        HPC_MEM=4g HPC_CORES=1 WALLTIME=72 train.submit-multigpu
# en->it via the -spm goals: traindata, devdata, wordalign, then training.
enit:
	$(MAKE) TRGLANGS=it SRCLANGS=en traindata-spm
	$(MAKE) TRGLANGS=it SRCLANGS=en devdata-spm
	$(MAKE) TRGLANGS=it SRCLANGS=en wordalign-spm
	$(MAKE) TRGLANGS=it SRCLANGS=en HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
# sv<->fi and en<->fi via the -spm goals. For each pair: traindata, devdata,
# wordalign and training towards fi, then reverse-data-spm and training
# from fi.
memad-fiensv:
	$(MAKE) TRGLANGS=fi SRCLANGS=sv traindata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=sv devdata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=sv wordalign-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=sv HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=sv reverse-data-spm
	$(MAKE) TRGLANGS=sv SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=en traindata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=en devdata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=en wordalign-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=en HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=en reverse-data-spm
	$(MAKE) TRGLANGS=en SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
# memad-fiensv_doc with CONTEXT_SIZE=250
memad250-fiensv:
	$(MAKE) CONTEXT_SIZE=250 memad-fiensv_doc
# sv<->fi and en<->fi via the -doc goals. The sv/fi runs use HPC_MEM=8g and
# MARIAN_WORKSPACE=20000; the en/fi runs use HPC_MEM=4g and set no workspace.
memad-fiensv_doc:
	$(MAKE) TRGLANGS=fi SRCLANGS=sv traindata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=sv devdata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=sv HPC_CORES=1 MARIAN_WORKSPACE=20000 HPC_MEM=8g WALLTIME=72 train-doc.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=sv reverse-data-doc
	$(MAKE) TRGLANGS=sv SRCLANGS=fi HPC_CORES=1 MARIAN_WORKSPACE=20000 HPC_MEM=8g WALLTIME=72 train-doc.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=en traindata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=en devdata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=en HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-doc.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=en reverse-data-doc
	$(MAKE) TRGLANGS=en SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-doc.submit-multigpu
# sv<->fi via the -doc goals (HPC_MEM=8g, MARIAN_WORKSPACE=20000), followed
# by a full memad-fiensv_doc pass with CONTEXT_SIZE=500.
memad-fiensv_more:
	$(MAKE) TRGLANGS=fi SRCLANGS=sv traindata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=sv devdata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=sv HPC_CORES=1 MARIAN_WORKSPACE=20000 HPC_MEM=8g WALLTIME=72 train-doc.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=sv reverse-data-doc
	$(MAKE) TRGLANGS=sv SRCLANGS=fi HPC_CORES=1 MARIAN_WORKSPACE=20000 HPC_MEM=8g WALLTIME=72 train-doc.submit-multigpu
	$(MAKE) CONTEXT_SIZE=500 memad-fiensv_doc
# Train every ordered pair of the six MeMAD languages (fi en sv de fr nl).
# A pair is skipped when one of its validation logs
# (${WORKHOME}/<src>-<trg>/*.valid${NR}.log) already contains
# "stalled ${MARIAN_EARLY_STOPPING} times".
# The glob used to read `*.valid${NR.log}`, which expands a variable literally
# named "NR.log"; it is now `*.valid${NR}.log`.
# NOTE(review): assumes NR is defined elsewhere (or empty) - confirm.
memad:
	for s in fi en sv de fr nl; do \
	  for t in en fi sv de fr nl; do \
	    if [ "$$s" != "$$t" ]; then \
	      if ! grep -q 'stalled ${MARIAN_EARLY_STOPPING} times' ${WORKHOME}/$$s-$$t/*.valid${NR}.log; then \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t traindata devdata wordalign; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	      fi \
	    fi \
	  done \
	done
# Runs the -ost goals in order: ost-datasets, traindata-doc-ost,
# devdata-doc-ost, wordalign-doc-ost, then train-doc-ost.submit with the
# current CONTEXT_SIZE and MODELTYPE passed on explicitly.
doclevel:
	$(MAKE) ost-datasets
	$(MAKE) traindata-doc-ost
	$(MAKE) devdata-doc-ost
	$(MAKE) wordalign-doc-ost
	$(MAKE) MODELTYPE=$(MODELTYPE) CONTEXT_SIZE=$(CONTEXT_SIZE) \
	        HPC_MEM=4g WALLTIME=72 HPC_CORES=1 train-doc-ost.submit
# fi->sv and fi->en via the -bpe goals: traindata, devdata, wordalign, train.
fiensv_bpe:
	$(MAKE) TRGLANGS=sv SRCLANGS=fi traindata-bpe
	$(MAKE) TRGLANGS=sv SRCLANGS=fi devdata-bpe
	$(MAKE) TRGLANGS=sv SRCLANGS=fi wordalign-bpe
	$(MAKE) TRGLANGS=sv SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-bpe.submit-multigpu
	$(MAKE) TRGLANGS=en SRCLANGS=fi traindata-bpe
	$(MAKE) TRGLANGS=en SRCLANGS=fi devdata-bpe
	$(MAKE) TRGLANGS=en SRCLANGS=fi wordalign-bpe
	$(MAKE) TRGLANGS=en SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-bpe.submit-multigpu
# fi->sv and fi->en via the -spm goals: traindata, devdata, wordalign, train.
fiensv_spm:
	$(MAKE) TRGLANGS=sv SRCLANGS=fi traindata-spm
	$(MAKE) TRGLANGS=sv SRCLANGS=fi devdata-spm
	$(MAKE) TRGLANGS=sv SRCLANGS=fi wordalign-spm
	$(MAKE) TRGLANGS=sv SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
	$(MAKE) TRGLANGS=en SRCLANGS=fi traindata-spm
	$(MAKE) TRGLANGS=en SRCLANGS=fi devdata-spm
	$(MAKE) TRGLANGS=en SRCLANGS=fi wordalign-spm
	$(MAKE) TRGLANGS=en SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
# fr<->fi via the -spm goals: fr->fi first, then reverse-data-spm and fi->fr.
fifr_spm:
	$(MAKE) TRGLANGS=fi SRCLANGS=fr traindata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=fr devdata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=fr wordalign-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=fr HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=fr reverse-data-spm
	$(MAKE) TRGLANGS=fr SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
# fr<->fi via the -doc goals with HPC_MEM=8g and MARIAN_WORKSPACE=20000.
fifr_doc:
	$(MAKE) TRGLANGS=fi SRCLANGS=fr traindata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=fr devdata-doc
	$(MAKE) TRGLANGS=fi SRCLANGS=fr HPC_CORES=1 MARIAN_WORKSPACE=20000 HPC_MEM=8g WALLTIME=72 train-doc.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=fr reverse-data-doc
	$(MAKE) TRGLANGS=fr SRCLANGS=fi HPC_CORES=1 MARIAN_WORKSPACE=20000 HPC_MEM=8g WALLTIME=72 train-doc.submit-multigpu
# de<->fi via the -spm goals: de->fi first, then reverse-data-spm and fi->de.
fide_spm:
	$(MAKE) TRGLANGS=fi SRCLANGS=de traindata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=de devdata-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=de wordalign-spm
	$(MAKE) TRGLANGS=fi SRCLANGS=de HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
	$(MAKE) TRGLANGS=fi SRCLANGS=de reverse-data-spm
	$(MAKE) TRGLANGS=de SRCLANGS=fi HPC_CORES=1 HPC_MEM=4g WALLTIME=72 train-spm.submit-multigpu
# Same pair loop as `memad`, using the -spm goals for data preparation,
# word alignment and training.
# A pair is skipped when one of its validation logs
# (${WORKHOME}/<src>-<trg>/*.valid${NR}.log) already contains
# "stalled ${MARIAN_EARLY_STOPPING} times".
# The glob used to read `*.valid${NR.log}`, which expands a variable literally
# named "NR.log"; it is now `*.valid${NR}.log`.
# NOTE(review): assumes NR is defined elsewhere (or empty) - confirm.
memad_spm:
	for s in fi en sv de fr nl; do \
	  for t in en fi sv de fr nl; do \
	    if [ "$$s" != "$$t" ]; then \
	      if ! grep -q 'stalled ${MARIAN_EARLY_STOPPING} times' ${WORKHOME}/$$s-$$t/*.valid${NR}.log; then \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t traindata-spm; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t devdata-spm; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t wordalign-spm; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t HPC_CORES=1 HPC_MEM=4g train-spm.submit-multigpu; \
	      fi \
	    fi \
	  done \
	done
# Train every ordered pair of fi, en and sv with the -doc goals and
# MODELTYPE=transformer.
# A pair is skipped when one of its validation logs
# (${WORKHOME}/<src>-<trg>/*.valid${NR}.log) already contains
# "stalled ${MARIAN_EARLY_STOPPING} times".
# The glob used to read `*.valid${NR.log}`, which expands a variable literally
# named "NR.log"; it is now `*.valid${NR}.log`.
# NOTE(review): assumes NR is defined elsewhere (or empty) - confirm.
memad_doc:
	for s in fi en sv; do \
	  for t in en fi sv; do \
	    if [ "$$s" != "$$t" ]; then \
	      if ! grep -q 'stalled ${MARIAN_EARLY_STOPPING} times' ${WORKHOME}/$$s-$$t/*.valid${NR}.log; then \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t traindata-doc; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t devdata-doc; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t HPC_CORES=1 HPC_MEM=4g MODELTYPE=transformer train-doc.submit-multigpu; \
	      fi \
	    fi \
	  done \
	done
# Same loop as `memad_doc`, but without forcing MODELTYPE, so whatever
# model type the sub-make defaults to is used.
# A pair is skipped when one of its validation logs
# (${WORKHOME}/<src>-<trg>/*.valid${NR}.log) already contains
# "stalled ${MARIAN_EARLY_STOPPING} times".
# The glob used to read `*.valid${NR.log}`, which expands a variable literally
# named "NR.log"; it is now `*.valid${NR}.log`.
# NOTE(review): assumes NR is defined elsewhere (or empty) - confirm.
memad_docalign:
	for s in fi en sv; do \
	  for t in en fi sv; do \
	    if [ "$$s" != "$$t" ]; then \
	      if ! grep -q 'stalled ${MARIAN_EARLY_STOPPING} times' ${WORKHOME}/$$s-$$t/*.valid${NR}.log; then \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t traindata-doc; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t devdata-doc; \
	        ${MAKE} SRCLANGS=$$s TRGLANGS=$$t HPC_CORES=1 HPC_MEM=4g train-doc.submit-multigpu; \
	      fi \
	    fi \
	  done \
	done
# One multilingual run with "en fi sv" on both the source and target side.
enfisv:
	$(MAKE) TRGLANGS="en fi sv" SRCLANGS="en fi sv" traindata devdata wordalign
	$(MAKE) TRGLANGS="en fi sv" SRCLANGS="en fi sv" HPC_CORES=1 WALLTIME=72 HPC_MEM=4g train.submit-multigpu
# en -> "et fi", then "et fi" -> en; each direction gets its own
# traindata/devdata pass before training.
en-fiet:
	$(MAKE) TRGLANGS="et fi" SRCLANGS="en" traindata devdata
	$(MAKE) TRGLANGS="et fi" SRCLANGS="en" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
	$(MAKE) SRCLANGS="et fi" TRGLANGS="en" traindata devdata
	$(MAKE) SRCLANGS="et fi" TRGLANGS="en" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
# Multilingual runs over five language groups. The first loop trains each
# group against itself; the second trains every ordered pair of two
# different groups.
# NOTE(review): the last group lists `ga` where the ROMANCE variable has
# `gl` (and also lacks `ro`) - confirm whether that is intended.
memad-multi:
	for grp in "$(SCANDINAVIAN)" "en fr" "et hu fi" "$(WESTGERMANIC)" "ca es fr ga it la oc pt_br pt"; do \
	  $(MAKE) TRGLANGS="$$grp" SRCLANGS="$$grp" traindata devdata; \
	  $(MAKE) TRGLANGS="$$grp" SRCLANGS="$$grp" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	done
	for from in "$(SCANDINAVIAN)" "en fr" "et hu fi" "$(WESTGERMANIC)" "ca es fr ga it la oc pt_br pt"; do \
	  for to in "$(SCANDINAVIAN)" "en fr" "et hu fi" "$(WESTGERMANIC)" "ca es fr ga it la oc pt_br pt"; do \
	    if [ "$$from" != "$$to" ]; then \
	      $(MAKE) TRGLANGS="$$to" SRCLANGS="$$from" traindata devdata; \
	      $(MAKE) TRGLANGS="$$to" SRCLANGS="$$from" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	    fi \
	  done \
	done
# The cross-group loop of memad-multi, except that the SCANDINAVIAN group
# is left out on the source side.
memad-multi2:
	for from in "en fr" "et hu fi" "$(WESTGERMANIC)" "ca es fr ga it la oc pt_br pt"; do \
	  for to in "$(SCANDINAVIAN)" "en fr" "et hu fi" "$(WESTGERMANIC)" "ca es fr ga it la oc pt_br pt"; do \
	    if [ "$$from" != "$$to" ]; then \
	      $(MAKE) TRGLANGS="$$to" SRCLANGS="$$from" traindata devdata; \
	      $(MAKE) TRGLANGS="$$to" SRCLANGS="$$from" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	    fi \
	  done \
	done
# Three language groups to and from en, followed by plain en<->fr.
memad-multi3:
	for grp in "$(SCANDINAVIAN)" "$(WESTGERMANIC)" "ca es fr ga it la oc pt_br pt"; do \
	  $(MAKE) TRGLANGS="en" SRCLANGS="$$grp" traindata devdata; \
	  $(MAKE) TRGLANGS="en" SRCLANGS="$$grp" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	  $(MAKE) TRGLANGS="$$grp" SRCLANGS="en" traindata devdata; \
	  $(MAKE) TRGLANGS="$$grp" SRCLANGS="en" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	done
	$(MAKE) TRGLANGS="fr" SRCLANGS="en" traindata devdata
	$(MAKE) TRGLANGS="fr" SRCLANGS="en" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
	$(MAKE) TRGLANGS="en" SRCLANGS="fr" traindata devdata
	$(MAKE) TRGLANGS="en" SRCLANGS="fr" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
# en, sv, de and fr each paired with fi, trained in both directions.
memad-fi:
	for other in en sv de fr; do \
	  $(MAKE) TRGLANGS=fi SRCLANGS=$$other traindata devdata; \
	  $(MAKE) TRGLANGS=fi SRCLANGS=$$other HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	  $(MAKE) SRCLANGS=fi TRGLANGS=$$other traindata devdata; \
	  $(MAKE) SRCLANGS=fi TRGLANGS=$$other HPC_CORES=1 HPC_MEM=4g train.submit-multigpu; \
	done
# SCANDINAVIAN <-> FINNO_UGRIC, one multilingual run per direction.
nordic:
	$(MAKE) TRGLANGS="$(FINNO_UGRIC)" SRCLANGS="$(SCANDINAVIAN)" traindata
	$(MAKE) TRGLANGS="$(FINNO_UGRIC)" SRCLANGS="$(SCANDINAVIAN)" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
	$(MAKE) SRCLANGS="$(FINNO_UGRIC)" TRGLANGS="$(SCANDINAVIAN)" traindata
	$(MAKE) SRCLANGS="$(FINNO_UGRIC)" TRGLANGS="$(SCANDINAVIAN)" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
# ROMANCE <-> FINNO_UGRIC, one multilingual run per direction.
romance:
	$(MAKE) TRGLANGS="$(FINNO_UGRIC)" SRCLANGS="$(ROMANCE)" traindata
	$(MAKE) TRGLANGS="$(FINNO_UGRIC)" SRCLANGS="$(ROMANCE)" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
	$(MAKE) SRCLANGS="$(FINNO_UGRIC)" TRGLANGS="$(ROMANCE)" traindata
	$(MAKE) SRCLANGS="$(FINNO_UGRIC)" TRGLANGS="$(ROMANCE)" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
# WESTGERMANIC <-> FINNO_UGRIC, one multilingual run per direction.
westgermanic:
	$(MAKE) TRGLANGS="$(FINNO_UGRIC)" SRCLANGS="$(WESTGERMANIC)" traindata
	$(MAKE) TRGLANGS="$(FINNO_UGRIC)" SRCLANGS="$(WESTGERMANIC)" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
	$(MAKE) SRCLANGS="$(FINNO_UGRIC)" TRGLANGS="$(WESTGERMANIC)" traindata
	$(MAKE) SRCLANGS="$(FINNO_UGRIC)" TRGLANGS="$(WESTGERMANIC)" HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
# ROMANCE -> GERMANIC, then GERMANIC -> ROMANCE.
# NOTE(review): only the second direction requests `devdata` explicitly;
# confirm whether the first direction should as well.
germanic-romance:
	$(MAKE) TRGLANGS="$(GERMANIC)" SRCLANGS="$(ROMANCE)" traindata
	$(MAKE) TRGLANGS="$(GERMANIC)" SRCLANGS="$(ROMANCE)" \
	        HPC_CORES=1 HPC_MEM=4g train.submit-multigpu
	$(MAKE) SRCLANGS="$(GERMANIC)" TRGLANGS="$(ROMANCE)" traindata devdata
	$(MAKE) SRCLANGS="$(GERMANIC)" TRGLANGS="$(ROMANCE)" \
	        HPC_CORES=1 HPC_MEM=4g train.submit-multigpu