moved allas recipes to a different makefile

This commit is contained in:
Tiedemann 2020-09-02 16:35:35 +03:00
parent 2332732577
commit 1435b7849a
5 changed files with 107 additions and 39 deletions

View File

@ -161,6 +161,7 @@ include lib/test.mk
include lib/misc.mk
include lib/dist.mk
include lib/slurm.mk
include lib/allas.mk
include lib/generic.mk
include lib/langsets.mk
@ -191,41 +192,6 @@ all: ${WORKDIR}/config.mk
${MAKE} compare
#---------------------------------------------------------------------
# store and fetch workdata
# requires module load allas && allas-conf
# select project_2002688 (OPUS-MT)
# - "make store" overrides
# - "make fetch" does not override (delete dir first)
# - storing data will resolve symbolic links
#---------------------------------------------------------------------
.PHONY: store store-data fetch fetch-data
## directories and container names to be used
## (all of them are assigned with ?= and can be overridden by the caller)
##   WORK_SRCDIR    directory the store recipes change into before uploading
##   WORK_DESTDIR   directory the fetch recipes create and download into
##   WORK_CONTAINER container name, built from the last path component
##                  of WORKHOME and the value of WHOAMI
WORK_SRCDIR ?= ${WORKHOME}
WORK_DESTDIR ?= ${WORKHOME}
WORK_CONTAINER ?= OPUS-MT-train_${notdir ${WORKHOME}}-${WHOAMI}
## store workdir on allas
## (uploads the sub-directory ${LANGPAIRSTR} of WORK_SRCDIR; --override
##  replaces an existing upload, --follow-links resolves symbolic links)
store:
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
## fetch workdir from allas
## (downloads ${LANGPAIRSTR}.tar from the container into WORK_DESTDIR)
fetch:
mkdir -p ${WORK_DESTDIR}
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/${LANGPAIRSTR}.tar
# disabled alternative for packages with a .tar.zst suffix
# NOTE(review): presumably needed for uploads made without --nc -- confirm
# cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/${LANGPAIRSTR}.tar.zst
## store and fetch data dir (raw data files)
## (same procedure as store/fetch but for the sub-directory "data")
store-data:
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override data
fetch-data:
mkdir -p ${WORK_DESTDIR}
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar
# disabled alternative for packages with a .tar.zst suffix
# NOTE(review): presumably needed for uploads made without --nc -- confirm
# cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar.zst
#---------------------------------------------------------------------

View File

@ -17,7 +17,7 @@ More information about specific tasks:
* [Training models](Train.md)
* [Testing models](Test.md)
* [Running batch jobs](BatchJobs.md)
* [Packaging, releases and storage](ReleaseAndStore.md)
* [Packaging, releases and storage](ReleaseAndStore.md)
* [Models for the Tatoeba MT Challenge](TatoebaChallenge.md)

View File

@ -10,6 +10,7 @@ Relevant makefiles:
* [lib/env.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/env.mk)
* [lib/config.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/config.mk)
* [lib/dist.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/dist.mk)
* [lib/allas.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/allas.mk)
Main recipes:
@ -23,6 +24,7 @@ Main recipes:
* `store-data`: store data in `WORKHOME/data` (raw bitexts) on CSC ObjectStore (for internal use)
* `fetch`: fetch work files stored on CSC ObjectStore for specified language pair; does not overwrite (for internal use)
* `fetch-data`: fetch `WORKHOME/data` from CSC ObjectStore; does not overwrite (for internal use)
* `fetch-tiedeman`: same as fetch but retrieve data from user `tiedeman` (for internal use)
Parameters / variables:
@ -36,3 +38,57 @@ Parameters / variables:
* `WORK_SRCDIR`: top-level work home directory to fetch models from (for `store`), default: `WORKHOME`
* `WORK_DESTDIR`: top-level work home directory into which fetched models are unpacked (for `fetch`), default: `WORKHOME`
## Work data on Allas
Work directories can be uploaded to allas to save scratch space and can be fetched later on to continue working on specific models. The makefiles implement some recipes to `store` and `fetch` data files:
* initialise connection to allas
```
module load allas
allas-conf
```
Select project_2002688 (OPUS-MT).
* store the work directory for a specific model:
```
make SRCLANGS=xx TRGLANGS=yy store
```
This will pack and upload the work directory `work/xx-yy`. Note that this overwrites an existing package that might be in the same place! The container will be called `OPUS-MT-train_work-<username>` (`<username>` is set by calling `whoami`).
* fetch a work directory from allas:
```
make SRCLANGS=xx TRGLANGS=yy fetch
```
This retrieves the package from allas (from container `OPUS-MT-train_work-<username>`) and unpacks it in `WORK_DESTDIR`. Note that this recipe does NOT overwrite existing files and will fail if the work directory of the corresponding model already exists. Either delete the existing one first or specify a different destination by setting `WORK_DESTDIR`.
* store and fetch the raw data files (bitexts) from `work/data`
```
make store-data
make fetch-data
```
`store-data` again overwrites existing packages on allas, but `fetch-data` does not overwrite and fails if `work/data` already exists.
* fetch work files from user `tiedeman`
```
make SRCLANGS=xx TRGLANGS=yy fetch-tiedeman
```
This is for fetching the work files for the specified model from the container of user `tiedeman`.

45
lib/allas.mk Normal file
View File

@ -0,0 +1,45 @@
# -*-makefile-*-
#
# recipes for interacting with allas
#---------------------------------------------------------------------
# store and fetch workdata
#
# requires module load allas && allas-conf
# select project_2002688 (OPUS-MT)
#
# - "make store" overrides
# - "make fetch" does not override (delete dir first)
# - "make fetch-tiedeman" fetches from the container of user tiedeman
# - storing data will resolve symbolic links
#---------------------------------------------------------------------

## none of the recipes below creates a file that is named like its target;
## all of them (including fetch-tiedeman) have to be declared phony,
## otherwise an existing file/dir with that name would disable the recipe
.PHONY: store store-data fetch fetch-data fetch-tiedeman

## directories and container names to be used
## (assigned with ?= --> can be set by the caller or in the environment)
##
##   WORK_SRCDIR       directory the store recipes change into before uploading
##   WORK_DESTDIR      directory the fetch recipes create and download into
##   WORK_CONTAINER    container of the current user (name ends in WHOAMI)
##   WORK_CONTAINER_JT container of user tiedeman (used by fetch-tiedeman)
##
## container names include the last path component of WORKHOME

WORK_SRCDIR       ?= ${WORKHOME}
WORK_DESTDIR      ?= ${WORKHOME}
WORK_CONTAINER    ?= OPUS-MT-train_${notdir ${WORKHOME}}-${WHOAMI}
WORK_CONTAINER_JT ?= OPUS-MT-train_${notdir ${WORKHOME}}-tiedeman
## store: upload the work directory of the current language pair
##        (sub-directory ${LANGPAIRSTR} of WORK_SRCDIR) to WORK_CONTAINER;
##        an existing upload is replaced and symbolic links are followed
store:
	cd ${WORK_SRCDIR} && \
	a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
## fetch: download ${LANGPAIRSTR}.tar from the user-specific container
##        (WORK_CONTAINER) into WORK_DESTDIR, which is created if necessary
fetch:
	mkdir -p ${WORK_DESTDIR} && \
	cd ${WORK_DESTDIR} && \
	a-get ${WORK_CONTAINER}/${LANGPAIRSTR}.tar
## fetch-tiedeman: same procedure as fetch, but the package is taken
##                 from the container of user tiedeman (WORK_CONTAINER_JT)
fetch-tiedeman:
	mkdir -p ${WORK_DESTDIR} && \
	cd ${WORK_DESTDIR} && \
	a-get ${WORK_CONTAINER_JT}/${LANGPAIRSTR}.tar
## store-data: upload the data dir (raw data files), i.e. the sub-directory
##             "data" of WORK_SRCDIR, to WORK_CONTAINER; an existing upload
##             is replaced and symbolic links are followed
store-data:
	cd ${WORK_SRCDIR} && \
	a-put -b ${WORK_CONTAINER} --nc --follow-links --override data
## fetch-data: download data.tar (raw data files) from WORK_CONTAINER
##             into WORK_DESTDIR, which is created if necessary
fetch-data:
	mkdir -p ${WORK_DESTDIR} && \
	cd ${WORK_DESTDIR} && \
	a-get ${WORK_CONTAINER}/data.tar

View File

@ -196,9 +196,10 @@ tools/terashuf/terashuf:
${MAKE} -C ${dir $@}
## For Mac users: install protobuf
##
## sudo port install protobuf3-cpp
## For Mac users:
## - install protobuf: sudo port install protobuf3-cpp
## - install MKL (especially for cpu use):
## file:///opt/intel/documentation_2020/en/mkl/ps2020/get_started.htm
tools/marian-dev/build/marian:
mkdir -p ${dir $@}