mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-11-30 12:32:24 +03:00
moved allas recipes to a different makefile
This commit is contained in:
parent
2332732577
commit
1435b7849a
36
Makefile
36
Makefile
@ -161,6 +161,7 @@ include lib/test.mk
|
||||
include lib/misc.mk
|
||||
include lib/dist.mk
|
||||
include lib/slurm.mk
|
||||
include lib/allas.mk
|
||||
|
||||
include lib/generic.mk
|
||||
include lib/langsets.mk
|
||||
@ -191,41 +192,6 @@ all: ${WORKDIR}/config.mk
|
||||
${MAKE} compare
|
||||
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# store and fetch workdata
|
||||
# requires module load allas && allas-conf
|
||||
# select project_2002688 (OPUS-MT)
|
||||
# - "make store" overrides
|
||||
# - "make fetch" does not override (delete dir first)
|
||||
# - storing data will resolve symbolic links
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
.PHONY: store store-data fetch fetch-data
|
||||
|
||||
## directories and container names to be used
|
||||
WORK_SRCDIR ?= ${WORKHOME}
|
||||
WORK_DESTDIR ?= ${WORKHOME}
|
||||
WORK_CONTAINER ?= OPUS-MT-train_${notdir ${WORKHOME}}-${WHOAMI}
|
||||
|
||||
## store workdir on allas
|
||||
store:
|
||||
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
|
||||
|
||||
## fetch workdir from allas
|
||||
fetch:
|
||||
mkdir -p ${WORK_DESTDIR}
|
||||
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/${LANGPAIRSTR}.tar
|
||||
# cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/${LANGPAIRSTR}.tar.zst
|
||||
|
||||
|
||||
## store and fetch data dir (raw data files)
|
||||
store-data:
|
||||
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override data
|
||||
|
||||
fetch-data:
|
||||
mkdir -p ${WORK_DESTDIR}
|
||||
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar
|
||||
# cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar.zst
|
||||
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
|
@ -17,7 +17,7 @@ More information about specific tasks:
|
||||
* [Training models](Train.md)
|
||||
* [Testing models](Test.md)
|
||||
* [Running batch jobs](BatchJobs.md)
|
||||
* [Packaging, releases and storage](ReleaseAndStore.md)
|
||||
* [Packaging, releases and storage](ReleaseAndStore.md)
|
||||
* [Models for the Tatoeba MT Challenge](TatoebaChallenge.md)
|
||||
|
||||
|
||||
|
@ -10,6 +10,7 @@ Relevant makefiles:
|
||||
* [lib/env.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/env.mk)
|
||||
* [lib/config.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/config.mk)
|
||||
* [lib/dist.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/dist.mk)
|
||||
* [lib/allas.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/allas.mk)
|
||||
|
||||
|
||||
Main recipes:
|
||||
@ -23,6 +24,7 @@ Main recipes:
|
||||
* `store-data`: store data in `WORKHOME/data` (raw bitexts) on CSC ObjectStore (for internal use)
|
||||
* `fetch`: fetch work files stored on CSC ObjectStore for specified language pair; does not overwrite (for internal use)
|
||||
* `fetch-data`: fetch `WORKHOME/data` from CSC ObjectStore; does not overwrite (for internal use)
|
||||
* `fetch-tiedeman`: same as fetch but retrieve data from user `tiedeman` (for internal use)
|
||||
|
||||
|
||||
Parameters / variables:
|
||||
@ -36,3 +38,57 @@ Parameters / variables:
|
||||
* `WORK_SRCDIR`: top-level work home directory to fetch models from (for `store`), default: `WORKHOME`
|
||||
* `WORK_DESTDIR`: top-level work home directory to store fetched models in (for `fetch`), default: `WORKHOME`
|
||||
|
||||
|
||||
|
||||
|
||||
## Work data on Allas
|
||||
|
||||
Work directories can be uploaded to allas to save scratch space and can be fetched later on to continue working on specific models. The makefiles implement some recipes to `store` and `fetch` data files:
|
||||
|
||||
|
||||
* initialise connection to allas
|
||||
|
||||
```
|
||||
module load allas
|
||||
allas-conf
|
||||
```
|
||||
|
||||
Select project_2002688 (OPUS-MT).
|
||||
|
||||
|
||||
|
||||
* store the work directory for a specific model:
|
||||
|
||||
```
|
||||
make SRCLANGS=xx TRGLANGS=yy store
|
||||
```
|
||||
|
||||
This will pack and upload the work directory `work/xx-yy`. Note that this overwrites an existing package that might be in the same place! The container will be called `OPUS-MT-train_work-<username>` (`<username>` is set by calling `whoami`).
|
||||
|
||||
|
||||
* fetch a work directory from allas:
|
||||
|
||||
```
|
||||
make SRCLANGS=xx TRGLANGS=yy fetch
|
||||
```
|
||||
|
||||
This retrieves the package from allas (from container `OPUS-MT-train_work-<username>`) and unpacks it in `WORK_DESTDIR`. Note that this recipe does NOT overwrite existing files and will fail if the work directory of the corresponding model already exists. Either delete the existing one first or specify a different destination by setting `WORK_DESTDIR`.
|
||||
|
||||
|
||||
* store and fetch the raw data files (bitexts) from `work/data`
|
||||
|
||||
```
|
||||
make store-data
|
||||
make fetch-data
|
||||
```
|
||||
|
||||
Again, `store-data` overwrites existing packages on allas, whereas `fetch-data` does not overwrite and fails if `work/data` already exists.
|
||||
|
||||
|
||||
* fetch work files from user `tiedeman`
|
||||
|
||||
```
|
||||
make SRCLANGS=xx TRGLANGS=yy fetch-tiedeman
|
||||
```
|
||||
|
||||
This is for fetching the work files for the specified model from the container of user `tiedeman`.
|
||||
|
45
lib/allas.mk
Normal file
45
lib/allas.mk
Normal file
@ -0,0 +1,45 @@
|
||||
# -*-makefile-*-
|
||||
#
|
||||
# recipes for interacting with allas
|
||||
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# store and fetch workdata
|
||||
# requires module load allas && allas-conf
|
||||
# select project_2002688 (OPUS-MT)
|
||||
# - "make store" overrides
|
||||
# - "make fetch" does not override (delete dir first)
|
||||
# - storing data will resolve symbolic links
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
.PHONY: store store-data fetch fetch-data
|
||||
|
||||
## directories and container names to be used
|
||||
WORK_SRCDIR ?= ${WORKHOME}
|
||||
WORK_DESTDIR ?= ${WORKHOME}
|
||||
WORK_CONTAINER ?= OPUS-MT-train_${notdir ${WORKHOME}}-${WHOAMI}
|
||||
WORK_CONTAINER_JT ?= OPUS-MT-train_${notdir ${WORKHOME}}-tiedeman
|
||||
|
||||
## store workdir on allas
|
||||
store:
|
||||
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
|
||||
|
||||
## fetch workdir from allas (user-specific container)
|
||||
fetch:
|
||||
mkdir -p ${WORK_DESTDIR}
|
||||
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/${LANGPAIRSTR}.tar
|
||||
|
||||
## get it from user tiedeman
|
||||
fetch-tiedeman:
|
||||
mkdir -p ${WORK_DESTDIR}
|
||||
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER_JT}/${LANGPAIRSTR}.tar
|
||||
|
||||
|
||||
## store and fetch data dir (raw data files)
|
||||
store-data:
|
||||
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override data
|
||||
|
||||
fetch-data:
|
||||
mkdir -p ${WORK_DESTDIR}
|
||||
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar
|
||||
|
@ -196,9 +196,10 @@ tools/terashuf/terashuf:
|
||||
${MAKE} -C ${dir $@}
|
||||
|
||||
|
||||
## For Mac users: install protobuf
|
||||
##
|
||||
## sudo port install protobuf3-cpp
|
||||
## For Mac users:
|
||||
## - install protobuf: sudo port install protobuf3-cpp
|
||||
## - install MKL (especially for cpu use):
|
||||
## file:///opt/intel/documentation_2020/en/mkl/ps2020/get_started.htm
|
||||
|
||||
tools/marian-dev/build/marian:
|
||||
mkdir -p ${dir $@}
|
||||
|
Loading…
Reference in New Issue
Block a user