mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-11-27 11:03:13 +03:00
moved project makefiles to lib/projects
This commit is contained in:
parent
1a6e29275d
commit
16eef8e45d
18
Makefile
18
Makefile
@ -165,23 +165,7 @@ include lib/allas.mk
|
||||
|
||||
include lib/generic.mk
|
||||
include lib/langsets.mk
|
||||
# include lib/tasks.mk
|
||||
include lib/models/celtic.mk
|
||||
include lib/models/finland.mk
|
||||
include lib/models/fiskmo.mk
|
||||
include lib/models/memad.mk
|
||||
include lib/models/multilingual.mk
|
||||
include lib/models/opus.mk
|
||||
include lib/models/romance.mk
|
||||
include lib/models/russian.mk
|
||||
include lib/models/sami.mk
|
||||
include lib/models/wikimedia.mk
|
||||
include lib/models/wikimatrix.mk
|
||||
|
||||
include lib/models/doclevel.mk
|
||||
include lib/models/simplify.mk
|
||||
|
||||
include lib/models/tatoeba.mk
|
||||
include lib/projects.mk
|
||||
|
||||
|
||||
.PHONY: all
|
||||
|
10
README.md
10
README.md
@ -49,12 +49,12 @@ Essential files for making new models:
|
||||
* `lib/dist.mk`: make packages for distributing models (CSC ObjectStorage based)
|
||||
* `lib/slurm.mk`: submit jobs with SLURM
|
||||
|
||||
There are also make targets for specific models and tasks. Look into `lib/models/` to see what has been defined already.
|
||||
Note that this frequently changes! There is, for example:
|
||||
There are also make targets for specific projects and tasks. Look into `lib/projects/` to see what has been defined already.
|
||||
Note that this changes frequently! Check the file `lib/projects.mk` to see which project files are enabled. Currently there are, for example:
|
||||
|
||||
* `lib/models/multilingual.mk`: various multilingual models
|
||||
* `lib/models/celtic.mk`: data and models for Celtic languages
|
||||
* `lib/models/doclevel.mk`: experimental document-level models
|
||||
* `lib/projects/multilingual.mk`: various multilingual models
|
||||
* `lib/projects/celtic.mk`: data and models for Celtic languages
|
||||
* `lib/projects/doclevel.mk`: experimental document-level models
|
||||
|
||||
|
||||
Run this if you want to train a model, for example for translating English to French:
|
||||
|
@ -19,7 +19,6 @@ More information about specific tasks:
|
||||
* [Testing models](Test.md)
|
||||
* [Running batch jobs](BatchJobs.md)
|
||||
* [Packaging, releases and storage](ReleaseAndStore.md)
|
||||
* [Models for the Tatoeba MT Challenge](TatoebaChallenge.md)
|
||||
|
||||
|
||||
Tutorials (to-do)
|
||||
@ -30,17 +29,18 @@ Tutorials (to-do)
|
||||
|
||||
Documentation of project-specific models:
|
||||
|
||||
* [Celtic language models](models/Celtic.md)
|
||||
* [Romance language models](models/Romance.md)
|
||||
* [Russian models](models/Russian.md)
|
||||
* [Sami language models](models/Sami.md)
|
||||
* [Languages in Finland](models/Finland.md)
|
||||
* [Multilingual models](models/Multilingual.md)
|
||||
* [Doc-level models](models/Doclevel.md)
|
||||
* [Simplification models](models/Simplify.md)
|
||||
* [Fiskmö project](models/fiskmo.md)
|
||||
* [MeMAD project](models/memad.md)
|
||||
* [Wikimedia collaboration model](models/Wikimedia.md)
|
||||
* [Models for the Tatoeba MT Challenge](TatoebaChallenge.md)
|
||||
* [Celtic language models](projects/Celtic.md)
|
||||
* [Romance language models](projects/Romance.md)
|
||||
* [Russian models](projects/Russian.md)
|
||||
* [Sami language models](projects/Sami.md)
|
||||
* [Languages in Finland](projects/Finland.md)
|
||||
* [Multilingual models](projects/Multilingual.md)
|
||||
* [Doc-level models](projects/Doclevel.md)
|
||||
* [Simplification models](projects/Simplify.md)
|
||||
* [Fiskmö project](projects/fiskmo.md)
|
||||
* [MeMAD project](projects/memad.md)
|
||||
* [Wikimedia collaboration model](projects/Wikimedia.md)
|
||||
|
||||
|
||||
|
||||
@ -87,25 +87,26 @@ Targets for job management, packaging and other project related tasks:
|
||||
* `lib/misc.mk`: miscellaneous tasks
|
||||
|
||||
|
||||
Targets for specific models and projects in `lib/models/`, currently:
|
||||
Targets for specific models and projects in `lib/projects/`, currently:
|
||||
|
||||
|
||||
* `lib/models/celtic.mk`: data and models Celtic languages
|
||||
* `lib/models/finland.mk`: main languages spoken in Finland
|
||||
* `lib/models/fiskmo.mk`: models related to the fiskmö project
|
||||
* `lib/models/memad.mk`: models related to the MeMAD project
|
||||
* `lib/models/multilingual.mk`: various multilingual models
|
||||
* `lib/models/opus.mk`: models covering OPUS languages
|
||||
* `lib/models/romance.mk`: Romance languages
|
||||
* `lib/models/russian.mk`: data and models for Russian
|
||||
* `lib/models/sami.mk`: data and models for Sami languages
|
||||
* `lib/models/wikimedia.mk`: models related to WikiMedia collaboration
|
||||
* `lib/models/wikimatrix.mk`: models that include WikiMatrix data
|
||||
* `lib/projects.mk`: high-level makefile that includes enabled projects
|
||||
* `lib/projects/celtic.mk`: data and models for Celtic languages
|
||||
* `lib/projects/finland.mk`: main languages spoken in Finland
|
||||
* `lib/projects/fiskmo.mk`: models related to the fiskmö project
|
||||
* `lib/projects/memad.mk`: models related to the MeMAD project
|
||||
* `lib/projects/multilingual.mk`: various multilingual models
|
||||
* `lib/projects/opus.mk`: models covering OPUS languages
|
||||
* `lib/projects/romance.mk`: Romance languages
|
||||
* `lib/projects/russian.mk`: data and models for Russian
|
||||
* `lib/projects/sami.mk`: data and models for Sami languages
|
||||
* `lib/projects/wikimedia.mk`: models related to WikiMedia collaboration
|
||||
* `lib/projects/wikimatrix.mk`: models that include WikiMatrix data
|
||||
|
||||
|
||||
Targets related to the Tatoeba MT Challenge:
|
||||
|
||||
* `lib/models/tatoeba.mk`
|
||||
* `lib/projects/tatoeba.mk`
|
||||
|
||||
|
||||
Scripts for various tasks in `scripts/`:
|
||||
|
@ -2,7 +2,7 @@
|
||||
# Models for the Tatoeba Translation Challenge
|
||||
|
||||
|
||||
This is information about scripts for training and testing models with data from the [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge). The build targets are defined in [lib/models/tatoeba.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/models/tatoeba.mk).
|
||||
This is information about scripts for training and testing models with data from the [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge). The build targets are defined in [lib/projects/tatoeba.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/projects/tatoeba.mk).
|
||||
|
||||
|
||||
## Train and evaluate a single translation pair
|
||||
|
@ -8,7 +8,7 @@ Recipes for training multilingual language models involving Sami languages.
|
||||
Relevant makefiles:
|
||||
|
||||
* [Makefile](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/Makefile)
|
||||
* [lib/models/sami.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/models/sami.mk)
|
||||
* [lib/projects/sami.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/projects/sami.mk)
|
||||
* [backtranslate/Makefile](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/backtranslate/Makefile)
|
||||
|
||||
|
||||
@ -54,7 +54,7 @@ Parameters / variables:
|
||||
|
||||
* `GIELLATEKNO_HOME`: URL for Giellatekno resources (default: https://victorio.uit.no/biggies/trunk)
|
||||
* `GIELLATEKNO_TM_HOME`: directory of translation memories (default: ${GIELLATEKNO_HOME}/mt/omegat)
|
||||
* `GIELLATEKNO_SAMI_TM`: list of translation memories to be downloaded (see [lib/models/sami.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/models/sami.mk))
|
||||
* `GIELLATEKNO_SAMI_TM`: list of translation memories to be downloaded (see [lib/projects/sami.mk](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/lib/projects/sami.mk))
|
||||
|
||||
|
||||
Implicit rules:
|
23
lib/projects.mk
Normal file
23
lib/projects.mk
Normal file
@ -0,0 +1,23 @@
|
||||
# -*-makefile-*-
|
||||
#
|
||||
# project specific recipes
|
||||
# comment out the projects you do not need; uncomment the ones you want to activate
|
||||
#
|
||||
|
||||
|
||||
include lib/projects/celtic.mk
|
||||
include lib/projects/finland.mk
|
||||
include lib/projects/fiskmo.mk
|
||||
include lib/projects/memad.mk
|
||||
include lib/projects/multilingual.mk
|
||||
include lib/projects/opus.mk
|
||||
include lib/projects/romance.mk
|
||||
include lib/projects/russian.mk
|
||||
include lib/projects/sami.mk
|
||||
include lib/projects/wikimedia.mk
|
||||
include lib/projects/wikimatrix.mk
|
||||
|
||||
include lib/projects/doclevel.mk
|
||||
include lib/projects/simplify.mk
|
||||
|
||||
include lib/projects/tatoeba.mk
|
@ -143,7 +143,7 @@ ${GIELLATEKNO_SAMI_TM}:
|
||||
|
||||
# FIT_DATA_SIZE=200000 \
|
||||
|
||||
%-sami:
|
||||
%-finno-ugric:
|
||||
${MAKE} DATASET=${DATASET}+giella \
|
||||
HELDOUTSIZE=0 \
|
||||
BPESIZE=4000 \
|
||||
@ -156,6 +156,20 @@ ${GIELLATEKNO_SAMI_TM}:
|
||||
SKIP_LANGPAIRS="en-en|en-et|en-fi|en-nb|en-no|en-nn|en-ru|en-sv|et-et|et-fi|et-nb|et-no|et-nn|et-ru|et-sv|fi-fi|fi-nb|fi-no|fi-nn|fi-ru|fi-sv|nb-nb|nb-no|nb-nn|nb-ru|nb-sv|no-no|no-nn|no-ru|no-sv|nn-nn|nn-ru|nn-sv|ru-ru|ru-sv|sv-sv" \
|
||||
${@:-finno-ugric=}
# NOTE: the %-finno-ugric rule must strip its own "-finno-ugric" suffix from $@ (not "-sami"); otherwise the sub-make is handed the unchanged goal and re-enters this rule forever.
|
||||
|
||||
# Wrapper for goals ending in "-sami": re-invokes make on the same goal with
# the "-sami" suffix removed, overriding the settings listed in the recipe:
#   - "+giella" is appended to DATASET; DEVSET and TESTSET are both "giella"
#   - no held-out data (HELDOUTSIZE=0), BPESIZE=4000, DEVMINSIZE=100
#   - "glossary" is passed as EXTRA_TRAINSET
#   - SRCLANGS and TRGLANGS are the same list: se sma smj smn sms plus
#     fi nb no nn ru sv en
#   - SKIP_LANGPAIRS lists every pair made up solely of fi/nb/no/nn/ru/sv/en
#     (including same-language pairs)
%-sami:
	$(MAKE) DATASET=$(DATASET)+giella \
		HELDOUTSIZE=0 \
		BPESIZE=4000 \
		DEVSET=giella \
		TESTSET=giella \
		DEVMINSIZE=100 \
		EXTRA_TRAINSET="glossary" \
		SRCLANGS="se sma smj smn sms fi nb no nn ru sv en" \
		TRGLANGS="se sma smj smn sms fi nb no nn ru sv en" \
		SKIP_LANGPAIRS="en-en|en-fi|en-nb|en-no|en-nn|en-ru|en-sv|fi-fi|fi-nb|fi-no|fi-nn|fi-ru|fi-sv|nb-nb|nb-no|nb-nn|nb-ru|nb-sv|no-no|no-nn|no-ru|no-sv|nn-nn|nn-ru|nn-sv|ru-ru|ru-sv|sv-sv" \
		$(patsubst %-sami,%,$@)
|
||||
|
||||
|
||||
|
||||
%-sami-xx:
|
||||
${MAKE} DATASET=${DATASET}+giella \
|
Loading…
Reference in New Issue
Block a user