mirror of
https://github.com/Helsinki-NLP/OPUS-MT-train.git
synced 2024-11-27 11:03:13 +03:00
small fixes for tatoeba models
This commit is contained in:
parent
c421fbdb15
commit
5beb4e58aa
@ -53,9 +53,21 @@ elg-eval:
|
||||
done
|
||||
for l in ${ELG_EU_SELECTED_MULTILANG}; do \
|
||||
${MAKE} MODELTYPE=transformer-big SRCLANGS="$$l" TRGLANGS=eng eval-bt-tatoeba; \
|
||||
${MAKE} MODELTYPE=transformer-big SRCLANGS="$$l" TRGLANGS=eng tatoeba-multilingual-eval-bt; \
|
||||
${MAKE} MODELTYPE=transformer-big SRCLANGS="$$l" TRGLANGS=eng eval-testsets-bt-tatoeba; \
|
||||
${MAKE} MODELTYPE=transformer-big TRGLANGS="$$l" SRCLANGS=eng eval-bt-tatoeba; \
|
||||
${MAKE} MODELTYPE=transformer-big TRGLANGS="$$l" SRCLANGS=eng tatoeba-multilingual-eval-bt; \
|
||||
${MAKE} MODELTYPE=transformer-big TRGLANGS="$$l" SRCLANGS=eng eval-testsets-bt-tatoeba; \
|
||||
done
|
||||
|
||||
## elg-multieval: for every language listed in ELG_EU_SELECTED_MULTILANG,
## re-invoke make on the tatoeba-multilingual-eval-bt goal with
## MODELTYPE=transformer-big, first with that language as SRCLANGS and eng as
## TRGLANGS, then with eng as SRCLANGS and that language as TRGLANGS.
## (Original note: only separate languages in multilingual models,
## i.e. a set of individual languages.)
elg-multieval:
	for lang in $(ELG_EU_SELECTED_MULTILANG); do \
	  $(MAKE) MODELTYPE=transformer-big SRCLANGS="$$lang" TRGLANGS=eng tatoeba-multilingual-eval-bt; \
	  $(MAKE) MODELTYPE=transformer-big SRCLANGS=eng TRGLANGS="$$lang" tatoeba-multilingual-eval-bt; \
	done
|
||||
|
||||
# multieval-bt-tatoeba; \
|
||||
|
||||
|
||||
elg-eng2all:
|
||||
@ -128,6 +140,16 @@ elg-eng2cel:
|
||||
CLEAN_DEVDATA_TYPE=clean \
|
||||
tatoeba-eng2cel-trainjob-bt
|
||||
|
||||
## elg-por2eng: re-invoke make on the tatoeba-por2eng-trainjob-bt goal with
## MODELTYPE=transformer-big. MARIAN_EXTRA carries --no-restore-corpus and
## DATA_PREPARE_HPCPARAMS carries the CPUJOB_HPC_* settings
## (CORES=2, MEM=16g, DISK=1000) for the sub-make.
elg-por2eng:
	$(MAKE) tatoeba-por2eng-trainjob-bt \
		MODELTYPE=transformer-big \
		MARIAN_EXTRA=--no-restore-corpus \
		DATA_PREPARE_HPCPARAMS='CPUJOB_HPC_CORES=2 CPUJOB_HPC_MEM=16g CPUJOB_HPC_DISK=1000'
|
||||
|
||||
## elg-lav2eng: re-invoke make on the tatoeba-lav2eng-trainjob-bt goal with
## MODELTYPE=transformer-big and --no-restore-corpus passed via MARIAN_EXTRA.
elg-lav2eng:
	$(MAKE) tatoeba-lav2eng-trainjob-bt \
		MODELTYPE=transformer-big \
		MARIAN_EXTRA=--no-restore-corpus
|
||||
|
||||
elg-ara2eng:
|
||||
${MAKE} MODELTYPE=transformer-big \
|
||||
|
@ -273,14 +273,18 @@ MAX_TRGLANGS ?= 7000
|
||||
# find-langgroup = $(filter ${OPUS_LANGS3},\
|
||||
# $(sort ${shell langgroup $(1) | xargs iso639 -m -n} ${1} ${2}))
|
||||
|
||||
# find-langgroup(name[,extra]): pipes the output of `langgroup $(1)` through
# `xargs iso639 -m -n`, adds ${1} and ${2} to that word list, sorts and
# de-duplicates it, and returns the words of ${TATOEBA_LANGS} that match it.
# NOTE(review): find-langgroup is assigned a second time further down (the
# variant that splits on '+'). With '=' the last assignment wins, so this
# definition has no effect while the later one is present - it looks like the
# pre-change line of a diff; confirm and drop it if so.
find-langgroup = $(filter $(sort ${shell langgroup $(1) | xargs iso639 -m -n} ${1} ${2}),${TATOEBA_LANGS})
|
||||
|
||||
## Both functions below take a language or language-group name in $(1) and an
## optional list of extra languages in $(2). They run
## `langgroup ... | xargs iso639 -m -n` in a shell, add $(1) and $(2) to the
## output, sort/de-duplicate the result and return the words of
## ${TATOEBA_LANGS} that match one of those words.
## (langgroup and iso639 are external commands; presumably they expand a group
## name into its member languages and normalise the codes - TODO confirm.)
##
## OLD (find-langgroup-singleonly): $(1) is passed on unchanged, so it only
## allows a single language / group name
|
||||
## NEW (find-langgroup): every '+' in $(1) is replaced by a space first, which
## allows several names joined by '+' to be given at once
|
||||
|
||||
find-langgroup-singleonly = $(filter $(sort ${shell langgroup $(1) | xargs iso639 -m -n} ${1} ${2}),${TATOEBA_LANGS})
|
||||
find-langgroup = $(filter $(sort ${shell langgroup $(subst +, ,$(1)) | xargs iso639 -m -n} $(subst +, ,${1}) ${2}),${TATOEBA_LANGS})
|
||||
## Helpers for language-pair labels. $(1) is a label whose two sides are
## separated by '2' or '-': every '2' and every '-' in the label is replaced by
## a space, then the first word is taken as the source side and the last word
## as the target side. $(2) is an optional list of extra languages.
## (Presumably labels look like the "eng2cel" part of the tatoeba-* goal
## names - TODO confirm. Note that a '2' anywhere in the label also splits it.)

## find-srclanggroup: find-langgroup for the source side of $(1), plus $(2).
find-srclanggroup = $(call find-langgroup,$(firstword $(subst -, ,$(subst 2, ,$(1)))),$(2))

## find-trglanggroup: find-langgroup for the target side of $(1), plus $(2).
find-trglanggroup = $(call find-langgroup,$(lastword $(subst -, ,$(subst 2, ,$(1)))),$(2))

## find-langgroup-pair: sorted, de-duplicated union of the source-side result,
## the target-side result and the words in $(2). $(2) is not passed on to the
## two inner calls, so its words are added here without going through the
## ${TATOEBA_LANGS} filter.
find-langgroup-pair = $(sort $(call find-srclanggroup,$(1)) $(call find-trglanggroup,$(1)) $(2))
|
||||
|
||||
|
||||
|
||||
## print languages in this set
|
||||
tatoeba-%-langs:
|
||||
-( s=$(firstword $(subst 2, ,$(patsubst tatoeba-%-langs,%,$@))); \
|
||||
|
Loading…
Reference in New Issue
Block a user