From f1635184222857f319528082f6c2c5361b5bfb6b Mon Sep 17 00:00:00 2001 From: Joerg Tiedemann Date: Mon, 14 Mar 2022 18:29:44 +0200 Subject: [PATCH] changed gitraw --- lib/projects/distill.mk | 3 +++ lib/projects/elg.mk | 40 ++++++++++++++++++++++++++++++++- tatoeba/back-translate/Makefile | 3 ++- tatoeba/pivoting/Makefile | 3 ++- 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/lib/projects/distill.mk b/lib/projects/distill.mk index c583f548..ce7b2759 100644 --- a/lib/projects/distill.mk +++ b/lib/projects/distill.mk @@ -105,6 +105,9 @@ test-quantized-all-finetuned-student: %-small-student: ${MAKE} MODELTYPE=transformer-small-align ${@:small-student=student} +%-base-student: + ${MAKE} MODELTYPE=transformer-base-align ${@:base-student=student} + diff --git a/lib/projects/elg.mk b/lib/projects/elg.mk index d74ba95c..eb84b3e5 100644 --- a/lib/projects/elg.mk +++ b/lib/projects/elg.mk @@ -66,11 +66,49 @@ engukr-quantize-student: ## special thing: student models with pivot-based data (does that work?) +## --> does not work very well ... 
elg-ukr-students: for l in bul dan deu fin hun nob ron swe slk tur; do \ - ${MAKE} STUDENT_DATA=pft-nopar SRCLANGS=ukr TRGLANGS=$$l train-tiny11-student; \ + ${MAKE} STUDENT_DATA=ftmono-pft-nopar SRCLANGS=ukr TRGLANGS=$$l train-tiny11-student; \ done + + + +## tiny11 transformer model for finnish with pivot data (reuse student recipes) +elg-fin2ukr-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt SRCLANGS=fin TRGLANGS=ukr MARIAN_EXTRA=--no-restore-corpus train-tiny11-student + +elg-ukr2fin-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt SRCLANGS=ukr TRGLANGS=fin train-tiny11-student + + +## tiny11 transformer model for hungarian with pivot data (reuse student recipes) +elg-hun2ukr-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt-bt SRCLANGS=hun TRGLANGS=ukr MARIAN_EXTRA=--no-restore-corpus train-tiny11-student + +elg-ukr2hun-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt-bt SRCLANGS=ukr TRGLANGS=hun train-tiny11-student + + +elg-ron2ukr-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt-bt SRCLANGS=ron TRGLANGS=ukr train-tiny11-student + +elg-ukr2ron-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt-bt SRCLANGS=ukr TRGLANGS=ron train-tiny11-student + +elg-swe2ukr-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt-bt SRCLANGS=swe TRGLANGS=ukr train-tiny11-student + +elg-ukr2swe-tiny11: + ${MAKE} STUDENT_DATA=pft-pbt-bt SRCLANGS=ukr TRGLANGS=swe train-tiny11-student + + + + + + + elg-ukr-students-test: ${MAKE} STUDENT_DATA=ftmono-pft-nopar SRCLANGS=ukr TRGLANGS=deu train-tiny11-student ${MAKE} STUDENT_DATA=ftmono-pft-nopar SRCLANGS=ukr TRGLANGS=hun train-tiny11-student diff --git a/tatoeba/back-translate/Makefile b/tatoeba/back-translate/Makefile index 5e6f73c9..55d12ba3 100644 --- a/tatoeba/back-translate/Makefile +++ b/tatoeba/back-translate/Makefile @@ -23,7 +23,8 @@ TRG = eng TATOEBA_RELEASE = v2020-07-28 TATOEBA_STORAGE = https://object.pouta.csc.fi/Tatoeba-Challenge-${TATOEBA_RELEASE} TATOEBA_WIKI_STORAGE = https://object.pouta.csc.fi/Tatoeba-Challenge-WikiShuffled -TATOEBA_GITRAW = 
https://raw.githubusercontent.com/Helsinki-NLP/Tatoeba-Challenge/master +# TATOEBA_GITRAW = https://raw.githubusercontent.com/Helsinki-NLP/Tatoeba-Challenge/master +TATOEBA_GITRAW = https://github.com/Helsinki-NLP/Tatoeba-Challenge/raw/master TATOEBA_RELEASED = ${TATOEBA_GITRAW}/models/released-model-results.txt TATOEBA_RELEASED_ALL = ${TATOEBA_GITRAW}/models/released-model-results-all.txt TATOEBA_RELEASED_BT = https://object.pouta.csc.fi/Tatoeba-MT-bt/released-data.txt diff --git a/tatoeba/pivoting/Makefile b/tatoeba/pivoting/Makefile index e230d49a..274baba3 100644 --- a/tatoeba/pivoting/Makefile +++ b/tatoeba/pivoting/Makefile @@ -30,7 +30,8 @@ MARIAN_WORKSPACE=12000 TATOEBA_VERSION ?= v2021-08-07 TATOEBA_VERSION_NOHYPHEN ?= $(subst -,,${TATOEBA_VERSION}) -TATOEBA_GITRAW = https://raw.githubusercontent.com/Helsinki-NLP/Tatoeba-Challenge/master +# TATOEBA_GITRAW = https://raw.githubusercontent.com/Helsinki-NLP/Tatoeba-Challenge/master +TATOEBA_GITRAW = https://github.com/Helsinki-NLP/Tatoeba-Challenge/raw/master TATOEBA_RELEASED = ${TATOEBA_GITRAW}/models/released-model-results-all.txt TATOEBA_RELEASED_BT = https://object.pouta.csc.fi/Tatoeba-MT-bt/released-data.txt TATOEBA_MODEL_STORAGE = https://object.pouta.csc.fi/Tatoeba-MT-models