allas storage commands

This commit is contained in:
Joerg Tiedemann 2021-11-04 09:57:48 +02:00
parent dbcdab4d6b
commit 08ed366914
3 changed files with 61 additions and 1 deletions

View File

@ -378,3 +378,22 @@ train-and-start-bt-jobs: ${WORKDIR}/${MODEL}.${MODELTYPE}.model${NR}.done
${MAKE} local-dist
${MAKE} -C backtranslate MODELHOME=${MODELDIR} translate-all-wikis-jobs
## all released model packages (zip files, one directory level below models-tatoeba)
ALL_RELEASED_MODELS = ${wildcard models-tatoeba/*/*.zip}

## one *.fixed-vocab target per released zip file
ALL_VOCABS_FIXED = ${ALL_RELEASED_MODELS:.zip=.fixed-vocab}

## check (and repair if necessary) the vocabulary file in every released package
fix-released-vocabs: ${ALL_VOCABS_FIXED}
## check the vocabulary file(s) inside a released model package:
##   - extract every archive member whose name ends in 'vocab.yml'
##   - run scripts/fix_vocab.py on it
##   - if the script left a backup copy (<vocab>.bak) then the vocab was
##     modified: put the fixed file and the backup back into the zip archive
##   - remove the extracted files again
## NOTE: the recipe never creates the target file itself, so the check is
##       repeated every time the target is requested
## NOTE: unzip -o overwrites left-over files without asking (no interactive
##       prompt in batch runs); a failing unzip/python/zip call aborts the
##       recipe instead of being reported as "vocab ... is fine"
%.fixed-vocab: %.zip
	@for v in `unzip -l $< | grep 'vocab.yml$$' | sed 's/^.* //'`; do \
	  unzip -o $< "$$v" && python3 scripts/fix_vocab.py "$$v" || exit 1; \
	  if [ -e "$$v.bak" ]; then \
	    echo "update $$v in $<"; \
	    zip $< "$$v" "$$v.bak" || exit 1; \
	  else \
	    echo "vocab $$v is fine in $<"; \
	  fi; \
	  rm -f "$$v" "$$v.bak"; \
	done

View File

@ -20,6 +20,9 @@ WORK_DESTDIR ?= ${WORKHOME}
## name of the storage container for the work directory:
## derived from the last path component of WORKHOME and the user name
WORK_CONTAINER    ?= OPUS-MT-train_$(notdir $(WORKHOME))-$(WHOAMI)

## same container name but with the fixed user name 'tiedeman'
WORK_CONTAINER_JT ?= OPUS-MT-train_$(notdir $(WORKHOME))-tiedeman

## base URL of the allas object storage
ALLAS_STORAGE_URL = https://object.pouta.csc.fi/
## store workdir on allas
store:
cd ${WORK_SRCDIR} && a-put -b ${WORK_CONTAINER} --nc --follow-links --override ${LANGPAIRSTR}
@ -43,3 +46,41 @@ fetch-data:
mkdir -p ${WORK_DESTDIR}
cd ${WORK_DESTDIR} && a-get ${WORK_CONTAINER}/data.tar
## generic recipe for storing work data and removing it from the file system
## DANGEROUS --- this really deletes the data!
## NOTE: makes container also world-readable (see swift post command)
##       --> this makes it easier to fetch things without login credentials
##       --> should not store sensitive data here!
##
## - only does something if the first component of the target path
##   (split at '/' and '-') is "work", e.g. work/xx-yy or work-tatoeba/xx-yy
## - the container name is OPUS-MT-train_<dir with '/' replaced by '-'><user>
## - all steps are chained with && so that the local data is only deleted
##   after a successful upload (a failing cd or a-put stops the recipe)
## - the matching <name>.fetched marker is removed once the data is stored
%.stored: %
	if [ "$(firstword $(subst -, ,$(subst /, ,$@)))" = "work" ]; then \
	  b=OPUS-MT-train_$(subst /,-,$(dir $@))${WHOAMI}; \
	  cd $(dir $@) && \
	  a-put -b $$b --nc --follow-links --override $(notdir $<) && \
	  rm -fr $(notdir $<) && \
	  touch $(notdir $@) && \
	  rm -f $(notdir $(@:.stored=.fetched)) && \
	  swift post $$b --read-acl ".r:*"; \
	fi
## TODO: fetch with wget instead of using a-commands
## fetch work data from allas
##
## - only does something if the first component of the target path
##   (split at '/' and '-') is "work", e.g. work/xx-yy or work-tatoeba/xx-yy
## - downloads <name>.tar from the container
##   OPUS-MT-train_<dir with '/' replaced by '-'><user>
## - the <name>.fetched marker is only created (and the matching
##   <name>.stored marker only removed) if the download succeeded
%.fetched:
	if [ "$(firstword $(subst -, ,$(subst /, ,$@)))" = "work" ]; then \
	  cd $(dir $@) && \
	  a-get OPUS-MT-train_$(subst /,-,$(dir $@))${WHOAMI}/$(notdir $(@:.fetched=.tar)) && \
	  touch $(notdir $@) && \
	  rm -f $(notdir $(@:.fetched=.stored)); \
	fi
## alternative way of fetching work data:
## request the language-pair directory inside a work directory as target
## (needs SRCLANGS and TRGLANGS, or LANGPAIRSTR set directly)
## --> creates the work directory and downloads ${LANGPAIRSTR}.tar into it
work-%/${LANGPAIRSTR}:
	mkdir -p $(@D)
	cd $(@D) && a-get OPUS-MT-train_$(subst /,-,$(dir $@))${WHOAMI}/${LANGPAIRSTR}.tar
## *.stored targets for all entries in work-tatoeba whose name starts
## with d, g-r or t-z (see the glob pattern)
UPLOAD_MODELS = $(addsuffix .stored,${wildcard work-tatoeba/[dg-rt-z]*})

## store all selected work directories (via the generic %.stored recipe)
upload-workfiles: ${UPLOAD_MODELS}

View File

@ -10,7 +10,7 @@ filename = sys.argv[1]
try:
input = open(filename, 'r')
yaml.load(input)
yaml.safe_load(input)
except:
print('YAML file is broken - try to fix it!')
print(f'copy {filename} to {filename}.bak')