fixed problem with missing link in reverse-data

This commit is contained in:
Joerg Tiedemann 2020-04-19 01:08:26 +03:00
parent 58f042d127
commit e5e58d1a37
3 changed files with 30 additions and 1 deletions

View File

@ -393,7 +393,7 @@ ${WORKDIR}/config:
echo "MARIAN_DROPOUT = 0.5" >> $@; \
echo "MARIAN_VALID_MINI_BATCH = 4" >> $@; \
echo "HELDOUTSIZE = 0" >> $@; \
echo "BPESIZE = 500" >> $@; \
echo "BPESIZE = 1000" >> $@; \
echo "DEVSIZE = 500" >> $@; \
echo "TESTSIZE = 1000" >> $@; \
echo "DEVMINSIZE = 100" >> $@; \

View File

@ -115,6 +115,8 @@ ifeq (${words ${TRGLANGS}},1)
ln -s ${DEV_SRC} ${REV_WORKDIR}/val/${notdir ${DEV_TRG}}; \
ln -s ${DEV_TRG} ${REV_WORKDIR}/val/${notdir ${DEV_SRC}}; \
ln -s ${DEV_SRC}.shuffled.gz ${REV_WORKDIR}/val/${notdir ${DEV_SRC}.shuffled.gz}; \
ln -s ${DEV_SRC}.notused.gz ${REV_WORKDIR}/val/${notdir ${DEV_TRG}.notused.gz}; \
ln -s ${DEV_TRG}.notused.gz ${REV_WORKDIR}/val/${notdir ${DEV_SRC}.notused.gz}; \
fi
-if [ -e ${TEST_SRC} ]; then \
mkdir -p ${REV_WORKDIR}/test; \

View File

@ -946,3 +946,30 @@ germanic-romance:
SRCLANGS="${GERMANIC}" train.submit-multigpu
## fix a problem with missing links from reverse-data
## --> because of the missing links, models with bt data used less data
## --> those models need to be restarted!
##
## For every symlinked *.src.shuffled.gz below work/ (paths containing
## "old" are skipped) that has no *.src.notused.gz next to it:
##   - take the language pair <s>-<t> from the 2nd path component and the
##     data set name <d> from the 4th one (work/<s>-<t>/<dir>/<d>...)
##   - if work/<t>-<s>/val/<d>.trg.notused.gz exists, link the "notused"
##     files of the reverse direction with src and trg swapped
##     (absolute link targets, rooted at ${CURDIR} so that they also
##     resolve correctly when make is started with -C)
##   - if work/<s>-<t> holds an opus*+bt*valid1.log, move all *+bt*
##     files to work/<s>-<t>/old-bt-model
.PHONY: fix-missing-val
fix-missing-val:
	for f in $$(find work/ -type l -name '*.src.shuffled.gz' | grep -v old | sed 's/\.src\.shuffled\.gz$$//'); do \
	  if [ ! -e "$$f.src.notused.gz" ]; then \
	    echo "missing $$f.src.notused.gz!"; \
	    s=$$(echo "$$f" | cut -f2 -d'/' | cut -f1 -d'-'); \
	    t=$$(echo "$$f" | cut -f2 -d'/' | cut -f2 -d'-'); \
	    d=$$(echo "$$f" | cut -f4 -d'/'); \
	    if [ -e "work/$$t-$$s/val/$$d.trg.notused.gz" ]; then \
	      echo "linking ${CURDIR}/work/$$t-$$s/val/$$d.trg.notused.gz"; \
	      ln -s "${CURDIR}/work/$$t-$$s/val/$$d.trg.notused.gz" "$$f.src.notused.gz"; \
	      ln -s "${CURDIR}/work/$$t-$$s/val/$$d.src.notused.gz" "$$f.trg.notused.gz"; \
	      if [ $$(ls work/$$s-$$t/opus*+bt*valid1.log 2>/dev/null | wc -l) -gt 0 ]; then \
	        echo "opus+bt model exists! move it away!"; \
	        mkdir -p "work/$$s-$$t/old-bt-model"; \
	        mv work/$$s-$$t/*+bt* "work/$$s-$$t/old-bt-model/"; \
	      fi; \
	    fi; \
	  fi \
	done