mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
add script for testing on Jenkins
This commit is contained in:
parent
3b6a1734df
commit
3f7dd08d0b
2
.gitignore
vendored
2
.gitignore
vendored
@ -36,3 +36,5 @@
|
||||
|
||||
build
|
||||
scripts/docker
|
||||
amunmt-distribution.tar.gz
|
||||
tests/wmt16/*/*
|
||||
|
@ -8,3 +8,6 @@ mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
|
||||
cd ..
|
||||
tar zvcf amunmt-distribution.tar.gz build/bin/* scripts/download_models.py tests/wmt16/Makefile tests/wmt16/extract_segs.py
|
||||
|
59
tests/wmt16/Makefile
Normal file
59
tests/wmt16/Makefile
Normal file
@ -0,0 +1,59 @@
|
||||
# Test set selection: WMT16 domain and translation direction.
# DOMAIN is used both in the working directory name and in the archive
# member name (test/$(DOMAIN)test2016-...).
DOMAIN=news
SRC=de
TRG=en

# Locations of the external tools invoked by the recipes below.
MOSES_SCRIPTS=/opt/moses/moses-scripts
AMUNMT_BIN=../../build/bin

# Derived names: language direction and the per-run working directory that
# holds every downloaded and generated file.
DIRECTION=$(SRC)-$(TRG)
DIR=$(DOMAIN)-$(DIRECTION)

# Maximum number of source lines handed to the translator (via `head -n`).
NUMBER_OF_LINES=99999

# Keep every intermediate file of the pipeline instead of auto-deleting it.
.SECONDARY:

# Every recipe writes its result with `> $@`; remove the half-written target
# when a recipe fails so that it is not mistaken for an up-to-date file on the
# next run.
.DELETE_ON_ERROR:
|
||||
|
||||
# Reduce the BLEU summary to a two-line data file: the header "BLEU" followed
# by the numeric score.
# A record is printed only for a line that really contains "BLEU = <number>",
# and the recipe fails (removing the output) when no such line is found, so a
# malformed score file can no longer yield a bogus or empty data point.
$(DIR)/bleu.points: $(DIR)/bleu.score
	perl -ne 'if (/BLEU = (\d+\.\d+)/) { print "BLEU\n$$1\n"; $$found = 1 } END { $$? = 1 unless $$found }' < $< > $@ || { rm -f $@; exit 1; }
|
||||
|
||||
# Score the tokenized system output against the tokenized reference.
# The reference (first prerequisite) is passed as an argument, the system
# output (second prerequisite) is fed on standard input.
# NOTE(review): the system output is capped at NUMBER_OF_LINES by the
# translation rule while the reference is not; keep NUMBER_OF_LINES at least
# as large as the test set.
$(DIR)/bleu.score: $(DIR)/test-$(SRC)$(TRG)-ref.$(TRG).tok $(DIR)/test-$(SRC)$(TRG)-out.$(TRG).tok
	$(MOSES_SCRIPTS)/generic/multi-bleu.perl $(word 1,$^) < $(word 2,$^) > $@
|
||||
|
||||
# Tokenize any target-language text file with the Moses tokenizer.
%.$(TRG).tok: %.$(TRG).txt
	$(MOSES_SCRIPTS)/tokenizer/tokenizer.perl -a -l $(TRG) < $< > $@
|
||||
|
||||
# Turn raw translator output into plain text: delete every "@@ " marker,
# then run the Moses detruecaser and detokenizer.
%-out.$(TRG).txt: %-out.$(TRG).bpe
	sed 's/\@\@ //g' < $< \
	  | $(MOSES_SCRIPTS)/recaser/detruecase.perl \
	  | $(MOSES_SCRIPTS)/tokenizer/detokenizer.perl -l $(TRG) \
	  > $@
|
||||
|
||||
# Translate the first NUMBER_OF_LINES lines of the BPE-encoded source with
# amun, configured by the downloaded config.yml (second prerequisite).
%-out.$(TRG).bpe: %-src.$(SRC).bpe $(DIR)/config.yml
	head -n $(NUMBER_OF_LINES) $< | $(AMUNMT_BIN)/amun -c $(word 2,$^) > $@
|
||||
|
||||
# Apply the downloaded BPE codes (second prerequisite) to the preprocessed
# source text.
%.$(SRC).bpe: %.$(SRC).pre $(DIR)/$(SRC)$(TRG).bpe
	$(AMUNMT_BIN)/bpe $(word 2,$^) < $< > $@
|
||||
|
||||
# Preprocess source text: normalize punctuation, tokenize, then truecase
# with the downloaded truecasing model (second prerequisite).
%.$(SRC).pre: %.$(SRC).txt $(DIR)/truecase-model.$(SRC)
	$(MOSES_SCRIPTS)/tokenizer/normalize-punctuation.perl -l $(SRC) < $< \
	  | $(MOSES_SCRIPTS)/tokenizer/tokenizer.perl -l $(SRC) -a \
	  | $(MOSES_SCRIPTS)/recaser/truecase.perl -model $(word 2,$^) \
	  > $@
|
||||
|
||||
|
||||
# Extract the plain-text segments from the reference and source SGML files.
# Static pattern rule: each listed .txt target is built from the .sgm file
# of the same name.
$(DIR)/test-$(SRC)$(TRG)-ref.$(TRG).txt $(DIR)/test-$(SRC)$(TRG)-src.$(SRC).txt: %.txt: %.sgm
	./extract_segs.py < $< > $@
|
||||
|
||||
# Unpack the reference and source SGML files from the test-set archive.
# The archive member is derived from the target name:
#   $(DIR)/test-<rest>  ->  test/$(DOMAIN)test2016-<rest>
# The member is streamed to standard output and captured in the target.
$(DIR)/test-$(SRC)$(TRG)-ref.$(TRG).sgm $(DIR)/test-$(SRC)$(TRG)-src.$(SRC).sgm: $(DIR)/test.tgz
	tar --to-stdout -zxvf $< $(patsubst $(DIR)/test-%,test/$(DOMAIN)test2016-%,$@) > $@
|
||||
|
||||
# Download the WMT16 test-set archive into the working directory.
# The transfer is written to a temporary name and renamed only on success:
# `wget -O` creates its output file even when the download fails, and a
# truncated archive stored directly at $@ would be treated as up to date.
$(DIR)/test.tgz:
	mkdir -p $(@D)
	wget 'http://data.statmt.org/wmt16/translation-task/test.tgz' -O $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
|
||||
|
||||
# Fetch the model files for $(DIRECTION) into the working directory by
# running the download script (the sole prerequisite).
# NOTE(review): a header with several targets defines one independent rule
# per target, so under `make -j` the script may be started more than once in
# parallel; consider a stamp file or a grouped target if that matters.
$(addprefix $(DIR)/,model.npz vocab.$(SRC).json vocab.$(TRG).json $(SRC)$(TRG).bpe truecase-model.$(SRC) config.yml): ../../scripts/download_models.py
	mkdir -p $(DIR)
	$< -w $(DIR) -m $(DIRECTION)
|
10
tests/wmt16/extract_segs.py
Executable file
10
tests/wmt16/extract_segs.py
Executable file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
for line in sys.stdin:
|
||||
m = re.search(ur'<seg id="\d+">(.*)</seg>', line)
|
||||
if m:
|
||||
print m.group(1)
|
Loading…
Reference in New Issue
Block a user