Merge branch 'perf_moses2' of github.com:hieuhoang/mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2016-09-29 16:11:46 +01:00
commit a4cfb60c2c

View File

@ -108,7 +108,7 @@ else {
# create model
print "Training Transliteration Module - End ".`date`;
print STDERR "Training Transliteration Module - End ".`date`;
sub learn_transliteration_model{
@ -116,7 +116,7 @@ sub learn_transliteration_model{
`cp $OUT_DIR/training/corpus$t.$OUTPUT_EXTENSION $OUT_DIR/lm/target`;
print "Align Corpus\n";
print STDERR "Align Corpus\n";
`$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -last-step 1 \\
@ -148,7 +148,7 @@ sub learn_transliteration_model{
-alignment-file $OUT_DIR/model/aligned \\
-alignment-stem $OUT_DIR/model/aligned -alignment grow-diag-final-and`;
print "Train Translation Models\n";
print STDERR "Train Translation Models\n";
`$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 4 -last-step 4 \\
@ -175,7 +175,7 @@ sub learn_transliteration_model{
-lexical-file $OUT_DIR/model/lex -phrase-translation-table \\
$OUT_DIR/model/phrase-table`;
print "Train Language Models\n";
print STDERR "Train Language Models\n";
`$SRILM_DIR/ngram-count \\
-order 5 -interpolate -kndiscount -addsmooth1 0.0 -unk \\
@ -184,7 +184,7 @@ sub learn_transliteration_model{
`$MOSES_SRC_DIR/bin/build_binary \\
$OUT_DIR/lm/targetLM $OUT_DIR/lm/targetLM.bin`;
print "Create Config File\n";
print STDERR "Create Config File\n";
`$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
@ -216,7 +216,7 @@ sub train_transliteration_module{
`mkdir $OUT_DIR/model`;
`mkdir $OUT_DIR/lm`;
print "Preparing Corpus\n";
print STDERR "Preparing Corpus\n";
`$MOSES_SRC_DIR/scripts/Transliteration/corpusCreator.pl $OUT_DIR 1-1.$INPUT_EXTENSION-$OUTPUT_EXTENSION.mined-pairs $INPUT_EXTENSION $OUTPUT_EXTENSION`;
if (-e "$OUT_DIR/training/corpusA.$OUTPUT_EXTENSION")
@ -228,7 +228,7 @@ sub train_transliteration_module{
learn_transliteration_model("");
}
print "Running Tuning for Transliteration Module\n";
print STDERR "Running Tuning for Transliteration Module\n";
`touch $OUT_DIR/tuning/moses.table.ini`;
@ -258,12 +258,12 @@ my $count = 0;
my $l1 = 1;
my $l2 = 1;
print "Creating Model\n";
print STDERR "Creating Model\n";
print "Extracting 1-1 Alignments\n";
print STDERR "Extracting 1-1 Alignments\n";
`$MOSES_SRC_DIR/bin/1-1-Extraction $OUT_DIR/f $OUT_DIR/e $OUT_DIR/a > $OUT_DIR/1-1.$inp_ext-$op_ext`;
print "Cleaning the list for Miner\n";
print STDERR "Cleaning the list for Miner\n";
`$MOSES_SRC_DIR/scripts/Transliteration/clean.pl $OUT_DIR/1-1.$inp_ext-$op_ext > $OUT_DIR/1-1.$inp_ext-$op_ext.cleaned`;
@ -274,11 +274,11 @@ print "Cleaning the list for Miner\n";
}
else
{
print "Extracting Transliteration Pairs \n";
print STDERR "Extracting Transliteration Pairs \n";
`$MOSES_SRC_DIR/bin/TMining $OUT_DIR/1-1.$inp_ext-$op_ext.cleaned > $OUT_DIR/1-1.$inp_ext-$op_ext.pair-probs`;
}
print "Selecting Transliteration Pairs with threshold 0.5 \n";
print STDERR "Selecting Transliteration Pairs with threshold 0.5 \n";
`echo 0.5 | $MOSES_SRC_DIR/scripts/Transliteration/threshold.pl $OUT_DIR/1-1.$inp_ext-$op_ext.pair-probs > $OUT_DIR/1-1.$inp_ext-$op_ext.mined-pairs`;
}
@ -289,7 +289,7 @@ sub reduce_factors {
my @INCLUDE = sort {$a <=> $b} split(/,/,$factors);
print "Reducing factors to produce $reduced @ ".`date`;
print STDERR "Reducing factors to produce $reduced @ ".`date`;
while(-e $reduced.".lock") {
sleep(10);
}