mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-06 19:49:41 +03:00
Merge branch 'perf_moses2' of github.com:hieuhoang/mosesdecoder into perf_moses2
This commit is contained in:
commit
a4cfb60c2c
@ -108,7 +108,7 @@ else {
|
||||
|
||||
# create model
|
||||
|
||||
print "Training Transliteration Module - End ".`date`;
|
||||
print STDERR "Training Transliteration Module - End ".`date`;
|
||||
|
||||
sub learn_transliteration_model{
|
||||
|
||||
@ -116,7 +116,7 @@ sub learn_transliteration_model{
|
||||
|
||||
`cp $OUT_DIR/training/corpus$t.$OUTPUT_EXTENSION $OUT_DIR/lm/target`;
|
||||
|
||||
print "Align Corpus\n";
|
||||
print STDERR "Align Corpus\n";
|
||||
|
||||
`$MOSES_SRC_DIR/scripts/training/train-model.perl \\
|
||||
-mgiza -mgiza-cpus 10 -dont-zip -last-step 1 \\
|
||||
@ -148,7 +148,7 @@ sub learn_transliteration_model{
|
||||
-alignment-file $OUT_DIR/model/aligned \\
|
||||
-alignment-stem $OUT_DIR/model/aligned -alignment grow-diag-final-and`;
|
||||
|
||||
print "Train Translation Models\n";
|
||||
print STDERR "Train Translation Models\n";
|
||||
|
||||
`$MOSES_SRC_DIR/scripts/training/train-model.perl \\
|
||||
-mgiza -mgiza-cpus 10 -dont-zip -first-step 4 -last-step 4 \\
|
||||
@ -175,7 +175,7 @@ sub learn_transliteration_model{
|
||||
-lexical-file $OUT_DIR/model/lex -phrase-translation-table \\
|
||||
$OUT_DIR/model/phrase-table`;
|
||||
|
||||
print "Train Language Models\n";
|
||||
print STDERR "Train Language Models\n";
|
||||
|
||||
`$SRILM_DIR/ngram-count \\
|
||||
-order 5 -interpolate -kndiscount -addsmooth1 0.0 -unk \\
|
||||
@ -184,7 +184,7 @@ sub learn_transliteration_model{
|
||||
`$MOSES_SRC_DIR/bin/build_binary \\
|
||||
$OUT_DIR/lm/targetLM $OUT_DIR/lm/targetLM.bin`;
|
||||
|
||||
print "Create Config File\n";
|
||||
print STDERR "Create Config File\n";
|
||||
|
||||
`$MOSES_SRC_DIR/scripts/training/train-model.perl \\
|
||||
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
|
||||
@ -216,7 +216,7 @@ sub train_transliteration_module{
|
||||
|
||||
`mkdir $OUT_DIR/model`;
|
||||
`mkdir $OUT_DIR/lm`;
|
||||
print "Preparing Corpus\n";
|
||||
print STDERR "Preparing Corpus\n";
|
||||
`$MOSES_SRC_DIR/scripts/Transliteration/corpusCreator.pl $OUT_DIR 1-1.$INPUT_EXTENSION-$OUTPUT_EXTENSION.mined-pairs $INPUT_EXTENSION $OUTPUT_EXTENSION`;
|
||||
|
||||
if (-e "$OUT_DIR/training/corpusA.$OUTPUT_EXTENSION")
|
||||
@ -228,7 +228,7 @@ sub train_transliteration_module{
|
||||
learn_transliteration_model("");
|
||||
}
|
||||
|
||||
print "Running Tuning for Transliteration Module\n";
|
||||
print STDERR "Running Tuning for Transliteration Module\n";
|
||||
|
||||
`touch $OUT_DIR/tuning/moses.table.ini`;
|
||||
|
||||
@ -258,12 +258,12 @@ my $count = 0;
|
||||
my $l1 = 1;
|
||||
my $l2 = 1;
|
||||
|
||||
print "Creating Model\n";
|
||||
print STDERR "Creating Model\n";
|
||||
|
||||
print "Extracting 1-1 Alignments\n";
|
||||
print STDERR "Extracting 1-1 Alignments\n";
|
||||
`$MOSES_SRC_DIR/bin/1-1-Extraction $OUT_DIR/f $OUT_DIR/e $OUT_DIR/a > $OUT_DIR/1-1.$inp_ext-$op_ext`;
|
||||
|
||||
print "Cleaning the list for Miner\n";
|
||||
print STDERR "Cleaning the list for Miner\n";
|
||||
|
||||
`$MOSES_SRC_DIR/scripts/Transliteration/clean.pl $OUT_DIR/1-1.$inp_ext-$op_ext > $OUT_DIR/1-1.$inp_ext-$op_ext.cleaned`;
|
||||
|
||||
@ -274,11 +274,11 @@ print "Cleaning the list for Miner\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
print "Extracting Transliteration Pairs \n";
|
||||
print STDERR "Extracting Transliteration Pairs \n";
|
||||
`$MOSES_SRC_DIR/bin/TMining $OUT_DIR/1-1.$inp_ext-$op_ext.cleaned > $OUT_DIR/1-1.$inp_ext-$op_ext.pair-probs`;
|
||||
}
|
||||
|
||||
print "Selecting Transliteration Pairs with threshold 0.5 \n";
|
||||
print STDERR "Selecting Transliteration Pairs with threshold 0.5 \n";
|
||||
`echo 0.5 | $MOSES_SRC_DIR/scripts/Transliteration/threshold.pl $OUT_DIR/1-1.$inp_ext-$op_ext.pair-probs > $OUT_DIR/1-1.$inp_ext-$op_ext.mined-pairs`;
|
||||
|
||||
}
|
||||
@ -289,7 +289,7 @@ sub reduce_factors {
|
||||
|
||||
my @INCLUDE = sort {$a <=> $b} split(/,/,$factors);
|
||||
|
||||
print "Reducing factors to produce $reduced @ ".`date`;
|
||||
print STDERR "Reducing factors to produce $reduced @ ".`date`;
|
||||
while(-e $reduced.".lock") {
|
||||
sleep(10);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user