mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
d7ed014c5d
@ -687,7 +687,6 @@ public:
|
||||
checks if a feature function should be evaluated given the
|
||||
current weight setting */
|
||||
bool IsDecodingGraphIgnored( const size_t id ) const {
|
||||
std::cerr << "IsFeatureFunctionIgnored( " << id << " )" << std::endl;
|
||||
if (!GetHasAlternateWeightSettings()) {
|
||||
return false;
|
||||
}
|
||||
@ -697,7 +696,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
const std::set< size_t > &ignoreDP = lookupIgnoreDP->second;
|
||||
std::cerr << "IsFeatureFunctionIgnored( " << id << " ) = " << ignoreDP.count( id ) << std::endl;
|
||||
return ignoreDP.count( id );
|
||||
}
|
||||
|
||||
|
@ -415,7 +415,7 @@ score-settings = "--GoodTuring"
|
||||
|
||||
### sparse lexical features
|
||||
#
|
||||
#sparse-lexical-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
#sparse-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
|
||||
### domain adaptation settings
|
||||
# options: sparse, any of: indicator, subset, ratio
|
||||
|
@ -434,7 +434,7 @@ score-settings = "--GoodTuring"
|
||||
|
||||
### sparse lexical features
|
||||
#
|
||||
#sparse-lexical-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
#sparse-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
|
||||
### domain adaptation settings
|
||||
# options: sparse, any of: indicator, subset, ratio
|
||||
|
@ -413,7 +413,7 @@ score-settings = "--GoodTuring"
|
||||
|
||||
### sparse lexical features
|
||||
#
|
||||
#sparse-lexical-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
#sparse-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
|
||||
### domain adaptation settings
|
||||
# options: sparse, any of: indicator, subset, ratio
|
||||
|
@ -417,7 +417,7 @@ score-settings = "--GoodTuring"
|
||||
|
||||
### sparse lexical features
|
||||
#
|
||||
#sparse-lexical-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
#sparse-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
|
||||
### domain adaptation settings
|
||||
# options: sparse, any of: indicator, subset, ratio
|
||||
|
@ -398,7 +398,7 @@ score-settings = "--GoodTuring"
|
||||
|
||||
### sparse lexical features
|
||||
#
|
||||
#sparse-lexical-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
#sparse-features = "target-word-insertion top 50, source-word-deletion top 50, word-translation top 50 50, phrase-length"
|
||||
|
||||
### domain adaptation settings
|
||||
# options: sparse, any of: indicator, subset, ratio
|
||||
|
@ -576,7 +576,7 @@ build-sparse
|
||||
out: sparse
|
||||
ignore-unless: sparse-features
|
||||
rerun-on-change: sparse-features
|
||||
default-name: model/most-frequent-words
|
||||
default-name: model/sparse-features
|
||||
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
|
||||
create-config
|
||||
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm
|
||||
|
@ -46,20 +46,20 @@ foreach my $feature_spec (split(/,\s*/,$specification)) {
|
||||
$ini .= "\n";
|
||||
}
|
||||
elsif ($SPEC[0] eq 'word-translation') {
|
||||
$ini .= "WordTranslationFeature input-factor=0 output-factor=0 simple=1 source-context=0 target-context=0";
|
||||
|
||||
my $extra_ini = "";
|
||||
if ($SPEC[1] eq 'top' && $SPEC[2] =~ /^\d+$/ && $SPEC[3] =~ /^\d+$/) {
|
||||
my $file_in = &create_top_words($input_extension, $SPEC[2]);
|
||||
my $file_out = &create_top_words($output_extension, $SPEC[3]);
|
||||
$ini .= " source-path=$file_in target-path=$file_out"
|
||||
$extra_ini .= " source-path=$file_in target-path=$file_out"
|
||||
}
|
||||
elsif ($SPEC[1] eq 'all') {
|
||||
|
||||
# nothing to specify
|
||||
}
|
||||
else {
|
||||
die("ERROR: Unknown parameter specification in '$feature_spec'\n");
|
||||
die("ERROR: Unknown parameter specification in '$SPEC[1]'\n");
|
||||
}
|
||||
$ini .= "\n";
|
||||
my ($input_factor,$output_factor) = split(/\-/,$factor);
|
||||
$ini .= "WordTranslationFeature input-factor=$input_factor output-factor=$output_factor simple=1 source-context=0 target-context=0$extra_ini\n";
|
||||
}
|
||||
elsif ($SPEC[0] eq 'phrase-length') {
|
||||
$ini .= "PhraseLengthFeature\n";
|
||||
|
@ -135,7 +135,7 @@ sub interpolate {
|
||||
die "Failed to mix models: $mixerr" if $mixexitcode != 0;
|
||||
my $mix = $mixout;
|
||||
`rm $tmp/iplm.$$.*`;
|
||||
$mix =~ /best lambda \(([\d\. ]+)\)/ || die("ERROR: computing lambdas failed: $mix");
|
||||
$mix =~ /best lambda \(([\d\. e-]+)\)/ || die("ERROR: computing lambdas failed: $mix");
|
||||
my @LAMBDA = split(/ /,$1);
|
||||
|
||||
# create new language model
|
||||
|
169
scripts/training/convert-moses-ini-to-v2.perl
Executable file
169
scripts/training/convert-moses-ini-to-v2.perl
Executable file
@ -0,0 +1,169 @@
|
||||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
|
||||
# Text seen since the last section header (comment lines, blank lines).
# It is printed in front of sections that are copied verbatim and thrown
# away in front of sections that are converted or dropped.
my $header = "";
my @INI = <STDIN>;

# Old numeric implementation codes and the feature names they map to.
my %TTABLE_IMPLEMENTATION = ( 0 => "PhraseDictionaryMemory",
                              1 => "PhraseDictionaryBinary" );
my %LM_IMPLEMENTATION = ( 0 => "SRILM",
                          8 => "KENLM lazyken=0" );

# %FEATURE: section name -> reference to the value lines of that section
# %WEIGHT:  weight name (section name after "weight-") -> value lines
my (%FEATURE,%WEIGHT);

# Index into @INI of the line being processed. get_data() advances it
# while it consumes the value lines that follow a section header.
my $i=0;

for(; $i<scalar(@INI); $i++) {
  my $line = $INI[$i];
  if ($line =~ /^\[(.+)\]/) {
    my $section = $1;
    if ($section eq "ttable-file" ||
        $section eq "distortion-file" ||
        $section eq "generation-file" ||
        $section eq "lmodel-file" ||
        $section eq "ttable-limit" ||
        $section eq "target-word-insertion-feature" ||
        $section eq "source-word-deletion-feature" ||
        $section eq "word-translation-feature" ||
        $section eq "phrase-length-feature") {
      # model sections are stored and converted after the whole file is read
      $FEATURE{$section} = get_data();
    }
    elsif ($section =~ /weight-(.+)/ && $section ne "weight-file") {
      $WEIGHT{$1} = get_data();
    }
    elsif ($section eq "report-sparse-features") {
      # This section is dropped. Its value lines have to be consumed as
      # well, otherwise they end up in $header and are printed as stray
      # lines in front of the next verbatim section or at the end.
      get_data();
    }
    else {
      print STDERR "include section [$section] verbatim.\n";
      print $header.$line;
      my $SECTION = get_data();
      foreach (@{$SECTION}) {
        print $_."\n";
      }
    }
    $header = "";
  }
  else {
    $header .= $line;
  }
}
# whatever followed the last section
print $header;
|
||||
|
||||
# Build the text of the new [feature] and [weight] sections from the
# section data collected in %FEATURE and %WEIGHT.
my ($feature,$weight) = ("","");

$feature .= "UnknownWordPenalty\n";
$weight .= "UnknownWordPenalty0= 1\n";

$feature .= "WordPenalty\n";
$weight .= "WordPenalty0= ".$WEIGHT{"w"}[0]."\n";

# the first value of [weight-d] is the distance-based distortion weight;
# any further values belong to the lexicalized reordering models below
$feature .= "Distortion\n";
$weight .= "Distortion0= ".$WEIGHT{"d"}[0]."\n";

# Sorted so that repeated runs on the same input produce identical output
# (plain hash order is not stable between runs).
foreach my $section (sort keys %FEATURE) {
  if ($section eq "phrase-length-feature") {
    $feature .= "PhraseLengthFeature name=pl\n";
  }
  elsif ($section eq "target-word-insertion-feature") {
    my ($factor,$file) = split(/ /,$FEATURE{$section}[0]);
    $feature .= "TargetWordInsertionFeature name=twi factor=$factor";
    $feature .= " path=$file" if defined($file);
    $feature .= "\n";
  }
  # The section is read in under the name "source-word-deletion-feature";
  # testing for any other name means this branch is never taken.
  elsif ($section eq "source-word-deletion-feature") {
    my ($factor,$file) = split(/ /,$FEATURE{$section}[0]);
    $feature .= "SourceWordDeletionFeature name=swd factor=$factor";
    $feature .= " path=$file" if defined($file);
    $feature .= "\n";
  }
  elsif ($section eq "word-translation-feature") {
    # fields 3-6 of the old specification are not carried over
    my ($factors,$simple,$dummy1,$dummy2,$dummy3,$dummy4,$file_f,$file_e) = split(/ /,$FEATURE{$section}[0]);
    my ($input_factor,$output_factor) = split(/\-/, $factors);
    $feature .= "WordTranslationFeature name=wt input-factor=$input_factor output-factor=$output_factor simple=$simple source-context=0 target-context=0";
    $feature .= " source-path=$file_f target-path=$file_e" if defined($file_f);
    $feature .= "\n";
  }
  elsif ($section eq "ttable-file") {
    my $table = 0;
    # [ttable-limit] may be absent; dereferencing the missing entry would
    # abort the script under "use strict"
    my @TTABLE_LIMIT = defined($FEATURE{"ttable-limit"}) ? @{$FEATURE{"ttable-limit"}} : ();
    my @W = @{$WEIGHT{"t"}};
    foreach my $line (@{$FEATURE{$section}}) {
      my ($imp, $input_factor, $output_factor, $weight_count, $file) = split(/ /,$line);
      my $implementation = $TTABLE_IMPLEMENTATION{$imp};
      if (!defined($implementation)) {
        # report the unrecognised code from the ini file
        print STDERR "ERROR: Unknown translation table implementation: $imp\n";
        $implementation = "UNKNOWN";
      }
      $feature .= "$implementation name=TranslationModel$table num-features=$weight_count path=$file input-factor=$input_factor output-factor=$output_factor";
      $feature .= " ttable-limit=".$TTABLE_LIMIT[$table] if $#TTABLE_LIMIT >= $table;
      $feature .= "\n";
      # each table takes the next $weight_count values of [weight-t]
      $weight .= "TranslationModel$table=".get_weights(\@W,$weight_count)."\n";
      $table++;
    }
  }
  elsif ($section eq "generation-file") {
    my $model = 0;
    my @W = @{$WEIGHT{"generation"}};
    foreach my $line (@{$FEATURE{$section}}) {
      my ($input_factor,$output_factor,$weight_count,$file) = split(/ /,$line);
      $feature .= "Generation name=GenerationModel$model num-features=$weight_count path=$file input-factor=$input_factor output-factor=$output_factor\n";
      $weight .= "GenerationModel$model=".get_weights(\@W,$weight_count)."\n";
      $model++;
    }
  }
  elsif ($section eq "distortion-file") {
    my $model = 0;
    my @W = @{$WEIGHT{"d"}};
    # skip the first value, it was already used for Distortion0 above
    my $ignore = shift @W;
    foreach my $line (@{$FEATURE{$section}}) {
      my ($factors,$type,$weight_count,$file) = split(/ /,$line);
      my ($input_factor,$output_factor) = split(/\-/, $factors);
      $feature .= "LexicalReordering name=LexicalReordering$model num-features=$weight_count type=$type input-factor=$input_factor output-factor=$output_factor path=$file\n";
      $weight .= "LexicalReordering$model=".get_weights(\@W,$weight_count)."\n";
      $model++;
    }
  }
  elsif ($section eq "lmodel-file") {
    my $model = 0;
    my @W = @{$WEIGHT{"l"}};
    foreach my $line (@{$FEATURE{$section}}) {
      my ($imp,$factor,$order,$file) = split(/ /,$line);
      my $implementation = $LM_IMPLEMENTATION{$imp};
      if (!defined($implementation)) {
        # report the unrecognised code from the ini file
        print STDERR "ERROR: Unknown language model implementation: $imp\n";
        $implementation = "UNKNOWN";
      }
      $feature .= "$implementation name=LM$model factor=$factor path=$file order=$order\n";
      # one weight per language model
      $weight .= "LM$model=".get_weights(\@W,1)."\n";
      $model++;
    }
  }
}

print "\n[feature]\n$feature\n";
print "\n[weight]\n$weight\n";
|
||||
|
||||
# Collects the value lines of the section whose header is at $INI[$i].
#
# Works on the file-level @INI and $i: starting with the line after $i it
# gathers lines until it meets a blank line, a line starting with "[" or
# "#", or the end of @INI. On return $i is the index of the last line that
# was consumed, so the caller's own increment moves on to the line that
# ended the section.
#
# Returns a reference to the list of collected lines with their line
# terminator removed.
sub get_data {
  my @DATA;
  while (++$i < scalar(@INI) &&
         $INI[$i] !~ /^\s*$/ &&
         $INI[$i] !~ /^\[/ &&
         $INI[$i] !~ /^\#/) {
    push @DATA,$INI[$i];
  }
  $i--;
  # Remove only an actual line terminator: chop() would also cut the last
  # character of a final line that does not end in a newline.
  s/\r?\n\z// foreach @DATA;
  return \@DATA;
}
|
||||
|
||||
# Takes the next $count values off the front of the weight list and formats
# them for a line of the [weight] section.
#
#   $W      reference to a list of weight values; the values that are used
#           are removed from it, so consecutive calls walk through the list
#   $count  number of values to take
#
# Returns the values as one string, each preceded by a single space.
sub get_weights {
  my ($W,$count) = @_;
  my $list = "";
  for(my $w=0;$w<$count;$w++) {
    my $value = shift @{$W};
    if (!defined($value)) {
      print STDERR "ERROR: not enough weights: expected $count, found $w\n";
      last;
    }
    # The values normally arrive without a newline already; chop() would
    # then remove the last digit of every weight, chomp() leaves it alone.
    chomp($value);
    $list .= " $value";
  }
  return $list;
}
|
Loading…
Reference in New Issue
Block a user