Merge ../mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2016-06-23 11:11:11 +01:00
commit 0eb0bf642c
2 changed files with 35 additions and 23 deletions

View File

@ -23,16 +23,18 @@ $SCRIPTS_ROOTDIR =~ s/\/training$//;
# Command-line state: the binarizer program and the input/output config paths.
my ($binarizer, $input_config, $output_config);
# Set by the -Hierarchical switch; off by default.
my $opt_hierarchical = 0;
# Default binarizer, located relative to the scripts root; -Binarizer overrides it.
$binarizer = "$SCRIPTS_ROOTDIR/../bin/processPhraseTable";
# Value of the optional -MinScore option; stays undef when the option is not given.
my $min_score = undef;
# Parse the switches; exit with status 1 if option parsing fails.
GetOptions(
"Hierarchical" => \$opt_hierarchical,
"Binarizer=s" => \$binarizer
"Binarizer=s" => \$binarizer,
"MinScore=s" => \$min_score,
) or exit(1);
# The two config paths are taken from the next two remaining arguments.
$input_config = shift;
$output_config = shift;
# Both paths are required; otherwise print the usage message and exit with status 1.
if (!defined $input_config || !defined $output_config) {
print STDERR "usage: binarize-model.perl input-config output-config [-Binarizer binarizer]\n";
print STDERR "usage: binarize-model.perl input-config output-config [-Binarizer binarizer] [-MinScore score-def]\n";
exit 1;
}
@ -40,7 +42,9 @@ my $hierarchical = "";
# Forward the -Hierarchical switch to the filter script only when it was requested.
$hierarchical = "-Hierarchical" if $opt_hierarchical;
# Directory passed to the filter script as its first argument; its moses.ini
# is linked as the output config below.
my $targetdir = "$output_config.tables";
safesystem("$RealBin/filter-model-given-input.pl $targetdir $input_config /dev/null $hierarchical -nofilter -Binarizer \"$binarizer\"") || die "binarising failed";
# Build the filter-model-given-input.pl command line: -nofilter, with the
# chosen binarizer quoted so it reaches the script as a single argument.
my $cmd = "$RealBin/filter-model-given-input.pl $targetdir $input_config /dev/null $hierarchical -nofilter -Binarizer \"$binarizer\" ";
# Pass -MinScore through only when the option was given on the command line.
$cmd .= "-MinScore $min_score" if defined($min_score);
safesystem($cmd) || die "binarising failed";
# Remove any existing output config and replace it with a symlink to the
# moses.ini inside $targetdir.
safesystem("rm -f $output_config; ln -s $targetdir/moses.ini $output_config") || die "failed to link new ini file";
#FIXME: Why isn't this in a module?

View File

@ -336,29 +336,37 @@ for(my $i=0;$i<=$#TABLE;$i++) {
my $new_file = $TABLE_NEW_NAME[$i];
print STDERR "filtering $file -> $new_file...\n";
my $mid_file = $new_file; # used when both filtering and binarizing
if (!$opt_filter) {
# check if original file was gzipped
if ($file !~ /\.gz$/ && -e "$file.gz") {
$file .= ".gz";
}
$mid_file .= ".gz" if $file =~ /\.gz$/;
$cmd = "ln -s $file $mid_file";
safesystem($cmd) or die "Failed to make symlink";
$mid_file .= ".gz"
if $mid_file !~ /\.gz/
&& $binarizer && $binarizer =~ /processPhraseTable/;
my $openstring = mk_open_string($file);
my $mid_openstring;
if ($mid_file =~ /\.gz$/) {
$mid_openstring = "| gzip -c > $mid_file";
} else {
$mid_file .= ".gz"
if $mid_file !~ /\.gz/
&& $binarizer && $binarizer =~ /processPhraseTable/;
my $openstring = mk_open_string($file);
my $mid_openstring;
if ($mid_file =~ /\.gz$/) {
$mid_openstring = "| gzip -c > $mid_file";
$mid_openstring = ">$mid_file";
}
if (!$opt_filter) {
# not filtering
if (defined($min_score) and $KNOWN_TTABLE{$i}) {
# Threshold pruning
$cmd = "$openstring $RealBin/threshold-filter.perl $min_score $mid_openstring";
safesystem($cmd) or die "Threshold pruning of phrase table failed";
} else {
$mid_openstring = ">$mid_file";
# If we are neither filtering nor threshold pruning a phrase table, then
# we can just sym-link it.
# check if original file was gzipped
if ($file !~ /\.gz$/ && -e "$file.gz") {
$file .= ".gz";
}
$cmd = "ln -s $file $mid_file";
safesystem($cmd) or die "Failed to make symlink";
}
} else {
open(FILE_OUT,$mid_openstring) or die "Can't write to $mid_openstring";