Bug fix to the filtering step for hierarchical models

This commit is contained in:
phikoehn 2012-03-23 07:17:08 +00:00
parent 1794bccd90
commit 4d0fc996ba
3 changed files with 19 additions and 1 deletions

View File

@ -2159,11 +2159,21 @@ sub define_tuningevaluation_filter {
$settings .= " --Hierarchical" if &get("TRAINING:hierarchical-rule-set");
# create pseudo-config file
my $config = "$dir/tuning/moses.table.ini.$VERSION";
my $config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION";
my $cmd = &get_training_setting(9);
$cmd .= &get_table_name_settings("translation-factors","phrase-translation-table",$phrase_translation_table);
$cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table)
if $reordering_table;
# additional settings for hierarchical models
if (&get("TRAINING:hierarchical-rule-set")) {
my $extract_version = $VERSION;
$extract_version = $RE_USE[$STEP_LOOKUP{"TRAINING:extract-phrases"}]
if defined($STEP_LOOKUP{"TRAINING:extract-phrases"});
my $glue_grammar_file = &get("TRAINING:glue-grammar");
$glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""),$extract_version)
unless $glue_grammar_file;
$cmd .= "-glue-grammar-file $glue_grammar_file ";
}
$cmd .= "-lm 0:3:$dir "; # dummy
$cmd .= "-config $config\n";

View File

@ -68,6 +68,8 @@ sub detokenize {
$text =~ s/\&bar;/\|/g;
$text =~ s/\&lt;/\</g;
$text =~ s/\&gt;/\>/g;
$text =~ s/\&bra;/\[/g;
$text =~ s/\&ket;/\]/g;
$text =~ s/\&amp;/\&/g;
my $word;

View File

@ -70,6 +70,10 @@ sub tokenize {
chomp($text);
$text = " $text ";
# remove ASCII junk
$text =~ s/\s+/ /g;
$text =~ s/[\000-\037]//g;
# separate out all "other" special characters
$text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
@ -149,6 +153,8 @@ sub tokenize {
$text =~ s/\|/\&bar;/g;
$text =~ s/\</\&lt;/g;
$text =~ s/\>/\&gt;/g;
$text =~ s/\[/\&bra;/g;
$text =~ s/\]/\&ket;/g;
#ensure final line break
$text .= "\n" unless $text =~ /\n$/;