mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-13 10:40:52 +03:00
bug fix to filter hierarchical
This commit is contained in:
parent
1794bccd90
commit
4d0fc996ba
@@ -2159,11 +2159,21 @@ sub define_tuningevaluation_filter {
|
||||
$settings .= " --Hierarchical" if &get("TRAINING:hierarchical-rule-set");
|
||||
|
||||
# create pseudo-config file
|
||||
my $config = "$dir/tuning/moses.table.ini.$VERSION";
|
||||
my $config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION";
|
||||
my $cmd = &get_training_setting(9);
|
||||
$cmd .= &get_table_name_settings("translation-factors","phrase-translation-table",$phrase_translation_table);
|
||||
$cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table)
|
||||
if $reordering_table;
|
||||
# additional settings for hierarchical models
|
||||
if (&get("TRAINING:hierarchical-rule-set")) {
|
||||
my $extract_version = $VERSION;
|
||||
$extract_version = $RE_USE[$STEP_LOOKUP{"TRAINING:extract-phrases"}]
|
||||
if defined($STEP_LOOKUP{"TRAINING:extract-phrases"});
|
||||
my $glue_grammar_file = &get("TRAINING:glue-grammar");
|
||||
$glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""),$extract_version)
|
||||
unless $glue_grammar_file;
|
||||
$cmd .= "-glue-grammar-file $glue_grammar_file ";
|
||||
}
|
||||
$cmd .= "-lm 0:3:$dir "; # dummy
|
||||
$cmd .= "-config $config\n";
|
||||
|
||||
|
@@ -68,6 +68,8 @@ sub detokenize {
|
||||
$text =~ s/\&bar;/\|/g;
|
||||
$text =~ s/\&lt;/\</g;
|
||||
$text =~ s/\&gt;/\>/g;
|
||||
$text =~ s/\&bra;/\[/g;
|
||||
$text =~ s/\&ket;/\]/g;
|
||||
$text =~ s/\&amp;/\&/g;
|
||||
|
||||
my $word;
|
||||
|
@@ -70,6 +70,10 @@ sub tokenize {
|
||||
chomp($text);
|
||||
$text = " $text ";
|
||||
|
||||
# remove ASCII junk
|
||||
$text =~ s/\s+/ /g;
|
||||
$text =~ s/[\000-\037]//g;
|
||||
|
||||
# separate out all "other" special characters
|
||||
$text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
|
||||
|
||||
@@ -149,6 +153,8 @@ sub tokenize {
|
||||
$text =~ s/\|/\&bar;/g;
|
||||
$text =~ s/\</\&lt;/g;
|
||||
$text =~ s/\>/\&gt;/g;
|
||||
$text =~ s/\[/\&bra;/g;
|
||||
$text =~ s/\]/\&ket;/g;
|
||||
|
||||
#ensure final line break
|
||||
$text .= "\n" unless $text =~ /\n$/;
|
||||
|
Loading…
Reference in New Issue
Block a user