Update the training scripts to support the new format parameter for 'ttable-file'


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3082 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2010-04-09 11:37:43 +00:00
parent 8839474d3d
commit c6d20e1f9f
8 changed files with 31 additions and 16 deletions

View File

@ -18,11 +18,11 @@ while (<INI>) {
if (/^[0-9]/) {
if ($section eq "ttable-file" || $section eq "lmodel-file") {
chomp;
my ($a, $b, $c, $fn) = split / /;
my ($a, $b, $c, $d, $fn) = split / /;
$abs = ensure_absolute($fn, $ini);
die "File not found or empty: $fn (interpreted as $abs)"
if ! -s $abs;
$_ = "$a $b $c $abs\n";
$_ = "$a $b $c $d $abs\n";
}
if ($section eq "generation-file") {
chomp;

View File

@ -24,7 +24,7 @@ while (<INI>) {
if (/^[0-9]/) {
if ($section eq "ttable-file") {
chomp;
my ($src, $tgt, $c, $fn) = split / /;
my ($phrase_table_impl, $src, $tgt, $c, $fn) = split / /;
# $fn = ensure_relative_to_origin($fn, $ini);
my $ttstats = get_ttable_stats($fn);
print_ttable_stats($src, $tgt, $fn, $ttstats);

View File

@ -39,14 +39,14 @@ while (<INI>) {
if (/^[0-9]/) {
if ($section eq "ttable-file" || $section eq "lmodel-file") {
chomp;
my ($a, $b, $c, $fn) = split / /;
my ($a, $b, $c, $d, $fn) = split / /;
$cnt{$section}++;
$fn = fixpath($fn);
$fn = ensure_relative_from_origin($fn, $ini);
$fn = ensure_exists_or_gzipped_exists($fn);
my $suffix = ($fn =~ /\.gz$/ ? ".gz" : "");
clone_file_or_die($fn, "./$section.$cnt{$section}$suffix");
$_ = "$a $b $c ./$section.$cnt{$section}$suffix\n";
$_ = "$a $b $c $d ./$section.$cnt{$section}$suffix\n";
}
if ($section eq "generation-file") {
chomp;

View File

@ -61,11 +61,21 @@ while(<INI>) {
if (/ttable-file\]/) {
while(1) {
my $table_spec = <INI>;
if ($table_spec !~ /^([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
if ($table_spec !~ /^(\d+) ([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
print INI_OUT $table_spec;
last;
}
my ($source_factor,$t,$weights,$file) = ($1,$2,$3,$4);
# Unpack all five fields captured by the ttable-file pattern above:
# implementation id, source factors, target factors, weight count, file name.
# The list must include $5, otherwise $file stays undefined.
my ($phrase_table_impl,$source_factor,$t,$weights,$file) = ($1,$2,$3,$4,$5);
# Implementation id written to the filtered ini for this table.
my $new_phrase_table_impl;
if ($phrase_table_impl eq "0") { # Memory
# Assign the variable declared above; a second "my" here would create a
# block-local copy and leave the outer one undefined when it is printed.
$new_phrase_table_impl = 1; # Binary
}
else {
# Can only filter memory-based phrase tables.
print INI_OUT $table_spec;
next;
}
chomp($file);
push @TABLE, $file;
@ -73,7 +83,7 @@ while(<INI>) {
$BINARIZABLE{$#TABLE}++;
my $new_name = "$dir/phrase-table.$source_factor-$t.".(++$TABLE_NUMBER{"$source_factor-$t"});
print INI_OUT "$source_factor $t $weights $new_name\n";
print INI_OUT "$new_phrase_table_impl $source_factor $t $weights $new_name\n";
push @TABLE_NEW_NAME,$new_name;
$CONSIDER_FACTORS{$source_factor} = 1;

View File

@ -64,11 +64,16 @@ while(<INI>) {
if (/ttable-file\]/) {
while(1) {
my $table_spec = <INI>;
if ($table_spec !~ /^([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
if ($table_spec !~ /^(\d+) ([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
print INI_OUT $table_spec;
last;
}
my ($source_factor,$t,$w,$file) = ($1,$2,$3,$4);
my ($phrase_table_impl,$source_factor,$t,$w,$file) = ($1,$2,$3,$4,$5);
if ($phrase_table_impl ne "0") { # Memory
print INI_OUT $table_spec;
next;
}
chomp($file);
push @TABLE, $file;
@ -78,7 +83,7 @@ while(<INI>) {
$cnt ++ while (defined $new_name_used{"$new_name.$cnt"});
$new_name .= ".$cnt";
$new_name_used{$new_name} = 1;
print INI_OUT "$source_factor $t $w $new_name\n";
print INI_OUT "$phrase_table_impl $source_factor $t $w $new_name\n";
push @TABLE_NEW_NAME,$new_name;
$CONSIDER_FACTORS{$source_factor} = 1;

View File

@ -1127,7 +1127,7 @@ sub scan_config {
# in which field (counting from zero) is the filename to check?
my %where_is_filename = (
"ttable-file" => 3,
"ttable-file" => 4,
"generation-file" => 3,
"lmodel-file" => 3,
"distortion-file" => 3,
@ -1136,7 +1136,7 @@ sub scan_config {
# by default, each line of each section means one lambda, but some sections
# explicitly state a custom number of lambdas
my %where_is_lambda_count = (
"ttable-file" => 2,
"ttable-file" => 3,
"generation-file" => 2,
"distortion-file" => 2,
);

View File

@ -1082,7 +1082,7 @@ sub scan_config {
# in which field (counting from zero) is the filename to check?
my %where_is_filename = (
"ttable-file" => 3,
"ttable-file" => 4,
"generation-file" => 3,
"lmodel-file" => 3,
"distortion-file" => 3,
@ -1091,7 +1091,7 @@ sub scan_config {
# by default, each line of each section means one lambda, but some sections
# explicitly state a custom number of lambdas
my %where_is_lambda_count = (
"ttable-file" => 2,
"ttable-file" => 3,
"generation-file" => 2,
"distortion-file" => 2,
);

View File

@ -1598,7 +1598,7 @@ sub create_ini {
$ff =~ s/\-/ /;
my $file = "$___MODEL_DIR/phrase-table".($___NOT_FACTORED ? "" : ".$f").".gz";
$file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
print INI "$ff 5 $file\n";
print INI "0 $ff 5 $file\n";
}
if ($num_of_ttables != $stepsused{"T"}) {
print STDERR "WARNING: Your [mapping-steps] require translation steps up to id $stepsused{T} but you defined translation steps 0..$num_of_ttables\n";