mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
Update the training scripts to support the new format parameter for 'ttable-file'.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3082 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
8839474d3d
commit
c6d20e1f9f
@ -18,11 +18,11 @@ while (<INI>) {
|
||||
if (/^[0-9]/) {
|
||||
if ($section eq "ttable-file" || $section eq "lmodel-file") {
|
||||
chomp;
|
||||
my ($a, $b, $c, $fn) = split / /;
|
||||
my ($a, $b, $c, $d, $fn) = split / /;
|
||||
$abs = ensure_absolute($fn, $ini);
|
||||
die "File not found or empty: $fn (interpreted as $abs)"
|
||||
if ! -s $abs;
|
||||
$_ = "$a $b $c $abs\n";
|
||||
$_ = "$a $b $c $d $abs\n";
|
||||
}
|
||||
if ($section eq "generation-file") {
|
||||
chomp;
|
||||
|
@ -24,7 +24,7 @@ while (<INI>) {
|
||||
if (/^[0-9]/) {
|
||||
if ($section eq "ttable-file") {
|
||||
chomp;
|
||||
my ($src, $tgt, $c, $fn) = split / /;
|
||||
my ($phrase_table_impl, $src, $tgt, $c, $fn) = split / /;
|
||||
# $fn = ensure_relative_to_origin($fn, $ini);
|
||||
my $ttstats = get_ttable_stats($fn);
|
||||
print_ttable_stats($src, $tgt, $fn, $ttstats);
|
||||
|
@ -39,14 +39,14 @@ while (<INI>) {
|
||||
if (/^[0-9]/) {
|
||||
if ($section eq "ttable-file" || $section eq "lmodel-file") {
|
||||
chomp;
|
||||
my ($a, $b, $c, $fn) = split / /;
|
||||
my ($a, $b, $c, $d, $fn) = split / /;
|
||||
$cnt{$section}++;
|
||||
$fn = fixpath($fn);
|
||||
$fn = ensure_relative_from_origin($fn, $ini);
|
||||
$fn = ensure_exists_or_gzipped_exists($fn);
|
||||
my $suffix = ($fn =~ /\.gz$/ ? ".gz" : "");
|
||||
clone_file_or_die($fn, "./$section.$cnt{$section}$suffix");
|
||||
$_ = "$a $b $c ./$section.$cnt{$section}$suffix\n";
|
||||
$_ = "$a $b $c $d ./$section.$cnt{$section}$suffix\n";
|
||||
}
|
||||
if ($section eq "generation-file") {
|
||||
chomp;
|
||||
|
@ -61,11 +61,21 @@ while(<INI>) {
|
||||
if (/ttable-file\]/) {
|
||||
while(1) {
|
||||
my $table_spec = <INI>;
|
||||
if ($table_spec !~ /^([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
|
||||
if ($table_spec !~ /^(\d+) ([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
|
||||
print INI_OUT $table_spec;
|
||||
last;
|
||||
}
|
||||
my ($source_factor,$t,$weights,$file) = ($1,$2,$3,$4);
|
||||
my ($phrase_table_impl,$source_factor,$t,$weights,$file) = ($1,$2,$3,$4);
|
||||
|
||||
my $new_phrase_table_impl;
|
||||
if ($phrase_table_impl eq "0") { # Memory
|
||||
my $new_phrase_table_impl = 1; # Binary
|
||||
}
|
||||
else {
|
||||
# Can only filter memory-based phrase tables.
|
||||
print INI_OUT $table_spec;
|
||||
next;
|
||||
}
|
||||
|
||||
chomp($file);
|
||||
push @TABLE, $file;
|
||||
@ -73,7 +83,7 @@ while(<INI>) {
|
||||
$BINARIZABLE{$#TABLE}++;
|
||||
|
||||
my $new_name = "$dir/phrase-table.$source_factor-$t.".(++$TABLE_NUMBER{"$source_factor-$t"});
|
||||
print INI_OUT "$source_factor $t $weights $new_name\n";
|
||||
print INI_OUT "$new_phrase_table_impl $source_factor $t $weights $new_name\n";
|
||||
push @TABLE_NEW_NAME,$new_name;
|
||||
|
||||
$CONSIDER_FACTORS{$source_factor} = 1;
|
||||
|
@ -64,11 +64,16 @@ while(<INI>) {
|
||||
if (/ttable-file\]/) {
|
||||
while(1) {
|
||||
my $table_spec = <INI>;
|
||||
if ($table_spec !~ /^([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
|
||||
if ($table_spec !~ /^(\d+) ([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
|
||||
print INI_OUT $table_spec;
|
||||
last;
|
||||
}
|
||||
my ($source_factor,$t,$w,$file) = ($1,$2,$3,$4);
|
||||
my ($phrase_table_impl,$source_factor,$t,$w,$file) = ($1,$2,$3,$4,$5);
|
||||
|
||||
if ($phrase_table_impl ne "0") { # Memory
|
||||
print INI_OUT $table_spec;
|
||||
next;
|
||||
}
|
||||
|
||||
chomp($file);
|
||||
push @TABLE, $file;
|
||||
@ -78,7 +83,7 @@ while(<INI>) {
|
||||
$cnt ++ while (defined $new_name_used{"$new_name.$cnt"});
|
||||
$new_name .= ".$cnt";
|
||||
$new_name_used{$new_name} = 1;
|
||||
print INI_OUT "$source_factor $t $w $new_name\n";
|
||||
print INI_OUT "$phrase_table_impl $source_factor $t $w $new_name\n";
|
||||
push @TABLE_NEW_NAME,$new_name;
|
||||
|
||||
$CONSIDER_FACTORS{$source_factor} = 1;
|
||||
|
@ -1127,7 +1127,7 @@ sub scan_config {
|
||||
|
||||
# in which field (counting from zero) is the filename to check?
|
||||
my %where_is_filename = (
|
||||
"ttable-file" => 3,
|
||||
"ttable-file" => 4,
|
||||
"generation-file" => 3,
|
||||
"lmodel-file" => 3,
|
||||
"distortion-file" => 3,
|
||||
@ -1136,7 +1136,7 @@ sub scan_config {
|
||||
# by default, each line of each section means one lambda, but some sections
|
||||
# explicitly state a custom number of lambdas
|
||||
my %where_is_lambda_count = (
|
||||
"ttable-file" => 2,
|
||||
"ttable-file" => 3,
|
||||
"generation-file" => 2,
|
||||
"distortion-file" => 2,
|
||||
);
|
||||
|
@ -1082,7 +1082,7 @@ sub scan_config {
|
||||
|
||||
# in which field (counting from zero) is the filename to check?
|
||||
my %where_is_filename = (
|
||||
"ttable-file" => 3,
|
||||
"ttable-file" => 4,
|
||||
"generation-file" => 3,
|
||||
"lmodel-file" => 3,
|
||||
"distortion-file" => 3,
|
||||
@ -1091,7 +1091,7 @@ sub scan_config {
|
||||
# by default, each line of each section means one lambda, but some sections
|
||||
# explicitly state a custom number of lambdas
|
||||
my %where_is_lambda_count = (
|
||||
"ttable-file" => 2,
|
||||
"ttable-file" => 3,
|
||||
"generation-file" => 2,
|
||||
"distortion-file" => 2,
|
||||
);
|
||||
|
@ -1598,7 +1598,7 @@ sub create_ini {
|
||||
$ff =~ s/\-/ /;
|
||||
my $file = "$___MODEL_DIR/phrase-table".($___NOT_FACTORED ? "" : ".$f").".gz";
|
||||
$file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
|
||||
print INI "$ff 5 $file\n";
|
||||
print INI "0 $ff 5 $file\n";
|
||||
}
|
||||
if ($num_of_ttables != $stepsused{"T"}) {
|
||||
print STDERR "WARNING: Your [mapping-steps] require translation steps up to id $stepsused{T} but you defined translation steps 0..$num_of_ttables\n";
|
||||
|
Loading…
Reference in New Issue
Block a user