minor improvements: binarizing of rule tables in the filter script, support for multiple reference translations in EMS analysis

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3284 1f5c12ca-751b-0410-a591-d2e778427230
phkoehn 2010-05-28 22:19:58 +00:00
parent 69d92bfec3
commit 4e0bc582f6
9 changed files with 181 additions and 59 deletions

View File

@@ -32,14 +32,17 @@ wmt10-data = $working-dir/data
# moses decoder
decoder = $moses-src-dir/moses-cmd/src/moses
# conversion of phrase table into binary format
# conversion of phrase table into binary on-disk format
ttable-binarizer = $moses-src-dir/misc/processPhraseTable
# tokenizers
# conversion of rule table into binary on-disk format
#ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# truecasers
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl
detruecaser = $moses-script-dir/recaser/detruecase.perl
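The commented-out ttable-binarizer line above is the rule-table binarizer used with the chart decoder; in the chart-decoder example configs below it is enabled and processPhraseTable is commented out instead. A minimal sketch, with placeholder paths, of what running that binarizer on an already filtered rule table amounts to (the real command is assembled by filter-model-given-input.pl, which this commit also changes):

#!/usr/bin/perl -w
# Sketch only, not part of the commit; both paths are placeholders.
use strict;

my $binarizer  = "/path/to/mosesdecoder/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2";
my $rule_table = "/path/to/filtered/rule-table";  # text rule table written by the filter script

# input table first, output name second, as in the filter script's hierarchical branch
my $cmd = "$binarizer $rule_table $rule_table.bin";
print STDERR "$cmd\n";
system($cmd) == 0 or die "rule table binarization failed";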

View File

@@ -32,14 +32,17 @@ wmt10-data = $working-dir/data
# moses decoder
decoder = $moses-src-dir/moses-cmd/src/moses
# conversion of phrase table into binary format
# conversion of phrase table into binary on-disk format
ttable-binarizer = $moses-src-dir/misc/processPhraseTable
# tokenizers
# conversion of rule table into binary on-disk format
#ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# truecasers
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl
detruecaser = $moses-script-dir/recaser/detruecase.perl

View File

@@ -32,14 +32,17 @@ wmt10-data = $working-dir/data
# moses decoder
decoder = $moses-src-dir/moses-chart-cmd/src/moses_chart
# conversion of phrase table into binary format
# conversion of phrase table into binary on-disk format
#ttable-binarizer = $moses-src-dir/misc/processPhraseTable
# tokenizers
# conversion of rule table into binary on-disk format
ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# truecasers
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl
detruecaser = $moses-script-dir/recaser/detruecase.perl

View File

@@ -32,14 +32,17 @@ wmt10-data = $working-dir/data
# moses decoder
decoder = $moses-src-dir/moses-chart-cmd/src/moses_chart
# conversion of phrase table into binary format
# conversion of phrase table into binary on-disk format
#ttable-binarizer = $moses-src-dir/misc/processPhraseTable
# tokenizers
# conversion of rule table into binary on-disk format
ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# truecasers
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl
detruecaser = $moses-script-dir/recaser/detruecase.perl

View File

@@ -32,14 +32,17 @@ toy-data = $moses-script-dir/ems/example/data
# moses decoder
decoder = $moses-src-dir/moses-cmd/src/moses
# conversion of phrase table into binary format
# conversion of phrase table into binary on-disk format
ttable-binarizer = $moses-src-dir/misc/processPhraseTable
# tokenizers
# conversion of rule table into binary on-disk format
#ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# truecasers
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl
detruecaser = $moses-script-dir/recaser/detruecase.perl

View File

@@ -1133,8 +1133,14 @@ sub check_info {
return 0;
}
print "\tcheck '$VALUE{$parameter}' eq '$INFO{$parameter}' -> " if $VERBOSE;
if (&match_info_strings($VALUE{$parameter},$INFO{$parameter})) { print "ok\n" if $VERBOSE; }
else { print "mismatch\n" if $VERBOSE; return 0; }
if (defined($INFO{$parameter})
&& &match_info_strings($VALUE{$parameter},$INFO{$parameter})) {
print "ok\n" if $VERBOSE;
}
else {
print "mismatch\n" if $VERBOSE;
return 0;
}
}
print "\tall parameters match\n" if $VERBOSE;
return 1;
@@ -1500,7 +1506,7 @@ sub define_tuning_tune {
$tuning_settings = "" unless $tuning_settings;
my $filter = "$scripts/training/filter-model-given-input.pl";
$filter .= " -Binarizer $binarizer" if $binarizer;
$filter .= " -Binarizer \"$binarizer\"" if $binarizer;
if (&get("TRAINING:hierarchical-rule-set")) {
$filter .= " --Hierarchical";
#$filter .= " --MaxSpan 9999" if &get("GENERAL:input-parser") || &get("GENERAL:output-parser");
@@ -1961,7 +1967,7 @@ sub define_evaluation_decode {
my $filter = "$scripts/training/filter-model-given-input.pl";
$filter .= " $dir/evaluation/filtered.$set.$VERSION $config $input_filter";
$filter .= " -Binarizer $binarizer" if $binarizer;
$filter .= " -Binarizer \"$binarizer\"" if $binarizer;
if (&get("TRAINING:hierarchical-rule-set")) {
$filter .= " --Hierarchical";

View File

@@ -26,15 +26,34 @@ my (%PRECISION_CORRECT,%PRECISION_TOTAL,
if (defined($system) || defined($reference)) {
die("you need to you specify both system and reference, not just either")
unless defined($system) && defined($reference);
die("can't open system file $system") if ! -e $system;
die("can't open system file $reference") if ! -e $reference;
@SYSTEM = `cat $system`;
@REFERENCE = `cat $reference`;
chop(@SYSTEM);
chop(@REFERENCE);
if (! -e $reference && -e $reference.".ref0") {
for(my $i=0;-e $reference.".ref".$i;$i++) {
my @REF = `cat $reference.ref$i`;
chop(@REF);
for(my $j=0;$j<scalar(@REF);$j++) {
push @{$REFERENCE[$j]}, $REF[$j];
}
}
}
else {
die("can't open system file $reference") if ! -e $reference;
@REFERENCE = `cat $reference`;
chop(@REFERENCE);
}
for(my $i=0;$i<scalar @SYSTEM;$i++) {
&add_match($SYSTEM[$i],$REFERENCE[$i],
\%PRECISION_CORRECT,\%PRECISION_TOTAL);
&add_match($REFERENCE[$i],$SYSTEM[$i],
\%RECALL_CORRECT,\%RECALL_TOTAL);
}
open(SUMMARY,">$dir/summary");
&create_n_gram_stats();
&best_matches(\%PRECISION_CORRECT,\%PRECISION_TOTAL,"$dir/n-gram-precision");
&best_matches(\%RECALL_CORRECT,\%RECALL_TOTAL,"$dir/n-gram-recall");
&bleu_annotation();
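# Illustrative layout, not part of the script (file names invented): when the
# plain reference file is absent but numbered ones exist,
#   <reference>         missing
#   <reference>.ref0    first set of reference translations
#   <reference>.ref1    second set, and so on,
# each $REFERENCE[$i] becomes an array reference holding one string per set,
#   $REFERENCE[$i] = [ "the house is small", "the home is tiny" ];
# while with a single reference file it stays a plain string, which is why the
# subroutines below test ref($REFERENCE[$i]) eq 'ARRAY'.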
@@ -59,15 +78,6 @@ if (defined($ttable) || defined($corpus)) {
&input_annotation();
}
sub create_n_gram_stats {
for(my $i=0;$i<scalar @SYSTEM;$i++) {
&add_match($SYSTEM[$i],$REFERENCE[$i],
\%PRECISION_CORRECT,\%PRECISION_TOTAL);
&add_match($REFERENCE[$i],$SYSTEM[$i],
\%RECALL_CORRECT,\%RECALL_TOTAL);
}
}
sub best_matches {
my ($CORRECT,$TOTAL,$out) = @_;
my $type = ($out =~ /precision/) ? "precision" : "recall";
@@ -91,6 +101,7 @@ sub best_matches {
sub input_phrases {
open(INPUT,$input) or die "Can't read input $input";
while(my $line = <INPUT>) {
$line =~ s/\|\S+//g;
&extract_n_grams($line,\%INPUT_PHRASE);
}
close(INPUT);
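# Illustrative only: the substitution above strips factor annotation from factored
# input, e.g. "das|ART Haus|NN" becomes "das Haus" before n-grams are collected;
# the same stripping is added to corpus_coverage and input_annotation below.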
@@ -105,7 +116,7 @@ sub bleu_annotation {
$system =~ s/ $//;
my (%SYS_NGRAM,%REF_NGRAM);
&extract_n_grams( $system, \%SYS_NGRAM );
&extract_n_grams( $REFERENCE[$i], \%REF_NGRAM );
&extract_n_grams_arrayopt( $REFERENCE[$i], \%REF_NGRAM, "max" );
my @WORD = split(/ /,$system);
my @MATCH;
@@ -133,9 +144,20 @@ sub bleu_annotation {
$bleu *= ($ngram_correct/(scalar(@WORD)-$length+2));
}
$bleu = $bleu ** (1/4);
my @RW = split(/ /,$REFERENCE[$i]);
my $ref_length = scalar(@RW);
if (scalar(@WORD) < $ref_length) {
my $ref_length = 9999;
if (ref($REFERENCE[$i]) eq 'ARRAY') {
foreach my $ref (@{$REFERENCE[$i]}) {
my @RW = split(/ /,$ref);
$ref_length = scalar(@RW) if scalar(@RW) < $ref_length;
}
}
else {
my @RW = split(/ /,$REFERENCE[$i]);
$ref_length = scalar(@RW);
}
if (scalar(@WORD) < $ref_length && scalar(@WORD)>0) {
$bleu *= exp(1-$ref_length/scalar(@WORD));
}
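# Restating the visible part of the score above (not from the diff): with c the
# candidate length and r now the length of the shortest reference, the sentence
# is annotated with a BLEU-style score
#   score = BP * (p_1 * p_2 * p_3 * p_4)^(1/4),   BP = exp(1 - r/c) if c < r, else 1,
# where p_n are the n-gram precisions accumulated in the loop above.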
@@ -144,7 +166,15 @@ sub bleu_annotation {
print OUT " " if $i;
print OUT "$WORD[$i]|$MATCH[$i]";
}
print OUT "\t".$REFERENCE[$i]."\n";
if (ref($REFERENCE[$i]) eq 'ARRAY') {
foreach my $ref (@{$REFERENCE[$i]}) {
print OUT "\t".$ref;
}
}
else {
print OUT "\t".$REFERENCE[$i]
}
print OUT "\n";
}
close(OUT);
}
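# Illustrative output line written above (tokens and counts invented): every
# system word is annotated as word|match, and each reference now follows as its
# own tab-separated field, e.g.
#   the|1 house|1 is|1 tiny|0 <tab> the house is small <tab> the home is tiny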
@@ -152,8 +182,8 @@ sub bleu_annotation {
sub add_match {
my ($system,$reference,$CORRECT,$TOTAL) = @_;
my (%SYS_NGRAM,%REF_NGRAM);
&extract_n_grams( $system, \%SYS_NGRAM );
&extract_n_grams( $reference, \%REF_NGRAM );
&extract_n_grams_arrayopt( $system, \%SYS_NGRAM, "min" );
&extract_n_grams_arrayopt( $reference, \%REF_NGRAM, "max" );
foreach my $length (keys %SYS_NGRAM) {
foreach my $ngram (keys %{$SYS_NGRAM{$length}}) {
my $sys_count = $SYS_NGRAM{$length}{$ngram};
@@ -176,7 +206,8 @@ sub ttable_coverage {
open(TTABLE,$ttable) or die "Can't read ttable $ttable";
}
open(REPORT,">$dir/ttable-coverage-by-phrase");
my ($last_in,$last_size,$entropy,$size) = ("",0,0);
my ($last_in,$last_size,$size) = ("",0);
my @DISTRIBUTION = ();
while(<TTABLE>) {
chop;
my ($in,$out,$scores) = split(/ \|\|\| /);
@@ -185,30 +216,44 @@ sub ttable_coverage {
next unless defined($INPUT_PHRASE{$size}{$in});
$TTABLE_COVERED{$size}{$in}++;
my @SCORE = split(/ /,$scores);
my $p = $SCORE[2]; # forward probability
if ($in ne $last_in) {
if ($last_in ne "") {
my $entropy = &compute_entropy(@DISTRIBUTION);
printf REPORT "%s\t%d\t%.5f\n",$last_in,$TTABLE_COVERED{$last_size}{$last_in},$entropy;
$TTABLE_ENTROPY{$last_size}{$last_in} = $entropy;
$entropy = 0;
@DISTRIBUTION = ();
}
$last_in = $in;
$last_size = $size;
}
# TODO: normalized entropy?
$entropy -= $p*log($p)/log(2);
push @DISTRIBUTION, $SCORE[2]; # forward probability
}
my $entropy = &compute_entropy(@DISTRIBUTION);
printf REPORT "%s\t%d\t%.5f\n",$last_in,$TTABLE_COVERED{$last_size}{$last_in},$entropy;
$TTABLE_ENTROPY{$last_size}{$last_in} = $entropy;
close(REPORT);
close(TTABLE);
&additional_coverage_reports("ttable",\%TTABLE_COVERED);
}
sub compute_entropy {
my $z = 0; # normalization
foreach my $p (@_) {
$z += $p;
}
my $entropy = 0;
foreach my $p (@_) {
$entropy -= ($p/$z)*log($p/$z)/log(2);
}
return $entropy;
}
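# In formula form (a restatement of the helper above): with p_i the forward
# probabilities gathered in @DISTRIBUTION and Z = \sum_i p_i, it returns
#   H = - \sum_i (p_i / Z) \log_2 (p_i / Z),
# the entropy of the normalized translation distribution of one source phrase.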
sub corpus_coverage {
# compute how often input phrases occur in the corpus
open(CORPUS,$corpus) or die "Can't read corpus $corpus";
while(<CORPUS>) {
s/\|\S+//g;
my @WORD = split;
my $sentence_length = scalar @WORD;
for(my $start=0;$start < $sentence_length;$start++) {
@@ -269,6 +314,7 @@ sub input_annotation {
open(INPUT,$input) or die "Can't read input $input";
while(<INPUT>) {
chop;
s/\|\S+//g;
print OUT $_."\t";
my @WORD = split;
my $sentence_length = scalar @WORD;
@@ -287,7 +333,7 @@ sub input_annotation {
$corpus_covered = 0 unless defined($corpus_covered);
if (defined($TTABLE_COVERED{$length}{$phrase})) {
printf OUT "%d-%d:%d:%d:%.5f ",$start,$start+$length-1,$corpus_covered,$ttable_covered,$ttable_entropy
printf OUT "%d-%d:%d:%d:%.5f ",$start,$start+$length-1,$corpus_covered,$ttable_covered,$ttable_entropy;
}
}
}
@@ -297,8 +343,49 @@ sub input_annotation {
close(OUT);
}
sub extract_n_grams_arrayopt {
my ($sentence,$NGRAM,$minmax) = @_;
if (ref($sentence) eq 'ARRAY') {
my %MINMAX_NGRAM;
&extract_n_grams($$sentence[0],\%MINMAX_NGRAM);
for(my $i=1;$i<scalar(@{$sentence});$i++) {
my %SET_NGRAM;
&extract_n_grams($$sentence[$i],\%SET_NGRAM);
for(my $length=1;$length<=$MAX_LENGTH;$length++) {
if ($minmax eq "min") {
foreach my $ngram (keys %{$MINMAX_NGRAM{$length}}) {
if (!defined($SET_NGRAM{$length}{$ngram})) {
delete( $MINMAX_NGRAM{$length}{$ngram} );
}
elsif($MINMAX_NGRAM{$length}{$ngram} > $SET_NGRAM{$length}{$ngram}) {
$MINMAX_NGRAM{$length}{$ngram} = $SET_NGRAM{$length}{$ngram};
}
}
}
else {
foreach my $ngram (keys %{$SET_NGRAM{$length}}) {
if (!defined($MINMAX_NGRAM{$length}{$ngram}) ||
$SET_NGRAM{$length}{$ngram} > $MINMAX_NGRAM{$length}{$ngram}) {
$MINMAX_NGRAM{$length}{$ngram} = $SET_NGRAM{$length}{$ngram};
}
}
}
}
}
for(my $length=1;$length<=$MAX_LENGTH;$length++) {
foreach my $ngram (keys %{$MINMAX_NGRAM{$length}}) {
$$NGRAM{$length}{$ngram} += $MINMAX_NGRAM{$length}{$ngram};
}
}
}
else {
&extract_n_grams($sentence,$NGRAM);
}
}
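# Usage sketch (sentences invented, not part of the script): with "max" the count
# kept for an n-gram is the highest count found in any one of the references,
# which is what reference-side clipping needs; with "min" only n-grams present in
# every set survive, each with its lowest count.
#   my %NGRAM;
#   &extract_n_grams_arrayopt([ "the the cat", "the cat" ], \%NGRAM, "max");
#   # $NGRAM{1}{"the"} == 2, $NGRAM{1}{"cat"} == 1, $NGRAM{2}{"the cat"} == 1, ...
# A plain string argument simply falls through to extract_n_grams.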
sub extract_n_grams {
my ($sentence,$NGRAM) = @_;
$sentence =~ s/\s+/ /g;
$sentence =~ s/^ //;
$sentence =~ s/ $//;

View File

@@ -94,7 +94,7 @@ while(<INI>) {
}
my ($phrase_table_impl,$source_factor,$t,$w,$file) = ($1,$2,$3,$4,$5);
if ($phrase_table_impl ne "0" && $phrase_table_impl ne "6") {
if (($phrase_table_impl ne "0" && $phrase_table_impl ne "6") || $file =~ /glue-grammar/) {
# Only Memory ("0") and NewFormat ("6") can be filtered.
print INI_OUT $table_spec;
next;
@@ -110,7 +110,12 @@ while(<INI>) {
$cnt ++ while (defined $new_name_used{"$new_name.$cnt"});
$new_name .= ".$cnt";
$new_name_used{$new_name} = 1;
print INI_OUT "$phrase_table_impl $source_factor $t $w $new_name\n";
if ($binarizer && $phrase_table_impl == 6) {
print INI_OUT "2 $source_factor $t $w $new_name.bin\n";
}
else {
print INI_OUT "$phrase_table_impl $source_factor $t $w $new_name\n";
}
push @TABLE_NEW_NAME,$new_name;
$CONSIDER_FACTORS{$source_factor} = 1;
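# Illustrative effect of the branch above (path and weight count invented): with a
# binarizer configured, a filtered NewFormat rule table line such as
#   6 0 0 5 /working-dir/filtered/rule-table.0-0
# is written to the filtered moses.ini as table type 2 pointing at the binarized file,
#   2 0 0 5 /working-dir/filtered/rule-table.0-0.bin
# matching the $new_file.bin created by the hierarchical binarizer call further below.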
@@ -240,14 +245,24 @@ for(my $i=0;$i<=$#TABLE;$i++) {
}
if(defined($binarizer)) {
print STDERR "binarizing...";
# translation model
if ($KNOWN_TTABLE{$i}) {
print STDERR "binarizing...";
my $cmd = "cat $new_file | LC_ALL=C sort -T $dir | $binarizer -ttable 0 0 - -nscores $TABLE_WEIGHTS[$i] -out $new_file";
print STDERR $cmd."\n";
print STDERR `$cmd`;
# ... hierarchical translation model
if ($opt_hierarchical) {
my $cmd = "$binarizer $new_file $new_file.bin";
print STDERR $cmd."\n";
print STDERR `$cmd`;
}
# ... phrase translation model
else {
my $cmd = "cat $new_file | LC_ALL=C sort -T $dir | $binarizer -ttable 0 0 - -nscores $TABLE_WEIGHTS[$i] -out $new_file";
print STDERR $cmd."\n";
print STDERR `$cmd`;
}
}
# reordering model
else {
print STDERR "binarizing...";
my $lexbin = $binarizer; $lexbin =~ s/PhraseTable/LexicalTable/;
my $cmd = "$lexbin -in $new_file -out $new_file";
print STDERR $cmd."\n";

View File

@@ -29,16 +29,14 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_HMM_ALIGN, $_CONFIG,
$_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_EXTRACT_OPTIONS,$_SCORE_OPTIONS,
$_PHRASE_WORD_ALIGNMENT,
$_PHRASE_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
$_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS);
my $debug = 0; # debug this script, do not delete any files in debug mode
# the following line is set at installation time by 'make release'. BEWARE!
my $BINDIR="";
my $force_factored_filenames = 0;
my $BINDIR="/home/pkoehn/statmt/bin";
$_HELP = 1
unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
@@ -62,6 +60,7 @@ $_HELP = 1
'first-step=i' => \$_FIRST_STEP,
'last-step=i' => \$_LAST_STEP,
'giza-option=s' => \$_GIZA_OPTION,
'giza-extension=s' => \$_GIZA_EXTENSION,
'parallel' => \$_PARALLEL,
'lm=s' => \@_LM,
'help' => \$_HELP,
@@ -103,7 +102,7 @@ $_HELP = 1
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
'do-steps=s' => \$_DO_STEPS,
'memscore:s' => \$_MEMSCORE,
'force-factored-filenames' => \$force_factored_filenames,
'force-factored-filenames' => \$_FORCE_FACTORED_FILENAMES,
);
if ($_HELP) {
@@ -436,7 +435,7 @@ for my $mtype ( keys %REORDERING_MODEL_TYPES) {
}
### Factored translation models
my $___NOT_FACTORED = !$force_factored_filenames;
my $___NOT_FACTORED = !$_FORCE_FACTORED_FILENAMES;
my $___ALIGNMENT_FACTORS = "0-0";
$___ALIGNMENT_FACTORS = $_ALIGNMENT_FACTORS if defined($_ALIGNMENT_FACTORS);
die("ERROR: format for alignment factors is \"0-0\" or \"0,1,2-0,1\", you provided $___ALIGNMENT_FACTORS\n") if $___ALIGNMENT_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*$/;
@@ -1602,8 +1601,8 @@ sub create_ini {
foreach my $model (@REORDERING_MODELS) {
$weight_d_count += $model->{"numfeatures"};
my $table_file = "$___MODEL_DIR/reordering-table";
$table_file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
$table_file .= ".$factor" unless $___NOT_FACTORED;
$table_file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
$table_file .= ".";
$table_file .= $model->{"filename"};
$table_file .= ".gz";