A large number of changes. Besides little tweaks:

* training script now has proper default behaviour for single-factor models
* mert script has better handling of default lambda parameters; it now works with lexicalized reordering models and also with multiple model files (e.g. multiple language models)
* parallel mert script is more robust when single jobs fail: it detects the failure and resubmits the crashed (or killed) jobs
* recaser added that builds on moses
* filtering script added that also binarizes filtered model files (this will eventually be replaced when the lexicalized reordering model also uses the binary format)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1210 1f5c12ca-751b-0410-a591-d2e778427230
2024-12-28 22:45:50 +03:00 · 2007-02-13 19:22:35 +00:00 · 2007-02-13 19:22:35 +00:00 · 14839768c8
commit 14839768c8
parent e247f1da6f
7 changed files with 583 additions and 157 deletions
--- a/scripts/generic/moses-parallel.pl
+++ b/scripts/generic/moses-parallel.pl
@ -19,9 +19,8 @@ use strict;
 #######################
 #Customizable parameters 

-#parameters for submiiting processes through SGE
-#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
-my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";
+#parameters for submitting processes through Sun GridEngine
+my $queueparameters="-l mem_free=0.5G -hard";

 # look for the correct pwdcmd 
 my $pwdcmd = getPwdCmd();
@ -53,6 +52,7 @@ my $nbestfile=undef;
 my $orinbestfile=undef;
 my $nbest=undef;
 my $nbestflag=0;
+my $robust=1; # undef; # resubmit crashed jobs
 my $orilogfile="";
 my $logflag="";
 my $qsubname="MOSES";
@ -69,6 +69,7 @@ sub init(){
 	     'debug'=>\$dbg,
 	     'jobs=i'=>\$jobs,
 	     'decoder=s'=> \$mosescmd,
+	     'robust' => \$robust,
       'decoder-parameters=s'=> \$mosesparameters,
 			 'logfile=s'=> \$orilogfile,
 	     'i|inputfile|input-file=s'=> \$orifile,
@ -284,8 +285,16 @@ preparing_script();
 #launching process through the queue
 my @sgepids =();

-my $failure=0;
-foreach my $idx (@idxlist){
+# if robust switch is used, redo jobs that crashed
+my @idx_todo = ();
+foreach (@idxlist) { push @idx_todo,$_; }
+
+my $looped_once = 0;
+while((!$robust && !$looped_once) || ($robust && scalar @idx_todo)) {
+ $looped_once = 1;
+
+ my $failure=0;
+ foreach my $idx (@idx_todo){
  print STDERR "qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash\n" if $dbg; 

  $cmd="qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash >& ${jobscript}${idx}.log";
@ -302,16 +311,17 @@ foreach my $idx (@idxlist){
  close(IN);

  push @sgepids, $id;
-}
+ }

-#waiting until all jobs have finished
-my $hj = "-hold_jid " . join(" -hold_jid ", @sgepids);
+ #waiting until all jobs have finished
+ my $hj = "-hold_jid " . join(" -hold_jid ", @sgepids);

-if ($old_sge) {
+ if ($old_sge) {
  # we need to implement our own waiting script
  safesystem("echo 'date' > sync_workaround_script.sh") or kill_all_and_quit();

  my $pwd = `$pwdcmd`; chomp $pwd;
+
  my $checkpointfile = "sync_workaround_checkpoint";

  # delete previous checkpoint, if left from previous runs
@ -344,18 +354,36 @@ if ($old_sge) {
    print STDERR "Extra wait ($nr) for possibly unfinished processes.\n";
    sleep 10;
  }
-} else {
+ } else {
  # use the -sync option for qsub
  $cmd="qsub $queueparameters -sync y $hj -j y -o /dev/null -e /dev/null -N $qsubname.W -b y /bin/ls >& $qsubname.W.log";
  safesystem($cmd) or kill_all_and_quit();

  $failure=&check_exit_status();
+ }
+
+ kill_all_and_quit() if $failure && !$robust;
+
+ # check if some translations failed
+ my @idx_still_todo = check_translation();
+ if ($robust) {
+     # if robust, redo crashed jobs
+     if ((scalar @idx_still_todo) == (scalar @idxlist)) {
+	 # ... but not if all crashed
+	 print STDERR "everything crashed, not trying to resubmit jobs\n";
+	 kill_all_and_quit();
+     }
+     @idx_todo = @idx_still_todo;
+ }
+ else {
+     if (scalar (@idx_still_todo)) {
+	 print STDERR "some jobs crashed: ".join(" ",@idx_still_todo)."\n";
+	 kill_all_and_quit();
+     }
+     
+ }
 }

-kill_all_and_quit() if $failure;
-
-check_translation();
-
 #concatenating translations and removing temporary files
 concatenate_1best();
 concatenate_logs() if $logflag;
@ -509,7 +537,8 @@ sub check_translation(){
  #checking if all sentences were translated
  my $inputN;
  my $outputN;
-  foreach my $idx (@idxlist){
+  my @failed = ();
+  foreach my $idx (@idx_todo){
    if ($inputtype==0){#text input
      chomp($inputN=`wc -l ${testfile}.$splitpfx$idx | cut -d' ' -f1`);
    }
@ -522,10 +551,10 @@ sub check_translation(){
      print STDERR "Split ($idx) were not entirely translated\n";
      print STDERR "outputN=$outputN inputN=$inputN\n";
      print STDERR "outputfile=${testfile}.$splitpfx$idx.trans inputfile=${testfile}.$splitpfx$idx\n";
-      return 0;
+      push @failed,$idx;
    }
  }
-  return 1;
+  return @failed;
 }

 sub remove_temporary_files(){
--- a/scripts/recaser/recase.perl
+++ b/scripts/recaser/recase.perl
@ -0,0 +1,78 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long "GetOptions";
+
+my ($SRC,$INFILE,$RECASE_MODEL);
+my $MOSES = "moses";
+die("recase.perl --in file --model ini-file > out")
+    unless &GetOptions('in=s' => \$INFILE,
+                       'headline=s' => \$SRC,
+		       'moses=s' => \$MOSES,
+                       'model=s' => \$RECASE_MODEL)
+    && defined($INFILE)
+    && defined($RECASE_MODEL);
+
+# lowercase even in headline
+my %ALWAYS_LOWER;
+foreach ("a","after","against","al-.+","and","any","as","at","be","because","between","by","during","el-.+","for","from","his","in","is","its","last","not","of","off","on","than","the","their","this","to","was","were","which","will","with") { $ALWAYS_LOWER{$_} = 1; }
+
+# find out about the headlines: one flag per <seg> line, true while inside <hl>...</hl>
+my @HEADLINE;
+if (defined($SRC)) {
+    open(SRC,$SRC) or die "Can't read headline source file $SRC: $!\n";   # fail loudly instead of silently recasing without headline info
+    my $headline_flag = 0;
+    while(<SRC>) {
+	$headline_flag = 1 if /<hl>/;    # entering a headline block
+	$headline_flag = 0 if /<.hl>/;   # leaving it ("." matches the slash of </hl>)
+	next unless /^<seg/;             # each <seg> line yields exactly one @HEADLINE entry
+	push @HEADLINE, $headline_flag;
+    }
+    close(SRC);
+}
+
+# index of the current sentence, used to look up its headline flag
+my $sentence = 0;
+# file-system safe version of the input name (used by the cleanup command below)
+my $infile = $INFILE;
+$infile =~ s/[\.\/]/_/g;
+# run the decoder with the recasing model; its output is read one sentence per line
+open(MODEL,"$MOSES -f $RECASE_MODEL -i $INFILE -dl 1|")
+    or die "Can't run $MOSES: $!\n";
+while(<MODEL>) {
+    chomp;
+    s/\s+$//;
+    my @WORD  = split(/ /);
+
+    # uppercase initial word (an empty line has none)
+    &uppercase(\$WORD[0]) if @WORD;
+
+    # uppercase after period
+    for(my $i=1;$i<scalar(@WORD);$i++) {
+	if ($WORD[$i-1] eq '.') {
+	    &uppercase(\$WORD[$i]);
+	}
+    }
+
+    # uppercase headlines, except for words matching an entry of %ALWAYS_LOWER;
+    # entries such as "al-.+" are patterns, so they are matched as anchored
+    # regular expressions rather than looked up as literal hash keys
+    if (defined($SRC) && $HEADLINE[$sentence]) {
+	foreach my $word (@WORD) {
+	    &uppercase(\$word) unless grep { $word =~ /^(?:$_)$/ } keys %ALWAYS_LOWER;
+	}
+    }
+
+    # output
+    print join(" ",@WORD)."\n";
+    $sentence++;
+}
+close(MODEL);
+
+`rm -rf /tmp/filter.$infile`;
+
+sub uppercase {   # uppercase, in place, the first character of the string that the argument references
+    my ($W) = @_;   # $W is a reference to the word to modify
+    substr($$W,0,1) =~ tr/a-z/A-Z/;   # plain ASCII letters
+    substr($$W,0,1) =~ tr/à-þ/À-Þ/;   # accented letters; NOTE(review): presumably assumes a single-byte (Latin-1) encoding -- confirm
+}
--- a/scripts/recaser/train-recaser.perl
+++ b/scripts/recaser/train-recaser.perl
@ -0,0 +1,98 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long "GetOptions";
+
+binmode(STDIN, ":utf8");
+binmode(STDOUT, ":utf8");
+
+# apply switches
+my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG);
+my $NGRAM_COUNT = "ngram-count";
+my $TRAIN_SCRIPT = "train-factored-phrase-model.perl";
+my $MAX_LEN = 1;
+my $FIRST_STEP = 1;
+my $LAST_STEP = 11;
+die("train-recaser.perl --dir recaser --corpus cased")
+    unless &GetOptions('first-step=i' => \$FIRST_STEP,
+                       'last-step=i' => \$LAST_STEP,
+                       'corpus=s' => \$CORPUS,
+                       'config=s' => \$CONFIG,
+		       'dir=s' => \$DIR,
+		       'ngram-count=s' => \$NGRAM_COUNT,
+		       'train-script=s' => \$TRAIN_SCRIPT,
+		       'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR,
+		       'max-len=i' => \$MAX_LEN);
+
+# check and set default to unset parameters
+die("please specify working dir --dir") unless defined($DIR);
+die("please specify --corpus") if !defined($CORPUS) 
+                                  && $FIRST_STEP <= 2 && $LAST_STEP >= 1;
+
+# main loop
+`mkdir -p $DIR`;
+&truecase()           if 0 && $FIRST_STEP == 1;
+&train_lm()           if $FIRST_STEP <= 2;
+&prepare_data()       if $FIRST_STEP <= 3 && $LAST_STEP >= 3;
+&train_recase_model() if $FIRST_STEP <= 10 && $LAST_STEP >= 3;
+&cleanup()            if $LAST_STEP == 11;
+
+### subs ###
+
+sub truecase {
+    # TODO: not implemented yet; the only call in the main loop is disabled with "if 0"
+}
+
+sub train_lm {   # step 2: build the language model $DIR/cased.srilm.gz from the cased corpus with $NGRAM_COUNT
+    print STDERR "(2) Train language model on cased data @ ".`date`;
+    my $cmd = "$NGRAM_COUNT -text $CORPUS -lm $DIR/cased.srilm.gz -interpolate -kndiscount";
+    print STDERR $cmd."\n";
+    print STDERR `$cmd`; die "ERROR: $NGRAM_COUNT failed (status $?), no language model was built\n" if $?;   # later steps need the LM file
+}
+
+sub prepare_data {   # step 3: write $DIR/aligned.{cased,lowercased,a}: cleaned corpus, its lowercased copy and a one-to-one word alignment
+    print STDERR "\n(3) Preparing data for training recasing model @ ".`date`;
+    open(CORPUS,$CORPUS) or die "Can't read $CORPUS: $!\n";
+    open(CASED,">$DIR/aligned.cased") or die "Can't write $DIR/aligned.cased: $!\n";
+    print "$DIR/aligned.lowercased\n";
+    open(LOWERCASED,">$DIR/aligned.lowercased") or die "Can't write $DIR/aligned.lowercased: $!\n";
+    open(ALIGNMENT,">$DIR/aligned.a") or die "Can't write $DIR/aligned.a: $!\n";
+    while(<CORPUS>) {
+	next if length($_)>2000;   # skip overlong lines
+	s/\x{0}//g;                # remove NUL characters
+	s/\|//g;                   # remove "|" characters
+	s/ +/ /g;                  # squeeze runs of spaces
+	s/^ //;                    # no leading space
+	s/ [\r\n]*$/\n/;           # no trailing space before the line end
+	next if /^$/;
+	print CASED $_;
+	print LOWERCASED lc($_);
+	my $i=0;
+	foreach (split) {          # word i of the lowercased line aligns to word i of the cased line
+	    print ALIGNMENT "$i-$i ";
+	    $i++;
+	}
+	print ALIGNMENT "\n";
+    }
+    close(CORPUS);
+    close(CASED) or die "Can't write $DIR/aligned.cased: $!\n";   # buffered write errors surface at close
+    close(LOWERCASED) or die "Can't write $DIR/aligned.lowercased: $!\n";
+    close(ALIGNMENT) or die "Can't write $DIR/aligned.a: $!\n";
+}
+
+sub train_recase_model {   # steps 4 and later: run $TRAIN_SCRIPT on the aligned lowercased/cased data in $DIR
+    my $first = $FIRST_STEP < 4 ? 4 : $FIRST_STEP;   # the training script is never started before its step 4
+    print STDERR "\n(4) Training recasing model @ ".`date`;
+    my $cmd = "$TRAIN_SCRIPT --root-dir $DIR --model-dir $DIR --first-step $first --alignment a --corpus $DIR/aligned --f lowercased --e cased --max-phrase-length $MAX_LEN --lm 0:3:$DIR/cased.srilm.gz:0";
+    $cmd .= " --scripts-root-dir $SCRIPTS_ROOT_DIR" if $SCRIPTS_ROOT_DIR;   # same "--" style as the other options
+    print STDERR $cmd."\n";
+    print STDERR `$cmd`;
+    die "ERROR: $TRAIN_SCRIPT failed (status $?), recasing model was not trained\n" if $?;   # do not report success after a failed run
+}
+
+sub cleanup {   # step 11: delete the intermediate extract*, aligned* and lex* files from $DIR
+    print STDERR "\n(11) Cleaning up @ ".`date`;
+    foreach my $prefix ("extract","aligned","lex") {
+	`rm -f $DIR/${prefix}*`;
+    }
+}
--- a/scripts/training/filter-and-binarize-model-given-input.pl
+++ b/scripts/training/filter-and-binarize-model-given-input.pl
@ -0,0 +1,224 @@
+#!/usr/bin/perl -w
+# Given a moses.ini file and an input text prepare minimized translation
+# tables and a new moses.ini, so that loading of tables is much faster.
+
+# original code by Philipp Koehn
+# changes by Ondrej Bojar
+
+use strict;
+
+my $MAX_LENGTH = 10;
+# consider phrases in input up to this length
+# in other words, all phrase-tables will be truncated at least to 10 words per
+# phrase
+
+my $binarizer = shift;
+my $dir = shift;
+my $config = shift;
+my $input = shift;
+# four positional arguments are required; $input is the last one, so it is undefined whenever any is missing
+if (!defined $dir || !defined $config || !defined $input) {
+  print STDERR "usage: filter-and-binarize-model-given-input.pl binarizer targetdir moses.ini input.text\n";
+  exit 1;
+}
+
+$dir = ensure_full_path($dir);
+
+# buggy directory in place?
+if (-d $dir && ! -e "$dir/info") {
+    print STDERR "The directory $dir exists but does not belong to me. Delete $dir!\n";
+    exit(1);
+}
+
+# already filtered? check if it can be re-used: $dir/info holds the config (line 1) and input (line 2) of the earlier run
+if (-d $dir) {
+    my @INFO = `cat $dir/info`;
+    chomp(@INFO);   # strip only line ends (chop would eat a real character if the last line had no newline)
+    if($INFO[0] ne $config 
+       || ($INFO[1] ne $input && 
+	   $INFO[1].".tagged" ne $input)) {   # the recorded input name plus ".tagged" is accepted as well
+      print STDERR "WARNING: directory exists but does not match parameters:\n";
+      print STDERR "  ($INFO[0] ne $config || $INFO[1] ne $input)\n";
+      exit 1;
+    }
+    print STDERR "The filtered model was ready in $dir, not doing anything.\n";
+    exit 0;
+}
+
+
+# filter the translation and distortion tables
+safesystem("mkdir -p $dir") or die "Can't mkdir $dir";
+
+# get tables to be filtered (and modify config file)
+my (@TABLE,@TABLE_WEIGHTS,@TABLE_FACTORS,@TABLE_NEW_NAME,%CONSIDER_FACTORS,%BINARIZABLE);
+open(INI_OUT,">$dir/moses.ini") or die "Can't write $dir/moses.ini";
+open(INI,$config) or die "Can't read $config";
+while(<INI>) {
+    print INI_OUT $_;
+    if (/ttable-file\]/) {
+        while(1) {	       
+    	my $table_spec = <INI>;
+    	if ($table_spec !~ /^([\d\,\-]+) ([\d\-]+) (\d+) (\S+)$/) {
+    	    print INI_OUT $table_spec;
+    	    last;
+    	}
+    	my ($source_factor,$t,$weights,$file) = ($1,$2,$3,$4);
+
+    	chomp($file);
+    	push @TABLE, $file;
+	push @TABLE_WEIGHTS,$weights;
+	$BINARIZABLE{$#TABLE}++;
+
+    	my $new_name = "$dir/phrase-table.$source_factor-$t";
+    	print INI_OUT "$source_factor $t $weights $new_name\n";
+    	push @TABLE_NEW_NAME,$new_name;
+
+    	$CONSIDER_FACTORS{$source_factor} = 1;
+        print STDERR "Considering factor $source_factor\n";
+    	push @TABLE_FACTORS, $source_factor;
+        }
+    }
+    elsif (/distortion-file/) {
+        while(1) {
+    	  my $table_spec = <INI>;
+    	  if ($table_spec !~ /^([\d\,\-]+) (\S+) (\d+) (\S+)$/) {
+    	      print INI_OUT $table_spec;
+    	      last;
+    	}
+    	my ($factors,$t,$weights,$file) = ($1,$2,$3,$4);
+	my $source_factor = $factors;
+	$source_factor =~ s/\-\d+$//;
+
+    	chomp($file);
+    	push @TABLE,$file;
+	push @TABLE_WEIGHTS,$weights;
+
+    	$file =~ s/^.*\/+([^\/]+)/$1/g;
+    	my $new_name = "$dir/$file";
+	$new_name =~ s/\.gz//;
+    	print INI_OUT "$factors $t $weights $new_name\n";
+    	push @TABLE_NEW_NAME,$new_name;
+
+    	$CONSIDER_FACTORS{$source_factor} = 1;
+        print STDERR "Considering factor $source_factor\n";
+    	push @TABLE_FACTORS,$source_factor;
+        }
+    }
+}
+close(INI);
+close(INI_OUT);
+
+
+# get the phrase pairs appearing in the input text, up to the $MAX_LENGTH
+my %PHRASE_USED;
+open(INPUT,$input) or die "Can't read $input";
+while(my $line = <INPUT>) {
+    chomp($line);
+    my @WORD = split(/ +/,$line);
+    for(my $i=0;$i<=$#WORD;$i++) {
+        for(my $j=0;$j<$MAX_LENGTH && $j+$i<=$#WORD;$j++) {
+    	foreach (keys %CONSIDER_FACTORS) {
+    	    my @FACTOR = split(/,/);
+    	    my $phrase = "";
+    	    for(my $k=$i;$k<=$i+$j;$k++) {
+    		my @WORD_FACTOR = split(/\|/,$WORD[$k]);
+    		for(my $f=0;$f<=$#FACTOR;$f++) {
+    		    $phrase .= $WORD_FACTOR[$FACTOR[$f]]."|";
+    		}
+    		chop($phrase);
+    		$phrase .= " ";
+    	    }
+    	    chop($phrase);
+    	    $PHRASE_USED{$_}{$phrase}++;
+    	}
+        }
+    }
+}
+close(INPUT);
+
+# filter files
+for(my $i=0;$i<=$#TABLE;$i++) {
+    my ($used,$total) = (0,0);
+    my $file = $TABLE[$i];
+    my $factors = $TABLE_FACTORS[$i];
+    my $new_file = $TABLE_NEW_NAME[$i];
+    print STDERR "filtering $file -> $new_file...\n";
+
+    my $openstring;
+    if ($file !~ /\.gz$/ && -e "$file.gz") {
+      $openstring = "zcat $file.gz |";
+    } elsif ($file =~ /\.gz$/) {
+      $openstring = "zcat $file |";
+    } else {
+      $openstring = "< $file";
+    }
+
+    open(FILE,$openstring) or die "Can't open '$openstring'";
+    open(FILE_OUT,">$new_file") or die "Can't write $new_file";
+
+    while(my $entry = <FILE>) {
+        my ($foreign,$rest) = split(/ \|\|\| /,$entry,2);
+        $foreign =~ s/ $//;
+        if (defined($PHRASE_USED{$factors}{$foreign})) {
+    	print FILE_OUT $entry;
+    	$used++;
+        }
+        $total++;
+    }
+    close(FILE);
+    close(FILE_OUT);
+    die "No phrases found in $file!" if $total == 0;
+    printf STDERR "$used of $total phrases pairs used (%.2f%s) - note: max length $MAX_LENGTH\n",(100*$used/$total),'%';
+    if ($BINARIZABLE{$i}) {
+	print STDERR "binarizing...";
+	my $cmd = "cat $new_file | sort | $binarizer -ttable 0 0 - -nscores $TABLE_WEIGHTS[$i] -out $new_file";
+	print STDERR $cmd."\n";
+	print STDERR `$cmd`;
+    }
+}
+
+open(INFO,">$dir/info") or die "Can't write $dir/info";   # marker file: without it a later run rejects $dir as not belonging to this script
+print INFO "$config\n$input\n";
+close(INFO) or die "Can't write $dir/info";
+
+
+print "To run the decoder, please call:
+  moses -f $dir/moses.ini < $input\n";
+
+sub safesystem {   # run a command with system(); returns true on exit code 0, false otherwise; exits if it cannot start or dies on a signal
+  print STDERR "Executing: @_\n";
+  system(@_);
+  my $status = $?;
+  if ($status == -1) {
+      print STDERR "Failed to execute: @_\n  $!\n";
+      exit(1);
+  }
+  my $signal = $status & 127;
+  if ($signal) {
+      printf STDERR "Execution of: @_\n  died with signal %d, %s coredump\n",
+          $signal, ($status & 128) ? 'with' : 'without';
+      exit(1);
+  }
+  my $exitcode = $status >> 8;
+  print STDERR "Exit code: $exitcode\n" if $exitcode;
+  return ! $exitcode;
+}
+sub ensure_full_path {   # return a cleaned-up absolute version of the given path (relative paths are resolved against the current directory)
+    my $PATH = shift;
+    return $PATH if $PATH =~ /^\//;   # already absolute: returned unchanged
+    my $dir = `pawd 2>/dev/null`;     # try pawd first ...
+    if (!$dir) {$dir = `pwd`;}        # ... and fall back to pwd when it prints nothing
+    chomp $dir;
+    $PATH = $dir."/".$PATH;
+    $PATH =~ s/[\r\n]//g;             # drop stray line breaks
+    $PATH =~ s/\/\.\//\//g;           # "/./" -> "/"
+    $PATH =~ s/\/+/\//g;              # runs of slashes -> single slash
+    my $sanity = 0;                   # bounds the number of rewrite passes below to 10
+    while($PATH =~ /\/\.\.\// && $sanity++<10) {
+        $PATH =~ s/\/+/\//g;
+        $PATH =~ s/\/[^\/]+\/\.\.\//\//g;   # "/name/../" -> "/"
+    }
+    $PATH =~ s/\/[^\/]+\/\.\.$//;     # remove a trailing "/name/.."
+    $PATH =~ s/\/+$//;                # remove trailing slashes
+    return $PATH;
+}
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@ -141,7 +141,9 @@ for(my $i=0;$i<=$#TABLE;$i++) {
    print STDERR "filtering $file -> $new_file...\n";

    my $openstring;
-    if ($file =~ /\.gz$/) {
+    if ($file !~ /\.gz$/ && -e "$file.gz") {
+      $openstring = "zcat $file.gz |";
+    } elsif ($file =~ /\.gz$/) {
      $openstring = "zcat $file |";
    } else {
      $openstring = "< $file";
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@ -9,6 +9,8 @@

 # Revision history

+# 13 Feb 2007 Better handling of default values for lambda, now works with multiple
+#             models and lexicalized reordering
 # 11 Oct 2006 Handle different input types through parameter --inputype=[0|1]
 #             (0 for text, 1 for confusion network, default is 0) (Nicola Bertoldi)
 # 10 Oct 2006 Allow skip of filtering of phrase tables (--no-filter-phrase-table)
@ -32,25 +34,38 @@
 # 13 Oct 2004 Use alternative decoders (DWC)
 # Original version by Philipp Koehn

+
+# for each _d_istortion, _l_anguage _m_odel, _t_ranslation _m_odel and _w_ord penalty, there is a list
+# of [ default value, lower bound, upper bound ]-triples. In most cases, only one triple is used,
+# but the translation model has currently 5 features
+
 # defaults for initial values and ranges are:
+
 my $default_triples = {
-  # for each _d_istortion, _l_anguage _m_odel, _t_ranslation _m_odel and _w_ord penalty, there is a list
-  # of [ default value, lower bound, upper bound ]-triples. In most cases, only one triple is used,
-  # but the translation model has currently 5 features
-  "d" => [ [ 1.0, 0.0, 2.0 ] ],
-  "lm" => [ [ 1.0, 0.0, 2.0 ] ],
-  "tm" => [
-            [ 0.3, 0.0, 0.5 ],
-            [ 0.2, 0.0, 0.5 ],
-            [ 0.3, 0.0, 0.5 ],
-            [ 0.2, 0.0, 0.5 ],
-            [ 0.0, -1.0, 1.0 ],
-	  ],
-  "g" => [
-           [ 1.0, 0.0, 2.0 ],
-           [ 1.0, 0.0, 2.0 ],
-         ],
-  "w" => [ [ 0.0, -1.0, 1.0 ] ],
+    # these two basic models exist even if not specified, they are
+    # not associated with any model file
+    "d" => [ [ 1.0, 0.0, 2.0 ] ],   # distance-based distortion
+    "w" => [ [ 0.0, -1.0, 1.0 ] ],  # word penalty
+};
+
+my $additional_triples = {
+    # if more lambda parameters for the weights are needed
+    # (due to additional tables) use the following values for them
+    "d"  => [ [ 1.0, 0.0, 2.0 ],    # lexicalized reordering model
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ] ],
+    "lm" => [ [ 1.0, 0.0, 2.0 ] ],  # language model
+    "g"  => [ [ 1.0, 0.0, 2.0 ],    # generation model
+	      [ 1.0, 0.0, 2.0 ] ],
+    "tm" => [ [ 0.3, 0.0, 0.5 ],    # translation model
+	      [ 0.2, 0.0, 0.5 ],
+	      [ 0.3, 0.0, 0.5 ],
+	      [ 0.2, 0.0, 0.5 ],
+	      [ 0.0,-1.0, 1.0 ] ],  # ... last weight is phrase penalty
 };

 # moses.ini file uses FULL names for lambdas, while this training script internally (and on the command line)
@ -66,13 +81,10 @@ my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d gene
 my %TABLECONFIG2ABBR = map {split(/=/,$_,2)} split /\s+/, $TABLECONFIG_ABBR_MAP;

 # There are weights that do not correspond to any input file, they just increase the total number of lambdas we optimize
-my $extra_lambdas_for_model = {
-  "w" => 1,  # word penalty
-  "d" => 1,  # basic distortion
-};
-
-
-
+#my $extra_lambdas_for_model = {
+#  "w" => 1,  # word penalty
+#  "d" => 1,  # basic distortion
+#};

 my $minimum_required_change_in_weights = 0.00001;
    # stop if no lambda changes more than this
@ -218,7 +230,7 @@ if ($___INPUTTYPE == 1)
  %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;

  push @{$default_triples -> {"I"}}, [ 1.0, 0.0, 2.0 ];
-  $extra_lambdas_for_model -> {"I"} = 1; #Confusion network posterior
+  #$extra_lambdas_for_model -> {"I"} = 1; #Confusion network posterior
 }

 # Check validity of input parameters and set defaults if needed
@ -230,9 +242,6 @@ if (!defined $SCRIPTS_ROOTDIR) {

 print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";

-
-
-
 # path of script for filtering phrase tables and running the decoder
 $filtercmd="$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl" if !defined $filtercmd;

@ -250,8 +259,8 @@ $pythonpath = "$cmertdir/python" if !defined $pythonpath;

 $ENV{PYTHONPATH} = $pythonpath; # other scripts need to know

-
-die "Not executable: $filtercmd" if ! -x $filtercmd;
+my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
+die "Not executable: $just_cmd_filtercmd" if ! -x $just_cmd_filtercmd;
 die "Not executable: $cmertcmd" if ! -x $cmertcmd;
 die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
 die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
@ -298,13 +307,13 @@ $___CONFIG = $config_abs;

 # check validity of moses.ini and collect number of models and lambdas per model
 # need to make a copy of $extra_lambdas_for_model, scan_config spoils it
-my %copy_of_extra_lambdas_for_model = %$extra_lambdas_for_model;
-my ($lambdas_per_model, $models_used) = scan_config($___CONFIG, \%copy_of_extra_lambdas_for_model);
+#my %copy_of_extra_lambdas_for_model = %$extra_lambdas_for_model;
+my %used_triples = %{$default_triples};
+my ($models_used) = scan_config($___CONFIG);

-
-# Parse the lambda config string and convert it to a nice structure in the same format as $default_triples
-my $use_triples = undef;
+# Parse the lambda config string and convert it to a nice structure in the same format as $used_triples
 if (defined $___LAMBDA) {
+  my %specified_triples;
  # interpreting lambdas from command line
  foreach (split(/\s+/,$___LAMBDA)) {
      my ($name,$values) = split(/:/);
@ -314,43 +323,25 @@ if (defined $___LAMBDA) {
 	      my $start = $1;
 	      my $min = $2;
 	      my $max = $3;
-              push @{$use_triples->{$name}}, [$start, $min, $max];
+              push @{$specified_triples{$name}}, [$start, $min, $max];
 	  }
 	  else {
 	      die "Malformed feature range definition: $name => $startminmax\n";
 	  }
      } 
  }
-} else {
-  # no lambdas supplied, use the default ones, but do not forget to repeat them accordingly
-  # first for or inherent models
-  foreach my $name (keys %$extra_lambdas_for_model) {
-    foreach (1..$extra_lambdas_for_model->{$name}) {
-      die "No default weights defined for -$name"
-        if !defined $default_triples->{$name};
-      # XXX here was a deadly bug: we need a deep copy of the default values
-      my @copy = ();
-      foreach my $triple (@{$default_triples->{$name}}) {
-        my @copy_triple = @$triple;
-        push @copy, [ @copy_triple ];
-      }
-      push @{$use_triples->{$name}}, @copy;
-    }
+  # sanity checks for specified lambda triples: each model needs exactly as many triples as the config requires
+  foreach my $name (keys %used_triples) {
+      die "No lambdas specified for '$name', but ".(scalar @{$used_triples{$name}})." needed.\n"
+	  unless defined($specified_triples{$name});
+      die "Number of lambdas specified for '$name' (".(scalar @{$specified_triples{$name}}).") does not match number needed (".(scalar @{$used_triples{$name}}).")\n"
+	  if scalar @{$used_triples{$name}} != scalar @{$specified_triples{$name}};   # compare element counts, not array-reference addresses
   }
-  # and then for all models used
-  foreach my $name (keys %$models_used) {
-    foreach (1..$models_used->{$name}) {
-      die "No default weights defined for -$name"
-        if !defined $default_triples->{$name};
-      # XXX here was a deadly bug: we need a deep copy of the default values
-      my @copy = ();
-      foreach my $triple (@{$default_triples->{$name}}) {
-        my @copy_triple = @$triple;
-        push @copy, [ @copy_triple ];
-      }
-      push @{$use_triples->{$name}}, @copy;
-    }
+  foreach my $name (keys %specified_triples) {   # reject lambdas given for a model the config does not use
+      die "Lambdas specified for '$name' (".(scalar @{$specified_triples{$name}})."), but none needed.\n"
+	  unless defined($used_triples{$name});
   }
+  %used_triples = %specified_triples;
 }

 # moses should use our config
@ -363,24 +354,6 @@ if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
  die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
 }

-# walk through all lambdas the user wishes to optimize and check
-# if the number of lambdas matches
-foreach my $name (keys %$use_triples) {
-  my $expected_lambdas = $lambdas_per_model->{$name};
-  $expected_lambdas = 0 if !defined $expected_lambdas;
-  my $got_lambdas = defined $use_triples->{$name} ? scalar @{$use_triples->{$name}}  : 0;
-  if ($got_lambdas != $expected_lambdas) {
-    if ($allow_unknown_lambdas && $expected_lambdas == 0) {
-      print STDERR "Allowing to optimize $name, although I have no idea what it is.\n";
-    } else {
-      print STDERR "Wrong number of lambdas for $name. Expected (given the config file): $expected_lambdas, got: $got_lambdas.
-Use --allow-unknown-lambdas to optimize lambdas that you are just introducing
-and I cannot validate against the models mentioned in moses.ini.\n";
-      exit 1;
-    }
-  }
-}
-
 # as weights are normalized in the next steps (by cmert)
 # normalize initial LAMBDAs, too
 my $need_to_normalize = 1;
@ -399,6 +372,7 @@ my @order_of_lambdas_from_decoder = ();
 my $cwd = `pawd 2>/dev/null`; 
 if(!$cwd){$cwd = `pwd`;}
 chomp($cwd);
+
 safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";

 {
@ -440,12 +414,11 @@ if ($continue) {
  close IN;
  my @newweights = split /\s+/, $newweights;

-  # dump_triples($use_triples);
-  $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);
-  # dump_triples($use_triples);
+  #dump_triples(\%used_triples);
+  store_new_lambda_values(\%used_triples, \@order_of_lambdas_from_decoder, \@newweights);
+  #dump_triples(\%used_triples);
 }

-
 if ($___FILTER_PHRASE_TABLE){
  # filter the phrase tables wih respect to input, use --decoder-flags
  print "filtering the phrase tables... ".`date`;
@ -480,13 +453,13 @@ while(1) {
  print "run $run start at ".`date`;

  # In case something dies later, we might wish to have a copy
-  create_config($___CONFIG, "./run$run.moses.ini", $use_triples, $run, (defined$devbleu?$devbleu:"--not-estimated--"));
+  create_config($___CONFIG, "./run$run.moses.ini", \%used_triples, $run, (defined$devbleu?$devbleu:"--not-estimated--"));


  # skip if the user wanted
  if (!$skip_decoder) {
      print "($run) run decoder to produce n-best lists\n";
-      @order_of_lambdas_from_decoder = run_decoder($use_triples, $PARAMETERS, $run, \@order_of_lambdas_from_decoder, $need_to_normalize);
+      @order_of_lambdas_from_decoder = run_decoder(\%used_triples, $PARAMETERS, $run, \@order_of_lambdas_from_decoder, $need_to_normalize);
      $need_to_normalize = 0;
      safesystem("gzip -f run*out") or die "Failed to gzip run*out";
  }
@ -566,8 +539,8 @@ while(1) {
    next if $visited{$name};
    $visited{$name} = 1;
    die "The decoder produced also some '$name' scores, but we do not know the ranges for them, no way to optimize them\n"
-      if !defined $use_triples->{$name};
-    foreach my $feature (@{$use_triples->{$name}}) {
+      if !defined $used_triples{$name};
+    foreach my $feature (@{$used_triples{$name}}) {
      my ($val, $min, $max) = @$feature;
      push @CURR, $val;
      push @MIN, $min;
@ -624,7 +597,7 @@ while(1) {
  my @newweights = split /\s+/, $bestpoint;

  # update my cache of lambda values
-  $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);
+  store_new_lambda_values(\%used_triples, \@order_of_lambdas_from_decoder, \@newweights);

  ## additional stopping criterion: weights have not changed
  my $shouldstop = 1;
@ -653,7 +626,7 @@ print "Training finished at ".`date`;
 safesystem("cp init.opt run$run.init.opt") or die;
 safesystem ("cp cmert.log run$run.cmert.log") or die;

-create_config($___CONFIG, "./moses.ini", $use_triples, $run, $devbleu);
+create_config($___CONFIG, "./moses.ini", \%used_triples, $run, $devbleu);

 # just to be sure that we have the really last finished step marked
 open F, "> finished_step.txt" or die "Can't mark finished step";
@ -693,7 +666,6 @@ sub store_new_lambda_values {
    # print STDERR "Storing $i-th score as $name: $idx{$name}: $values->[$i]\n";
    $triples->{$name}->[$idx{$name}]->[0] = $values->[$i];
  }
-  return $triples;
 }

 sub dump_triples {
@ -820,7 +792,7 @@ sub create_config {
      delete($P{$abbr});
      delete($P{$ABBR2FULL{$abbr}});
      # Then feed P with the current values
-      foreach my $feature (@{$use_triples->{$abbr}}) {
+      foreach my $feature (@{$used_triples{$abbr}}) {
        my ($val, $min, $max) = @$feature;
        my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
        push @{$P{$name}}, $val;
@ -933,7 +905,6 @@ sub ensure_full_path {
 sub scan_config {
  my $ini = shift;
  my $inishortname = $ini; $inishortname =~ s/^.*\///; # for error reporting
-  my $lambda_counts = shift;
  # we get a pre-filled counts, because some lambdas are always needed (word penalty, for instance)
  # as we walk though the ini file, we record how many extra lambdas do we need
  # and finally, we report it
@ -978,12 +949,13 @@ sub scan_config {
      my @flds = split / +/;
      my $fn = $flds[$where_is_filename{$section}];
      if (defined $fn && $fn !~ /^\s+$/) {
+	  print "checking weight-count for $section\n";
        # this is a filename! check it
 	if ($fn !~ /^\//) {
 	  $error = 1;
 	  print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
 	}
-	if (! -s $fn) {
+	if (! -s $fn && ! -s "$fn.gz") {
 	  $error = 1;
 	  print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
 	}
@ -996,12 +968,20 @@ sub scan_config {
        my $needlambdas = defined $where_is_lambda_count{$section} ? $flds[$where_is_lambda_count{$section}] : 1;

        print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n" if $verbose;
-	$lambda_counts->{$shortname}+=$needlambdas;
-        if (!defined $___LAMBDA && (!defined $default_triples->{$shortname} || scalar(@{$default_triples->{$shortname}}) != $needlambdas)) {
-          print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for "
-            .scalar(@{$default_triples->{$shortname}})." weights. Cannot use the default, you must supply lambdas by hand.\n";
+        if (!defined $___LAMBDA && (!defined $additional_triples->{$shortname} || scalar(@{$additional_triples->{$shortname}}) < $needlambdas)) {
+          print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for only "
+            .scalar(@{$additional_triples->{$shortname}})." weights. Cannot use the default, you must supply lambdas by hand.\n";
          $error = 1;
        }
+	else {
+	    # note: table may use less parameters than the maximum number
+	    # of triples
+	    for(my $lambda=0;$lambda<$needlambdas;$lambda++) {
+		my ($start, $min, $max) 
+		    = @{${$additional_triples->{$shortname}}[$lambda]};
+		push @{$used_triples{$shortname}}, [$start, $min, $max];
+	    }
+	}
        $defined_files{$shortname}++;
      }
    }
@ -1018,6 +998,6 @@ sub scan_config {
    }
  }
  exit(1) if $error;
-  return ($lambda_counts, \%defined_files);
+  return (\%defined_files);
 }

--- a/scripts/training/train-factored-phrase-model.perl
+++ b/scripts/training/train-factored-phrase-model.perl
@ -11,7 +11,7 @@ use Getopt::Long "GetOptions";
 # -----------------------------------------------------
 $ENV{"LC_ALL"} = "C";

-my($_ROOT_DIR,$_CORPUS_DIR,$_GIZA_E2F,$_GIZA_F2E,$_MODEL_DIR,$_CORPUS,$_CORPUS_COMPRESSION,$_FIRST_STEP,$_LAST_STEP,$_F,$_E,$_MAX_PHRASE_LENGTH,$_LEXICAL_DIR,$_NO_LEXICAL_WEIGHTING,$_VERBOSE,$_ALIGNMENT,@_LM,$_EXTRACT_FILE,$_GIZA_OPTION,$_HELP,$_PARTS,$_DIRECTION,$_ONLY_PRINT_GIZA,$_REORDERING,$_REORDERING_SMOOTH,$_ALIGNMENT_FACTORS,$_TRANSLATION_FACTORS,$_REORDERING_FACTORS,$_GENERATION_FACTORS,$_DECODING_STEPS,$_PARALLEL, $SCRIPTS_ROOTDIR, $_FACTOR_DELIMITER);
+my($_ROOT_DIR,$_CORPUS_DIR,$_GIZA_E2F,$_GIZA_F2E,$_MODEL_DIR,$_CORPUS,$_CORPUS_COMPRESSION,$_FIRST_STEP,$_LAST_STEP,$_F,$_E,$_MAX_PHRASE_LENGTH,$_LEXICAL_DIR,$_NO_LEXICAL_WEIGHTING,$_VERBOSE,$_ALIGNMENT,@_LM,$_EXTRACT_FILE,$_GIZA_OPTION,$_HELP,$_PARTS,$_DIRECTION,$_ONLY_PRINT_GIZA,$_REORDERING,$_REORDERING_SMOOTH,$_ALIGNMENT_FACTORS,$_TRANSLATION_FACTORS,$_REORDERING_FACTORS,$_GENERATION_FACTORS,$_DECODING_STEPS,$_PARALLEL, $SCRIPTS_ROOTDIR, $_FACTOR_DELIMITER,@_PHRASE_TABLE,@_REORDERING_TABLE,$_CONFIG);

 my $debug = 0; # debug this script, do not delete any files in debug mode

@ -56,6 +56,9 @@ $_HELP = 1
 		       'decoding-steps=s' => \$_DECODING_STEPS,
 		       'scripts-root-dir=s' => \$SCRIPTS_ROOTDIR,
                       'factor-delimiter=s' => \$_FACTOR_DELIMITER,
+		       'phrase-table=s' => \@_PHRASE_TABLE,
+		       'config=s' => \$_CONFIG,
+		       'reordering-table=s' => \@_REORDERING_TABLE,
                      );

 if ($_HELP) {
@ -147,6 +150,8 @@ $___MODEL_DIR = $_MODEL_DIR if $_MODEL_DIR;
 my $___EXTRACT_FILE = $___MODEL_DIR."/extract";
 $___EXTRACT_FILE = $_EXTRACT_FILE if $_EXTRACT_FILE;

+my $___CONFIG = $___ROOT_DIR."/model/moses.ini";
+$___CONFIG = $_CONFIG if $_CONFIG;

 my $___MAX_PHRASE_LENGTH = 7;
 my $___LEXICAL_WEIGHTING = 1;
@ -167,12 +172,14 @@ if ($___LAST_STEP == 9) {
  die "use --lm factor:order:filename to specify at least one language model"
    if scalar @_LM == 0;
  foreach my $lm (@_LM) {
-    my ($f, $order, $filename) = split /:/, $lm, 3;
+    my $type = 0; # default to srilm
+    my ($f, $order, $filename);
+    ($f, $order, $filename, $type) = split /:/, $lm, 4;
    die "Wrong format of --lm. Expected: --lm factor:order:filename"
      if $f !~ /^[0-9]+$/ || $order !~ /^[0-9]+$/ || !defined $filename;
    die "Language model file not found or empty: $filename"
      if ! -s $filename;
-    push @___LM, [ $f, $order, $filename ];
+    push @___LM, [ $f, $order, $filename, $type ];
  }
 }

@ -196,16 +203,17 @@ $___REORDERING_SMOOTH = $_REORDERING_SMOOTH if $_REORDERING_SMOOTH;
 my %REORDERING_MODEL;
 my $REORDERING_LEXICAL = 0; # flag for building lexicalized reordering models
 foreach my $r (split(/,/,$___REORDERING)) {
-    if (!( $r eq "orientation-f" ||
-         $r eq "orientation-fe" ||
-         $r eq "orientation-bidirectional-f" ||
-         $r eq "orientation-bidirectional-fe" ||
+    $r =~ s/orientation/msd/;
+    if (!( $r eq "msd-f" ||
+         $r eq "msd-fe" ||
+         $r eq "msd-bidirectional-f" ||
+         $r eq "msd-bidirectional-fe" ||
         $r eq "monotonicity-f" ||
         $r eq "monotonicity-fe" ||
         $r eq "monotonicity-bidirectional-f" ||
         $r eq "monotonicity-bidirectional-fe" ||
         $r eq "distance")) {
-       print STDERR "unknwown reordering type: $r";
+       print STDERR "unknown reordering type: $r";
       exit(1);
    }
    if ($r ne "distance") { $REORDERING_LEXICAL = 1; }
@ -225,11 +233,13 @@ $___ALIGNMENT_FACTORS = $_ALIGNMENT_FACTORS if defined($_ALIGNMENT_FACTORS);
 die("format for alignment factors is \"0-0\" or \"0,1,2-0,1\", you provided $___ALIGNMENT_FACTORS\n") if $___ALIGNMENT_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*$/;

 my $___TRANSLATION_FACTORS = undef;
+$___TRANSLATION_FACTORS = "0-0" unless defined($_DECODING_STEPS); # single factor default
 $___TRANSLATION_FACTORS = $_TRANSLATION_FACTORS if defined($_TRANSLATION_FACTORS);
 die("format for translation factors is \"0-0\" or \"0-0+1-1\" or \"0-0+0,1-0,1\", you provided $___TRANSLATION_FACTORS\n") 
  if defined $___TRANSLATION_FACTORS && $___TRANSLATION_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;

 my $___REORDERING_FACTORS = undef;
+$___REORDERING_FACTORS = "0-0" if defined($_REORDERING) && ! defined($_DECODING_STEPS); # single factor default
 $___REORDERING_FACTORS = $_REORDERING_FACTORS if defined($_REORDERING_FACTORS);
 die("format for reordering factors is \"0-0\" or \"0-0+1-1\" or \"0-0+0,1-0,1\", you provided $___REORDERING_FACTORS\n") 
  if defined $___REORDERING_FACTORS && $___REORDERING_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;
@ -239,10 +249,10 @@ $___GENERATION_FACTORS = $_GENERATION_FACTORS if defined($_GENERATION_FACTORS);
 die("format for generation factors is \"0-1\" or \"0-1+0-2\" or \"0-1+0,1-1,2\", you provided $___GENERATION_FACTORS\n") 
  if defined $___GENERATION_FACTORS && $___GENERATION_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;

-my $___DECODING_STEPS = $_DECODING_STEPS;
-die("use --decoding-steps to specify decoding steps") if ( !defined $_DECODING_STEPS && $___LAST_STEP>=9 && $___FIRST_STEP<=9);
+my $___DECODING_STEPS = "t0";
+$___DECODING_STEPS = $_DECODING_STEPS if defined($_DECODING_STEPS);
 die("format for decoding steps is \"t0,g0,t1,g1\", you provided $___DECODING_STEPS\n") 
-  if defined $___DECODING_STEPS && $___DECODING_STEPS !~ /^[tg]\d+(,[tg]\d+)*$/;
+  if defined $_DECODING_STEPS && $_DECODING_STEPS !~ /^[tg]\d+(,[tg]\d+)*$/;

 my ($factor,$factor_e,$factor_f);

@ -1029,14 +1039,14 @@ sub get_reordering {
    print STDERR "(7.2) building tables @ ".`date`;
    open(O,"$___EXTRACT_FILE.$factor.o.sorted")
      or die "Can't read $___EXTRACT_FILE.$factor.o.sorted";
-    open(OF,  "|gzip >$___MODEL_DIR/orientation-table.$factor.f.$___REORDERING_SMOOTH.gz") 
-	if defined($REORDERING_MODEL{"orientation-f"});
-    open(OFE, "|gzip >$___MODEL_DIR/orientation-table.$factor.fe.$___REORDERING_SMOOTH.gz") 
-	if defined($REORDERING_MODEL{"orientation-fe"});
-    open(OBF, "|gzip >$___MODEL_DIR/orientation-table.$factor.bi.f.$___REORDERING_SMOOTH.gz") 
-	if defined($REORDERING_MODEL{"orientation-bidirectional-f"});
-    open(OBFE,"|gzip >$___MODEL_DIR/orientation-table.$factor.bi.fe.$___REORDERING_SMOOTH.gz") 
-	if defined($REORDERING_MODEL{"orientation-bidirectional-fe"});
+    open(OF,  "|gzip >$___MODEL_DIR/msd-table.$factor.f.$___REORDERING_SMOOTH.gz") 
+	if defined($REORDERING_MODEL{"msd-f"});
+    open(OFE, "|gzip >$___MODEL_DIR/msd-table.$factor.fe.$___REORDERING_SMOOTH.gz") 
+	if defined($REORDERING_MODEL{"msd-fe"});
+    open(OBF, "|gzip >$___MODEL_DIR/msd-table.$factor.bi.f.$___REORDERING_SMOOTH.gz") 
+	if defined($REORDERING_MODEL{"msd-bidirectional-f"});
+    open(OBFE,"|gzip >$___MODEL_DIR/msd-table.$factor.bi.fe.$___REORDERING_SMOOTH.gz") 
+	if defined($REORDERING_MODEL{"msd-bidirectional-fe"});
    open(MF,  "|gzip >$___MODEL_DIR/monotonicity-table.$factor.f.$___REORDERING_SMOOTH.gz") 
 	if defined($REORDERING_MODEL{"monotonicity-f"});
    open(MFE, "|gzip >$___MODEL_DIR/monotonicity-table.$factor.fe.$___REORDERING_SMOOTH.gz") 
@ -1107,14 +1117,14 @@ sub get_reordering {
 sub store_reordering_f {
    my $total_previous_f = $mono_previous_f+$swap_previous_f+$other_previous_f;
    my $total_following_f = $mono_following_f+$swap_following_f+$other_following_f;
-    if(defined($REORDERING_MODEL{"orientation-f"})) {
+    if(defined($REORDERING_MODEL{"msd-f"})) {
 	printf OF ("%s ||| %.5f %.5f %.5f\n",
 		   $f_current, 
 		   $mono_previous_f/$total_previous_f,
 		   $swap_previous_f/$total_previous_f,
 		   $other_previous_f/$total_previous_f);
    }
-    if(defined($REORDERING_MODEL{"orientation-bidirectional-f"})) {
+    if(defined($REORDERING_MODEL{"msd-bidirectional-f"})) {
 	printf OBF ("%s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
 		    $f_current, 
 		    $mono_previous_f/$total_previous_f,
@ -1144,14 +1154,14 @@ sub store_reordering_fe {
    my $total_previous_fe = $mono_previous_fe+$swap_previous_fe+$other_previous_fe;
    my $total_following_fe = $mono_following_fe+$swap_following_fe+$other_following_fe;
    
-    if(defined($REORDERING_MODEL{"orientation-fe"})) {
+    if(defined($REORDERING_MODEL{"msd-fe"})) {
 	printf OFE ("%s ||| %s ||| %.5f %.5f %.5f\n",
 		   $f_current, $e_current, 
 		   $mono_previous_fe/$total_previous_fe,
 		   $swap_previous_fe/$total_previous_fe,
 		   $other_previous_fe/$total_previous_fe);
    }
-    if(defined($REORDERING_MODEL{"orientation-bidirectional-fe"})) {
+    if(defined($REORDERING_MODEL{"msd-bidirectional-fe"})) {
 	printf OBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
 		    $f_current, $e_current, 
 		    $mono_previous_fe/$total_previous_fe,
@ -1257,12 +1267,13 @@ sub create_ini {
    &full_path(\$___MODEL_DIR);
    &full_path(\$___VCB_E);
    &full_path(\$___VCB_F);
-    open(INI,">$___MODEL_DIR/moses.ini") or die "Can't write $___MODEL_DIR/moses.ini";
+    `mkdir -p $___MODEL_DIR`;
+    open(INI,">$___CONFIG") or die("Can't write $___CONFIG");
    print INI "#########################
 ### MOSES CONFIG FILE ###
 #########################
 \n";
-
+    
    if (defined $___TRANSLATION_FACTORS) {
      print INI "# input factors\n";
      print INI "[input-factors]\n";
@ -1278,7 +1289,6 @@ sub create_ini {
      die "No translation steps defined, cannot prepare [input-factors] section\n";
    }

-
    my %stepsused;
    print INI "\n# mapping steps
 [mapping]\n";
@ -1292,11 +1302,14 @@ sub create_ini {
   print INI "\n# translation tables: source-factors, target-factors, number of scores, file 
 [ttable-file]\n";
   my $num_of_ttables = 0;
+   my @SPECIFIED_TABLE = @_PHRASE_TABLE;
   foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
     $num_of_ttables++;
     my $ff = $f;
     $ff =~ s/\-/ /;
-     print INI "$ff 5 $___MODEL_DIR/phrase-table.$f.gz\n";
+     my $file = "$___MODEL_DIR/phrase-table.$f.gz";
+     $file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
+     print INI "$ff 5 $file\n";
   }
   if ($num_of_ttables != $stepsused{"T"}) {
     print STDERR "WARNING: Your [mapping-steps] require translation steps up to id $stepsused{T} but you defined translation steps 0..$num_of_ttables\n";
@ -1326,12 +1339,11 @@ sub create_ini {
 print INI "\n# language models: type(srilm/irstlm), factors, order, file
 [lmodel-file]\n";
  foreach my $lm (@___LM) {
-    my ($f, $o, $fn) = @$lm;
-    my $type = 0; # default to srilm
+    my ($f, $o, $fn, $type) = @{$lm};
    print INI "$type $f $o $fn\n";
  }

-print INI "\n\n# limit on how many phrase translations e for each phrase f are loaded
+print INI "\n\n\# limit on how many phrase translations e for each phrase f are loaded
 # 0 = all elements loaded
 [ttable-limit]
 20\n";
@ -1341,8 +1353,10 @@ print INI "\n\n# limit on how many phrase translations e for each phrase f are l

  my $weight_d_count = 0;
  if ($___REORDERING ne "distance") {
-    my $file = "# distortion (reordering) files\n[distortion-file]\n";
+    my $file = "# distortion (reordering) files\n\[distortion-file]\n";
    my $factor_i = 0;
+ 
+    my @SPECIFIED_TABLE = @_REORDERING_TABLE;
    foreach my $factor (split(/\+/,$___REORDERING_FACTORS)) {
 	foreach my $r (keys %REORDERING_MODEL) {
 	    next if $r eq "fe" || $r eq "f";
@ -1350,23 +1364,24 @@ print INI "\n\n# limit on how many phrase translations e for each phrase f are l
 	    if ($r eq "distance") { $weight_d_count++; } 
 	    else {
 		my $type = $r;
-		$type =~ s/orientation/msd/;
-
 		$r =~ s/-bidirectional/.bi/;
 		$r =~ s/-f/.f/;
-		$r =~ s/orientation/orientation-table.$factor/;
+		$r =~ s/msd/msd-table.$factor/;
 		$r =~ s/monotonicity/monotonicity-table.$factor/;
 		
 		my $w;
-		if ($r =~ /orient/) { $w = 3; } else { $w = 1; }
+		if ($r =~ /msd/) { $w = 3; } else { $w = 1; }
 		if ($r =~ /bi/) { $w *= 2; }
 		$weight_d_count += $w;
-		$file .= "$factor $type $w $___MODEL_DIR/$r.$___REORDERING_SMOOTH.gz\n";
+
+                my $table_file = "$___MODEL_DIR/$r.$___REORDERING_SMOOTH.gz";
+		$table_file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
+		$file .= "$factor $type $w $table_file\n";
 	    }
 	}
        $factor_i++;
-    }
-    print INI $file."\n";
+      }
+      print INI $file."\n";
  }
  else {
    $weight_d_count = 1;