Merge branch 'master' of github.com:moses-smt/mosesdecoder

2024-12-27 14:05:29 +03:00 · 2013-07-27 04:19:16 +01:00 · 2013-07-27 04:19:16 +01:00 · abe90b5af7
commit abe90b5af7
parent 9dab7950fa 38e312f44c
13 changed files with 116 additions and 42 deletions
--- a/1
+++ b/1
@ -115,6 +115,7 @@ requirements += <os>MACOSX:<library>iconv ;
 project : requirements 
  <threading>multi:<define>WITH_THREADS
  <threading>multi:<library>boost_thread
+  <library>boost_system
  <define>_FILE_OFFSET_BITS=64 <define>_LARGE_FILES
  $(requirements)
  <include>.
--- a/jam-files/sanity.jam
+++ b/jam-files/sanity.jam
@ -143,7 +143,7 @@ rule boost-lib ( name macro : deps * ) {
    alias boost_$(name) : inner_boost_$(name) : <link>shared ;
    requirements += <define>BOOST_$(macro) ;
  } else {
-    alias boost_$(name) : inner_boost_$(name) : <link>static ;
+    alias boost_$(name) : inner_boost_$(name) : : : <link>shared:<define>BOOST_$(macro) ;
  }
 }

--- a/mira/Jamfile
+++ b/mira/Jamfile
@ -2,7 +2,7 @@ lib mira_lib :
 [ glob *.cpp : *Test.cpp Main.cpp ]
 ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;

-exe mira : Main.cpp mira_lib ; 
+exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ; 

 alias programs : mira ;

--- a/moses/FF/ControlRecombination.cpp
+++ b/moses/FF/ControlRecombination.cpp
@ -4,11 +4,12 @@

 using namespace std;

-namespace Moses {
+namespace Moses
+{

 ControlRecombination::ControlRecombination(const std::string &line)
-:StatefulFeatureFunction("ControlRecombination", 0, line)
-,m_type(Output)
+  :StatefulFeatureFunction("ControlRecombination", 0, line)
+  ,m_type(Output)
 {
 }

@ -44,12 +45,12 @@ const FFState* ControlRecombination::EmptyHypothesisState(const InputType &input
 }

 ControlRecombinationState::ControlRecombinationState()
-:m_hypo(NULL)
+  :m_hypo(NULL)
 {
 }

 ControlRecombinationState::ControlRecombinationState(const Hypothesis *hypo)
-:m_hypo(hypo)
+  :m_hypo(hypo)
 {
 }

--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@ -4,7 +4,8 @@
 #include "StatefulFeatureFunction.h"
 #include "moses/FF/FFState.h"

-namespace Moses {
+namespace Moses
+{

 class ControlRecombinationState;

@ -12,17 +13,16 @@ class ControlRecombinationState;
 class ControlRecombination : public StatefulFeatureFunction
 {
 public:
-	enum Type
-	{
-	  None,
-	  Output,
-	  Segmentation
-	};
+  enum Type {
+    None,
+    Output,
+    Segmentation
+  };

  ControlRecombination(const std::string &line);

  bool IsUseable(const FactorMask &mask) const {
-	  return true;
+    return true;
  }

  virtual FFState* Evaluate(
--- a/moses/LM/Rand.cpp
+++ b/moses/LM/Rand.cpp
@ -42,7 +42,8 @@ LanguageModelRandLM::LanguageModelRandLM(const std::string &line)
 {
 }

-LanguageModelRandLM::~LanguageModelRandLM() {
+LanguageModelRandLM::~LanguageModelRandLM()
+{
  delete m_lm;
 }

@ -100,7 +101,8 @@ randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const
  return m_lm->getWordID(str);
 }

-randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const {
+randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const
+{
  size_t factorId = factor->GetId();
  return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
 }
@ -127,10 +129,12 @@ LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
  return ret;
 }

-void LanguageModelRandLM::InitializeForInput(InputType const& source) {
+void LanguageModelRandLM::InitializeForInput(InputType const& source)
+{
  m_lm->initThreadSpecificData(); // Creates thread specific data iff                                    // compiled with multithreading.
 }
-void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) {
+void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source)
+{
  m_lm->clearCaches(); // clear caches
 }

--- a/moses/LM/Rand.h
+++ b/moses/LM/Rand.h
@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 namespace randlm
 {
- class RandLM;
+class RandLM;
 }

 namespace Moses
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@ -174,12 +174,12 @@ Parameter::Parameter()
  AddParam("lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models");
  AddParam("lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");

-  #ifdef HAVE_SYNLM
+#ifdef HAVE_SYNLM
  AddParam("slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
  AddParam("slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
  AddParam("slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
 #endif
-AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
+  AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
  AddParam("phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
  AddParam("phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
  AddParam("phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
@ -312,21 +312,21 @@ bool Parameter::LoadParam(int argc, char* argv[])

  // don't mix old and new format
  if ((isParamSpecified("feature") || isParamSpecified("weight"))
-	&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
-		isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
-		isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
-		isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
-		isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
-		isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
-		isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
-		isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
-		isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
-		isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
-		isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
-		isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
-		)
-	 ) {
-	  UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
+      && (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
+          isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
+          isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
+          isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
+          isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
+          isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
+          isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
+          isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
+          isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
+          isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
+          isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
+          isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
+         )
+     ) {
+    UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
  }

  // convert old weights args to new format
--- a/moses/TranslationModel/RuleTable/UTrie.h
+++ b/moses/TranslationModel/RuleTable/UTrie.h
@ -56,7 +56,7 @@ public:
      const ChartCellCollectionBase &);

 private:
-  const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;
+//  const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &) const;

  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
    const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@ -391,7 +391,6 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],

  // Workaround for extract-parallel issue.
  if (options.sentenceOffset > 0) {
-    options.glueGrammarFile.clear();
    options.unknownWordFile.clear();
  }
 }
--- a/scripts/generic/ph_numbers.perl
+++ b/scripts/generic/ph_numbers.perl
@ -21,7 +21,7 @@ sub run {
    }
    my $sourceLocale = $opts{s} || "";
    my $targetLocale = $opts{t} || "";
-    my $numberSymbol = $opts{m} || '@NUM@';
+    my $numberSymbol = $opts{m} || '@num@';
    while(<>) {
 	chomp;
 	print mark_numbers($_,$opts{c},$opts{l},$numberSymbol,$_),"\n";
@ -32,7 +32,7 @@ sub mark_numbers {
    my $input = shift;
    my $corpusMode = shift;
    my $legacyMode = shift;
-    my $numberSymbol = shift || '@NUM@';
+    my $numberSymbol = shift || '@num@';

    my $numref = recognize($input);
    my $input_length = length($input);
@ -46,7 +46,7 @@ sub mark_numbers {
 	}
 	my $number = substr($input,$numstart,$numend-$numstart);
 	if($corpusMode) {
-	    $output .= $number;
+      $output .= $numberSymbol;
 	}
 	else {
 	    if($legacyMode) {
--- a/scripts/other/beautify.perl
+++ b/scripts/other/beautify.perl
@ -28,6 +28,9 @@ sub Beautify($)
 		 next if ($name eq "util");
 		 next if ($name eq "lm");
 		 next if ($name eq "search");
+		 next if ($name eq "randlm");
+		 next if ($name eq "srilm");
+		 next if ($name eq "irstlm");

 		 $name = $path ."/" .$name;
 		 if (-d $name) {
--- a/scripts/other/convert-pt.perl
+++ b/scripts/other/convert-pt.perl
@ -0,0 +1,66 @@
+#!/usr/bin/perl
+
+# $Id$
+# convert a phrase-table with alignment in Moses' dead-end format
+#    a . ||| A . ||| (0) (0,1) ||| (0,1) (1) ||| 1 0.0626124 1 0.032119 2.718
+# to
+#    a . ||| A . ||| 1 0.0626124 1 0.032119 2.718 ||| 0-0 1-0 1-1
+
+
+use strict;
+use warnings;
+use Getopt::Long;
+use IO::File;
+use File::Basename;
+
+sub ConvertAlignment($);
+
+binmode(STDIN, ":utf8");
+binmode(STDOUT, ":utf8");
+binmode(STDERR, ":utf8");
+
+
+my $lineNum = 0;  # count of input lines read so far
+while (my $line = <STDIN>) {
+  chomp($line);
+	++$lineNum;
+  # Splitting on a single "|" makes every "|||" yield two empty tokens, so fields sit 3 indexes apart.
+  my @toks = split(/\|/, $line);
+  my $source = $toks[0];   # 1st field of the input line
+  my $target = $toks[3];   # 2nd field of the input line
+  my $scores = $toks[12];  # 5th field of the input line
+  # Only the 3rd field ($toks[6]) is converted; the 4th field ($toks[9]) is never read.
+  my $alignS = $toks[6];
+  my $align = ConvertAlignment($alignS);
+  # Fields keep the surrounding spaces they had in the input; only the alignment gets a new " ||| ".
+  print "$source|||$target|||$scores ||| $align\n";
+
+}
+
+# Convert an alignment such as "(0) (0,1)" (one parenthesised, comma-separated
+# list of target positions per source word) into pairs such as "0-0 1-0 1-1 ";
+# every pair, including the last, is followed by a space.  The ($) prototype
+# matches the forward declaration near the top of the script.
+sub ConvertAlignment($)
+{
+  my $alignS = shift;
+  my $ret = "";
+  $alignS =~ s/^\s+//;
+  $alignS =~ s/\s+$//;
+
+  # split(' ') collapses runs of whitespace, so a doubled space cannot
+  # produce an empty token that would shift the source positions.
+  my @toks = split(' ', $alignS);
+  for (my $posS = 0; $posS < scalar @toks; ++$posS) {
+    # Drop the surrounding parentheses; "()" yields no target positions.
+    my $tok = substr($toks[$posS], 1, length($toks[$posS]) - 2);
+
+    foreach my $posT (split(/,/, $tok)) {
+      $ret .= "$posS-$posT ";
+    }
+  }
+
+  return $ret;
+}
+
+