Merge branch 'master' of https://github.com/moses-smt/mosesdecoder

2024-12-27 05:55:02 +03:00 · 2015-10-28 00:28:16 +00:00 · 2015-10-28 00:28:16 +00:00 · 951bebb037
commit 951bebb037
parent 143d05fa4b 9f28bf5619
14 changed files with 87 additions and 73 deletions
--- a/moses/BitmapContainer.h
+++ b/moses/BitmapContainer.h
@@ -129,7 +129,7 @@ public:
        // Fallback: scoreA < scoreB == false, non-deterministic sort
        return false;
      }
-      return (phrA->Compare(*phrB) < 0);
+      return (phrA->Compare(*phrB) > 0);
    }
  }
 };
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -150,8 +150,8 @@ EvaluateWhenApplied(StatefulFeatureFunction const& sfff, int state_idx)
    // Manager& manager = this->GetManager(); //Get the manager and the ttask
    // ttasksptr const& ttask = manager.GetTtask();
    FFState const* prev = m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL;
-    m_ffStates[state_idx] 
-      = sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown);
+    m_ffStates[state_idx]
+    = sfff.EvaluateWhenApplied(*this, prev, &m_currScoreBreakdown);
  }
 }

--- a/moses/LM/IRST.cpp
+++ b/moses/LM/IRST.cpp
@@ -93,12 +93,12 @@ LanguageModelIRST::
  TRACE_ERR( "reset mmap\n");
  if (m_lmtb) m_lmtb->reset_mmap();
 #endif
-  
+
  delete m_lmtb;
 }


-bool 
+bool
 LanguageModelIRST::
 IsUseable(const FactorMask &mask) const
 {
@@ -106,7 +106,7 @@ IsUseable(const FactorMask &mask) const
  return ret;
 }

-void 
+void
 LanguageModelIRST::
 Load()
 {
@@ -135,7 +135,7 @@ Load()
  if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
 }

-void 
+void
 LanguageModelIRST::
 CreateFactors(FactorCollection &factorCollection)
 {
@@ -179,31 +179,31 @@ CreateFactors(FactorCollection &factorCollection)
  }
 }

-int 
+int
 LanguageModelIRST::
 GetLmID( const std::string &str ) const
 {
  return d->encode( str.c_str() ); // at the level of micro tags
 }

-int 
+int
 LanguageModelIRST::
 GetLmID( const Word &word ) const
 {
  return GetLmID( word.GetFactor(m_factorType) );
 }

-int 
+int
 LanguageModelIRST::
 GetLmID( const Factor *factor ) const
 {
  size_t factorId = factor->GetId();
-  
+
  if  ((factorId >= m_lmIdLookup.size()) || (m_lmIdLookup[factorId] == m_empty)) {
    if (d->incflag()==1) {
      std::string s = factor->GetString().as_string();
      int code = d->encode(s.c_str());
-      
+
      //////////
      ///poiche' non c'e' distinzione tra i factorIDs delle parole sorgenti
      ///e delle parole target in Moses, puo' accadere che una parola target
@@ -231,7 +231,7 @@ GetLmID( const Factor *factor ) const
      /// ma si perde in efficienza nell'accesso perche' non e' piu' possibile quello random dei vettori
      /// a te la scelta!!!!
      ////////////////
-      
+

      if (factorId >= m_lmIdLookup.size()) {
        //resize and fill with m_empty
@@ -242,7 +242,7 @@ GetLmID( const Factor *factor ) const
      //insert new code
      m_lmIdLookup[factorId] = code;
      return code;
-      
+
    } else {
      return m_unknownId;
    }
@@ -251,7 +251,7 @@ GetLmID( const Factor *factor ) const
  }
 }

-FFState const* 
+FFState const*
 LanguageModelIRST::
 EmptyHypothesisState(const InputType &/*input*/) const
 {
@@ -260,12 +260,12 @@ EmptyHypothesisState(const InputType &/*input*/) const
  return ret.release();
 }

-void 
+void
 LanguageModelIRST::
 CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
 {
-  bool isContextAdaptive 
-    = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
+  bool isContextAdaptive
+  = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;

  fullScore = 0;
  ngramScore = 0;
@@ -308,7 +308,7 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
      ++idx;
    }
 #ifdef IRSTLM_CONTEXT_DEPENDENT
-  }    
+  }
 #endif
  ngramScore = 0.0;
  int end_loop = (int) phrase.GetSize();
@@ -334,20 +334,20 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov
      ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp);
    }
 #ifdef IRSTLM_CONTEXT_DEPENDENT
-  }    
-#endif  
+  }
+#endif
  before_boundary = TransformLMScore(before_boundary);
  ngramScore = TransformLMScore(ngramScore);
  fullScore = ngramScore + before_boundary;
 }

-FFState* 
+FFState*
 LanguageModelIRST::
-EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, 
+EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
                    ScoreComponentCollection *out) const
 {
-  bool isContextAdaptive 
-    = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;
+  bool isContextAdaptive
+  = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT;

  if (!hypo.GetCurrTargetLength()) {
    std::auto_ptr<IRSTLMState> ret(new IRSTLMState(ps));
@@ -387,18 +387,17 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
  position = (const int) begin+1;
  float score;
 #ifdef IRSTLM_CONTEXT_DEPENDENT
-  if (CW)
-    {
-      score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
-      while (position < adjust_end) {
-	for (idx=1; idx<m_lmtb_size; idx++) {
-	  codes[idx-1] = codes[idx];
-	}
-	codes[idx-1] =  GetLmID(hypo.GetWord(position));
-	score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
-	++position;
+  if (CW) {
+    score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
+    while (position < adjust_end) {
+      for (idx=1; idx<m_lmtb_size; idx++) {
+        codes[idx-1] = codes[idx];
      }
-    } else { 
+      codes[idx-1] =  GetLmID(hypo.GetWord(position));
+      score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
+      ++position;
+    }
+  } else {
 #endif
    score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
    position = (const int) begin+1;
@@ -430,12 +429,12 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
      --idx;
    }
 #ifdef IRSTLM_CONTEXT_DEPENDENT
-    if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); 
+    if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp);
    else
 #else
-      score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); 
+    score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp);
 #endif
-  } else {
+    } else {
    // need to set the LM state

    if (adjust_end < end)   { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words
@@ -447,16 +446,16 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps,
      msp = (char *) m_lmtb->cmaxsuffptr(codes,m_lmtb_size);
    }
  }
-  
+
  score = TransformLMScore(score);
  out->PlusEquals(this, score);
-  
+
  std::auto_ptr<IRSTLMState> ret(new IRSTLMState(msp));
-  
+
  return ret.release();
 }

-LMResult 
+LMResult
 LanguageModelIRST::
 GetValue(const vector<const Word*> &contextFactor, State* finalState) const
 {
@@ -494,7 +493,7 @@ GetValue(const vector<const Word*> &contextFactor, State* finalState) const
  return result;
 }

-bool 
+bool
 LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold)
 {
  if (sentences_done==-1) return true;
@@ -512,7 +511,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask)
 #endif
 }

-void 
+void
 LanguageModelIRST::
 CleanUpAfterSentenceProcessing(const InputType& source)
 {
@@ -528,7 +527,7 @@ CleanUpAfterSentenceProcessing(const InputType& source)
  }
 }

-void 
+void
 LanguageModelIRST::
 SetParameter(const std::string& key, const std::string& value)
 {
@@ -539,6 +538,6 @@ SetParameter(const std::string& key, const std::string& value)
  }
  m_lmtb_size = m_nGramOrder;
 }
-  
+
 }

--- a/moses/Phrase.h
+++ b/moses/Phrase.h
@@ -56,7 +56,7 @@ public:
  /// return shared pointer to ttask
  //  only TargetPhrases have non-NULL ttaskptrs!
  virtual ttasksptr GetTtask() const {
-    return ttasksptr(); 
+    return ttasksptr();
  }

  /// check if this phrase belongs to a valid ttask
--- a/moses/SearchCubePruning.cpp
+++ b/moses/SearchCubePruning.cpp
@@ -44,7 +44,7 @@ public:
        // Fallback: compare pointers, non-deterministic sort
        return A < B;
      }
-      return (phrA->Compare(*phrB) < 0);
+      return (phrA->Compare(*phrB) > 0);
    }
  }
 };
--- a/moses/Sentence.h
+++ b/moses/Sentence.h
@@ -61,7 +61,7 @@ protected:
  void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);

  // "Document Level Translation" instructions, see aux_interpret_dlt
-  std::vector<std::map<std::string,std::string> > m_dlt_meta; 
+  std::vector<std::map<std::string,std::string> > m_dlt_meta;

 public:
  Sentence();
--- a/moses/Syntax/F2S/Manager-inl.h
+++ b/moses/Syntax/F2S/Manager-inl.h
@@ -287,8 +287,8 @@ void Manager<RuleMatcher>::RecombineAndSort(
  // any 'duplicate' vertices are deleted.
 // TODO Set?
  typedef boost::unordered_map<SVertex *, SVertex *,
-                               SVertexRecombinationHasher,
-                               SVertexRecombinationEqualityPred> Map;
+          SVertexRecombinationHasher,
+          SVertexRecombinationEqualityPred> Map;
  Map map;
  for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
       p != buffer.end(); ++p) {
--- a/moses/Syntax/S2T/Manager-inl.h
+++ b/moses/Syntax/S2T/Manager-inl.h
@@ -351,8 +351,8 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
  // any 'duplicate' vertices are deleted.
 // TODO Set?
  typedef boost::unordered_map<SVertex *, SVertex *,
-                               SVertexRecombinationHasher,
-                               SVertexRecombinationEqualityPred> Map;
+          SVertexRecombinationHasher,
+          SVertexRecombinationEqualityPred> Map;
  Map map;
  for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
       p != buffer.end(); ++p) {
--- a/moses/Syntax/SVertexRecombinationEqualityPred.h
+++ b/moses/Syntax/SVertexRecombinationEqualityPred.h
@@ -11,7 +11,7 @@ namespace Syntax

 class SVertexRecombinationEqualityPred
 {
- public:
+public:
  bool operator()(const SVertex *v1, const SVertex *v2) const {
    assert(v1->states.size() == v2->states.size());
    for (std::size_t i = 0; i < v1->states.size(); ++i) {
--- a/moses/Syntax/SVertexRecombinationHasher.h
+++ b/moses/Syntax/SVertexRecombinationHasher.h
@@ -11,7 +11,7 @@ namespace Syntax

 class SVertexRecombinationHasher
 {
- public:
+public:
  std::size_t operator()(const SVertex *v) const {
    std::size_t seed = 0;
    for (std::vector<FFState*>::const_iterator p = v->states.begin();
--- a/moses/Syntax/T2S/Manager-inl.h
+++ b/moses/Syntax/T2S/Manager-inl.h
@@ -247,8 +247,8 @@ void Manager<RuleMatcher>::RecombineAndSort(
  // any 'duplicate' vertices are deleted.
 // TODO Set?
  typedef boost::unordered_map<SVertex *, SVertex *,
-                               SVertexRecombinationHasher,
-                               SVertexRecombinationEqualityPred> Map;
+          SVertexRecombinationHasher,
+          SVertexRecombinationEqualityPred> Map;
  Map map;
  for (std::vector<SHyperedge*>::const_iterator p = buffer.begin();
       p != buffer.end(); ++p) {
--- a/moses/TranslationModel/PhraseDictionaryGroup.cpp
+++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp
@@ -33,7 +33,8 @@ namespace Moses
 PhraseDictionaryGroup::PhraseDictionaryGroup(const string &line)
  : PhraseDictionary(line, true),
    m_numModels(0),
-    m_restrict(false)
+    m_restrict(false),
+    m_specifiedZeros(false)
 {
  ReadParameters();
 }
@@ -45,6 +46,9 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value)
    m_numModels = m_memberPDStrs.size();
  } else if (key == "restrict") {
    m_restrict = Scan<bool>(value);
+  } else if (key == "zeros") {
+    m_specifiedZeros = true;
+    m_zeros = Scan<float>(Tokenize(value, ","));
  } else {
    PhraseDictionary::SetParameter(key, value);
  }
@@ -67,10 +71,20 @@ void PhraseDictionaryGroup::Load()
      }
    }
    UTIL_THROW_IF2(!pdFound,
-                   "Could not find component phrase table " << pdName);
+                   "Could not find member phrase table " << pdName);
  }
  UTIL_THROW_IF2(componentWeights != m_numScoreComponents,
-                 "Total number of component model scores is unequal to specified number of scores");
+                 "Total number of member model scores is unequal to specified number of scores");
+
+  // Determine "zero" scores for features
+  if (m_specifiedZeros) {
+    UTIL_THROW_IF2(m_zeros.size() != m_numScoreComponents,
+                   "Number of specified zeros is unequal to number of member model scores");
+  } else {
+    // Default is all 0 (as opposed to e.g. -99 or similar to approximate log(0)
+    // or a smoothed "not in model" score)
+    m_zeros = vector<float>(m_numScoreComponents, 0);
+  }
 }

 void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
@@ -150,7 +164,7 @@ CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
          phrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
          // Add phrase entry
          allPhrases.push_back(phrase);
-          allScores[targetPhrase] = vector<float>(m_numScoreComponents, 0);
+          allScores[targetPhrase] = vector<float>(m_zeros);
        }
        vector<float>& scores = allScores.find(targetPhrase)->second;

--- a/moses/TranslationModel/PhraseDictionaryGroup.h
+++ b/moses/TranslationModel/PhraseDictionaryGroup.h
@@ -70,6 +70,8 @@ protected:
  std::vector<PhraseDictionary*> m_memberPDs;
  size_t m_numModels;
  bool m_restrict;
+  bool m_specifiedZeros;
+  std::vector<float> m_zeros;
  std::vector<FeatureFunction*> m_pdFeature;

  typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -166,21 +166,20 @@ options() const
 }

 /// parse document-level translation info stored on the input
-void 
+void
 TranslationTask::
 interpret_dlt()
 {
  if (m_source->GetType() != SentenceInput) return;
  Sentence const& snt = static_cast<Sentence const&>(*m_source);
  typedef std::map<std::string,std::string> dltmap_t;
-  BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta())
-    {
-      dltmap_t::const_iterator i = M.find("type");
-      if (i == M.end() || i->second != "adaptive-lm") continue;
-      dltmap_t::const_iterator j = M.find("context-weights");
-      if (j == M.end()) continue;
-      SetContextWeights(j->second);
-    }
+  BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) {
+    dltmap_t::const_iterator i = M.find("type");
+    if (i == M.end() || i->second != "adaptive-lm") continue;
+    dltmap_t::const_iterator j = M.find("context-weights");
+    if (j == M.end()) continue;
+    SetContextWeights(j->second);
+  }
 }


@@ -191,14 +190,14 @@ void TranslationTask::Run()
                 << " input and iowrapper.");

  const size_t translationId = m_source->GetTranslationId();
-  
+

  // report wall time spent on translation
  Timer translationTime;
  translationTime.start();

  interpret_dlt(); // parse document-level translation info stored on the input
-  
+
  // report thread number
 #if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
  VERBOSE(2, "Translating line " << translationId << "  in thread id " << pthread_self() << endl);