Merge ../mosesdecoder into perf_moses2

2024-09-11 11:25:40 +03:00 · 2016-06-17 12:47:51 +01:00 · 2016-06-17 12:47:51 +01:00 · 2a6e5777fb
commit 2a6e5777fb
parent e7ec90da58 bc5f8d15c6
11 changed files with 72 additions and 51 deletions
--- a/compile.sh
+++ b/compile.sh
@ -3,6 +3,6 @@
 # you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake

 set -e -o pipefail
-OPT=${OPT:-$(pwd)/OPT}
+OPT=${OPT:-$(pwd)/opt}
 ./bjam --with-irstlm=$OPT/irstlm-5.80.08 --with-boost=$OPT --with-cmph=$OPT --with-xmlrpc-c=$OPT --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@

--- a/moses/FF/VW/VW.cpp
+++ b/moses/FF/VW/VW.cpp
@ -36,7 +36,8 @@ VW::VW(const std::string &line)
  : StatefulFeatureFunction(1, line)
  , TLSTargetSentence(this)
  , m_train(false)
-  , m_sentenceStartWord(Word()) {
+  , m_sentenceStartWord(Word())
+{
  ReadParameters();
  Discriminative::ClassifierFactory *classifierFactory = m_train
      ? new Discriminative::ClassifierFactory(m_modelPath)
@ -65,7 +66,8 @@ VW::VW(const std::string &line)
    m_sentenceStartWord.SetFactor(i, bosFactor);
 }

-VW::~VW() {
+VW::~VW()
+{
  delete m_tlsClassifier;
  delete m_normalizer;
  // TODO delete more stuff
@ -75,7 +77,7 @@ FFState* VW::EvaluateWhenApplied(
  const Hypothesis& curHypo,
  const FFState* prevState,
  ScoreComponentCollection* accumulator) const
-{ 
+{
  VERBOSE(3, "VW :: Evaluating translation options\n");

  const VWState& prevVWState = *static_cast<const VWState *>(prevState);
@ -96,12 +98,12 @@ FFState* VW::EvaluateWhenApplied(
  // compute our current key
  size_t cacheKey = MakeCacheKey(prevState, spanStart, spanEnd);

-  boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions 
-    = *m_tlsComputedStateExtensions->GetStored();
+  boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
+  = *m_tlsComputedStateExtensions->GetStored();

  if (computedStateExtensions.find(cacheKey) == computedStateExtensions.end()) {
    // we have not computed this set of translation options yet
-    const TranslationOptionList *topts = 
+    const TranslationOptionList *topts =
      curHypo.GetManager().getSntTranslationOptions()->GetTranslationOptionList(spanStart, spanEnd);

    const InputType& input = curHypo.GetManager().GetSource();
@ -178,17 +180,19 @@ FFState* VW::EvaluateWhenApplied(
  return new VWState(prevVWState, curHypo);
 }

-const FFState* VW::EmptyHypothesisState(const InputType &input) const {
+const FFState* VW::EmptyHypothesisState(const InputType &input) const
+{
  size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
  Phrase initialPhrase;
  for (size_t i = 0; i < maxContextSize; i++)
    initialPhrase.AddWord(m_sentenceStartWord);
-    
+
  return new VWState(initialPhrase);
 }

 void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
-    , const TranslationOptionList &translationOptionList) const {
+    , const TranslationOptionList &translationOptionList) const
+{
  Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();

  if (translationOptionList.size() == 0)
@ -329,7 +333,7 @@ void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
      // for future use at decoding time
      size_t toptHash = hash_value(*topt);
      m_tlsTranslationOptionFeatures->GetStored()->insert(
-          std::make_pair(toptHash, outFeaturesTargetNamespace));
+        std::make_pair(toptHash, outFeaturesTargetNamespace));

      // get classifier score
      losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
@ -355,7 +359,7 @@ void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
        // We have target context features => this is just a partial score,
        // do not add it to the score component collection.
        size_t toptHash = hash_value(*topt);
-        
+
        // Subtract the score contribution of target-only features, otherwise it would
        // be included twice.
        Discriminative::FeatureVector emptySource;
@ -372,7 +376,8 @@ void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
  }
 }

-void VW::SetParameter(const std::string& key, const std::string& value) {
+void VW::SetParameter(const std::string& key, const std::string& value)
+{
  if (key == "train") {
    m_train = Scan<bool>(value);
  } else if (key == "path") {
@ -405,7 +410,8 @@ void VW::SetParameter(const std::string& key, const std::string& value) {
  }
 }

-void VW::InitializeForInput(ttasksptr const& ttask) {
+void VW::InitializeForInput(ttasksptr const& ttask)
+{
  // do not keep future cost estimates across sentences!
  m_tlsFutureScores->GetStored()->clear();

@ -457,7 +463,8 @@ void VW::InitializeForInput(ttasksptr const& ttask) {
 * private methods
 ************************************************************************************/

-const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const {
+const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const
+{
  std::set<std::pair<size_t, size_t> > alignmentPoints;
  const Hypothesis *contextHypo = curHypo.GetPrevHypo();
  int idxInContext = contextSize - 1;
@ -482,7 +489,8 @@ const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_
  return AlignmentInfoCollection::Instance().Add(alignmentPoints);
 }

-AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const {
+AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const
+{
  std::set<std::pair<size_t, size_t> > alignmentPoints;
  for (int i = std::max(0, currentStart - (int)contextSize); i < currentStart; i++) {
    std::set<size_t> alignedToTgt = alignInfo.GetAlignmentsForTarget(i);
@ -493,7 +501,8 @@ AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t
  return AlignmentInfo(alignmentPoints);
 }

-std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const {
+std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const
+{

  //std::cerr << topt.GetSourceWordsRange() << std::endl;

@ -565,7 +574,8 @@ std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &top
  return std::make_pair(false, -1);
 }

-std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
+std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const
+{
  UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");

  float sourceRawCount = 0.0;
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@ -104,13 +104,13 @@ public:
  // are written to a file, no classifier predictions take place. Target-side
  // context is constant at training time (we know the true target sentence),
  // so target-context features are extracted here as well.
-  virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input 
+  virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const;

  // Evaluate VW during decoding. This is only used at prediction time (not in training).
  // When no target-context features are defined, VW predictions were already fully calculated
  // in EvaluateTranslationOptionListWithSourceContext() and the scores were added to the model.
-  // If there are target-context features, we compute the context-dependent part of the 
+  // If there are target-context features, we compute the context-dependent part of the
  // classifier score and combine it with the source-context only partial score which was computed
  // in EvaluateTranslationOptionListWithSourceContext(). Various caches are used to make this
  // method more efficient.
@ -122,16 +122,16 @@ public:
  virtual FFState* EvaluateWhenApplied(
    const ChartHypothesis&,
    int,
-    ScoreComponentCollection* accumulator) const { 
-    throw new std::logic_error("hiearchical/syntax not supported"); 
+    ScoreComponentCollection* accumulator) const {
+    throw new std::logic_error("hiearchical/syntax not supported");
  }

  // Initial VW state; contains unaligned BOS symbols.
-  const FFState* EmptyHypothesisState(const InputType &input) const; 
+  const FFState* EmptyHypothesisState(const InputType &input) const;

  void SetParameter(const std::string& key, const std::string& value);

-  // At prediction time, this clears our caches. At training time, we load the next sentence, its 
+  // At prediction time, this clears our caches. At training time, we load the next sentence, its
  // translation and word alignment.
  virtual void InitializeForInput(ttasksptr const& ttask);

@ -181,7 +181,7 @@ private:

  // normalizer, typically this means softmax
  Discriminative::Normalizer *m_normalizer = NULL;
-  
+
  // thread-specific classifier instance
  TLSClassifier *m_tlsClassifier;

--- a/moses/FF/VW/VWFeatureBase.cpp
+++ b/moses/FF/VW/VWFeatureBase.cpp
@ -14,7 +14,8 @@ std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatu
 std::map<std::string, size_t> VWFeatureBase::s_targetContextLength;


-void VWFeatureBase::UpdateContextSize(const std::string &usedBy) {
+void VWFeatureBase::UpdateContextSize(const std::string &usedBy)
+{
  // using the standard map behavior here: if the entry does not
  // exist, it will be added and initialized to zero
  size_t currentSize = s_targetContextLength[usedBy];
--- a/moses/FF/VW/VWFeatureBase.h
+++ b/moses/FF/VW/VWFeatureBase.h
@ -12,7 +12,7 @@
 namespace Moses
 {

-enum VWFeatureType { 
+enum VWFeatureType {
  vwft_source,
  vwft_target,
  vwft_targetContext
--- a/moses/FF/VW/VWFeatureContext.h
+++ b/moses/FF/VW/VWFeatureContext.h
@ -43,7 +43,7 @@ public:
    if (key == "size") {
      m_contextSize = Scan<size_t>(value);
    } else if (key == "factor-positions") {
-      // factor positions: assuming a factor such as positional morphological tag, use this 
+      // factor positions: assuming a factor such as positional morphological tag, use this
      // option to select only certain positions; this assumes that only a single
      // target-side factor is defined
      Tokenize<size_t>(m_factorPositions, value, ",");
@ -92,9 +92,9 @@ protected:

  // get source words aligned to a particular context word
  std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
-                                          , const InputType &input
-                                          , const AlignmentInfo &alignInfo
-                                          , size_t posFromEnd) const {
+      , const InputType &input
+      , const AlignmentInfo &alignInfo
+      , size_t posFromEnd) const {
    size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
    std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
    std::vector<std::string> out;
@ -108,7 +108,7 @@ protected:
  // required context size
  size_t m_contextSize;

-  // factor positions: assuming a factor such as positional morphological tag, use this 
+  // factor positions: assuming a factor such as positional morphological tag, use this
  // option to select only certain positions
  std::vector<size_t> m_factorPositions;
 };
--- a/moses/FF/VW/VWFeatureContextBigrams.h
+++ b/moses/FF/VW/VWFeatureContextBigrams.h
@ -25,8 +25,8 @@ public:
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 1; i < m_contextSize; i++)
-      outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1) 
-            + "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
+                            + "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
--- a/moses/FF/VW/VWState.cpp
+++ b/moses/FF/VW/VWState.cpp
@ -9,24 +9,28 @@
 #include "moses/TranslationOption.h"
 #include <boost/functional/hash.hpp>

-namespace Moses {
+namespace Moses
+{

-VWState::VWState() : m_spanStart(0), m_spanEnd(0) {
+VWState::VWState() : m_spanStart(0), m_spanEnd(0)
+{
  ComputeHash();
 }

-VWState::VWState(const Phrase &phrase) 
-  : m_phrase(phrase), m_spanStart(0), m_spanEnd(0) {
+VWState::VWState(const Phrase &phrase)
+  : m_phrase(phrase), m_spanStart(0), m_spanEnd(0)
+{
  ComputeHash();
 }

-VWState::VWState(const VWState &prevState, const Hypothesis &curHypo) {
+VWState::VWState(const VWState &prevState, const Hypothesis &curHypo)
+{
  VERBOSE(3, "VW :: updating state\n>> previous state: " << prevState << "\n");

  // copy phrase from previous state
  Phrase phrase = prevState.GetPhrase();
  size_t contextSize = phrase.GetSize(); // identical to VWFeatureBase::GetMaximumContextSize()
-  
+
  // add words from current hypothesis
  phrase.Append(curHypo.GetCurrTargetPhrase());

@ -46,15 +50,17 @@ VWState::VWState(const VWState &prevState, const Hypothesis &curHypo) {
  VERBOSE(3, ">> updated state: " << *this << "\n");
 }

-bool VWState::operator==(const FFState& o) const {
+bool VWState::operator==(const FFState& o) const
+{
  const VWState &other = static_cast<const VWState &>(o);

  return m_phrase == other.GetPhrase()
-    && m_spanStart == other.GetSpanStart()
-    && m_spanEnd == other.GetSpanEnd();
+         && m_spanStart == other.GetSpanStart()
+         && m_spanEnd == other.GetSpanEnd();
 }

-void VWState::ComputeHash() {
+void VWState::ComputeHash()
+{
  m_hash = 0;

  boost::hash_combine(m_hash, m_phrase);
@ -62,7 +68,8 @@ void VWState::ComputeHash() {
  boost::hash_combine(m_hash, m_spanEnd);
 }

-std::ostream &operator<<(std::ostream &out, const VWState &state) {
+std::ostream &operator<<(std::ostream &out, const VWState &state)
+{
  out << state.GetPhrase() << "::" << state.GetSpanStart() << "-" << state.GetSpanEnd();
  return out;
 }
--- a/moses/FF/VW/VWState.h
+++ b/moses/FF/VW/VWState.h
@ -6,15 +6,17 @@
 #include "moses/Phrase.h"
 #include "moses/Hypothesis.h"

-namespace Moses {
+namespace Moses
+{

 /**
 * VW state, used in decoding (when target context is enabled).
 */
-class VWState : public FFState {
+class VWState : public FFState
+{
 public:
  // empty state, used only when VWState is ignored
-  VWState(); 
+  VWState();

  // used for construction of the initial VW state
  VWState(const Phrase &phrase);
--- a/moses/FF/VW/VWTargetSentence.h
+++ b/moses/FF/VW/VWTargetSentence.h
@ -13,7 +13,8 @@ namespace Moses
 /**
 * VW thread-specific data about target sentence.
 */
-class VWTargetSentence {
+class VWTargetSentence
+{
 public:
  VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}

--- a/moses/ReorderingConstraint.cpp
+++ b/moses/ReorderingConstraint.cpp
@ -246,12 +246,12 @@ std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj)

  out << "Walls:";
  for (size_t i = 0; i < obj.m_size; ++i) {
-      out << obj.m_wall[i];
+    out << obj.m_wall[i];
  }

  out << " Local walls:";
  for (size_t i = 0; i < obj.m_size; ++i) {
-      out << obj.m_localWall[i] << " ";
+    out << obj.m_localWall[i] << " ";
  }

  return out;