redo creation of input paths for lattices. Should redo OOV handling as phrase-table

2024-10-26 11:28:48 +03:00 · 2014-05-01 15:10:16 +01:00 · 2014-05-01 15:10:16 +01:00 · d4b270e796
commit d4b270e796
parent 1465cf94b7
4 changed files with 58 additions and 58 deletions
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@ -102,6 +102,14 @@ void Manager::ProcessSentence()
  }
  m_transOptColl->CreateTranslationOptions();

+  for (size_t i = 0; i < m_transOptColl->GetInputPaths().size(); ++i) {
+	  const InputPath &path = *m_transOptColl->GetInputPaths()[i];
+
+	  if (path.GetTotalRuleSize()) {
+  	    cerr << "path=" << path << endl;
+	  }
+  }
+
  // some reporting on how long this took
  IFVERBOSE(1) {
    GetSentenceStats().StopTimeCollectOpts();
--- a/moses/PDTAimp.h
+++ b/moses/PDTAimp.h
@ -450,15 +450,11 @@ public:
            stack.back().src=newSrc;
          }

-          std::cerr << "newSrc=" << newSrc << std::endl;
-
          std::vector<StringTgtCand> tcands;
          // now, look up the target candidates (aprx. TargetPhraseCollection) for
          // the current path through the CN
          m_dict->GetTargetCandidates(nextP,tcands);

-          std::cerr << "tcands=" << tcands.size() << std::endl;
-
          if(newRange.second>=exploredPaths.size()+newRange.first)
            exploredPaths.resize(newRange.second-newRange.first+1,0);
          ++exploredPaths[newRange.second-newRange.first];
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@ -58,63 +58,58 @@ TranslationOptionCollectionLattice::TranslationOptionCollectionLattice(

      path->SetNextNode(nextNode);
      m_inputPathQueue.push_back(path);
+
+      // recursive
+      Extend(*path, input);
+
    }
  }
-
-  // iteratively extend all paths
-    for (size_t endPos = 1; endPos < size; ++endPos) {
-      const std::vector<size_t> &nextNodes = input.GetNextNodes(endPos);
-
-      // loop thru every previous paths
-      size_t numPrevPaths = m_inputPathQueue.size();
-
-      for (size_t i = 0; i < numPrevPaths; ++i) {
-        //for (size_t pathInd = 0; pathInd < prevPaths.size(); ++pathInd) {
-        const InputPath &prevPath = *m_inputPathQueue[i];
-
-        size_t nextNode = prevPath.GetNextNode();
-        if (prevPath.GetWordsRange().GetEndPos() + nextNode != endPos) {
-        	continue;
-        }
-
-        size_t startPos = prevPath.GetWordsRange().GetStartPos();
-
-        if (endPos - startPos + 1 > maxPhraseLength) {
-        	continue;
-        }
-
-        WordsRange range(startPos, endPos);
-        const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
-
-        const Phrase &prevPhrase = prevPath.GetPhrase();
-        const ScorePair *prevInputScore = prevPath.GetInputScore();
-        UTIL_THROW_IF2(prevInputScore == NULL,
-        		"Null previous score");
-
-        // loop thru every word at this position
-        const ConfusionNet::Column &col = input.GetColumn(endPos);
-
-        for (size_t i = 0; i < col.size(); ++i) {
-          const Word &word = col[i].first;
-          Phrase subphrase(prevPhrase);
-          subphrase.AddWord(word);
-
-          const ScorePair &scores = col[i].second;
-          ScorePair *inputScore = new ScorePair(*prevInputScore);
-          inputScore->PlusEquals(scores);
-
-          InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
-
-          size_t nextNode = nextNodes[i];
-          path->SetNextNode(nextNode);
-
-          m_inputPathQueue.push_back(path);
-        } // for (size_t i = 0; i < col.size(); ++i) {
-
-      } // for (size_t i = 0; i < numPrevPaths; ++i) {
-    }
 }

+void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input)
+{ // Recursively extends prevPath with every word found at the next input position, queuing each longer path.
+	size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1; // first position after prevPath's range
+	if (nextPos >= input.GetSize()) {
+		return; // prevPath already reaches the end of the input; nothing to extend
+	}
+	// Every extension keeps prevPath's start position and builds on its phrase and input score.
+	size_t startPos = prevPath.GetWordsRange().GetStartPos();
+    const Phrase &prevPhrase = prevPath.GetPhrase();
+    const ScorePair *prevInputScore = prevPath.GetInputScore();
+    UTIL_THROW_IF2(prevInputScore == NULL,
+    		"Null previous score");
+	// NOTE(review): the iterative loop this function replaces skipped spans longer than maxPhraseLength;
+	// no such cap is applied in this recursion -- confirm that is intended.
+	const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos); // nextNodes[i] is read alongside col[i] below
+    // One new path is created per entry in the column at nextPos.
+    const ConfusionNet::Column &col = input.GetColumn(nextPos);
+    for (size_t i = 0; i < col.size(); ++i) {
+      const Word &word = col[i].first;
+      UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
+      // nextNode is presumably the arc's jump distance, so the arc would end at nextPos + nextNode - 1 -- TODO confirm
+      size_t nextNode = nextNodes[i];
+      size_t endPos = nextPos + nextNode - 1;
+
+      WordsRange range(startPos, endPos);
+      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
+      // Copy the previous phrase and append this entry's word.
+      Phrase subphrase(prevPhrase);
+      subphrase.AddWord(word);
+      // Start from a copy of the previous path's score and add this entry's scores to it.
+      const ScorePair &scores = col[i].second;
+      ScorePair *inputScore = new ScorePair(*prevInputScore);
+      inputScore->PlusEquals(scores);
+      // NOTE(review): inputScore is heap-allocated and handed to InputPath; who frees it is not visible here.
+      InputPath *path = new InputPath(subphrase, labels, range, &prevPath, inputScore);
+
+      path->SetNextNode(nextNode);
+      m_inputPathQueue.push_back(path);
+
+      // recurse so the path that was just queued is itself extended further
+      Extend(*path, input);
+
+    }
+}

 void TranslationOptionCollectionLattice::CreateTranslationOptions()
 {
@ -148,7 +143,7 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
    }
    else if (path.GetPhrase().GetSize() == 1) {
    	// unknown word processing
-    	ProcessOneUnknownWord(path, path.GetWordsRange().GetEndPos(), 1, path.GetInputScore());
+    	ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(),  path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
    }
  }

--- a/moses/TranslationOptionCollectionLattice.h
+++ b/moses/TranslationOptionCollectionLattice.h
@ -33,6 +33,7 @@ public:
      , size_t graphInd); // do not implement

 protected:
+  void Extend(const InputPath &prevPath, const WordLattice &input); // recursively queues longer input paths that continue prevPath; defined in TranslationOptionCollectionLattice.cpp

 };