added some comments

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3911 1f5c12ca-751b-0410-a591-d2e778427230
phkoehn 2011-03-02 19:02:07 +00:00
parent 106c4e0fc2
commit 4ee7e5f673
2 changed files with 133 additions and 38 deletions


@@ -50,6 +50,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
using namespace Moses;
// output floats with three significant digits
static const size_t PRECISION = 3;
/** Enforce rounding */
@@ -59,8 +60,10 @@ void fix(std::ostream& stream, size_t size)
stream.precision(size);
}
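// usage: fix(cout, PRECISION) makes all following float output on the
// stream use this precision (see main() below)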
/** Translates a sentence.
* - calls the search (Manager)
* - applies the decision rule
* - outputs best translation and additional reporting
**/
class TranslationTask : public Task
{
@@ -78,17 +81,29 @@ public:
m_detailedTranslationCollector(detailedTranslationCollector),
m_alignmentInfoCollector(alignmentInfoCollector) {}
/** Translate one sentence
* gets called by main function implemented at end of this source file */
void Run() {
// report thread number
#ifdef BOOST_HAS_PTHREADS
TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
#endif
// shorthand for "global data"
const StaticData &staticData = StaticData::Instance();
// input sentence
Sentence sentence(Input);
// set translation system
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
// execute the translation
// note: this executes the search, resulting in a search graph
// we still need to apply the decision rule (MAP, MBR, ...)
Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
// output word graph
if (m_wordGraphCollector) {
ostringstream out;
fix(out,PRECISION);
@@ -96,7 +111,7 @@ public:
m_wordGraphCollector->Write(m_lineNumber, out.str());
}
// output search graph
if (m_searchGraphCollector) {
ostringstream out;
fix(out,PRECISION);
@@ -113,23 +128,23 @@ public:
manager.SerializeSearchGraphPB(m_lineNumber, output);
}
#endif
}
}
// apply decision rule and output best translation(s)
if (m_outputCollector) {
ostringstream out;
ostringstream debug;
fix(debug,PRECISION);
// all derivations - send them to debug stream
if (staticData.PrintAllDerivations()) {
manager.PrintAllDerivations(m_lineNumber, debug);
}
// MAP decoding: best hypothesis
const Hypothesis* bestHypo = NULL;
if (!staticData.UseMBR())
{
bestHypo = manager.GetBestHypothesis();
if (bestHypo) {
if (staticData.IsPathRecoveryEnabled()) {
@@ -148,7 +163,12 @@ public:
}
}
out << endl;
}
// MBR decoding (n-best MBR, lattice MBR, consensus)
else
{
// we first need the n-best translations
size_t nBestSize = staticData.GetMBRSize();
if (nBestSize <= 0) {
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
@@ -161,6 +181,7 @@ public:
PrintUserTime("calculated n-best list for (L)MBR decoding");
}
// lattice MBR
if (staticData.UseLatticeMBR()) {
if (m_nbestCollector) {
// lattice MBR n-best
@@ -179,7 +200,10 @@
PrintUserTime("finished Lattice MBR decoding");
}
}
}
// consensus decoding
else if (staticData.UseConsensusDecoding()) {
const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
OutputBestHypo(conBestHypo, m_lineNumber,
staticData.GetReportSegmentation(),
@@ -188,8 +212,10 @@
IFVERBOSE(2) {
PrintUserTime("finished Consensus decoding");
}
}
// n-best MBR decoding
else {
const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
OutputBestHypo(mbrBestHypo, m_lineNumber,
staticData.GetReportSegmentation(),
@@ -198,11 +224,14 @@ public:
IFVERBOSE(2) {
PrintUserTime("finished MBR decoding");
}
}
}
// report best translation to output collector
m_outputCollector->Write(m_lineNumber,out.str(),debug.str());
}
// output n-best list
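// (each entry is written in the usual Moses n-best format:
//  sentence-id ||| translation ||| feature scores ||| total score)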
if (m_nbestCollector && !staticData.UseLatticeMBR()) {
TrellisPathList nBestList;
ostringstream out;
@@ -211,7 +240,7 @@ public:
m_nbestCollector->Write(m_lineNumber, out.str());
}
// detailed translation reporting
if (m_detailedTranslationCollector) {
ostringstream out;
fix(out,PRECISION);
@@ -219,10 +248,10 @@ public:
m_detailedTranslationCollector->Write(m_lineNumber,out.str());
}
// report additional statistics
IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:");
}
manager.CalcDecoderStatistics();
}
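For illustration, the n-best MBR rule applied in Run() above can be reduced to a few lines: instead of taking the single highest-scoring hypothesis (MAP), it picks the candidate with the lowest expected loss against all other n-best entries. A minimal, self-contained sketch of that idea, using a toy 0/1 loss where Moses's doMBR uses a sentence-level BLEU-based measure:

#include <limits>
#include <string>
#include <vector>

// one n-best entry: a translation and its (normalized) posterior probability
struct Candidate {
  std::string text;
  double prob;
};

// toy 0/1 loss; Moses uses a BLEU-based gain instead
static double Loss(const std::string &a, const std::string &b)
{
  return (a == b) ? 0.0 : 1.0;
}

// n-best MBR: return the candidate with minimal expected loss
// under the distribution defined by the n-best list itself
static const Candidate &MbrBest(const std::vector<Candidate> &nbest)
{
  size_t best = 0;
  double bestRisk = std::numeric_limits<double>::max();
  for (size_t i = 0; i < nbest.size(); ++i) {
    double risk = 0.0;
    for (size_t j = 0; j < nbest.size(); ++j)
      risk += nbest[j].prob * Loss(nbest[i].text, nbest[j].text);
    if (risk < bestRisk) {
      bestRisk = risk;
      best = i;
    }
  }
  return nbest[best];
}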
@@ -279,29 +308,36 @@ static void ShowWeights()
}
}
/** main function of the command line version of the decoder **/
int main(int argc, char** argv)
{
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
TRACE_ERR(endl);
}
// set number of significant decimals in output
fix(cout,PRECISION);
fix(cerr,PRECISION);
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
Parameter* params = new Parameter();
if (!params->LoadParam(argc,argv)) {
params->Explain();
exit(1);
}
// create threadpool, if using multi-threaded decoding
// note: multi-threading is done on sentence-level,
// each thread translates one sentence
int threadcount = (params->GetParam("threads").size() > 0) ?
Scan<size_t>(params->GetParam("threads")[0]) : 1;
@@ -318,20 +354,23 @@ int main(int argc, char** argv)
}
#endif
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(params)) {
exit(1);
}
// setting "-show-weights" -> just dump out weights and exit
if (params->isParamSpecified("show-weights")) {
ShowWeights();
exit(0);
}
// shorthand for accessing information in StaticData
const StaticData& staticData = StaticData::Instance();
// set up read/writing class
IOWrapper* ioWrapper = GetIODevice(staticData);
if (!ioWrapper) {
cerr << "Error; Failed to create IO object" << endl;
exit(1);
@@ -353,20 +392,20 @@ int main(int argc, char** argv)
exit(1);
}
// initialize output streams
// note: we can't just write to STDOUT or files
// because multithreading may return sentences in shuffled order
auto_ptr<OutputCollector> outputCollector; // for translations
auto_ptr<OutputCollector> nbestCollector; // for n-best lists
auto_ptr<ofstream> nbestOut;
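// n-best settings from the configuration; a size of 0 disables n-best output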
size_t nbestSize = staticData.GetNBestSize();
string nbestFile = staticData.GetNBestFilePath();
if (nbestSize) {
if (nbestFile == "-" || nbestFile == "/dev/stdout") {
// nbest to stdout, no 1-best
nbestCollector.reset(new OutputCollector());
} else {
// nbest to file, 1-best to stdout
nbestOut.reset(new ofstream(nbestFile.c_str()));
assert(nbestOut->good());
nbestCollector.reset(new OutputCollector(nbestOut.get()));
@@ -376,44 +415,57 @@ int main(int argc, char** argv)
outputCollector.reset(new OutputCollector());
}
// initialize stream for word graph (aka: output lattice)
auto_ptr<OutputCollector> wordGraphCollector;
if (staticData.GetOutputWordGraph()) {
wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream())));
}
// initialize stream for search graph
// note: this is essentially the same as above, but in a different format
auto_ptr<OutputCollector> searchGraphCollector;
if (staticData.GetOutputSearchGraph()) {
searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream())));
}
// initialize stream for details about the decoder run
auto_ptr<OutputCollector> detailedTranslationCollector;
if (staticData.IsDetailedTranslationReportingEnabled()) {
detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream())));
}
// initialize stream for word alignment between input and output
auto_ptr<OutputCollector> alignmentInfoCollector;
if (!staticData.GetAlignmentOutputFile().empty()) {
alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
}
// main loop over set of input sentences
InputType* source = NULL;
size_t lineCount = 0;
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
IFVERBOSE(1) {
ResetUserTime();
}
// set up task of translating one sentence
TranslationTask* task =
new TranslationTask(lineCount,source, outputCollector.get(),
nbestCollector.get(), wordGraphCollector.get(),
searchGraphCollector.get(),
detailedTranslationCollector.get(),
alignmentInfoCollector.get() );
// execute task
#ifdef WITH_THREADS
pool.Submit(task);
#else
task->Run();
#endif
source = NULL; //make sure it doesn't get deleted
++lineCount;
}
// we are done, finishing up
#ifdef WITH_THREADS
pool.Stop(true); //flush remaining jobs
#endif
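As the comments above note, with multi-threaded decoding sentences may finish in any order, so all output is routed through OutputCollector objects that restore the original line order. A minimal sketch of that buffering idea (a hypothetical simplification without the locking the real class needs, not Moses's actual implementation):

#include <iostream>
#include <map>
#include <string>

// buffers out-of-order results and flushes them in line-number order
class OrderedWriter {
public:
  OrderedWriter(std::ostream &out) : m_out(out), m_next(0) {}

  // called when translation of line lineNumber is finished
  void Write(size_t lineNumber, const std::string &text) {
    m_pending[lineNumber] = text;
    // flush the contiguous run of lines starting at m_next
    while (!m_pending.empty() && m_pending.begin()->first == m_next) {
      m_out << m_pending.begin()->second;
      m_pending.erase(m_pending.begin());
      ++m_next;
    }
  }

private:
  std::ostream &m_out;                      // final destination
  size_t m_next;                            // next line number to emit
  std::map<size_t, std::string> m_pending;  // finished, not yet flushed
};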


@@ -73,21 +73,37 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
// get list of all rules that apply to spans at same starting position
ProcessedRuleColl &processedRuleCol = *m_processedRuleColls[range.GetStartPos()];
const ProcessedRuleList &runningNodes = processedRuleCol.GetRunningNodes();
// loop through the rules
// (note that runningNodes can be expanded as the loop runs
// through calls to ExtendPartialRuleApplication())
for (size_t ind = 0; ind < runningNodes.size(); ++ind) {
// rule we are about to extend
const ProcessedRule &prevProcessedRule = *runningNodes[ind];
// note where it was found in the prefix tree of the rule dictionary
const PhraseDictionaryNodeSCFG &prevNode = prevProcessedRule.GetLastNode();
// look up end position of the span it covers
const WordConsumed *prevWordConsumed = prevProcessedRule.GetLastWordConsumed();
// we will now try to extend it, starting after where it ended
// (note: prevWordConsumed == NULL indicates the dummy rule
// at the root of the prefix tree)
size_t startPos = (prevWordConsumed == NULL) ? range.GetStartPos() : prevWordConsumed->GetWordsRange().GetEndPos() + 1;
// search for terminal symbol
// (if only one more word position needs to be covered)
if (startPos == absEndPos) {
// look up in rule dictionary, if the current rule can be extended
// with the source word in the last position
const Word &sourceWord = GetSentence().GetWord(absEndPos);
const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(sourceWord);
// if we found a new rule -> create it and add it to the list
if (node != NULL) {
// create the rule
#ifdef USE_BOOST_POOL
WordConsumed *newWordConsumed = m_wordConsumedPool.malloc();
new (newWordConsumed) WordConsumed(absEndPos, absEndPos, sourceWord,
@@ -107,20 +123,28 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
// search for non-terminals
size_t endPos, stackInd;
// span already completely covered? nothing more can be done
if (startPos > absEndPos)
continue;
else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
// at the start of the span: a non-terminal may cover at most all but the last position
endPos = absEndPos - 1;
stackInd = relEndPos;
}
else
{
endPos = absEndPos;
stackInd = relEndPos + 1;
}
// we have to cover the remainder of the span
// source non-terminal labels for the remainder
const NonTerminalSet &sourceNonTerms =
GetSentence().GetLabelSet(startPos, endPos);
// target non-terminal labels for the remainder
const NonTerminalSet &targetNonTerms =
GetCellCollection().GetHeadwords(WordsRange(startPos, endPos));
@@ -129,19 +153,22 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
targetNonTerms, processedRuleCol);
}
// list of rules that cover the entire span
ProcessedRuleList &rules = processedRuleCol.Get(relEndPos + 1);
// look up target sides for the rules
size_t rulesLimit = StaticData::Instance().GetRuleLimit();
ProcessedRuleList::const_iterator iterRule;
for (iterRule = rules.begin(); iterRule != rules.end(); ++iterRule) {
const ProcessedRule &processedRule = **iterRule;
const PhraseDictionaryNodeSCFG &node = processedRule.GetLastNode();
const WordConsumed *wordConsumed = processedRule.GetLastWordConsumed();
assert(wordConsumed);
// look up target sides
const TargetPhraseCollection *targetPhraseCollection = node.GetTargetPhraseCollection();
// add the fully expanded rule (with lexical target side)
if (targetPhraseCollection != NULL) {
outColl.Add(*targetPhraseCollection, *wordConsumed, adhereTableLimit, rulesLimit);
}
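The lookup implemented above walks a prefix tree (trie) over rule source sides: each partial rule application remembers the trie node it has reached and is extended one terminal or non-terminal at a time until a node with target sides is found. A minimal sketch of the underlying trie walk, with hypothetical types standing in for PhraseDictionaryNodeSCFG:

#include <map>
#include <string>
#include <vector>

// a trie over rule source sides: each edge consumes one source symbol
struct RuleTrieNode {
  std::map<std::string, RuleTrieNode> children; // next symbol -> subtree
  std::vector<std::string> targetPhrases;       // rules ending at this node
};

// walk the trie along a sequence of source symbols; returns NULL if no
// rule source side has this prefix, so the extension can be abandoned
const RuleTrieNode *Extend(const RuleTrieNode *node,
                           const std::vector<std::string> &symbols)
{
  for (size_t i = 0; node != NULL && i < symbols.size(); ++i) {
    std::map<std::string, RuleTrieNode>::const_iterator it =
        node->children.find(symbols[i]);
    node = (it == node->children.end()) ? NULL : &it->second;
  }
  return node;
}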
@@ -182,19 +209,29 @@ void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication(
// for each source and target NT in the span's sets.
// We'll do whichever minimises the number of lookups:
if (numCombinations <= numChildren*2) {
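// example: with 3 source labels and 2 target labels there are 6
// combinations to probe; if this node has 3 or more non-terminal
// children (numChildren*2 >= 6), probing the 6 combinations directly
// needs fewer map lookups than scanning and filtering the full
// child map in the else-branch below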
// loop over possible source non-terminal labels (as found in input tree)
NonTerminalSet::const_iterator p = sourceNonTerms.begin();
NonTerminalSet::const_iterator sEnd = sourceNonTerms.end();
for (; p != sEnd; ++p) {
const Word & sourceNonTerm = *p;
// loop over possible target non-terminal labels (as found in chart)
NonTerminalSet::const_iterator q = targetNonTerms.begin();
NonTerminalSet::const_iterator tEnd = targetNonTerms.end();
for (; q != tEnd; ++q) {
const Word & targetNonTerm = *q;
// try to match both source and target non-terminal
const PhraseDictionaryNodeSCFG * child =
node.GetChild(sourceNonTerm, targetNonTerm);
// nothing found? then we are done
if (child == NULL) {
continue;
}
// create new rule
#ifdef USE_BOOST_POOL
WordConsumed *wc = m_wordConsumedPool.malloc();
new (wc) WordConsumed(startPos, endPos, targetNonTerm,
@@ -210,11 +247,15 @@ void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication(
processedRuleColl.Add(stackInd, rule);
}
}
}
else
{
// loop over possible expansions of the rule
PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator p;
PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator end =
nonTermMap.end();
for (p = nonTermMap.begin(); p != end; ++p) {
// does it match possible source and target non-terminals?
const PhraseDictionaryNodeSCFG::NonTerminalMapKey & key = p->first;
const Word & sourceNonTerm = key.first;
if (sourceNonTerms.find(sourceNonTerm) == sourceNonTerms.end()) {
@@ -224,6 +265,8 @@ void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication(
if (targetNonTerms.find(targetNonTerm) == targetNonTerms.end()) {
continue;
}
// create new rule
const PhraseDictionaryNodeSCFG & child = p->second;
#ifdef USE_BOOST_POOL
WordConsumed *wc = m_wordConsumedPool.malloc();