mosesdecoder/moses/TranslationModel/RuleTable/LoaderCompact.cpp

/***********************************************************************
 Moses - statistical machine translation system
 Copyright (C) 2006-2011 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#include "LoaderCompact.h"

#include "moses/AlignmentInfoCollection.h"
#include "moses/InputFileStream.h"
#include "moses/UserMessage.h"
#include "moses/Util.h"
#include "moses/Word.h"
#include "Trie.h"

#include <istream>
#include <sstream>

namespace Moses
{

bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
                                  const std::vector<FactorType> &output,
                                  const std::string &inFile,
                                  size_t /* tableLimit */,
                                  RuleTableTrie &ruleTable)
{
  PrintUserTime("Start loading compact rule table");

  InputFileStream inStream(inFile);
  LineReader reader(inStream);

  // Read and check version number.
  reader.ReadLine();
  if (reader.m_line != "1") {
    std::stringstream msg;
    msg << "Unexpected compact rule table format: " << reader.m_line;
    UserMessage::Add(msg.str());
    return false;
  }

  // Load vocabulary.
  std::vector<Word> vocab;
  LoadVocabularySection(reader, input, vocab);

  // Load source phrases.
  std::vector<Phrase> sourcePhrases;
  std::vector<size_t> sourceLhsIds;
  LoadPhraseSection(reader, vocab, sourcePhrases, sourceLhsIds);

  // Load target phrases.
  std::vector<Phrase> targetPhrases;
  std::vector<size_t> targetLhsIds;
  LoadPhraseSection(reader, vocab, targetPhrases, targetLhsIds);

  // Load alignments.
  std::vector<const AlignmentInfo *> alignmentSets;
  LoadAlignmentSection(reader, alignmentSets, sourcePhrases);

  // Load rules.
  if (!LoadRuleSection(reader, vocab, sourcePhrases, targetPhrases,
                       targetLhsIds, alignmentSets,
                       ruleTable)) {
    return false;
  }

  // Sort and prune each target phrase collection.
  SortAndPrune(ruleTable);

  return true;
}

// Reads the vocabulary section: one line holding the symbol count, followed
// by one line per symbol.  vocabulary is resized to the symbol count and
// entry k is built from the k-th symbol line.
//
// A symbol whose line starts with '[' and ends with ']' is treated as a
// non-terminal; the surrounding brackets are removed before the Word is
// created.  Every Word is created with Input as the first argument to
// CreateFromString() and with the supplied factorTypes.
//
// The count is parsed with std::atoi and no validation is done here.
void RuleTableLoaderCompact::LoadVocabularySection(
  LineReader &reader,
  const std::vector<FactorType> &factorTypes,
  std::vector<Word> &vocabulary)
{
  // Symbol count comes first.
  reader.ReadLine();
  const size_t symbolCount = std::atoi(reader.m_line.c_str());

  // One Word per following line, in file order.
  vocabulary.resize(symbolCount);
  for (std::vector<Word>::iterator word = vocabulary.begin();
       word != vocabulary.end(); ++word) {
    reader.ReadLine();
    const size_t length = reader.m_line.size();
    const bool bracketed =
      (reader.m_line[0] == '[' && reader.m_line[length-1] == ']');
    if (bracketed) {
      // Drop the enclosing '[' and ']'.
      reader.m_line = reader.m_line.substr(1, length-2);
    }
    word->CreateFromString(Input, factorTypes, reader.m_line, bracketed);
  }
}

// Reads one phrase section: a line holding the phrase count, followed by one
// line per phrase.  On each phrase line the first token is stored in lhsIds
// as the vocabulary ID of the left-hand side, and every remaining token is a
// vocabulary ID whose Word is appended to the matching entry of rhsPhrases.
//
// Both output vectors are resized to the phrase count.  Token values are
// parsed with std::atoi and used as indices without range checks, so the
// section is assumed to be well-formed.
void RuleTableLoaderCompact::LoadPhraseSection(
  LineReader &reader,
  const std::vector<Word> &vocab,
  std::vector<Phrase> &rhsPhrases,
  std::vector<size_t> &lhsIds)
{
  // Phrase count comes first.
  reader.ReadLine();
  const size_t numPhrases = std::atoi(reader.m_line.c_str());

  rhsPhrases.resize(numPhrases, Phrase(0));
  lhsIds.resize(numPhrases);

  // Start offsets of the tokens on the current line; reused across lines.
  std::vector<size_t> offsets;
  for (size_t row = 0; row < numPhrases; ++row) {
    reader.ReadLine();
    offsets.clear();
    FindTokens(offsets, reader.m_line);

    const char *text = reader.m_line.c_str();

    // Token 0: vocabulary ID of the LHS symbol.
    lhsIds[row] = std::atoi(text + offsets[0]);

    // Tokens 1..n: vocabulary IDs of the RHS symbols, in order.
    Phrase &rhs = rhsPhrases[row];
    for (size_t t = 1; t < offsets.size(); ++t) {
      const int symbolId = std::atoi(text + offsets[t]);
      rhs.AddWord(vocab[symbolId]);
    }
  }
}

// Reads the alignment section: a line holding the alignment set count,
// followed by one line per set.  Each line is a whitespace-separated list of
// "a-b" pairs.
//
// Every set is split in two: pairs whose first index points at a
// non-terminal word, and all remaining pairs.  Both halves are registered
// with AlignmentInfoCollection and the returned pointers are stored as
//   alignmentSets[2*k]     -> non-terminal alignments of set k
//   alignmentSets[2*k + 1] -> terminal alignments of set k
// so alignmentSets ends up with twice as many entries as there are sets.
//
// NOTE(review): the word used to classify a pair is taken from
// sourcePhrases[k], i.e. the source phrase whose index equals the alignment
// set index.  Confirm that sets and source phrases really correspond
// one-to-one; nothing in this function checks it.
void RuleTableLoaderCompact::LoadAlignmentSection(
  LineReader &reader, std::vector<const AlignmentInfo *> &alignmentSets, std::vector<Phrase> &sourcePhrases)
{
  // Alignment set count comes first.
  reader.ReadLine();
  const size_t numSets = std::atoi(reader.m_line.c_str());

  alignmentSets.resize(numSets * 2);

  // Scratch containers, reused for every set.
  AlignmentInfo::CollType termPoints, nonTermPoints;
  std::vector<std::string> pairTokens;
  std::vector<size_t> indices;

  for (size_t setIdx = 0; setIdx < numSets; ++setIdx) {
    termPoints.clear();
    nonTermPoints.clear();
    pairTokens.clear();

    reader.ReadLine();
    Tokenize(pairTokens, reader.m_line);

    for (size_t k = 0; k < pairTokens.size(); ++k) {
      // Split "a-b" into its two numeric halves.
      indices.clear();
      Tokenize<size_t>(indices, pairTokens[k], "-");
      std::pair<size_t, size_t> point(indices[0], indices[1]);

      // Route the pair according to the kind of word at its first index.
      const bool onNonTerminal =
        sourcePhrases[setIdx].GetWord(point.first).IsNonTerminal();
      AlignmentInfo::CollType &bucket =
        onNonTerminal ? nonTermPoints : termPoints;
      bucket.insert(point);
    }

    // Look both halves up in the shared collection and keep the pointers.
    const size_t slot = setIdx * 2;
    alignmentSets[slot] = AlignmentInfoCollection::Instance().Add(nonTermPoints);
    alignmentSets[slot + 1] = AlignmentInfoCollection::Instance().Add(termPoints);
  }
}

// Reads the rule section: a line holding the rule count, followed by one
// line per rule.  A rule line is expected to contain, in order: a source
// phrase ID, a target phrase ID, an alignment set ID, one score for each
// score component of ruleTable, and a token starting with ':'.  Whatever
// follows that token is ignored.
//
// For every rule a TargetPhrase is created, given its non-terminal alignment
// and target LHS, evaluated against the source phrase, and added to the
// collection obtained from GetOrCreateTargetPhraseCollection().
//
// Returns false, after recording a message with UserMessage::Add(), when a
// line has too few tokens, when the token after the scores does not start
// with ':', or when an ID does not fit the tables passed in.  Returns true
// once all rules have been added.
bool RuleTableLoaderCompact::LoadRuleSection(
  LineReader &reader,
  const std::vector<Word> &vocab,
  const std::vector<Phrase> &sourcePhrases,
  const std::vector<Phrase> &targetPhrases,
  const std::vector<size_t> &targetLhsIds,
  const std::vector<const AlignmentInfo *> &alignmentSets,
  RuleTableTrie &ruleTable)
{
  // Read rule count.
  reader.ReadLine();
  const size_t ruleCount = std::atoi(reader.m_line.c_str());

  // Read rules and add to table.
  const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
  // Three ID tokens, then the scores, then the ':' separator token.
  const size_t separatorIndex = 3 + numScoreComponents;
  std::vector<float> scoreVector(numScoreComponents);
  std::vector<size_t> tokenPositions;
  for (size_t i = 0; i < ruleCount; ++i) {
    reader.ReadLine();

    tokenPositions.clear();
    FindTokens(tokenPositions, reader.m_line);

    // Check the line layout before indexing into tokenPositions, so that a
    // short line is reported instead of being read out of bounds.
    if (tokenPositions.size() <= separatorIndex ||
        reader.m_line[tokenPositions[separatorIndex]] != ':') {
      std::stringstream msg;
      msg << "Expected " << numScoreComponents
          << " score components followed by ':' on line "
          << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    const char *charLine = reader.m_line.c_str();

    // The first three tokens are IDs for the source phrase, target phrase,
    // and alignment set.
    const int sourcePhraseId = std::atoi(charLine+tokenPositions[0]);
    const int targetPhraseId = std::atoi(charLine+tokenPositions[1]);
    const int alignmentSetId = std::atoi(charLine+tokenPositions[2]);

    // Every ID is used as a vector index below, so reject anything that
    // does not fit.  The && chain short-circuits, which keeps the
    // targetLhsIds lookup in the last clause within bounds.
    const bool idsInRange =
      sourcePhraseId >= 0 && targetPhraseId >= 0 && alignmentSetId >= 0 &&
      static_cast<size_t>(sourcePhraseId) < sourcePhrases.size() &&
      static_cast<size_t>(targetPhraseId) < targetPhrases.size() &&
      static_cast<size_t>(targetPhraseId) < targetLhsIds.size() &&
      static_cast<size_t>(alignmentSetId) * 2 < alignmentSets.size() &&
      targetLhsIds[targetPhraseId] < vocab.size();
    if (!idsInRange) {
      std::stringstream msg;
      msg << "Phrase, alignment set, or LHS symbol ID out of range on line "
          << reader.m_lineNum;
      UserMessage::Add(msg.str());
      return false;
    }

    const Phrase &sourcePhrase = sourcePhrases[sourcePhraseId];
    const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId];
    Word sourceLHS("X"); // TODO not implemented for compact

    // alignmentSets holds two entries per alignment set: the non-terminal
    // alignments at 2*id and the terminal alignments at 2*id + 1.
    const AlignmentInfo *alignNonTerm =
      alignmentSets[static_cast<size_t>(alignmentSetId) * 2];

    // Then there should be one score for each score component.
    // NOTE(review): scoreVector is filled here but not passed to anything
    // later in this function -- confirm whether the scores are meant to be
    // attached to the target phrase.
    for (size_t j = 0; j < numScoreComponents; ++j) {
      float score = std::atof(charLine+tokenPositions[3+j]);
      scoreVector[j] = FloorScore(TransformScore(score));
    }

    // The remaining columns are currently ignored.

    // Create and score target phrase.  The LHS Word is allocated only after
    // all validation has passed, so no early return can leak it.
    const Word *targetLhs = new Word(vocab[targetLhsIds[targetPhraseId]]);
    TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase, &ruleTable);
    targetPhrase->SetAlignNonTerm(alignNonTerm);
    targetPhrase->SetTargetLHS(targetLhs);

    targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());

    // Insert rule into table.
    TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
                                     ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
    coll.Add(targetPhrase);
  }

  return true;
}

}
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`/***********************************************************************`
			`Moses - statistical machine translation system`
			`Copyright (C) 2006-2011 University of Edinburgh`
beautify 2013-05-29 21:16:15 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`This library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`
beautify 2013-05-29 21:16:15 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`This library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`
beautify 2013-05-29 21:16:15 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`You should have received a copy of the GNU Lesser General Public`
			`License along with this library; if not, write to the Free Software`
			`Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`***********************************************************************/`

move moses/src/* to moses/ 2012-11-12 23:56:18 +04:00			`#include "LoaderCompact.h"`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
move moses/src/* to moses/ 2012-11-12 23:56:18 +04:00			`#include "moses/AlignmentInfoCollection.h"`
			`#include "moses/InputFileStream.h"`
			`#include "moses/UserMessage.h"`
			`#include "moses/Util.h"`
			`#include "moses/Word.h"`
			`#include "Trie.h"`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`#include <istream>`
			`#include <sstream>`

			`namespace Moses`
			`{`

			`bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,`
			`const std::vector<FactorType> &output,`
FilePieceify LoaderStandard. 2012-10-14 23:52:12 +04:00			`const std::string &inFile,`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`size_t /* tableLimit */,`
Add RuleTableTrie, an abstract base class of PhraseDictionarySCFG. 2012-01-24 00:41:49 +04:00			`RuleTableTrie &ruleTable)`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`{`
			`PrintUserTime("Start loading compact rule table");`

FilePieceify LoaderStandard. 2012-10-14 23:52:12 +04:00			`InputFileStream inStream(inFile);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`LineReader reader(inStream);`

			`// Read and check version number.`
			`reader.ReadLine();`
			`if (reader.m_line != "1") {`
			`std::stringstream msg;`
			`msg << "Unexpected compact rule table format: " << reader.m_line;`
			`UserMessage::Add(msg.str());`
			`return false;`
			`}`

			`// Load vocabulary.`
			`std::vector<Word> vocab;`
			`LoadVocabularySection(reader, input, vocab);`

			`// Load source phrases.`
			`std::vector<Phrase> sourcePhrases;`
			`std::vector<size_t> sourceLhsIds;`
Remove direction from Phrase 2011-11-21 14:49:26 +04:00			`LoadPhraseSection(reader, vocab, sourcePhrases, sourceLhsIds);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`// Load target phrases.`
			`std::vector<Phrase> targetPhrases;`
			`std::vector<size_t> targetLhsIds;`
Remove direction from Phrase 2011-11-21 14:49:26 +04:00			`LoadPhraseSection(reader, vocab, targetPhrases, targetLhsIds);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`// Load alignments.`
			`std::vector<const AlignmentInfo *> alignmentSets;`
handle terminal alignments for hierarchical models 2012-04-19 22:08:06 +04:00			`LoadAlignmentSection(reader, alignmentSets, sourcePhrases);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`// Load rules.`
			`if (!LoadRuleSection(reader, vocab, sourcePhrases, targetPhrases,`
delete references to LMList 2013-05-21 16:00:26 +04:00			`targetLhsIds, alignmentSets,`
delete references to translation feature weights. Everything is done automatically via Evaluate() for each feature function 2013-05-21 17:00:28 +04:00			`ruleTable)) {`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`return false;`
			`}`

			`// Sort and prune each target phrase collection.`
			`SortAndPrune(ruleTable);`

			`return true;`
			`}`

			`void RuleTableLoaderCompact::LoadVocabularySection(`
beautify 2013-05-29 21:16:15 +04:00			`LineReader &reader,`
			`const std::vector<FactorType> &factorTypes,`
			`std::vector<Word> &vocabulary)`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`{`
			`// Read symbol count.`
			`reader.ReadLine();`
			`const size_t vocabSize = std::atoi(reader.m_line.c_str());`

			`// Read symbol lines and create Word objects.`
			`vocabulary.resize(vocabSize);`
			`for (size_t i = 0; i < vocabSize; ++i) {`
			`reader.ReadLine();`
			`const size_t len = reader.m_line.size();`
			`bool isNonTerm = (reader.m_line[0] == '[' && reader.m_line[len-1] == ']');`
			`if (isNonTerm) {`
			`reader.m_line = reader.m_line.substr(1, len-2);`
			`}`
			`vocabulary[i].CreateFromString(Input, factorTypes, reader.m_line, isNonTerm);`
			`}`
			`}`

			`void RuleTableLoaderCompact::LoadPhraseSection(`
beautify 2013-05-29 21:16:15 +04:00			`LineReader &reader,`
			`const std::vector<Word> &vocab,`
			`std::vector<Phrase> &rhsPhrases,`
			`std::vector<size_t> &lhsIds)`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`{`
			`// Read phrase count.`
			`reader.ReadLine();`
			`const size_t phraseCount = std::atoi(reader.m_line.c_str());`

			`// Reads lines, storing Phrase object for each RHS and vocab ID for each LHS.`
Remove direction from Phrase 2011-11-21 14:49:26 +04:00			`rhsPhrases.resize(phraseCount, Phrase(0));`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`lhsIds.resize(phraseCount);`
			`std::vector<size_t> tokenPositions;`
			`for (size_t i = 0; i < phraseCount; ++i) {`
			`reader.ReadLine();`
			`tokenPositions.clear();`
			`FindTokens(tokenPositions, reader.m_line);`
			`const char *charLine = reader.m_line.c_str();`
			`lhsIds[i] = std::atoi(charLine+tokenPositions[0]);`
			`for (size_t j = 1; j < tokenPositions.size(); ++j) {`
			`rhsPhrases[i].AddWord(vocab[std::atoi(charLine+tokenPositions[j])]);`
			`}`
			`}`
			`}`

			`void RuleTableLoaderCompact::LoadAlignmentSection(`
beautify 2013-05-29 21:16:15 +04:00			`LineReader &reader, std::vector<const AlignmentInfo *> &alignmentSets, std::vector<Phrase> &sourcePhrases)`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`{`
			`// Read alignment set count.`
			`reader.ReadLine();`
			`const size_t alignmentSetCount = std::atoi(reader.m_line.c_str());`

terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00			`alignmentSets.resize(alignmentSetCount * 2);`
			`AlignmentInfo::CollType alignTerm, alignNonTerm;`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`std::vector<std::string> tokens;`
			`std::vector<size_t> points;`
			`for (size_t i = 0; i < alignmentSetCount; ++i) {`
			`// Read alignment set, lookup in collection, and store pointer.`
beautify 2013-05-29 21:16:15 +04:00			`alignTerm.clear();`
			`alignNonTerm.clear();`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`tokens.clear();`
terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`reader.ReadLine();`
			`Tokenize(tokens, reader.m_line);`
			`std::vector<std::string>::const_iterator p;`
			`for (p = tokens.begin(); p != tokens.end(); ++p) {`
			`points.clear();`
			`Tokenize<size_t>(points, *p, "-");`
			`std::pair<size_t, size_t> alignmentPair(points[0], points[1]);`
terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00
			`if (sourcePhrases[i].GetWord(alignmentPair.first).IsNonTerminal()) {`
beautify 2013-05-29 21:16:15 +04:00			`alignNonTerm.insert(alignmentPair);`
			`} else {`
			`alignTerm.insert(alignmentPair);`
terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00			`}`

Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`}`
terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00			`alignmentSets[i*2] = AlignmentInfoCollection::Instance().Add(alignNonTerm);`
			`alignmentSets[i*2 + 1] = AlignmentInfoCollection::Instance().Add(alignTerm);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`}`
			`}`

			`bool RuleTableLoaderCompact::LoadRuleSection(`
beautify 2013-05-29 21:16:15 +04:00			`LineReader &reader,`
			`const std::vector<Word> &vocab,`
			`const std::vector<Phrase> &sourcePhrases,`
			`const std::vector<Phrase> &targetPhrases,`
			`const std::vector<size_t> &targetLhsIds,`
			`const std::vector<const AlignmentInfo *> &alignmentSets,`
			`RuleTableTrie &ruleTable)`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`{`
			`// Read rule count.`
			`reader.ReadLine();`
			`const size_t ruleCount = std::atoi(reader.m_line.c_str());`

			`// Read rules and add to table.`
change format for phrase model 2013-02-22 23:17:57 +04:00			`const size_t numScoreComponents = ruleTable.GetNumScoreComponents();`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`std::vector<float> scoreVector(numScoreComponents);`
			`std::vector<size_t> tokenPositions;`
			`for (size_t i = 0; i < ruleCount; ++i) {`
			`reader.ReadLine();`

			`tokenPositions.clear();`
			`FindTokens(tokenPositions, reader.m_line);`

			`const char *charLine = reader.m_line.c_str();`

			`// The first three tokens are IDs for the source phrase, target phrase,`
			`// and alignment set.`
			`const int sourcePhraseId = std::atoi(charLine+tokenPositions[0]);`
			`const int targetPhraseId = std::atoi(charLine+tokenPositions[1]);`
			`const int alignmentSetId = std::atoi(charLine+tokenPositions[2]);`

			`const Phrase &sourcePhrase = sourcePhrases[sourcePhraseId];`
			`const Phrase &targetPhrasePhrase = targetPhrases[targetPhraseId];`
Make lhsTarget a pointer 2013-05-22 14:22:17 +04:00			`const Word *targetLhs = new Word(vocab[targetLhsIds[targetPhraseId]]);`
fix bug for tree-to-string. Didn't check sourceLHS 2011-10-24 15:54:42 +04:00			`Word sourceLHS("X"); // TODO not implemented for compact`
terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00			`const AlignmentInfo *alignNonTerm = alignmentSets[alignmentSetId];`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`// Then there should be one score for each score component.`
			`for (size_t j = 0; j < numScoreComponents; ++j) {`
			`float score = std::atof(charLine+tokenPositions[3+j]);`
			`scoreVector[j] = FloorScore(TransformScore(score));`
			`}`
			`if (reader.m_line[tokenPositions[3+numScoreComponents]] != ':') {`
			`std::stringstream msg;`
			`msg << "Size of scoreVector != number ("`
			`<< scoreVector.size() << "!=" << numScoreComponents`
			`<< ") of score components on line " << reader.m_lineNum;`
			`UserMessage::Add(msg.str());`
			`return false;`
			`}`

			`// The remaining columns are currently ignored.`

			`// Create and score target phrase.`
initialise m_container 2014-08-04 18:59:32 +04:00			`TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase, &ruleTable);`
terminal and non-terminal word alignments stored in 2 separate objects 2012-10-19 18:10:10 +04:00			`targetPhrase->SetAlignNonTerm(alignNonTerm);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`targetPhrase->SetTargetLHS(targetLhs);`

figure out which feature function to apply at which decode step. Book-keeping 2013-05-30 21:34:10 +04:00			`targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());`
start framework for evaluating target phrase score 2013-05-02 20:07:03 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`// Insert rule into table.`
			`TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(`
beautify 2013-05-29 21:16:15 +04:00			`ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`coll.Add(targetPhrase);`
			`}`

			`return true;`
			`}`

			`}`