mosesdecoder/moses/TranslationModel/RuleTable/LoaderCompact.h

/***********************************************************************
 Moses - statistical machine translation system
 Copyright (C) 2006-2011 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#pragma once

#include "moses/Phrase.h"
#include "moses/Word.h"
#include "moses/TypeDef.h"
#include "Loader.h"

#include <istream>
#include <string>
#include <vector>

namespace Moses
{
class RuleTableTrie;

//! @todo ask phil williams
//! RuleTableLoader subclass whose loading routines are declared here and
//! defined out of line.  NOTE(review): judging by the class name this reads
//! the "compact" rule table format -- confirm against the implementation file.
class RuleTableLoaderCompact : public RuleTableLoader
{
public:
  //! Public entry point (defined out of line).  Takes the input and output
  //! factor type lists, the name of the file to read, a table limit and the
  //! RuleTableTrie to operate on; reports its outcome as a bool.
  //! NOTE(review): presumably true means success -- verify in the definition.
  bool Load(const std::vector<FactorType> &input,
            const std::vector<FactorType> &output,
            const std::string &inFile,
            size_t tableLimit,
            RuleTableTrie &);

private:
  //! Thin wrapper around an input stream that keeps the most recently read
  //! line and a running count of ReadLine() calls.  The stream is held by
  //! reference, so it must outlive the LineReader.
  struct LineReader {
    LineReader(std::istream &input) : m_input(input), m_lineNum(0) {}

    //! Reads the next line from the stream into m_line and bumps m_lineNum.
    //!
    //! Returns true if the read succeeded and false if the stream is in a
    //! failed state afterwards (e.g. end of input was reached before any
    //! character could be extracted).  Callers that ignore the result keep
    //! the historical "assume everything is fine" behaviour unchanged.
    //!
    //! On failure m_line is left exactly as std::getline leaves it -- either
    //! empty or still holding the previous line -- so it must not be trusted
    //! when false is returned.  m_lineNum is incremented regardless.
    bool ReadLine() {
      std::getline(m_input, m_line);
      ++m_lineNum;
      return !m_input.fail();
    }

    std::istream &m_input;  //!< Source stream (not owned).
    std::string m_line;     //!< Text of the last line read.
    size_t m_lineNum;       //!< Number of ReadLine() calls made so far.
  };

  // Section loaders, all defined out of line.  Each one receives the shared
  // LineReader; presumably the const containers are inputs produced by
  // earlier sections and the non-const ones are filled in by the call --
  // TODO confirm against the definitions.

  void LoadVocabularySection(LineReader &,
                             const std::vector<FactorType> &,
                             std::vector<Word> &);

  void LoadPhraseSection(LineReader &,
                         const std::vector<Word> &,
                         std::vector<Phrase> &,
                         std::vector<size_t> &);

  void LoadAlignmentSection(LineReader &,
                            std::vector<const AlignmentInfo *> &,
                            std::vector<Phrase> &);

  bool LoadRuleSection(LineReader &,
                       const std::vector<Word> &,
                       const std::vector<Phrase> &,
                       const std::vector<Phrase> &,
                       const std::vector<size_t> &,
                       const std::vector<const AlignmentInfo *> &,
                       RuleTableTrie &ruleTable);

  // Like Tokenize() but records starting positions of tokens (instead of
  // copying substrings) and assumes delimiter is ASCII space character.
  //
  // For every maximal run of non-space characters in str, the index of its
  // first character is appended to output.  output is not cleared first, so
  // existing entries are kept.  An empty or all-space string appends nothing.
  void FindTokens(std::vector<size_t> &output, const std::string &str) const {
    const char kDelim = ' ';

    // Start of the first token; npos when there is no token at all.
    size_t tokenStart = str.find_first_not_of(kDelim);

    while (tokenStart != std::string::npos) {
      output.push_back(tokenStart);
      // Locate the delimiter that ends this token, then skip the whole run
      // of delimiters.  Both searches yield npos once the string is
      // exhausted, which terminates the loop.
      const size_t tokenEnd = str.find_first_of(kDelim, tokenStart);
      tokenStart = str.find_first_not_of(kDelim, tokenEnd);
    }
  }
};

}  // namespace Moses
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`/***********************************************************************`
			`Moses - statistical machine translation system`
			`Copyright (C) 2006-2011 University of Edinburgh`
beautify 2013-05-29 21:16:15 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`This library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`
beautify 2013-05-29 21:16:15 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`This library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`
beautify 2013-05-29 21:16:15 +04:00
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`You should have received a copy of the GNU Lesser General Public`
			`License along with this library; if not, write to the Free Software`
			`Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`***********************************************************************/`

			`#pragma once`

move moses/src/* to moses/ 2012-11-12 23:56:18 +04:00			`#include "moses/Phrase.h"`
			`#include "moses/Word.h"`
			`#include "moses/TypeDef.h"`
			`#include "Loader.h"`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`#include <istream>`
			`#include <string>`
			`#include <vector>`

			`namespace Moses`
			`{`
Add RuleTableTrie, an abstract base class of PhraseDictionarySCFG. 2012-01-24 00:41:49 +04:00			`class RuleTableTrie;`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
commented all classes in moses lib 2012-06-29 02:29:46 +04:00			`//! @todo ask phil williams`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`class RuleTableLoaderCompact : public RuleTableLoader`
			`{`
beautify 2013-05-29 21:16:15 +04:00			`public:`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`bool Load(const std::vector<FactorType> &input,`
			`const std::vector<FactorType> &output,`
FilePieceify LoaderStandard. 2012-10-14 23:52:12 +04:00			`const std::string &inFile,`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`size_t tableLimit,`
Add RuleTableTrie, an abstract base class of PhraseDictionarySCFG. 2012-01-24 00:41:49 +04:00			`RuleTableTrie &);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
beautify 2013-05-29 21:16:15 +04:00			`private:`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`struct LineReader {`
			`LineReader(std::istream &input) : m_input(input), m_lineNum(0) {}`
			`void ReadLine() {`
			`std::getline(m_input, m_line);`
			`// Assume everything's hunky-dory.`
			`++m_lineNum;`
			`}`
			`std::istream &m_input;`
			`std::string m_line;`
			`size_t m_lineNum;`
			`};`

			`void LoadVocabularySection(LineReader &,`
			`const std::vector<FactorType> &,`
			`std::vector<Word> &);`

			`void LoadPhraseSection(LineReader &,`
			`const std::vector<Word> &,`
			`std::vector<Phrase> &,`
			`std::vector<size_t> &);`

			`void LoadAlignmentSection(LineReader &,`
handle terminal alignments for hierarchical models 2012-04-19 22:08:06 +04:00			`std::vector<const AlignmentInfo *> &,`
			`std::vector<Phrase> &);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`bool LoadRuleSection(LineReader &,`
			`const std::vector<Word> &,`
			`const std::vector<Phrase> &,`
			`const std::vector<Phrase> &,`
			`const std::vector<size_t> &,`
			`const std::vector<const AlignmentInfo *> &,`
Add RuleTableTrie, an abstract base class of PhraseDictionarySCFG. 2012-01-24 00:41:49 +04:00			`RuleTableTrie &ruleTable);`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00
			`// Like Tokenize() but records starting positions of tokens (instead of`
			`// copying substrings) and assumes delimiter is ASCII space character.`
beautify 2013-05-29 21:16:15 +04:00			`void FindTokens(std::vector<size_t> &output, const std::string &str) const {`
Move SCFG rule table loading code out of PhraseDictionarySCFG and into a separate RuleTableLoader class. Start adding support for a faster-loading rule table format. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4273 1f5c12ca-751b-0410-a591-d2e778427230 2011-09-27 04:34:46 +04:00			`// Skip delimiters at beginning.`
			`size_t lastPos = str.find_first_not_of(' ', 0);`
			`// Find first "non-delimiter".`
			`size_t pos = str.find_first_of(' ', lastPos);`

			`while (std::string::npos != pos \|\| std::string::npos != lastPos) {`
			`// Found a token, add it to the vector.`
			`output.push_back(lastPos);`
			`// Skip delimiters. Note the "not_of"`
			`lastPos = str.find_first_not_of(' ', pos);`
			`// Find next "non-delimiter"`
			`pos = str.find_first_of(' ', lastPos);`
			`}`
			`}`
			`};`

			`} // namespace Moses`