// mosesdecoder/lm/value_build.cc

#include "lm/value_build.hh"

#include "lm/model.hh"
#include "lm/read_arpa.hh"

namespace lm {
namespace ngram {

// Loads the lower-order models listed in config.rest_lower_files.
//
// config: source of the file list and of unknown_missing_logprob.  A copy with
//   write_mmap set to NULL and rest_lower_files cleared is handed to every
//   lower-order Model constructed here.
// order: order of the model being built.  Exactly order - 1 files are
//   required: element 0 is a unigram ARPA file and element i - 1 must be a
//   model of order i.
// vocab: vocabulary used to turn the words of the unigram file into indices.
//
// Throws ConfigException when the number of files is wrong and
// FormatLoadException when a file does not have the expected order or the
// unigram file declares no unigrams.  Exceptions from the readers and from
// the Model constructor propagate; models loaded so far are deleted first.
template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
  UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");
  // For order == 1 the check above accepts an empty list, but the unigram
  // loader below unconditionally reads element 0.
  UTIL_THROW_IF(config.rest_lower_files.empty(), ConfigException, "Rest costs from lower-order models need at least a unigram model file, but none was provided.");
  Config for_lower = config;
  for_lower.write_mmap = NULL;
  for_lower.rest_lower_files.clear();

  // Unigram models aren't supported, so this is a custom loader.
  // TODO: optimize the unigram loading?
  {
    util::FilePiece uni(config.rest_lower_files[0].c_str());
    std::vector<uint64_t> number;
    ReadARPACounts(uni, number);
    UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());
    // Slot 0 is written below, so an empty table cannot be accepted.
    UTIL_THROW_IF(number[0] == 0, FormatLoadException, "The unigram model " << config.rest_lower_files[0] << " does not contain any unigrams.");
    ReadNGramHeader(uni, 1);
    unigrams_.resize(number[0]);
    // NOTE(review): presumably index 0 is the unknown word; an entry read
    // below that maps to index 0 overwrites this default.
    unigrams_[0] = config.unknown_missing_logprob;
    PositiveProbWarn warn;
    for (uint64_t i = 0; i < number[0]; ++i) {
      WordIndex w;
      Prob entry;
      ReadNGram(uni, 1, vocab, &w, entry, warn);
      // w is assigned by the caller's vocabulary, not by this file, so it is
      // not guaranteed to be below the unigram count declared in the header.
      // Grow the table instead of writing past its end.
      if (w >= unigrams_.size()) {
        unigrams_.resize(static_cast<uint64_t>(w) + 1);
      }
      unigrams_[w] = entry.prob;
    }
  }

  try {
    for (unsigned int i = 2; i < order; ++i) {
      // Append the slot before allocating: if push_back throws there is no
      // model yet, and if the Model constructor throws the slot holds NULL,
      // which is safe to delete in the handler below.
      models_.push_back(static_cast<const Model*>(NULL));
      models_.back() = new Model(config.rest_lower_files[i - 1].c_str(), for_lower);
      UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i);
    }
  } catch (...) {
    // The destructor does not run when a constructor throws, so release the
    // models loaded so far here.
    for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
      delete *i;
    }
    models_.clear();
    throw;
  }

  // TODO: force/check same vocab.
}

// Deletes every model pointer held in models_, front to back.
template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
  typedef typename std::vector<const Model*>::size_type Slot;
  const Slot total = models_.size();
  for (Slot at = 0; at != total; ++at) {
    delete models_[at];
  }
}

// Explicit instantiation: the member definitions above live in this .cc file,
// so the code for LowerRestBuild<ProbingModel> is generated here.
// NOTE(review): presumably ProbingModel is the only Model type that needs
// this class -- confirm before using it with another Model.
template class LowerRestBuild<ProbingModel>;

} // namespace ngram
} // namespace lm
KenLM e3b5c55910 including rest costs for probing 2012-06-28 18:58:59 +04:00			`#include "lm/value_build.hh"`

			`#include "lm/model.hh"`
			`#include "lm/read_arpa.hh"`

Remove trailing whitespace in C++ files. 2015-04-30 08:05:11 +03:00			`namespace lm {`
KenLM e3b5c55910 including rest costs for probing 2012-06-28 18:58:59 +04:00			`namespace ngram {`

			`template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {`
			`UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");`
			`Config for_lower = config;`
KenLM 6f7913cc7ca0f7672c6d899358365f047a742bbb Mostly fixes from Tetsuo Kiso and Jonathan Graehl 2013-09-09 13:43:20 +04:00			`for_lower.write_mmap = NULL;`
KenLM e3b5c55910 including rest costs for probing 2012-06-28 18:58:59 +04:00			`for_lower.rest_lower_files.clear();`

Remove trailing whitespace in C++ files. 2015-04-30 08:05:11 +03:00			`// Unigram models aren't supported, so this is a custom loader.`
			`// TODO: optimize the unigram loading?`
KenLM e3b5c55910 including rest costs for probing 2012-06-28 18:58:59 +04:00			`{`
			`util::FilePiece uni(config.rest_lower_files[0].c_str());`
			`std::vector<uint64_t> number;`
			`ReadARPACounts(uni, number);`
			`UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());`
			`ReadNGramHeader(uni, 1);`
			`unigrams_.resize(number[0]);`
			`unigrams_[0] = config.unknown_missing_logprob;`
			`PositiveProbWarn warn;`
			`for (uint64_t i = 0; i < number[0]; ++i) {`
			`WordIndex w;`
			`Prob entry;`
			`ReadNGram(uni, 1, vocab, &w, entry, warn);`
			`unigrams_[w] = entry.prob;`
			`}`
			`}`

			`try {`
			`for (unsigned int i = 2; i < order; ++i) {`
			`models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));`
			`UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i);`
			`}`
			`} catch (...) {`
			`for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {`
			`delete *i;`
			`}`
			`models_.clear();`
			`throw;`
			`}`

Remove trailing whitespace in C++ files. 2015-04-30 08:05:11 +03:00			`// TODO: force/check same vocab.`
KenLM e3b5c55910 including rest costs for probing 2012-06-28 18:58:59 +04:00			`}`

			`template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {`
			`for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {`
			`delete *i;`
			`}`
			`}`

			`template class LowerRestBuild<ProbingModel>;`

			`} // namespace ngram`
			`} // namespace lm`