mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-02 17:09:36 +03:00
59 lines
2.0 KiB
C++
59 lines
2.0 KiB
C++
|
#include "lm/value_build.hh"
|
||
|
|
||
|
#include "lm/model.hh"
|
||
|
#include "lm/read_arpa.hh"
|
||
|
|
||
|
namespace lm {
|
||
|
namespace ngram {
|
||
|
|
||
|
// Loads the lower-order models listed in config.rest_lower_files.
//
// config.rest_lower_files must hold exactly order - 1 paths.  Entry 0 is read
// by the custom ARPA unigram loader below into unigrams_; entry i - 1, for
// 2 <= i < order, is loaded as a Model and must report Order() == i.
//
// config: supplies rest_lower_files and unknown_missing_logprob.
// order:  order of the model these rest costs are being built for.
// vocab:  handed to ReadNGram, which reports the index of each unigram read.
//
// Throws ConfigException when the number of files does not fit the order, and
// FormatLoadException when a file has the wrong order or the unigram file
// yields a word index outside the table it declares.
template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
  UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");
  // With order == 1 the check above accepts an empty list, yet
  // rest_lower_files[0] is read unconditionally below.
  UTIL_THROW_IF(order < 2, ConfigException, "Rest costs from lower-order models need a model of order at least 2, not " << order);
  Config for_lower = config;
  // The lower-order models must not try to load rest cost files of their own.
  for_lower.rest_lower_files.clear();

  // Unigram models aren't supported, so this is a custom loader.
  // TODO: optimize the unigram loading?
  {
    util::FilePiece uni(config.rest_lower_files[0].c_str());
    std::vector<uint64_t> number;
    ReadARPACounts(uni, number);
    UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());
    // Slot 0 is written below, so the table needs at least one entry.
    UTIL_THROW_IF(number[0] == 0, FormatLoadException, "Unigram file " << config.rest_lower_files[0] << " declares zero unigrams.");
    ReadNGramHeader(uni, 1);
    unigrams_.resize(number[0]);
    // Default for index 0; overwritten if the file has an entry mapping to 0.
    unigrams_[0] = config.unknown_missing_logprob;
    PositiveProbWarn warn;
    for (uint64_t i = 0; i < number[0]; ++i) {
      WordIndex w;
      Prob entry;
      ReadNGram(uni, 1, vocab, &w, entry, warn);
      // w comes from vocab, not from this file, so it is not guaranteed to
      // fit in the number[0] slots allocated above.
      UTIL_THROW_IF(w >= number[0], FormatLoadException, "Word index " << w << " read from " << config.rest_lower_files[0] << " does not fit in the " << number[0] << " unigrams that file declares.  Does its vocabulary match the main model?");
      unigrams_[w] = entry.prob;
    }
  }

  try {
    // Reserve up front so push_back cannot throw after `new Model` succeeds,
    // which would leak the freshly allocated model.  order >= 2 here.
    models_.reserve(order - 2);
    for (unsigned int i = 2; i < order; ++i) {
      models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));
      UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i);
    }
  } catch (...) {
    // The destructor does not run when a constructor throws, so free the
    // models loaded so far before propagating the exception.
    for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
      delete *i;
    }
    models_.clear();
    throw;
  }

  // TODO: force/check same vocab.
}
|
||
|
|
||
|
// Frees every lower-order model that the constructor allocated with new and
// stored in models_.
template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
  typedef typename std::vector<const Model*>::const_iterator ModelIterator;
  ModelIterator current = models_.begin();
  const ModelIterator stop = models_.end();
  while (current != stop) {
    delete *current;
    ++current;
  }
}
|
||
|
|
||
|
// Explicit instantiation: emits the LowerRestBuild member definitions from
// this file for ProbingModel.
template class LowerRestBuild<ProbingModel>;
|
||
|
|
||
|
} // namespace ngram
|
||
|
} // namespace lm
|