mosesdecoder/kenlm/lm/sri.hh
2010-09-28 16:26:55 +00:00

103 lines
2.5 KiB
C++

#ifndef LM_SRI__
#define LM_SRI__
#include "lm/facade.hh"
#include "util/murmur_hash.hh"
#include <cmath>
#include <exception>
#include <memory>
class Ngram;
class Vocab;
/* The ngram length reported here comes from an SRI API call whose meaning
 * is uncertain, so it may be wrong.
 *
 * See ngram, which should return equivalent results.
 */
namespace lm {
namespace sri {
// Largest n-gram order this wrapper is sized for: State reserves
// kMaxOrder - 1 slots of word history.
static const unsigned int kMaxOrder = 6;
/* This should match VocabIndex found in SRI's Vocab.h.
 * It is defined here independently because SRI's headers pollute the
 * namespace and increase compile time.
 * Extracting the definition from their header is difficult and would
 * break packaging anyway.
 * If the two types differ there will be a compiler error in ActuallyCall.
 */
typedef unsigned int SRIVocabIndex;
// Scoring state handed into and out of Model::FullScore: a fixed-size array
// of word indices plus a count of how many leading entries are in use.
// operator== and hash_value for State only look at the first valid_length_
// entries of history_.
class State {
public:
// You shouldn't need to touch these, but they're public so State will be a POD.
// If valid_length_ < kMaxOrder - 1 then history_[valid_length_] == Vocab_None.
// Word indices making up the context; room for kMaxOrder - 1 entries.
SRIVocabIndex history_[kMaxOrder - 1];
// Number of leading entries of history_ that are meaningful.
unsigned char valid_length_;
};
// Two states compare equal when they carry the same number of valid history
// entries and those entries match position by position.  Slots at or beyond
// valid_length_ are never inspected.
inline bool operator==(const State &left, const State &right) {
  const unsigned int used = left.valid_length_;
  if (used != right.valid_length_) return false;
  for (unsigned int i = 0; i < used; ++i) {
    if (left.history_[i] != right.history_[i]) {
      return false;
    }
  }
  return true;
}
// Hash of a State.  Only the bytes of the first valid_length_ history entries
// are fed to the hash, which keeps it consistent with operator== (unused
// slots are ignored by both).
inline size_t hash_value(const State &state) {
  const size_t used_bytes = sizeof(SRIVocabIndex) * state.valid_length_;
  return util::MurmurHashNative(&state.history_, used_bytes);
}
// Vocabulary exposed through the base::Vocabulary interface while owning an
// SRI Vocab object (sri_).  All string lookups funnel into the
// const char * overload of Index, which is defined out of line.
class Vocabulary : public base::Vocabulary {
  public:
    Vocabulary();

    ~Vocabulary();

    // Lookup from a StringPiece.  The bytes are first copied into a temporary
    // std::string so the const char * overload is given a null-terminated
    // buffer; the temporary lives until the call returns.
    WordIndex Index(const StringPiece &str) const {
      return Index(std::string(str.data(), str.length()).c_str());
    }

    // Lookup from a std::string; forwards its C string.
    WordIndex Index(const std::string &str) const {
      const char *const raw = str.c_str();
      return Index(raw);
    }

    // Lookup from a null-terminated C string (implemented elsewhere).
    WordIndex Index(const char *str) const;

    // Text of the word stored at the given index (implemented elsewhere).
    const char *Word(WordIndex index) const;

  private:
    // Model needs access to FinishedLoading and sri_.
    friend class Model;

    void FinishedLoading();

    // auto_ptr is used in place of scoped_ptr to avoid depending on boost;
    // since the parent class isn't copyable the two behave the same here.
    mutable std::auto_ptr<Vocab> sri_;
};
// Language model exposed through base::ModelFacade, using State as the
// scoring state and Vocabulary as the word map.  It owns an SRI Ngram object
// (sri_) alongside its Vocabulary.
class Model : public base::ModelFacade<Model, State, Vocabulary> {
public:
// NOTE(review): presumably file_name is the model file to load and
// ngram_length is the model order -- confirm against the implementation.
Model(const char *file_name, unsigned int ngram_length);
~Model();
// Scores new_word given in_state; out_state is a non-const reference that
// receives the resulting state.  Defined out of line.
FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;
private:
// Declared before sri_, so it is constructed first.
Vocabulary vocab_;
// Owned SRI Ngram object; mutable so const member functions can use it.
mutable std::auto_ptr<Ngram> sri_;
// NOTE(review): presumably the index used for words that are not in the
// vocabulary -- confirm against the implementation.
WordIndex not_found_;
};
} // namespace sri
} // namespace lm
#endif // LM_SRI__