mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-10 10:59:21 +03:00
770df2a92d
time I copied to Moses was getting annoying. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3587 1f5c12ca-751b-0410-a591-d2e778427230
103 lines
2.5 KiB
C++
103 lines
2.5 KiB
C++
#ifndef LM_SRI__
|
|
#define LM_SRI__
|
|
|
|
#include "lm/facade.hh"
|
|
#include "util/murmur_hash.hh"
|
|
|
|
#include <cmath>
|
|
#include <exception>
|
|
#include <memory>
|
|
|
|
class Ngram;
|
|
class Vocab;
|
|
|
|
/* The n-gram length reported here relies on an API call whose behavior I
 * have not verified, so it may be wrong.
 *
 * See ngram, which should return equivalent results.
 */
|
|
|
|
namespace lm {
|
|
namespace sri {
|
|
|
|
// Sizes State::history_, which holds kMaxOrder - 1 vocabulary indices.
// NOTE(review): presumably the highest n-gram order this wrapper supports -- confirm.
static const unsigned int kMaxOrder = 6;
|
|
|
|
/* This should match VocabIndex found in SRI's Vocab.h.
 * It is defined here independently because SRI's headers pollute and
 * increase compile time; extracting the typedef from their header is
 * difficult and would break packaging anyway.
 * If the two differ there will be a compiler error in ActuallyCall.
 */
|
|
// Element type of State::history_.
typedef unsigned int SRIVocabIndex;
|
|
|
|
class State {
|
|
public:
|
|
// You shouldn't need to touch these, but they're public so State will be a POD.
|
|
// If valid_length_ < kMaxOrder - 1 then history_[valid_length_] == Vocab_None.
|
|
SRIVocabIndex history_[kMaxOrder - 1];
|
|
unsigned char valid_length_;
|
|
};
|
|
|
|
// Two states compare equal when they carry the same number of valid history
// entries and those entries match position by position. Entries at or beyond
// valid_length_ are never inspected.
inline bool operator==(const State &left, const State &right) {
  const unsigned int length = left.valid_length_;
  if (length != right.valid_length_) return false;

  for (unsigned int i = 0; i < length; ++i) {
    if (left.history_[i] != right.history_[i]) return false;
  }
  return true;
}
|
|
|
|
inline size_t hash_value(const State &state) {
|
|
return util::MurmurHashNative(&state.history_, sizeof(SRIVocabIndex) * state.valid_length_);
|
|
}
|
|
|
|
class Vocabulary : public base::Vocabulary {
|
|
public:
|
|
Vocabulary();
|
|
|
|
~Vocabulary();
|
|
|
|
WordIndex Index(const StringPiece &str) const {
|
|
std::string temp(str.data(), str.length());
|
|
return Index(temp.c_str());
|
|
}
|
|
WordIndex Index(const std::string &str) const {
|
|
return Index(str.c_str());
|
|
}
|
|
WordIndex Index(const char *str) const;
|
|
|
|
const char *Word(WordIndex index) const;
|
|
|
|
private:
|
|
friend class Model;
|
|
void FinishedLoading();
|
|
|
|
// The parent class isn't copyable so auto_ptr is the same as scoped_ptr
|
|
// but without the boost dependence.
|
|
mutable std::auto_ptr<Vocab> sri_;
|
|
};
|
|
|
|
// Language model facade over an SRI Ngram object, which is only
// forward-declared in this header.
class Model : public base::ModelFacade<Model, State, Vocabulary> {
  public:
    // NOTE(review): file_name is presumably the path of the model file to
    // load and ngram_length its order -- confirm against the implementation.
    Model(const char *file_name, unsigned int ngram_length);

    ~Model();

    // Scores new_word in the context given by in_state. out_state is taken by
    // non-const reference; presumably it receives the state that follows
    // new_word -- confirm against the implementation.
    FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;

  private:
    // Vocabulary owned by this model.
    Vocabulary vocab_;

    // Owning pointer to the SRI n-gram model. Declared mutable, so const
    // member functions such as FullScore may call non-const methods on it.
    mutable std::auto_ptr<Ngram> sri_;

    // NOTE(review): presumably the index used for words missing from the
    // vocabulary -- confirm against the implementation.
    WordIndex not_found_;
};
|
|
|
|
} // namespace sri
|
|
} // namespace lm
|
|
|
|
#endif // LM_SRI__
|