/*
 * KENLM.h
 *
 * Created on: 4 Nov 2015
 * Author: hieu
 */
#ifndef FF_LM_KENLM_H_
#define FF_LM_KENLM_H_
#include <boost/shared_ptr.hpp>
2015-11-04 16:09:53 +03:00
#include "../FF/StatefulFeatureFunction.h"
2015-11-04 16:03:26 +03:00
#include "lm/model.hh"
2015-11-13 01:51:13 +03:00
#include "../legacy/Factor.h"
2015-11-13 13:40:55 +03:00
#include "../legacy/Util2.h"
#include "../Word.h"
2015-11-04 16:03:26 +03:00
2015-12-10 23:49:30 +03:00
namespace Moses2
{
2015-11-04 17:54:20 +03:00
class Word;
2015-11-04 16:03:26 +03:00
class KENLM : public StatefulFeatureFunction
{
public:
KENLM(size_t startInd, const std::string &line);
virtual ~KENLM();
virtual void Load(System &system);
2016-01-05 17:34:59 +03:00
virtual FFState* BlankState(MemPool &pool) const;
2015-11-05 18:34:24 +03:00
2015-11-04 16:03:26 +03:00
//! return the state associated with the empty hypothesis for a given sentence
2015-12-15 18:24:57 +03:00
virtual void EmptyHypothesisState(FFState &state,
const Manager &mgr,
const InputType &input,
const Hypothesis &hypo) const;
2015-11-04 16:03:26 +03:00
virtual void
EvaluateInIsolation(MemPool &pool,
const System &system,
const Phrase &source,
const TargetPhrase &targetPhrase,
2015-11-04 16:03:26 +03:00
Scores &scores,
SCORE *estimatedScore) const;
2015-11-04 16:03:26 +03:00
2015-11-05 19:35:31 +03:00
virtual void EvaluateWhenApplied(const Manager &mgr,
2015-11-04 16:03:26 +03:00
const Hypothesis &hypo,
2015-11-12 23:34:58 +03:00
const FFState &prevState,
2015-11-05 19:35:31 +03:00
Scores &scores,
2015-11-12 23:34:58 +03:00
FFState &state) const;
2015-11-04 16:03:26 +03:00
2015-11-23 18:11:19 +03:00
/*
2015-11-23 18:00:00 +03:00
virtual void EvaluateWhenAppliedNonBatch(const Manager &mgr,
const Hypothesis &hypo,
const FFState &prevState,
Scores &scores,
FFState &state) const
{
EvaluateWhenApplied(mgr, hypo, prevState, scores, state);
}
2015-11-23 18:11:19 +03:00
*/
2015-11-23 18:00:00 +03:00
2015-11-04 16:03:26 +03:00
void SetParameter(const std::string& key, const std::string& value);
virtual void InitializeForInput(const Manager &mgr) const;
// clean up temporary memory, called after processing each sentence
virtual void CleanUpAfterSentenceProcessing(const Manager &mgr) const;
2015-11-04 16:03:26 +03:00
protected:
std::string m_path;
2015-11-13 13:40:55 +03:00
FactorType m_factorType;
2015-11-06 12:04:19 +03:00
bool m_lazy;
2015-11-13 01:51:13 +03:00
const Factor *m_bos;
const Factor *m_eos;
2015-11-04 16:03:26 +03:00
typedef lm::ngram::ProbingModel Model;
boost::shared_ptr<Model> m_ngram;
2015-11-04 17:54:20 +03:00
void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const;
2015-11-04 17:54:20 +03:00
inline lm::WordIndex TranslateID(const Word &word) const
{
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
// Convert last words of hypothesis into vocab ids, returning an end pointer.
2015-11-04 19:11:56 +03:00
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const;
2015-11-04 17:54:20 +03:00
std::vector<lm::WordIndex> m_lmIdLookup;
float ScoreAndCache(const Manager &mgr, const lm::ngram::State &in_state, const lm::WordIndex new_word, lm::ngram::State &out_state) const;
2015-11-04 16:03:26 +03:00
};
2015-12-10 23:49:30 +03:00
}
2015-11-04 16:03:26 +03:00
#endif /* FF_LM_KENLM_H_ */