Add InMemoryPerSentenceOnDemandLM

Lane Schwartz 2017-01-02 12:57:52 -06:00
parent 999d6b6371
commit 578e65298f
7 changed files with 246 additions and 2 deletions

moses/FF/Factory.cpp

@@ -68,6 +68,7 @@
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
#include "moses/TranslationModel/SkeletonPT.h"
@@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
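For context, the MOSES_FNAME registration added above is what makes the new feature constructible by name from a decoder configuration line, just like the skeleton features around it. The snippet below is only an illustration of that registry pattern, not the actual MOSES_FNAME expansion; FeatureFunction here is a toy base class and InMemoryLMStub a made-up stand-in.

#include <iostream>
#include <map>
#include <string>

struct FeatureFunction {
  virtual ~FeatureFunction() {}
};

struct InMemoryLMStub : FeatureFunction {
  explicit InMemoryLMStub(const std::string &line) {
    std::cout << "constructed from config line: " << line << "\n";
  }
};

// A factory is simply "build one of these from its configuration line".
typedef FeatureFunction *(*Factory)(const std::string &);
template <class F> FeatureFunction *Construct(const std::string &line) {
  return new F(line);
}

int main() {
  std::map<std::string, Factory> registry;
  registry["InMemoryPerSentenceOnDemandLM"] = &Construct<InMemoryLMStub>;
  // A feature line beginning with the registered name then instantiates the feature.
  FeatureFunction *ff = registry["InMemoryPerSentenceOnDemandLM"]("InMemoryPerSentenceOnDemandLM name=LM0");
  delete ff;
  return 0;
}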

moses/LM/Implementation.cpp

@@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextF
{
if (contextFactor.size() < GetNGramOrder()) {
contextFactor.push_back(&word);
- } else {
+ } else if (GetNGramOrder() > 0) {
// shift
for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) {
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
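A note on the guard added here: the shift loop's bound is GetNGramOrder() - 1 with a size_t counter, so for a zero-order model the subtraction wraps around to a huge unsigned value instead of reaching -1, and the loop would index far past the end of contextFactor. The new GetNGramOrder() > 0 check keeps that branch from being entered at all. A minimal standalone sketch of the wraparound (illustration only, not Moses code):

#include <cstddef>
#include <iostream>

int main() {
  std::size_t order = 0;              // a zero-order model
  std::cout << order - 1 << "\n";     // unsigned wraparound: prints SIZE_MAX, not -1
  // A loop bounded by "i < order - 1" would therefore attempt ~SIZE_MAX iterations,
  // reading far out of range long before it finished.
  return 0;
}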

moses/LM/InMemoryPerSentenceOnDemandLM.cpp

@@ -0,0 +1,91 @@
#include <boost/foreach.hpp>
#include "InMemoryPerSentenceOnDemandLM.h"
#include "moses/FactorCollection.h"
#include "moses/Util.h"
#include "moses/StaticData.h"
#include "moses/TranslationTask.h"
#include "moses/ContextScope.h"
#include "moses/LM/Ken.h"
#include "lm/model.hh"
#include "util/mmap.hh"
#include <cstdio>
#include <iostream>
#include <fstream>
using namespace std;
namespace Moses
{
InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
{
ReadParameters();
}
InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
{
}
void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
// The context scope object for this translation task
// contains a map of translation task-specific data
boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();
// The key to the map is this object
void const* key = static_cast<void const*>(this);
// The value stored in the map is a string holding the language model data for this sentence
boost::shared_ptr<string> value = contextScope->get<string>(key);
// Create a stream to read the language model data
stringstream strme(*(value.get()));
// Let the library pick a unique temporary file name
const char * filename = std::tmpnam(NULL);
ofstream tmp;
tmp.open(filename);
// Copy the language model data into the temporary file, one line at a time
string line;
while (getline(strme, line)) {
tmp << line << "\n";
}
tmp.close();
LanguageModelKen<lm::ngram::ProbingModel> & lm = GetPerThreadLM();
lm.LoadModel("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm", util::POPULATE_OR_READ);
initialized = true;
VERBOSE(1, filename);
if (initialized) {
VERBOSE(1, "\tLM initialized\n");
}
// std::remove(filename);
}
LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
LanguageModelKen<lm::ngram::ProbingModel> *lm;
lm = m_perThreadLM.get();
if (lm == NULL) {
lm = new LanguageModelKen<lm::ngram::ProbingModel>();
m_perThreadLM.reset(lm);
}
assert(lm);
return *lm;
}
}
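For readers new to this corner of Moses: InitializeForInput() above treats the translation task's ContextScope as a per-task scratchpad whose keys are the addresses of the feature functions themselves, which is why the key here is simply this. The sketch below illustrates only that keying pattern; it is self-contained, uses a plain std::map in place of Moses's actual ContextScope class, and ToyFeature, Attach, and Fetch are made-up names.

#include <boost/make_shared.hpp>
#include <boost/shared_ptr.hpp>
#include <iostream>
#include <map>
#include <string>

// Type-erased per-task scratchpad: keyed by object address, values held as shared_ptr<void>.
typedef std::map<void const*, boost::shared_ptr<void> > Scratchpad;

struct ToyFeature {
  // Producer side: attach this sentence's data under the feature's own address.
  void Attach(Scratchpad &scope, const std::string &data) const {
    scope[this] = boost::make_shared<std::string>(data);
  }
  // Consumer side: fetch it back, much as InitializeForInput() does via contextScope->get<string>(key).
  boost::shared_ptr<std::string> Fetch(Scratchpad &scope) const {
    return boost::static_pointer_cast<std::string>(scope[this]);
  }
};

int main() {
  Scratchpad scope;
  ToyFeature lmFeature;
  lmFeature.Attach(scope, "per-sentence LM text for this input");
  std::cout << *lmFeature.Fetch(scope) << "\n";
  return 0;
}

Keying by address means several per-sentence features can share one scope without any risk of name collisions.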

moses/LM/InMemoryPerSentenceOnDemandLM.h

@@ -0,0 +1,135 @@
// $Id$
#pragma once
#include <vector>
#include "SingleFactor.h"
#include <boost/thread/tss.hpp>
#include "lm/model.hh"
#include "moses/LM/Ken.h"
#include "moses/FF/FFState.h"
namespace Moses
{
struct InMemoryPerSentenceOnDemandLMState : public FFState {
lm::ngram::State state;
virtual size_t hash() const {
size_t ret = hash_value(state);
return ret;
}
virtual bool operator==(const FFState& o) const {
const InMemoryPerSentenceOnDemandLMState &other = static_cast<const InMemoryPerSentenceOnDemandLMState &>(o);
bool ret = state == other.state;
return ret;
}
};
class InMemoryPerSentenceOnDemandLM : public LanguageModel
{
public:
InMemoryPerSentenceOnDemandLM(const std::string &line);
~InMemoryPerSentenceOnDemandLM();
void InitializeForInput(ttasksptr const& ttask);
virtual void SetParameter(const std::string& key, const std::string& value) {
GetPerThreadLM().SetParameter(key, value);
}
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
if (initialized) {
return GetPerThreadLM().EmptyHypothesisState(input);
} else {
return new InMemoryPerSentenceOnDemandLMState();
}
}
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
if (initialized) {
return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out);
} else {
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
}
}
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
if (initialized) {
return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator);
} else {
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
}
}
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
if (initialized) {
return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator);
} else {
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
}
}
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
if (initialized) {
GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount);
}
}
virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
if (initialized) {
GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount);
}
}
virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) {
GetPerThreadLM().IssueRequestsFor(hypo, input_state);
}
virtual void sync() {
GetPerThreadLM().sync();
}
virtual void SetFFStateIdx(int state_idx) {
if (initialized) {
GetPerThreadLM().SetFFStateIdx(state_idx);
}
}
virtual void IncrementalCallback(Incremental::Manager &manager) const {
if (initialized) {
GetPerThreadLM().IncrementalCallback(manager);
}
}
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
if (initialized) {
GetPerThreadLM().ReportHistoryOrder(out, phrase);
}
}
virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
if (initialized) {
GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores);
}
}
bool IsUseable(const FactorMask &mask) const {
return GetPerThreadLM().IsUseable(mask);
}
protected:
LanguageModelKen<lm::ngram::ProbingModel> & GetPerThreadLM() const;
mutable boost::thread_specific_ptr<LanguageModelKen<lm::ngram::ProbingModel> > m_perThreadLM;
bool initialized;
};
}
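The m_perThreadLM / GetPerThreadLM() pair declared above relies on boost::thread_specific_ptr: each decoding thread lazily builds and owns its own LanguageModelKen instance, so per-sentence models loaded by one thread never race with another. A small self-contained sketch of that idiom (ToyModel and Touch are placeholders, not Moses types):

#include <boost/thread.hpp>
#include <boost/thread/tss.hpp>
#include <iostream>

struct ToyModel {
  ToyModel() {
    std::cout << "built in thread " << boost::this_thread::get_id() << "\n";
  }
};

static boost::thread_specific_ptr<ToyModel> s_model;

ToyModel & GetPerThreadModel() {
  ToyModel *m = s_model.get();   // NULL the first time a given thread asks
  if (m == NULL) {
    m = new ToyModel();          // lazy construction, once per thread
    s_model.reset(m);            // deleted automatically when the thread exits
  }
  return *m;
}

void Touch() {
  GetPerThreadModel();
}

int main() {
  boost::thread a(Touch), b(Touch);
  a.join();
  b.join();
  Touch();                       // the main thread gets its own, separate instance
  return 0;
}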

moses/LM/Jamfile

@@ -138,7 +138,7 @@ if $(with-dalm) {
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
- alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
+ alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;

moses/LM/Ken.cpp

@@ -105,6 +105,7 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n");
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
@@ -116,6 +117,15 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
LoadModel(file, load_method);
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen()
:LanguageModel("KENLM")
,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
,m_factorType(0)
{
ReadParameters();
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
:LanguageModel(copy_from.GetArgLine()),
m_ngram(copy_from.m_ngram),

moses/LM/Ken.h

@@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/TypeDef.h"
#include "moses/Word.h"
namespace Moses
{
//class LanguageModel;
class FFState;
class InMemoryPerSentenceOnDemandLM;
LanguageModel *ConstructKenLM(const std::string &line);
@@ -67,6 +70,8 @@ public:
virtual bool IsUseable(const FactorMask &mask) const;
friend class InMemoryPerSentenceOnDemandLM;
protected:
boost::shared_ptr<Model> m_ngram;
@@ -84,6 +89,7 @@ protected:
std::vector<lm::WordIndex> m_lmIdLookup;
private:
LanguageModelKen();
LanguageModelKen(const LanguageModelKen<Model> &copy_from);
// Convert last words of hypothesis into vocab ids, returning an end pointer.