mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Add InMemoryPerSentenceOnDemandLM
This commit is contained in:
parent
999d6b6371
commit
578e65298f
@ -68,6 +68,7 @@
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
#include "moses/FF/SkeletonStatefulFF.h"
|
||||
#include "moses/LM/SkeletonLM.h"
|
||||
#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
|
||||
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
|
||||
#include "moses/LM/BilingualLM.h"
|
||||
#include "moses/TranslationModel/SkeletonPT.h"
|
||||
@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
MOSES_FNAME(SkeletonLM);
|
||||
MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
|
||||
MOSES_FNAME(SkeletonTranslationOptionListFeature);
|
||||
MOSES_FNAME(SkeletonPT);
|
||||
|
||||
|
@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextF
|
||||
{
|
||||
if (contextFactor.size() < GetNGramOrder()) {
|
||||
contextFactor.push_back(&word);
|
||||
} else {
|
||||
} else if (GetNGramOrder() > 0) {
|
||||
// shift
|
||||
for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) {
|
||||
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
|
||||
|
91
moses/LM/InMemoryPerSentenceOnDemandLM.cpp
Normal file
91
moses/LM/InMemoryPerSentenceOnDemandLM.cpp
Normal file
@ -0,0 +1,91 @@
|
||||
#include <boost/foreach.hpp>
|
||||
#include "InMemoryPerSentenceOnDemandLM.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TranslationTask.h"
|
||||
#include "moses/ContextScope.h"
|
||||
#include "moses/LM/Ken.h"
|
||||
#include "lm/model.hh"
|
||||
#include "util/mmap.hh"
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Construct the on-demand LM from its feature-function configuration line.
// The actual per-sentence model is not loaded here; that happens later,
// per translation task, in InitializeForInput().
InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line)
  : LanguageModel(line)
  , initialized(false)
{
  ReadParameters();
}
|
||||
|
||||
// Nothing to release explicitly: the thread-local KenLM instances are
// owned by m_perThreadLM and cleaned up by boost::thread_specific_ptr.
InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM() {}
|
||||
|
||||
// Build the per-sentence language model for this translation task.
//
// The task's ContextScope carries a string of LM training data keyed by
// this object's address. That string is written to a temporary file,
// which is then loaded into this thread's KenLM instance.
void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {

  // The context scope object for this translation task
  // contains a map of translation-task-specific data
  boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();

  // The key to the map is this object
  void const* key = static_cast<void const*>(this);

  // The value stored in the map is a string containing the LM data
  boost::shared_ptr<string> value = contextScope->get<string>(key);

  // If no per-sentence LM data was supplied for this sentence, leave the
  // LM uninitialized instead of dereferencing a null shared_ptr.
  if (!value) {
    VERBOSE(1, "\tNo per-sentence LM data found; LM left uninitialized\n");
    return;
  }

  // Create a stream to read the LM data
  stringstream strme(*value);

  // Write the LM data to a temporary file so KenLM can load it.
  // NOTE(review): std::tmpnam is racy/insecure (TOCTOU between name
  // generation and open) — consider mkstemp. TODO confirm portability needs.
  const char * filename = std::tmpnam(NULL);
  ofstream tmp;
  tmp.open(filename);

  // Copy the LM data into the temporary file, one line at a time
  string line;
  while (getline(strme, line)) {
    tmp << line << "\n";
  }
  tmp.close();

  // Load the file we just wrote into this thread's KenLM instance.
  // (Bug fix: this previously loaded a hard-coded developer path
  // "/home/lanes/..." instead of the per-sentence temp file.)
  LanguageModelKen<lm::ngram::ProbingModel> & lm = GetPerThreadLM();
  lm.LoadModel(filename, util::POPULATE_OR_READ);

  initialized = true;

  VERBOSE(1, filename);
  VERBOSE(1, "\tLM initialized\n");

  // The model has been read/mapped; on POSIX the unlinked file persists
  // until unmapped, so removing it now is safe and avoids leaking one
  // temp file per sentence.
  std::remove(filename);
}
|
||||
|
||||
// Return this thread's KenLM instance, lazily constructing it on the
// first call from each thread.
LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
  LanguageModelKen<lm::ngram::ProbingModel> *threadLM = m_perThreadLM.get();
  if (threadLM == NULL) {
    // First use on this thread: allocate a fresh model and hand
    // ownership to the thread-specific pointer.
    threadLM = new LanguageModelKen<lm::ngram::ProbingModel>();
    m_perThreadLM.reset(threadLM);
  }
  assert(threadLM);
  return *threadLM;
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
135
moses/LM/InMemoryPerSentenceOnDemandLM.h
Normal file
135
moses/LM/InMemoryPerSentenceOnDemandLM.h
Normal file
@ -0,0 +1,135 @@
|
||||
// $Id$
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "SingleFactor.h"
|
||||
#include <boost/thread/tss.hpp>
|
||||
#include "lm/model.hh"
|
||||
#include "moses/LM/Ken.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct InMemoryPerSentenceOnDemandLMState : public FFState {
|
||||
lm::ngram::State state;
|
||||
virtual size_t hash() const {
|
||||
size_t ret = hash_value(state);
|
||||
return ret;
|
||||
}
|
||||
virtual bool operator==(const FFState& o) const {
|
||||
const InMemoryPerSentenceOnDemandLMState &other = static_cast<const InMemoryPerSentenceOnDemandLMState &>(o);
|
||||
bool ret = state == other.state;
|
||||
return ret;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class InMemoryPerSentenceOnDemandLM : public LanguageModel
|
||||
{
|
||||
public:
|
||||
InMemoryPerSentenceOnDemandLM(const std::string &line);
|
||||
~InMemoryPerSentenceOnDemandLM();
|
||||
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value) {
|
||||
GetPerThreadLM().SetParameter(key, value);
|
||||
}
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EmptyHypothesisState(input);
|
||||
} else {
|
||||
return new InMemoryPerSentenceOnDemandLMState();
|
||||
}
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
|
||||
}
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
|
||||
}
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
|
||||
if (initialized) {
|
||||
return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) {
|
||||
GetPerThreadLM().IssueRequestsFor(hypo, input_state);
|
||||
}
|
||||
|
||||
virtual void sync() {
|
||||
GetPerThreadLM().sync();
|
||||
}
|
||||
|
||||
virtual void SetFFStateIdx(int state_idx) {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().SetFFStateIdx(state_idx);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void IncrementalCallback(Incremental::Manager &manager) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().IncrementalCallback(manager);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().ReportHistoryOrder(out, phrase);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedScores) const {
|
||||
if (initialized) {
|
||||
GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return GetPerThreadLM().IsUseable(mask);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
LanguageModelKen<lm::ngram::ProbingModel> & GetPerThreadLM() const;
|
||||
|
||||
mutable boost::thread_specific_ptr<LanguageModelKen<lm::ngram::ProbingModel> > m_perThreadLM;
|
||||
|
||||
bool initialized;
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -138,7 +138,7 @@ if $(with-dalm) {
|
||||
|
||||
#Top-level LM library. If you've added a file that doesn't depend on external
|
||||
#libraries, put it here.
|
||||
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
|
||||
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
|
||||
../../lm//kenlm ..//headers $(dependencies) ;
|
||||
|
||||
alias macros : : : : <define>$(lmmacros) ;
|
||||
|
@ -105,6 +105,7 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
|
||||
config.load_method = load_method;
|
||||
|
||||
m_ngram.reset(new Model(file.c_str(), config));
|
||||
VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n");
|
||||
}
|
||||
|
||||
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
|
||||
@ -116,6 +117,15 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
|
||||
LoadModel(file, load_method);
|
||||
}
|
||||
|
||||
// Default constructor: builds an empty KenLM wrapper with factor type 0
// and no model loaded yet. Used by InMemoryPerSentenceOnDemandLM, which
// calls LoadModel later, once per-sentence data is available.
template <class Model> LanguageModelKen<Model>::LanguageModelKen()
  : LanguageModel("KENLM")
  , m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
  , m_factorType(0)
{
  ReadParameters();
}
|
||||
|
||||
|
||||
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> ©_from)
|
||||
:LanguageModel(copy_from.GetArgLine()),
|
||||
m_ngram(copy_from.m_ngram),
|
||||
|
@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Word.h"
|
||||
|
||||
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
//class LanguageModel;
|
||||
class FFState;
|
||||
class InMemoryPerSentenceOnDemandLM;
|
||||
|
||||
LanguageModel *ConstructKenLM(const std::string &line);
|
||||
|
||||
@ -67,6 +70,8 @@ public:
|
||||
|
||||
virtual bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
friend class InMemoryPerSentenceOnDemandLM;
|
||||
|
||||
protected:
|
||||
boost::shared_ptr<Model> m_ngram;
|
||||
|
||||
@ -84,6 +89,7 @@ protected:
|
||||
std::vector<lm::WordIndex> m_lmIdLookup;
|
||||
|
||||
private:
|
||||
LanguageModelKen();
|
||||
LanguageModelKen(const LanguageModelKen<Model> ©_from);
|
||||
|
||||
// Convert last words of hypothesis into vocab ids, returning an end pointer.
|
||||
|
Loading…
Reference in New Issue
Block a user