Refactoring code from Ken.cpp into Ken.h

in preparation for implementation of the backward language model
from Xiong et al. (2011)
Lane Schwartz 2013-04-05 14:46:52 -04:00
parent 972001e345
commit 8459a86137
7 changed files with 216 additions and 81 deletions
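
For context (an illustration, not part of the commit): a backward language model in the sense of Xiong et al. (2011) factors the sentence probability right-to-left, so each word is conditioned on the words that follow it rather than the words that precede it. A minimal sketch of that scoring order, with a stand-in callback in place of a real KenLM query:

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <vector>

    // Backward scoring: P(w_1..w_n) = product over i of P(w_i | w_{i+1}..w_n),
    // so the context of each word is its right-hand neighbors.
    double ScoreBackward(
        const std::vector<std::string> &words,
        const std::function<double(const std::vector<std::string> & /*right context*/,
                                   const std::string & /*word*/)> &logProb) {
      double total = 0.0;
      for (std::size_t i = words.size(); i-- > 0; ) {
        std::vector<std::string> context(words.begin() + i + 1, words.end());
        total += logProb(context, words[i]);
      }
      return total;
    }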

moses/LM/Backward.cpp (new file, +69 lines)

@@ -0,0 +1,69 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
#include "lm/left.hh"
#include "lm/model.hh"
#include "moses/LM/Ken.h"
#include "moses/LM/Backward.h"
namespace Moses {
template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(file,factorType,lazy) {
  //
  // This space intentionally left blank
  //
}

LanguageModel *ConstructBackwardLM(const std::string &file, FactorType factorType, bool lazy) {
  try {
    lm::ngram::ModelType model_type;
    if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
      switch(model_type) {
      case lm::ngram::PROBING:
        return new BackwardLanguageModel<lm::ngram::ProbingModel>(file, factorType, lazy);
      case lm::ngram::REST_PROBING:
        return new BackwardLanguageModel<lm::ngram::RestProbingModel>(file, factorType, lazy);
      case lm::ngram::TRIE:
        return new BackwardLanguageModel<lm::ngram::TrieModel>(file, factorType, lazy);
      case lm::ngram::QUANT_TRIE:
        return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(file, factorType, lazy);
      case lm::ngram::ARRAY_TRIE:
        return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(file, factorType, lazy);
      case lm::ngram::QUANT_ARRAY_TRIE:
        return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(file, factorType, lazy);
      default:
        std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
        abort();
      }
    } else {
      return new BackwardLanguageModel<lm::ngram::ProbingModel>(file, factorType, lazy);
    }
  } catch (std::exception &e) {
    std::cerr << e.what() << std::endl;
    abort();
  }
}
} // namespace Moses
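
ConstructBackwardLM mirrors ConstructKenLM: lm::ngram::RecognizeBinary inspects the file header to select the matching template instantiation, and non-binary (ARPA) files fall back to ProbingModel. A hedged usage sketch, where the file path is a placeholder and the model is assumed to have been trained on reversed-order text:

    #include "moses/LM/Backward.h"

    // "reversed.arpa" is a placeholder path; factorType 0 assumes a
    // single-factor setup, and lazy=false forces a full load.
    Moses::LanguageModel *lm =
        Moses::ConstructBackwardLM("reversed.arpa", /*factorType=*/0, /*lazy=*/false);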

moses/LM/Backward.h (new file, +45 lines)

@@ -0,0 +1,45 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelBackward_h
#define moses_LanguageModelBackward_h
#include <string>
#include "moses/LM/Ken.h"
namespace Moses {
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructBackwardLM(const std::string &file, FactorType factorType, bool lazy);
/*
* An implementation of single factor backward LM using Kenneth's code.
*/
template <class Model> class BackwardLanguageModel : public LanguageModelKen<Model> {
public:
BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy);
};
} // namespace Moses
#endif
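
As committed, BackwardLanguageModel only forwards its constructor arguments to LanguageModelKen; no scoring behavior changes yet. A hypothetical sketch of where the backward behavior would later hook in (the override below is an assumption about the follow-up work, not code from this commit; CalcScore is declared by LanguageModelKen in Ken.h):

    template <class Model> class BackwardLanguageModel : public LanguageModelKen<Model> {
    public:
      BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy);
      // Hypothetical follow-up: walk the phrase in reverse word order
      // before querying the underlying KenLM model.
      void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
    };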

moses/LM/Factory.cpp

@@ -45,6 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#endif
#include "Ken.h"
#include "Backward.h"
#ifdef LM_LDHT
# include "LDHT.h"
@@ -113,6 +114,9 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
factorTypes[0]);
#endif
break;
case BackwardLM:
return ConstructBackwardLM(languageModelFile, factorTypes[0], lmImplementation == LazyKen);
break;
default:
break;
}
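
One detail of the new case: inside this branch lmImplementation is BackwardLM, so the lmImplementation == LazyKen argument is necessarily false and the backward model is never lazily loaded. The call is therefore equivalent to:

    case BackwardLM:
      // lmImplementation == LazyKen cannot hold in this case,
      // so lazy loading is effectively disabled for the backward LM.
      return ConstructBackwardLM(languageModelFile, factorTypes[0], false);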

moses/LM/Jamfile

@@ -92,7 +92,7 @@ obj Factory.o : Factory.cpp ..//headers $(dependencies) : <include>../DynSAInclu
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
-alias LM : Base.cpp Factory.o Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
+alias LM : Backward.cpp Base.cpp Factory.o Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
../../lm//kenlm ..//headers $(dependencies) ;

moses/LM/Ken.cpp

@@ -57,71 +57,22 @@ struct KenLMState : public FFState {
  }
};

-/*
- * An implementation of single factor LM using Ken's code.
- */
-template <class Model> class LanguageModelKen : public LanguageModel {
+class LanguageModelChartStateKenLM : public FFState {
 public:
-  LanguageModelKen(const std::string &file, FactorType factorType, bool lazy);
+  LanguageModelChartStateKenLM() {}
-  LanguageModel *Duplicate() const;
+  const lm::ngram::ChartState &GetChartState() const { return m_state; }
+  lm::ngram::ChartState &GetChartState() { return m_state; }
-  bool Useable(const Phrase &phrase) const {
-    return (phrase.GetSize()>0 && phrase.GetFactor(0, m_factorType) != NULL);
-  }
-  std::string GetScoreProducerDescription(unsigned) const {
-    std::ostringstream oss;
-    oss << "LM_" << (unsigned)m_ngram->Order() << "gram";
-    return oss.str();
-  }
-  const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
-    KenLMState *ret = new KenLMState();
-    ret->state = m_ngram->BeginSentenceState();
-    return ret;
-  }
+  int Compare(const FFState& o) const
+  {
+    const LanguageModelChartStateKenLM &other = static_cast<const LanguageModelChartStateKenLM&>(o);
+    int ret = m_state.Compare(other.m_state);
+    return ret;
+  }
-  void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
-  FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
-  FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
-  void IncrementalCallback(Incremental::Manager &manager) const {
-    manager.LMCallback(*m_ngram, m_lmIdLookup);
-  }
 private:
-  LanguageModelKen(const LanguageModelKen<Model> &copy_from);
-  lm::WordIndex TranslateID(const Word &word) const {
-    std::size_t factor = word.GetFactor(m_factorType)->GetId();
-    return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
-  }
-  // Convert last words of hypothesis into vocab ids, returning an end pointer.
-  lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
-    lm::WordIndex *index = indices;
-    lm::WordIndex *end = indices + m_ngram->Order() - 1;
-    int position = hypo.GetCurrTargetWordsRange().GetEndPos();
-    for (; ; ++index, --position) {
-      if (index == end) return index;
-      if (position == -1) {
-        *index = m_ngram->GetVocabulary().BeginSentence();
-        return index + 1;
-      }
-      *index = TranslateID(hypo.GetWord(position));
-    }
-  }
-  boost::shared_ptr<Model> m_ngram;
-  std::vector<lm::WordIndex> m_lmIdLookup;
-  FactorType m_factorType;
-  const Factor *m_beginSentenceFactor;
+  lm::ngram::ChartState m_state;
 };
class MappingBuilder : public lm::EnumerateVocab {
@@ -143,6 +94,8 @@ private:
std::vector<lm::WordIndex> &m_mapping;
};
} // namespace
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &file, FactorType factorType, bool lazy) : m_factorType(factorType) {
lm::ngram::Config config;
IFVERBOSE(1) {
@@ -160,10 +113,50 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
m_beginSentenceFactor = collection.AddFactor(BOS_);
}
template <class Model> bool LanguageModelKen<Model>::Useable(const Phrase &phrase) const {
return (phrase.GetSize()>0 && phrase.GetFactor(0, m_factorType) != NULL);
}
template <class Model> std::string LanguageModelKen<Model>::GetScoreProducerDescription(unsigned) const {
std::ostringstream oss;
oss << "LM_" << (unsigned)m_ngram->Order() << "gram";
return oss.str();
}
template <class Model> const FFState *LanguageModelKen<Model>::EmptyHypothesisState(const InputType &/*input*/) const {
KenLMState *ret = new KenLMState();
ret->state = m_ngram->BeginSentenceState();
return ret;
}
template <class Model> lm::WordIndex LanguageModelKen<Model>::TranslateID(const Word &word) const {
std::size_t factor = word.GetFactor(m_factorType)->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
template <class Model> lm::WordIndex *LanguageModelKen<Model>::LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
lm::WordIndex *index = indices;
lm::WordIndex *end = indices + m_ngram->Order() - 1;
int position = hypo.GetCurrTargetWordsRange().GetEndPos();
for (; ; ++index, --position) {
if (index == end) return index;
if (position == -1) {
*index = m_ngram->GetVocabulary().BeginSentence();
return index + 1;
}
*index = TranslateID(hypo.GetWord(position));
}
}
template <class Model> LanguageModel *LanguageModelKen<Model>::Duplicate() const {
return new LanguageModelKen<Model>(*this);
}
template <class Model> void LanguageModelKen<Model>::IncrementalCallback(Incremental::Manager &manager) const {
manager.LMCallback(*m_ngram, m_lmIdLookup);
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from) :
m_ngram(copy_from.m_ngram),
// TODO: don't copy this.
@@ -277,24 +270,6 @@ template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothes
  return ret.release();
}

-class LanguageModelChartStateKenLM : public FFState {
-public:
-  LanguageModelChartStateKenLM() {}
-
-  const lm::ngram::ChartState &GetChartState() const { return m_state; }
-  lm::ngram::ChartState &GetChartState() { return m_state; }
-
-  int Compare(const FFState& o) const
-  {
-    const LanguageModelChartStateKenLM &other = static_cast<const LanguageModelChartStateKenLM&>(o);
-    int ret = m_state.Compare(other.m_state);
-    return ret;
-  }
-
-private:
-  lm::ngram::ChartState m_state;
-};
template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const {
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
@@ -335,8 +310,6 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha
return newState;
}
} // namespace
LanguageModel *ConstructKenLM(const std::string &file, FactorType factorType, bool lazy) {
try {
lm::ngram::ModelType model_type;
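
The template member definitions can remain in Ken.cpp even though the class declaration now lives in Ken.h, because ConstructKenLM in the same translation unit instantiates LanguageModelKen<Model> for every KenLM model type; those implicit instantiations emit the symbols that Backward.cpp links against. A minimal illustration of the pattern (the file and type names below are hypothetical):

    // widget.h -- declaration only
    template <class T> struct Widget {
      explicit Widget(T v);
      T value;
    };

    // widget.cpp -- out-of-line definition plus an instantiating use
    #include "widget.h"
    template <class T> Widget<T>::Widget(T v) : value(v) {}
    // Constructing Widget<int> here instantiates Widget<int>::Widget(int),
    // so other translation units can link against this object file.
    Widget<int> *MakeIntWidget(int v) { return new Widget<int>(v); }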

moses/LM/Ken.h

@@ -24,15 +24,58 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#include "moses/TypeDef.h"
#include "lm/word_index.hh"
#include "moses/Word.h"
#include "moses/LM/Base.h"
namespace Moses {
class LanguageModel;
class FFState;
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructKenLM(const std::string &file, FactorType factorType, bool lazy);
/*
* An implementation of single factor LM using Kenneth's code.
*/
template <class Model> class LanguageModelKen : public LanguageModel {
public:
LanguageModelKen(const std::string &file, FactorType factorType, bool lazy);
LanguageModel *Duplicate() const;
bool Useable(const Phrase &phrase) const;
std::string GetScoreProducerDescription(unsigned) const;
const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
void IncrementalCallback(Incremental::Manager &manager) const;
private:
LanguageModelKen(const LanguageModelKen<Model> &copy_from);
lm::WordIndex TranslateID(const Word &word) const;
// Convert last words of hypothesis into vocab ids, returning an end pointer.
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const;
boost::shared_ptr<Model> m_ngram;
std::vector<lm::WordIndex> m_lmIdLookup;
FactorType m_factorType;
const Factor *m_beginSentenceFactor;
};
} // namespace Moses
#endif

moses/TypeDef.h

@@ -123,6 +123,7 @@ enum LMImplementation {
,LazyKen = 9
,ORLM = 10
,LDHTLM = 11
,BackwardLM = 12
};
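
These enum values are what the numeric language-model specification in moses.ini selects (consistent with LazyKen = 9 for the lazy KenLM loader). Assuming the usual <implementation> <factor> <order> <path> field layout, a backward LM would be requested roughly as follows (hedged example; the path and order are placeholders):

    [lmodel-file]
    12 0 5 /path/to/reversed-order.lm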
enum PhraseTableImplementation {