mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Re-enable backward LM code.
This commit updates that code to take into account Hieu's major refactoring. Additionally, this commit re-instates changes from commit 8459a86 (moses/LM/Jamfile, moses/LM/Ken.h, moses/LM/Ken.cpp), commit d340218 (moses/LM/Ken.h), commit 95e3a37 (moses/LM/Ken.h, moses/LM/Ken.cpp), and commit aacc936 (moses/LM/Jamfile, moses/LM/Ken.cpp). The above changes appear to have been improperly discarded during the merge in commit 4894df6 and then again in commit 3ed17bb.
This commit is contained in:
parent
4156c7acb6
commit
08225d8e57
@ -24,7 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "lm/left.hh"
|
||||
#include "lm/model.hh"
|
||||
|
||||
#include "moses/FFState.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/Phrase.h"
|
||||
|
||||
@ -39,7 +39,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
/** Constructs a new backward language model. */
|
||||
template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(file,factorType,lazy)
|
||||
template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType,lazy)
|
||||
{
|
||||
//
|
||||
// This space intentionally left blank
|
||||
@ -288,30 +288,30 @@ template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phr
|
||||
|
||||
}
|
||||
|
||||
LanguageModel *ConstructBackwardLM(const std::string &file, FactorType factorType, bool lazy)
|
||||
LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
|
||||
{
|
||||
try {
|
||||
lm::ngram::ModelType model_type;
|
||||
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
|
||||
switch(model_type) {
|
||||
case lm::ngram::PROBING:
|
||||
return new BackwardLanguageModel<lm::ngram::ProbingModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
|
||||
case lm::ngram::REST_PROBING:
|
||||
return new BackwardLanguageModel<lm::ngram::RestProbingModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
|
||||
case lm::ngram::TRIE:
|
||||
return new BackwardLanguageModel<lm::ngram::TrieModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
|
||||
case lm::ngram::QUANT_TRIE:
|
||||
return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
|
||||
case lm::ngram::ARRAY_TRIE:
|
||||
return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
|
||||
case lm::ngram::QUANT_ARRAY_TRIE:
|
||||
return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
|
||||
default:
|
||||
std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
return new BackwardLanguageModel<lm::ngram::ProbingModel>(file, factorType, lazy);
|
||||
return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
|
||||
}
|
||||
} catch (std::exception &e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
|
@ -33,7 +33,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
//! This will also load. Returns a templated backward LM.
|
||||
LanguageModel *ConstructBackwardLM(const std::string &file, FactorType factorType, bool lazy);
|
||||
LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
|
||||
class FFState;
|
||||
// template<typename M> class BackwardLanguageModelTest;
|
||||
@ -45,7 +45,7 @@ class BackwardLanguageModelTest;
|
||||
template <class Model> class BackwardLanguageModel : public LanguageModelKen<Model>
|
||||
{
|
||||
public:
|
||||
BackwardLanguageModel(const std::string &file, FactorType factorType, bool lazy);
|
||||
BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
|
||||
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
|
||||
|
||||
|
@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#ifndef moses_BackwardLMState_h
|
||||
#define moses_BackwardLMState_h
|
||||
|
||||
#include "moses/FFState.h"
|
||||
#include "moses/FF/FFState.h"
|
||||
#include "moses/LM/Backward.h"
|
||||
|
||||
#include "lm/state.hh"
|
||||
|
@ -70,6 +70,7 @@ public:
|
||||
backwardLM(
|
||||
static_cast< BackwardLanguageModel<lm::ngram::ProbingModel> * >(
|
||||
ConstructBackwardLM(
|
||||
"LM1=1.0",
|
||||
boost::unit_test::framework::master_test_suite().argv[1],
|
||||
0,
|
||||
false)
|
||||
@ -116,9 +117,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
|
||||
@ -141,9 +144,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the licenses",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 2 );
|
||||
|
||||
@ -166,9 +171,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the licenses for",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 3 );
|
||||
|
||||
@ -191,9 +198,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the licenses for most",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 4 );
|
||||
|
||||
@ -235,9 +244,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"the",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
|
||||
@ -261,9 +272,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"licenses",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
|
||||
@ -287,9 +300,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"for",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
|
||||
@ -313,9 +328,11 @@ public:
|
||||
outputFactorOrder.push_back(0);
|
||||
|
||||
phrase.CreateFromString(
|
||||
Input,
|
||||
outputFactorOrder,
|
||||
"most",
|
||||
StaticData::Instance().GetFactorDelimiter());
|
||||
StaticData::Instance().GetFactorDelimiter(),
|
||||
NULL);
|
||||
|
||||
BOOST_CHECK( phrase.GetSize() == 1 );
|
||||
|
||||
|
@ -75,7 +75,11 @@ obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : :
|
||||
|
||||
#Top-level LM library. If you've added a file that doesn't depend on external
|
||||
#libraries, put it here.
|
||||
alias LM : Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
|
||||
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
|
||||
../../lm//kenlm ..//headers $(dependencies) ;
|
||||
|
||||
alias macros : : : : <define>$(lmmacros) ;
|
||||
|
||||
#Unit test for Backward LM
|
||||
import testing ;
|
||||
run BackwardTest.cpp ..//moses LM ../../lm//kenlm /top//boost_unit_test_framework : : backward.arpa ;
|
||||
|
126
moses/LM/Ken.cpp
126
moses/LM/Ken.cpp
@ -60,62 +60,62 @@ struct KenLMState : public FFState {
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* An implementation of single factor LM using Ken's code.
|
||||
*/
|
||||
template <class Model> class LanguageModelKen : public LanguageModel
|
||||
{
|
||||
public:
|
||||
LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
|
||||
const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
|
||||
KenLMState *ret = new KenLMState();
|
||||
ret->state = m_ngram->BeginSentenceState();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
|
||||
|
||||
FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
|
||||
|
||||
FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
|
||||
void IncrementalCallback(Incremental::Manager &manager) const {
|
||||
manager.LMCallback(*m_ngram, m_lmIdLookup);
|
||||
}
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const;
|
||||
private:
|
||||
LanguageModelKen(const LanguageModelKen<Model> ©_from);
|
||||
|
||||
lm::WordIndex TranslateID(const Word &word) const {
|
||||
std::size_t factor = word.GetFactor(m_factorType)->GetId();
|
||||
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
|
||||
}
|
||||
|
||||
// Convert last words of hypothesis into vocab ids, returning an end pointer.
|
||||
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
|
||||
lm::WordIndex *index = indices;
|
||||
lm::WordIndex *end = indices + m_ngram->Order() - 1;
|
||||
int position = hypo.GetCurrTargetWordsRange().GetEndPos();
|
||||
for (; ; ++index, --position) {
|
||||
if (index == end) return index;
|
||||
if (position == -1) {
|
||||
*index = m_ngram->GetVocabulary().BeginSentence();
|
||||
return index + 1;
|
||||
}
|
||||
*index = TranslateID(hypo.GetWord(position));
|
||||
}
|
||||
}
|
||||
|
||||
boost::shared_ptr<Model> m_ngram;
|
||||
|
||||
std::vector<lm::WordIndex> m_lmIdLookup;
|
||||
|
||||
FactorType m_factorType;
|
||||
|
||||
const Factor *m_beginSentenceFactor;
|
||||
};
|
||||
///*
|
||||
// * An implementation of single factor LM using Ken's code.
|
||||
// */
|
||||
//template <class Model> class LanguageModelKen : public LanguageModel
|
||||
//{
|
||||
//public:
|
||||
// LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
//
|
||||
// const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
|
||||
// KenLMState *ret = new KenLMState();
|
||||
// ret->state = m_ngram->BeginSentenceState();
|
||||
// return ret;
|
||||
// }
|
||||
//
|
||||
// void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
|
||||
//
|
||||
// FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
|
||||
//
|
||||
// FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
//
|
||||
// void IncrementalCallback(Incremental::Manager &manager) const {
|
||||
// manager.LMCallback(*m_ngram, m_lmIdLookup);
|
||||
// }
|
||||
//
|
||||
// bool IsUseable(const FactorMask &mask) const;
|
||||
//private:
|
||||
// LanguageModelKen(const LanguageModelKen<Model> &copy_from);
|
||||
//
|
||||
// lm::WordIndex TranslateID(const Word &word) const {
|
||||
// std::size_t factor = word.GetFactor(m_factorType)->GetId();
|
||||
// return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
|
||||
// }
|
||||
//
|
||||
// // Convert last words of hypothesis into vocab ids, returning an end pointer.
|
||||
// lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
|
||||
// lm::WordIndex *index = indices;
|
||||
// lm::WordIndex *end = indices + m_ngram->Order() - 1;
|
||||
// int position = hypo.GetCurrTargetWordsRange().GetEndPos();
|
||||
// for (; ; ++index, --position) {
|
||||
// if (index == end) return index;
|
||||
// if (position == -1) {
|
||||
// *index = m_ngram->GetVocabulary().BeginSentence();
|
||||
// return index + 1;
|
||||
// }
|
||||
// *index = TranslateID(hypo.GetWord(position));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// boost::shared_ptr<Model> m_ngram;
|
||||
//
|
||||
// std::vector<lm::WordIndex> m_lmIdLookup;
|
||||
//
|
||||
// FactorType m_factorType;
|
||||
//
|
||||
// const Factor *m_beginSentenceFactor;
|
||||
//};
|
||||
|
||||
class MappingBuilder : public lm::EnumerateVocab
|
||||
{
|
||||
@ -137,6 +137,8 @@ private:
|
||||
std::vector<lm::WordIndex> &m_mapping;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
|
||||
:LanguageModel("KENLM", line)
|
||||
,m_factorType(factorType)
|
||||
@ -168,6 +170,13 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageM
|
||||
{
|
||||
}
|
||||
|
||||
// Creates the LM state for an empty hypothesis: a new KenLMState whose
// n-gram state is the model's begin-of-sentence state.
template <class Model>
const FFState *LanguageModelKen<Model>::EmptyHypothesisState(const InputType &/*input*/) const
{
  KenLMState *initial = new KenLMState();
  initial->state = m_ngram->BeginSentenceState();
  return initial;
}
|
||||
|
||||
template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
|
||||
{
|
||||
fullScore = 0;
|
||||
@ -342,6 +351,10 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const Cha
|
||||
return newState;
|
||||
}
|
||||
|
||||
// Passes the loaded n-gram model and the factor-id -> LM-id lookup table
// to the incremental search manager.
template <class Model>
void LanguageModelKen<Model>::IncrementalCallback(Incremental::Manager &manager) const
{
  manager.LMCallback(*m_ngram, m_lmIdLookup);
}
|
||||
|
||||
template <class Model>
|
||||
bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
|
||||
{
|
||||
@ -349,7 +362,6 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
LanguageModel *ConstructKenLM(const std::string &line)
|
||||
{
|
||||
|
@ -23,19 +23,80 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#define moses_LanguageModelKen_h
|
||||
|
||||
#include <string>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "lm/word_index.hh"
|
||||
|
||||
#include "moses/LM/Base.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Word.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class LanguageModel;
|
||||
//class LanguageModel;
|
||||
class FFState;
|
||||
|
||||
LanguageModel *ConstructKenLM(const std::string &line);
|
||||
|
||||
//! This will also load. Returns a templated KenLM class
|
||||
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
|
||||
/*
|
||||
* An implementation of single factor LM using Kenneth's code.
|
||||
*/
|
||||
template <class Model> class LanguageModelKen : public LanguageModel
|
||||
{
|
||||
public:
|
||||
LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
|
||||
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
|
||||
|
||||
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
|
||||
|
||||
virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
|
||||
|
||||
virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
|
||||
|
||||
virtual void IncrementalCallback(Incremental::Manager &manager) const;
|
||||
|
||||
virtual bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
protected:
|
||||
boost::shared_ptr<Model> m_ngram;
|
||||
|
||||
const Factor *m_beginSentenceFactor;
|
||||
|
||||
FactorType m_factorType;
|
||||
|
||||
lm::WordIndex TranslateID(const Word &word) const {
|
||||
std::size_t factor = word.GetFactor(m_factorType)->GetId();
|
||||
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
|
||||
}
|
||||
|
||||
private:
|
||||
LanguageModelKen(const LanguageModelKen<Model> ©_from);
|
||||
|
||||
// Convert last words of hypothesis into vocab ids, returning an end pointer.
|
||||
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
|
||||
lm::WordIndex *index = indices;
|
||||
lm::WordIndex *end = indices + m_ngram->Order() - 1;
|
||||
int position = hypo.GetCurrTargetWordsRange().GetEndPos();
|
||||
for (; ; ++index, --position) {
|
||||
if (index == end) return index;
|
||||
if (position == -1) {
|
||||
*index = m_ngram->GetVocabulary().BeginSentence();
|
||||
return index + 1;
|
||||
}
|
||||
*index = TranslateID(hypo.GetWord(position));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<lm::WordIndex> m_lmIdLookup;
|
||||
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user