Fix OxLM.

This commit is contained in:
Paul Baltescu 2014-10-08 22:07:19 +01:00
parent f2aebe4052
commit 8f74ecd8f3
12 changed files with 136 additions and 184 deletions

View File

@ -114,7 +114,7 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
if [ option.get "with-lbllm" ] {
if [ option.get "with-oxlm" ] {
external-lib boost_serialization ;
external-lib gomp ;
requirements += <library>boost_serialization ;

View File

@ -2072,24 +2072,24 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
</link>
<link>
<name>LM/oxlm/LBLLM.cpp</name>
<name>LM/oxlm/OxLM.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLM.cpp</locationURI>
</link>
<link>
<name>LM/oxlm/LBLLM.h</name>
<name>LM/oxlm/OxLM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLM.h</locationURI>
</link>
<link>
<name>LM/oxlm/Mapper.cpp</name>
<name>LM/oxlm/OxLMMapper.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLMMapper.cpp</locationURI>
</link>
<link>
<name>LM/oxlm/Mapper.h</name>
<name>LM/oxlm/OxLMMapper.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLMMapper.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</name>

View File

@ -96,8 +96,8 @@
#include "moses/LM/DALMWrapper.h"
#endif
#ifdef LM_LBL
#include "moses/LM/oxlm/LBLLM.h"
#ifdef LM_OXLM
#include "moses/LM/oxlm/OxLM.h"
#endif
#include "util/exception.hh"
@ -252,10 +252,10 @@ FeatureRegistry::FeatureRegistry()
#ifdef LM_DALM
MOSES_FNAME2("DALM", LanguageModelDALM);
#endif
#ifdef LM_LBL
MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
#ifdef LM_OXLM
MOSES_FNAME2("OxLM", OxLM<oxlm::LM>);
MOSES_FNAME2("OxFactoredLM", OxLM<oxlm::FactoredLM>);
MOSES_FNAME2("OxFactoredMaxentLM", OxLM<oxlm::FactoredMaxentLM>);
#endif
Add("KENLM", new KenFactory());

View File

@ -10,14 +10,14 @@ if $(with-dlib) {
dlib = ;
}
with-lbllm = [ option.get "with-lbllm" ] ;
if $(with-lbllm) {
lbllm2 = <cxxflags>-std=c++0x <define>LM_LBL <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
with-oxlm = [ option.get "with-oxlm" ] ;
if $(with-oxlm) {
oxlm2 = <cxxflags>-std=c++0x <define>LM_OXLM <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
} else {
lbllm2 = ;
oxlm2 = ;
}
alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(lbllm2) ;
alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(oxlm2) ;
alias ThreadPool : ThreadPool.cpp ;
alias Util : Util.cpp Timer.cpp ;

View File

@ -94,16 +94,16 @@ if $(with-nplm) {
lmmacros += LM_NEURAL ;
}
#LBLLM
local with-lbllm = [ option.get "with-lbllm" ] ;
if $(with-lbllm) {
lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
lib murmurhash : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
obj LBLLM.o : oxlm/LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
alias lbllm : LBLLM.o Mapper.o lbl murmurhash /top//boost_filesystem : : : <cxxflags>-std=c++0x <define>LM_LBL ;
dependencies += lbllm ;
lmmacros += LM_LBL ;
#OxLM
local with-oxlm = [ option.get "with-oxlm" ] ;
if $(with-oxlm) {
lib lbl : : <search>$(with-oxlm)/lib <search>$(with-oxlm)/lib64 ;
lib murmurhash : : <search>$(with-oxlm)/lib <search>$(with-oxlm)/lib64 ;
obj OxLM.o : oxlm/OxLM.cpp lbl ..//headers : <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
obj OxLMMapper.o : oxlm/OxLMMapper.cpp lbl ..//headers : <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
alias oxlm : OxLM.o OxLMMapper.o lbl murmurhash /top//boost_filesystem : : : <cxxflags>-std=c++0x <define>LM_OXLM ;
dependencies += oxlm ;
lmmacros += LM_OXLM ;
}

View File

@ -1,67 +0,0 @@
#include "Mapper.h"
#include "moses/FactorCollection.h"
using namespace std;
namespace Moses
{
// Builds the Moses-factor -> LBL-id table: each id in [0, dict.size()) is
// turned into its string, that string is registered with the Moses
// FactorCollection, and the resulting factor is mapped back to the id.
// Finally caches the id the stored dictionary assigns to "<unk>".
OXLMMapper::OXLMMapper(const oxlm::Dict& dict) : dict(dict)
{
  for (int lblId = 0; lblId < dict.size(); ++lblId) {
    const string &token = dict.Convert(lblId);
    const Moses::Factor *mosesFactor =
      FactorCollection::Instance().AddFactor(token, false);
    moses2lbl[mosesFactor] = lblId;
  }

  kUNKNOWN = this->dict.Convert("<unk>");
}
// Looks `factor` up in the Moses-factor -> LBL-id table.
// Returns the stored id, or kUNKNOWN when the factor has no entry.
int OXLMMapper::convert(const Moses::Factor *factor) const
{
  const Coll::const_iterator hit = moses2lbl.find(factor);
  return hit != moses2lbl.end() ? hit->second : kUNKNOWN;
}
std::vector<int> OXLMMapper::convert(const Phrase &phrase) const
{
size_t size = phrase.GetSize();
vector<int> ret(size);
for (size_t i = 0; i < size; ++i) {
const Moses::Factor *factor = phrase.GetFactor(i, 0);
int id = convert(factor);
ret[i] = id;
}
return ret;
}
// Splits an n-gram given as Moses words into the ids the language model
// expects.
//
// contextFactor: the n-gram; its last element is the word being scored and
//                everything before it is the history.
// ids:           output, overwritten with the history ids in REVERSED order
//                (nearest history word first).
// word:          output, id of the last element of contextFactor.
//
// An empty contextFactor yields an empty `ids` and `word == kUNKNOWN`.
// Previously `size - 1` wrapped around (size_t is unsigned), so the resize
// failed and back() was undefined behaviour.
void OXLMMapper::convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const
{
  if (contextFactor.empty()) {
    ids.clear();
    word = kUNKNOWN;
    return;
  }

  const size_t contextSize = contextFactor.size() - 1;
  ids.resize(contextSize);
  // Fill directly in reversed order; this avoids a separate std::reverse pass
  // (and the <algorithm> dependency this file never included).
  for (size_t i = 0; i < contextSize; ++i) {
    const Moses::Factor *factor = contextFactor[contextSize - 1 - i]->GetFactor(0);
    ids[i] = convert(factor);
  }

  word = convert(contextFactor.back()->GetFactor(0));
}
} // namespace

View File

@ -1,46 +0,0 @@
#pragma once
#include <map>
#include "corpus/corpus.h"
#include "moses/Factor.h"
#include "moses/Phrase.h"
namespace Moses
{
// Translates Moses factors into the integer word ids of an oxlm::Dict.
class OXLMMapper
{
public:
  // Stores a copy of `dict` and indexes its entries by Moses factor.
  OXLMMapper(const oxlm::Dict& dict);

  // Id for a single factor; kUNKNOWN when the factor is not in the table.
  int convert(const Moses::Factor *factor) const;

  // Ids for factor 0 of every position of `phrase`.
  std::vector<int> convert(const Phrase &phrase) const;

  // Splits an n-gram into reversed history ids (`ids`) and the id of its
  // last word (`word`).
  void convert(const std::vector<const Word*> &contextFactor,
               std::vector<int> &ids,
               int &word) const;

private:
  typedef std::map<const Moses::Factor*, int> Coll;

  // NOTE(review): no definition or live call of add() appears in the
  // accompanying Mapper.cpp (only a commented-out call) — confirm whether
  // this declaration is still needed.
  void add(int lbl_id, int cdec_id);

  oxlm::Dict dict;   // private copy of the model dictionary
  Coll moses2lbl;    // Moses factor -> dictionary id
  int kUNKNOWN;      // id of "<unk>" in `dict`
};
/**
 * Pair of feature values produced by the LBL language model: the language
 * model score and the out-of-vocabulary score. Both start at zero and can be
 * accumulated component-wise with operator+=.
 */
struct LBLFeatures {
  double LMScore;
  double OOVScore;

  /** Both scores zero. */
  LBLFeatures() : LMScore(0), OOVScore(0) {}

  /** Initialises the scores from the given values. */
  LBLFeatures(double lm_score, double oov_score)
    : LMScore(lm_score), OOVScore(oov_score) {}

  /** Adds the scores of rhs to this object and returns *this. */
  LBLFeatures& operator+=(const LBLFeatures& rhs) {
    OOVScore += rhs.OOVScore;
    LMScore += rhs.LMScore;
    return *this;
  }
};
}

View File

@ -1,4 +1,4 @@
#include "LBLLM.h"
#include "OxLM.h"
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
@ -15,7 +15,7 @@ namespace Moses
{
template<class Model>
LBLLM<Model>::LBLLM(const string &line) : LanguageModelSingleFactor(line) {
OxLM<Model>::OxLM(const string &line) : LanguageModelSingleFactor(line) {
ReadParameters();
FactorCollection &factorCollection = FactorCollection::Instance();
@ -32,7 +32,7 @@ LBLLM<Model>::LBLLM(const string &line) : LanguageModelSingleFactor(line) {
template<class Model>
LBLLM<Model>::~LBLLM() {
OxLM<Model>::~OxLM() {
if (persistentCache) {
double cache_hit_ratio = 100.0 * cacheHits / totalHits;
cerr << "Cache hit ratio: " << cache_hit_ratio << endl;
@ -41,7 +41,7 @@ LBLLM<Model>::~LBLLM() {
template<class Model>
void LBLLM<Model>::SetParameter(const string& key, const string& value) {
void OxLM<Model>::SetParameter(const string& key, const string& value) {
if (key == "persistent-cache") {
persistentCache = Scan<bool>(value);
} else {
@ -50,24 +50,24 @@ void LBLLM<Model>::SetParameter(const string& key, const string& value) {
}
template<class Model>
void LBLLM<Model>::Load() {
void OxLM<Model>::Load() {
model.load(m_filePath);
Dict dict = model.getDict();
mapper = boost::make_shared<OXLMMapper>(dict);
boost::shared_ptr<oxlm::Vocabulary> vocab = model.getVocab();
mapper = boost::make_shared<OxLMMapper>(vocab);
kSTART = dict.Convert("<s>");
kSTOP = dict.Convert("</s>");
kUNKNOWN = dict.Convert("<unk>");
kSTART = vocab->convert("<s>");
kSTOP = vocab->convert("</s>");
kUNKNOWN = vocab->convert("<unk>");
size_t ngram_order = model.getConfig()->ngram_order;
UTIL_THROW_IF2(
m_nGramOrder != ngram_order,
"Wrong order for LBLLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
"Wrong order for OxLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
}
template<class Model>
LMResult LBLLM<Model>::GetValue(
LMResult OxLM<Model>::GetValue(
const vector<const Word*> &contextFactor, State* finalState) const {
if (!cache.get()) {
cache.reset(new QueryCache());
@ -95,11 +95,11 @@ LMResult LBLLM<Model>::GetValue(
score = ret.first;
++cacheHits;
} else {
score = model.predict(word, context);
score = model.getLogProb(word, context);
cache->put(query, score);
}
} else {
score = model.predict(word, context);
score = model.getLogProb(word, context);
}
LMResult ret;
@ -119,7 +119,7 @@ LMResult LBLLM<Model>::GetValue(
}
template<class Model>
void LBLLM<Model>::InitializeForInput(const InputType& source) {
void OxLM<Model>::InitializeForInput(const InputType& source) {
LanguageModelSingleFactor::InitializeForInput(source);
if (persistentCache) {
@ -143,7 +143,7 @@ void LBLLM<Model>::InitializeForInput(const InputType& source) {
}
template<class Model>
void LBLLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
void OxLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
model.clearCache();
if (persistentCache) {
@ -162,9 +162,9 @@ void LBLLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
LanguageModelSingleFactor::CleanUpAfterSentenceProcessing(source);
}
template class LBLLM<LM>;
template class LBLLM<FactoredLM>;
template class LBLLM<FactoredMaxentLM>;
template class OxLM<LM>;
template class OxLM<FactoredLM>;
template class OxLM<FactoredMaxentLM>;
}

View File

@ -6,23 +6,19 @@
#include "moses/LM/SingleFactor.h"
// lbl stuff
#include "corpus/corpus.h"
#include "lbl/model.h"
#include "lbl/query_cache.h"
#include "Mapper.h"
namespace Moses
{
#include "OxLMMapper.h"
namespace Moses {
template<class Model>
class LBLLM : public LanguageModelSingleFactor
{
public:
LBLLM(const std::string &line);
class OxLM : public LanguageModelSingleFactor {
public:
OxLM(const std::string &line);
~LBLLM();
~OxLM();
void SetParameter(const std::string& key, const std::string& value);
@ -36,9 +32,9 @@ public:
virtual void CleanUpAfterSentenceProcessing(const InputType& source);
protected:
protected:
Model model;
boost::shared_ptr<OXLMMapper> mapper;
boost::shared_ptr<OxLMMapper> mapper;
int kSTART;
int kSTOP;
@ -49,5 +45,4 @@ protected:
mutable int cacheHits, totalHits;
};
}
} // namespace Moses

View File

@ -0,0 +1,38 @@
#include "OxLMMapper.h"
#include "moses/FactorCollection.h"
using namespace std;
namespace Moses {
// Builds the Moses-factor -> OxLM-id table: each id in [0, vocab->size()) is
// turned into its string, that string is registered with the Moses
// FactorCollection, and the resulting factor is mapped back to the id.
// Finally caches the id the vocabulary assigns to "<unk>".
OxLMMapper::OxLMMapper(const boost::shared_ptr<oxlm::Vocabulary>& vocab) {
  for (int oxlmId = 0; oxlmId < vocab->size(); ++oxlmId) {
    const string &token = vocab->convert(oxlmId);
    const Moses::Factor *mosesFactor =
        FactorCollection::Instance().AddFactor(token, false);
    moses2Oxlm[mosesFactor] = oxlmId;
  }

  kUNKNOWN = vocab->convert("<unk>");
}
// Looks `factor` up in the Moses-factor -> OxLM-id table.
// Returns the stored id, or kUNKNOWN when the factor has no entry.
int OxLMMapper::convert(const Moses::Factor *factor) const {
  const Coll::const_iterator hit = moses2Oxlm.find(factor);
  if (hit == moses2Oxlm.end()) {
    return kUNKNOWN;
  }
  return hit->second;
}
void OxLMMapper::convert(
const vector<const Word*> &contextFactor,
vector<int> &ids, int &word) const {
ids.clear();
for (size_t i = 0; i < contextFactor.size() - 1; ++i) {
const Moses::Factor *factor = contextFactor[i]->GetFactor(0);
ids.push_back(convert(factor));
}
reverse(ids.begin(), ids.end());
const Moses::Factor *factor = contextFactor.back()->GetFactor(0);
word = convert(factor);
}
} // namespace Moses

View File

@ -0,0 +1,28 @@
#pragma once
#include <map>
#include "lbl/vocabulary.h"
#include "moses/Factor.h"
#include "moses/Phrase.h"
namespace Moses {
// Translates Moses factors into the integer word ids of an oxlm::Vocabulary.
class OxLMMapper {
 public:
  // Indexes every entry of `vocab` by its Moses factor.
  OxLMMapper(const boost::shared_ptr<oxlm::Vocabulary>& vocab);

  // Id for a single factor; kUNKNOWN when the factor is not in the table.
  int convert(const Moses::Factor *factor) const;

  // Splits an n-gram into reversed history ids (`ids`) and the id of its
  // last word (`word`).
  void convert(const std::vector<const Word*> &contextFactor,
               std::vector<int> &ids,
               int &word) const;

 private:
  typedef std::map<const Moses::Factor*, int> Coll;

  Coll moses2Oxlm;  // Moses factor -> vocabulary id
  int kUNKNOWN;     // id of "<unk>" in the vocabulary
};
}

View File

@ -52,14 +52,18 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
{
//Check that configure rejects illegal domain arg combinations
ScoreFeatureManager manager;
vector<string> args = boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null");
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
args = boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null");
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
args = boost::assign::list_of("--SparseDomainBlah")("/dev/null");
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
args = boost::assign::list_of("--DomainSubset");
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure(boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null")),
ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure(boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null")),
ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure(boost::assign::list_of("--SparseDomainBlah")("/dev/null")),
ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure(boost::assign::list_of("--DomainSubset")),
ScoreFeatureArgumentException);
}
template <class Expected>