mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 14:05:29 +03:00
Fix OxLM.
This commit is contained in:
parent
f2aebe4052
commit
8f74ecd8f3
2
Jamroot
2
Jamroot
@ -114,7 +114,7 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
|
||||
requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
|
||||
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
|
||||
|
||||
if [ option.get "with-lbllm" ] {
|
||||
if [ option.get "with-oxlm" ] {
|
||||
external-lib boost_serialization ;
|
||||
external-lib gomp ;
|
||||
requirements += <library>boost_serialization ;
|
||||
|
@ -2072,24 +2072,24 @@
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/oxlm/LBLLM.cpp</name>
|
||||
<name>LM/oxlm/OxLM.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLM.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/oxlm/LBLLM.h</name>
|
||||
<name>LM/oxlm/OxLM.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLM.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/oxlm/Mapper.cpp</name>
|
||||
<name>LM/oxlm/OxLMMapper.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLMMapper.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LM/oxlm/Mapper.h</name>
|
||||
<name>LM/oxlm/OxLMMapper.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/OxLMMapper.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</name>
|
||||
|
@ -96,8 +96,8 @@
|
||||
#include "moses/LM/DALMWrapper.h"
|
||||
#endif
|
||||
|
||||
#ifdef LM_LBL
|
||||
#include "moses/LM/oxlm/LBLLM.h"
|
||||
#ifdef LM_OXLM
|
||||
#include "moses/LM/oxlm/OxLM.h"
|
||||
#endif
|
||||
|
||||
#include "util/exception.hh"
|
||||
@ -252,10 +252,10 @@ FeatureRegistry::FeatureRegistry()
|
||||
#ifdef LM_DALM
|
||||
MOSES_FNAME2("DALM", LanguageModelDALM);
|
||||
#endif
|
||||
#ifdef LM_LBL
|
||||
MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
|
||||
MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
|
||||
MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
|
||||
#ifdef LM_OXLM
|
||||
MOSES_FNAME2("OxLM", OxLM<oxlm::LM>);
|
||||
MOSES_FNAME2("OxFactoredLM", OxLM<oxlm::FactoredLM>);
|
||||
MOSES_FNAME2("OxFactoredMaxentLM", OxLM<oxlm::FactoredMaxentLM>);
|
||||
#endif
|
||||
|
||||
Add("KENLM", new KenFactory());
|
||||
|
@ -10,14 +10,14 @@ if $(with-dlib) {
|
||||
dlib = ;
|
||||
}
|
||||
|
||||
with-lbllm = [ option.get "with-lbllm" ] ;
|
||||
if $(with-lbllm) {
|
||||
lbllm2 = <cxxflags>-std=c++0x <define>LM_LBL <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
|
||||
with-oxlm = [ option.get "with-oxlm" ] ;
|
||||
if $(with-oxlm) {
|
||||
oxlm2 = <cxxflags>-std=c++0x <define>LM_OXLM <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
|
||||
} else {
|
||||
lbllm2 = ;
|
||||
oxlm2 = ;
|
||||
}
|
||||
|
||||
alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(lbllm2) ;
|
||||
alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(oxlm2) ;
|
||||
alias ThreadPool : ThreadPool.cpp ;
|
||||
alias Util : Util.cpp Timer.cpp ;
|
||||
|
||||
|
@ -94,16 +94,16 @@ if $(with-nplm) {
|
||||
lmmacros += LM_NEURAL ;
|
||||
}
|
||||
|
||||
#LBLLM
|
||||
local with-lbllm = [ option.get "with-lbllm" ] ;
|
||||
if $(with-lbllm) {
|
||||
lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
|
||||
lib murmurhash : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
|
||||
obj LBLLM.o : oxlm/LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
|
||||
obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/third_party/eigen ;
|
||||
alias lbllm : LBLLM.o Mapper.o lbl murmurhash /top//boost_filesystem : : : <cxxflags>-std=c++0x <define>LM_LBL ;
|
||||
dependencies += lbllm ;
|
||||
lmmacros += LM_LBL ;
|
||||
#OxLM
|
||||
local with-oxlm = [ option.get "with-oxlm" ] ;
|
||||
if $(with-oxlm) {
|
||||
lib lbl : : <search>$(with-oxlm)/lib <search>$(with-oxlm)/lib64 ;
|
||||
lib murmurhash : : <search>$(with-oxlm)/lib <search>$(with-oxlm)/lib64 ;
|
||||
obj OxLM.o : oxlm/OxLM.cpp lbl ..//headers : <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
|
||||
obj OxLMMapper.o : oxlm/OxLMMapper.cpp lbl ..//headers : <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
|
||||
alias oxlm : OxLM.o OxLMMapper.o lbl murmurhash /top//boost_filesystem : : : <cxxflags>-std=c++0x <define>LM_OXLM ;
|
||||
dependencies += oxlm ;
|
||||
lmmacros += LM_OXLM ;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,67 +0,0 @@
|
||||
#include "Mapper.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Copies the oxlm dictionary and indexes each of its entries by the Moses
// factor registered for the same string, so that factors can later be
// translated to oxlm word ids with a single map lookup.
OXLMMapper::OXLMMapper(const oxlm::Dict& dict) : dict(dict)
{
  // NOTE: inside this body the unqualified name `dict` is the constructor
  // argument; the stored copy is reached through `this->dict`.
  for (int wordId = 0; wordId < dict.size(); ++wordId) {
    const string &surface = dict.Convert(wordId);
    const Moses::Factor *key =
      FactorCollection::Instance().AddFactor(surface, false);
    moses2lbl[key] = wordId;
  }

  // Id handed out by convert() for factors that are not in the table.
  kUNKNOWN = this->dict.Convert("<unk>");
}
|
||||
|
||||
// Returns the oxlm id recorded for `factor`, or kUNKNOWN when the factor was
// never registered by the constructor.
int OXLMMapper::convert(const Moses::Factor *factor) const
{
  const Coll::const_iterator hit = moses2lbl.find(factor);
  return hit != moses2lbl.end() ? hit->second : kUNKNOWN;
}
|
||||
|
||||
std::vector<int> OXLMMapper::convert(const Phrase &phrase) const
|
||||
{
|
||||
size_t size = phrase.GetSize();
|
||||
vector<int> ret(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
const Moses::Factor *factor = phrase.GetFactor(i, 0);
|
||||
int id = convert(factor);
|
||||
ret[i] = id;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Splits an n-gram into its history and its final word, both as oxlm ids.
//
// contextFactor: the n-gram; its last element is the word to be scored and
//                everything before it is the history.
// ids:           output, overwritten with the history ids in reverse order
//                (the element adjacent to the final word comes first).
// word:          output, id of the final word.
//
// An empty n-gram yields an empty `ids` and `word == kUNKNOWN`.
void OXLMMapper::convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const
{
  // Guard first: `size() - 1` is unsigned and would wrap around for an empty
  // n-gram, and back() on an empty vector is undefined behaviour.
  if (contextFactor.empty()) {
    ids.clear();
    word = kUNKNOWN;
    return;
  }

  const size_t historyLen = contextFactor.size() - 1;
  ids.resize(historyLen);

  // Write each history id straight into its reversed slot.
  for (size_t i = 0; i < historyLen; ++i) {
    const Moses::Factor *factor = contextFactor[i]->GetFactor(0);
    ids[historyLen - 1 - i] = convert(factor);
  }

  const Moses::Factor *last = contextFactor.back()->GetFactor(0);
  word = convert(last);
}
|
||||
|
||||
} // namespace
|
||||
|
@ -1,46 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include "corpus/corpus.h"
|
||||
#include "moses/Factor.h"
|
||||
#include "moses/Phrase.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class OXLMMapper
|
||||
{
|
||||
public:
|
||||
OXLMMapper(const oxlm::Dict& dict);
|
||||
|
||||
int convert(const Moses::Factor *factor) const;
|
||||
std::vector<int> convert(const Phrase &phrase) const;
|
||||
void convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const;
|
||||
|
||||
private:
|
||||
void add(int lbl_id, int cdec_id);
|
||||
|
||||
oxlm::Dict dict;
|
||||
typedef std::map<const Moses::Factor*, int> Coll;
|
||||
Coll moses2lbl;
|
||||
int kUNKNOWN;
|
||||
|
||||
};
|
||||
|
||||
/**
 * Pair of feature values obtained from the LBL language model: the
 * language-model score and the out-of-vocabulary score. Instances can be
 * accumulated component-wise with operator+=.
 */
struct LBLFeatures {
  double LMScore;
  double OOVScore;

  /** Both scores start at zero. */
  LBLFeatures()
    : LMScore(0), OOVScore(0)
  {
  }

  /** Starts from the given scores. */
  LBLFeatures(double lm_score, double oov_score)
    : LMScore(lm_score), OOVScore(oov_score)
  {
  }

  /** Adds the scores of `other` to this object and returns *this. */
  LBLFeatures& operator+=(const LBLFeatures& other)
  {
    OOVScore += other.OOVScore;
    LMScore += other.LMScore;
    return *this;
  }
};
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
#include "LBLLM.h"
|
||||
#include "OxLM.h"
|
||||
|
||||
#include <boost/archive/binary_iarchive.hpp>
|
||||
#include <boost/archive/binary_oarchive.hpp>
|
||||
@ -15,7 +15,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
template<class Model>
|
||||
LBLLM<Model>::LBLLM(const string &line) : LanguageModelSingleFactor(line) {
|
||||
OxLM<Model>::OxLM(const string &line) : LanguageModelSingleFactor(line) {
|
||||
ReadParameters();
|
||||
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
@ -32,7 +32,7 @@ LBLLM<Model>::LBLLM(const string &line) : LanguageModelSingleFactor(line) {
|
||||
|
||||
|
||||
template<class Model>
|
||||
LBLLM<Model>::~LBLLM() {
|
||||
OxLM<Model>::~OxLM() {
|
||||
if (persistentCache) {
|
||||
double cache_hit_ratio = 100.0 * cacheHits / totalHits;
|
||||
cerr << "Cache hit ratio: " << cache_hit_ratio << endl;
|
||||
@ -41,7 +41,7 @@ LBLLM<Model>::~LBLLM() {
|
||||
|
||||
|
||||
template<class Model>
|
||||
void LBLLM<Model>::SetParameter(const string& key, const string& value) {
|
||||
void OxLM<Model>::SetParameter(const string& key, const string& value) {
|
||||
if (key == "persistent-cache") {
|
||||
persistentCache = Scan<bool>(value);
|
||||
} else {
|
||||
@ -50,24 +50,24 @@ void LBLLM<Model>::SetParameter(const string& key, const string& value) {
|
||||
}
|
||||
|
||||
template<class Model>
|
||||
void LBLLM<Model>::Load() {
|
||||
void OxLM<Model>::Load() {
|
||||
model.load(m_filePath);
|
||||
|
||||
Dict dict = model.getDict();
|
||||
mapper = boost::make_shared<OXLMMapper>(dict);
|
||||
boost::shared_ptr<oxlm::Vocabulary> vocab = model.getVocab();
|
||||
mapper = boost::make_shared<OxLMMapper>(vocab);
|
||||
|
||||
kSTART = dict.Convert("<s>");
|
||||
kSTOP = dict.Convert("</s>");
|
||||
kUNKNOWN = dict.Convert("<unk>");
|
||||
kSTART = vocab->convert("<s>");
|
||||
kSTOP = vocab->convert("</s>");
|
||||
kUNKNOWN = vocab->convert("<unk>");
|
||||
|
||||
size_t ngram_order = model.getConfig()->ngram_order;
|
||||
UTIL_THROW_IF2(
|
||||
m_nGramOrder != ngram_order,
|
||||
"Wrong order for LBLLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
|
||||
"Wrong order for OxLM: LM has " << ngram_order << ", but Moses expects " << m_nGramOrder);
|
||||
}
|
||||
|
||||
template<class Model>
|
||||
LMResult LBLLM<Model>::GetValue(
|
||||
LMResult OxLM<Model>::GetValue(
|
||||
const vector<const Word*> &contextFactor, State* finalState) const {
|
||||
if (!cache.get()) {
|
||||
cache.reset(new QueryCache());
|
||||
@ -95,11 +95,11 @@ LMResult LBLLM<Model>::GetValue(
|
||||
score = ret.first;
|
||||
++cacheHits;
|
||||
} else {
|
||||
score = model.predict(word, context);
|
||||
score = model.getLogProb(word, context);
|
||||
cache->put(query, score);
|
||||
}
|
||||
} else {
|
||||
score = model.predict(word, context);
|
||||
score = model.getLogProb(word, context);
|
||||
}
|
||||
|
||||
LMResult ret;
|
||||
@ -119,7 +119,7 @@ LMResult LBLLM<Model>::GetValue(
|
||||
}
|
||||
|
||||
template<class Model>
|
||||
void LBLLM<Model>::InitializeForInput(const InputType& source) {
|
||||
void OxLM<Model>::InitializeForInput(const InputType& source) {
|
||||
LanguageModelSingleFactor::InitializeForInput(source);
|
||||
|
||||
if (persistentCache) {
|
||||
@ -143,7 +143,7 @@ void LBLLM<Model>::InitializeForInput(const InputType& source) {
|
||||
}
|
||||
|
||||
template<class Model>
|
||||
void LBLLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
|
||||
void OxLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
|
||||
model.clearCache();
|
||||
|
||||
if (persistentCache) {
|
||||
@ -162,9 +162,9 @@ void LBLLM<Model>::CleanUpAfterSentenceProcessing(const InputType& source) {
|
||||
LanguageModelSingleFactor::CleanUpAfterSentenceProcessing(source);
|
||||
}
|
||||
|
||||
template class LBLLM<LM>;
|
||||
template class LBLLM<FactoredLM>;
|
||||
template class LBLLM<FactoredMaxentLM>;
|
||||
template class OxLM<LM>;
|
||||
template class OxLM<FactoredLM>;
|
||||
template class OxLM<FactoredMaxentLM>;
|
||||
|
||||
}
|
||||
|
@ -6,23 +6,19 @@
|
||||
#include "moses/LM/SingleFactor.h"
|
||||
|
||||
// lbl stuff
|
||||
#include "corpus/corpus.h"
|
||||
#include "lbl/model.h"
|
||||
#include "lbl/query_cache.h"
|
||||
|
||||
#include "Mapper.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
#include "OxLMMapper.h"
|
||||
|
||||
namespace Moses {
|
||||
|
||||
template<class Model>
|
||||
class LBLLM : public LanguageModelSingleFactor
|
||||
{
|
||||
public:
|
||||
LBLLM(const std::string &line);
|
||||
class OxLM : public LanguageModelSingleFactor {
|
||||
public:
|
||||
OxLM(const std::string &line);
|
||||
|
||||
~LBLLM();
|
||||
~OxLM();
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
@ -36,9 +32,9 @@ public:
|
||||
|
||||
virtual void CleanUpAfterSentenceProcessing(const InputType& source);
|
||||
|
||||
protected:
|
||||
protected:
|
||||
Model model;
|
||||
boost::shared_ptr<OXLMMapper> mapper;
|
||||
boost::shared_ptr<OxLMMapper> mapper;
|
||||
|
||||
int kSTART;
|
||||
int kSTOP;
|
||||
@ -49,5 +45,4 @@ protected:
|
||||
mutable int cacheHits, totalHits;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
} // namespace Moses
|
38
moses/LM/oxlm/OxLMMapper.cpp
Normal file
38
moses/LM/oxlm/OxLMMapper.cpp
Normal file
@ -0,0 +1,38 @@
|
||||
#include "OxLMMapper.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses {
|
||||
|
||||
// Indexes every entry of the OxLM vocabulary by the Moses factor registered
// for the same string, and remembers the id of "<unk>" as the fallback.
OxLMMapper::OxLMMapper(const boost::shared_ptr<oxlm::Vocabulary>& vocab) {
  for (int wordId = 0; wordId < vocab->size(); ++wordId) {
    const string &surface = vocab->convert(wordId);
    const Moses::Factor *key =
        FactorCollection::Instance().AddFactor(surface, false);
    moses2Oxlm[key] = wordId;
  }

  // Returned by convert() for factors that are not in the table.
  kUNKNOWN = vocab->convert("<unk>");
}
|
||||
|
||||
// Looks up the OxLM id recorded for `factor`; factors that were never
// registered by the constructor yield kUNKNOWN.
int OxLMMapper::convert(const Moses::Factor *factor) const {
  const Coll::const_iterator hit = moses2Oxlm.find(factor);
  if (hit != moses2Oxlm.end()) {
    return hit->second;
  }
  return kUNKNOWN;
}
|
||||
|
||||
void OxLMMapper::convert(
|
||||
const vector<const Word*> &contextFactor,
|
||||
vector<int> &ids, int &word) const {
|
||||
ids.clear();
|
||||
for (size_t i = 0; i < contextFactor.size() - 1; ++i) {
|
||||
const Moses::Factor *factor = contextFactor[i]->GetFactor(0);
|
||||
ids.push_back(convert(factor));
|
||||
}
|
||||
reverse(ids.begin(), ids.end());
|
||||
|
||||
const Moses::Factor *factor = contextFactor.back()->GetFactor(0);
|
||||
word = convert(factor);
|
||||
}
|
||||
|
||||
} // namespace Moses
|
28
moses/LM/oxlm/OxLMMapper.h
Normal file
28
moses/LM/oxlm/OxLMMapper.h
Normal file
@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "lbl/vocabulary.h"
|
||||
|
||||
#include "moses/Factor.h"
|
||||
#include "moses/Phrase.h"
|
||||
|
||||
namespace Moses {
|
||||
|
||||
class OxLMMapper {
|
||||
public:
|
||||
OxLMMapper(const boost::shared_ptr<oxlm::Vocabulary>& vocab);
|
||||
|
||||
int convert(const Moses::Factor *factor) const;
|
||||
|
||||
void convert(
|
||||
const std::vector<const Word*> &contextFactor,
|
||||
std::vector<int> &ids, int &word) const;
|
||||
|
||||
private:
|
||||
typedef std::map<const Moses::Factor*, int> Coll;
|
||||
Coll moses2Oxlm;
|
||||
int kUNKNOWN;
|
||||
};
|
||||
|
||||
}
|
@ -52,14 +52,18 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
|
||||
{
|
||||
//Check that configure rejects illegal domain arg combinations
|
||||
ScoreFeatureManager manager;
|
||||
vector<string> args = boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null");
|
||||
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
|
||||
args = boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null");
|
||||
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
|
||||
args = boost::assign::list_of("--SparseDomainBlah")("/dev/null");
|
||||
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
|
||||
args = boost::assign::list_of("--DomainSubset");
|
||||
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure(boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null")),
|
||||
ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure(boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null")),
|
||||
ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure(boost::assign::list_of("--SparseDomainBlah")("/dev/null")),
|
||||
ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure(boost::assign::list_of("--DomainSubset")),
|
||||
ScoreFeatureArgumentException);
|
||||
}
|
||||
|
||||
template <class Expected>
|
||||
|
Loading…
Reference in New Issue
Block a user