Merge ../mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2016-03-28 23:58:20 +01:00
commit ff8caa1226
14 changed files with 46 additions and 40 deletions

View File

@ -3,29 +3,29 @@
namespace Moses
{
// Factory for the OSM language model wrapper.
//
// Asks lm::ngram::RecognizeBinary() which KenLM model type `file` holds and
// returns a heap-allocated KenOSM<> instantiated for that type.
//
// file: path of the language model to open.
// Returns: a pointer obtained from `new`; nothing here releases it, so the
//          caller presumably owns it -- confirm at the call site.
// Throws (UTIL_THROW2): when RecognizeBinary() reports a model type that is
//          not one of the cases handled below.
OSMLM* ConstructOSMLM(const std::string &file)
OSMLM* ConstructOSMLM(const char *file)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
lm::ngram::Config config;
if (lm::ngram::RecognizeBinary(file, model_type)) {
// One case per KenLM model type; each builds the matching KenOSM<>.
switch(model_type) {
case lm::ngram::PROBING:
return new KenOSM<lm::ngram::ProbingModel>(file);
return new KenOSM<lm::ngram::ProbingModel>(file, config);
case lm::ngram::REST_PROBING:
return new KenOSM<lm::ngram::RestProbingModel>(file);
return new KenOSM<lm::ngram::RestProbingModel>(file, config);
case lm::ngram::TRIE:
return new KenOSM<lm::ngram::TrieModel>(file);
return new KenOSM<lm::ngram::TrieModel>(file, config);
case lm::ngram::QUANT_TRIE:
return new KenOSM<lm::ngram::QuantTrieModel>(file);
return new KenOSM<lm::ngram::QuantTrieModel>(file, config);
case lm::ngram::ARRAY_TRIE:
return new KenOSM<lm::ngram::ArrayTrieModel>(file);
return new KenOSM<lm::ngram::ArrayTrieModel>(file, config);
case lm::ngram::QUANT_ARRAY_TRIE:
return new KenOSM<lm::ngram::QuantArrayTrieModel>(file);
return new KenOSM<lm::ngram::QuantArrayTrieModel>(file, config);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
// File was not recognised as a KenLM binary: fall back to ProbingModel.
return new KenOSM<lm::ngram::ProbingModel>(file);
return new KenOSM<lm::ngram::ProbingModel>(file, config);
}
}

View File

@ -2,7 +2,6 @@
#include <string>
#include "lm/model.hh"
#include <boost/shared_ptr.hpp>
namespace Moses
{
@ -12,7 +11,7 @@ class KenOSMBase
public:
virtual ~KenOSMBase() {}
virtual float Score(const lm::ngram::State&, const std::string&,
virtual float Score(const lm::ngram::State&, StringPiece,
lm::ngram::State&) const = 0;
virtual const lm::ngram::State &BeginSentenceState() const = 0;
@ -24,31 +23,31 @@ template <class KenModel>
// Adapter that exposes one concrete KenLM model type (the KenModel template
// argument) through the KenOSMBase interface.
class KenOSM : public KenOSMBase
{
public:
// Constructs the wrapped KenModel from the language model file path.
KenOSM(const std::string& file)
: m_kenlm(new KenModel(file.c_str())) {}
KenOSM(const char *file, const lm::ngram::Config &config)
: m_kenlm(file, config) {}
// Maps `word` to its id with the model vocabulary's Index() and forwards
// (in_state, id, out_state) to the model's Score(), returning its result.
// NOTE(review): out_state presumably receives the state after `word` --
// confirm against the KenLM model interface.
virtual float Score(const lm::ngram::State &in_state,
const std::string& word,
lm::ngram::State &out_state) const {
return m_kenlm->Score(in_state, m_kenlm->GetVocabulary().Index(word),
out_state);
float Score(const lm::ngram::State &in_state,
StringPiece word,
lm::ngram::State &out_state) const {
return m_kenlm.Score(in_state, m_kenlm.GetVocabulary().Index(word),
out_state);
}
// Forwards to the wrapped model's BeginSentenceState().
virtual const lm::ngram::State &BeginSentenceState() const {
return m_kenlm->BeginSentenceState();
const lm::ngram::State &BeginSentenceState() const {
return m_kenlm.BeginSentenceState();
}
// Forwards to the wrapped model's NullContextState().
virtual const lm::ngram::State &NullContextState() const {
return m_kenlm->NullContextState();
const lm::ngram::State &NullContextState() const {
return m_kenlm.NullContextState();
}
private:
// The wrapped KenLM model that every method above delegates to.
boost::shared_ptr<KenModel> m_kenlm;
KenModel m_kenlm;
};
typedef KenOSMBase OSMLM;
OSMLM* ConstructOSMLM(const std::string &file);
OSMLM* ConstructOSMLM(const char *file);
} // namespace

View File

@ -27,7 +27,7 @@ OpSequenceModel::~OpSequenceModel()
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
OSM = ConstructOSMLM(m_lmPath);
OSM = ConstructOSMLM(m_lmPath.c_str());
State startState = OSM->NullContextState();
State endState;
@ -200,7 +200,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
UTIL_THROW2("Chart decoding not support by OpSequenceModel");
}

View File

@ -1,4 +1,4 @@
// $Id$
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
@ -93,6 +93,8 @@ private:
template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, util::LoadMethod load_method)
{
m_lmIdLookup.clear();
lm::ngram::Config config;
if(this->m_verbosity >= 1) {
config.messages = &std::cerr;
@ -105,13 +107,12 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
m_beginSentenceFactor = collection.AddFactor(BOS_);
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
:LanguageModel(line)
,m_factorType(factorType)
,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
{
ReadParameters();
LoadModel(file, load_method);

View File

@ -1,3 +1,4 @@
#include <set>
#include "moses/PP/TargetConstituentBoundariesLeftPhraseProperty.h"
#include "moses/FactorCollection.h"
#include "moses/Util.h"

View File

@ -3,6 +3,7 @@
#include "moses/Util.h"
#include <iostream>
#include <queue>
#include <set>
#include <ostream>
namespace Moses

View File

@ -1015,9 +1015,7 @@ ConvertWeightArgsLM()
+ "order=" + modelToks[2] + " " // order
+ "num-features=" + SPrint(numFF) + " ";
if (lmType == 9) {
featureLine += "lazyken=1 ";
} else if (lmType == 8) {
featureLine += "lazyken=0 ";
featureLine += "load=lazy ";
}
if(oovWeights.size() > lmIndex)

View File

@ -59,6 +59,7 @@ TranslationOptionCollection(ttasksptr const& ttask,
, m_maxNoTransOptPerCoverage(ttask->options()->search.max_trans_opt_per_cov)
, m_translationOptionThreshold(ttask->options()->search.trans_opt_threshold)
, m_max_phrase_length(ttask->options()->search.max_phrase_length)
, max_partial_trans_opt(ttask->options()->search.max_partial_trans_opt)
{
// create 2-d vector
size_t size = src.GetSize();
@ -391,7 +392,7 @@ CreateTranslationOptionsForRange
|| !HasXmlOptionsOverlappingRange(sPos,ePos)) {
// partial trans opt stored in here
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl(m_max_phrase_length);
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl(max_partial_trans_opt);
size_t totalEarlyPruned = 0;
// initial translation step

View File

@ -72,6 +72,7 @@ protected:
const size_t m_maxNoTransOptPerCoverage; /**< maximum number of translation options per input span */
const float m_translationOptionThreshold; /**< threshold for translation options with regard to best option for input span */
size_t m_max_phrase_length; /**< copied from the search option max_phrase_length at construction */
size_t max_partial_trans_opt; /**< copied from the search option max_partial_trans_opt at construction; passed to each PartialTranslOptColl created for a range */
std::vector<const Phrase*> m_unksrcs;
InputPathList m_inputPathQueue;

View File

@ -112,8 +112,8 @@ else {
`mkdir $OUT_DIR/TUNE`;
`cp $TUNE.$INP_EXT --reduced $OUT_DIR/TUNE/tune.$INP_EXT`;
`cp $TUNE.$OP_EXT --reduced $OUT_DIR/TUNE/tune.$OP_EXT`;
`cp $TUNE.$INP_EXT $OUT_DIR/TUNE/tune.$INP_EXT`;
`cp $TUNE.$OP_EXT $OUT_DIR/TUNE/tune.$OP_EXT`;
create_interpolated_model("");
}

View File

@ -246,7 +246,7 @@ def main(argv):
if show_weights:
sys.stdout.write(subprocess.check_output(cmd))
sys.stdout.flush()
sys.exit(0)
return
# Check inputs
if not (len(cmd) > 0 and moses_ini):

View File

@ -13,7 +13,7 @@ my %TTABLE_IMPLEMENTATION = ( 0 => "PhraseDictionaryMemory",
1 => "PhraseDictionaryBinary" ,
6 => "PhraseDictionaryMemory");
my %LM_IMPLEMENTATION = ( 0 => "SRILM",
8 => "KENLM lazyken=0" );
8 => "KENLM" );
my (%FEATURE,%WEIGHT);

View File

@ -1404,6 +1404,10 @@ sub get_featlist_from_moses {
$cmd .= "$___DECODER $___DECODER_FLAGS -config $configfn";
$cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
$cmd .= " -show-weights";
if (defined $___USE_MULTI_MOSES) {
# Pass moses command through multi-moses script to handle threads properly
$cmd = "$___MULTI_MOSES $cmd";
}
print STDERR "Executing: $cmd\n";
&submit_or_exec($cmd, $featlistfn, "/dev/null", 1);
}

View File

@ -2299,9 +2299,9 @@ sub create_ini {
} elsif ($type == 1) {
$type = "IRSTLM";
} elsif ($type == 8) {
$type = "KENLM lazyken=0";
$type = "KENLM";
} elsif ($type == 9) {
$type = "KENLM lazyken=1";
$type = "KENLM load=lazy";
} else {
die "Unknown numeric LM type given: $type";
}