Merge ../mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2015-11-18 11:47:50 +00:00
commit 91bd99e649
10 changed files with 30 additions and 26 deletions

View File

@ -34,10 +34,8 @@ public:
//! Returns the stored name (m_name) by const reference.
const std::string &GetName() const
{ return m_name; }
//! Reports whether this object should be given a vocabulary index; this
//! implementation always answers false. FeatureFunctions::Create only calls
//! SetVocabInd() and records the object when this returns a non-zero value.
// NOTE(review): the result is used as a yes/no flag but is declared size_t -
// confirm whether bool was intended (any change must be mirrored in overrides).
// NOTE(review): this diff view shows both the non-virtual and the virtual
// declaration line; presumably only one of them exists in the committed file.
size_t HasVocabInd() const
virtual size_t HasVocabInd() const
{ return false; }
//! Returns the vocabulary index previously stored in m_vocabInd.
size_t GetVocabInd() const
{ return m_vocabInd; }
//! Stores the vocabulary index for this object. FeatureFunctions::Create passes
//! the current length of its list of vocab-indexed feature functions, taken
//! just before appending this object to that list.
void SetVocabInd(size_t vocabInd)
{ m_vocabInd = vocabInd; }

View File

@ -21,7 +21,6 @@
#include "../TranslationModel/UnknownWordPenalty.h"
#include "../LM/LanguageModel.h"
#include "../LM/KENLM.h"
#include "../LM/KENLMBatch.h"
#include "util/exception.hh"
using namespace std;
@ -126,8 +125,8 @@ FeatureFunction *FeatureFunctions::Create(const std::string &line)
m_ffStartInd += ret->GetNumScores();
if (ret->HasVocabInd()) {
ret->SetVocabInd(m_hasVocabInd.size());
m_hasVocabInd.push_back(ret);
ret->SetVocabInd(hasVocabInd.size());
hasVocabInd.push_back(ret);
}
return ret;

View File

@ -25,7 +25,9 @@ class Scores;
class FeatureFunctions {
public:
FeatureFunctions(System &system);
std::vector<const FeatureFunction*> hasVocabInd;
FeatureFunctions(System &system);
virtual ~FeatureFunctions();
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
@ -48,7 +50,6 @@ protected:
std::vector<const FeatureFunction*> m_featureFunctions;
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
std::vector<const PhraseTable*> m_phraseTables;
std::vector<const FeatureFunction*> m_hasVocabInd;
System &m_system;
size_t m_ffStartInd;

View File

@ -36,25 +36,23 @@ struct KenLMState : public FFState {
// Vocabulary-enumeration callback handed to KenLM through
// config.enumerate_vocab in KENLM::Load. Each Add() call registers the word
// string as a Moses factor and records the LM word index that goes with it.
// (Presumably KenLM calls Add() once per vocabulary entry while the model
// loads - confirm against lm::EnumerateVocab.)
//
// NOTE(review): this diff view interleaves two versions of the class:
//   * the lookup-table version, which fills an external
//     std::vector<lm::WordIndex> indexed by factor id (m_mapping), and
//   * the per-factor-slot version, which writes the index into
//     Factor::ffData[m_vocabInd].
// Judging by the other hunks of this commit (m_lmIdLookup is dropped from
// KENLM, Load passes m_vocabInd), the m_vocabInd lines appear to be the ones
// that remain after the commit.
class MappingBuilder : public lm::EnumerateVocab
{
public:
// Lookup-table version: 'mapping' is held by reference and grown on demand in Add().
MappingBuilder(FactorCollection &factorCollection, System &system, std::vector<lm::WordIndex> &mapping)
// Per-factor-slot version: 'vocabInd' selects which ffData slot this builder writes to.
MappingBuilder(FactorCollection &factorCollection, System &system, size_t vocabInd)
: m_factorCollection(factorCollection)
, m_system(system)
, m_mapping(mapping)
, m_vocabInd(vocabInd)
{}
// Records that the word 'str' has LM word index 'index'.
void Add(lm::WordIndex index, const StringPiece &str) {
// Lookup-table version: make sure the table reaches factorId, then store the index there.
std::size_t factorId = m_factorCollection.AddFactor(str, m_system.featureFunctions)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
m_mapping.resize(factorId + 1);
}
m_mapping[factorId] = index;
// Per-factor-slot version: AddFactor sizes factor->ffData to the number of
// vocab-indexed feature functions, so the slot at m_vocabInd is expected to
// exist; the access itself is unchecked.
const Factor *factor = m_factorCollection.AddFactor(str, m_system.featureFunctions);
//cerr << "m_vocabInd=" << m_vocabInd << " ffData=" << factor->ffData.size() << endl;
// The integer index is smuggled through the void* slot; KENLM::TranslateID casts it back.
factor->ffData[m_vocabInd] = (void*) index;
}
private:
FactorCollection &m_factorCollection;
System &m_system;
// Lookup-table version only: factor id -> LM word index.
std::vector<lm::WordIndex> &m_mapping;
// Per-factor-slot version only: index into Factor::ffData used by this builder.
size_t m_vocabInd;
};
/////////////////////////////////////////////////////////////////
@ -81,7 +79,7 @@ void KENLM::Load(System &system)
config.messages = NULL;
FactorCollection &collection = system.vocab;
MappingBuilder builder(collection, system, m_lmIdLookup);
MappingBuilder builder(collection, system, m_vocabInd);
config.enumerate_vocab = &builder;
config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;
@ -255,8 +253,9 @@ void KENLM::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore,
}
// Converts a Moses word into the language model's word index, using the
// factor found at position m_factorType of the word.
// NOTE(review): this diff view interleaves two versions of the body; the hunk
// header (-255,8 +253,9) is consistent with the first two statements being
// the removed lines and the last three being the added ones.
lm::WordIndex KENLM::TranslateID(const Word &word) const {
// Lookup-table version: the factor id indexes m_lmIdLookup; ids beyond the
// end of the table map to 0.
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
// Per-factor-slot version: read back the value MappingBuilder::Add stored in
// the factor's ffData slot. FactorCollection::AddFactor fills new slots with
// NULL, so a slot that was never written converts to 0 here.
const Factor *factor = word[m_factorType];
lm::WordIndex ret = (lm::WordIndex)(size_t) factor->ffData[m_vocabInd];
return ret;
}
// Convert last words of hypothesis into vocab ids, returning an end pointer.

View File

@ -43,6 +43,9 @@ public:
void SetParameter(const std::string& key, const std::string& value);
// Always answers true for this class. FeatureFunctions::Create checks this
// value and, when it is non-zero, assigns the object a vocabulary index.
// NOTE(review): declared size_t although used as a yes/no flag - confirm
// against the base-class declaration before changing.
size_t HasVocabInd() const
{ return true; }
protected:
std::string m_path;
FactorType m_factorType;
@ -52,7 +55,6 @@ protected:
typedef lm::ngram::ProbingModel Model;
boost::shared_ptr<Model> m_ngram;
std::vector<lm::WordIndex> m_lmIdLookup;
void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const;

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <ostream>
#include <string>
#include <vector>
#include "util/string_piece.hh"
struct FactorFriend;
@ -44,6 +45,7 @@ class Factor
mutable StringPiece m_string;
size_t m_id;
//! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
Factor() {}
@ -54,6 +56,8 @@ class Factor
Factor &operator=(const Factor &factor);
public:
mutable std::vector<void*> ffData;
//! original string representation of the factor
StringPiece GetString() const {
return m_string;

View File

@ -64,6 +64,7 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, const
const Factor *factor = &ret.first->in;
// set vocabs for ffs
factor->ffData.resize(ffs.hasVocabInd.size(), NULL);
return factor;
}

View File

@ -52,7 +52,7 @@ BitextSampler : public Moses::reference_counter
// const members
// SPTR<bitext const> const m_bitext; // keep bitext alive while I am
// should be an
iptr<bitext const> const m_bitext; // keep bitext alive as long as I am
SPTR<bitext const> const m_bitext; // keep bitext alive as long as I am
size_t const m_plen; // length of lookup phrase
bool const m_fwd; // forward or backward direction?
SPTR<tsa const> const m_root; // root of suffix array

View File

@ -16,7 +16,7 @@ struct StatsCollector
typedef lru_cache::LRU_Cache< uint64_t, pstats > hcache_t;
typedef ThreadSafeContainer<uint64_t, SPTR<pstats> > pcache_t;
typedef map<uint64_t, SPTR<pstats> > lcache_t;
iptr<Bitext<Token> const> bitext; // underlying bitext
SPTR<Bitext<Token> const> bitext; // underlying bitext
sampling_method method; // sampling method
size_t sample_size; // sample size
SPTR<SamplingBias const> bias; // sampling bias
@ -26,7 +26,7 @@ struct StatsCollector
SPTR<lcache_t> lcache; // local cache
ug::ThreadPool* tpool; // thread pool to run jobs on
StatsCollector(iptr<Bitext<Token> > xbitext,
StatsCollector(SPTR<Bitext<Token> > xbitext,
SPTR<SamplingBias> const xbias)
: method(ranked_sampling)
, sample_size(100)

View File

@ -71,7 +71,7 @@ namespace Moses
typedef sapt::PhraseScorer<Token> pscorer;
private:
// vector<SPTR<bitext> > shards;
iptr<mmbitext> btfix;
SPTR<mmbitext> btfix;
SPTR<imbitext> btdyn;
std::string m_bname, m_extra_data, m_bias_file,m_bias_server;
std::string L1;
@ -160,7 +160,7 @@ namespace Moses
#if PROVIDES_RANKED_SAMPLING
void
set_bias_for_ranking(ttasksptr const& ttask, iptr<sapt::Bitext<Token> const> bt);
set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt);
#endif
private: