Merge branch 'master' of github.com:moses-smt/mosesdecoder

Hieu Hoang 2016-11-14 23:43:50 +00:00
commit 5d960a2c59
5 changed files with 238 additions and 95 deletions

View File

@@ -16,6 +16,7 @@
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/ProbingPT/ProbingPT.h"
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h"
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h"
@@ -244,6 +245,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME(ProbingPT);
MOSES_FNAME(PhraseDictionaryMemoryPerSentence);
MOSES_FNAME(PhraseDictionaryMemoryPerSentenceOnDemand);
MOSES_FNAME2("RuleTable", Syntax::RuleTableFF);
MOSES_FNAME2("SyntaxInputWeight", Syntax::InputWeightFF);

View File

@@ -35,113 +35,22 @@ namespace Moses
{
class FFState;
//LanguageModel *ConstructReloadingLM(const std::string &line);
//LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
/*
namespace {
class MappingBuilder : public lm::EnumerateVocab
{
public:
MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
: m_factorCollection(factorCollection), m_mapping(mapping) {}
void Add(lm::WordIndex index, const StringPiece &str) {
std::size_t factorId = m_factorCollection.AddFactor(str)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
m_mapping.resize(factorId + 1);
}
m_mapping[factorId] = index;
}
private:
FactorCollection &m_factorCollection;
std::vector<lm::WordIndex> &m_mapping;
};
}
*/
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
{
public:
  // TODO(Lane) copy less code, update to load_method
  ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
    : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ)
    , m_file(file)
    , m_lazy(lazy) {
    std::cerr << "ReloadingLM constructor: " << m_file << std::endl;
    // std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
    VERBOSE(1, "ReloadingLM constructor: " << m_file << std::endl);
  }

  virtual void InitializeForInput(ttasksptr const& ttask) {
    std::cerr << "ReloadingLM InitializeForInput" << std::endl;

    // TODO(lane): load_method
    VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl);

    // Reload the KenLM model from disk for every new input
    LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);

    /*
    lm::ngram::Config config;
    if (this->m_verbosity >= 1) {
      config.messages = &std::cerr;
    } else {
      config.messages = NULL;
    }

    FactorCollection &collection = FactorCollection::Instance();
    MappingBuilder builder(collection, m_lmIdLookup);
    config.enumerate_vocab = &builder;
    config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;

    m_ngram.reset(new Model(m_file.c_str(), config));
    m_beginSentenceFactor = collection.AddFactor(BOS_);
    */
  }
/*
ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) {
std::cerr << "ReloadingLM constructor" << std::endl;
std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
}
*/
/*
~ReloadingLanguageModel() {
delete m_lm;
}
virtual const FFState *EmptyHypothesisState(const InputType &input) const {
return m_lm->EmptyHypothesisState(input);
}
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
m_lm->CalcScore(phrase, fullScore, ngramScore, oovCount);
}
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
return m_lm->EvaluateWhenApplied(hypo, ps, out);
}
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
return m_lm->EvaluateWhenApplied(cur_hypo, featureID, accumulator);
}
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
return m_lm->EvaluateWhenApplied(hyperedge, featureID, accumulator);
}
virtual void IncrementalCallback(Incremental::Manager &manager) const {
m_lm->IncrementalCallback(manager);
}
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
m_lm->ReportHistoryOrder(out, phrase);
}
virtual bool IsUseable(const FactorMask &mask) const {
return m_lm->IsUseable(mask);
}
private:
LanguageModel *m_lm;
*/
protected:

View File

@@ -0,0 +1,146 @@
// vim:tabstop=2
#include "PhraseDictionaryMemoryPerSentenceOnDemand.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include <sstream>
using namespace std;

namespace Moses
{

PhraseDictionaryMemoryPerSentenceOnDemand::PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line)
  : PhraseDictionary(line, true)
{
  ReadParameters();
}

void PhraseDictionaryMemoryPerSentenceOnDemand::Load(AllOptions::ptr const& opts)
{
  m_options = opts;
  SetFeaturesToApply();

  // Don't load anything here; the table is loaded per sentence, once the input is available
}

TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const
{
  Coll &coll = GetColl();
  return coll[source];
}

void PhraseDictionaryMemoryPerSentenceOnDemand::InitializeForInput(ttasksptr const& ttask)
{
  Coll &coll = GetColl();
  coll.clear();

  VERBOSE(2, "Initializing PhraseDictionaryMemoryPerSentenceOnDemand " << m_description << "\n");

  // The context scope object for this translation task
  // contains a map of translation task-specific data
  boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();

  // The key to the map is this object
  void const* key = static_cast<void const*>(this);

  // The value stored in the map is a string representing a phrase table,
  // assumed to have been provided by the client (e.g. via the "context-scope"
  // parameter handled in TranslationRequest::parse_request)
  boost::shared_ptr<string> value = contextScope->get<string>(key);

  // Create a stream to read the phrase table data
  stringstream strme(*(value.get()));

  // Read the phrase table data, one line at a time
  string line;
  while (getline(strme, line)) {
    VERBOSE(3, "\t" << line << "\n");

    vector<string> toks = TokenizeMultiCharSeparator(line, "|||");
    Phrase source;
    source.CreateFromString(Input, m_input, toks[0], NULL);

    TargetPhrase *target = new TargetPhrase(this);
    target->CreateFromString(Output, m_output, toks[1], NULL);

    // Scores of this phrase pair in this phrase table
    vector<float> scores = Tokenize<float>(toks[2]);
    std::transform(scores.begin(), scores.end(), scores.begin(), TransformScore);
    std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
    target->GetScoreBreakdown().PlusEquals(this, scores);

    // Scores of all other feature functions that are evaluated when this rule is loaded
    target->EvaluateInIsolation(source, GetFeaturesToApply());

    // Add the target phrase to the collection for this source phrase
    TargetPhraseCollection::shared_ptr &tpsPtr = coll[source];
    TargetPhraseCollection *tps = tpsPtr.get();
    if (tps == NULL) {
      tps = new TargetPhraseCollection();
      tpsPtr.reset(tps);
    }
    tps->Add(target);
  }
}

void PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
  InputPathList::const_iterator iter;
  for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
    InputPath &inputPath = **iter;
    const Phrase &source = inputPath.GetPhrase();

    Coll &coll = GetColl();
    Coll::const_iterator collIter = coll.find(source);
    if (collIter == coll.end()) {
      // No entry for this source phrase: attach an empty collection
      TargetPhraseCollection::shared_ptr tprPtr;
      inputPath.SetTargetPhrases(*this, tprPtr, NULL);
    } else {
      const TargetPhraseCollection::shared_ptr &tprPtr = collIter->second;
      inputPath.SetTargetPhrases(*this, tprPtr, NULL);
    }
  }
}

ChartRuleLookupManager* PhraseDictionaryMemoryPerSentenceOnDemand::CreateRuleLookupManager(const ChartParser &parser,
    const ChartCellCollectionBase &cellCollection,
    std::size_t /*maxChartSpan*/)
{
  // Chart (syntax/hiero) decoding is not supported by this phrase table
  abort();
}

PhraseDictionaryMemoryPerSentenceOnDemand::Coll &PhraseDictionaryMemoryPerSentenceOnDemand::GetColl() const
{
  // Lazily create the collection for the calling thread on first use
  Coll *coll = m_coll.get();
  if (coll == NULL) {
    coll = new Coll;
    m_coll.reset(coll);
  }
  assert(coll);
  return *coll;
}

void
PhraseDictionaryMemoryPerSentenceOnDemand::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "path") {
    UTIL_THROW(util::Exception, "PhraseDictionaryMemoryPerSentenceOnDemand does not support key \"path\".");
  } else {
    PhraseDictionary::SetParameter(key, value);
  }
}

TO_STRING_BODY(PhraseDictionaryMemoryPerSentenceOnDemand);

// friend
ostream& operator<<(ostream& out, const PhraseDictionaryMemoryPerSentenceOnDemand& phraseDict)
{
  return out;
}

}
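
To make the expected table format concrete: each line handed to InitializeForInput is "source ||| target ||| score1 score2 ...". The following standalone sketch (not Moses code; the splitting helper merely stands in for TokenizeMultiCharSeparator, and the phrase pairs are invented) parses the same layout:

// Standalone sketch of the per-sentence phrase table line format
// consumed by PhraseDictionaryMemoryPerSentenceOnDemand::InitializeForInput.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split on a multi-character separator (stand-in for Moses' TokenizeMultiCharSeparator).
static std::vector<std::string> Split(const std::string &s, const std::string &sep)
{
  std::vector<std::string> out;
  std::string::size_type start = 0, pos;
  while ((pos = s.find(sep, start)) != std::string::npos) {
    out.push_back(s.substr(start, pos - start));
    start = pos + sep.size();
  }
  out.push_back(s.substr(start));
  return out;
}

int main()
{
  // Hypothetical per-sentence phrase table, as it would arrive via the context scope.
  std::string table =
    "das Haus ||| the house ||| 0.7 0.6 0.8 0.5\n"
    "Haus ||| house ||| 0.9 0.8 0.9 0.7\n";

  std::stringstream strme(table);
  std::string line;
  while (std::getline(strme, line)) {
    if (line.empty()) continue;
    std::vector<std::string> toks = Split(line, "|||");
    std::cout << "source=[" << toks[0] << "] target=[" << toks[1]
              << "] scores=[" << toks[2] << "]" << std::endl;
  }
  return 0;
}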

View File

@@ -0,0 +1,46 @@
#pragma once

#include "PhraseDictionary.h"
#include "moses/TypeDef.h"
#include "moses/TranslationTask.h"

namespace Moses
{
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;

class PhraseDictionaryMemoryPerSentenceOnDemand : public PhraseDictionary
{
  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemoryPerSentenceOnDemand&);

public:
  PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line);

  void Load(AllOptions::ptr const& opts);

  void InitializeForInput(ttasksptr const& ttask);

  // for phrase-based model
  void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

  // for syntax/hiero model (CKY+ decoding)
  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);

  void SetParameter(const std::string& key, const std::string& value);

  TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;

  TO_STRING();

protected:
  typedef boost::unordered_map<Phrase, TargetPhraseCollection::shared_ptr> Coll;

  // One collection per decoding thread; cleared and refilled for each input sentence
  mutable boost::thread_specific_ptr<Coll> m_coll;

  Coll &GetColl() const;
};

} // namespace Moses
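
The mutable boost::thread_specific_ptr member is what makes the table both per-thread and rebuildable per sentence: GetColl() lazily creates one Coll per decoding thread, and InitializeForInput clears it for every new input. A minimal standalone sketch of that lazy per-thread pattern (assuming only Boost.Thread and a toy map type) is:

// Standalone sketch of the lazy per-thread storage pattern used by GetColl().
#include <boost/thread/tss.hpp>
#include <iostream>
#include <map>
#include <string>

typedef std::map<std::string, int> Coll;         // toy stand-in for the real Coll
static boost::thread_specific_ptr<Coll> s_coll;  // one instance per thread

Coll &GetColl()
{
  Coll *coll = s_coll.get();  // NULL the first time the calling thread gets here
  if (coll == NULL) {
    coll = new Coll();        // deleted automatically when the owning thread exits
    s_coll.reset(coll);
  }
  return *coll;
}

int main()
{
  GetColl()["das Haus"] = 1;
  std::cout << GetColl().size() << std::endl;  // prints 1
  return 0;
}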

View File

@@ -24,6 +24,8 @@ using Moses::FValue;
using Moses::PhraseDictionaryMultiModel;
using Moses::FindPhraseDictionary;
using Moses::Sentence;
using Moses::TokenizeMultiCharSeparator;
using Moses::FeatureFunction;
boost::shared_ptr<TranslationRequest>
TranslationRequest::
@@ -312,6 +314,44 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
    m_context.reset(new std::vector<std::string>(1,context));
  }

  si = params.find("context-scope");
  if (si != params.end())
  {
    string context = xmlrpc_c::value_string(si->second);
    string groupSeparator("Moses::ContextScope::GroupSeparator");
    string recordSeparator("Moses::ContextScope::RecordSeparator");

    // Here, we assume that any XML-RPC value
    // associated with the key "context-scope"
    // has the following format:
    //
    //    FeatureFunctionName followed by recordSeparator
    //    followed by the value of interest
    //    followed by groupSeparator
    //
    // In the following code, the value of interest will be stored
    // in contextScope under the key FeatureFunctionName,
    // where FeatureFunctionName is the actual name of the feature function
    boost::shared_ptr<Moses::ContextScope> contextScope = GetScope();

    BOOST_FOREACH(string group, TokenizeMultiCharSeparator(context, groupSeparator)) {
      vector<string> record = TokenizeMultiCharSeparator(group, recordSeparator);

      // Use the feature function whose name is record[0] as a key
      FeatureFunction& ff = Moses::FeatureFunction::FindFeatureFunction(record[0]);
      void const* key = static_cast<void const*>(&ff);

      // Store (in the context scope) record[1] as the value associated with that key
      boost::shared_ptr<string> value = contextScope->get<string>(key, true);
      value->replace(value->begin(), value->end(), record[1]);
    }
  }
// // biased sampling for suffix-array-based sampling phrase table?
// if ((si = params.find("bias")) != params.end())
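
For completeness, the client has to assemble the "context-scope" parameter in the format the loop above parses: a group is FeatureFunctionName, then recordSeparator, then the value, with groupSeparator delimiting further groups. A standalone sketch (not part of the commit; the feature function name and phrase pair are hypothetical) of building such a value:

// Standalone sketch: build a "context-scope" value for the XML-RPC translate request.
#include <iostream>
#include <string>

int main()
{
  const std::string recordSeparator("Moses::ContextScope::RecordSeparator");
  const std::string groupSeparator("Moses::ContextScope::GroupSeparator");

  // Hypothetical feature function name; it must match the name of the
  // PhraseDictionaryMemoryPerSentenceOnDemand instance in the decoder's configuration.
  const std::string ffName("TranslationModel0");

  // Hypothetical per-sentence phrase table handed to that feature function.
  const std::string table("das Haus ||| the house ||| 0.7 0.6 0.8 0.5\n");

  std::string contextScope = ffName + recordSeparator + table;
  // To target a second feature function, append
  //   groupSeparator + otherFfName + recordSeparator + otherValue
  (void) groupSeparator;

  // Send this string as the "context-scope" parameter of the request.
  std::cout << contextScope << std::endl;
  return 0;
}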