mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
5d960a2c59
@ -16,6 +16,7 @@
|
||||
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
|
||||
#include "moses/TranslationModel/ProbingPT/ProbingPT.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h"
|
||||
|
||||
#include "moses/FF/LexicalReordering/LexicalReordering.h"
|
||||
|
||||
@ -244,6 +245,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
|
||||
MOSES_FNAME(ProbingPT);
|
||||
MOSES_FNAME(PhraseDictionaryMemoryPerSentence);
|
||||
MOSES_FNAME(PhraseDictionaryMemoryPerSentenceOnDemand);
|
||||
MOSES_FNAME2("RuleTable", Syntax::RuleTableFF);
|
||||
MOSES_FNAME2("SyntaxInputWeight", Syntax::InputWeightFF);
|
||||
|
||||
|
@ -35,113 +35,22 @@ namespace Moses
|
||||
{
|
||||
|
||||
class FFState;
|
||||
|
||||
//LanguageModel *ConstructReloadingLM(const std::string &line);
|
||||
//LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
|
||||
/*
|
||||
namespace {
|
||||
class MappingBuilder : public lm::EnumerateVocab
|
||||
{
|
||||
public:
|
||||
MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping)
|
||||
: m_factorCollection(factorCollection), m_mapping(mapping) {}
|
||||
|
||||
void Add(lm::WordIndex index, const StringPiece &str) {
|
||||
std::size_t factorId = m_factorCollection.AddFactor(str)->GetId();
|
||||
if (m_mapping.size() <= factorId) {
|
||||
// 0 is <unk> :-)
|
||||
m_mapping.resize(factorId + 1);
|
||||
}
|
||||
m_mapping[factorId] = index;
|
||||
}
|
||||
|
||||
private:
|
||||
FactorCollection &m_factorCollection;
|
||||
std::vector<lm::WordIndex> &m_mapping;
|
||||
};
|
||||
}
|
||||
*/
|
||||
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
|
||||
{
|
||||
public:
|
||||
// TODO(Lane) copy less code, update to load_method
|
||||
|
||||
ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) {
|
||||
|
||||
std::cerr << "ReloadingLM constructor: " << m_file << std::endl;
|
||||
// std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
|
||||
VERBOSE(1, "ReloadingLM constructor: " << m_file << std::endl);
|
||||
|
||||
}
|
||||
|
||||
virtual void InitializeForInput(ttasksptr const& ttask) {
|
||||
std::cerr << "ReloadingLM InitializeForInput" << std::endl;
|
||||
// TODO(lane): load_method
|
||||
VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl);
|
||||
|
||||
LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);
|
||||
/*
|
||||
lm::ngram::Config config;
|
||||
if(this->m_verbosity >= 1) {
|
||||
config.messages = &std::cerr;
|
||||
} else {
|
||||
config.messages = NULL;
|
||||
}
|
||||
FactorCollection &collection = FactorCollection::Instance();
|
||||
MappingBuilder builder(collection, m_lmIdLookup);
|
||||
config.enumerate_vocab = &builder;
|
||||
config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;
|
||||
|
||||
m_ngram.reset(new Model(m_file.c_str(), config));
|
||||
|
||||
m_beginSentenceFactor = collection.AddFactor(BOS_);
|
||||
*/
|
||||
};
|
||||
|
||||
/*
|
||||
ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) {
|
||||
std::cerr << "ReloadingLM constructor" << std::endl;
|
||||
std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
|
||||
}
|
||||
*/
|
||||
/*
|
||||
~ReloadingLanguageModel() {
|
||||
delete m_lm;
|
||||
}
|
||||
|
||||
virtual const FFState *EmptyHypothesisState(const InputType &input) const {
|
||||
return m_lm->EmptyHypothesisState(input);
|
||||
}
|
||||
|
||||
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
|
||||
m_lm->CalcScore(phrase, fullScore, ngramScore, oovCount);
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
|
||||
return m_lm->EvaluateWhenApplied(hypo, ps, out);
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
|
||||
return m_lm->EvaluateWhenApplied(cur_hypo, featureID, accumulator);
|
||||
}
|
||||
|
||||
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
|
||||
return m_lm->EvaluateWhenApplied(hyperedge, featureID, accumulator);
|
||||
}
|
||||
|
||||
virtual void IncrementalCallback(Incremental::Manager &manager) const {
|
||||
m_lm->IncrementalCallback(manager);
|
||||
}
|
||||
|
||||
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
|
||||
m_lm->ReportHistoryOrder(out, phrase);
|
||||
}
|
||||
|
||||
virtual bool IsUseable(const FactorMask &mask) const {
|
||||
return m_lm->IsUseable(mask);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
LanguageModel *m_lm;
|
||||
*/
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -0,0 +1,146 @@
|
||||
// vim:tabstop=2
|
||||
#include "PhraseDictionaryMemoryPerSentenceOnDemand.h"
|
||||
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Parses the feature-function configuration line and registers this
// dictionary's parameters. No phrase-table data is read here.
PhraseDictionaryMemoryPerSentenceOnDemand::PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line)
  : PhraseDictionary(line, true)
{
  ReadParameters();
}
|
||||
|
||||
void PhraseDictionaryMemoryPerSentenceOnDemand::Load(AllOptions::ptr const& opts)
{
  // Record the decoder options and work out which feature functions apply
  // to this table. Deliberately reads no data: the actual table is built
  // per input sentence in InitializeForInput().
  m_options = opts;
  SetFeaturesToApply();
}
|
||||
|
||||
|
||||
// Legacy single-phrase lookup into the per-thread table.
// Returns an empty shared_ptr when the source phrase is unknown.
//
// Fix: the original used coll[source], which default-inserts a NULL entry
// into the per-thread map for every unseen source phrase queried through
// this const method. find() performs the lookup without mutating the map.
TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const
{
  Coll &coll = GetColl();

  Coll::const_iterator iter = coll.find(source);
  if (iter == coll.end()) {
    return TargetPhraseCollection::shared_ptr();
  }
  return iter->second;
}
|
||||
|
||||
|
||||
// Rebuilds this thread's phrase table from the string stored for this
// feature function in the translation task's ContextScope.
//
// Fixes: (1) the original dereferenced value.get() without a null check,
// crashing when no context-scope entry was supplied for this FF;
// (2) toks[0..2] were indexed without verifying the line actually has
// three "|||"-separated fields.
void PhraseDictionaryMemoryPerSentenceOnDemand::InitializeForInput(ttasksptr const& ttask)
{
  Coll &coll = GetColl();
  coll.clear();

  VERBOSE(2, "Initializing PhraseDictionaryMemoryPerSentenceOnDemand " << m_description << "\n");

  // The context scope object for this translation task
  // contains a map of translation task-specific data
  boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();

  // The key to the map is this object
  void const* key = static_cast<void const*>(this);

  // The value stored in the map is a string representing a phrase table
  boost::shared_ptr<string> value = contextScope->get<string>(key);

  // No phrase-table data was supplied for this sentence: leave the
  // (already cleared) collection empty instead of dereferencing NULL.
  if (!value) {
    return;
  }

  // Create a stream to read the phrase table data
  stringstream strme(*value);

  // Read the phrase table data, one line at a time
  string line;
  while (getline(strme, line)) {

    VERBOSE(3, "\t" << line);

    vector<string> toks = TokenizeMultiCharSeparator(line, "|||");

    // A well-formed entry is "source ||| target ||| scores"; skip
    // malformed lines rather than indexing past the end of toks.
    if (toks.size() < 3) {
      VERBOSE(2, "Skipping malformed phrase-table line: " << line << "\n");
      continue;
    }

    Phrase source;
    source.CreateFromString(Input, m_input, toks[0], NULL);

    // Ownership of target passes to the TargetPhraseCollection below.
    TargetPhrase *target = new TargetPhrase(this);
    target->CreateFromString(Output, m_output, toks[1], NULL);

    // score for this phrase table
    vector<float> scores = Tokenize<float>(toks[2]);
    std::transform(scores.begin(), scores.end(), scores.begin(), TransformScore);
    std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
    target->GetScoreBreakdown().PlusEquals(this, scores);

    // score of all other ff when this rule is being loaded
    target->EvaluateInIsolation(source, GetFeaturesToApply());

    // add to coll; create the collection on first sight of this source
    TargetPhraseCollection::shared_ptr &tpsPtr = coll[source];
    TargetPhraseCollection *tps = tpsPtr.get();
    if (tps == NULL) {
      tps = new TargetPhraseCollection();
      tpsPtr.reset(tps);
    }
    tps->Add(target);
  }
}
|
||||
|
||||
void PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
{
|
||||
InputPathList::const_iterator iter;
|
||||
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
|
||||
InputPath &inputPath = **iter;
|
||||
const Phrase &source = inputPath.GetPhrase();
|
||||
|
||||
Coll &coll = GetColl();
|
||||
Coll::const_iterator iter = coll.find(source);
|
||||
if (iter == coll.end()) {
|
||||
TargetPhraseCollection::shared_ptr tprPtr;
|
||||
inputPath.SetTargetPhrases(*this, tprPtr, NULL);
|
||||
} else {
|
||||
const TargetPhraseCollection::shared_ptr &tprPtr = iter->second;
|
||||
inputPath.SetTargetPhrases(*this, tprPtr, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Chart (syntax/hiero) decoding is not implemented for this phrase table;
// any attempt to use it in a chart decoder aborts the process.
ChartRuleLookupManager* PhraseDictionaryMemoryPerSentenceOnDemand::CreateRuleLookupManager(const ChartParser & /*parser*/,
    const ChartCellCollectionBase & /*cellCollection*/,
    std::size_t /*maxChartSpan*/)
{
  abort();
}
|
||||
|
||||
// Returns this thread's table, creating it on first access. The map lives
// in a boost::thread_specific_ptr, so each decoding thread gets its own
// instance; the pointer owns the map and deletes it on thread exit.
PhraseDictionaryMemoryPerSentenceOnDemand::Coll &PhraseDictionaryMemoryPerSentenceOnDemand::GetColl() const
{
  Coll *table = m_coll.get();
  if (table == NULL) {
    table = new Coll;
    m_coll.reset(table);
  }
  assert(table);
  return *table;
}
|
||||
|
||||
void
|
||||
PhraseDictionaryMemoryPerSentenceOnDemand::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
UTIL_THROW(util::Exception, "PhraseDictionaryMemoryPerSentenceOnDemand does not support key \"path\".");
|
||||
} else {
|
||||
PhraseDictionary::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TO_STRING_BODY(PhraseDictionaryMemoryPerSentenceOnDemand);
|
||||
|
||||
// friend
|
||||
// friend
// Intentionally prints nothing: the table contents are per-sentence and
// per-thread, so there is no stable state worth dumping here.
ostream& operator<<(ostream& out, const PhraseDictionaryMemoryPerSentenceOnDemand& phraseDict)
{
  return out;
}
|
||||
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "PhraseDictionary.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/TranslationTask.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class ChartParser;
|
||||
class ChartCellCollectionBase;
|
||||
class ChartRuleLookupManager;
|
||||
|
||||
class PhraseDictionaryMemoryPerSentenceOnDemand : public PhraseDictionary
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemoryPerSentenceOnDemand&);
|
||||
|
||||
public:
|
||||
PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line);
|
||||
|
||||
void Load(AllOptions::ptr const& opts);
|
||||
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
|
||||
// for phrase-based model
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
// for syntax/hiero model (CKY+ decoding)
|
||||
ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
|
||||
|
||||
TO_STRING();
|
||||
|
||||
|
||||
protected:
|
||||
typedef boost::unordered_map<Phrase, TargetPhraseCollection::shared_ptr> Coll;
|
||||
mutable boost::thread_specific_ptr<Coll> m_coll;
|
||||
|
||||
Coll &GetColl() const;
|
||||
|
||||
};
|
||||
|
||||
} // namespace Moses
|
@ -24,6 +24,8 @@ using Moses::FValue;
|
||||
using Moses::PhraseDictionaryMultiModel;
|
||||
using Moses::FindPhraseDictionary;
|
||||
using Moses::Sentence;
|
||||
using Moses::TokenizeMultiCharSeparator;
|
||||
using Moses::FeatureFunction;
|
||||
|
||||
boost::shared_ptr<TranslationRequest>
|
||||
TranslationRequest::
|
||||
@ -312,6 +314,44 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
|
||||
m_context.reset(new std::vector<std::string>(1,context));
|
||||
}
|
||||
|
||||
si = params.find("context-scope");
|
||||
if (si != params.end())
|
||||
{
|
||||
|
||||
string context = xmlrpc_c::value_string(si->second);
|
||||
|
||||
string groupSeparator("Moses::ContextScope::GroupSeparator");
|
||||
string recordSeparator("Moses::ContextScope::RecordSeparator");
|
||||
|
||||
// Here, we assume that any XML-RPC value
|
||||
// associated with the key "context-scope"
|
||||
// has the following format:
|
||||
//
|
||||
// FeatureFunctionName followed by recordSeparator
|
||||
// followed by the value of interest
|
||||
// followed by groupSeparator
|
||||
//
|
||||
// In the following code, the value of interest will be stored
|
||||
// in contextScope under the key FeatureFunctionName,
|
||||
// where FeatureFunctionName is the actual name of the feature function
|
||||
|
||||
boost::shared_ptr<Moses::ContextScope> contextScope = GetScope();
|
||||
|
||||
BOOST_FOREACH(string group, TokenizeMultiCharSeparator(context, groupSeparator)) {
|
||||
|
||||
vector<string> record = TokenizeMultiCharSeparator(group, recordSeparator);
|
||||
|
||||
// Use the feature function whose name is record[0] as a key
|
||||
FeatureFunction& ff = Moses::FeatureFunction::FindFeatureFunction(record[0]);
|
||||
void const* key = static_cast<void const*>(&ff);
|
||||
|
||||
// Store (in the context scope) record[1] as the value associated with that key
|
||||
boost::shared_ptr<string> value = contextScope->get<string>(key,true);
|
||||
value->replace(value->begin(), value->end(), record[1]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// // biased sampling for suffix-array-based sampling phrase table?
|
||||
// if ((si = params.find("bias")) != params.end())
|
||||
|
Loading…
Reference in New Issue
Block a user