From c854df84cb869d9e74b8ce1d7b47196c7b1a6949 Mon Sep 17 00:00:00 2001
From: Lane Schwartz
Date: Sat, 12 Nov 2016 11:31:43 -0600
Subject: [PATCH] Add per-sentence on-demand translation model.

This translation model reads its phrase table from a TranslationTask object's
ContextScope. This data can come from, for example, a mosesserver XML-RPC
client.
---
 moses/FF/Factory.cpp                          |   2 +
 ...aseDictionaryMemoryPerSentenceOnDemand.cpp | 176 ++++++++++++++++++
 ...hraseDictionaryMemoryPerSentenceOnDemand.h |  53 ++++++
 3 files changed, 231 insertions(+)
 create mode 100644 moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
 create mode 100644 moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h

diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index a41b8cb2c..a048410d0 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -16,6 +16,7 @@
 #include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
 #include "moses/TranslationModel/ProbingPT/ProbingPT.h"
 #include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h"
+#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h"
 
 #include "moses/FF/LexicalReordering/LexicalReordering.h"
 
@@ -244,6 +245,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(PhraseDictionaryFuzzyMatch);
   MOSES_FNAME(ProbingPT);
   MOSES_FNAME(PhraseDictionaryMemoryPerSentence);
+  MOSES_FNAME(PhraseDictionaryMemoryPerSentenceOnDemand);
   MOSES_FNAME2("RuleTable", Syntax::RuleTableFF);
   MOSES_FNAME2("SyntaxInputWeight", Syntax::InputWeightFF);
 
diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
new file mode 100644
index 000000000..db570968c
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
@@ -0,0 +1,176 @@
+// vim:tabstop=2
+#include "PhraseDictionaryMemoryPerSentenceOnDemand.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
+#include <algorithm>
+#include <cstdlib>
+#include <sstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+namespace Moses
+{
+// Passes the configuration line on to PhraseDictionary, then reads the parameters.
+PhraseDictionaryMemoryPerSentenceOnDemand::PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line)
+  : PhraseDictionary(line, true)
+{
+  ReadParameters();
+}
+
+// Stores the options and sets up the features to apply. No phrase table is
+// read here; the table for each input is built in InitializeForInput().
+void PhraseDictionaryMemoryPerSentenceOnDemand::Load(AllOptions::ptr const& opts)
+{
+  m_options = opts;
+  SetFeaturesToApply();
+}
+
+// Returns the target phrases stored for source in the calling thread's
+// collection, or an empty pointer if there are none. find() is used instead
+// of operator[] so that a failed lookup does not insert an entry.
+TargetPhraseCollection::shared_ptr PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const
+{
+  const Coll &coll = GetColl();
+  Coll::const_iterator entry = coll.find(source);
+  if (entry == coll.end()) {
+    return TargetPhraseCollection::shared_ptr();
+  }
+  return entry->second;
+}
+
+// Rebuilds the calling thread's collection from the phrase table text that
+// the translation task's ContextScope holds under this object's address.
+// Every non-empty line needs at least three "|||"-separated fields (source
+// phrase, target phrase, scores); a line with fewer raises util::Exception.
+// If the scope holds no text for this object the collection stays empty.
+void PhraseDictionaryMemoryPerSentenceOnDemand::InitializeForInput(ttasksptr const& ttask)
+{
+  Coll &coll = GetColl();
+  coll.clear();
+
+  VERBOSE(2, "Initializing PhraseDictionaryMemoryPerSentenceOnDemand " << m_description << "\n");
+
+  // The context scope object for this translation task
+  // contains a map of translation task-specific data
+  boost::shared_ptr<ContextScope> contextScope = ttask->GetScope();
+
+  // The key to the map is this object
+  void const* key = static_cast<void const*>(this);
+
+  // The value stored in the map is a string representing a phrase table
+  boost::shared_ptr<string> value;
+  if (contextScope) {
+    value = contextScope->get<string>(key);
+  }
+  if (!value) {
+    // No phrase table was supplied for this input, so there is nothing to read.
+    VERBOSE(2, "No phrase table data for " << m_description << "\n");
+    return;
+  }
+
+  // Create a stream to read the phrase table data
+  stringstream strme(*value);
+
+  // Read the phrase table data, one line at a time
+  string line;
+  while (getline(strme, line)) {
+
+    VERBOSE(3, "\t" << line);
+
+    // Blank lines carry no entry; skip them instead of indexing missing fields
+    if (line.empty()) {
+      continue;
+    }
+
+    vector<string> toks = TokenizeMultiCharSeparator(line, "|||");
+    if (toks.size() < 3) {
+      UTIL_THROW(util::Exception, "PhraseDictionaryMemoryPerSentenceOnDemand expected \"source ||| target ||| scores\" but got \"" << line << "\".");
+    }
+
+    Phrase source;
+    source.CreateFromString(Input, m_input, toks[0], NULL);
+
+    TargetPhrase *target = new TargetPhrase(this);
+    target->CreateFromString(Output, m_output, toks[1], NULL);
+
+    // score for this phrase table
+    vector<float> scores = Tokenize<float>(toks[2]);
+    std::transform(scores.begin(), scores.end(), scores.begin(), TransformScore);
+    std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
+    target->GetScoreBreakdown().PlusEquals(this, scores);
+
+    // score of all other ff when this rule is being loaded
+    target->EvaluateInIsolation(source, GetFeaturesToApply());
+
+    // add to coll, creating the collection for this source phrase on first use
+    TargetPhraseCollection::shared_ptr &tpsPtr = coll[source];
+    if (!tpsPtr) {
+      tpsPtr.reset(new TargetPhraseCollection());
+    }
+    tpsPtr->Add(target);
+  }
+}
+
+// For every input path in the queue, attaches the target phrases stored for
+// the path's source phrase, or an empty pointer when there are none.
+void PhraseDictionaryMemoryPerSentenceOnDemand::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
+{
+  // Fetch the collection once rather than once per input path
+  const Coll &coll = GetColl();
+
+  InputPathList::const_iterator pathIter;
+  for (pathIter = inputPathQueue.begin(); pathIter != inputPathQueue.end(); ++pathIter) {
+    InputPath &inputPath = **pathIter;
+    const Phrase &source = inputPath.GetPhrase();
+
+    // Stays empty when the source phrase has no entry
+    TargetPhraseCollection::shared_ptr tprPtr;
+    Coll::const_iterator entry = coll.find(source);
+    if (entry != coll.end()) {
+      tprPtr = entry->second;
+    }
+    inputPath.SetTargetPhrases(*this, tprPtr, NULL);
+  }
+}
+
+// Chart decoding is not supported by this phrase table: calling this aborts.
+ChartRuleLookupManager* PhraseDictionaryMemoryPerSentenceOnDemand::CreateRuleLookupManager(const ChartParser & /*parser*/,
+    const ChartCellCollectionBase & /*cellCollection*/,
+    std::size_t /*maxChartSpan*/)
+{
+  abort();
+}
+
+// Returns the calling thread's collection, creating it on first use.
+PhraseDictionaryMemoryPerSentenceOnDemand::Coll &PhraseDictionaryMemoryPerSentenceOnDemand::GetColl() const
+{
+  Coll *coll = m_coll.get();
+  if (coll == NULL) {
+    coll = new Coll;
+    m_coll.reset(coll);
+  }
+  return *coll;
+}
+
+// Rejects the "path" key; every other key is forwarded to PhraseDictionary.
+void
+PhraseDictionaryMemoryPerSentenceOnDemand::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "path") {
+    UTIL_THROW(util::Exception, "PhraseDictionaryMemoryPerSentenceOnDemand does not support key \"path\".");
+  } else {
+    PhraseDictionary::SetParameter(key, value);
+  }
+}
+
+TO_STRING_BODY(PhraseDictionaryMemoryPerSentenceOnDemand);
+
+// friend
+// Currently writes nothing and returns the stream unchanged.
+ostream& operator<<(ostream& out, const PhraseDictionaryMemoryPerSentenceOnDemand& /*phraseDict*/)
+{
+  return out;
+}
+
+}
diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h
new file mode 100644
index 000000000..bcda0ef77
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h
@@ -0,0 +1,53 @@
+
+#pragma once
+
+#include "PhraseDictionary.h"
+#include "moses/TypeDef.h"
+#include "moses/TranslationTask.h"
+
+namespace Moses
+{
+class ChartParser;
+class ChartCellCollectionBase;
+class ChartRuleLookupManager;
+
+/** Phrase table that is rebuilt for every input from phrase table text stored
+ *  in the translation task's ContextScope (see InitializeForInput()).
+ */
+class PhraseDictionaryMemoryPerSentenceOnDemand : public PhraseDictionary
+{
+  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemoryPerSentenceOnDemand&);
+
+public:
+  PhraseDictionaryMemoryPerSentenceOnDemand(const std::string &line);
+
+  // stores the options; no phrase table data is read at load time
+  void Load(AllOptions::ptr const& opts);
+
+  // rebuilds the calling thread's phrase table from the task's ContextScope
+  void InitializeForInput(ttasksptr const& ttask);
+
+  // for phrase-based model
+  void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
+
+  // for syntax/hiero model (CKY+ decoding)
+  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
+
+  TO_STRING();
+
+
+protected:
+  // source phrase -> target phrases
+  typedef boost::unordered_map<Phrase, TargetPhraseCollection::shared_ptr> Coll;
+  // one collection per thread, created on first use by GetColl()
+  mutable boost::thread_specific_ptr<Coll> m_coll;
+
+  Coll &GetColl() const;
+
+};
+
+} // namespace Moses