From 11809630ea23a04326a271703dc134089a90016b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 29 Dec 2015 20:02:25 +0000 Subject: [PATCH] add PhraseDictionaryMemoryPerSentence --- contrib/other-builds/moses/.project | 44 ++++--- moses/FF/Factory.cpp | 2 + .../PhraseDictionaryMemoryPerSentence.cpp | 108 ++++++++++++++++++ .../PhraseDictionaryMemoryPerSentence.h | 42 +++++++ 4 files changed, 179 insertions(+), 17 deletions(-) create mode 100644 moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp create mode 100644 moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 287657a11..e8651529d 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -2205,6 +2205,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemory.h + + TranslationModel/PhraseDictionaryMemoryPerSentence.cpp + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp + + + TranslationModel/PhraseDictionaryMemoryPerSentence.h + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h + TranslationModel/PhraseDictionaryMultiModel.cpp 1 @@ -3500,16 +3510,16 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-describe-features.cc + + TranslationModel/UG/ptable-lookup-corpus.cc + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup-corpus.cc + TranslationModel/UG/ptable-lookup.cc 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup.cc - - - TranslationModel/UG/ptable-lookup-corpus.cc - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup-corpus.cc - + TranslationModel/UG/sapt_phrase_scorers.h 1 @@ -5585,21 +5595,21 @@ 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup + + TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus + + + TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o + 1 + PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o + TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o 1 PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o - - - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus - - - TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o - 1 - PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup-corpus.o - + TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage 1 diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index cf2aebaf3..3435a6374 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -15,6 +15,7 @@ #include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h" #include "moses/TranslationModel/ProbingPT/ProbingPT.h" +#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h" #include "moses/FF/LexicalReordering/LexicalReordering.h" @@ -223,6 +224,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(PhraseDictionaryDynamicCacheBased); MOSES_FNAME(PhraseDictionaryFuzzyMatch); MOSES_FNAME(ProbingPT); + MOSES_FNAME(PhraseDictionaryMemoryPerSentence); MOSES_FNAME2("RuleTable", Syntax::RuleTableFF); MOSES_FNAME2("SyntaxInputWeight", Syntax::InputWeightFF); diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp new file mode 100644 index 000000000..2cc1bb461 --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp @@ -0,0 +1,108 @@ +// vim:tabstop=2 +#include "PhraseDictionaryMemoryPerSentence.h" +#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h" + +using namespace std; + +namespace Moses +{ +PhraseDictionaryMemoryPerSentence::PhraseDictionaryMemoryPerSentence(const std::string &line) + : PhraseDictionary(line, true) +{ + ReadParameters(); +} + +void PhraseDictionaryMemoryPerSentence::Load(AllOptions::ptr const& opts) +{ + m_options = opts; + SetFeaturesToApply(); + + // don't load anything. Load when we have the input +} + +void PhraseDictionaryMemoryPerSentence::InitializeForInput(ttasksptr const& ttask) +{ + Coll &coll = GetColl(); + coll.clear(); + + string filePath = m_filePath + SPrint(ttask.get()->GetSource()->GetTranslationId()) + ".txt"; + InputFileStream strme(filePath); + + string line; + while (getline(strme, line)) { + vector toks = TokenizeMultiCharSeparator(line, "|||"); + Phrase source; + source.CreateFromString(Input, m_input, toks[0], NULL); + + TargetPhrase *target = new TargetPhrase(this); + target->CreateFromString(Output, m_output, toks[1], NULL); + + // score for this phrase table + vector scores = Tokenize(toks[2]); + std::transform(scores.begin(), scores.end(), scores.begin(),TransformScore); + std::transform(scores.begin(), scores.end(), scores.begin(),FloorScore); + target->GetScoreBreakdown().PlusEquals(this, scores); + + // score of all other ff when this rule is being loaded + target->EvaluateInIsolation(source, GetFeaturesToApply()); + + // add to coll + TargetPhraseCollection::shared_ptr &tpsPtr = coll[source]; + TargetPhraseCollection *tps = tpsPtr.get(); + if (tps == NULL) { + tps = new TargetPhraseCollection(); + tpsPtr.reset(tps); + } + tps->Add(target); + } +} + +void PhraseDictionaryMemoryPerSentence::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const +{ + InputPathList::const_iterator iter; + for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) { + InputPath &inputPath = **iter; + const Phrase &source = inputPath.GetPhrase(); + + Coll &coll = GetColl(); + Coll::const_iterator iter = coll.find(source); + if (iter == coll.end()) { + TargetPhraseCollection::shared_ptr tprPtr; + inputPath.SetTargetPhrases(*this, tprPtr, NULL); + } + else { + const TargetPhraseCollection::shared_ptr &tprPtr = iter->second; + inputPath.SetTargetPhrases(*this, tprPtr, NULL); + } + } +} + + +ChartRuleLookupManager* PhraseDictionaryMemoryPerSentence::CreateRuleLookupManager(const ChartParser &parser, + const ChartCellCollectionBase &cellCollection, + std::size_t /*maxChartSpan*/) +{ + abort(); +} + +PhraseDictionaryMemoryPerSentence::Coll &PhraseDictionaryMemoryPerSentence::GetColl() const +{ + Coll *coll; + coll = m_coll.get(); + if (coll == NULL) { + coll = new Coll; + m_coll.reset(coll); + } + assert(coll); + return *coll; +} + +TO_STRING_BODY(PhraseDictionaryMemoryPerSentence); + +// friend +ostream& operator<<(ostream& out, const PhraseDictionaryMemoryPerSentence& phraseDict) +{ + return out; +} + +} diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h new file mode 100644 index 000000000..72658a1ea --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h @@ -0,0 +1,42 @@ + +#pragma once + +#include "PhraseDictionary.h" +#include "moses/TypeDef.h" +#include "moses/TranslationTask.h" + +namespace Moses +{ +class ChartParser; +class ChartCellCollectionBase; +class ChartRuleLookupManager; + +class PhraseDictionaryMemoryPerSentence : public PhraseDictionary +{ + friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryMemoryPerSentence&); + +public: + PhraseDictionaryMemoryPerSentence(const std::string &line); + + void Load(AllOptions::ptr const& opts); + + void InitializeForInput(ttasksptr const& ttask); + + // for phrase-based model + void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; + + // for syntax/hiero model (CKY+ decoding) + ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t); + + TO_STRING(); + + +protected: + typedef boost::unordered_map Coll; + mutable boost::thread_specific_ptr m_coll; + + Coll &GetColl() const; + +}; + +} // namespace Moses