From bdb0227ee9648ea96e8ee266d32581c63762b8b0 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sun, 18 Oct 2015 21:27:58 +0100 Subject: [PATCH] Life cycle of TargetPhraseCollection is now managed via shared pointers. --- OnDiskPt/PhraseNode.cpp | 14 +- OnDiskPt/PhraseNode.h | 5 +- OnDiskPt/TargetPhraseCollection.cpp | 13 +- OnDiskPt/TargetPhraseCollection.h | 7 +- OnDiskPt/queryOnDiskPt.cpp | 2 +- defer/PhraseDictionaryInterpolated.cpp | 4 +- defer/PhraseDictionaryInterpolated.h | 4 +- moses/ChartParser.cpp | 2 +- moses/ChartParser.h | 6 +- moses/ChartParserCallback.h | 3 +- moses/ChartTranslationOptionList.cpp | 8 +- moses/ChartTranslationOptionList.h | 2 +- moses/DecodeStepTranslation.cpp | 11 +- moses/DecodeStepTranslation.h | 4 +- moses/Incremental.cpp | 4 +- moses/InputPath.cpp | 57 +++--- moses/InputPath.h | 23 ++- moses/PDTAimp.cpp | 42 ++-- moses/PDTAimp.h | 8 +- moses/Syntax/F2S/GlueRuleSynthesizer.cpp | 6 +- moses/Syntax/F2S/HyperTree.cpp | 6 +- moses/Syntax/F2S/HyperTree.h | 17 +- moses/Syntax/F2S/HyperTreeCreator.h | 2 +- moses/Syntax/F2S/HyperTreeLoader.cpp | 6 +- moses/Syntax/F2S/RuleMatcherHyperTree-inl.h | 4 +- moses/Syntax/PLabel.h | 2 +- moses/Syntax/S2T/OovHandler-inl.h | 7 +- .../RecursiveCYKPlusParser-inl.h | 6 +- .../S2T/Parsers/Scope3Parser/Parser-inl.h | 9 +- .../Scope3Parser/TailLatticeSearcher.h | 7 +- moses/Syntax/S2T/RuleTrie.h | 7 +- moses/Syntax/S2T/RuleTrieCYKPlus.cpp | 11 +- moses/Syntax/S2T/RuleTrieCYKPlus.h | 17 +- moses/Syntax/S2T/RuleTrieCreator.h | 5 +- moses/Syntax/S2T/RuleTrieLoader.cpp | 7 +- moses/Syntax/S2T/RuleTrieScope3.cpp | 23 ++- moses/Syntax/S2T/RuleTrieScope3.h | 12 +- moses/Syntax/SHyperedgeBundle.h | 2 +- moses/Syntax/T2S/GlueRuleSynthesizer.cpp | 6 +- moses/Syntax/T2S/HyperTree.h | 6 +- moses/Syntax/T2S/RuleMatcherSCFG-inl.h | 2 +- moses/Syntax/T2S/RuleTrie.cpp | 52 +++-- moses/Syntax/T2S/RuleTrie.h | 18 +- moses/Syntax/T2S/RuleTrieCreator.h | 2 +- moses/Syntax/T2S/RuleTrieLoader.cpp | 10 +- moses/TargetPhraseCollection.h | 6 + .../ChartRuleLookupManagerMemory.cpp | 6 +- ...hartRuleLookupManagerMemoryPerSentence.cpp | 7 +- .../ChartRuleLookupManagerOnDisk.cpp | 23 ++- .../ChartRuleLookupManagerOnDisk.h | 2 +- .../ChartRuleLookupManagerSkeleton.cpp | 8 +- .../ChartRuleLookupManagerSkeleton.h | 2 +- .../CYKPlusParser/CompletedRuleCollection.h | 2 +- .../CompactPT/PhraseDictionaryCompact.cpp | 22 +- .../CompactPT/PhraseDictionaryCompact.h | 6 +- moses/TranslationModel/PhraseDictionary.cpp | 75 +++---- moses/TranslationModel/PhraseDictionary.h | 55 ++--- .../PhraseDictionaryDynamicCacheBased.cpp | 38 ++-- .../PhraseDictionaryDynamicCacheBased.h | 13 +- .../PhraseDictionaryGroup.cpp | 49 +++-- .../TranslationModel/PhraseDictionaryGroup.h | 11 +- .../PhraseDictionaryMemory.cpp | 24 +-- .../TranslationModel/PhraseDictionaryMemory.h | 20 +- .../PhraseDictionaryMultiModel.cpp | 129 +++++++----- .../PhraseDictionaryMultiModel.h | 76 +++++-- .../PhraseDictionaryMultiModelCounts.cpp | 68 ++++--- .../PhraseDictionaryMultiModelCounts.h | 16 +- .../PhraseDictionaryNodeMemory.cpp | 9 +- .../PhraseDictionaryNodeMemory.h | 13 +- .../PhraseDictionaryTransliteration.cpp | 16 +- .../PhraseDictionaryTreeAdaptor.cpp | 18 +- .../PhraseDictionaryTreeAdaptor.h | 7 +- .../TranslationModel/ProbingPT/ProbingPT.cpp | 8 +- moses/TranslationModel/RuleTable/Loader.h | 14 +- .../RuleTable/LoaderCompact.cpp | 7 +- .../RuleTable/LoaderStandard.cpp | 6 +- .../RuleTable/PhraseDictionaryFuzzyMatch.cpp | 10 +- .../RuleTable/PhraseDictionaryFuzzyMatch.h | 4 +- .../RuleTable/PhraseDictionaryOnDisk.cpp | 47 +++-- .../RuleTable/PhraseDictionaryOnDisk.h | 7 +- moses/TranslationModel/RuleTable/Trie.h | 7 +- moses/TranslationModel/RuleTable/UTrie.cpp | 7 +- moses/TranslationModel/RuleTable/UTrie.h | 8 +- .../TranslationModel/RuleTable/UTrieNode.cpp | 14 +- moses/TranslationModel/RuleTable/UTrieNode.h | 8 +- .../TranslationModel/Scope3Parser/Parser.cpp | 11 +- moses/TranslationModel/Scope3Parser/Parser.h | 11 +- moses/TranslationModel/SkeletonPT.cpp | 5 +- .../UG/TargetPhraseCollectionCache.cc | 188 +++--------------- .../UG/TargetPhraseCollectionCache.h | 58 ++---- moses/TranslationModel/UG/mmsapt.cpp | 63 +++--- moses/TranslationModel/UG/mmsapt.h | 19 +- moses/TranslationModel/UG/ptable-lookup.cc | 5 +- moses/TranslationOptionCollection.cpp | 4 +- moses/TranslationOptionCollectionLattice.cpp | 3 +- 95 files changed, 896 insertions(+), 810 deletions(-) diff --git a/OnDiskPt/PhraseNode.cpp b/OnDiskPt/PhraseNode.cpp index 8e50147b2..74e01d457 100644 --- a/OnDiskPt/PhraseNode.cpp +++ b/OnDiskPt/PhraseNode.cpp @@ -249,16 +249,12 @@ size_t PhraseNode::ReadChild(Word &wordFound, uint64_t &childFilePos, const char return memRead; } -const TargetPhraseCollection *PhraseNode::GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const +TargetPhraseCollection::shared_ptr +PhraseNode:: +GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const { - TargetPhraseCollection *ret = new TargetPhraseCollection(); - - if (m_value > 0) - ret->ReadFromFile(tableLimit, m_value, onDiskWrapper); - else { - - } - + TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection); + if (m_value > 0) ret->ReadFromFile(tableLimit, m_value, onDiskWrapper); return ret; } diff --git a/OnDiskPt/PhraseNode.h b/OnDiskPt/PhraseNode.h index 901852952..39b9b3f21 100644 --- a/OnDiskPt/PhraseNode.h +++ b/OnDiskPt/PhraseNode.h @@ -92,8 +92,11 @@ public: } const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const; - const TargetPhraseCollection *GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const; + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection(size_t tableLimit, + OnDiskWrapper &onDiskWrapper) const; + void AddCounts(const std::vector &counts) { m_counts = counts; } diff --git a/OnDiskPt/TargetPhraseCollection.cpp b/OnDiskPt/TargetPhraseCollection.cpp index 73ad2540c..a22c3633b 100644 --- a/OnDiskPt/TargetPhraseCollection.cpp +++ b/OnDiskPt/TargetPhraseCollection.cpp @@ -114,23 +114,22 @@ void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper) } -Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std::vector &inputFactors +Moses::TargetPhraseCollection::shared_ptr TargetPhraseCollection::ConvertToMoses(const std::vector &inputFactors , const std::vector &outputFactors , const Moses::PhraseDictionary &phraseDict , const std::vector &weightT , Vocab &vocab , bool isSyntax) const { - Moses::TargetPhraseCollection *ret = new Moses::TargetPhraseCollection(); + Moses::TargetPhraseCollection::shared_ptr ret; + ret.reset(new Moses::TargetPhraseCollection); CollType::const_iterator iter; for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) { const TargetPhrase &tp = **iter; - Moses::TargetPhrase *mosesPhrase = tp.ConvertToMoses(inputFactors, outputFactors - , vocab - , phraseDict - , weightT - , isSyntax); + Moses::TargetPhrase *mosesPhrase + = tp.ConvertToMoses(inputFactors, outputFactors, vocab, + phraseDict, weightT, isSyntax); /* // debugging output diff --git a/OnDiskPt/TargetPhraseCollection.h b/OnDiskPt/TargetPhraseCollection.h index 227a0ffc2..255c94054 100644 --- a/OnDiskPt/TargetPhraseCollection.h +++ b/OnDiskPt/TargetPhraseCollection.h @@ -21,6 +21,8 @@ #include "TargetPhrase.h" #include "Vocab.h" +#include "moses/TargetPhraseCollection.h" +#include namespace Moses { @@ -50,6 +52,9 @@ protected: std::string m_debugStr; public: + typedef boost::shared_ptr shared_const_ptr; + typedef boost::shared_ptr shared_ptr; + static size_t s_sortScoreInd; TargetPhraseCollection(); @@ -69,7 +74,7 @@ public: uint64_t GetFilePos() const; - Moses::TargetPhraseCollection *ConvertToMoses(const std::vector &inputFactors + Moses::TargetPhraseCollection::shared_ptr ConvertToMoses(const std::vector &inputFactors , const std::vector &outputFactors , const Moses::PhraseDictionary &phraseDict , const std::vector &weightT diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp index 77576d956..1eeb65d9a 100644 --- a/OnDiskPt/queryOnDiskPt.cpp +++ b/OnDiskPt/queryOnDiskPt.cpp @@ -56,7 +56,7 @@ int main(int argc, char **argv) if (node) { // source phrase points to a bunch of rules - const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper); + TargetPhraseCollection::shared_ptr coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper); string str = coll->GetDebugStr(); cout << "Found " << coll->GetSize() << endl; diff --git a/defer/PhraseDictionaryInterpolated.cpp b/defer/PhraseDictionaryInterpolated.cpp index 83abf73ba..6b3d9cf0b 100644 --- a/defer/PhraseDictionaryInterpolated.cpp +++ b/defer/PhraseDictionaryInterpolated.cpp @@ -116,7 +116,7 @@ typedef boost::unordered_set PhraseSet; -const TargetPhraseCollection* +TargetPhraseCollection::shared_ptr PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const { @@ -125,7 +125,7 @@ PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const PhraseSet allPhrases; vector phrasesByTable(m_dictionaries.size()); for (size_t i = 0; i < m_dictionaries.size(); ++i) { - const TargetPhraseCollection* phrases = m_dictionaries[i]->GetTargetPhraseCollection(src); + TargetPhraseCollection::shared_ptr phrases = m_dictionaries[i]->GetTargetPhraseCollection(src); if (phrases) { for (TargetPhraseCollection::const_iterator j = phrases->begin(); j != phrases->end(); ++j) { diff --git a/defer/PhraseDictionaryInterpolated.h b/defer/PhraseDictionaryInterpolated.h index aee1de6fa..dea00d892 100644 --- a/defer/PhraseDictionaryInterpolated.h +++ b/defer/PhraseDictionaryInterpolated.h @@ -52,7 +52,7 @@ public: , const LMList &languageModels , float weightWP); - virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const; + virtual TargetPhraseCollection::shared_ptr GetTargetPhraseCollection(const Phrase& src) const; virtual void InitializeForInput(ttasksptr const& ttask); virtual ChartRuleLookupManager *CreateRuleLookupManager( const InputType &, @@ -65,7 +65,7 @@ private: typedef boost::shared_ptr DictionaryHandle; std::vector m_dictionaries; std::vector > m_weights; //feature x table - mutable TargetPhraseCollection* m_targetPhrases; + mutable TargetPhraseCollection::shared_ptr m_targetPhrases; std::vector m_weightT; size_t m_tableLimit; const LMList* m_languageModels; diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index 966e69a7e..b129419f6 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -44,7 +44,7 @@ ChartParserUnknown ChartParserUnknown::~ChartParserUnknown() { RemoveAllInColl(m_unksrcs); - RemoveAllInColl(m_cacheTargetPhraseCollection); + // RemoveAllInColl(m_cacheTargetPhraseCollection); } void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to) diff --git a/moses/ChartParser.h b/moses/ChartParser.h index 372a05f60..2fb25dd0c 100644 --- a/moses/ChartParser.h +++ b/moses/ChartParser.h @@ -27,7 +27,7 @@ #include "WordsRange.h" #include "StackVec.h" #include "InputPath.h" - +#include "TargetPhraseCollection.h" namespace Moses { @@ -38,7 +38,7 @@ class Sentence; class ChartCellCollectionBase; class Word; class Phrase; -class TargetPhraseCollection; + // class TargetPhraseCollection; class DecodeGraph; class ChartParserUnknown @@ -56,7 +56,7 @@ public: private: std::vector m_unksrcs; - std::list m_cacheTargetPhraseCollection; + std::list m_cacheTargetPhraseCollection; }; class ChartParser diff --git a/moses/ChartParserCallback.h b/moses/ChartParserCallback.h index 9b03e1f5b..7c53654a6 100644 --- a/moses/ChartParserCallback.h +++ b/moses/ChartParserCallback.h @@ -3,6 +3,7 @@ #include "StackVec.h" #include +#include "TargetPhraseCollection.h" namespace Moses { @@ -23,7 +24,7 @@ public: virtual bool Empty() const = 0; - virtual void AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range) = 0; + virtual void AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range) = 0; virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0; diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp index 8d3d9b3ab..8af2b124d 100644 --- a/moses/ChartTranslationOptionList.cpp +++ b/moses/ChartTranslationOptionList.cpp @@ -115,9 +115,13 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc, } } -void ChartTranslationOptionList::AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range) +void +ChartTranslationOptionList:: +AddPhraseOOV(TargetPhrase &phrase, + std::list &waste_memory, + const WordsRange &range) { - TargetPhraseCollection *tpc = new TargetPhraseCollection(); + TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection); tpc->Add(&phrase); waste_memory.push_back(tpc); StackVec empty; diff --git a/moses/ChartTranslationOptionList.h b/moses/ChartTranslationOptionList.h index 4723bdd1d..c8ca5760c 100644 --- a/moses/ChartTranslationOptionList.h +++ b/moses/ChartTranslationOptionList.h @@ -55,7 +55,7 @@ public: void Add(const TargetPhraseCollection &, const StackVec &, const WordsRange &); - void AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range); + void AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range); bool Empty() const { return m_size == 0; diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp index 034c06fc2..f2a5722e1 100644 --- a/moses/DecodeStepTranslation.cpp +++ b/moses/DecodeStepTranslation.cpp @@ -49,7 +49,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO , PartialTranslOptColl &outputPartialTranslOptColl , TranslationOptionCollection *toc , bool adhereTableLimit - , const TargetPhraseCollection *phraseColl) const + , TargetPhraseCollection::shared_ptr phraseColl) const { if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) { // word deletion @@ -105,7 +105,7 @@ void DecodeStepTranslation::ProcessInitialTranslation( ,PartialTranslOptColl &outputPartialTranslOptColl , size_t startPos, size_t endPos, bool adhereTableLimit , const InputPath &inputPath - , const TargetPhraseCollection *phraseColl) const + , TargetPhraseCollection::shared_ptr phraseColl) const { const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature(); const size_t tableLimit = phraseDictionary->GetTableLimit(); @@ -147,7 +147,8 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY( const size_t tableLimit = phraseDictionary->GetTableLimit(); const WordsRange wordsRange(startPos, endPos); - const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange); + TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl + = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange); if (phraseColl != NULL) { IFVERBOSE(3) { @@ -237,8 +238,8 @@ ProcessLEGACY(TranslationOption const& in, size_t const currSize = inPhrase.GetSize(); size_t const tableLimit = pdict->GetTableLimit(); - TargetPhraseCollectionWithSourcePhrase const* phraseColl; - phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange); + TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl + = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange); if (phraseColl != NULL) { TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; diff --git a/moses/DecodeStepTranslation.h b/moses/DecodeStepTranslation.h index 2d381e219..eceebb940 100644 --- a/moses/DecodeStepTranslation.h +++ b/moses/DecodeStepTranslation.h @@ -48,7 +48,7 @@ public: , PartialTranslOptColl &outputPartialTranslOptColl , TranslationOptionCollection *toc , bool adhereTableLimit - , const TargetPhraseCollection *phraseColl) const; + , TargetPhraseCollection::shared_ptr phraseColl) const; /*! initialize list of partial translation options by applying the first translation step @@ -58,7 +58,7 @@ public: , PartialTranslOptColl &outputPartialTranslOptColl , size_t startPos, size_t endPos, bool adhereTableLimit , const InputPath &inputPath - , const TargetPhraseCollection *phraseColl) const; + , TargetPhraseCollection::shared_ptr phraseColl) const; // legacy void ProcessInitialTranslationLEGACY(const InputType &source diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index d1eb3b532..13ed1cb59 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -83,7 +83,7 @@ public: void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored); - void AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range); + void AddPhraseOOV(TargetPhrase &phrase, std::list &waste_memory, const WordsRange &range); float GetBestScore(const ChartCellLabel *chartCell) const; @@ -160,7 +160,7 @@ template void Fill::Add(const TargetPhraseCollection &targe } } -template void Fill::AddPhraseOOV(TargetPhrase &phrase, std::list &, const WordsRange &range) +template void Fill::AddPhraseOOV(TargetPhrase &phrase, std::list &, const WordsRange &range) { std::vector words; UTIL_THROW_IF2(phrase.GetSize() > 1, diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp index dfa306085..504b84605 100644 --- a/moses/InputPath.cpp +++ b/moses/InputPath.cpp @@ -39,34 +39,40 @@ InputPath::~InputPath() // std::cerr << "Deconstructing InputPath" << std::endl; - // Since there is no way for the Phrase Dictionaries to tell in - // which (sentence) context phrases were looked up, we tell them - // now that the phrase isn't needed any more by this inputPath - typedef std::pair entry; - std::map::iterator iter; - ttasksptr theTask = this->ttask.lock(); - for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) - { - // std::cerr << iter->second.first << " decommissioned." << std::endl; - iter->first->Release(theTask, iter->second.first); - } + + // // NOT NEEDED ANY MORE SINCE THE SWITCH TO SHARED POINTERS + // // Since there is no way for the Phrase Dictionaries to tell in + // // which (sentence) context phrases were looked up, we tell them + // // now that the phrase isn't needed any more by this inputPath + // typedef std::pair, const void* > entry; + // std::map::iterator iter; + // ttasksptr theTask = this->ttask.lock(); + // for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) + // { + // // std::cerr << iter->second.first << " decommissioned." << std::endl; + // iter->first->Release(theTask, iter->second.first); + // } delete m_inputScore; } -const TargetPhraseCollection *InputPath::GetTargetPhrases(const PhraseDictionary &phraseDictionary) const +TargetPhraseCollection::shared_ptr +InputPath:: +GetTargetPhrases(const PhraseDictionary &phraseDictionary) const { - std::map >::const_iterator iter; + TargetPhrases::const_iterator iter; iter = m_targetPhrases.find(&phraseDictionary); if (iter == m_targetPhrases.end()) { - return NULL; + return TargetPhraseCollection::shared_ptr(); } return iter->second.first; } -const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const +const void* +InputPath:: +GetPtNode(const PhraseDictionary &phraseDictionary) const { - std::map >::const_iterator iter; + TargetPhrases::const_iterator iter; iter = m_targetPhrases.find(&phraseDictionary); if (iter == m_targetPhrases.end()) { return NULL; @@ -74,11 +80,14 @@ const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const return iter->second.second; } -void InputPath::SetTargetPhrases(const PhraseDictionary &phraseDictionary - , const TargetPhraseCollection *targetPhrases - , const void *ptNode) +void +InputPath:: +SetTargetPhrases(const PhraseDictionary &phraseDictionary, + TargetPhraseCollection::shared_ptr const& targetPhrases, + const void *ptNode) { - std::pair value(targetPhrases, ptNode); + std::pair + value(targetPhrases, ptNode); m_targetPhrases[&phraseDictionary] = value; } @@ -93,10 +102,10 @@ const Word &InputPath::GetLastWord() const size_t InputPath::GetTotalRuleSize() const { size_t ret = 0; - std::map >::const_iterator iter; + TargetPhrases::const_iterator iter; for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) { // const PhraseDictionary *pt = iter->first; - const TargetPhraseCollection *tpColl = iter->second.first; + TargetPhraseCollection::shared_ptr tpColl = iter->second.first; if (tpColl) { ret += tpColl->GetSize(); @@ -110,10 +119,10 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj) { out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase(); - std::map >::const_iterator iter; + InputPath::TargetPhrases::const_iterator iter; for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) { const PhraseDictionary *pt = iter->first; - const TargetPhraseCollection *tpColl = iter->second.first; + boost::shared_ptr tpColl = iter->second.first; out << pt << "="; if (tpColl) { diff --git a/moses/InputPath.h b/moses/InputPath.h index e8b5978ad..e379b8630 100644 --- a/moses/InputPath.h +++ b/moses/InputPath.h @@ -8,12 +8,12 @@ #include "WordsRange.h" #include "NonTerminal.h" #include "moses/FactorCollection.h" - +#include +#include "TargetPhraseCollection.h" namespace Moses { class PhraseDictionary; -class TargetPhraseCollection; class ScoreComponentCollection; class TargetPhrase; class InputPath; @@ -32,7 +32,12 @@ class InputPath friend std::ostream& operator<<(std::ostream& out, const InputPath &obj); public: - typedef std::map > TargetPhrases; + + typedef std::pair + TPCollStoreEntry; + + typedef std::map + TargetPhrases; public: ttaskwptr const ttask; @@ -96,10 +101,14 @@ public: m_nextNode = nextNode; } - void SetTargetPhrases(const PhraseDictionary &phraseDictionary - , const TargetPhraseCollection *targetPhrases - , const void *ptNode); - const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const; + void + SetTargetPhrases(const PhraseDictionary &phraseDictionary, + TargetPhraseCollection::shared_ptr const& targetPhrases, + const void *ptNode); + + TargetPhraseCollection::shared_ptr + GetTargetPhrases(const PhraseDictionary &phraseDictionary) const; + const TargetPhrases &GetTargetPhrases() const { return m_targetPhrases; } diff --git a/moses/PDTAimp.cpp b/moses/PDTAimp.cpp index b8bafeb3e..ea40a2f4f 100644 --- a/moses/PDTAimp.cpp +++ b/moses/PDTAimp.cpp @@ -63,27 +63,29 @@ void PDTAimp::CleanUp() { assert(m_dict); m_dict->FreeMemory(); - for(size_t i=0; i piter; if(useCache) { - piter=m_cache.insert(std::make_pair(src,static_cast(0))); + piter=m_cache.insert(std::make_pair(src, ret)); if(!piter.second) return piter.first->second; } else if (m_cache.size()) { MapSrc2Tgt::const_iterator i=m_cache.find(src); - return (i!=m_cache.end() ? i->second : 0); + return (i!=m_cache.end() ? i->second : ret); } std::vector srcString(src.GetSize()); @@ -97,7 +99,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const std::vector wacands; m_dict->GetTargetCandidates(srcString,cands,wacands); if(cands.empty()) { - return 0; + return ret; } //TODO: Multiple models broken here @@ -140,16 +142,14 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const sourcePhrases.push_back(src); } - TargetPhraseCollectionWithSourcePhrase *rv; - rv=PruneTargetCandidates(tCands,costs, sourcePhrases); - if(rv->IsEmpty()) { - delete rv; - return 0; + ret = PruneTargetCandidates(tCands,costs, sourcePhrases); + if(ret->IsEmpty()) { + ret.reset(); } else { - if(useCache) piter.first->second=rv; - m_tgtColls.push_back(rv); - return rv; + if(useCache) piter.first->second = ret; + m_tgtColls.push_back(ret); } + return ret; } @@ -352,7 +352,8 @@ void PDTAimp::CacheSource(ConfusionNet const& src) pathExplored[len]+=exploredPaths[len]; - m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0)); + // m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0)); + m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize())); for(std::map::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) { assert(i->first.firstfirst.first << "-" << i->first.second << ": " << targetPhrase << std::endl; } - TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases); + TargetPhraseCollectionWithSourcePhrase::shared_ptr + rv = PruneTargetCandidates(tCands, costs, sourcePhrases); if(rv->IsEmpty()) - delete rv; + rv.reset(); else { m_rangeCache[i->first.first][i->first.second-1]=rv; m_tgtColls.push_back(rv); @@ -428,7 +430,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase, targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply()); } -TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates +TargetPhraseCollectionWithSourcePhrase::shared_ptr +PDTAimp::PruneTargetCandidates (const std::vector & tCands, std::vector >& costs, const std::vector &sourcePhrases) const @@ -437,7 +440,8 @@ TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(), "Number of target phrases must equal number of source phrases"); - TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase; + TargetPhraseCollectionWithSourcePhrase::shared_ptr rv; + rv.reset(new TargetPhraseCollectionWithSourcePhrase); // set limit to tableLimit or actual size, whatever is smaller diff --git a/moses/PDTAimp.h b/moses/PDTAimp.h index 01de1e88a..81ce22cfe 100644 --- a/moses/PDTAimp.h +++ b/moses/PDTAimp.h @@ -44,10 +44,10 @@ public: std::vector m_input,m_output; PhraseDictionaryTree *m_dict; const InputFeature *m_inputFeature; - typedef std::vector vTPC; + typedef std::vector vTPC; mutable vTPC m_tgtColls; - typedef std::map MapSrc2Tgt; + typedef std::map MapSrc2Tgt; mutable MapSrc2Tgt m_cache; PhraseDictionaryTreeAdaptor *m_obj; int useCache; @@ -69,7 +69,7 @@ public: void CleanUp(); - TargetPhraseCollectionWithSourcePhrase const* + TargetPhraseCollectionWithSourcePhrase::shared_ptr GetTargetPhraseCollection(Phrase const &src) const; void Create(const std::vector &input @@ -121,7 +121,7 @@ public: const std::string *alignmentString, Phrase const* srcPtr=0) const; - TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates + TargetPhraseCollectionWithSourcePhrase::shared_ptr PruneTargetCandidates (const std::vector & tCands, std::vector >& costs, const std::vector &sourcePhrases) const; diff --git a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp index 1e00c594a..e83daa81b 100644 --- a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp +++ b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp @@ -28,9 +28,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e) HyperPath source; SynthesizeHyperPath(e, source); TargetPhrase *tp = SynthesizeTargetPhrase(e); - TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(m_hyperTree, - source); - tpc.Add(tp); + TargetPhraseCollection::shared_ptr tpc + = GetOrCreateTargetPhraseCollection(m_hyperTree, source); + tpc->Add(tp); } void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e, diff --git a/moses/Syntax/F2S/HyperTree.cpp b/moses/Syntax/F2S/HyperTree.cpp index a7d77eb4c..681ec561c 100644 --- a/moses/Syntax/F2S/HyperTree.cpp +++ b/moses/Syntax/F2S/HyperTree.cpp @@ -14,7 +14,7 @@ void HyperTree::Node::Prune(std::size_t tableLimit) p->second.Prune(tableLimit); } // Prune TargetPhraseCollection at this node. - m_targetPhraseCollection.Prune(true, tableLimit); + m_targetPhraseCollection->Prune(true, tableLimit); } void HyperTree::Node::Sort(std::size_t tableLimit) @@ -24,7 +24,7 @@ void HyperTree::Node::Sort(std::size_t tableLimit) p->second.Sort(tableLimit); } // Sort TargetPhraseCollection at this node. - m_targetPhraseCollection.Sort(true, tableLimit); + m_targetPhraseCollection->Sort(true, tableLimit); } HyperTree::Node *HyperTree::Node::GetOrCreateChild( @@ -40,7 +40,7 @@ const HyperTree::Node *HyperTree::Node::GetChild( return (p == m_map.end()) ? NULL : &p->second; } -TargetPhraseCollection &HyperTree::GetOrCreateTargetPhraseCollection( +TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection( const HyperPath &hyperPath) { Node &node = GetOrCreateNode(hyperPath); diff --git a/moses/Syntax/F2S/HyperTree.h b/moses/Syntax/F2S/HyperTree.h index e32bfebbe..a000e9f9b 100644 --- a/moses/Syntax/F2S/HyperTree.h +++ b/moses/Syntax/F2S/HyperTree.h @@ -37,7 +37,7 @@ public: } bool HasRules() const { - return !m_targetPhraseCollection.IsEmpty(); + return !m_targetPhraseCollection->IsEmpty(); } void Prune(std::size_t tableLimit); @@ -47,11 +47,13 @@ public: const Node *GetChild(const HyperPath::NodeSeq &) const; - const TargetPhraseCollection &GetTargetPhraseCollection() const { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection() const { return m_targetPhraseCollection; } - TargetPhraseCollection &GetTargetPhraseCollection() { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection() { return m_targetPhraseCollection; } @@ -59,12 +61,14 @@ public: return m_map; } + Node() : m_targetPhraseCollection(new TargetPhraseCollection) { } + private: Map m_map; - TargetPhraseCollection m_targetPhraseCollection; + TargetPhraseCollection::shared_ptr m_targetPhraseCollection; }; - HyperTree(const RuleTableFF *ff) : RuleTable(ff) {} + HyperTree(const RuleTableFF *ff) : RuleTable(ff) { } const Node &GetRootNode() const { return m_root; @@ -73,7 +77,8 @@ public: private: friend class HyperTreeCreator; - TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const HyperPath &); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const HyperPath &); Node &GetOrCreateNode(const HyperPath &); diff --git a/moses/Syntax/F2S/HyperTreeCreator.h b/moses/Syntax/F2S/HyperTreeCreator.h index 04dbaa6b1..a5111b90e 100644 --- a/moses/Syntax/F2S/HyperTreeCreator.h +++ b/moses/Syntax/F2S/HyperTreeCreator.h @@ -21,7 +21,7 @@ protected: // Provide access to HyperTree's private GetOrCreateTargetPhraseCollection // function. - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( + TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection( HyperTree &trie, const HyperPath &fragment) { return trie.GetOrCreateTargetPhraseCollection(fragment); } diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp index 21d5b0447..6764963ac 100644 --- a/moses/Syntax/F2S/HyperTreeLoader.cpp +++ b/moses/Syntax/F2S/HyperTreeLoader.cpp @@ -130,9 +130,9 @@ bool HyperTreeLoader::Load(const std::vector &input, ff.GetFeaturesToApply()); // Add rule to trie. - TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( - trie, sourceFragment); - phraseColl.Add(targetPhrase); + TargetPhraseCollection::shared_ptr phraseColl + = GetOrCreateTargetPhraseCollection(trie, sourceFragment); + phraseColl->Add(targetPhrase); count++; } diff --git a/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h b/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h index 74f2347a6..bf05d4bcc 100644 --- a/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h +++ b/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h @@ -51,8 +51,8 @@ void RuleMatcherHyperTree::EnumerateHyperedges( m_hyperedge.label.inputWeight += (*p)->weight; } // Set the output hyperedge label's translation set pointer. - m_hyperedge.label.translations = - &(item.trieNode->GetTargetPhraseCollection()); + m_hyperedge.label.translations + = item.trieNode->GetTargetPhraseCollection(); // Pass the output hyperedge to the callback. callback(m_hyperedge); } diff --git a/moses/Syntax/PLabel.h b/moses/Syntax/PLabel.h index 4537b86bb..c45f40dfd 100644 --- a/moses/Syntax/PLabel.h +++ b/moses/Syntax/PLabel.h @@ -9,7 +9,7 @@ namespace Syntax struct PLabel { float inputWeight; - const TargetPhraseCollection *translations; + TargetPhraseCollection::shared_ptr translations; }; } // Syntax diff --git a/moses/Syntax/S2T/OovHandler-inl.h b/moses/Syntax/S2T/OovHandler-inl.h index 76eed861e..e5ffe6370 100644 --- a/moses/Syntax/S2T/OovHandler-inl.h +++ b/moses/Syntax/S2T/OovHandler-inl.h @@ -32,9 +32,10 @@ boost::shared_ptr OovHandler::SynthesizeRuleTrie( // TODO Check ownership and fix any leaks. Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr); TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob); - TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection( - *trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument - tpc.Add(tp); + TargetPhraseCollection::shared_ptr tpc; + tpc= GetOrCreateTargetPhraseCollection(*trie, *srcPhrase, *tp, NULL); + // TODO Check NULL is valid argument + tpc->Add(tp); } } diff --git a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h index a84e16a54..c8f57c4d1 100644 --- a/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h +++ b/moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser-inl.h @@ -132,9 +132,9 @@ void RecursiveCYKPlusParser::AddAndExtend( m_hyperedge.tail.push_back(const_cast(&vertex)); // Add target phrase collection (except if rule is empty or unary). - const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection(); - if (!tpc.IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) { - m_hyperedge.label.translations = &tpc; + TargetPhraseCollection::shared_ptr tpc = node.GetTargetPhraseCollection(); + if (!tpc->IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) { + m_hyperedge.label.translations = tpc; (*m_callback)(m_hyperedge, end); } diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h index 24135c734..da81a5606 100644 --- a/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h +++ b/moses/Syntax/S2T/Parsers/Scope3Parser/Parser-inl.h @@ -38,8 +38,8 @@ Scope3Parser::~Scope3Parser() } template -void Scope3Parser::EnumerateHyperedges(const WordsRange &range, - Callback &callback) +void Scope3Parser:: +EnumerateHyperedges(const WordsRange &range, Callback &callback) { const std::size_t start = range.GetStartPos(); const std::size_t end = range.GetEndPos(); @@ -64,8 +64,7 @@ void Scope3Parser::EnumerateHyperedges(const WordsRange &range, // Ask the grammar for the mapping from label sequences to target phrase // collections for this pattern. - const RuleTrie::Node::LabelMap &labelMap = - patNode->m_node->GetLabelMap(); + const RuleTrie::Node::LabelMap &labelMap = patNode->m_node->GetLabelMap(); // For each label sequence, search the lattice for the set of PHyperedge // tails. @@ -73,7 +72,7 @@ void Scope3Parser::EnumerateHyperedges(const WordsRange &range, RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin(); for (; q != labelMap.end(); ++q) { const std::vector &labelSeq = q->first; - const TargetPhraseCollection &tpc = q->second; + TargetPhraseCollection::shared_ptr tpc = q->second; // For many label sequences there won't be any corresponding paths through // the lattice. As an optimisation, we use m_quickCheckTable to test // for this and we don't begin a search if there are no paths to find. diff --git a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h index 4f815c78d..407f04d5b 100644 --- a/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h +++ b/moses/Syntax/S2T/Parsers/Scope3Parser/TailLatticeSearcher.h @@ -6,7 +6,7 @@ #include "moses/Syntax/PHyperedge.h" #include "TailLattice.h" - +#include "moses/TargetPhraseCollection.h" namespace Moses { namespace Syntax @@ -25,13 +25,14 @@ public: , m_key(key) , m_ranges(ranges) {} - void Search(const std::vector &labels, const TargetPhraseCollection &tpc, + void Search(const std::vector &labels, + const TargetPhraseCollection::shared_ptr tpc, Callback &callback) { m_labels = &labels; m_matchCB = &callback; m_hyperedge.head = 0; m_hyperedge.tail.clear(); - m_hyperedge.label.translations = &tpc; + m_hyperedge.label.translations = tpc; SearchInner(0, 0, 0); } diff --git a/moses/Syntax/S2T/RuleTrie.h b/moses/Syntax/S2T/RuleTrie.h index 27b0bc838..b9d031673 100644 --- a/moses/Syntax/S2T/RuleTrie.h +++ b/moses/Syntax/S2T/RuleTrie.h @@ -28,9 +28,10 @@ public: private: friend class RuleTrieCreator; - virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, - const Word *sourceLHS) = 0; + virtual TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) = 0; virtual void SortAndPrune(std::size_t) = 0; }; diff --git a/moses/Syntax/S2T/RuleTrieCYKPlus.cpp b/moses/Syntax/S2T/RuleTrieCYKPlus.cpp index 05f8758e9..7c8d08864 100644 --- a/moses/Syntax/S2T/RuleTrieCYKPlus.cpp +++ b/moses/Syntax/S2T/RuleTrieCYKPlus.cpp @@ -33,7 +33,7 @@ void RuleTrieCYKPlus::Node::Prune(std::size_t tableLimit) } // prune TargetPhraseCollection in this node - m_targetPhraseCollection.Prune(true, tableLimit); + m_targetPhraseCollection->Prune(true, tableLimit); } void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit) @@ -49,7 +49,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit) } // prune TargetPhraseCollection in this node - m_targetPhraseCollection.Sort(true, tableLimit); + m_targetPhraseCollection->Sort(true, tableLimit); } RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild( @@ -86,8 +86,11 @@ const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild( return (p == m_nonTermMap.end()) ? NULL : &p->second; } -TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) +TargetPhraseCollection::shared_ptr +RuleTrieCYKPlus:: +GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) { Node &currNode = GetOrCreateNode(source, target, sourceLHS); return currNode.GetTargetPhraseCollection(); diff --git a/moses/Syntax/S2T/RuleTrieCYKPlus.h b/moses/Syntax/S2T/RuleTrieCYKPlus.h index 11cf4c199..0c11a1edb 100644 --- a/moses/Syntax/S2T/RuleTrieCYKPlus.h +++ b/moses/Syntax/S2T/RuleTrieCYKPlus.h @@ -38,7 +38,7 @@ public: } bool HasRules() const { - return !m_targetPhraseCollection.IsEmpty(); + return !m_targetPhraseCollection->IsEmpty(); } void Prune(std::size_t tableLimit); @@ -50,11 +50,13 @@ public: const Node *GetChild(const Word &sourceTerm) const; const Node *GetNonTerminalChild(const Word &targetNonTerm) const; - const TargetPhraseCollection &GetTargetPhraseCollection() const { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection() const { return m_targetPhraseCollection; } - TargetPhraseCollection &GetTargetPhraseCollection() { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection() { return m_targetPhraseCollection; } @@ -66,10 +68,12 @@ public: return m_nonTermMap; } + Node() : m_targetPhraseCollection(new TargetPhraseCollection) {} + private: SymbolMap m_sourceTermMap; SymbolMap m_nonTermMap; - TargetPhraseCollection m_targetPhraseCollection; + TargetPhraseCollection::shared_ptr m_targetPhraseCollection; }; RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {} @@ -81,8 +85,9 @@ public: bool HasPreterminalRule(const Word &) const; private: - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection + (const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); diff --git a/moses/Syntax/S2T/RuleTrieCreator.h b/moses/Syntax/S2T/RuleTrieCreator.h index e49a2cbde..62da519a1 100644 --- a/moses/Syntax/S2T/RuleTrieCreator.h +++ b/moses/Syntax/S2T/RuleTrieCreator.h @@ -21,8 +21,9 @@ protected: // Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection // function. - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - RuleTrie &trie, const Phrase &source, const TargetPhrase &target, + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection + ( RuleTrie &trie, const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) { return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS); } diff --git a/moses/Syntax/S2T/RuleTrieLoader.cpp b/moses/Syntax/S2T/RuleTrieLoader.cpp index a88c0f5fe..b523953c7 100644 --- a/moses/Syntax/S2T/RuleTrieLoader.cpp +++ b/moses/Syntax/S2T/RuleTrieLoader.cpp @@ -125,9 +125,10 @@ bool RuleTrieLoader::Load(const std::vector &input, targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector); targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply()); - TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( - trie, sourcePhrase, *targetPhrase, sourceLHS); - phraseColl.Add(targetPhrase); + TargetPhraseCollection::shared_ptr phraseColl + = GetOrCreateTargetPhraseCollection(trie, sourcePhrase, + *targetPhrase, sourceLHS); + phraseColl->Add(targetPhrase); // not implemented correctly in memory pt. just delete it for now delete sourceLHS; diff --git a/moses/Syntax/S2T/RuleTrieScope3.cpp b/moses/Syntax/S2T/RuleTrieScope3.cpp index 7318f09d6..aecaac3f7 100644 --- a/moses/Syntax/S2T/RuleTrieScope3.cpp +++ b/moses/Syntax/S2T/RuleTrieScope3.cpp @@ -33,7 +33,7 @@ void RuleTrieScope3::Node::Prune(std::size_t tableLimit) // Prune TargetPhraseCollections at this node. for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { - p->second.Prune(true, tableLimit); + p->second->Prune(true, tableLimit); } } @@ -50,7 +50,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit) // Sort TargetPhraseCollections at this node. for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { - p->second.Sort(true, tableLimit); + p->second->Sort(true, tableLimit); } } @@ -75,9 +75,10 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild( return m_gapNode; } -TargetPhraseCollection & -RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection( - const TargetPhrase &target) +TargetPhraseCollection::shared_ptr +RuleTrieScope3:: +Node:: +GetOrCreateTargetPhraseCollection(const TargetPhrase &target) { const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm(); const std::size_t rank = alignmentInfo.GetSize(); @@ -94,12 +95,16 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection( const Word &targetNonTerm = target.GetWord(targetNonTermIndex); vec.push_back(InsertLabel(i++, targetNonTerm)); } - - return m_labelMap[vec]; + TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec]; + if (!ret) ret.reset(new TargetPhraseCollection); + return ret; } -TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) +TargetPhraseCollection::shared_ptr +RuleTrieScope3:: +GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) { Node &currNode = GetOrCreateNode(source, target, sourceLHS); return currNode.GetOrCreateTargetPhraseCollection(target); diff --git a/moses/Syntax/S2T/RuleTrieScope3.h b/moses/Syntax/S2T/RuleTrieScope3.h index 5909b6509..4684e8a78 100644 --- a/moses/Syntax/S2T/RuleTrieScope3.h +++ b/moses/Syntax/S2T/RuleTrieScope3.h @@ -35,7 +35,7 @@ public: SymbolEqualityPred> TerminalMap; typedef boost::unordered_map, - TargetPhraseCollection> LabelMap; + TargetPhraseCollection::shared_ptr> LabelMap; ~Node() { delete m_gapNode; @@ -61,8 +61,8 @@ public: Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm); - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const TargetPhrase &); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const TargetPhrase &); bool IsLeaf() const { return m_terminalMap.empty() && m_gapNode == NULL; @@ -106,8 +106,10 @@ public: bool HasPreterminalRule(const Word &) const; private: - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS); Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); diff --git a/moses/Syntax/SHyperedgeBundle.h b/moses/Syntax/SHyperedgeBundle.h index 54eda73bc..d6e903529 100644 --- a/moses/Syntax/SHyperedgeBundle.h +++ b/moses/Syntax/SHyperedgeBundle.h @@ -17,7 +17,7 @@ struct PVertex; struct SHyperedgeBundle { float inputWeight; std::vector stacks; - const TargetPhraseCollection *translations; + TargetPhraseCollection::shared_ptr translations; friend void swap(SHyperedgeBundle &x, SHyperedgeBundle &y) { using std::swap; diff --git a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp index 0a0c07eea..7514852f2 100644 --- a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp +++ b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp @@ -17,9 +17,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node) const Word &sourceLhs = node.pvertex.symbol; boost::scoped_ptr sourceRhs(SynthesizeSourcePhrase(node)); TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs); - TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection( - m_ruleTrie, sourceLhs, *sourceRhs); - tpc.Add(tp); + TargetPhraseCollection::shared_ptr tpc + = GetOrCreateTargetPhraseCollection(m_ruleTrie, sourceLhs, *sourceRhs); + tpc->Add(tp); } Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node) diff --git a/moses/Syntax/T2S/HyperTree.h b/moses/Syntax/T2S/HyperTree.h index 800700365..66e7a7de6 100644 --- a/moses/Syntax/T2S/HyperTree.h +++ b/moses/Syntax/T2S/HyperTree.h @@ -48,11 +48,11 @@ public: const Node *GetChild(const HyperPath::NodeSeq &) const; - const TargetPhraseCollection &GetTargetPhraseCollection() const + const TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() const return m_targetPhraseCollection; } - TargetPhraseCollection &GetTargetPhraseCollection() + TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() return m_targetPhraseCollection; } @@ -76,7 +76,7 @@ const Node &GetRootNode() const private: friend class RuleTrieCreator; -TargetPhraseCollection &GetOrCreateTargetPhraseCollection( +TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection( const Word &sourceLHS, const Phrase &sourceRHS); Node &GetOrCreateNode(const Phrase &sourceRHS); diff --git a/moses/Syntax/T2S/RuleMatcherSCFG-inl.h b/moses/Syntax/T2S/RuleMatcherSCFG-inl.h index b782411a4..0eb7cbe2c 100644 --- a/moses/Syntax/T2S/RuleMatcherSCFG-inl.h +++ b/moses/Syntax/T2S/RuleMatcherSCFG-inl.h @@ -61,7 +61,7 @@ void RuleMatcherSCFG::Match(const InputTree::Node &inNode, if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) { // Check if the trie node has any rules with a LHS that match inNode. const Word &lhs = inNode.pvertex.symbol; - const TargetPhraseCollection *tpc = + TargetPhraseCollection::shared_ptr tpc = newTrieNode.GetTargetPhraseCollection(lhs); if (tpc) { m_hyperedge.label.translations = tpc; diff --git a/moses/Syntax/T2S/RuleTrie.cpp b/moses/Syntax/T2S/RuleTrie.cpp index 0fc7bf24c..e6fc5214c 100644 --- a/moses/Syntax/T2S/RuleTrie.cpp +++ b/moses/Syntax/T2S/RuleTrie.cpp @@ -35,7 +35,7 @@ void RuleTrie::Node::Prune(std::size_t tableLimit) // Prune TargetPhraseCollections at this node. for (TPCMap::iterator p = m_targetPhraseCollections.begin(); p != m_targetPhraseCollections.end(); ++p) { - p->second.Prune(true, tableLimit); + p->second->Prune(true, tableLimit); } } @@ -54,17 +54,21 @@ void RuleTrie::Node::Sort(std::size_t tableLimit) // Sort TargetPhraseCollections at this node. for (TPCMap::iterator p = m_targetPhraseCollections.begin(); p != m_targetPhraseCollections.end(); ++p) { - p->second.Sort(true, tableLimit); + p->second->Sort(true, tableLimit); } } -RuleTrie::Node *RuleTrie::Node::GetOrCreateChild( - const Word &sourceTerm) +RuleTrie::Node* +RuleTrie::Node:: +GetOrCreateChild(const Word &sourceTerm) { return &m_sourceTermMap[sourceTerm]; } -RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm) +RuleTrie::Node * +RuleTrie:: +Node:: +GetOrCreateNonTerminalChild(const Word &targetNonTerm) { UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(), "Not a non-terminal: " << targetNonTerm); @@ -72,42 +76,52 @@ RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNo return &m_nonTermMap[targetNonTerm]; } -TargetPhraseCollection &RuleTrie::Node::GetOrCreateTargetPhraseCollection( - const Word &sourceLHS) +TargetPhraseCollection::shared_ptr +RuleTrie:: +Node:: +GetOrCreateTargetPhraseCollection(const Word &sourceLHS) { UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(), "Not a non-terminal: " << sourceLHS); - return m_targetPhraseCollections[sourceLHS]; + TargetPhraseCollection::shared_ptr& foo + = m_targetPhraseCollections[sourceLHS]; + if (!foo) foo.reset(new TargetPhraseCollection); + return foo; } -const RuleTrie::Node *RuleTrie::Node::GetChild( - const Word &sourceTerm) const +RuleTrie::Node const* +RuleTrie:: +Node:: +GetChild(const Word &sourceTerm) const { - UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), - "Not a terminal: " << sourceTerm); - + UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), "Not a terminal: " << sourceTerm); SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm); return (p == m_sourceTermMap.end()) ? NULL : &p->second; } -const RuleTrie::Node *RuleTrie::Node::GetNonTerminalChild( - const Word &targetNonTerm) const +RuleTrie::Node const* +RuleTrie:: +Node:: +GetNonTerminalChild(const Word &targetNonTerm) const { UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(), "Not a non-terminal: " << targetNonTerm); - SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm); return (p == m_nonTermMap.end()) ? NULL : &p->second; } -TargetPhraseCollection &RuleTrie::GetOrCreateTargetPhraseCollection( - const Word &sourceLHS, const Phrase &sourceRHS) +TargetPhraseCollection::shared_ptr +RuleTrie:: +GetOrCreateTargetPhraseCollection +( const Word &sourceLHS, const Phrase &sourceRHS ) { Node &currNode = GetOrCreateNode(sourceRHS); return currNode.GetOrCreateTargetPhraseCollection(sourceLHS); } -RuleTrie::Node &RuleTrie::GetOrCreateNode(const Phrase &sourceRHS) +RuleTrie::Node & +RuleTrie:: +GetOrCreateNode(const Phrase &sourceRHS) { const std::size_t size = sourceRHS.GetSize(); diff --git a/moses/Syntax/T2S/RuleTrie.h b/moses/Syntax/T2S/RuleTrie.h index 2807f6e0e..f9d857088 100644 --- a/moses/Syntax/T2S/RuleTrie.h +++ b/moses/Syntax/T2S/RuleTrie.h @@ -32,7 +32,7 @@ public: typedef boost::unordered_map SymbolMap; - typedef boost::unordered_map TPCMap; bool IsLeaf() const { @@ -48,15 +48,18 @@ public: Node *GetOrCreateChild(const Word &sourceTerm); Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm); - TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Word &); + TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(const Word &); const Node *GetChild(const Word &sourceTerm) const; const Node *GetNonTerminalChild(const Word &targetNonTerm) const; - const TargetPhraseCollection *GetTargetPhraseCollection( - const Word &sourceLHS) const { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection(const Word &sourceLHS) const { TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS); - return p == m_targetPhraseCollections.end() ? 0 : &(p->second); + if (p != m_targetPhraseCollections.end()) + return p->second; + else + return TargetPhraseCollection::shared_ptr(); } // FIXME IS there any reason to distinguish these two for T2S? @@ -83,8 +86,9 @@ public: private: friend class RuleTrieCreator; - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Word &sourceLHS, const Phrase &sourceRHS); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection + (const Word &sourceLHS, const Phrase &sourceRHS); Node &GetOrCreateNode(const Phrase &sourceRHS); diff --git a/moses/Syntax/T2S/RuleTrieCreator.h b/moses/Syntax/T2S/RuleTrieCreator.h index fd29d3838..af5e976b5 100644 --- a/moses/Syntax/T2S/RuleTrieCreator.h +++ b/moses/Syntax/T2S/RuleTrieCreator.h @@ -21,7 +21,7 @@ protected: // Provide access to RuleTrie's private // GetOrCreateTargetPhraseCollection function. - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( + TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection( RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) { return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS); } diff --git a/moses/Syntax/T2S/RuleTrieLoader.cpp b/moses/Syntax/T2S/RuleTrieLoader.cpp index 81924f05d..c96c52b03 100644 --- a/moses/Syntax/T2S/RuleTrieLoader.cpp +++ b/moses/Syntax/T2S/RuleTrieLoader.cpp @@ -55,7 +55,9 @@ bool RuleTrieLoader::Load(const std::vector &input, std::vector scoreVector; StringPiece line; - double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan"); + int noflags = double_conversion::StringToDoubleConverter::NO_FLAGS; + double_conversion::StringToDoubleConverter + converter(noflags, NAN, NAN, "inf", "nan"); while(true) { try { @@ -132,9 +134,9 @@ bool RuleTrieLoader::Load(const std::vector &input, targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector); targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply()); - TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( - trie, *sourceLHS, sourcePhrase); - phraseColl.Add(targetPhrase); + TargetPhraseCollection::shared_ptr phraseColl + = GetOrCreateTargetPhraseCollection(trie, *sourceLHS, sourcePhrase); + phraseColl->Add(targetPhrase); // not implemented correctly in memory pt. just delete it for now delete sourceLHS; diff --git a/moses/TargetPhraseCollection.h b/moses/TargetPhraseCollection.h index d61ff2c4f..47b3afd57 100644 --- a/moses/TargetPhraseCollection.h +++ b/moses/TargetPhraseCollection.h @@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include "TargetPhrase.h" #include "Util.h" +#include namespace Moses { @@ -43,6 +44,8 @@ public: // iters typedef CollType::iterator iterator; typedef CollType::const_iterator const_iterator; + typedef boost::shared_ptr shared_ptr; + typedef boost::shared_ptr shared_const_ptr; TargetPhrase const* operator[](size_t const i) const { @@ -127,6 +130,9 @@ protected: std::vector m_sourcePhrases; public: + typedef boost::shared_ptr shared_ptr; + typedef boost::shared_ptr shared_const_ptr; + const std::vector &GetSourcePhrases() const { return m_sourcePhrases; } diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp index 54f172d1e..61ec03892 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp @@ -167,10 +167,10 @@ void ChartRuleLookupManagerMemory::AddAndExtend( size_t endPos) { - const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection(); + TargetPhraseCollection::shared_ptr tpc = node->GetTargetPhraseCollection(); // add target phrase collection (except if rule is empty or a unary non-terminal rule) - if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) { - m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl); + if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) { + m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl); } // get all further extensions of rule (until reaching end of sentence or max-chart-span) diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp index e090ee1ae..f81a21205 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp @@ -167,10 +167,11 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend( size_t endPos) { - const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection(); + TargetPhraseCollection::shared_ptr tpc + = node->GetTargetPhraseCollection(); // add target phrase collection (except if rule is empty or a unary non-terminal rule) - if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) { - m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl); + if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) { + m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl); } // get all further extensions of rule (until reaching end of sentence or max-chart-span) diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp index 349fc4cbc..5b8c20d27 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp @@ -64,11 +64,12 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk( ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk() { - std::map::const_iterator iterCache; - for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) { - delete iterCache->second; - } - m_cache.clear(); + // not needed any more due to the switch to shared pointers + // std::map::const_iterator iterCache; + // for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) { + // iterCache->second.reset(); + // } + // m_cache.clear(); RemoveAllInColl(m_expandableDottedRuleListVec); RemoveAllInColl(m_sourcePhraseNode); @@ -236,14 +237,16 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( if (sourceLHSBerkeleyDb == NULL) continue; - const TargetPhraseCollection *targetPhraseCollection = NULL; - const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); + TargetPhraseCollection::shared_ptr targetPhraseCollection; + const OnDiskPt::PhraseNode *node + = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); if (node) { uint64_t tpCollFilePos = node->GetValue(); - std::map::const_iterator iterCache = m_cache.find(tpCollFilePos); + std::map::const_iterator iterCache = m_cache.find(tpCollFilePos); if (iterCache == m_cache.end()) { - const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper); + OnDiskPt::TargetPhraseCollection::shared_ptr tpcollBerkeleyDb + = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper); std::vector weightT = staticData.GetWeights(&m_dictionary); targetPhraseCollection @@ -254,7 +257,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( ,m_dbWrapper.GetVocab() ,true); - delete tpcollBerkeleyDb; + tpcollBerkeleyDb.reset(); m_cache[tpCollFilePos] = targetPhraseCollection; } else { // just get out of cache diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h index 42cbdcc46..dee9cc202 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h @@ -55,7 +55,7 @@ private: const std::vector &m_inputFactorsVec; const std::vector &m_outputFactorsVec; std::vector m_expandableDottedRuleListVec; - std::map m_cache; + std::map m_cache; std::list m_sourcePhraseNode; }; diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp index 53011e5ac..7db3e14cd 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp @@ -48,7 +48,7 @@ ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton( ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton() { - RemoveAllInColl(m_tpColl); + // RemoveAllInColl(m_tpColl); } void ChartRuleLookupManagerSkeleton::GetChartRuleCollection( @@ -58,7 +58,7 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection( { //m_tpColl.push_back(TargetPhraseCollection()); //TargetPhraseCollection &tpColl = m_tpColl.back(); - TargetPhraseCollection *tpColl = new TargetPhraseCollection(); + TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection); m_tpColl.push_back(tpColl); const WordsRange &range = inputPath.GetWordsRange(); @@ -73,7 +73,9 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection( outColl.Add(*tpColl, m_stackVec, range); } -TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sourceWord) const +TargetPhrase * +ChartRuleLookupManagerSkeleton:: +CreateTargetPhrase(const Word &sourceWord) const { // create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:' string str = sourceWord.GetFactor(0)->GetString().as_string(); diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h index 0c141d2ef..219d7b2b6 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h @@ -49,7 +49,7 @@ private: TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const; StackVec m_stackVec; - std::vector m_tpColl; + std::vector m_tpColl; const SkeletonPT &m_skeletonPT; }; diff --git a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h index 84b583df6..95823328f 100644 --- a/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h +++ b/moses/TranslationModel/CYKPlusParser/CompletedRuleCollection.h @@ -119,4 +119,4 @@ private: } // namespace Moses -#endif \ No newline at end of file +#endif diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index efa015140..ab14a02f7 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -107,14 +107,15 @@ void PhraseDictionaryCompact::Load() // } // }; -const TargetPhraseCollection* +TargetPhraseCollection::shared_ptr PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const { + TargetPhraseCollection::shared_ptr ret; // There is no souch source phrase if source phrase is longer than longest // observed source phrase during compilation if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()) - return NULL; + return ret; // Retrieve target phrase collection from phrase table TargetPhraseVectorPtr decodedPhraseColl @@ -122,7 +123,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s if(decodedPhraseColl != NULL && decodedPhraseColl->size()) { TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl)); - TargetPhraseCollection* phraseColl = new TargetPhraseCollection(); + TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection); // Score phrases and if possible apply ttable_limit TargetPhraseVector::iterator nth = @@ -139,7 +140,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s return phraseColl; } else - return NULL; + return ret; } TargetPhraseVectorPtr @@ -163,7 +164,7 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact() //TO_STRING_BODY(PhraseDictionaryCompact) -void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc) +void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc) { if(!m_sentenceCache.get()) m_sentenceCache.reset(new PhraseCache()); @@ -179,12 +180,13 @@ void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &so m_sentenceCache.reset(new PhraseCache()); m_phraseDecoder->PruneCache(); - for(PhraseCache::iterator it = m_sentenceCache->begin(); - it != m_sentenceCache->end(); it++) - delete *it; + // for(PhraseCache::iterator it = m_sentenceCache->begin(); + // it != m_sentenceCache->end(); it++) + // it->reset(); - PhraseCache temp; - temp.swap(*m_sentenceCache); + // PhraseCache temp; + // temp.swap(*m_sentenceCache); + m_sentenceCache->clear(); ReduceCache(); } diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h index 665ded3fc..f1c3dd1d8 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h @@ -51,7 +51,7 @@ protected: bool m_inMemory; bool m_useAlignmentInfo; - typedef std::vector PhraseCache; + typedef std::vector PhraseCache; typedef boost::thread_specific_ptr SentenceCache; static SentenceCache m_sentenceCache; @@ -69,12 +69,12 @@ public: void Load(); - const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const; + TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const; TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const; void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase); - void CacheForCleanup(TargetPhraseCollection* tpc); + void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc); void CleanUpAfterSentenceProcessing(const InputType &source); virtual ChartRuleLookupManager *CreateRuleLookupManager( diff --git a/moses/TranslationModel/PhraseDictionary.cpp b/moses/TranslationModel/PhraseDictionary.cpp index 23d645bb3..b61e6f5eb 100644 --- a/moses/TranslationModel/PhraseDictionary.cpp +++ b/moses/TranslationModel/PhraseDictionary.cpp @@ -35,14 +35,15 @@ namespace Moses { std::vector PhraseDictionary::s_staticColl; -CacheColl::~CacheColl() -{ - for (iterator iter = begin(); iter != end(); ++iter) { - std::pair &key = iter->second; - const TargetPhraseCollection *tps = key.first; - delete tps; - } -} +// CacheColl::~CacheColl() +// { +// // not needed any more since the switch to shared pointers +// // for (iterator iter = begin(); iter != end(); ++iter) { +// // std::pair &key = iter->second; +// // TargetPhraseCollection::shared_ptr tps = key.first; +// // delete tps; +// // } +// } PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow) : DecodeFeature(line, registerNow) @@ -60,9 +61,12 @@ ProvidesPrefixCheck() const return false; } -const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const +TargetPhraseCollection::shared_ptr +PhraseDictionary:: +GetTargetPhraseCollectionLEGACY(const Phrase& src) const { - const TargetPhraseCollection *ret; + TargetPhraseCollection::shared_ptr ret; + typedef std::pair entry; if (m_maxCacheSize) { CacheColl &cache = GetCache(); @@ -74,18 +78,14 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY( if (iter == cache.end()) { // not in cache, need to look up from phrase table ret = GetTargetPhraseCollectionNonCacheLEGACY(src); - if (ret) { - ret = new TargetPhraseCollection(*ret); + if (ret) { // make a copy + ret.reset(new TargetPhraseCollection(*ret)); } - - std::pair value(ret, clock()); - cache[hash] = value; + cache[hash] = entry(ret, clock()); } else { // in cache. just use it - std::pair &value = iter->second; - value.second = clock(); - - ret = value.first; + iter->second.second = clock(); + ret = iter->second.first; } } else { // don't use cache. look up from phrase table @@ -95,7 +95,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY( return ret; } -TargetPhraseCollection const * +TargetPhraseCollection::shared_ptr PhraseDictionary:: GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const { @@ -103,7 +103,7 @@ GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const } -TargetPhraseCollectionWithSourcePhrase const* +TargetPhraseCollectionWithSourcePhrase::shared_ptr PhraseDictionary:: GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const { @@ -140,14 +140,14 @@ SetFeaturesToApply() } -// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more -void -PhraseDictionary:: -Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const -{ - // do nothing by default - return; -} +// // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more +// void +// PhraseDictionary:: +// Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const +// { +// // do nothing by default +// return; +// } bool PhraseDictionary:: @@ -170,7 +170,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const } const Phrase &phrase = inputPath.GetPhrase(); - const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase); + TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase); inputPath.SetTargetPhrases(*this, targetPhrases, NULL); } } @@ -180,7 +180,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const //void PhraseDictionary::SaveCache() const //{ // CacheColl &cache = GetCache(); -// for( std::map >::iterator iter, +// for( std::map >::iterator iter, // iter != cache.end(), // iter++ ) { // @@ -191,10 +191,10 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const //void PhraseDictionary::LoadCache() const //{ // CacheColl &cache = GetCache(); -// std::map >::iterator iter; +// std::map >::iterator iter; // iter = cache.begin(); // while( iter != cache.end() ) { -// std::map >::iterator iterRemove = iter++; +// std::map >::iterator iterRemove = iter++; // delete iterRemove->second.first; // cache.erase(iterRemove); // } @@ -225,11 +225,12 @@ void PhraseDictionary::ReduceCache() const while( iter != cache.end() ) { if (iter->second.second < cutoffLastUsedTime) { CacheColl::iterator iterRemove = iter++; - delete iterRemove->second.first; + // delete iterRemove->second.first; cache.erase(iterRemove); } else iter++; } - VERBOSE(2,"Reduced persistent translation option cache in " << reduceCacheTime << " seconds." << std::endl); + VERBOSE(2,"Reduced persistent translation option cache in " + << reduceCacheTime << " seconds." << std::endl); } CacheColl &PhraseDictionary::GetCache() const @@ -265,8 +266,8 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const // lookup translation only if no other translations InputPath::TargetPhrases::const_iterator iter; for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) { - const std::pair &temp = iter->second; - const TargetPhraseCollection *tpCollPrev = temp.first; + const std::pair &temp = iter->second; + TargetPhraseCollection::shared_ptr tpCollPrev = temp.first; if (tpCollPrev && tpCollPrev->GetSize()) { // already have translation from another pt. Don't create translations diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h index d44fea2e1..605c8af7e 100644 --- a/moses/TranslationModel/PhraseDictionary.h +++ b/moses/TranslationModel/PhraseDictionary.h @@ -55,15 +55,18 @@ class ChartCellCollectionBase; class ChartRuleLookupManager; class ChartParser; -class CacheColl : public boost::unordered_map > -{ -// 1st = hash of source phrase/ address of phrase-table node -// 2nd = all translations -// 3rd = time of last access +// typedef std::pair TPCollLastUse; +typedef std::pair CacheCollEntry; +typedef boost::unordered_map CacheColl; +// class CacheColl : public boost::unordered_map +// { +// // 1st = hash of source phrase/ address of phrase-table node +// // 2nd = all translations +// // 3rd = time of last access -public: - ~CacheColl(); -}; +// public: +// ~CacheColl(); +// }; /** * Abstract base class for phrase dictionaries (tables). @@ -95,9 +98,9 @@ public: return m_id; } - virtual - void - Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const; + // virtual + // void + // Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const; /// return true if phrase table entries starting with /phrase/ // exist in the table. @@ -111,24 +114,23 @@ public: //! find list of translations that can translates src. Only for phrase input public: - virtual - TargetPhraseCollection const * + virtual TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const; - virtual - TargetPhraseCollection const * - GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const { + virtual TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, + Phrase const& src) const + { return GetTargetPhraseCollectionLEGACY(src); } - virtual - void + virtual void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; - virtual - void - GetTargetPhraseCollectionBatch(ttasksptr const& ttask, - const InputPathList &inputPathQueue) const { + virtual void + GetTargetPhraseCollectionBatch + (ttasksptr const& ttask, InputPathList const& inputPathQueue) const + { GetTargetPhraseCollectionBatch(inputPathQueue); } @@ -157,7 +159,9 @@ public: // LEGACY //! find list of translations that can translates a portion of src. Used by confusion network decoding - virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const; + virtual + TargetPhraseCollectionWithSourcePhrase::shared_ptr + GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const; protected: static std::vector s_staticColl; @@ -184,7 +188,10 @@ protected: mutable boost::scoped_ptr m_cache; #endif - virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const; + virtual + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const; + void ReduceCache() const; protected: diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp index 5fc569c90..11346f0b6 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp @@ -150,15 +150,15 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(ttasksptr const& ttas ReduceCache(); } -const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const +TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const { #ifdef WITH_THREADS boost::shared_lock read_lock(m_cacheLock); #endif - TargetPhraseCollection* tpc = NULL; + TargetPhraseCollection::shared_ptr tpc; cacheMap::const_iterator it = m_cacheTM.find(source); if(it != m_cacheTM.end()) { - tpc = new TargetPhraseCollection(*(it->second).first); + tpc.reset(new TargetPhraseCollection(*(it->second).first)); std::vector::const_iterator it2 = tpc->begin(); @@ -174,15 +174,15 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase return tpc; } -const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const +TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const { - const TargetPhraseCollection *ret = GetTargetPhraseCollection(src); + TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src); return ret; } -const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const +TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const { - const TargetPhraseCollection *ret = GetTargetPhraseCollection(src); + TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src); return ret; } @@ -366,7 +366,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) // and then add new entry TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; + TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; AgeCollection* ac = TgtCollAgePair.second; const Phrase* p_ptr = NULL; TargetPhrase* tp_ptr = NULL; @@ -397,7 +397,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) if (tpc->GetSize() == 0) { // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection ac->clear(); - delete tpc; + tpc.reset(); delete ac; m_cacheTM.erase(sp); } @@ -451,14 +451,14 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) //sp is found TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; + TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; AgeCollection* ac = TgtCollAgePair.second; m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection ac->clear(); - delete tpc; + tpc.reset(); delete ac; m_cacheTM.erase(sp); } else { @@ -558,7 +558,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a // and then add new entry TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; + TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; AgeCollection* ac = TgtCollAgePair.second; // const TargetPhrase* p_ptr = NULL; const Phrase* p_ptr = NULL; @@ -599,7 +599,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a // create target collection // we have to create new target collection age pair and add new entry to target collection age pair - TargetPhraseCollection* tpc = new TargetPhraseCollection(); + TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection); AgeCollection* ac = new AgeCollection(); m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac))); @@ -629,13 +629,13 @@ void PhraseDictionaryDynamicCacheBased::Decay() void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) { VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl); - cacheMap::const_iterator it = m_cacheTM.find(sp); + cacheMap::iterator it = m_cacheTM.find(sp); if (it != m_cacheTM.end()) { VERBOSE(3,"found:|" << sp << "|" << std::endl); //sp is found TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; + TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first; AgeCollection* ac = TgtCollAgePair.second; //loop in inverted order to allow a correct deletion of std::vectors tpc and ac @@ -661,7 +661,7 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection (((*it).second).second)->clear(); delete ((*it).second).second; - delete ((*it).second).first; + ((*it).second).first.reset(); m_cacheTM.erase(sp); } } else { @@ -703,11 +703,11 @@ void PhraseDictionaryDynamicCacheBased::Clear() #ifdef WITH_THREADS boost::shared_lock lock(m_cacheLock); #endif - cacheMap::const_iterator it; + cacheMap::iterator it; for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { (((*it).second).second)->clear(); delete ((*it).second).second; - delete ((*it).second).first; + ((*it).second).first.reset(); } m_cacheTM.clear(); m_entries = 0; @@ -746,7 +746,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const cacheMap::const_iterator it; for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { std::string source = (it->first).ToString(); - TargetPhraseCollection* tpc = (it->second).first; + TargetPhraseCollection::shared_ptr tpc = (it->second).first; TargetPhraseCollection::iterator itr; for(itr = tpc->begin(); itr != tpc->end(); itr++) { std::string target = (*itr)->ToString(); diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h index 8aee16051..4a9508be3 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h @@ -53,7 +53,7 @@ class PhraseDictionaryDynamicCacheBased : public PhraseDictionary { typedef std::vector AgeCollection; - typedef std::pair TargetCollectionAgePair; + typedef std::pair TargetCollectionAgePair; typedef std::map cacheMap; // data structure for the cache @@ -111,9 +111,14 @@ public: void Load(); void Load(const std::string files); - const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const; - const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(Phrase const &src) const; - const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection(const Phrase &src) const; + + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionLEGACY(Phrase const &src) const; + + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; // for phrase-based model // void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; diff --git a/moses/TranslationModel/PhraseDictionaryGroup.cpp b/moses/TranslationModel/PhraseDictionaryGroup.cpp index 99429228d..216dbf649 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.cpp +++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp @@ -86,29 +86,32 @@ void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch( // Look up each input in each model BOOST_FOREACH(InputPath* inputPath, inputPathQueue) { const Phrase &phrase = inputPath->GetPhrase(); - const TargetPhraseCollection* targetPhrases = + TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(ttask, phrase); inputPath->SetTargetPhrases(*this, targetPhrases, NULL); } } -const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY( +TargetPhraseCollection::shared_ptr PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY( const Phrase& src) const { UTIL_THROW2("Don't call me without the translation task."); } -const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY( - const ttasksptr& ttask, const Phrase& src) const +TargetPhraseCollection::shared_ptr +PhraseDictionaryGroup:: +GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const { - TargetPhraseCollection* ret = CreateTargetPhraseCollection(ttask, src); + TargetPhraseCollection::shared_ptr ret + = CreateTargetPhraseCollection(ttask, src); ret->NthElement(m_tableLimit); // sort the phrases for pruning later const_cast(this)->CacheForCleanup(ret); return ret; } -TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection( - const ttasksptr& ttask, const Phrase& src) const +TargetPhraseCollection::shared_ptr +PhraseDictionaryGroup:: +CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const { // Aggregation of phrases and the scores that will be applied to them vector allPhrases; @@ -121,8 +124,8 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection( // Collect phrases from this table const PhraseDictionary& pd = *m_memberPDs[i]; - const TargetPhraseCollection* ret_raw = pd.GetTargetPhraseCollectionLEGACY( - ttask, src); + TargetPhraseCollection::shared_ptr + ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src); if (ret_raw != NULL) { // Process each phrase from table @@ -162,7 +165,7 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection( } // Apply scores to phrases and add them to return collection - TargetPhraseCollection* ret = new TargetPhraseCollection(); + TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection); const vector pd_feature_const(m_pdFeature); BOOST_FOREACH(TargetPhrase* phrase, allPhrases) { phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second); @@ -174,29 +177,33 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection( return ret; } -ChartRuleLookupManager *PhraseDictionaryGroup::CreateRuleLookupManager( - const ChartParser &, const ChartCellCollectionBase&, size_t) +ChartRuleLookupManager* +PhraseDictionaryGroup:: +CreateRuleLookupManager(const ChartParser &, + const ChartCellCollectionBase&, size_t) { UTIL_THROW(util::Exception, "Phrase table used in chart decoder"); } //copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence -void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection* tpc) +void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc) { PhraseCache &ref = GetPhraseCache(); ref.push_back(tpc); } -void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing( - const InputType &source) +void +PhraseDictionaryGroup:: +CleanUpAfterSentenceProcessing(const InputType &source) { - PhraseCache &ref = GetPhraseCache(); - for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) { - delete *it; - } + GetPhraseCache().clear(); + // PhraseCache &ref = GetPhraseCache(); + // for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) { + // delete *it; + // } - PhraseCache temp; - temp.swap(ref); + // PhraseCache temp; + // temp.swap(ref); CleanUpComponentModels(source); } diff --git a/moses/TranslationModel/PhraseDictionaryGroup.h b/moses/TranslationModel/PhraseDictionaryGroup.h index 85e03e12b..390e20def 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.h +++ b/moses/TranslationModel/PhraseDictionaryGroup.h @@ -43,19 +43,20 @@ class PhraseDictionaryGroup: public PhraseDictionary public: PhraseDictionaryGroup(const std::string& line); void Load(); - TargetPhraseCollection* CreateTargetPhraseCollection(const ttasksptr& ttask, + TargetPhraseCollection::shared_ptr + CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const; std::vector > getWeights(size_t numWeights, bool normalize) const; - void CacheForCleanup(TargetPhraseCollection* tpc); + void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc); void CleanUpAfterSentenceProcessing(const InputType& source); void CleanUpComponentModels(const InputType& source); // functions below override the base class void GetTargetPhraseCollectionBatch(const ttasksptr& ttask, const InputPathList &inputPathQueue) const; - const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY( + TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY( const Phrase& src) const; - const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY( + TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY( const ttasksptr& ttask, const Phrase& src) const; void InitializeForInput(ttasksptr const& ttask) { /* Don't do anything source specific here as this object is shared between threads.*/ @@ -71,7 +72,7 @@ protected: bool m_restrict; std::vector m_pdFeature; - typedef std::vector PhraseCache; + typedef std::vector PhraseCache; #ifdef WITH_THREADS boost::shared_mutex m_lock_cache; typedef std::map SentenceCache; diff --git a/moses/TranslationModel/PhraseDictionaryMemory.cpp b/moses/TranslationModel/PhraseDictionaryMemory.cpp index 1724748bd..a7bd96d99 100644 --- a/moses/TranslationModel/PhraseDictionaryMemory.cpp +++ b/moses/TranslationModel/PhraseDictionaryMemory.cpp @@ -49,16 +49,17 @@ PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line) } -TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection( - const Phrase &source - , const TargetPhrase &target - , const Word *sourceLHS) +TargetPhraseCollection::shared_ptr +PhraseDictionaryMemory:: +GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) { PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS); return currNode.GetTargetPhraseCollection(); } -const TargetPhraseCollection* +TargetPhraseCollection::shared_ptr PhraseDictionaryMemory:: GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const { @@ -73,10 +74,10 @@ GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const const Word& word = source.GetWord(pos); currNode = currNode->GetChild(word); if (currNode == NULL) - return NULL; + return TargetPhraseCollection::shared_ptr(); } - return &currNode->GetTargetPhraseCollection(); + return currNode->GetTargetPhraseCollection(); } PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source @@ -168,12 +169,11 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const lastWord.OnlyTheseFactors(m_inputFactors); const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord); + TargetPhraseCollection::shared_ptr targetPhrases; if (ptNode) { - const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection(); - inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode); - } else { - inputPath.SetTargetPhrases(*this, NULL, NULL); - } + targetPhrases = ptNode->GetTargetPhraseCollection(); + } + inputPath.SetTargetPhrases(*this, targetPhrases, ptNode); } } } diff --git a/moses/TranslationModel/PhraseDictionaryMemory.h b/moses/TranslationModel/PhraseDictionaryMemory.h index 723beaea2..1e32f2448 100644 --- a/moses/TranslationModel/PhraseDictionaryMemory.h +++ b/moses/TranslationModel/PhraseDictionaryMemory.h @@ -56,19 +56,23 @@ public: std::size_t); // only used by multi-model phrase table, and other meta-features - const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const; - void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionLEGACY(const Phrase& src) const; + + void + GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; TO_STRING(); protected: - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); - - PhraseDictionaryNodeMemory &GetOrCreateNode(const Phrase &source - , const TargetPhrase &target - , const Word *sourceLHS); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection + (const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); + PhraseDictionaryNodeMemory & + GetOrCreateNode(const Phrase &source, const TargetPhrase &target, + const Word *sourceLHS); + void SortAndPrune(); PhraseDictionaryNodeMemory m_collection; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp index e3ccaaf65..7384020fb 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp @@ -26,8 +26,10 @@ using namespace std; namespace Moses { -PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line) - :PhraseDictionary(line, true) + +PhraseDictionaryMultiModel:: +PhraseDictionaryMultiModel(const std::string &line) + : PhraseDictionary(line, true) { ReadParameters(); @@ -45,7 +47,8 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line) } } -PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line) +PhraseDictionaryMultiModel:: +PhraseDictionaryMultiModel(int type, const std::string &line) :PhraseDictionary(line, true) { if (type == 1) { @@ -56,7 +59,9 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri } } -void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value) +void +PhraseDictionaryMultiModel:: +SetParameter(const std::string& key, const std::string& value) { if (key == "mode") { m_mode = value; @@ -70,9 +75,9 @@ void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std: } } -PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel() -{ -} +PhraseDictionaryMultiModel:: +~PhraseDictionaryMultiModel() +{ } void PhraseDictionaryMultiModel::Load() { @@ -88,18 +93,21 @@ void PhraseDictionaryMultiModel::Load() } } - -const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollectionLEGACY(const Phrase& src) const +TargetPhraseCollection::shared_ptr +PhraseDictionaryMultiModel:: +GetTargetPhraseCollectionLEGACY(const Phrase& src) const { - std::vector > multimodelweights = getWeights(m_numScoreComponents, true); - TargetPhraseCollection *ret = NULL; + std::vector > multimodelweights; + multimodelweights = getWeights(m_numScoreComponents, true); + TargetPhraseCollection::shared_ptr ret; - std::map* allStats = new(std::map); + std::map* allStats; + allStats = new(std::map); CollectSufficientStatistics(src, allStats); ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights); RemoveAllInMap(*allStats); - delete allStats; + delete allStats; // ??? Why the detour through malloc? UG ret->NthElement(m_tableLimit); // sort the phrases for pruning later const_cast(this)->CacheForCleanup(ret); @@ -107,16 +115,19 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect return ret; } - -void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map* allStats) const +void +PhraseDictionaryMultiModel:: +CollectSufficientStatistics +(const Phrase& src, std::map* allStats) const { for(size_t i = 0; i < m_numModels; ++i) { const PhraseDictionary &pd = *m_pd[i]; - TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src); + TargetPhraseCollection::shared_ptr ret_raw; + ret_raw = pd.GetTargetPhraseCollectionLEGACY(src); if (ret_raw != NULL) { - TargetPhraseCollection::iterator iterTargetPhrase, iterLast; + TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast; if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) { iterLast = ret_raw->begin() + m_tableLimit; } else { @@ -130,7 +141,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::string targetString = targetPhrase->GetStringRep(m_output); if (allStats->find(targetString) == allStats->end()) { - multiModelStatistics * statistics = new multiModelStatistics; + multiModelStats * statistics = new multiModelStats; statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info statistics->p.resize(m_numScoreComponents); for(size_t j = 0; j < m_numScoreComponents; ++j) { @@ -149,7 +160,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, (*allStats)[targetString] = statistics; } - multiModelStatistics * statistics = (*allStats)[targetString]; + multiModelStats * statistics = (*allStats)[targetString]; for(size_t j = 0; j < m_numScoreComponents; ++j) { statistics->p[j][i] = UntransformScore(raw_scores[j]); @@ -161,12 +172,17 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, } } -TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map* allStats, std::vector > &multimodelweights) const +TargetPhraseCollection::shared_ptr +PhraseDictionaryMultiModel:: +CreateTargetPhraseCollectionLinearInterpolation +( const Phrase& src, + std::map* allStats, + std::vector > &multimodelweights) const { - TargetPhraseCollection *ret = new TargetPhraseCollection(); - for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { + TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection); + for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { - multiModelStatistics * statistics = iter->second; + multiModelStats * statistics = iter->second; Scores scoreVector(m_numScoreComponents); @@ -188,7 +204,9 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection } //TODO: is it worth caching the results as long as weights don't change? -std::vector > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const +std::vector > +PhraseDictionaryMultiModel:: +getWeights(size_t numWeights, bool normalize) const { const std::vector* weights_ptr; std::vector raw_weights; @@ -237,7 +255,9 @@ std::vector > PhraseDictionaryMultiModel::getWeights(size_t n return multimodelweights; } -std::vector PhraseDictionaryMultiModel::normalizeWeights(std::vector &weights) const +std::vector +PhraseDictionaryMultiModel:: +normalizeWeights(std::vector &weights) const { std::vector ret (m_numModels); float total = std::accumulate(weights.begin(),weights.end(),0.0); @@ -248,29 +268,36 @@ std::vector PhraseDictionaryMultiModel::normalizeWeights(std::vectorreset(); + // } - PhraseCache temp; - temp.swap(ref); + // PhraseCache temp; + // temp.swap(ref); + GetPhraseCache().clear(); CleanUpComponentModels(source); @@ -279,14 +306,18 @@ void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType } -void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source) +void +PhraseDictionaryMultiModel:: +CleanUpComponentModels(const InputType &source) { for(size_t i = 0; i < m_numModels; ++i) { m_pd[i]->CleanUpAfterSentenceProcessing(source); } } -const std::vector* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const +const std::vector* +PhraseDictionaryMultiModel:: +GetTemporaryMultiModelWeightsVector() const { #ifdef WITH_THREADS boost::shared_lock read_lock(m_lock_weights); @@ -300,7 +331,9 @@ const std::vector* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeig #endif } -void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector weights) +void +PhraseDictionaryMultiModel:: +SetTemporaryMultiModelWeightsVector(std::vector weights) { #ifdef WITH_THREADS boost::unique_lock lock(m_lock_weights); @@ -311,7 +344,9 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector } #ifdef WITH_DLIB -vector PhraseDictionaryMultiModel::MinimizePerplexity(vector > &phrase_pair_vector) +vector +PhraseDictionaryMultiModel:: +MinimizePerplexity(vector > &phrase_pair_vector) { map, size_t> phrase_pair_map; @@ -320,7 +355,7 @@ vector PhraseDictionaryMultiModel::MinimizePerplexity(vector optimizerStats; + vector optimizerStats; for ( map, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) { @@ -329,7 +364,7 @@ vector PhraseDictionaryMultiModel::MinimizePerplexity(vector fs(m_numModels); - map* allStats = new(map); + map* allStats = new(map); Phrase sourcePhrase(0); sourcePhrase.CreateFromString(Input, m_input, source_string, NULL); @@ -343,7 +378,7 @@ vector PhraseDictionaryMultiModel::MinimizePerplexity(vectortargetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase); targetStatistics->p = (*allStats)[target_string]->p; targetStatistics->f = iter->second; @@ -383,7 +418,9 @@ vector PhraseDictionaryMultiModel::MinimizePerplexity(vector PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels) +vector +PhraseDictionaryMultiModel:: +Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels) { dlib::matrix starting_point; @@ -428,8 +465,8 @@ double CrossEntropy::operator() ( const dlib::matrix& arg) const weight_vector = m_model->normalizeWeights(weight_vector); } - for ( std::vector::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) { - multiModelStatisticsOptimization* statistics = *iter; + for ( std::vector::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) { + multiModelStatsOptimization* statistics = *iter; size_t f = statistics->f; double score; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.h b/moses/TranslationModel/PhraseDictionaryMultiModel.h index b07c05afc..eb7467d3c 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.h +++ b/moses/TranslationModel/PhraseDictionaryMultiModel.h @@ -36,15 +36,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA namespace Moses { -struct multiModelStatistics { +struct multiModelStats { TargetPhrase *targetPhrase; std::vector > p; - ~multiModelStatistics() { + ~multiModelStats() { delete targetPhrase; }; }; -struct multiModelStatisticsOptimization: multiModelStatistics { +struct multiModelStatsOptimization: multiModelStats { size_t f; }; @@ -71,27 +71,59 @@ public: PhraseDictionaryMultiModel(int type, const std::string &line); ~PhraseDictionaryMultiModel(); void Load(); - virtual void CollectSufficientStatistics(const Phrase& src, std::map* allStats) const; - virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map* allStats, std::vector > &multimodelweights) const; - std::vector > getWeights(size_t numWeights, bool normalize) const; - std::vector normalizeWeights(std::vector &weights) const; - void CacheForCleanup(TargetPhraseCollection* tpc); - void CleanUpAfterSentenceProcessing(const InputType &source); - virtual void CleanUpComponentModels(const InputType &source); + + virtual void + CollectSufficientStatistics + (const Phrase& src, std::map* allStats) + const; + + virtual TargetPhraseCollection::shared_ptr + CreateTargetPhraseCollectionLinearInterpolation + (const Phrase& src, std::map* allStats, + std::vector > &multimodelweights) const; + + std::vector > + getWeights(size_t numWeights, bool normalize) const; + + std::vector + normalizeWeights(std::vector &weights) const; + + void + CacheForCleanup(TargetPhraseCollection::shared_ptr tpc); + + void + CleanUpAfterSentenceProcessing(const InputType &source); + + virtual void + CleanUpComponentModels(const InputType &source); + #ifdef WITH_DLIB virtual std::vector MinimizePerplexity(std::vector > &phrase_pair_vector); std::vector Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels); #endif - // functions below required by base class - virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const; - virtual void InitializeForInput(ttasksptr const& ttask) { - /* Don't do anything source specific here as this object is shared between threads.*/ - } - ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t); - void SetParameter(const std::string& key, const std::string& value); - const std::vector* GetTemporaryMultiModelWeightsVector() const; - void SetTemporaryMultiModelWeightsVector(std::vector weights); + // functions below required by base class + virtual TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionLEGACY(const Phrase& src) const; + + virtual void + InitializeForInput(ttasksptr const& ttask) { + // Don't do anything source specific here as this object is shared + // between threads. + } + + ChartRuleLookupManager* + CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, + std::size_t); + + void + SetParameter(const std::string& key, const std::string& value); + + const std::vector* + GetTemporaryMultiModelWeightsVector() const; + + void + SetTemporaryMultiModelWeightsVector(std::vector weights); protected: std::string m_mode; @@ -100,7 +132,7 @@ protected: size_t m_numModels; std::vector m_multimodelweights; - typedef std::vector PhraseCache; + typedef std::vector PhraseCache; #ifdef WITH_THREADS boost::shared_mutex m_lock_cache; typedef std::map SentenceCache; @@ -146,7 +178,7 @@ class CrossEntropy: public OptimizationObjective public: CrossEntropy ( - std::vector &optimizerStats, + std::vector &optimizerStats, PhraseDictionaryMultiModel * model, size_t iFeature ) { @@ -158,7 +190,7 @@ public: double operator() ( const dlib::matrix& arg) const; protected: - std::vector m_optimizerStats; + std::vector m_optimizerStats; PhraseDictionaryMultiModel * m_model; size_t m_iFeature; }; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp index 769e6410f..4c61f16db 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp @@ -120,7 +120,7 @@ void PhraseDictionaryMultiModelCounts::Load() } -const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const +TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const { vector > multimodelweights; bool normalize; @@ -130,11 +130,12 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC //source phrase frequency is shared among all phrase pairs vector fs(m_numModels); - map* allStats = new(map); + map* allStats = new(map); - CollectSufficientStatistics(src, fs, allStats); + CollectSufficientStats(src, fs, allStats); - TargetPhraseCollection *ret = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights); + TargetPhraseCollection::shared_ptr ret + = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights); ret->NthElement(m_tableLimit); // sort the phrases for pruning later const_cast(this)->CacheForCleanup(ret); @@ -142,16 +143,17 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC } -void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector &fs, map* allStats) const +void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector &fs, map* allStats) const //fill fs and allStats with statistics from models { for(size_t i = 0; i < m_numModels; ++i) { const PhraseDictionary &pd = *m_pd[i]; - TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src); + TargetPhraseCollection::shared_ptr ret_raw + = pd.GetTargetPhraseCollectionLEGACY(src); if (ret_raw != NULL) { - TargetPhraseCollection::iterator iterTargetPhrase; + TargetPhraseCollection::const_iterator iterTargetPhrase; for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) { const TargetPhrase * targetPhrase = *iterTargetPhrase; @@ -160,7 +162,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& string targetString = targetPhrase->GetStringRep(m_output); if (allStats->find(targetString) == allStats->end()) { - multiModelCountsStatistics * statistics = new multiModelCountsStatistics; + multiModelCountsStats * statistics = new multiModelCountsStats; statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info //correct future cost estimates and total score @@ -178,7 +180,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& (*allStats)[targetString] = statistics; } - multiModelCountsStatistics * statistics = (*allStats)[targetString]; + multiModelCountsStats * statistics = (*allStats)[targetString]; statistics->fst[i] = UntransformScore(raw_scores[0]); statistics->ft[i] = UntransformScore(raw_scores[1]); @@ -189,8 +191,8 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& } // get target phrase frequency for models which have not seen the phrase pair - for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { - multiModelCountsStatistics * statistics = iter->second; + for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { + multiModelCountsStats * statistics = iter->second; for (size_t i = 0; i < m_numModels; ++i) { if (!statistics->ft[i]) { @@ -200,12 +202,14 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& } } -TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector &fs, map* allStats, vector > &multimodelweights) const + TargetPhraseCollection::shared_ptr +PhraseDictionaryMultiModelCounts:: +CreateTargetPhraseCollectionCounts(const Phrase &src, vector &fs, map* allStats, vector > &multimodelweights) const { - TargetPhraseCollection *ret = new TargetPhraseCollection(); - for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { + TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection); + for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { - multiModelCountsStatistics * statistics = iter->second; + multiModelCountsStats * statistics = iter->second; if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) { UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables."); @@ -248,7 +252,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz { const PhraseDictionary &pd = *m_inverse_pd[modelIndex]; - const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollectionLEGACY(target); + TargetPhraseCollection::shared_ptr ret_raw = pd.GetTargetPhraseCollectionLEGACY(target); // in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score) if (ret_raw && ret_raw->GetSize() > 0) { @@ -320,7 +324,7 @@ double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( cons } -lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector &tables, bool is_input ) +lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector &tables, bool is_input ) { //do all the necessary lexical table lookups and get counts, but don't apply weights yet @@ -474,7 +478,7 @@ vector PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector optimizerStats; + vector optimizerStats; for ( map, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) { @@ -483,12 +487,12 @@ vector PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector fs(m_numModels); - map* allStats = new(map); + map* allStats = new(map); Phrase sourcePhrase(0); sourcePhrase.CreateFromString(Input, m_input, source_string, NULL); - CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase + CollectSufficientStats(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase //phrase pair not found; leave cache empty if (allStats->find(target_string) == allStats->end()) { @@ -497,19 +501,19 @@ vector PhraseDictionaryMultiModelCounts::MinimizePerplexity(vectortargetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase); - targetStatistics->fs = fs; - targetStatistics->fst = (*allStats)[target_string]->fst; - targetStatistics->ft = (*allStats)[target_string]->ft; - targetStatistics->f = iter->second; + multiModelCountsStatsOptimization * targetStats = new multiModelCountsStatsOptimization(); + targetStats->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase); + targetStats->fs = fs; + targetStats->fst = (*allStats)[target_string]->fst; + targetStats->ft = (*allStats)[target_string]->ft; + targetStats->f = iter->second; try { - pair >, vector< set > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast(*targetStatistics->targetPhrase), targetStatistics->targetPhrase->GetAlignTerm()); - targetStatistics->lexCachee2f = CacheLexicalStatistics(static_cast(*targetStatistics->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false ); - targetStatistics->lexCachef2e = CacheLexicalStatistics(sourcePhrase, static_cast(*targetStatistics->targetPhrase), alignment.first, m_lexTable_f2e, true ); + pair >, vector< set > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast(*targetStats->targetPhrase), targetStats->targetPhrase->GetAlignTerm()); + targetStats->lexCachee2f = CacheLexicalStats(static_cast(*targetStats->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false ); + targetStats->lexCachef2e = CacheLexicalStats(sourcePhrase, static_cast(*targetStats->targetPhrase), alignment.first, m_lexTable_f2e, true ); - optimizerStats.push_back(targetStatistics); + optimizerStats.push_back(targetStats); } catch (AlignmentException& e) {} RemoveAllInMap(*allStats); @@ -561,8 +565,8 @@ double CrossEntropyCounts::operator() ( const dlib::matrix& arg) con weight_vector = m_model->normalizeWeights(weight_vector); } - for ( std::vector::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) { - multiModelCountsStatisticsOptimization* statistics = *iter; + for ( std::vector::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) { + multiModelCountsStatsOptimization* statistics = *iter; size_t f = statistics->f; double score; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h index 7e4f32f30..5f59d826b 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h @@ -37,11 +37,11 @@ typedef boost::unordered_map lexicalMapJoint; typedef std::pair, std::vector > lexicalPair; typedef std::vector > lexicalCache; -struct multiModelCountsStatistics : multiModelStatistics { +struct multiModelCountsStats : multiModelStats { std::vector fst, ft; }; -struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics { +struct multiModelCountsStatsOptimization: multiModelCountsStats { std::vector fs; lexicalCache lexCachee2f, lexCachef2e; size_t f; @@ -80,18 +80,18 @@ public: PhraseDictionaryMultiModelCounts(const std::string &line); ~PhraseDictionaryMultiModelCounts(); void Load(); - TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector &fs, std::map* allStats, std::vector > &multimodelweights) const; - void CollectSufficientStatistics(const Phrase &src, std::vector &fs, std::map* allStats) const; + TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector &fs, std::map* allStats, std::vector > &multimodelweights) const; + void CollectSufficientStats(const Phrase &src, std::vector &fs, std::map* allStats) const; float GetTargetCount(const Phrase& target, size_t modelIndex) const; double GetLexicalProbability( Word &inner, Word &outer, const std::vector &tables, std::vector &multimodelweights ) const; double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector &tables, std::vector &multimodelweights, bool is_input ) const; double ComputeWeightedLexicalTranslationFromCache( std::vector, std::vector > > > &cache, std::vector &weights ) const; std::pair GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const; - std::vector, std::vector > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector &tables, bool is_input ); + std::vector, std::vector > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector &tables, bool is_input ); void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector &count, const std::vector &tables) const; void FillLexicalCountsMarginal(Word &wordS, std::vector &count, const std::vector &tables) const; void LoadLexicalTable( std::string &fileName, lexicalTable* ltable); - const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const; + TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const; #ifdef WITH_DLIB std::vector MinimizePerplexity(std::vector > &phrase_pair_vector); #endif @@ -117,7 +117,7 @@ class CrossEntropyCounts: public OptimizationObjective public: CrossEntropyCounts ( - std::vector &optimizerStats, + std::vector &optimizerStats, PhraseDictionaryMultiModelCounts * model, size_t iFeature ) { @@ -129,7 +129,7 @@ public: double operator() ( const dlib::matrix& arg) const; private: - std::vector m_optimizerStats; + std::vector m_optimizerStats; PhraseDictionaryMultiModelCounts * m_model; size_t m_iFeature; }; diff --git a/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp b/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp index 84639a737..0c562d4c1 100644 --- a/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp +++ b/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp @@ -39,7 +39,7 @@ void PhraseDictionaryNodeMemory::Prune(size_t tableLimit) } // prune TargetPhraseCollection in this node - m_targetPhraseCollection.Prune(true, tableLimit); + m_targetPhraseCollection->Prune(true, tableLimit); } void PhraseDictionaryNodeMemory::Sort(size_t tableLimit) @@ -53,10 +53,11 @@ void PhraseDictionaryNodeMemory::Sort(size_t tableLimit) } // prune TargetPhraseCollection in this node - m_targetPhraseCollection.Sort(true, tableLimit); + m_targetPhraseCollection->Sort(true, tableLimit); } -PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm) +PhraseDictionaryNodeMemory* +PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm) { return &m_sourceTermMap[sourceTerm]; } @@ -118,7 +119,7 @@ void PhraseDictionaryNodeMemory::Remove() { m_sourceTermMap.clear(); m_nonTermMap.clear(); - m_targetPhraseCollection.Remove(); + m_targetPhraseCollection->Remove(); } std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeMemory &node) diff --git a/moses/TranslationModel/PhraseDictionaryNodeMemory.h b/moses/TranslationModel/PhraseDictionaryNodeMemory.h index 950838ae3..29defe71f 100644 --- a/moses/TranslationModel/PhraseDictionaryNodeMemory.h +++ b/moses/TranslationModel/PhraseDictionaryNodeMemory.h @@ -130,12 +130,13 @@ private: TerminalMap m_sourceTermMap; NonTerminalMap m_nonTermMap; - TargetPhraseCollection m_targetPhraseCollection; + TargetPhraseCollection::shared_ptr m_targetPhraseCollection; public: - PhraseDictionaryNodeMemory() {} - + PhraseDictionaryNodeMemory() + : m_targetPhraseCollection(new TargetPhraseCollection) { } + bool IsLeaf() const { return m_sourceTermMap.empty() && m_nonTermMap.empty(); } @@ -152,10 +153,12 @@ public: const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const; #endif - const TargetPhraseCollection &GetTargetPhraseCollection() const { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection() const { return m_targetPhraseCollection; } - TargetPhraseCollection &GetTargetPhraseCollection() { + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection() { return m_targetPhraseCollection; } diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp index 03b69d0ad..6a72b265f 100644 --- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp +++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp @@ -54,7 +54,9 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input } } -void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const +void +PhraseDictionaryTransliteration:: +GetTargetPhraseCollection(InputPath &inputPath) const { const Phrase &sourcePhrase = inputPath.GetPhrase(); size_t hash = hash_value(sourcePhrase); @@ -66,7 +68,7 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input if (iter != cache.end()) { // already in cache - const TargetPhraseCollection *tpColl = iter->second.first; + TargetPhraseCollection::shared_ptr tpColl = iter->second.first; inputPath.SetTargetPhrases(*this, tpColl, NULL); } else { // TRANSLITERATE @@ -89,17 +91,15 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input int ret = system(cmd.c_str()); UTIL_THROW_IF2(ret != 0, "Transliteration script error"); - TargetPhraseCollection *tpColl = new TargetPhraseCollection(); - vector targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.path()); + TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection); + vector targetPhrases + = CreateTargetPhrases(sourcePhrase, outDir.path()); vector::const_iterator iter; for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) { TargetPhrase *tp = *iter; tpColl->Add(tp); } - - std::pair value(tpColl, clock()); - cache[hash] = value; - + cache[hash] = CacheCollEntry(tpColl, clock()); inputPath.SetTargetPhrases(*this, tpColl, NULL); } } diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp index f0c744155..fbeb3abe6 100644 --- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp +++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp @@ -74,11 +74,10 @@ void PhraseDictionaryTreeAdaptor::CleanUpAfterSentenceProcessing(InputType const obj.CleanUp(); } -TargetPhraseCollection const* +TargetPhraseCollection::shared_ptr PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const { - const TargetPhraseCollection *ret = GetImplementation().GetTargetPhraseCollection(src); - return ret; + return GetImplementation().GetTargetPhraseCollection(src); } void PhraseDictionaryTreeAdaptor::EnableCache() @@ -107,16 +106,17 @@ const PDTAimp& PhraseDictionaryTreeAdaptor::GetImplementation() const } // legacy -const TargetPhraseCollectionWithSourcePhrase* -PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const +TargetPhraseCollectionWithSourcePhrase::shared_ptr +PhraseDictionaryTreeAdaptor:: +GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const { + TargetPhraseCollectionWithSourcePhrase::shared_ptr ret; if(GetImplementation().m_rangeCache.empty()) { - const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range)); - return tpColl; + ret = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range)); } else { - const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()]; - return tpColl; + ret = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()]; } + return ret; } } diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h index 0ed8ed612..564ceae16 100644 --- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h +++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h @@ -59,7 +59,8 @@ public: // get translation candidates for a given source phrase // returns null pointer if nothing found - TargetPhraseCollection const* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; void InitializeForInput(ttasksptr const& ttask); void CleanUpAfterSentenceProcessing(InputType const& source); @@ -73,7 +74,9 @@ public: } // legacy - const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const & srcRange) const; + TargetPhraseCollectionWithSourcePhrase::shared_ptr + GetTargetPhraseCollectionLEGACY(InputType const& src, + WordsRange const & srcRange) const; }; diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT/ProbingPT.cpp index 19b7e8795..ff2e02f1c 100644 --- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp @@ -79,11 +79,11 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue continue; } - TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase); + TargetPhraseCollection::shared_ptr tpColl = CreateTargetPhrase(sourcePhrase); // add target phrase to phrase-table cache size_t hash = hash_value(sourcePhrase); - std::pair value(tpColl, clock()); + std::pair value(tpColl, clock()); cache[hash] = value; inputPath.SetTargetPhrases(*this, tpColl, NULL); @@ -109,7 +109,7 @@ std::vector ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour return ret; } -TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const +TargetPhraseCollection::shared_ptr ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const { // create a target phrase from the 1st word of the source, prefix with 'ProbingPT:' assert(sourcePhrase.GetSize()); @@ -124,7 +124,7 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase std::pair > query_result; - TargetPhraseCollection *tpColl = NULL; + TargetPhraseCollection::shared_ptr tpColl = NULL; //Actual lookup query_result = m_engine->query(probingSource); diff --git a/moses/TranslationModel/RuleTable/Loader.h b/moses/TranslationModel/RuleTable/Loader.h index 48390e37e..66a08b0db 100644 --- a/moses/TranslationModel/RuleTable/Loader.h +++ b/moses/TranslationModel/RuleTable/Loader.h @@ -49,12 +49,14 @@ protected: // Provide access to RuleTableTrie's private // GetOrCreateTargetPhraseCollection function. - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - RuleTableTrie &ruleTable - , const Phrase &source - , const TargetPhrase &target - , const Word *sourceLHS) { - return ruleTable.GetOrCreateTargetPhraseCollection(source, target, sourceLHS); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(RuleTableTrie &ruleTable, + const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) + { + return ruleTable.GetOrCreateTargetPhraseCollection(source, target, + sourceLHS); } }; diff --git a/moses/TranslationModel/RuleTable/LoaderCompact.cpp b/moses/TranslationModel/RuleTable/LoaderCompact.cpp index c947dfdc2..cac698cf4 100644 --- a/moses/TranslationModel/RuleTable/LoaderCompact.cpp +++ b/moses/TranslationModel/RuleTable/LoaderCompact.cpp @@ -224,9 +224,10 @@ bool RuleTableLoaderCompact::LoadRuleSection( targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply()); // Insert rule into table. - TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection( - ruleTable, sourcePhrase, *targetPhrase, &sourceLHS); - coll.Add(targetPhrase); + TargetPhraseCollection::shared_ptr coll; + coll = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, + *targetPhrase, &sourceLHS); + coll->Add(targetPhrase); } return true; diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp index 9386f8d81..637cf28a3 100644 --- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp +++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp @@ -242,8 +242,10 @@ bool RuleTableLoaderStandard::Load(FormatType format targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector); targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply()); - TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS); - phraseColl.Add(targetPhrase); + TargetPhraseCollection::shared_ptr phraseColl + = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, + *targetPhrase, sourceLHS); + phraseColl->Add(targetPhrase); // not implemented correctly in memory pt. just delete it for now delete sourceLHS; diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp index 5f7ddf85d..d2bd9c6cc 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp @@ -282,8 +282,10 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask) targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply()); - TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS); - phraseColl.Add(targetPhrase); + TargetPhraseCollection::shared_ptr phraseColl + = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, + *targetPhrase, sourceLHS); + phraseColl->Add(targetPhrase); count++; @@ -301,7 +303,9 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask) //removedirectoryrecursively(dirName); } -TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode +TargetPhraseCollection::shared_ptr +PhraseDictionaryFuzzyMatch:: +GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode , const Phrase &source , const TargetPhrase &target , const Word *sourceLHS) diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h index 8751b7956..c57061704 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h @@ -1,3 +1,4 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- /*********************************************************************** Moses - statistical machine translation system Copyright (C) 2006-2011 University of Edinburgh @@ -59,7 +60,8 @@ public: TO_STRING(); protected: - TargetPhraseCollection &GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode , const Phrase &source , const TargetPhrase &target , const Word *sourceLHS); diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp index e99385d82..c68cd5d62 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp @@ -149,26 +149,26 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath lastWord.OnlyTheseFactors(m_inputFactors); OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord); + TargetPhraseCollection::shared_ptr tpc; if (lastWordOnDisk == NULL) { // OOV according to this phrase table. Not possible to extend - inputPath.SetTargetPhrases(*this, NULL, NULL); + inputPath.SetTargetPhrases(*this, tpc, NULL); } else { - const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper); - if (ptNode) { - const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode); - inputPath.SetTargetPhrases(*this, targetPhrases, ptNode); - } else { - inputPath.SetTargetPhrases(*this, NULL, NULL); - } - + OnDiskPt::PhraseNode const* ptNode; + ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper); + if (ptNode) tpc = GetTargetPhraseCollection(ptNode); + inputPath.SetTargetPhrases(*this, tpc, ptNode); + delete lastWordOnDisk; } } } -const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const +TargetPhraseCollection::shared_ptr +PhraseDictionaryOnDisk:: +GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const { - const TargetPhraseCollection *ret; + TargetPhraseCollection::shared_ptr ret; CacheColl &cache = GetCache(); size_t hash = (size_t) ptNode->GetFilePos(); @@ -181,31 +181,34 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection( // not in cache, need to look up from phrase table ret = GetTargetPhraseCollectionNonCache(ptNode); - std::pair value(ret, clock()); + std::pair value(ret, clock()); cache[hash] = value; } else { // in cache. just use it - std::pair &value = iter->second; - value.second = clock(); - - ret = value.first; + iter->second.second = clock(); + ret = iter->second.first; } return ret; } -const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const +TargetPhraseCollection::shared_ptr +PhraseDictionaryOnDisk:: +GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const { - OnDiskPt::OnDiskWrapper &wrapper = const_cast(GetImplementation()); + OnDiskPt::OnDiskWrapper& wrapper + = const_cast(GetImplementation()); vector weightT = StaticData::Instance().GetWeights(this); OnDiskPt::Vocab &vocab = wrapper.GetVocab(); - const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper); - TargetPhraseCollection *targetPhrases - = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false); + OnDiskPt::TargetPhraseCollection::shared_ptr targetPhrasesOnDisk + = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper); + TargetPhraseCollection::shared_ptr targetPhrases + = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, + weightT, vocab, false); - delete targetPhrasesOnDisk; + // delete targetPhrasesOnDisk; return targetPhrases; } diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h index 246690922..2d6befccf 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h @@ -78,8 +78,11 @@ public: virtual void InitializeForInput(ttasksptr const& ttask); void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; - const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const; - const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const; + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const; + + TargetPhraseCollection::shared_ptr + GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const; void SetParameter(const std::string& key, const std::string& value); diff --git a/moses/TranslationModel/RuleTable/Trie.h b/moses/TranslationModel/RuleTable/Trie.h index 51cc92e4a..49ae35a4e 100644 --- a/moses/TranslationModel/RuleTable/Trie.h +++ b/moses/TranslationModel/RuleTable/Trie.h @@ -51,9 +51,10 @@ public: private: friend class RuleTableLoader; - virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, - const Word *sourceLHS) = 0; + virtual TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) = 0; virtual void SortAndPrune() = 0; diff --git a/moses/TranslationModel/RuleTable/UTrie.cpp b/moses/TranslationModel/RuleTable/UTrie.cpp index d6ccb4c78..667ff2822 100644 --- a/moses/TranslationModel/RuleTable/UTrie.cpp +++ b/moses/TranslationModel/RuleTable/UTrie.cpp @@ -38,8 +38,11 @@ namespace Moses { -TargetPhraseCollection &RuleTableUTrie::GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) +TargetPhraseCollection::shared_ptr +RuleTableUTrie:: +GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS) { UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS); return currNode.GetOrCreateTargetPhraseCollection(target); diff --git a/moses/TranslationModel/RuleTable/UTrie.h b/moses/TranslationModel/RuleTable/UTrie.h index caab05b04..6e9fc08ef 100644 --- a/moses/TranslationModel/RuleTable/UTrie.h +++ b/moses/TranslationModel/RuleTable/UTrie.h @@ -21,13 +21,13 @@ #include "Trie.h" #include "UTrieNode.h" +#include "moses/TargetPhraseCollection.h" namespace Moses { class Phrase; class TargetPhrase; -class TargetPhraseCollection; class Word; class ChartParser; @@ -57,8 +57,10 @@ public: const ChartCellCollectionBase &, std::size_t); private: - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const Phrase &source, + const TargetPhrase &target, + const Word *sourceLHS); UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); diff --git a/moses/TranslationModel/RuleTable/UTrieNode.cpp b/moses/TranslationModel/RuleTable/UTrieNode.cpp index 725f02c97..d85dc662e 100644 --- a/moses/TranslationModel/RuleTable/UTrieNode.cpp +++ b/moses/TranslationModel/RuleTable/UTrieNode.cpp @@ -49,7 +49,7 @@ void UTrieNode::Prune(size_t tableLimit) // Prune TargetPhraseCollections at this node. for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { - p->second.Prune(true, tableLimit); + p->second->Prune(true, tableLimit); } } @@ -66,7 +66,7 @@ void UTrieNode::Sort(size_t tableLimit) // Sort TargetPhraseCollections at this node. for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { - p->second.Sort(true, tableLimit); + p->second->Sort(true, tableLimit); } } @@ -89,8 +89,9 @@ UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm) return m_gapNode; } -TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection( - const TargetPhrase &target) +TargetPhraseCollection::shared_ptr +UTrieNode:: +GetOrCreateTargetPhraseCollection(const TargetPhrase &target) { const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm(); const size_t rank = alignmentInfo.GetSize(); @@ -107,8 +108,9 @@ TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection( const Word &targetNonTerm = target.GetWord(targetNonTermIndex); vec.push_back(InsertLabel(i++, targetNonTerm)); } - - return m_labelMap[vec]; + TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec]; + if (ret == NULL) ret.reset(new TargetPhraseCollection); + return ret; } } // namespace Moses diff --git a/moses/TranslationModel/RuleTable/UTrieNode.h b/moses/TranslationModel/RuleTable/UTrieNode.h index 436bcbea1..9438f9bfd 100644 --- a/moses/TranslationModel/RuleTable/UTrieNode.h +++ b/moses/TranslationModel/RuleTable/UTrieNode.h @@ -51,10 +51,10 @@ public: TerminalEqualityPred> TerminalMap; typedef boost::unordered_map, - TargetPhraseCollection> LabelMap; + TargetPhraseCollection::shared_ptr> LabelMap; #else typedef std::map TerminalMap; - typedef std::map, TargetPhraseCollection> LabelMap; + typedef std::map, TargetPhraseCollection::shared_ptr> LabelMap; #endif ~UTrieNode() { @@ -78,8 +78,8 @@ public: UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm); UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm); - TargetPhraseCollection &GetOrCreateTargetPhraseCollection( - const TargetPhrase &); + TargetPhraseCollection::shared_ptr + GetOrCreateTargetPhraseCollection(const TargetPhrase &); bool IsLeaf() const { return m_terminalMap.empty() && m_gapNode == NULL; diff --git a/moses/TranslationModel/Scope3Parser/Parser.cpp b/moses/TranslationModel/Scope3Parser/Parser.cpp index c8c8c3e49..c306fc35a 100644 --- a/moses/TranslationModel/Scope3Parser/Parser.cpp +++ b/moses/TranslationModel/Scope3Parser/Parser.cpp @@ -47,7 +47,8 @@ void Scope3Parser::GetChartRuleCollection( const size_t start = range.GetStartPos(); const size_t end = range.GetEndPos(); - std::vector > &pairVec = m_ruleApplications[start][end-start+1]; + std::vector > &pairVec + = m_ruleApplications[start][end-start+1]; MatchCallback matchCB(range, outColl); for (std::vector >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) { @@ -58,8 +59,8 @@ void Scope3Parser::GetChartRuleCollection( if (varSpanNode.m_rank == 0) { // Purely lexical rule. assert(labelMap.size() == 1); - const TargetPhraseCollection &tpc = labelMap.begin()->second; - matchCB.m_tpc = &tpc; + TargetPhraseCollection::shared_ptr tpc = labelMap.begin()->second; + matchCB.m_tpc = tpc; matchCB(m_emptyStackVec); } else { // Rule has at least one non-terminal. varSpanNode.CalculateRanges(start, end, m_ranges); @@ -70,7 +71,7 @@ void Scope3Parser::GetChartRuleCollection( UTrieNode::LabelMap::const_iterator p = labelMap.begin(); for (; p != labelMap.end(); ++p) { const std::vector &labels = p->first; - const TargetPhraseCollection &tpc = p->second; + TargetPhraseCollection::shared_ptr tpc = p->second; assert(labels.size() == varSpanNode.m_rank); bool failCheck = false; for (size_t i = 0; i < varSpanNode.m_rank; ++i) { @@ -82,7 +83,7 @@ void Scope3Parser::GetChartRuleCollection( if (failCheck) { continue; } - matchCB.m_tpc = &tpc; + matchCB.m_tpc = tpc; searcher.Search(labels, matchCB); } } diff --git a/moses/TranslationModel/Scope3Parser/Parser.h b/moses/TranslationModel/Scope3Parser/Parser.h index 70b26b50d..6091697fb 100644 --- a/moses/TranslationModel/Scope3Parser/Parser.h +++ b/moses/TranslationModel/Scope3Parser/Parser.h @@ -66,17 +66,16 @@ private: // Define a callback type for use by StackLatticeSearcher. struct MatchCallback { public: - MatchCallback(const WordsRange &range, - ChartParserCallback &out) - : m_range(range) - , m_out(out) - , m_tpc(NULL) {} + MatchCallback(const WordsRange &range, ChartParserCallback &out) + : m_range(range) , m_out(out) // , m_tpc(NULL) + { } + void operator()(const StackVec &stackVec) { m_out.Add(*m_tpc, stackVec, m_range); } const WordsRange &m_range; ChartParserCallback &m_out; - const TargetPhraseCollection *m_tpc; + TargetPhraseCollection::shared_ptr m_tpc; }; void Init(); diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/SkeletonPT.cpp index 8e2b1daa3..a581ca0d8 100644 --- a/moses/TranslationModel/SkeletonPT.cpp +++ b/moses/TranslationModel/SkeletonPT.cpp @@ -32,12 +32,13 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu const Phrase &sourcePhrase = inputPath.GetPhrase(); TargetPhrase *tp = CreateTargetPhrase(sourcePhrase); - TargetPhraseCollection *tpColl = new TargetPhraseCollection(); + TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection); tpColl->Add(tp); // add target phrase to phrase-table cache size_t hash = hash_value(sourcePhrase); - std::pair value(tpColl, clock()); + std::pair + value(tpColl, clock()); cache[hash] = value; inputPath.SetTargetPhrases(*this, tpColl, NULL); diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc index b215f34bc..5a860107a 100644 --- a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc +++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc @@ -4,182 +4,52 @@ namespace Moses { using std::vector; - TPCollCache - ::TPCollCache(size_t capacity) + TPCollCache:: + TPCollCache(size_t capacity) { - m_doomed_first = m_doomed_last = NULL; - m_doomed_count = 0; + m_qfirst = m_qlast = m_cache.end(); m_capacity = capacity; + UTIL_THROW_IF2(m_capacity <= 2, "Cache capacity must be > 1!"); } - - bool - sancheck(TPCollWrapper const* first, TPCollWrapper const* last, size_t count) - { - if (first == NULL) - { - UTIL_THROW_IF2(last != NULL || count != 0, "queue error"); - return true; - } - - size_t s = 0; - for (TPCollWrapper const* x = first; x; x = x->next) - { - std::cerr << ++s << "/" << count << " " - << first << " " - << x->prev << " " << x << " " << x->next << " " - << last << std::endl; - } - std::cerr << std::string(80,'-') << std::endl; - // while (x != last && s < count) - // { - // UTIL_THROW_IF2(x->next == NULL, "queue error"); - // x = x->next; - // ++s; - // std::cerr << x << " " << s << "/" << count << std::endl; - // } - // std::cerr << x << " " << s << "/" << count << std::endl; - - // UTIL_THROW_IF2(x != last, "queue error"); - // UTIL_THROW_IF2(s != count, "queue error"); - // x = last; s = 1; - // while (x != first && s++ < count) - // { - // UTIL_THROW_IF2(x->prev == NULL, "queue error"); - // x = x->prev; - // } - // UTIL_THROW_IF2(x != first, "queue error"); - // UTIL_THROW_IF2(s != count, "queue error"); - return true; - } - - /// remove a TPC from the "doomed" queue - void - TPCollCache - ::remove_from_queue(TPCollWrapper* x) - { - // caller must lock! - - if (m_doomed_first != x && x->prev == NULL) - { // not in the queue - UTIL_THROW_IF2(x->next, "queue error"); - return; - } - - sancheck(m_doomed_first, m_doomed_last, m_doomed_count); - - std::cerr << "Removing " << x << std::endl; - - if (m_doomed_first == x) - m_doomed_first = x->next; - else x->prev->next = x->next; - - if (m_doomed_last == x) - m_doomed_last = x->prev; - else x->next->prev = x->prev; - - x->next = x->prev = NULL; - --m_doomed_count; - - // sancheck(m_doomed_first, m_doomed_last, m_doomed_count); - } - - void - TPCollCache - ::add_to_queue(TPCollWrapper* x) - { - // sancheck(m_doomed_first, m_doomed_last, m_doomed_count); - - // caller must lock! - x->prev = m_doomed_last; - - if (!m_doomed_first) - m_doomed_first = x; - - if (m_doomed_last) m_doomed_last->next = x; - m_doomed_last = x; - - ++m_doomed_count; - - // sancheck(m_doomed_first, m_doomed_last, m_doomed_count); - } - - TPCollWrapper* - TPCollCache - ::get(uint64_t key, size_t revision) + SPTR + TPCollCache:: + get(uint64_t key, size_t revision) { using namespace boost; - upgrade_lock rlock(m_lock); - cache_t::iterator m = m_cache.find(key); - if (m == m_cache.end()) // new + unique_lock lock(m_lock); + std::pair > e(key, SPTR()); + std::pair foo = m_cache.insert(e); + SPTR& ret = foo.first->second; + if (ret) { - std::pair e(key,NULL); - upgrade_to_unique_lock wlock(rlock); - std::pair foo = m_cache.insert(e); - if (foo.second) foo.first->second = new TPCollWrapper(key, revision); - m = foo.first; - // ++m->second->refCount; + if (m_qfirst == foo.first) m_qfirst = ret->next; + else ret->prev->second->next = ret->next; + if (m_qlast != foo.first) + ret->next->second->prev = ret->prev; } - else + if (!ret || ret->revision != revision) + ret.reset(new TPCollWrapper(key,revision)); + ret->prev = m_qlast; + if (m_qlast != m_cache.end()) m_qlast->second->next = foo.first; + m_qlast = foo.first; + + while (m_cache.size() > m_capacity && m_qfirst->second.use_count() == 1) { - if (m->second->refCount == 0) - { - upgrade_to_unique_lock wlock(rlock); - remove_from_queue(m->second); - } - if (m->second->revision != revision) // out of date - { - upgrade_to_unique_lock wlock(rlock); - m->second = new TPCollWrapper(key, revision); - } + m_qfirst = m_qfirst->second->next; + m_cache.erase(m_qfirst->second->prev); } - ++m->second->refCount; - return m->second; + + return ret; } // TPCollCache::get(...) - void - TPCollCache - ::release(TPCollWrapper const* ptr) - { - if (!ptr) return; - std::cerr << "Releasing " << ptr->key << " (" << ptr->refCount << ")" << std::endl; - if (--ptr->refCount == 0) - { - boost::unique_lock lock(m_lock); - if (m_doomed_count == m_capacity) - { - TPCollWrapper* x = m_doomed_first; - remove_from_queue(x); - UTIL_THROW_IF2(x->refCount || x == ptr, "TPC was doomed while still in use!"); - cache_t::iterator m = m_cache.find(ptr->key); - if (m != m_cache.end() && m->second == ptr) - { // the cache could have been updated with a new pointer - // for the same phrase already, so we need to check - // if the pointer we cound is the one we want to get rid of, - // hence the second check - // boost::upgrade_to_unique_lock xlock(lock); - m_cache.erase(m); - } - - std::cerr << "Deleting " << x->key << " " << x->refCount << std::endl; - - // delete x; - } - add_to_queue(const_cast(ptr)); - } - } // TPCollCache::release(...) - TPCollWrapper:: TPCollWrapper(uint64_t key_, size_t revision_) - : refCount(0), prev(NULL), next(NULL) - , revision(revision_), key(key_) + : revision(revision_), key(key_) { } TPCollWrapper:: ~TPCollWrapper() - { - UTIL_THROW_IF2(this->refCount, "TPCollWrapper refCount > 0!"); - assert(this->refCount == 0); - } + { } } // namespace diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h index dff51db94..31356cb71 100644 --- a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h +++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h @@ -3,60 +3,44 @@ #include #include "moses/TargetPhraseCollection.h" #include - +#include "mm/ug_typedefs.h" namespace Moses { - class TPCollCache; + class TPCollWrapper; + + class TPCollCache + { + public: + typedef boost::unordered_map > cache_t; + private: + uint32_t m_capacity; // capacity of cache + cache_t m_cache; // maps from ids to items + cache_t::iterator m_qfirst, m_qlast; + mutable boost::shared_mutex m_lock; + public: + TPCollCache(size_t capacity=10000); + + SPTR + get(uint64_t key, size_t revision); + + }; - class TPCollWrapper // wrapper around TargetPhraseCollection with reference counting // and additional members for caching purposes + class TPCollWrapper : public TargetPhraseCollection { friend class TPCollCache; friend class Mmsapt; - mutable boost::atomic refCount; // reference count public: - TPCollWrapper* prev; // ... in queue of TPCollWrappers used recently - TPCollWrapper* next; // ... in queue of TPCollWrappers used recently + TPCollCache::cache_t::iterator prev, next; public: mutable boost::shared_mutex lock; size_t const revision; // rev. No. of the underlying corpus uint64_t const key; // phrase key -#if defined(timespec) // timespec is better, but not available everywhere - timespec tstamp; // last use -#else - timeval tstamp; // last use -#endif TPCollWrapper(uint64_t const key, size_t const rev); ~TPCollWrapper(); }; - class TPCollCache - { - typedef boost::unordered_map cache_t; - typedef std::vector history_t; - cache_t m_cache; // maps from phrase ids to target phrase collections - // mutable history_t m_history; // heap of live items, least recently used one on top - - mutable boost::shared_mutex m_lock; // locks m_cache - - TPCollWrapper* m_doomed_first; - TPCollWrapper* m_doomed_last; - uint32_t m_doomed_count; // counter of doomed TPCs - uint32_t m_capacity; // capacity of cache - void add_to_queue(TPCollWrapper* x); - void remove_from_queue(TPCollWrapper* x); - public: - TPCollCache(size_t capacity=10000); - - TPCollWrapper* - get(uint64_t key, size_t revision); - - void - release(TPCollWrapper const* tpc); - }; - - } diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index 502250ed7..13cb61526 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -627,30 +627,32 @@ namespace Moses { InputPath &inputPath = **iter; const Phrase &phrase = inputPath.GetPhrase(); - const TargetPhraseCollection *targetPhrases + TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(ttask,phrase); inputPath.SetTargetPhrases(*this, targetPhrases, NULL); } } - TargetPhraseCollection const* - Mmsapt:: - GetTargetPhraseCollectionLEGACY(const Phrase& src) const - { - UTIL_THROW2("Don't call me without the translation task."); - } + // TargetPhraseCollection::shared_ptr + // Mmsapt:: + // GetTargetPhraseCollectionLEGACY(const Phrase& src) const + // { + // UTIL_THROW2("Don't call me without the translation task."); + // } // This is not the most efficient way of phrase lookup! - TargetPhraseCollection const* + TargetPhraseCollection::shared_ptr Mmsapt:: GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const { - boost::unique_lock xlock(m_lock); + SPTR ret; + // boost::unique_lock xlock(m_lock); + // map from Moses Phrase to internal id sequence vector sphrase; fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase); - if (sphrase.size() == 0) return NULL; - + if (sphrase.size() == 0) return ret; + // Reserve a local copy of the dynamic bitext in its current form. /btdyn/ // is set to a new copy of the dynamic bitext every time a sentence pair // is added. /dyn/ keeps the old bitext around as long as we need it. @@ -665,42 +667,42 @@ namespace Moses // lookup phrases in both bitexts TSA::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size()); TSA::tree_iterator mdyn(dyn->I1.get()); - if (dyn->I1.get()) + if (dyn->I1.get()) // we have a dynamic bitext for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i) mdyn.extend(sphrase[i]); if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size()) - return NULL; // phrase not found in either bitext + return ret; // phrase not found in either bitext // do we have cached results for this phrase? uint64_t phrasekey = (mfix.size() == sphrase.size() - ? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1); - - // std::cerr << "Phrasekey is " << phrasekey << " at " << HERE << std::endl; + ? (mfix.getPid()<<1) + : (mdyn.getPid()<<1)+1); // get context-specific cache of items previously looked up SPTR const& scope = ttask->GetScope(); SPTR cache = scope->get(cache_key); - if (!cache) cache = m_cache; - TPCollWrapper* ret = cache->get(phrasekey, dyn->revision()); - // TO DO: we should revise the revision mechanism: we take the length - // of the dynamic bitext (in sentences) at the time the PT entry - // was stored as the time stamp. For each word in the + if (!cache) cache = m_cache; // no context-specific cache, use global one + + ret = cache->get(phrasekey, dyn->revision()); + // TO DO: we should revise the revision mechanism: we take the + // length of the dynamic bitext (in sentences) at the time the PT + // entry was stored as the time stamp. For each word in the // vocabulary, we also store its most recent occurrence in the // bitext. Only if the timestamp of each word in the phrase is // newer than the timestamp of the phrase itself we must update // the entry. // std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl; - std::cerr << ret << " with " << ret->refCount << " references at " - << HERE << std::endl; + // std::cerr << ret << " with " << ret->refCount << " references at " + // << HERE << std::endl; boost::upgrade_lock rlock(ret->lock); if (ret->GetSize()) return ret; // new TPC (not found or old one was not up to date) boost::upgrade_to_unique_lock wlock(rlock); + // maybe another thread did the work while we waited for the lock ? if (ret->GetSize()) return ret; - // check again, another thread may have done the work already // OK: pt entry NOT found or NOT up to date // lookup and expansion could be done in parallel threads, @@ -718,12 +720,16 @@ namespace Moses else { BitextSampler s(btfix.get(), mfix, context->bias, - m_min_sample_size, m_default_sample_size, m_sampling_method); + m_min_sample_size, + m_default_sample_size, + m_sampling_method); s(); sfix = s.stats(); } } - if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn); + + if (mdyn.size() == sphrase.size()) + sdyn = dyn->lookup(ttask, mdyn); vector > ppfix,ppdyn; PhrasePair::SortByTargetIdSeq sort_by_tgt_id; @@ -737,6 +743,7 @@ namespace Moses expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log); sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id); } + // now we have two lists of Phrase Pairs, let's merge them PhrasePair::SortByTargetIdSeq sorter; size_t i = 0; size_t k = 0; @@ -939,9 +946,10 @@ namespace Moses return mdyn.size() == myphrase.size(); } +#if 0 void Mmsapt - ::Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const + ::Release(ttasksptr const& ttask, TargetPhraseCollection::shared_ptr*& tpc) const { if (!tpc) { @@ -957,6 +965,7 @@ namespace Moses if (cache) cache->release(static_cast(tpc)); tpc = NULL; } +#endif bool Mmsapt ::ProvidesPrefixCheck() const { return true; } diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index 01044573a..abf56c6cd 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -179,7 +179,7 @@ namespace Moses uint64_t const pid1, sapt::pstats const& stats, sapt::Bitext const & bt, - TargetPhraseCollection* tpcoll + TargetPhraseCollection::shared_ptr tpcoll ) const; bool @@ -187,14 +187,14 @@ namespace Moses (Phrase const& src, uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext const & bta, uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext const & btb, - TargetPhraseCollection* tpcoll) const; + TargetPhraseCollection::shared_ptr tpcoll) const; bool combine_pstats (Phrase const& src, uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext const & bta, uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext const & btb, - TargetPhraseCollection* tpcoll) const; + TargetPhraseCollection::shared_ptr tpcoll) const; void load_extra_data(std::string bname, bool locking); void load_bias(std::string bname); @@ -209,15 +209,15 @@ namespace Moses std::string const& GetName() const; #ifndef NO_MOSES - TargetPhraseCollection const* + TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const; - TargetPhraseCollection const* - GetTargetPhraseCollectionLEGACY(const Phrase& src) const; + // TargetPhraseCollection::shared_ptr + // GetTargetPhraseCollectionLEGACY(const Phrase& src) const; void - GetTargetPhraseCollectionBatch(ttasksptr const& ttask, - const InputPathList &inputPathQueue) const; + GetTargetPhraseCollectionBatch + (ttasksptr const& ttask, InputPathList const& inputPathQueue) const; //! Create a sentence-specific manager for SCFG rule lookup. ChartRuleLookupManager* @@ -234,7 +234,8 @@ namespace Moses void setWeights(std::vector const& w); - void Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const; + // void Release(ttasksptr const& ttask, + // TargetPhraseCollection const*& tpc) const; // some consumer lets me know that *tpc isn't needed any more diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc index d6def9e35..d37097c97 100644 --- a/moses/TranslationModel/UG/ptable-lookup.cc +++ b/moses/TranslationModel/UG/ptable-lookup.cc @@ -80,7 +80,8 @@ int main(int argc, char* argv[]) Phrase& p = *phrase; cout << p << endl; - TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(ttask,p); + TargetPhraseCollection::shared_ptr trg + = PT->GetTargetPhraseCollectionLEGACY(ttask,p); if (!trg) continue; vector order(trg->GetSize()); for (size_t i = 0; i < order.size(); ++i) order[i] = i; @@ -118,7 +119,7 @@ int main(int argc, char* argv[]) } cout << endl; } - PT->Release(ttask, trg); + // PT->Release(ttask, trg); } exit(0); } diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index 2355b8913..f0f6c8d84 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -410,7 +410,7 @@ CreateTranslationOptionsForRange const DecodeStep &dstep = **d; const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature(); - const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict); + TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict); static_cast(dstep).ProcessInitialTranslation (m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases); @@ -431,7 +431,7 @@ CreateTranslationOptionsForRange TranslationOption &inputPartialTranslOpt = **pto; if (const Tstep *tstep = dynamic_cast(dstep)) { const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature(); - const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict); + TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict); tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc, this, adhereTableLimit, targetPhrases); } else { diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp index b43ef81df..bf5c305d2 100644 --- a/moses/TranslationOptionCollectionLattice.cpp +++ b/moses/TranslationOptionCollectionLattice.cpp @@ -142,7 +142,8 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions() for (size_t i = 0; i < m_inputPathQueue.size(); ++i) { const InputPath &path = *m_inputPathQueue[i]; - const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary); + TargetPhraseCollection::shared_ptr tpColl + = path.GetTargetPhrases(phraseDictionary); const WordsRange &range = path.GetWordsRange(); if (tpColl && tpColl->GetSize()) {