Life cycle of TargetPhraseCollection is now managed via shared pointers.

This commit is contained in:
Ulrich Germann 2015-10-18 21:27:58 +01:00
parent 7a85126a92
commit bdb0227ee9
95 changed files with 896 additions and 810 deletions

View File

@ -249,16 +249,12 @@ size_t PhraseNode::ReadChild(Word &wordFound, uint64_t &childFilePos, const char
return memRead; return memRead;
} }
const TargetPhraseCollection *PhraseNode::GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const TargetPhraseCollection::shared_ptr
PhraseNode::
GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
{ {
TargetPhraseCollection *ret = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
if (m_value > 0) ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
if (m_value > 0)
ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
else {
}
return ret; return ret;
} }

View File

@ -92,8 +92,11 @@ public:
} }
const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const; const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const;
const TargetPhraseCollection *GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection(size_t tableLimit,
OnDiskWrapper &onDiskWrapper) const;
void AddCounts(const std::vector<float> &counts) { void AddCounts(const std::vector<float> &counts) {
m_counts = counts; m_counts = counts;
} }

View File

@ -114,23 +114,22 @@ void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper)
} }
Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors Moses::TargetPhraseCollection::shared_ptr TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors , const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict , const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT , const std::vector<float> &weightT
, Vocab &vocab , Vocab &vocab
, bool isSyntax) const , bool isSyntax) const
{ {
Moses::TargetPhraseCollection *ret = new Moses::TargetPhraseCollection(); Moses::TargetPhraseCollection::shared_ptr ret;
ret.reset(new Moses::TargetPhraseCollection);
CollType::const_iterator iter; CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) { for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
const TargetPhrase &tp = **iter; const TargetPhrase &tp = **iter;
Moses::TargetPhrase *mosesPhrase = tp.ConvertToMoses(inputFactors, outputFactors Moses::TargetPhrase *mosesPhrase
, vocab = tp.ConvertToMoses(inputFactors, outputFactors, vocab,
, phraseDict phraseDict, weightT, isSyntax);
, weightT
, isSyntax);
/* /*
// debugging output // debugging output

View File

@ -21,6 +21,8 @@
#include "TargetPhrase.h" #include "TargetPhrase.h"
#include "Vocab.h" #include "Vocab.h"
#include "moses/TargetPhraseCollection.h"
#include <boost/shared_ptr.hpp>
namespace Moses namespace Moses
{ {
@ -50,6 +52,9 @@ protected:
std::string m_debugStr; std::string m_debugStr;
public: public:
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
static size_t s_sortScoreInd; static size_t s_sortScoreInd;
TargetPhraseCollection(); TargetPhraseCollection();
@ -69,7 +74,7 @@ public:
uint64_t GetFilePos() const; uint64_t GetFilePos() const;
Moses::TargetPhraseCollection *ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors Moses::TargetPhraseCollection::shared_ptr ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors , const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict , const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT , const std::vector<float> &weightT

View File

@ -56,7 +56,7 @@ int main(int argc, char **argv)
if (node) { if (node) {
// source phrase points to a bunch of rules // source phrase points to a bunch of rules
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper); TargetPhraseCollection::shared_ptr coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
string str = coll->GetDebugStr(); string str = coll->GetDebugStr();
cout << "Found " << coll->GetSize() << endl; cout << "Found " << coll->GetSize() << endl;

View File

@ -116,7 +116,7 @@ typedef
boost::unordered_set<TargetPhrase*,PhrasePtrHasher,PhrasePtrComparator> PhraseSet; boost::unordered_set<TargetPhrase*,PhrasePtrHasher,PhrasePtrComparator> PhraseSet;
const TargetPhraseCollection* TargetPhraseCollection::shared_ptr
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
{ {
@ -125,7 +125,7 @@ PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
PhraseSet allPhrases; PhraseSet allPhrases;
vector<PhraseSet> phrasesByTable(m_dictionaries.size()); vector<PhraseSet> phrasesByTable(m_dictionaries.size());
for (size_t i = 0; i < m_dictionaries.size(); ++i) { for (size_t i = 0; i < m_dictionaries.size(); ++i) {
const TargetPhraseCollection* phrases = m_dictionaries[i]->GetTargetPhraseCollection(src); TargetPhraseCollection::shared_ptr phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
if (phrases) { if (phrases) {
for (TargetPhraseCollection::const_iterator j = phrases->begin(); for (TargetPhraseCollection::const_iterator j = phrases->begin();
j != phrases->end(); ++j) { j != phrases->end(); ++j) {

View File

@ -52,7 +52,7 @@ public:
, const LMList &languageModels , const LMList &languageModels
, float weightWP); , float weightWP);
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const; virtual TargetPhraseCollection::shared_ptr GetTargetPhraseCollection(const Phrase& src) const;
virtual void InitializeForInput(ttasksptr const& ttask); virtual void InitializeForInput(ttasksptr const& ttask);
virtual ChartRuleLookupManager *CreateRuleLookupManager( virtual ChartRuleLookupManager *CreateRuleLookupManager(
const InputType &, const InputType &,
@ -65,7 +65,7 @@ private:
typedef boost::shared_ptr<PhraseDictionaryTreeAdaptor> DictionaryHandle; typedef boost::shared_ptr<PhraseDictionaryTreeAdaptor> DictionaryHandle;
std::vector<DictionaryHandle> m_dictionaries; std::vector<DictionaryHandle> m_dictionaries;
std::vector<std::vector<float> > m_weights; //feature x table std::vector<std::vector<float> > m_weights; //feature x table
mutable TargetPhraseCollection* m_targetPhrases; mutable TargetPhraseCollection::shared_ptr m_targetPhrases;
std::vector<float> m_weightT; std::vector<float> m_weightT;
size_t m_tableLimit; size_t m_tableLimit;
const LMList* m_languageModels; const LMList* m_languageModels;

View File

@ -44,7 +44,7 @@ ChartParserUnknown
ChartParserUnknown::~ChartParserUnknown() ChartParserUnknown::~ChartParserUnknown()
{ {
RemoveAllInColl(m_unksrcs); RemoveAllInColl(m_unksrcs);
RemoveAllInColl(m_cacheTargetPhraseCollection); // RemoveAllInColl(m_cacheTargetPhraseCollection);
} }
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to) void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)

View File

@ -27,7 +27,7 @@
#include "WordsRange.h" #include "WordsRange.h"
#include "StackVec.h" #include "StackVec.h"
#include "InputPath.h" #include "InputPath.h"
#include "TargetPhraseCollection.h"
namespace Moses namespace Moses
{ {
@ -38,7 +38,7 @@ class Sentence;
class ChartCellCollectionBase; class ChartCellCollectionBase;
class Word; class Word;
class Phrase; class Phrase;
class TargetPhraseCollection; // class TargetPhraseCollection;
class DecodeGraph; class DecodeGraph;
class ChartParserUnknown class ChartParserUnknown
@ -56,7 +56,7 @@ public:
private: private:
std::vector<Phrase*> m_unksrcs; std::vector<Phrase*> m_unksrcs;
std::list<TargetPhraseCollection*> m_cacheTargetPhraseCollection; std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
}; };
class ChartParser class ChartParser

View File

@ -3,6 +3,7 @@
#include "StackVec.h" #include "StackVec.h"
#include <list> #include <list>
#include "TargetPhraseCollection.h"
namespace Moses namespace Moses
{ {
@ -23,7 +24,7 @@ public:
virtual bool Empty() const = 0; virtual bool Empty() const = 0;
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) = 0; virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range) = 0;
virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0; virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;

View File

@ -115,9 +115,13 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
} }
} }
void ChartTranslationOptionList::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) void
ChartTranslationOptionList::
AddPhraseOOV(TargetPhrase &phrase,
std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
const WordsRange &range)
{ {
TargetPhraseCollection *tpc = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
tpc->Add(&phrase); tpc->Add(&phrase);
waste_memory.push_back(tpc); waste_memory.push_back(tpc);
StackVec empty; StackVec empty;

View File

@ -55,7 +55,7 @@ public:
void Add(const TargetPhraseCollection &, const StackVec &, void Add(const TargetPhraseCollection &, const StackVec &,
const WordsRange &); const WordsRange &);
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range); void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
bool Empty() const { bool Empty() const {
return m_size == 0; return m_size == 0;

View File

@ -49,7 +49,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
, PartialTranslOptColl &outputPartialTranslOptColl , PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc , TranslationOptionCollection *toc
, bool adhereTableLimit , bool adhereTableLimit
, const TargetPhraseCollection *phraseColl) const , TargetPhraseCollection::shared_ptr phraseColl) const
{ {
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) { if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
// word deletion // word deletion
@ -105,7 +105,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
,PartialTranslOptColl &outputPartialTranslOptColl ,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit , size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhraseCollection *phraseColl) const , TargetPhraseCollection::shared_ptr phraseColl) const
{ {
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature(); const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
const size_t tableLimit = phraseDictionary->GetTableLimit(); const size_t tableLimit = phraseDictionary->GetTableLimit();
@ -147,7 +147,8 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
const size_t tableLimit = phraseDictionary->GetTableLimit(); const size_t tableLimit = phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos); const WordsRange wordsRange(startPos, endPos);
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange); TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
= phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
if (phraseColl != NULL) { if (phraseColl != NULL) {
IFVERBOSE(3) { IFVERBOSE(3) {
@ -237,8 +238,8 @@ ProcessLEGACY(TranslationOption const& in,
size_t const currSize = inPhrase.GetSize(); size_t const currSize = inPhrase.GetSize();
size_t const tableLimit = pdict->GetTableLimit(); size_t const tableLimit = pdict->GetTableLimit();
TargetPhraseCollectionWithSourcePhrase const* phraseColl; TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange); = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
if (phraseColl != NULL) { if (phraseColl != NULL) {
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;

View File

@ -48,7 +48,7 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl , PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc , TranslationOptionCollection *toc
, bool adhereTableLimit , bool adhereTableLimit
, const TargetPhraseCollection *phraseColl) const; , TargetPhraseCollection::shared_ptr phraseColl) const;
/*! initialize list of partial translation options by applying the first translation step /*! initialize list of partial translation options by applying the first translation step
@ -58,7 +58,7 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl , PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit , size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhraseCollection *phraseColl) const; , TargetPhraseCollection::shared_ptr phraseColl) const;
// legacy // legacy
void ProcessInitialTranslationLEGACY(const InputType &source void ProcessInitialTranslationLEGACY(const InputType &source

View File

@ -83,7 +83,7 @@ public:
void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored); void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored);
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range); void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
float GetBestScore(const ChartCellLabel *chartCell) const; float GetBestScore(const ChartCellLabel *chartCell) const;
@ -160,7 +160,7 @@ template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targe
} }
} }
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &, const WordsRange &range) template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &, const WordsRange &range)
{ {
std::vector<lm::WordIndex> words; std::vector<lm::WordIndex> words;
UTIL_THROW_IF2(phrase.GetSize() > 1, UTIL_THROW_IF2(phrase.GetSize() > 1,

View File

@ -39,34 +39,40 @@ InputPath::~InputPath()
// std::cerr << "Deconstructing InputPath" << std::endl; // std::cerr << "Deconstructing InputPath" << std::endl;
// Since there is no way for the Phrase Dictionaries to tell in
// which (sentence) context phrases were looked up, we tell them // // NOT NEEDED ANY MORE SINCE THE SWITCH TO SHARED POINTERS
// now that the phrase isn't needed any more by this inputPath // // Since there is no way for the Phrase Dictionaries to tell in
typedef std::pair<const TargetPhraseCollection*, const void* > entry; // // which (sentence) context phrases were looked up, we tell them
std::map<const PhraseDictionary*, entry>::iterator iter; // // now that the phrase isn't needed any more by this inputPath
ttasksptr theTask = this->ttask.lock(); // typedef std::pair<boost::shared_ptr<TargetPhraseCollection>, const void* > entry;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) // std::map<const PhraseDictionary*, entry>::iterator iter;
{ // ttasksptr theTask = this->ttask.lock();
// std::cerr << iter->second.first << " decommissioned." << std::endl; // for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
iter->first->Release(theTask, iter->second.first); // {
} // // std::cerr << iter->second.first << " decommissioned." << std::endl;
// iter->first->Release(theTask, iter->second.first);
// }
delete m_inputScore; delete m_inputScore;
} }
const TargetPhraseCollection *InputPath::GetTargetPhrases(const PhraseDictionary &phraseDictionary) const TargetPhraseCollection::shared_ptr
InputPath::
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
{ {
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter; TargetPhrases::const_iterator iter;
iter = m_targetPhrases.find(&phraseDictionary); iter = m_targetPhrases.find(&phraseDictionary);
if (iter == m_targetPhrases.end()) { if (iter == m_targetPhrases.end()) {
return NULL; return TargetPhraseCollection::shared_ptr();
} }
return iter->second.first; return iter->second.first;
} }
const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const const void*
InputPath::
GetPtNode(const PhraseDictionary &phraseDictionary) const
{ {
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter; TargetPhrases::const_iterator iter;
iter = m_targetPhrases.find(&phraseDictionary); iter = m_targetPhrases.find(&phraseDictionary);
if (iter == m_targetPhrases.end()) { if (iter == m_targetPhrases.end()) {
return NULL; return NULL;
@ -74,11 +80,14 @@ const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
return iter->second.second; return iter->second.second;
} }
void InputPath::SetTargetPhrases(const PhraseDictionary &phraseDictionary void
, const TargetPhraseCollection *targetPhrases InputPath::
, const void *ptNode) SetTargetPhrases(const PhraseDictionary &phraseDictionary,
TargetPhraseCollection::shared_ptr const& targetPhrases,
const void *ptNode)
{ {
std::pair<const TargetPhraseCollection*, const void*> value(targetPhrases, ptNode); std::pair<TargetPhraseCollection::shared_ptr, const void*>
value(targetPhrases, ptNode);
m_targetPhrases[&phraseDictionary] = value; m_targetPhrases[&phraseDictionary] = value;
} }
@ -93,10 +102,10 @@ const Word &InputPath::GetLastWord() const
size_t InputPath::GetTotalRuleSize() const size_t InputPath::GetTotalRuleSize() const
{ {
size_t ret = 0; size_t ret = 0;
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter; TargetPhrases::const_iterator iter;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) { for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
// const PhraseDictionary *pt = iter->first; // const PhraseDictionary *pt = iter->first;
const TargetPhraseCollection *tpColl = iter->second.first; TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
if (tpColl) { if (tpColl) {
ret += tpColl->GetSize(); ret += tpColl->GetSize();
@ -110,10 +119,10 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj)
{ {
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase(); out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase();
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter; InputPath::TargetPhrases::const_iterator iter;
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) { for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
const PhraseDictionary *pt = iter->first; const PhraseDictionary *pt = iter->first;
const TargetPhraseCollection *tpColl = iter->second.first; boost::shared_ptr<TargetPhraseCollection const> tpColl = iter->second.first;
out << pt << "="; out << pt << "=";
if (tpColl) { if (tpColl) {

View File

@ -8,12 +8,12 @@
#include "WordsRange.h" #include "WordsRange.h"
#include "NonTerminal.h" #include "NonTerminal.h"
#include "moses/FactorCollection.h" #include "moses/FactorCollection.h"
#include <boost/shared_ptr.hpp>
#include "TargetPhraseCollection.h"
namespace Moses namespace Moses
{ {
class PhraseDictionary; class PhraseDictionary;
class TargetPhraseCollection;
class ScoreComponentCollection; class ScoreComponentCollection;
class TargetPhrase; class TargetPhrase;
class InputPath; class InputPath;
@ -32,7 +32,12 @@ class InputPath
friend std::ostream& operator<<(std::ostream& out, const InputPath &obj); friend std::ostream& operator<<(std::ostream& out, const InputPath &obj);
public: public:
typedef std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> > TargetPhrases;
typedef std::pair<TargetPhraseCollection::shared_ptr, const void*>
TPCollStoreEntry;
typedef std::map<const PhraseDictionary*, TPCollStoreEntry>
TargetPhrases;
public: public:
ttaskwptr const ttask; ttaskwptr const ttask;
@ -96,10 +101,14 @@ public:
m_nextNode = nextNode; m_nextNode = nextNode;
} }
void SetTargetPhrases(const PhraseDictionary &phraseDictionary void
, const TargetPhraseCollection *targetPhrases SetTargetPhrases(const PhraseDictionary &phraseDictionary,
, const void *ptNode); TargetPhraseCollection::shared_ptr const& targetPhrases,
const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const; const void *ptNode);
TargetPhraseCollection::shared_ptr
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
const TargetPhrases &GetTargetPhrases() const { const TargetPhrases &GetTargetPhrases() const {
return m_targetPhrases; return m_targetPhrases;
} }

View File

@ -63,27 +63,29 @@ void PDTAimp::CleanUp()
{ {
assert(m_dict); assert(m_dict);
m_dict->FreeMemory(); m_dict->FreeMemory();
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i]; // for(size_t i=0; i<m_tgtColls.size(); ++i) m_tgtColls[i].reset();
m_tgtColls.clear(); m_tgtColls.clear();
m_cache.clear(); m_cache.clear();
m_rangeCache.clear(); m_rangeCache.clear();
uniqSrcPhr.clear(); uniqSrcPhr.clear();
} }
TargetPhraseCollectionWithSourcePhrase const* TargetPhraseCollectionWithSourcePhrase::shared_ptr
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
{ {
assert(m_dict); assert(m_dict);
if(src.GetSize()==0) return 0;
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
if(src.GetSize()==0) return ret;
std::pair<MapSrc2Tgt::iterator,bool> piter; std::pair<MapSrc2Tgt::iterator,bool> piter;
if(useCache) { if(useCache) {
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0))); piter=m_cache.insert(std::make_pair(src, ret));
if(!piter.second) return piter.first->second; if(!piter.second) return piter.first->second;
} else if (m_cache.size()) { } else if (m_cache.size()) {
MapSrc2Tgt::const_iterator i=m_cache.find(src); MapSrc2Tgt::const_iterator i=m_cache.find(src);
return (i!=m_cache.end() ? i->second : 0); return (i!=m_cache.end() ? i->second : ret);
} }
std::vector<std::string> srcString(src.GetSize()); std::vector<std::string> srcString(src.GetSize());
@ -97,7 +99,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
std::vector<std::string> wacands; std::vector<std::string> wacands;
m_dict->GetTargetCandidates(srcString,cands,wacands); m_dict->GetTargetCandidates(srcString,cands,wacands);
if(cands.empty()) { if(cands.empty()) {
return 0; return ret;
} }
//TODO: Multiple models broken here //TODO: Multiple models broken here
@ -140,16 +142,14 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
sourcePhrases.push_back(src); sourcePhrases.push_back(src);
} }
TargetPhraseCollectionWithSourcePhrase *rv; ret = PruneTargetCandidates(tCands,costs, sourcePhrases);
rv=PruneTargetCandidates(tCands,costs, sourcePhrases); if(ret->IsEmpty()) {
if(rv->IsEmpty()) { ret.reset();
delete rv;
return 0;
} else { } else {
if(useCache) piter.first->second=rv; if(useCache) piter.first->second = ret;
m_tgtColls.push_back(rv); m_tgtColls.push_back(ret);
return rv;
} }
return ret;
} }
@ -352,7 +352,8 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
pathExplored[len]+=exploredPaths[len]; pathExplored[len]+=exploredPaths[len];
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0)); // m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize()));
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) { for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
assert(i->first.first<m_rangeCache.size()); assert(i->first.first<m_rangeCache.size());
@ -386,10 +387,11 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl; //std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
} }
TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases); TargetPhraseCollectionWithSourcePhrase::shared_ptr
rv = PruneTargetCandidates(tCands, costs, sourcePhrases);
if(rv->IsEmpty()) if(rv->IsEmpty())
delete rv; rv.reset();
else { else {
m_rangeCache[i->first.first][i->first.second-1]=rv; m_rangeCache[i->first.first][i->first.second-1]=rv;
m_tgtColls.push_back(rv); m_tgtColls.push_back(rv);
@ -428,7 +430,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply()); targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
} }
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates TargetPhraseCollectionWithSourcePhrase::shared_ptr
PDTAimp::PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands, (const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs, std::vector<std::pair<float,size_t> >& costs,
const std::vector<Phrase> &sourcePhrases) const const std::vector<Phrase> &sourcePhrases) const
@ -437,7 +440,8 @@ TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(), UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
"Number of target phrases must equal number of source phrases"); "Number of target phrases must equal number of source phrases");
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase; TargetPhraseCollectionWithSourcePhrase::shared_ptr rv;
rv.reset(new TargetPhraseCollectionWithSourcePhrase);
// set limit to tableLimit or actual size, whatever is smaller // set limit to tableLimit or actual size, whatever is smaller

View File

@ -44,10 +44,10 @@ public:
std::vector<FactorType> m_input,m_output; std::vector<FactorType> m_input,m_output;
PhraseDictionaryTree *m_dict; PhraseDictionaryTree *m_dict;
const InputFeature *m_inputFeature; const InputFeature *m_inputFeature;
typedef std::vector<TargetPhraseCollectionWithSourcePhrase const*> vTPC; typedef std::vector<TargetPhraseCollectionWithSourcePhrase::shared_ptr> vTPC;
mutable vTPC m_tgtColls; mutable vTPC m_tgtColls;
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase const*> MapSrc2Tgt; typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase::shared_ptr> MapSrc2Tgt;
mutable MapSrc2Tgt m_cache; mutable MapSrc2Tgt m_cache;
PhraseDictionaryTreeAdaptor *m_obj; PhraseDictionaryTreeAdaptor *m_obj;
int useCache; int useCache;
@ -69,7 +69,7 @@ public:
void CleanUp(); void CleanUp();
TargetPhraseCollectionWithSourcePhrase const* TargetPhraseCollectionWithSourcePhrase::shared_ptr
GetTargetPhraseCollection(Phrase const &src) const; GetTargetPhraseCollection(Phrase const &src) const;
void Create(const std::vector<FactorType> &input void Create(const std::vector<FactorType> &input
@ -121,7 +121,7 @@ public:
const std::string *alignmentString, const std::string *alignmentString,
Phrase const* srcPtr=0) const; Phrase const* srcPtr=0) const;
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates TargetPhraseCollectionWithSourcePhrase::shared_ptr PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands, (const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs, std::vector<std::pair<float,size_t> >& costs,
const std::vector<Phrase> &sourcePhrases) const; const std::vector<Phrase> &sourcePhrases) const;

View File

@ -28,9 +28,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
HyperPath source; HyperPath source;
SynthesizeHyperPath(e, source); SynthesizeHyperPath(e, source);
TargetPhrase *tp = SynthesizeTargetPhrase(e); TargetPhrase *tp = SynthesizeTargetPhrase(e);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(m_hyperTree, TargetPhraseCollection::shared_ptr tpc
source); = GetOrCreateTargetPhraseCollection(m_hyperTree, source);
tpc.Add(tp); tpc->Add(tp);
} }
void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e, void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,

View File

@ -14,7 +14,7 @@ void HyperTree::Node::Prune(std::size_t tableLimit)
p->second.Prune(tableLimit); p->second.Prune(tableLimit);
} }
// Prune TargetPhraseCollection at this node. // Prune TargetPhraseCollection at this node.
m_targetPhraseCollection.Prune(true, tableLimit); m_targetPhraseCollection->Prune(true, tableLimit);
} }
void HyperTree::Node::Sort(std::size_t tableLimit) void HyperTree::Node::Sort(std::size_t tableLimit)
@ -24,7 +24,7 @@ void HyperTree::Node::Sort(std::size_t tableLimit)
p->second.Sort(tableLimit); p->second.Sort(tableLimit);
} }
// Sort TargetPhraseCollection at this node. // Sort TargetPhraseCollection at this node.
m_targetPhraseCollection.Sort(true, tableLimit); m_targetPhraseCollection->Sort(true, tableLimit);
} }
HyperTree::Node *HyperTree::Node::GetOrCreateChild( HyperTree::Node *HyperTree::Node::GetOrCreateChild(
@ -40,7 +40,7 @@ const HyperTree::Node *HyperTree::Node::GetChild(
return (p == m_map.end()) ? NULL : &p->second; return (p == m_map.end()) ? NULL : &p->second;
} }
TargetPhraseCollection &HyperTree::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection(
const HyperPath &hyperPath) const HyperPath &hyperPath)
{ {
Node &node = GetOrCreateNode(hyperPath); Node &node = GetOrCreateNode(hyperPath);

View File

@ -37,7 +37,7 @@ public:
} }
bool HasRules() const { bool HasRules() const {
return !m_targetPhraseCollection.IsEmpty(); return !m_targetPhraseCollection->IsEmpty();
} }
void Prune(std::size_t tableLimit); void Prune(std::size_t tableLimit);
@ -47,11 +47,13 @@ public:
const Node *GetChild(const HyperPath::NodeSeq &) const; const Node *GetChild(const HyperPath::NodeSeq &) const;
const TargetPhraseCollection &GetTargetPhraseCollection() const { TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() const {
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
TargetPhraseCollection &GetTargetPhraseCollection() { TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() {
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
@ -59,12 +61,14 @@ public:
return m_map; return m_map;
} }
Node() : m_targetPhraseCollection(new TargetPhraseCollection) { }
private: private:
Map m_map; Map m_map;
TargetPhraseCollection m_targetPhraseCollection; TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
}; };
HyperTree(const RuleTableFF *ff) : RuleTable(ff) {} HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
const Node &GetRootNode() const { const Node &GetRootNode() const {
return m_root; return m_root;
@ -73,7 +77,8 @@ public:
private: private:
friend class HyperTreeCreator; friend class HyperTreeCreator;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const HyperPath &); TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const HyperPath &);
Node &GetOrCreateNode(const HyperPath &); Node &GetOrCreateNode(const HyperPath &);

View File

@ -21,7 +21,7 @@ protected:
// Provide access to HyperTree's private GetOrCreateTargetPhraseCollection // Provide access to HyperTree's private GetOrCreateTargetPhraseCollection
// function. // function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
HyperTree &trie, const HyperPath &fragment) { HyperTree &trie, const HyperPath &fragment) {
return trie.GetOrCreateTargetPhraseCollection(fragment); return trie.GetOrCreateTargetPhraseCollection(fragment);
} }

View File

@ -130,9 +130,9 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
ff.GetFeaturesToApply()); ff.GetFeaturesToApply());
// Add rule to trie. // Add rule to trie.
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr phraseColl
trie, sourceFragment); = GetOrCreateTargetPhraseCollection(trie, sourceFragment);
phraseColl.Add(targetPhrase); phraseColl->Add(targetPhrase);
count++; count++;
} }

View File

@ -51,8 +51,8 @@ void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
m_hyperedge.label.inputWeight += (*p)->weight; m_hyperedge.label.inputWeight += (*p)->weight;
} }
// Set the output hyperedge label's translation set pointer. // Set the output hyperedge label's translation set pointer.
m_hyperedge.label.translations = m_hyperedge.label.translations
&(item.trieNode->GetTargetPhraseCollection()); = item.trieNode->GetTargetPhraseCollection();
// Pass the output hyperedge to the callback. // Pass the output hyperedge to the callback.
callback(m_hyperedge); callback(m_hyperedge);
} }

View File

@ -9,7 +9,7 @@ namespace Syntax
struct PLabel { struct PLabel {
float inputWeight; float inputWeight;
const TargetPhraseCollection *translations; TargetPhraseCollection::shared_ptr translations;
}; };
} // Syntax } // Syntax

View File

@ -32,9 +32,10 @@ boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
// TODO Check ownership and fix any leaks. // TODO Check ownership and fix any leaks.
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr); Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob); TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr tpc;
*trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument tpc= GetOrCreateTargetPhraseCollection(*trie, *srcPhrase, *tp, NULL);
tpc.Add(tp); // TODO Check NULL is valid argument
tpc->Add(tp);
} }
} }

View File

@ -132,9 +132,9 @@ void RecursiveCYKPlusParser<Callback>::AddAndExtend(
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex)); m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
// Add target phrase collection (except if rule is empty or unary). // Add target phrase collection (except if rule is empty or unary).
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpc = node.GetTargetPhraseCollection();
if (!tpc.IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) { if (!tpc->IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
m_hyperedge.label.translations = &tpc; m_hyperedge.label.translations = tpc;
(*m_callback)(m_hyperedge, end); (*m_callback)(m_hyperedge, end);
} }

View File

@ -38,8 +38,8 @@ Scope3Parser<Callback>::~Scope3Parser()
} }
template<typename Callback> template<typename Callback>
void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range, void Scope3Parser<Callback>::
Callback &callback) EnumerateHyperedges(const WordsRange &range, Callback &callback)
{ {
const std::size_t start = range.GetStartPos(); const std::size_t start = range.GetStartPos();
const std::size_t end = range.GetEndPos(); const std::size_t end = range.GetEndPos();
@ -64,8 +64,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
// Ask the grammar for the mapping from label sequences to target phrase // Ask the grammar for the mapping from label sequences to target phrase
// collections for this pattern. // collections for this pattern.
const RuleTrie::Node::LabelMap &labelMap = const RuleTrie::Node::LabelMap &labelMap = patNode->m_node->GetLabelMap();
patNode->m_node->GetLabelMap();
// For each label sequence, search the lattice for the set of PHyperedge // For each label sequence, search the lattice for the set of PHyperedge
// tails. // tails.
@ -73,7 +72,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin(); RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin();
for (; q != labelMap.end(); ++q) { for (; q != labelMap.end(); ++q) {
const std::vector<int> &labelSeq = q->first; const std::vector<int> &labelSeq = q->first;
const TargetPhraseCollection &tpc = q->second; TargetPhraseCollection::shared_ptr tpc = q->second;
// For many label sequences there won't be any corresponding paths through // For many label sequences there won't be any corresponding paths through
// the lattice. As an optimisation, we use m_quickCheckTable to test // the lattice. As an optimisation, we use m_quickCheckTable to test
// for this and we don't begin a search if there are no paths to find. // for this and we don't begin a search if there are no paths to find.

View File

@ -6,7 +6,7 @@
#include "moses/Syntax/PHyperedge.h" #include "moses/Syntax/PHyperedge.h"
#include "TailLattice.h" #include "TailLattice.h"
#include "moses/TargetPhraseCollection.h"
namespace Moses namespace Moses
{ {
namespace Syntax namespace Syntax
@ -25,13 +25,14 @@ public:
, m_key(key) , m_key(key)
, m_ranges(ranges) {} , m_ranges(ranges) {}
void Search(const std::vector<int> &labels, const TargetPhraseCollection &tpc, void Search(const std::vector<int> &labels,
const TargetPhraseCollection::shared_ptr tpc,
Callback &callback) { Callback &callback) {
m_labels = &labels; m_labels = &labels;
m_matchCB = &callback; m_matchCB = &callback;
m_hyperedge.head = 0; m_hyperedge.head = 0;
m_hyperedge.tail.clear(); m_hyperedge.tail.clear();
m_hyperedge.label.translations = &tpc; m_hyperedge.label.translations = tpc;
SearchInner(0, 0, 0); SearchInner(0, 0, 0);
} }

View File

@ -28,9 +28,10 @@ public:
private: private:
friend class RuleTrieCreator; friend class RuleTrieCreator;
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection( virtual TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, GetOrCreateTargetPhraseCollection(const Phrase &source,
const Word *sourceLHS) = 0; const TargetPhrase &target,
const Word *sourceLHS) = 0;
virtual void SortAndPrune(std::size_t) = 0; virtual void SortAndPrune(std::size_t) = 0;
}; };

View File

@ -33,7 +33,7 @@ void RuleTrieCYKPlus::Node::Prune(std::size_t tableLimit)
} }
// prune TargetPhraseCollection in this node // prune TargetPhraseCollection in this node
m_targetPhraseCollection.Prune(true, tableLimit); m_targetPhraseCollection->Prune(true, tableLimit);
} }
void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit) void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
@ -49,7 +49,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
} }
// prune TargetPhraseCollection in this node // prune TargetPhraseCollection in this node
m_targetPhraseCollection.Sort(true, tableLimit); m_targetPhraseCollection->Sort(true, tableLimit);
} }
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild( RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
@ -86,8 +86,11 @@ const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild(
return (p == m_nonTermMap.end()) ? NULL : &p->second; return (p == m_nonTermMap.end()) ? NULL : &p->second;
} }
TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) RuleTrieCYKPlus::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{ {
Node &currNode = GetOrCreateNode(source, target, sourceLHS); Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetTargetPhraseCollection(); return currNode.GetTargetPhraseCollection();

View File

@ -38,7 +38,7 @@ public:
} }
bool HasRules() const { bool HasRules() const {
return !m_targetPhraseCollection.IsEmpty(); return !m_targetPhraseCollection->IsEmpty();
} }
void Prune(std::size_t tableLimit); void Prune(std::size_t tableLimit);
@ -50,11 +50,13 @@ public:
const Node *GetChild(const Word &sourceTerm) const; const Node *GetChild(const Word &sourceTerm) const;
const Node *GetNonTerminalChild(const Word &targetNonTerm) const; const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
const TargetPhraseCollection &GetTargetPhraseCollection() const { TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() const {
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
TargetPhraseCollection &GetTargetPhraseCollection() { TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() {
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
@ -66,10 +68,12 @@ public:
return m_nonTermMap; return m_nonTermMap;
} }
Node() : m_targetPhraseCollection(new TargetPhraseCollection) {}
private: private:
SymbolMap m_sourceTermMap; SymbolMap m_sourceTermMap;
SymbolMap m_nonTermMap; SymbolMap m_nonTermMap;
TargetPhraseCollection m_targetPhraseCollection; TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
}; };
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {} RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
@ -81,8 +85,9 @@ public:
bool HasPreterminalRule(const Word &) const; bool HasPreterminalRule(const Word &) const;
private: private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); GetOrCreateTargetPhraseCollection
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target, Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS); const Word *sourceLHS);

View File

@ -21,8 +21,9 @@ protected:
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection // Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
// function. // function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
RuleTrie &trie, const Phrase &source, const TargetPhrase &target, GetOrCreateTargetPhraseCollection
( RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS) { const Word *sourceLHS) {
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS); return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
} }

View File

@ -125,9 +125,10 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector); targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply()); targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr phraseColl
trie, sourcePhrase, *targetPhrase, sourceLHS); = GetOrCreateTargetPhraseCollection(trie, sourcePhrase,
phraseColl.Add(targetPhrase); *targetPhrase, sourceLHS);
phraseColl->Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now // not implemented correctly in memory pt. just delete it for now
delete sourceLHS; delete sourceLHS;

View File

@ -33,7 +33,7 @@ void RuleTrieScope3::Node::Prune(std::size_t tableLimit)
// Prune TargetPhraseCollections at this node. // Prune TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Prune(true, tableLimit); p->second->Prune(true, tableLimit);
} }
} }
@ -50,7 +50,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit)
// Sort TargetPhraseCollections at this node. // Sort TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Sort(true, tableLimit); p->second->Sort(true, tableLimit);
} }
} }
@ -75,9 +75,10 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
return m_gapNode; return m_gapNode;
} }
TargetPhraseCollection & TargetPhraseCollection::shared_ptr
RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection( RuleTrieScope3::
const TargetPhrase &target) Node::
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
{ {
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm(); const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
const std::size_t rank = alignmentInfo.GetSize(); const std::size_t rank = alignmentInfo.GetSize();
@ -94,12 +95,16 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
const Word &targetNonTerm = target.GetWord(targetNonTermIndex); const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
vec.push_back(InsertLabel(i++, targetNonTerm)); vec.push_back(InsertLabel(i++, targetNonTerm));
} }
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
return m_labelMap[vec]; if (!ret) ret.reset(new TargetPhraseCollection);
return ret;
} }
TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) RuleTrieScope3::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{ {
Node &currNode = GetOrCreateNode(source, target, sourceLHS); Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection(target); return currNode.GetOrCreateTargetPhraseCollection(target);

View File

@ -35,7 +35,7 @@ public:
SymbolEqualityPred> TerminalMap; SymbolEqualityPred> TerminalMap;
typedef boost::unordered_map<std::vector<int>, typedef boost::unordered_map<std::vector<int>,
TargetPhraseCollection> LabelMap; TargetPhraseCollection::shared_ptr> LabelMap;
~Node() { ~Node() {
delete m_gapNode; delete m_gapNode;
@ -61,8 +61,8 @@ public:
Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm); Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const TargetPhrase &); GetOrCreateTargetPhraseCollection(const TargetPhrase &);
bool IsLeaf() const { bool IsLeaf() const {
return m_terminalMap.empty() && m_gapNode == NULL; return m_terminalMap.empty() && m_gapNode == NULL;
@ -106,8 +106,10 @@ public:
bool HasPreterminalRule(const Word &) const; bool HasPreterminalRule(const Word &) const;
private: private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS);
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target, Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS); const Word *sourceLHS);

View File

@ -17,7 +17,7 @@ struct PVertex;
struct SHyperedgeBundle { struct SHyperedgeBundle {
float inputWeight; float inputWeight;
std::vector<const SVertexStack*> stacks; std::vector<const SVertexStack*> stacks;
const TargetPhraseCollection *translations; TargetPhraseCollection::shared_ptr translations;
friend void swap(SHyperedgeBundle &x, SHyperedgeBundle &y) { friend void swap(SHyperedgeBundle &x, SHyperedgeBundle &y) {
using std::swap; using std::swap;

View File

@ -17,9 +17,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
const Word &sourceLhs = node.pvertex.symbol; const Word &sourceLhs = node.pvertex.symbol;
boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node)); boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs); TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr tpc
m_ruleTrie, sourceLhs, *sourceRhs); = GetOrCreateTargetPhraseCollection(m_ruleTrie, sourceLhs, *sourceRhs);
tpc.Add(tp); tpc->Add(tp);
} }
Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node) Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)

View File

@ -48,11 +48,11 @@ public:
const Node *GetChild(const HyperPath::NodeSeq &) const; const Node *GetChild(const HyperPath::NodeSeq &) const;
const TargetPhraseCollection &GetTargetPhraseCollection() const const TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() const
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
TargetPhraseCollection &GetTargetPhraseCollection() TargetPhraseCollection::shared_ptr GetTargetPhraseCollection()
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
@ -76,7 +76,7 @@ const Node &GetRootNode() const
private: private:
friend class RuleTrieCreator; friend class RuleTrieCreator;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
const Word &sourceLHS, const Phrase &sourceRHS); const Word &sourceLHS, const Phrase &sourceRHS);
Node &GetOrCreateNode(const Phrase &sourceRHS); Node &GetOrCreateNode(const Phrase &sourceRHS);

View File

@ -61,7 +61,7 @@ void RuleMatcherSCFG<Callback>::Match(const InputTree::Node &inNode,
if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) { if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) {
// Check if the trie node has any rules with a LHS that match inNode. // Check if the trie node has any rules with a LHS that match inNode.
const Word &lhs = inNode.pvertex.symbol; const Word &lhs = inNode.pvertex.symbol;
const TargetPhraseCollection *tpc = TargetPhraseCollection::shared_ptr tpc =
newTrieNode.GetTargetPhraseCollection(lhs); newTrieNode.GetTargetPhraseCollection(lhs);
if (tpc) { if (tpc) {
m_hyperedge.label.translations = tpc; m_hyperedge.label.translations = tpc;

View File

@ -35,7 +35,7 @@ void RuleTrie::Node::Prune(std::size_t tableLimit)
// Prune TargetPhraseCollections at this node. // Prune TargetPhraseCollections at this node.
for (TPCMap::iterator p = m_targetPhraseCollections.begin(); for (TPCMap::iterator p = m_targetPhraseCollections.begin();
p != m_targetPhraseCollections.end(); ++p) { p != m_targetPhraseCollections.end(); ++p) {
p->second.Prune(true, tableLimit); p->second->Prune(true, tableLimit);
} }
} }
@ -54,17 +54,21 @@ void RuleTrie::Node::Sort(std::size_t tableLimit)
// Sort TargetPhraseCollections at this node. // Sort TargetPhraseCollections at this node.
for (TPCMap::iterator p = m_targetPhraseCollections.begin(); for (TPCMap::iterator p = m_targetPhraseCollections.begin();
p != m_targetPhraseCollections.end(); ++p) { p != m_targetPhraseCollections.end(); ++p) {
p->second.Sort(true, tableLimit); p->second->Sort(true, tableLimit);
} }
} }
RuleTrie::Node *RuleTrie::Node::GetOrCreateChild( RuleTrie::Node*
const Word &sourceTerm) RuleTrie::Node::
GetOrCreateChild(const Word &sourceTerm)
{ {
return &m_sourceTermMap[sourceTerm]; return &m_sourceTermMap[sourceTerm];
} }
RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm) RuleTrie::Node *
RuleTrie::
Node::
GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{ {
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(), UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
"Not a non-terminal: " << targetNonTerm); "Not a non-terminal: " << targetNonTerm);
@ -72,42 +76,52 @@ RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNo
return &m_nonTermMap[targetNonTerm]; return &m_nonTermMap[targetNonTerm];
} }
TargetPhraseCollection &RuleTrie::Node::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Word &sourceLHS) RuleTrie::
Node::
GetOrCreateTargetPhraseCollection(const Word &sourceLHS)
{ {
UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(), UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(),
"Not a non-terminal: " << sourceLHS); "Not a non-terminal: " << sourceLHS);
return m_targetPhraseCollections[sourceLHS]; TargetPhraseCollection::shared_ptr& foo
= m_targetPhraseCollections[sourceLHS];
if (!foo) foo.reset(new TargetPhraseCollection);
return foo;
} }
const RuleTrie::Node *RuleTrie::Node::GetChild( RuleTrie::Node const*
const Word &sourceTerm) const RuleTrie::
Node::
GetChild(const Word &sourceTerm) const
{ {
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), "Not a terminal: " << sourceTerm);
"Not a terminal: " << sourceTerm);
SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm); SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
return (p == m_sourceTermMap.end()) ? NULL : &p->second; return (p == m_sourceTermMap.end()) ? NULL : &p->second;
} }
const RuleTrie::Node *RuleTrie::Node::GetNonTerminalChild( RuleTrie::Node const*
const Word &targetNonTerm) const RuleTrie::
Node::
GetNonTerminalChild(const Word &targetNonTerm) const
{ {
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(), UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
"Not a non-terminal: " << targetNonTerm); "Not a non-terminal: " << targetNonTerm);
SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm); SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
return (p == m_nonTermMap.end()) ? NULL : &p->second; return (p == m_nonTermMap.end()) ? NULL : &p->second;
} }
TargetPhraseCollection &RuleTrie::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Word &sourceLHS, const Phrase &sourceRHS) RuleTrie::
GetOrCreateTargetPhraseCollection
( const Word &sourceLHS, const Phrase &sourceRHS )
{ {
Node &currNode = GetOrCreateNode(sourceRHS); Node &currNode = GetOrCreateNode(sourceRHS);
return currNode.GetOrCreateTargetPhraseCollection(sourceLHS); return currNode.GetOrCreateTargetPhraseCollection(sourceLHS);
} }
RuleTrie::Node &RuleTrie::GetOrCreateNode(const Phrase &sourceRHS) RuleTrie::Node &
RuleTrie::
GetOrCreateNode(const Phrase &sourceRHS)
{ {
const std::size_t size = sourceRHS.GetSize(); const std::size_t size = sourceRHS.GetSize();

View File

@ -32,7 +32,7 @@ public:
typedef boost::unordered_map<Word, Node, SymbolHasher, typedef boost::unordered_map<Word, Node, SymbolHasher,
SymbolEqualityPred> SymbolMap; SymbolEqualityPred> SymbolMap;
typedef boost::unordered_map<Word, TargetPhraseCollection, typedef boost::unordered_map<Word, TargetPhraseCollection::shared_ptr,
SymbolHasher, SymbolEqualityPred> TPCMap; SymbolHasher, SymbolEqualityPred> TPCMap;
bool IsLeaf() const { bool IsLeaf() const {
@ -48,15 +48,18 @@ public:
Node *GetOrCreateChild(const Word &sourceTerm); Node *GetOrCreateChild(const Word &sourceTerm);
Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm); Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Word &); TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(const Word &);
const Node *GetChild(const Word &sourceTerm) const; const Node *GetChild(const Word &sourceTerm) const;
const Node *GetNonTerminalChild(const Word &targetNonTerm) const; const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
const TargetPhraseCollection *GetTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Word &sourceLHS) const { GetTargetPhraseCollection(const Word &sourceLHS) const {
TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS); TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS);
return p == m_targetPhraseCollections.end() ? 0 : &(p->second); if (p != m_targetPhraseCollections.end())
return p->second;
else
return TargetPhraseCollection::shared_ptr();
} }
// FIXME IS there any reason to distinguish these two for T2S? // FIXME IS there any reason to distinguish these two for T2S?
@ -83,8 +86,9 @@ public:
private: private:
friend class RuleTrieCreator; friend class RuleTrieCreator;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Word &sourceLHS, const Phrase &sourceRHS); GetOrCreateTargetPhraseCollection
(const Word &sourceLHS, const Phrase &sourceRHS);
Node &GetOrCreateNode(const Phrase &sourceRHS); Node &GetOrCreateNode(const Phrase &sourceRHS);

View File

@ -21,7 +21,7 @@ protected:
// Provide access to RuleTrie's private // Provide access to RuleTrie's private
// GetOrCreateTargetPhraseCollection function. // GetOrCreateTargetPhraseCollection function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) { RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) {
return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS); return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS);
} }

View File

@ -55,7 +55,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
std::vector<float> scoreVector; std::vector<float> scoreVector;
StringPiece line; StringPiece line;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan"); int noflags = double_conversion::StringToDoubleConverter::NO_FLAGS;
double_conversion::StringToDoubleConverter
converter(noflags, NAN, NAN, "inf", "nan");
while(true) { while(true) {
try { try {
@ -132,9 +134,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector); targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply()); targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr phraseColl
trie, *sourceLHS, sourcePhrase); = GetOrCreateTargetPhraseCollection(trie, *sourceLHS, sourcePhrase);
phraseColl.Add(targetPhrase); phraseColl->Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now // not implemented correctly in memory pt. just delete it for now
delete sourceLHS; delete sourceLHS;

View File

@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream> #include <iostream>
#include "TargetPhrase.h" #include "TargetPhrase.h"
#include "Util.h" #include "Util.h"
#include <boost/shared_ptr.hpp>
namespace Moses namespace Moses
{ {
@ -43,6 +44,8 @@ public:
// iters // iters
typedef CollType::iterator iterator; typedef CollType::iterator iterator;
typedef CollType::const_iterator const_iterator; typedef CollType::const_iterator const_iterator;
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
TargetPhrase const* TargetPhrase const*
operator[](size_t const i) const { operator[](size_t const i) const {
@ -127,6 +130,9 @@ protected:
std::vector<Phrase> m_sourcePhrases; std::vector<Phrase> m_sourcePhrases;
public: public:
typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase> shared_ptr;
typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase const> shared_const_ptr;
const std::vector<Phrase> &GetSourcePhrases() const { const std::vector<Phrase> &GetSourcePhrases() const {
return m_sourcePhrases; return m_sourcePhrases;
} }

View File

@ -167,10 +167,10 @@ void ChartRuleLookupManagerMemory::AddAndExtend(
size_t endPos) size_t endPos)
{ {
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpc = node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or a unary non-terminal rule) // add target phrase collection (except if rule is empty or a unary non-terminal rule)
if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) { if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl); m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
} }
// get all further extensions of rule (until reaching end of sentence or max-chart-span) // get all further extensions of rule (until reaching end of sentence or max-chart-span)

View File

@ -167,10 +167,11 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
size_t endPos) size_t endPos)
{ {
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpc
= node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or a unary non-terminal rule) // add target phrase collection (except if rule is empty or a unary non-terminal rule)
if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) { if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl); m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
} }
// get all further extensions of rule (until reaching end of sentence or max-chart-span) // get all further extensions of rule (until reaching end of sentence or max-chart-span)

View File

@ -64,11 +64,12 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk() ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
{ {
std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache; // not needed any more due to the switch to shared pointers
for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) { // std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache;
delete iterCache->second; // for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
} // iterCache->second.reset();
m_cache.clear(); // }
// m_cache.clear();
RemoveAllInColl(m_expandableDottedRuleListVec); RemoveAllInColl(m_expandableDottedRuleListVec);
RemoveAllInColl(m_sourcePhraseNode); RemoveAllInColl(m_sourcePhraseNode);
@ -236,14 +237,16 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
if (sourceLHSBerkeleyDb == NULL) if (sourceLHSBerkeleyDb == NULL)
continue; continue;
const TargetPhraseCollection *targetPhraseCollection = NULL; TargetPhraseCollection::shared_ptr targetPhraseCollection;
const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); const OnDiskPt::PhraseNode *node
= prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
if (node) { if (node) {
uint64_t tpCollFilePos = node->GetValue(); uint64_t tpCollFilePos = node->GetValue();
std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos); std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache = m_cache.find(tpCollFilePos);
if (iterCache == m_cache.end()) { if (iterCache == m_cache.end()) {
const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper); OnDiskPt::TargetPhraseCollection::shared_ptr tpcollBerkeleyDb
= node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
std::vector<float> weightT = staticData.GetWeights(&m_dictionary); std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
targetPhraseCollection targetPhraseCollection
@ -254,7 +257,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
,m_dbWrapper.GetVocab() ,m_dbWrapper.GetVocab()
,true); ,true);
delete tpcollBerkeleyDb; tpcollBerkeleyDb.reset();
m_cache[tpCollFilePos] = targetPhraseCollection; m_cache[tpCollFilePos] = targetPhraseCollection;
} else { } else {
// just get out of cache // just get out of cache

View File

@ -55,7 +55,7 @@ private:
const std::vector<FactorType> &m_inputFactorsVec; const std::vector<FactorType> &m_inputFactorsVec;
const std::vector<FactorType> &m_outputFactorsVec; const std::vector<FactorType> &m_outputFactorsVec;
std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec; std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec;
std::map<uint64_t, const TargetPhraseCollection*> m_cache; std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache;
std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode; std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
}; };

View File

@ -48,7 +48,7 @@ ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton(
ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton() ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
{ {
RemoveAllInColl(m_tpColl); // RemoveAllInColl(m_tpColl);
} }
void ChartRuleLookupManagerSkeleton::GetChartRuleCollection( void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
@ -58,7 +58,7 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
{ {
//m_tpColl.push_back(TargetPhraseCollection()); //m_tpColl.push_back(TargetPhraseCollection());
//TargetPhraseCollection &tpColl = m_tpColl.back(); //TargetPhraseCollection &tpColl = m_tpColl.back();
TargetPhraseCollection *tpColl = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
m_tpColl.push_back(tpColl); m_tpColl.push_back(tpColl);
const WordsRange &range = inputPath.GetWordsRange(); const WordsRange &range = inputPath.GetWordsRange();
@ -73,7 +73,9 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
outColl.Add(*tpColl, m_stackVec, range); outColl.Add(*tpColl, m_stackVec, range);
} }
TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sourceWord) const TargetPhrase *
ChartRuleLookupManagerSkeleton::
CreateTargetPhrase(const Word &sourceWord) const
{ {
// create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:' // create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:'
string str = sourceWord.GetFactor(0)->GetString().as_string(); string str = sourceWord.GetFactor(0)->GetString().as_string();

View File

@ -49,7 +49,7 @@ private:
TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const; TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;
StackVec m_stackVec; StackVec m_stackVec;
std::vector<TargetPhraseCollection*> m_tpColl; std::vector<TargetPhraseCollection::shared_ptr > m_tpColl;
const SkeletonPT &m_skeletonPT; const SkeletonPT &m_skeletonPT;
}; };

View File

@ -119,4 +119,4 @@ private:
} // namespace Moses } // namespace Moses
#endif #endif

View File

@ -107,14 +107,15 @@ void PhraseDictionaryCompact::Load()
// } // }
// }; // };
const TargetPhraseCollection* TargetPhraseCollection::shared_ptr
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
{ {
TargetPhraseCollection::shared_ptr ret;
// There is no such source phrase if source phrase is longer than longest // There is no such source phrase if source phrase is longer than longest
// observed source phrase during compilation // observed source phrase during compilation
if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()) if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
return NULL; return ret;
// Retrieve target phrase collection from phrase table // Retrieve target phrase collection from phrase table
TargetPhraseVectorPtr decodedPhraseColl TargetPhraseVectorPtr decodedPhraseColl
@ -122,7 +123,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) { if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl)); TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
TargetPhraseCollection* phraseColl = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
// Score phrases and if possible apply ttable_limit // Score phrases and if possible apply ttable_limit
TargetPhraseVector::iterator nth = TargetPhraseVector::iterator nth =
@ -139,7 +140,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s
return phraseColl; return phraseColl;
} else } else
return NULL; return ret;
} }
TargetPhraseVectorPtr TargetPhraseVectorPtr
@ -163,7 +164,7 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact()
//TO_STRING_BODY(PhraseDictionaryCompact) //TO_STRING_BODY(PhraseDictionaryCompact)
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc) void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{ {
if(!m_sentenceCache.get()) if(!m_sentenceCache.get())
m_sentenceCache.reset(new PhraseCache()); m_sentenceCache.reset(new PhraseCache());
@ -179,12 +180,13 @@ void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &so
m_sentenceCache.reset(new PhraseCache()); m_sentenceCache.reset(new PhraseCache());
m_phraseDecoder->PruneCache(); m_phraseDecoder->PruneCache();
for(PhraseCache::iterator it = m_sentenceCache->begin(); // for(PhraseCache::iterator it = m_sentenceCache->begin();
it != m_sentenceCache->end(); it++) // it != m_sentenceCache->end(); it++)
delete *it; // it->reset();
PhraseCache temp; // PhraseCache temp;
temp.swap(*m_sentenceCache); // temp.swap(*m_sentenceCache);
m_sentenceCache->clear();
ReduceCache(); ReduceCache();
} }

View File

@ -51,7 +51,7 @@ protected:
bool m_inMemory; bool m_inMemory;
bool m_useAlignmentInfo; bool m_useAlignmentInfo;
typedef std::vector<TargetPhraseCollection*> PhraseCache; typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
typedef boost::thread_specific_ptr<PhraseCache> SentenceCache; typedef boost::thread_specific_ptr<PhraseCache> SentenceCache;
static SentenceCache m_sentenceCache; static SentenceCache m_sentenceCache;
@ -69,12 +69,12 @@ public:
void Load(); void Load();
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const; TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const; TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase); void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
void CacheForCleanup(TargetPhraseCollection* tpc); void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
void CleanUpAfterSentenceProcessing(const InputType &source); void CleanUpAfterSentenceProcessing(const InputType &source);
virtual ChartRuleLookupManager *CreateRuleLookupManager( virtual ChartRuleLookupManager *CreateRuleLookupManager(

View File

@ -35,14 +35,15 @@ namespace Moses
{ {
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl; std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
CacheColl::~CacheColl() // CacheColl::~CacheColl()
{ // {
for (iterator iter = begin(); iter != end(); ++iter) { // // not needed any more since the switch to shared pointers
std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second; // // for (iterator iter = begin(); iter != end(); ++iter) {
const TargetPhraseCollection *tps = key.first; // // std::pair<TargetPhraseCollection::shared_ptr , clock_t> &key = iter->second;
delete tps; // // TargetPhraseCollection::shared_ptr tps = key.first;
} // // delete tps;
} // // }
// }
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow) PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
: DecodeFeature(line, registerNow) : DecodeFeature(line, registerNow)
@ -60,9 +61,12 @@ ProvidesPrefixCheck() const
return false; return false;
} }
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const TargetPhraseCollection::shared_ptr
PhraseDictionary::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{ {
const TargetPhraseCollection *ret; TargetPhraseCollection::shared_ptr ret;
typedef std::pair<TargetPhraseCollection::shared_ptr , clock_t> entry;
if (m_maxCacheSize) { if (m_maxCacheSize) {
CacheColl &cache = GetCache(); CacheColl &cache = GetCache();
@ -74,18 +78,14 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
if (iter == cache.end()) { if (iter == cache.end()) {
// not in cache, need to look up from phrase table // not in cache, need to look up from phrase table
ret = GetTargetPhraseCollectionNonCacheLEGACY(src); ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
if (ret) { if (ret) { // make a copy
ret = new TargetPhraseCollection(*ret); ret.reset(new TargetPhraseCollection(*ret));
} }
cache[hash] = entry(ret, clock());
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
cache[hash] = value;
} else { } else {
// in cache. just use it // in cache. just use it
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second; iter->second.second = clock();
value.second = clock(); ret = iter->second.first;
ret = value.first;
} }
} else { } else {
// don't use cache. look up from phrase table // don't use cache. look up from phrase table
@ -95,7 +95,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
return ret; return ret;
} }
TargetPhraseCollection const * TargetPhraseCollection::shared_ptr
PhraseDictionary:: PhraseDictionary::
GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
{ {
@ -103,7 +103,7 @@ GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
} }
TargetPhraseCollectionWithSourcePhrase const* TargetPhraseCollectionWithSourcePhrase::shared_ptr
PhraseDictionary:: PhraseDictionary::
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const
{ {
@ -140,14 +140,14 @@ SetFeaturesToApply()
} }
// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more // // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
void // void
PhraseDictionary:: // PhraseDictionary::
Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const // Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
{ // {
// do nothing by default // // do nothing by default
return; // return;
} // }
bool bool
PhraseDictionary:: PhraseDictionary::
@ -170,7 +170,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
} }
const Phrase &phrase = inputPath.GetPhrase(); const Phrase &phrase = inputPath.GetPhrase();
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase); TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL); inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
} }
} }
@ -180,7 +180,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
//void PhraseDictionary::SaveCache() const //void PhraseDictionary::SaveCache() const
//{ //{
// CacheColl &cache = GetCache(); // CacheColl &cache = GetCache();
// for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter, // for( std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter,
// iter != cache.end(), // iter != cache.end(),
// iter++ ) { // iter++ ) {
// //
@ -191,10 +191,10 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
//void PhraseDictionary::LoadCache() const //void PhraseDictionary::LoadCache() const
//{ //{
// CacheColl &cache = GetCache(); // CacheColl &cache = GetCache();
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter; // std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter;
// iter = cache.begin(); // iter = cache.begin();
// while( iter != cache.end() ) { // while( iter != cache.end() ) {
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++; // std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iterRemove = iter++;
// delete iterRemove->second.first; // delete iterRemove->second.first;
// cache.erase(iterRemove); // cache.erase(iterRemove);
// } // }
@ -225,11 +225,12 @@ void PhraseDictionary::ReduceCache() const
while( iter != cache.end() ) { while( iter != cache.end() ) {
if (iter->second.second < cutoffLastUsedTime) { if (iter->second.second < cutoffLastUsedTime) {
CacheColl::iterator iterRemove = iter++; CacheColl::iterator iterRemove = iter++;
delete iterRemove->second.first; // delete iterRemove->second.first;
cache.erase(iterRemove); cache.erase(iterRemove);
} else iter++; } else iter++;
} }
VERBOSE(2,"Reduced persistent translation option cache in " << reduceCacheTime << " seconds." << std::endl); VERBOSE(2,"Reduced persistent translation option cache in "
<< reduceCacheTime << " seconds." << std::endl);
} }
CacheColl &PhraseDictionary::GetCache() const CacheColl &PhraseDictionary::GetCache() const
@ -265,8 +266,8 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
// lookup translation only if no other translations // lookup translation only if no other translations
InputPath::TargetPhrases::const_iterator iter; InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) { for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second; const std::pair<TargetPhraseCollection::shared_ptr , const void*> &temp = iter->second;
const TargetPhraseCollection *tpCollPrev = temp.first; TargetPhraseCollection::shared_ptr tpCollPrev = temp.first;
if (tpCollPrev && tpCollPrev->GetSize()) { if (tpCollPrev && tpCollPrev->GetSize()) {
// already have translation from another pt. Don't create translations // already have translation from another pt. Don't create translations

View File

@ -55,15 +55,18 @@ class ChartCellCollectionBase;
class ChartRuleLookupManager; class ChartRuleLookupManager;
class ChartParser; class ChartParser;
class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhraseCollection*, clock_t> > // typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
{ typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
// 1st = hash of source phrase/ address of phrase-table node typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
// 2nd = all translations // class CacheColl : public boost::unordered_map<size_t, TPCollLastUse>
// 3rd = time of last access // {
// // 1st = hash of source phrase/ address of phrase-table node
// // 2nd = all translations
// // 3rd = time of last access
public: // public:
~CacheColl(); // ~CacheColl();
}; // };
/** /**
* Abstract base class for phrase dictionaries (tables). * Abstract base class for phrase dictionaries (tables).
@ -95,9 +98,9 @@ public:
return m_id; return m_id;
} }
virtual // virtual
void // void
Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const; // Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
/// return true if phrase table entries starting with /phrase/ /// return true if phrase table entries starting with /phrase/
// exist in the table. // exist in the table.
@ -111,24 +114,23 @@ public:
//! find list of translations that can translate src. Only for phrase input //! find list of translations that can translate src. Only for phrase input
public: public:
virtual virtual TargetPhraseCollection::shared_ptr
TargetPhraseCollection const *
GetTargetPhraseCollectionLEGACY(const Phrase& src) const; GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
virtual virtual TargetPhraseCollection::shared_ptr
TargetPhraseCollection const * GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const { Phrase const& src) const
{
return GetTargetPhraseCollectionLEGACY(src); return GetTargetPhraseCollectionLEGACY(src);
} }
virtual virtual void
void
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
virtual virtual void
void GetTargetPhraseCollectionBatch
GetTargetPhraseCollectionBatch(ttasksptr const& ttask, (ttasksptr const& ttask, InputPathList const& inputPathQueue) const
const InputPathList &inputPathQueue) const { {
GetTargetPhraseCollectionBatch(inputPathQueue); GetTargetPhraseCollectionBatch(inputPathQueue);
} }
@ -157,7 +159,9 @@ public:
// LEGACY // LEGACY
//! find list of translations that can translate a portion of src. Used by confusion network decoding //! find list of translations that can translate a portion of src. Used by confusion network decoding
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const; virtual
TargetPhraseCollectionWithSourcePhrase::shared_ptr
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
protected: protected:
static std::vector<PhraseDictionary*> s_staticColl; static std::vector<PhraseDictionary*> s_staticColl;
@ -184,7 +188,10 @@ protected:
mutable boost::scoped_ptr<CacheColl> m_cache; mutable boost::scoped_ptr<CacheColl> m_cache;
#endif #endif
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const; virtual
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
void ReduceCache() const; void ReduceCache() const;
protected: protected:

View File

@ -150,15 +150,15 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(ttasksptr const& ttas
ReduceCache(); ReduceCache();
} }
const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
{ {
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock); boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif #endif
TargetPhraseCollection* tpc = NULL; TargetPhraseCollection::shared_ptr tpc;
cacheMap::const_iterator it = m_cacheTM.find(source); cacheMap::const_iterator it = m_cacheTM.find(source);
if(it != m_cacheTM.end()) { if(it != m_cacheTM.end()) {
tpc = new TargetPhraseCollection(*(it->second).first); tpc.reset(new TargetPhraseCollection(*(it->second).first));
std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin(); std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
@ -174,15 +174,15 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
return tpc; return tpc;
} }
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
{ {
const TargetPhraseCollection *ret = GetTargetPhraseCollection(src); TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
return ret; return ret;
} }
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
{ {
const TargetPhraseCollection *ret = GetTargetPhraseCollection(src); TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
return ret; return ret;
} }
@ -366,7 +366,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
// and then add new entry // and then add new entry
TargetCollectionAgePair TgtCollAgePair = it->second; TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first; TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second; AgeCollection* ac = TgtCollAgePair.second;
const Phrase* p_ptr = NULL; const Phrase* p_ptr = NULL;
TargetPhrase* tp_ptr = NULL; TargetPhrase* tp_ptr = NULL;
@ -397,7 +397,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
if (tpc->GetSize() == 0) { if (tpc->GetSize() == 0) {
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
ac->clear(); ac->clear();
delete tpc; tpc.reset();
delete ac; delete ac;
m_cacheTM.erase(sp); m_cacheTM.erase(sp);
} }
@ -451,14 +451,14 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
//sp is found //sp is found
TargetCollectionAgePair TgtCollAgePair = it->second; TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first; TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second; AgeCollection* ac = TgtCollAgePair.second;
m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
ac->clear(); ac->clear();
delete tpc; tpc.reset();
delete ac; delete ac;
m_cacheTM.erase(sp); m_cacheTM.erase(sp);
} else { } else {
@ -558,7 +558,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
// and then add new entry // and then add new entry
TargetCollectionAgePair TgtCollAgePair = it->second; TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first; TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second; AgeCollection* ac = TgtCollAgePair.second;
// const TargetPhrase* p_ptr = NULL; // const TargetPhrase* p_ptr = NULL;
const Phrase* p_ptr = NULL; const Phrase* p_ptr = NULL;
@ -599,7 +599,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
// create target collection // create target collection
// we have to create new target collection age pair and add new entry to target collection age pair // we have to create new target collection age pair and add new entry to target collection age pair
TargetPhraseCollection* tpc = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
AgeCollection* ac = new AgeCollection(); AgeCollection* ac = new AgeCollection();
m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac))); m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
@ -629,13 +629,13 @@ void PhraseDictionaryDynamicCacheBased::Decay()
void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
{ {
VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl); VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
cacheMap::const_iterator it = m_cacheTM.find(sp); cacheMap::iterator it = m_cacheTM.find(sp);
if (it != m_cacheTM.end()) { if (it != m_cacheTM.end()) {
VERBOSE(3,"found:|" << sp << "|" << std::endl); VERBOSE(3,"found:|" << sp << "|" << std::endl);
//sp is found //sp is found
TargetCollectionAgePair TgtCollAgePair = it->second; TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first; TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second; AgeCollection* ac = TgtCollAgePair.second;
//loop in inverted order to allow a correct deletion of std::vectors tpc and ac //loop in inverted order to allow a correct deletion of std::vectors tpc and ac
@ -661,7 +661,7 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
(((*it).second).second)->clear(); (((*it).second).second)->clear();
delete ((*it).second).second; delete ((*it).second).second;
delete ((*it).second).first; ((*it).second).first.reset();
m_cacheTM.erase(sp); m_cacheTM.erase(sp);
} }
} else { } else {
@ -703,11 +703,11 @@ void PhraseDictionaryDynamicCacheBased::Clear()
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock); boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif #endif
cacheMap::const_iterator it; cacheMap::iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
(((*it).second).second)->clear(); (((*it).second).second)->clear();
delete ((*it).second).second; delete ((*it).second).second;
delete ((*it).second).first; ((*it).second).first.reset();
} }
m_cacheTM.clear(); m_cacheTM.clear();
m_entries = 0; m_entries = 0;
@ -746,7 +746,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const
cacheMap::const_iterator it; cacheMap::const_iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
std::string source = (it->first).ToString(); std::string source = (it->first).ToString();
TargetPhraseCollection* tpc = (it->second).first; TargetPhraseCollection::shared_ptr tpc = (it->second).first;
TargetPhraseCollection::iterator itr; TargetPhraseCollection::iterator itr;
for(itr = tpc->begin(); itr != tpc->end(); itr++) { for(itr = tpc->begin(); itr != tpc->end(); itr++) {
std::string target = (*itr)->ToString(); std::string target = (*itr)->ToString();

View File

@ -53,7 +53,7 @@ class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{ {
typedef std::vector<unsigned int> AgeCollection; typedef std::vector<unsigned int> AgeCollection;
typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair; typedef std::pair<TargetPhraseCollection::shared_ptr , AgeCollection*> TargetCollectionAgePair;
typedef std::map<Phrase, TargetCollectionAgePair> cacheMap; typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;
// data structure for the cache // data structure for the cache
@ -111,9 +111,14 @@ public:
void Load(); void Load();
void Load(const std::string files); void Load(const std::string files);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const; TargetPhraseCollection::shared_ptr
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(Phrase const &src) const; GetTargetPhraseCollection(const Phrase &src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
// for phrase-based model // for phrase-based model
// void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; // void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

View File

@ -86,29 +86,32 @@ void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
// Look up each input in each model // Look up each input in each model
BOOST_FOREACH(InputPath* inputPath, inputPathQueue) { BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
const Phrase &phrase = inputPath->GetPhrase(); const Phrase &phrase = inputPath->GetPhrase();
const TargetPhraseCollection* targetPhrases = TargetPhraseCollection::shared_ptr targetPhrases =
this->GetTargetPhraseCollectionLEGACY(ttask, phrase); this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
inputPath->SetTargetPhrases(*this, targetPhrases, NULL); inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
} }
} }
const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY( TargetPhraseCollection::shared_ptr PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
const Phrase& src) const const Phrase& src) const
{ {
UTIL_THROW2("Don't call me without the translation task."); UTIL_THROW2("Don't call me without the translation task.");
} }
const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY( TargetPhraseCollection::shared_ptr
const ttasksptr& ttask, const Phrase& src) const PhraseDictionaryGroup::
GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
{ {
TargetPhraseCollection* ret = CreateTargetPhraseCollection(ttask, src); TargetPhraseCollection::shared_ptr ret
= CreateTargetPhraseCollection(ttask, src);
ret->NthElement(m_tableLimit); // sort the phrases for pruning later ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret); const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
return ret; return ret;
} }
TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const ttasksptr& ttask, const Phrase& src) const PhraseDictionaryGroup::
CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
{ {
// Aggregation of phrases and the scores that will be applied to them // Aggregation of phrases and the scores that will be applied to them
vector<TargetPhrase*> allPhrases; vector<TargetPhrase*> allPhrases;
@ -121,8 +124,8 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
// Collect phrases from this table // Collect phrases from this table
const PhraseDictionary& pd = *m_memberPDs[i]; const PhraseDictionary& pd = *m_memberPDs[i];
const TargetPhraseCollection* ret_raw = pd.GetTargetPhraseCollectionLEGACY( TargetPhraseCollection::shared_ptr
ttask, src); ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
if (ret_raw != NULL) { if (ret_raw != NULL) {
// Process each phrase from table // Process each phrase from table
@ -162,7 +165,7 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
} }
// Apply scores to phrases and add them to return collection // Apply scores to phrases and add them to return collection
TargetPhraseCollection* ret = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
const vector<FeatureFunction*> pd_feature_const(m_pdFeature); const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
BOOST_FOREACH(TargetPhrase* phrase, allPhrases) { BOOST_FOREACH(TargetPhrase* phrase, allPhrases) {
phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second); phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second);
@ -174,29 +177,33 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
return ret; return ret;
} }
ChartRuleLookupManager *PhraseDictionaryGroup::CreateRuleLookupManager( ChartRuleLookupManager*
const ChartParser &, const ChartCellCollectionBase&, size_t) PhraseDictionaryGroup::
CreateRuleLookupManager(const ChartParser &,
const ChartCellCollectionBase&, size_t)
{ {
UTIL_THROW(util::Exception, "Phrase table used in chart decoder"); UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
} }
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence //copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection* tpc) void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{ {
PhraseCache &ref = GetPhraseCache(); PhraseCache &ref = GetPhraseCache();
ref.push_back(tpc); ref.push_back(tpc);
} }
void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing( void
const InputType &source) PhraseDictionaryGroup::
CleanUpAfterSentenceProcessing(const InputType &source)
{ {
PhraseCache &ref = GetPhraseCache(); GetPhraseCache().clear();
for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) { // PhraseCache &ref = GetPhraseCache();
delete *it; // for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
} // delete *it;
// }
PhraseCache temp; // PhraseCache temp;
temp.swap(ref); // temp.swap(ref);
CleanUpComponentModels(source); CleanUpComponentModels(source);
} }

View File

@ -43,19 +43,20 @@ class PhraseDictionaryGroup: public PhraseDictionary
public: public:
PhraseDictionaryGroup(const std::string& line); PhraseDictionaryGroup(const std::string& line);
void Load(); void Load();
TargetPhraseCollection* CreateTargetPhraseCollection(const ttasksptr& ttask, TargetPhraseCollection::shared_ptr
CreateTargetPhraseCollection(const ttasksptr& ttask,
const Phrase& src) const; const Phrase& src) const;
std::vector<std::vector<float> > getWeights(size_t numWeights, std::vector<std::vector<float> > getWeights(size_t numWeights,
bool normalize) const; bool normalize) const;
void CacheForCleanup(TargetPhraseCollection* tpc); void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
void CleanUpAfterSentenceProcessing(const InputType& source); void CleanUpAfterSentenceProcessing(const InputType& source);
void CleanUpComponentModels(const InputType& source); void CleanUpComponentModels(const InputType& source);
// functions below override the base class // functions below override the base class
void GetTargetPhraseCollectionBatch(const ttasksptr& ttask, void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
const InputPathList &inputPathQueue) const; const InputPathList &inputPathQueue) const;
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY( TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
const Phrase& src) const; const Phrase& src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY( TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
const ttasksptr& ttask, const Phrase& src) const; const ttasksptr& ttask, const Phrase& src) const;
void InitializeForInput(ttasksptr const& ttask) { void InitializeForInput(ttasksptr const& ttask) {
/* Don't do anything source specific here as this object is shared between threads.*/ /* Don't do anything source specific here as this object is shared between threads.*/
@ -71,7 +72,7 @@ protected:
bool m_restrict; bool m_restrict;
std::vector<FeatureFunction*> m_pdFeature; std::vector<FeatureFunction*> m_pdFeature;
typedef std::vector<TargetPhraseCollection*> PhraseCache; typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_mutex m_lock_cache; boost::shared_mutex m_lock_cache;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache; typedef std::map<boost::thread::id, PhraseCache> SentenceCache;

View File

@ -49,16 +49,17 @@ PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
} }
TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source PhraseDictionaryMemory::
, const TargetPhrase &target GetOrCreateTargetPhraseCollection(const Phrase &source,
, const Word *sourceLHS) const TargetPhrase &target,
const Word *sourceLHS)
{ {
PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS); PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetTargetPhraseCollection(); return currNode.GetTargetPhraseCollection();
} }
const TargetPhraseCollection* TargetPhraseCollection::shared_ptr
PhraseDictionaryMemory:: PhraseDictionaryMemory::
GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
{ {
@ -73,10 +74,10 @@ GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
const Word& word = source.GetWord(pos); const Word& word = source.GetWord(pos);
currNode = currNode->GetChild(word); currNode = currNode->GetChild(word);
if (currNode == NULL) if (currNode == NULL)
return NULL; return TargetPhraseCollection::shared_ptr();
} }
return &currNode->GetTargetPhraseCollection(); return currNode->GetTargetPhraseCollection();
} }
PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
@ -168,12 +169,11 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
lastWord.OnlyTheseFactors(m_inputFactors); lastWord.OnlyTheseFactors(m_inputFactors);
const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord); const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
TargetPhraseCollection::shared_ptr targetPhrases;
if (ptNode) { if (ptNode) {
const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection(); targetPhrases = ptNode->GetTargetPhraseCollection();
inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode); }
} else { inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
inputPath.SetTargetPhrases(*this, NULL, NULL);
}
} }
} }
} }

View File

@ -56,19 +56,23 @@ public:
std::size_t); std::size_t);
// only used by multi-model phrase table, and other meta-features // only used by multi-model phrase table, and other meta-features
const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const; TargetPhraseCollection::shared_ptr
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
void
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
TO_STRING(); TO_STRING();
protected: protected:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); GetOrCreateTargetPhraseCollection
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
PhraseDictionaryNodeMemory &GetOrCreateNode(const Phrase &source
, const TargetPhrase &target
, const Word *sourceLHS);
PhraseDictionaryNodeMemory &
GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS);
void SortAndPrune(); void SortAndPrune();
PhraseDictionaryNodeMemory m_collection; PhraseDictionaryNodeMemory m_collection;

View File

@ -26,8 +26,10 @@ using namespace std;
namespace Moses namespace Moses
{ {
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
:PhraseDictionary(line, true) PhraseDictionaryMultiModel::
PhraseDictionaryMultiModel(const std::string &line)
: PhraseDictionary(line, true)
{ {
ReadParameters(); ReadParameters();
@ -45,7 +47,8 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
} }
} }
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line) PhraseDictionaryMultiModel::
PhraseDictionaryMultiModel(int type, const std::string &line)
:PhraseDictionary(line, true) :PhraseDictionary(line, true)
{ {
if (type == 1) { if (type == 1) {
@ -56,7 +59,9 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri
} }
} }
void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value) void
PhraseDictionaryMultiModel::
SetParameter(const std::string& key, const std::string& value)
{ {
if (key == "mode") { if (key == "mode") {
m_mode = value; m_mode = value;
@ -70,9 +75,9 @@ void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std:
} }
} }
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel() PhraseDictionaryMultiModel::
{ ~PhraseDictionaryMultiModel()
} { }
void PhraseDictionaryMultiModel::Load() void PhraseDictionaryMultiModel::Load()
{ {
@ -88,18 +93,21 @@ void PhraseDictionaryMultiModel::Load()
} }
} }
TargetPhraseCollection::shared_ptr
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollectionLEGACY(const Phrase& src) const PhraseDictionaryMultiModel::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{ {
std::vector<std::vector<float> > multimodelweights = getWeights(m_numScoreComponents, true); std::vector<std::vector<float> > multimodelweights;
TargetPhraseCollection *ret = NULL; multimodelweights = getWeights(m_numScoreComponents, true);
TargetPhraseCollection::shared_ptr ret;
std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>); std::map<std::string, multiModelStats*>* allStats;
allStats = new(std::map<std::string,multiModelStats*>);
CollectSufficientStatistics(src, allStats); CollectSufficientStatistics(src, allStats);
ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights); ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
RemoveAllInMap(*allStats); RemoveAllInMap(*allStats);
delete allStats; delete allStats; // ??? Why the detour through malloc? UG
ret->NthElement(m_tableLimit); // sort the phrases for pruning later ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret); const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
@ -107,16 +115,19 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
return ret; return ret;
} }
void
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const PhraseDictionaryMultiModel::
CollectSufficientStatistics
(const Phrase& src, std::map<std::string, multiModelStats*>* allStats) const
{ {
for(size_t i = 0; i < m_numModels; ++i) { for(size_t i = 0; i < m_numModels; ++i) {
const PhraseDictionary &pd = *m_pd[i]; const PhraseDictionary &pd = *m_pd[i];
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src); TargetPhraseCollection::shared_ptr ret_raw;
ret_raw = pd.GetTargetPhraseCollectionLEGACY(src);
if (ret_raw != NULL) { if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase, iterLast; TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast;
if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) { if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
iterLast = ret_raw->begin() + m_tableLimit; iterLast = ret_raw->begin() + m_tableLimit;
} else { } else {
@ -130,7 +141,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
std::string targetString = targetPhrase->GetStringRep(m_output); std::string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) { if (allStats->find(targetString) == allStats->end()) {
multiModelStatistics * statistics = new multiModelStatistics; multiModelStats * statistics = new multiModelStats;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
statistics->p.resize(m_numScoreComponents); statistics->p.resize(m_numScoreComponents);
for(size_t j = 0; j < m_numScoreComponents; ++j) { for(size_t j = 0; j < m_numScoreComponents; ++j) {
@ -149,7 +160,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
(*allStats)[targetString] = statistics; (*allStats)[targetString] = statistics;
} }
multiModelStatistics * statistics = (*allStats)[targetString]; multiModelStats * statistics = (*allStats)[targetString];
for(size_t j = 0; j < m_numScoreComponents; ++j) { for(size_t j = 0; j < m_numScoreComponents; ++j) {
statistics->p[j][i] = UntransformScore(raw_scores[j]); statistics->p[j][i] = UntransformScore(raw_scores[j]);
@ -161,12 +172,17 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
} }
} }
TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const TargetPhraseCollection::shared_ptr
PhraseDictionaryMultiModel::
CreateTargetPhraseCollectionLinearInterpolation
( const Phrase& src,
std::map<std::string,multiModelStats*>* allStats,
std::vector<std::vector<float> > &multimodelweights) const
{ {
TargetPhraseCollection *ret = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelStatistics * statistics = iter->second; multiModelStats * statistics = iter->second;
Scores scoreVector(m_numScoreComponents); Scores scoreVector(m_numScoreComponents);
@ -188,7 +204,9 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
} }
//TODO: is it worth caching the results as long as weights don't change? //TODO: is it worth caching the results as long as weights don't change?
std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const std::vector<std::vector<float> >
PhraseDictionaryMultiModel::
getWeights(size_t numWeights, bool normalize) const
{ {
const std::vector<float>* weights_ptr; const std::vector<float>* weights_ptr;
std::vector<float> raw_weights; std::vector<float> raw_weights;
@ -237,7 +255,9 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
return multimodelweights; return multimodelweights;
} }
std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<float> &weights) const std::vector<float>
PhraseDictionaryMultiModel::
normalizeWeights(std::vector<float> &weights) const
{ {
std::vector<float> ret (m_numModels); std::vector<float> ret (m_numModels);
float total = std::accumulate(weights.begin(),weights.end(),0.0); float total = std::accumulate(weights.begin(),weights.end(),0.0);
@ -248,29 +268,36 @@ std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<floa
} }
ChartRuleLookupManager *PhraseDictionaryMultiModel::CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t) ChartRuleLookupManager *
PhraseDictionaryMultiModel::
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
std::size_t)
{ {
UTIL_THROW(util::Exception, "Phrase table used in chart decoder"); UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
} }
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence //copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc) void
PhraseDictionaryMultiModel::
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{ {
PhraseCache &ref = GetPhraseCache(); GetPhraseCache().push_back(tpc);
ref.push_back(tpc);
} }
void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source) void
PhraseDictionaryMultiModel::
CleanUpAfterSentenceProcessing(const InputType &source)
{ {
PhraseCache &ref = GetPhraseCache(); // PhraseCache &ref = GetPhraseCache();
for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) { // for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
delete *it; // it->reset();
} // }
PhraseCache temp; // PhraseCache temp;
temp.swap(ref); // temp.swap(ref);
GetPhraseCache().clear();
CleanUpComponentModels(source); CleanUpComponentModels(source);
@ -279,14 +306,18 @@ void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType
} }
void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source) void
PhraseDictionaryMultiModel::
CleanUpComponentModels(const InputType &source)
{ {
for(size_t i = 0; i < m_numModels; ++i) { for(size_t i = 0; i < m_numModels; ++i) {
m_pd[i]->CleanUpAfterSentenceProcessing(source); m_pd[i]->CleanUpAfterSentenceProcessing(source);
} }
} }
const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const const std::vector<float>*
PhraseDictionaryMultiModel::
GetTemporaryMultiModelWeightsVector() const
{ {
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights); boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
@ -300,7 +331,9 @@ const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeig
#endif #endif
} }
void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights) void
PhraseDictionaryMultiModel::
SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
{ {
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_lock_weights); boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
@ -311,7 +344,9 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector
} }
#ifdef WITH_DLIB #ifdef WITH_DLIB
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector) vector<float>
PhraseDictionaryMultiModel::
MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
{ {
map<pair<string, string>, size_t> phrase_pair_map; map<pair<string, string>, size_t> phrase_pair_map;
@ -320,7 +355,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
phrase_pair_map[*iter] += 1; phrase_pair_map[*iter] += 1;
} }
vector<multiModelStatisticsOptimization*> optimizerStats; vector<multiModelStatsOptimization*> optimizerStats;
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) { for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
@ -329,7 +364,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
string target_string = phrase_pair.second; string target_string = phrase_pair.second;
vector<float> fs(m_numModels); vector<float> fs(m_numModels);
map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>); map<string,multiModelStats*>* allStats = new(map<string,multiModelStats*>);
Phrase sourcePhrase(0); Phrase sourcePhrase(0);
sourcePhrase.CreateFromString(Input, m_input, source_string, NULL); sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
@ -343,7 +378,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
continue; continue;
} }
multiModelStatisticsOptimization* targetStatistics = new multiModelStatisticsOptimization(); multiModelStatsOptimization* targetStatistics = new multiModelStatsOptimization();
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase); targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStatistics->p = (*allStats)[target_string]->p; targetStatistics->p = (*allStats)[target_string]->p;
targetStatistics->f = iter->second; targetStatistics->f = iter->second;
@ -383,7 +418,9 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
} }
vector<float> PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels) vector<float>
PhraseDictionaryMultiModel::
Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
{ {
dlib::matrix<double,0,1> starting_point; dlib::matrix<double,0,1> starting_point;
@ -428,8 +465,8 @@ double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
weight_vector = m_model->normalizeWeights(weight_vector); weight_vector = m_model->normalizeWeights(weight_vector);
} }
for ( std::vector<multiModelStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) { for ( std::vector<multiModelStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelStatisticsOptimization* statistics = *iter; multiModelStatsOptimization* statistics = *iter;
size_t f = statistics->f; size_t f = statistics->f;
double score; double score;

View File

@ -36,15 +36,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses namespace Moses
{ {
struct multiModelStatistics { struct multiModelStats {
TargetPhrase *targetPhrase; TargetPhrase *targetPhrase;
std::vector<std::vector<float> > p; std::vector<std::vector<float> > p;
~multiModelStatistics() { ~multiModelStats() {
delete targetPhrase; delete targetPhrase;
}; };
}; };
struct multiModelStatisticsOptimization: multiModelStatistics { struct multiModelStatsOptimization: multiModelStats {
size_t f; size_t f;
}; };
@ -71,27 +71,59 @@ public:
PhraseDictionaryMultiModel(int type, const std::string &line); PhraseDictionaryMultiModel(int type, const std::string &line);
~PhraseDictionaryMultiModel(); ~PhraseDictionaryMultiModel();
void Load(); void Load();
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const; virtual void
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const; CollectSufficientStatistics
std::vector<float> normalizeWeights(std::vector<float> &weights) const; (const Phrase& src, std::map<std::string,multiModelStats*>* allStats)
void CacheForCleanup(TargetPhraseCollection* tpc); const;
void CleanUpAfterSentenceProcessing(const InputType &source);
virtual void CleanUpComponentModels(const InputType &source); virtual TargetPhraseCollection::shared_ptr
CreateTargetPhraseCollectionLinearInterpolation
(const Phrase& src, std::map<std::string,multiModelStats*>* allStats,
std::vector<std::vector<float> > &multimodelweights) const;
std::vector<std::vector<float> >
getWeights(size_t numWeights, bool normalize) const;
std::vector<float>
normalizeWeights(std::vector<float> &weights) const;
void
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
void
CleanUpAfterSentenceProcessing(const InputType &source);
virtual void
CleanUpComponentModels(const InputType &source);
#ifdef WITH_DLIB #ifdef WITH_DLIB
virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector); virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels); std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
#endif #endif
// functions below required by base class
virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
virtual void InitializeForInput(ttasksptr const& ttask) {
/* Don't do anything source specific here as this object is shared between threads.*/
}
ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t);
void SetParameter(const std::string& key, const std::string& value);
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const; // functions below required by base class
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights); virtual TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
virtual void
InitializeForInput(ttasksptr const& ttask) {
// Don't do anything source specific here as this object is shared
// between threads.
}
ChartRuleLookupManager*
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
std::size_t);
void
SetParameter(const std::string& key, const std::string& value);
const std::vector<float>*
GetTemporaryMultiModelWeightsVector() const;
void
SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
protected: protected:
std::string m_mode; std::string m_mode;
@ -100,7 +132,7 @@ protected:
size_t m_numModels; size_t m_numModels;
std::vector<float> m_multimodelweights; std::vector<float> m_multimodelweights;
typedef std::vector<TargetPhraseCollection*> PhraseCache; typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_mutex m_lock_cache; boost::shared_mutex m_lock_cache;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache; typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
@ -146,7 +178,7 @@ class CrossEntropy: public OptimizationObjective
public: public:
CrossEntropy ( CrossEntropy (
std::vector<multiModelStatisticsOptimization*> &optimizerStats, std::vector<multiModelStatsOptimization*> &optimizerStats,
PhraseDictionaryMultiModel * model, PhraseDictionaryMultiModel * model,
size_t iFeature size_t iFeature
) { ) {
@ -158,7 +190,7 @@ public:
double operator() ( const dlib::matrix<double,0,1>& arg) const; double operator() ( const dlib::matrix<double,0,1>& arg) const;
protected: protected:
std::vector<multiModelStatisticsOptimization*> m_optimizerStats; std::vector<multiModelStatsOptimization*> m_optimizerStats;
PhraseDictionaryMultiModel * m_model; PhraseDictionaryMultiModel * m_model;
size_t m_iFeature; size_t m_iFeature;
}; };

View File

@ -120,7 +120,7 @@ void PhraseDictionaryMultiModelCounts::Load()
} }
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{ {
vector<vector<float> > multimodelweights; vector<vector<float> > multimodelweights;
bool normalize; bool normalize;
@ -130,11 +130,12 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
//source phrase frequency is shared among all phrase pairs //source phrase frequency is shared among all phrase pairs
vector<float> fs(m_numModels); vector<float> fs(m_numModels);
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>); map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
CollectSufficientStatistics(src, fs, allStats); CollectSufficientStats(src, fs, allStats);
TargetPhraseCollection *ret = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights); TargetPhraseCollection::shared_ptr ret
= CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
ret->NthElement(m_tableLimit); // sort the phrases for pruning later ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret); const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret);
@ -142,16 +143,17 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
} }
void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats) const
//fill fs and allStats with statistics from models //fill fs and allStats with statistics from models
{ {
for(size_t i = 0; i < m_numModels; ++i) { for(size_t i = 0; i < m_numModels; ++i) {
const PhraseDictionary &pd = *m_pd[i]; const PhraseDictionary &pd = *m_pd[i];
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src); TargetPhraseCollection::shared_ptr ret_raw
= pd.GetTargetPhraseCollectionLEGACY(src);
if (ret_raw != NULL) { if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase; TargetPhraseCollection::const_iterator iterTargetPhrase;
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) { for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
const TargetPhrase * targetPhrase = *iterTargetPhrase; const TargetPhrase * targetPhrase = *iterTargetPhrase;
@ -160,7 +162,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
string targetString = targetPhrase->GetStringRep(m_output); string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) { if (allStats->find(targetString) == allStats->end()) {
multiModelCountsStatistics * statistics = new multiModelCountsStatistics; multiModelCountsStats * statistics = new multiModelCountsStats;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
//correct future cost estimates and total score //correct future cost estimates and total score
@ -178,7 +180,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
(*allStats)[targetString] = statistics; (*allStats)[targetString] = statistics;
} }
multiModelCountsStatistics * statistics = (*allStats)[targetString]; multiModelCountsStats * statistics = (*allStats)[targetString];
statistics->fst[i] = UntransformScore(raw_scores[0]); statistics->fst[i] = UntransformScore(raw_scores[0]);
statistics->ft[i] = UntransformScore(raw_scores[1]); statistics->ft[i] = UntransformScore(raw_scores[1]);
@ -189,8 +191,8 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
} }
// get target phrase frequency for models which have not seen the phrase pair // get target phrase frequency for models which have not seen the phrase pair
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStatistics * statistics = iter->second; multiModelCountsStats * statistics = iter->second;
for (size_t i = 0; i < m_numModels; ++i) { for (size_t i = 0; i < m_numModels; ++i) {
if (!statistics->ft[i]) { if (!statistics->ft[i]) {
@ -200,12 +202,14 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
} }
} }
TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats, vector<vector<float> > &multimodelweights) const TargetPhraseCollection::shared_ptr
PhraseDictionaryMultiModelCounts::
CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats, vector<vector<float> > &multimodelweights) const
{ {
TargetPhraseCollection *ret = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) { for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStatistics * statistics = iter->second; multiModelCountsStats * statistics = iter->second;
if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) { if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables."); UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
@ -248,7 +252,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
{ {
const PhraseDictionary &pd = *m_inverse_pd[modelIndex]; const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollectionLEGACY(target); TargetPhraseCollection::shared_ptr ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score) // in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
if (ret_raw && ret_raw->GetSize() > 0) { if (ret_raw && ret_raw->GetSize() > 0) {
@ -320,7 +324,7 @@ double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( cons
} }
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input ) lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
{ {
//do all the necessary lexical table lookups and get counts, but don't apply weights yet //do all the necessary lexical table lookups and get counts, but don't apply weights yet
@ -474,7 +478,7 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
phrase_pair_map[*iter] += 1; phrase_pair_map[*iter] += 1;
} }
vector<multiModelCountsStatisticsOptimization*> optimizerStats; vector<multiModelCountsStatsOptimization*> optimizerStats;
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) { for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
@ -483,12 +487,12 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
string target_string = phrase_pair.second; string target_string = phrase_pair.second;
vector<float> fs(m_numModels); vector<float> fs(m_numModels);
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>); map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
Phrase sourcePhrase(0); Phrase sourcePhrase(0);
sourcePhrase.CreateFromString(Input, m_input, source_string, NULL); sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase CollectSufficientStats(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
//phrase pair not found; leave cache empty //phrase pair not found; leave cache empty
if (allStats->find(target_string) == allStats->end()) { if (allStats->find(target_string) == allStats->end()) {
@ -497,19 +501,19 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
continue; continue;
} }
multiModelCountsStatisticsOptimization * targetStatistics = new multiModelCountsStatisticsOptimization(); multiModelCountsStatsOptimization * targetStats = new multiModelCountsStatsOptimization();
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase); targetStats->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStatistics->fs = fs; targetStats->fs = fs;
targetStatistics->fst = (*allStats)[target_string]->fst; targetStats->fst = (*allStats)[target_string]->fst;
targetStatistics->ft = (*allStats)[target_string]->ft; targetStats->ft = (*allStats)[target_string]->ft;
targetStatistics->f = iter->second; targetStats->f = iter->second;
try { try {
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), targetStatistics->targetPhrase->GetAlignTerm()); pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), targetStats->targetPhrase->GetAlignTerm());
targetStatistics->lexCachee2f = CacheLexicalStatistics(static_cast<const Phrase&>(*targetStatistics->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false ); targetStats->lexCachee2f = CacheLexicalStats(static_cast<const Phrase&>(*targetStats->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
targetStatistics->lexCachef2e = CacheLexicalStatistics(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), alignment.first, m_lexTable_f2e, true ); targetStats->lexCachef2e = CacheLexicalStats(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), alignment.first, m_lexTable_f2e, true );
optimizerStats.push_back(targetStatistics); optimizerStats.push_back(targetStats);
} catch (AlignmentException& e) {} } catch (AlignmentException& e) {}
RemoveAllInMap(*allStats); RemoveAllInMap(*allStats);
@ -561,8 +565,8 @@ double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) con
weight_vector = m_model->normalizeWeights(weight_vector); weight_vector = m_model->normalizeWeights(weight_vector);
} }
for ( std::vector<multiModelCountsStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) { for ( std::vector<multiModelCountsStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelCountsStatisticsOptimization* statistics = *iter; multiModelCountsStatsOptimization* statistics = *iter;
size_t f = statistics->f; size_t f = statistics->f;
double score; double score;

View File

@ -37,11 +37,11 @@ typedef boost::unordered_map<Word, lexicalMap > lexicalMapJoint;
typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair; typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
typedef std::vector<std::vector<lexicalPair> > lexicalCache; typedef std::vector<std::vector<lexicalPair> > lexicalCache;
struct multiModelCountsStatistics : multiModelStatistics { struct multiModelCountsStats : multiModelStats {
std::vector<float> fst, ft; std::vector<float> fst, ft;
}; };
struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics { struct multiModelCountsStatsOptimization: multiModelCountsStats {
std::vector<float> fs; std::vector<float> fs;
lexicalCache lexCachee2f, lexCachef2e; lexicalCache lexCachee2f, lexCachef2e;
size_t f; size_t f;
@ -80,18 +80,18 @@ public:
PhraseDictionaryMultiModelCounts(const std::string &line); PhraseDictionaryMultiModelCounts(const std::string &line);
~PhraseDictionaryMultiModelCounts(); ~PhraseDictionaryMultiModelCounts();
void Load(); void Load();
TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const; TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
void CollectSufficientStatistics(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats) const; void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const;
float GetTargetCount(const Phrase& target, size_t modelIndex) const; float GetTargetCount(const Phrase& target, size_t modelIndex) const;
double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const; double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const;
double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const; double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const;
double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const; double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const;
std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const; std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const;
std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input ); std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const; void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const; void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable); void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const; TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
#ifdef WITH_DLIB #ifdef WITH_DLIB
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector); std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
#endif #endif
@ -117,7 +117,7 @@ class CrossEntropyCounts: public OptimizationObjective
public: public:
CrossEntropyCounts ( CrossEntropyCounts (
std::vector<multiModelCountsStatisticsOptimization*> &optimizerStats, std::vector<multiModelCountsStatsOptimization*> &optimizerStats,
PhraseDictionaryMultiModelCounts * model, PhraseDictionaryMultiModelCounts * model,
size_t iFeature size_t iFeature
) { ) {
@ -129,7 +129,7 @@ public:
double operator() ( const dlib::matrix<double,0,1>& arg) const; double operator() ( const dlib::matrix<double,0,1>& arg) const;
private: private:
std::vector<multiModelCountsStatisticsOptimization*> m_optimizerStats; std::vector<multiModelCountsStatsOptimization*> m_optimizerStats;
PhraseDictionaryMultiModelCounts * m_model; PhraseDictionaryMultiModelCounts * m_model;
size_t m_iFeature; size_t m_iFeature;
}; };

View File

@ -39,7 +39,7 @@ void PhraseDictionaryNodeMemory::Prune(size_t tableLimit)
} }
// prune TargetPhraseCollection in this node // prune TargetPhraseCollection in this node
m_targetPhraseCollection.Prune(true, tableLimit); m_targetPhraseCollection->Prune(true, tableLimit);
} }
void PhraseDictionaryNodeMemory::Sort(size_t tableLimit) void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
@ -53,10 +53,11 @@ void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
} }
// prune TargetPhraseCollection in this node // prune TargetPhraseCollection in this node
m_targetPhraseCollection.Sort(true, tableLimit); m_targetPhraseCollection->Sort(true, tableLimit);
} }
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm) PhraseDictionaryNodeMemory*
PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
{ {
return &m_sourceTermMap[sourceTerm]; return &m_sourceTermMap[sourceTerm];
} }
@ -118,7 +119,7 @@ void PhraseDictionaryNodeMemory::Remove()
{ {
m_sourceTermMap.clear(); m_sourceTermMap.clear();
m_nonTermMap.clear(); m_nonTermMap.clear();
m_targetPhraseCollection.Remove(); m_targetPhraseCollection->Remove();
} }
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeMemory &node) std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeMemory &node)

View File

@ -130,12 +130,13 @@ private:
TerminalMap m_sourceTermMap; TerminalMap m_sourceTermMap;
NonTerminalMap m_nonTermMap; NonTerminalMap m_nonTermMap;
TargetPhraseCollection m_targetPhraseCollection; TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
public: public:
PhraseDictionaryNodeMemory() {} PhraseDictionaryNodeMemory()
: m_targetPhraseCollection(new TargetPhraseCollection) { }
bool IsLeaf() const { bool IsLeaf() const {
return m_sourceTermMap.empty() && m_nonTermMap.empty(); return m_sourceTermMap.empty() && m_nonTermMap.empty();
} }
@ -152,10 +153,12 @@ public:
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const; const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
#endif #endif
const TargetPhraseCollection &GetTargetPhraseCollection() const { TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() const {
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }
TargetPhraseCollection &GetTargetPhraseCollection() { TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() {
return m_targetPhraseCollection; return m_targetPhraseCollection;
} }

View File

@ -54,7 +54,9 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
} }
} }
void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const void
PhraseDictionaryTransliteration::
GetTargetPhraseCollection(InputPath &inputPath) const
{ {
const Phrase &sourcePhrase = inputPath.GetPhrase(); const Phrase &sourcePhrase = inputPath.GetPhrase();
size_t hash = hash_value(sourcePhrase); size_t hash = hash_value(sourcePhrase);
@ -66,7 +68,7 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
if (iter != cache.end()) { if (iter != cache.end()) {
// already in cache // already in cache
const TargetPhraseCollection *tpColl = iter->second.first; TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
inputPath.SetTargetPhrases(*this, tpColl, NULL); inputPath.SetTargetPhrases(*this, tpColl, NULL);
} else { } else {
// TRANSLITERATE // TRANSLITERATE
@ -89,17 +91,15 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
int ret = system(cmd.c_str()); int ret = system(cmd.c_str());
UTIL_THROW_IF2(ret != 0, "Transliteration script error"); UTIL_THROW_IF2(ret != 0, "Transliteration script error");
TargetPhraseCollection *tpColl = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.path()); vector<TargetPhrase*> targetPhrases
= CreateTargetPhrases(sourcePhrase, outDir.path());
vector<TargetPhrase*>::const_iterator iter; vector<TargetPhrase*>::const_iterator iter;
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) { for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
TargetPhrase *tp = *iter; TargetPhrase *tp = *iter;
tpColl->Add(tp); tpColl->Add(tp);
} }
cache[hash] = CacheCollEntry(tpColl, clock());
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL); inputPath.SetTargetPhrases(*this, tpColl, NULL);
} }
} }

View File

@ -74,11 +74,10 @@ void PhraseDictionaryTreeAdaptor::CleanUpAfterSentenceProcessing(InputType const
obj.CleanUp(); obj.CleanUp();
} }
TargetPhraseCollection const* TargetPhraseCollection::shared_ptr
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
{ {
const TargetPhraseCollection *ret = GetImplementation().GetTargetPhraseCollection(src); return GetImplementation().GetTargetPhraseCollection(src);
return ret;
} }
void PhraseDictionaryTreeAdaptor::EnableCache() void PhraseDictionaryTreeAdaptor::EnableCache()
@ -107,16 +106,17 @@ const PDTAimp& PhraseDictionaryTreeAdaptor::GetImplementation() const
} }
// legacy // legacy
const TargetPhraseCollectionWithSourcePhrase* TargetPhraseCollectionWithSourcePhrase::shared_ptr
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const PhraseDictionaryTreeAdaptor::
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
{ {
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
if(GetImplementation().m_rangeCache.empty()) { if(GetImplementation().m_rangeCache.empty()) {
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range)); ret = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
return tpColl;
} else { } else {
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()]; ret = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
return tpColl;
} }
return ret;
} }
} }

View File

@ -59,7 +59,8 @@ public:
// get translation candidates for a given source phrase // get translation candidates for a given source phrase
// returns null pointer if nothing found // returns null pointer if nothing found
TargetPhraseCollection const* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
void InitializeForInput(ttasksptr const& ttask); void InitializeForInput(ttasksptr const& ttask);
void CleanUpAfterSentenceProcessing(InputType const& source); void CleanUpAfterSentenceProcessing(InputType const& source);
@ -73,7 +74,9 @@ public:
} }
// legacy // legacy
const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const & srcRange) const; TargetPhraseCollectionWithSourcePhrase::shared_ptr
GetTargetPhraseCollectionLEGACY(InputType const& src,
WordsRange const & srcRange) const;
}; };

View File

@ -79,11 +79,11 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
continue; continue;
} }
TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase); TargetPhraseCollection::shared_ptr tpColl = CreateTargetPhrase(sourcePhrase);
// add target phrase to phrase-table cache // add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase); size_t hash = hash_value(sourcePhrase);
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock()); std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(tpColl, clock());
cache[hash] = value; cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL); inputPath.SetTargetPhrases(*this, tpColl, NULL);
@ -109,7 +109,7 @@ std::vector<uint64_t> ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour
return ret; return ret;
} }
TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const TargetPhraseCollection::shared_ptr ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
{ {
// create a target phrase from the 1st word of the source, prefix with 'ProbingPT:' // create a target phrase from the 1st word of the source, prefix with 'ProbingPT:'
assert(sourcePhrase.GetSize()); assert(sourcePhrase.GetSize());
@ -124,7 +124,7 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
std::pair<bool, std::vector<target_text> > query_result; std::pair<bool, std::vector<target_text> > query_result;
TargetPhraseCollection *tpColl = NULL; TargetPhraseCollection::shared_ptr tpColl = NULL;
//Actual lookup //Actual lookup
query_result = m_engine->query(probingSource); query_result = m_engine->query(probingSource);

View File

@ -49,12 +49,14 @@ protected:
// Provide access to RuleTableTrie's private // Provide access to RuleTableTrie's private
// GetOrCreateTargetPhraseCollection function. // GetOrCreateTargetPhraseCollection function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
RuleTableTrie &ruleTable GetOrCreateTargetPhraseCollection(RuleTableTrie &ruleTable,
, const Phrase &source const Phrase &source,
, const TargetPhrase &target const TargetPhrase &target,
, const Word *sourceLHS) { const Word *sourceLHS)
return ruleTable.GetOrCreateTargetPhraseCollection(source, target, sourceLHS); {
return ruleTable.GetOrCreateTargetPhraseCollection(source, target,
sourceLHS);
} }
}; };

View File

@ -224,9 +224,10 @@ bool RuleTableLoaderCompact::LoadRuleSection(
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply()); targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
// Insert rule into table. // Insert rule into table.
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr coll;
ruleTable, sourcePhrase, *targetPhrase, &sourceLHS); coll = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
coll.Add(targetPhrase); *targetPhrase, &sourceLHS);
coll->Add(targetPhrase);
} }
return true; return true;

View File

@ -242,8 +242,10 @@ bool RuleTableLoaderStandard::Load(FormatType format
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector); targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply()); targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS); TargetPhraseCollection::shared_ptr phraseColl
phraseColl.Add(targetPhrase); = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
*targetPhrase, sourceLHS);
phraseColl->Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now // not implemented correctly in memory pt. just delete it for now
delete sourceLHS; delete sourceLHS;

View File

@ -282,8 +282,10 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply()); targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS); TargetPhraseCollection::shared_ptr phraseColl
phraseColl.Add(targetPhrase); = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase,
*targetPhrase, sourceLHS);
phraseColl->Add(targetPhrase);
count++; count++;
@ -301,7 +303,9 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
//removedirectoryrecursively(dirName); //removedirectoryrecursively(dirName);
} }
TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode TargetPhraseCollection::shared_ptr
PhraseDictionaryFuzzyMatch::
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
, const Phrase &source , const Phrase &source
, const TargetPhrase &target , const TargetPhrase &target
, const Word *sourceLHS) , const Word *sourceLHS)

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
/*********************************************************************** /***********************************************************************
Moses - statistical machine translation system Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh Copyright (C) 2006-2011 University of Edinburgh
@ -59,7 +60,8 @@ public:
TO_STRING(); TO_STRING();
protected: protected:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
, const Phrase &source , const Phrase &source
, const TargetPhrase &target , const TargetPhrase &target
, const Word *sourceLHS); , const Word *sourceLHS);

View File

@ -149,26 +149,26 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
lastWord.OnlyTheseFactors(m_inputFactors); lastWord.OnlyTheseFactors(m_inputFactors);
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord); OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
TargetPhraseCollection::shared_ptr tpc;
if (lastWordOnDisk == NULL) { if (lastWordOnDisk == NULL) {
// OOV according to this phrase table. Not possible to extend // OOV according to this phrase table. Not possible to extend
inputPath.SetTargetPhrases(*this, NULL, NULL); inputPath.SetTargetPhrases(*this, tpc, NULL);
} else { } else {
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper); OnDiskPt::PhraseNode const* ptNode;
if (ptNode) { ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode); if (ptNode) tpc = GetTargetPhraseCollection(ptNode);
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode); inputPath.SetTargetPhrases(*this, tpc, ptNode);
} else {
inputPath.SetTargetPhrases(*this, NULL, NULL);
}
delete lastWordOnDisk; delete lastWordOnDisk;
} }
} }
} }
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const TargetPhraseCollection::shared_ptr
PhraseDictionaryOnDisk::
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
{ {
const TargetPhraseCollection *ret; TargetPhraseCollection::shared_ptr ret;
CacheColl &cache = GetCache(); CacheColl &cache = GetCache();
size_t hash = (size_t) ptNode->GetFilePos(); size_t hash = (size_t) ptNode->GetFilePos();
@ -181,31 +181,34 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(
// not in cache, need to look up from phrase table // not in cache, need to look up from phrase table
ret = GetTargetPhraseCollectionNonCache(ptNode); ret = GetTargetPhraseCollectionNonCache(ptNode);
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock()); std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(ret, clock());
cache[hash] = value; cache[hash] = value;
} else { } else {
// in cache. just use it // in cache. just use it
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second; iter->second.second = clock();
value.second = clock(); ret = iter->second.first;
ret = value.first;
} }
return ret; return ret;
} }
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const TargetPhraseCollection::shared_ptr
PhraseDictionaryOnDisk::
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
{ {
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation()); OnDiskPt::OnDiskWrapper& wrapper
= const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
vector<float> weightT = StaticData::Instance().GetWeights(this); vector<float> weightT = StaticData::Instance().GetWeights(this);
OnDiskPt::Vocab &vocab = wrapper.GetVocab(); OnDiskPt::Vocab &vocab = wrapper.GetVocab();
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper); OnDiskPt::TargetPhraseCollection::shared_ptr targetPhrasesOnDisk
TargetPhraseCollection *targetPhrases = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false); TargetPhraseCollection::shared_ptr targetPhrases
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this,
weightT, vocab, false);
delete targetPhrasesOnDisk; // delete targetPhrasesOnDisk;
return targetPhrases; return targetPhrases;
} }

View File

@ -78,8 +78,11 @@ public:
virtual void InitializeForInput(ttasksptr const& ttask); virtual void InitializeForInput(ttasksptr const& ttask);
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const; TargetPhraseCollection::shared_ptr
const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const; GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
void SetParameter(const std::string& key, const std::string& value); void SetParameter(const std::string& key, const std::string& value);

View File

@ -51,9 +51,10 @@ public:
private: private:
friend class RuleTableLoader; friend class RuleTableLoader;
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection( virtual TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, GetOrCreateTargetPhraseCollection(const Phrase &source,
const Word *sourceLHS) = 0; const TargetPhrase &target,
const Word *sourceLHS) = 0;
virtual void SortAndPrune() = 0; virtual void SortAndPrune() = 0;

View File

@ -38,8 +38,11 @@
namespace Moses namespace Moses
{ {
TargetPhraseCollection &RuleTableUTrie::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS) RuleTableUTrie::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{ {
UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS); UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection(target); return currNode.GetOrCreateTargetPhraseCollection(target);

View File

@ -21,13 +21,13 @@
#include "Trie.h" #include "Trie.h"
#include "UTrieNode.h" #include "UTrieNode.h"
#include "moses/TargetPhraseCollection.h"
namespace Moses namespace Moses
{ {
class Phrase; class Phrase;
class TargetPhrase; class TargetPhrase;
class TargetPhraseCollection;
class Word; class Word;
class ChartParser; class ChartParser;
@ -57,8 +57,10 @@ public:
const ChartCellCollectionBase &, std::size_t); const ChartCellCollectionBase &, std::size_t);
private: private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS); GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS);
UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target, UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS); const Word *sourceLHS);

View File

@ -49,7 +49,7 @@ void UTrieNode::Prune(size_t tableLimit)
// Prune TargetPhraseCollections at this node. // Prune TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Prune(true, tableLimit); p->second->Prune(true, tableLimit);
} }
} }
@ -66,7 +66,7 @@ void UTrieNode::Sort(size_t tableLimit)
// Sort TargetPhraseCollections at this node. // Sort TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) { for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Sort(true, tableLimit); p->second->Sort(true, tableLimit);
} }
} }
@ -89,8 +89,9 @@ UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
return m_gapNode; return m_gapNode;
} }
TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const TargetPhrase &target) UTrieNode::
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
{ {
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm(); const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
const size_t rank = alignmentInfo.GetSize(); const size_t rank = alignmentInfo.GetSize();
@ -107,8 +108,9 @@ TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
const Word &targetNonTerm = target.GetWord(targetNonTermIndex); const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
vec.push_back(InsertLabel(i++, targetNonTerm)); vec.push_back(InsertLabel(i++, targetNonTerm));
} }
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
return m_labelMap[vec]; if (ret == NULL) ret.reset(new TargetPhraseCollection);
return ret;
} }
} // namespace Moses } // namespace Moses

View File

@ -51,10 +51,10 @@ public:
TerminalEqualityPred> TerminalMap; TerminalEqualityPred> TerminalMap;
typedef boost::unordered_map<std::vector<int>, typedef boost::unordered_map<std::vector<int>,
TargetPhraseCollection> LabelMap; TargetPhraseCollection::shared_ptr> LabelMap;
#else #else
typedef std::map<Word, UTrieNode> TerminalMap; typedef std::map<Word, UTrieNode> TerminalMap;
typedef std::map<std::vector<int>, TargetPhraseCollection> LabelMap; typedef std::map<std::vector<int>, TargetPhraseCollection::shared_ptr> LabelMap;
#endif #endif
~UTrieNode() { ~UTrieNode() {
@ -78,8 +78,8 @@ public:
UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm); UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm);
UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm); UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
TargetPhraseCollection &GetOrCreateTargetPhraseCollection( TargetPhraseCollection::shared_ptr
const TargetPhrase &); GetOrCreateTargetPhraseCollection(const TargetPhrase &);
bool IsLeaf() const { bool IsLeaf() const {
return m_terminalMap.empty() && m_gapNode == NULL; return m_terminalMap.empty() && m_gapNode == NULL;

View File

@ -47,7 +47,8 @@ void Scope3Parser::GetChartRuleCollection(
const size_t start = range.GetStartPos(); const size_t start = range.GetStartPos();
const size_t end = range.GetEndPos(); const size_t end = range.GetEndPos();
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec = m_ruleApplications[start][end-start+1]; std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec
= m_ruleApplications[start][end-start+1];
MatchCallback matchCB(range, outColl); MatchCallback matchCB(range, outColl);
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) { for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
@ -58,8 +59,8 @@ void Scope3Parser::GetChartRuleCollection(
if (varSpanNode.m_rank == 0) { // Purely lexical rule. if (varSpanNode.m_rank == 0) { // Purely lexical rule.
assert(labelMap.size() == 1); assert(labelMap.size() == 1);
const TargetPhraseCollection &tpc = labelMap.begin()->second; TargetPhraseCollection::shared_ptr tpc = labelMap.begin()->second;
matchCB.m_tpc = &tpc; matchCB.m_tpc = tpc;
matchCB(m_emptyStackVec); matchCB(m_emptyStackVec);
} else { // Rule has at least one non-terminal. } else { // Rule has at least one non-terminal.
varSpanNode.CalculateRanges(start, end, m_ranges); varSpanNode.CalculateRanges(start, end, m_ranges);
@ -70,7 +71,7 @@ void Scope3Parser::GetChartRuleCollection(
UTrieNode::LabelMap::const_iterator p = labelMap.begin(); UTrieNode::LabelMap::const_iterator p = labelMap.begin();
for (; p != labelMap.end(); ++p) { for (; p != labelMap.end(); ++p) {
const std::vector<int> &labels = p->first; const std::vector<int> &labels = p->first;
const TargetPhraseCollection &tpc = p->second; TargetPhraseCollection::shared_ptr tpc = p->second;
assert(labels.size() == varSpanNode.m_rank); assert(labels.size() == varSpanNode.m_rank);
bool failCheck = false; bool failCheck = false;
for (size_t i = 0; i < varSpanNode.m_rank; ++i) { for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
@ -82,7 +83,7 @@ void Scope3Parser::GetChartRuleCollection(
if (failCheck) { if (failCheck) {
continue; continue;
} }
matchCB.m_tpc = &tpc; matchCB.m_tpc = tpc;
searcher.Search(labels, matchCB); searcher.Search(labels, matchCB);
} }
} }

View File

@ -66,17 +66,16 @@ private:
// Define a callback type for use by StackLatticeSearcher. // Define a callback type for use by StackLatticeSearcher.
struct MatchCallback { struct MatchCallback {
public: public:
MatchCallback(const WordsRange &range, MatchCallback(const WordsRange &range, ChartParserCallback &out)
ChartParserCallback &out) : m_range(range) , m_out(out) // , m_tpc(NULL)
: m_range(range) { }
, m_out(out)
, m_tpc(NULL) {}
void operator()(const StackVec &stackVec) { void operator()(const StackVec &stackVec) {
m_out.Add(*m_tpc, stackVec, m_range); m_out.Add(*m_tpc, stackVec, m_range);
} }
const WordsRange &m_range; const WordsRange &m_range;
ChartParserCallback &m_out; ChartParserCallback &m_out;
const TargetPhraseCollection *m_tpc; TargetPhraseCollection::shared_ptr m_tpc;
}; };
void Init(); void Init();

View File

@ -32,12 +32,13 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
const Phrase &sourcePhrase = inputPath.GetPhrase(); const Phrase &sourcePhrase = inputPath.GetPhrase();
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase); TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
TargetPhraseCollection *tpColl = new TargetPhraseCollection(); TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
tpColl->Add(tp); tpColl->Add(tp);
// add target phrase to phrase-table cache // add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase); size_t hash = hash_value(sourcePhrase);
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock()); std::pair<TargetPhraseCollection::shared_ptr, clock_t>
value(tpColl, clock());
cache[hash] = value; cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL); inputPath.SetTargetPhrases(*this, tpColl, NULL);

View File

@ -4,182 +4,52 @@ namespace Moses
{ {
using std::vector; using std::vector;
TPCollCache TPCollCache::
::TPCollCache(size_t capacity) TPCollCache(size_t capacity)
{ {
m_doomed_first = m_doomed_last = NULL; m_qfirst = m_qlast = m_cache.end();
m_doomed_count = 0;
m_capacity = capacity; m_capacity = capacity;
UTIL_THROW_IF2(m_capacity <= 2, "Cache capacity must be > 1!");
} }
SPTR<TPCollWrapper>
bool TPCollCache::
sancheck(TPCollWrapper const* first, TPCollWrapper const* last, size_t count) get(uint64_t key, size_t revision)
{
if (first == NULL)
{
UTIL_THROW_IF2(last != NULL || count != 0, "queue error");
return true;
}
size_t s = 0;
for (TPCollWrapper const* x = first; x; x = x->next)
{
std::cerr << ++s << "/" << count << " "
<< first << " "
<< x->prev << " " << x << " " << x->next << " "
<< last << std::endl;
}
std::cerr << std::string(80,'-') << std::endl;
// while (x != last && s < count)
// {
// UTIL_THROW_IF2(x->next == NULL, "queue error");
// x = x->next;
// ++s;
// std::cerr << x << " " << s << "/" << count << std::endl;
// }
// std::cerr << x << " " << s << "/" << count << std::endl;
// UTIL_THROW_IF2(x != last, "queue error");
// UTIL_THROW_IF2(s != count, "queue error");
// x = last; s = 1;
// while (x != first && s++ < count)
// {
// UTIL_THROW_IF2(x->prev == NULL, "queue error");
// x = x->prev;
// }
// UTIL_THROW_IF2(x != first, "queue error");
// UTIL_THROW_IF2(s != count, "queue error");
return true;
}
/// remove a TPC from the "doomed" queue
void
TPCollCache
::remove_from_queue(TPCollWrapper* x)
{
// caller must lock!
if (m_doomed_first != x && x->prev == NULL)
{ // not in the queue
UTIL_THROW_IF2(x->next, "queue error");
return;
}
sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
std::cerr << "Removing " << x << std::endl;
if (m_doomed_first == x)
m_doomed_first = x->next;
else x->prev->next = x->next;
if (m_doomed_last == x)
m_doomed_last = x->prev;
else x->next->prev = x->prev;
x->next = x->prev = NULL;
--m_doomed_count;
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
}
void
TPCollCache
::add_to_queue(TPCollWrapper* x)
{
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
// caller must lock!
x->prev = m_doomed_last;
if (!m_doomed_first)
m_doomed_first = x;
if (m_doomed_last) m_doomed_last->next = x;
m_doomed_last = x;
++m_doomed_count;
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
}
TPCollWrapper*
TPCollCache
::get(uint64_t key, size_t revision)
{ {
using namespace boost; using namespace boost;
upgrade_lock<shared_mutex> rlock(m_lock); unique_lock<shared_mutex> lock(m_lock);
cache_t::iterator m = m_cache.find(key); std::pair<uint64_t, SPTR<TPCollWrapper> > e(key, SPTR<TPCollWrapper>());
if (m == m_cache.end()) // new std::pair<cache_t::iterator, bool> foo = m_cache.insert(e);
SPTR<TPCollWrapper>& ret = foo.first->second;
if (ret)
{ {
std::pair<uint64_t,TPCollWrapper*> e(key,NULL); if (m_qfirst == foo.first) m_qfirst = ret->next;
upgrade_to_unique_lock<shared_mutex> wlock(rlock); else ret->prev->second->next = ret->next;
std::pair<cache_t::iterator,bool> foo = m_cache.insert(e); if (m_qlast != foo.first)
if (foo.second) foo.first->second = new TPCollWrapper(key, revision); ret->next->second->prev = ret->prev;
m = foo.first;
// ++m->second->refCount;
} }
else if (!ret || ret->revision != revision)
ret.reset(new TPCollWrapper(key,revision));
ret->prev = m_qlast;
if (m_qlast != m_cache.end()) m_qlast->second->next = foo.first;
m_qlast = foo.first;
while (m_cache.size() > m_capacity && m_qfirst->second.use_count() == 1)
{ {
if (m->second->refCount == 0) m_qfirst = m_qfirst->second->next;
{ m_cache.erase(m_qfirst->second->prev);
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
remove_from_queue(m->second);
}
if (m->second->revision != revision) // out of date
{
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
m->second = new TPCollWrapper(key, revision);
}
} }
++m->second->refCount;
return m->second; return ret;
} // TPCollCache::get(...) } // TPCollCache::get(...)
void
TPCollCache
::release(TPCollWrapper const* ptr)
{
if (!ptr) return;
std::cerr << "Releasing " << ptr->key << " (" << ptr->refCount << ")" << std::endl;
if (--ptr->refCount == 0)
{
boost::unique_lock<boost::shared_mutex> lock(m_lock);
if (m_doomed_count == m_capacity)
{
TPCollWrapper* x = m_doomed_first;
remove_from_queue(x);
UTIL_THROW_IF2(x->refCount || x == ptr, "TPC was doomed while still in use!");
cache_t::iterator m = m_cache.find(ptr->key);
if (m != m_cache.end() && m->second == ptr)
{ // the cache could have been updated with a new pointer
// for the same phrase already, so we need to check
// if the pointer we cound is the one we want to get rid of,
// hence the second check
// boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
m_cache.erase(m);
}
std::cerr << "Deleting " << x->key << " " << x->refCount << std::endl;
// delete x;
}
add_to_queue(const_cast<TPCollWrapper*>(ptr));
}
} // TPCollCache::release(...)
TPCollWrapper:: TPCollWrapper::
TPCollWrapper(uint64_t key_, size_t revision_) TPCollWrapper(uint64_t key_, size_t revision_)
: refCount(0), prev(NULL), next(NULL) : revision(revision_), key(key_)
, revision(revision_), key(key_)
{ } { }
TPCollWrapper:: TPCollWrapper::
~TPCollWrapper() ~TPCollWrapper()
{ { }
UTIL_THROW_IF2(this->refCount, "TPCollWrapper refCount > 0!");
assert(this->refCount == 0);
}
} // namespace } // namespace

View File

@ -3,60 +3,44 @@
#include <time.h> #include <time.h>
#include "moses/TargetPhraseCollection.h" #include "moses/TargetPhraseCollection.h"
#include <boost/atomic.hpp> #include <boost/atomic.hpp>
#include "mm/ug_typedefs.h"
namespace Moses namespace Moses
{ {
class TPCollCache; class TPCollWrapper;
class TPCollCache
{
public:
typedef boost::unordered_map<uint64_t, SPTR<TPCollWrapper> > cache_t;
private:
uint32_t m_capacity; // capacity of cache
cache_t m_cache; // maps from ids to items
cache_t::iterator m_qfirst, m_qlast;
mutable boost::shared_mutex m_lock;
public:
TPCollCache(size_t capacity=10000);
SPTR<TPCollWrapper>
get(uint64_t key, size_t revision);
};
class TPCollWrapper
// wrapper around TargetPhraseCollection with reference counting // wrapper around TargetPhraseCollection with reference counting
// and additional members for caching purposes // and additional members for caching purposes
class TPCollWrapper
: public TargetPhraseCollection : public TargetPhraseCollection
{ {
friend class TPCollCache; friend class TPCollCache;
friend class Mmsapt; friend class Mmsapt;
mutable boost::atomic<uint32_t> refCount; // reference count
public: public:
TPCollWrapper* prev; // ... in queue of TPCollWrappers used recently TPCollCache::cache_t::iterator prev, next;
TPCollWrapper* next; // ... in queue of TPCollWrappers used recently
public: public:
mutable boost::shared_mutex lock; mutable boost::shared_mutex lock;
size_t const revision; // rev. No. of the underlying corpus size_t const revision; // rev. No. of the underlying corpus
uint64_t const key; // phrase key uint64_t const key; // phrase key
#if defined(timespec) // timespec is better, but not available everywhere
timespec tstamp; // last use
#else
timeval tstamp; // last use
#endif
TPCollWrapper(uint64_t const key, size_t const rev); TPCollWrapper(uint64_t const key, size_t const rev);
~TPCollWrapper(); ~TPCollWrapper();
}; };
class TPCollCache
{
typedef boost::unordered_map<uint64_t, TPCollWrapper*> cache_t;
typedef std::vector<TPCollWrapper*> history_t;
cache_t m_cache; // maps from phrase ids to target phrase collections
// mutable history_t m_history; // heap of live items, least recently used one on top
mutable boost::shared_mutex m_lock; // locks m_cache
TPCollWrapper* m_doomed_first;
TPCollWrapper* m_doomed_last;
uint32_t m_doomed_count; // counter of doomed TPCs
uint32_t m_capacity; // capacity of cache
void add_to_queue(TPCollWrapper* x);
void remove_from_queue(TPCollWrapper* x);
public:
TPCollCache(size_t capacity=10000);
TPCollWrapper*
get(uint64_t key, size_t revision);
void
release(TPCollWrapper const* tpc);
};
} }

View File

@ -627,30 +627,32 @@ namespace Moses
{ {
InputPath &inputPath = **iter; InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase(); const Phrase &phrase = inputPath.GetPhrase();
const TargetPhraseCollection *targetPhrases TargetPhraseCollection::shared_ptr targetPhrases
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase); = this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL); inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
} }
} }
TargetPhraseCollection const* // TargetPhraseCollection::shared_ptr
Mmsapt:: // Mmsapt::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const // GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{ // {
UTIL_THROW2("Don't call me without the translation task."); // UTIL_THROW2("Don't call me without the translation task.");
} // }
// This is not the most efficient way of phrase lookup! // This is not the most efficient way of phrase lookup!
TargetPhraseCollection const* TargetPhraseCollection::shared_ptr
Mmsapt:: Mmsapt::
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
{ {
boost::unique_lock<boost::shared_mutex> xlock(m_lock); SPTR<TPCollWrapper> ret;
// boost::unique_lock<boost::shared_mutex> xlock(m_lock);
// map from Moses Phrase to internal id sequence // map from Moses Phrase to internal id sequence
vector<id_type> sphrase; vector<id_type> sphrase;
fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase); fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase);
if (sphrase.size() == 0) return NULL; if (sphrase.size() == 0) return ret;
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/ // Reserve a local copy of the dynamic bitext in its current form. /btdyn/
// is set to a new copy of the dynamic bitext every time a sentence pair // is set to a new copy of the dynamic bitext every time a sentence pair
// is added. /dyn/ keeps the old bitext around as long as we need it. // is added. /dyn/ keeps the old bitext around as long as we need it.
@ -665,42 +667,42 @@ namespace Moses
// lookup phrases in both bitexts // lookup phrases in both bitexts
TSA<Token>::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size()); TSA<Token>::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size());
TSA<Token>::tree_iterator mdyn(dyn->I1.get()); TSA<Token>::tree_iterator mdyn(dyn->I1.get());
if (dyn->I1.get()) if (dyn->I1.get()) // we have a dynamic bitext
for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i) for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
mdyn.extend(sphrase[i]); mdyn.extend(sphrase[i]);
if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size()) if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
return NULL; // phrase not found in either bitext return ret; // phrase not found in either bitext
// do we have cached results for this phrase? // do we have cached results for this phrase?
uint64_t phrasekey = (mfix.size() == sphrase.size() uint64_t phrasekey = (mfix.size() == sphrase.size()
? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1); ? (mfix.getPid()<<1)
: (mdyn.getPid()<<1)+1);
// std::cerr << "Phrasekey is " << phrasekey << " at " << HERE << std::endl;
// get context-specific cache of items previously looked up // get context-specific cache of items previously looked up
SPTR<ContextScope> const& scope = ttask->GetScope(); SPTR<ContextScope> const& scope = ttask->GetScope();
SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key); SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
if (!cache) cache = m_cache; if (!cache) cache = m_cache; // no context-specific cache, use global one
TPCollWrapper* ret = cache->get(phrasekey, dyn->revision());
// TO DO: we should revise the revision mechanism: we take the length ret = cache->get(phrasekey, dyn->revision());
// of the dynamic bitext (in sentences) at the time the PT entry // TO DO: we should revise the revision mechanism: we take the
// was stored as the time stamp. For each word in the // length of the dynamic bitext (in sentences) at the time the PT
// entry was stored as the time stamp. For each word in the
// vocabulary, we also store its most recent occurrence in the // vocabulary, we also store its most recent occurrence in the
// bitext. Only if the timestamp of each word in the phrase is // bitext. Only if the timestamp of each word in the phrase is
// newer than the timestamp of the phrase itself we must update // newer than the timestamp of the phrase itself we must update
// the entry. // the entry.
// std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl; // std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl;
std::cerr << ret << " with " << ret->refCount << " references at " // std::cerr << ret << " with " << ret->refCount << " references at "
<< HERE << std::endl; // << HERE << std::endl;
boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock); boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
if (ret->GetSize()) return ret; if (ret->GetSize()) return ret;
// new TPC (not found or old one was not up to date) // new TPC (not found or old one was not up to date)
boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock); boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
// maybe another thread did the work while we waited for the lock ?
if (ret->GetSize()) return ret; if (ret->GetSize()) return ret;
// check again, another thread may have done the work already
// OK: pt entry NOT found or NOT up to date // OK: pt entry NOT found or NOT up to date
// lookup and expansion could be done in parallel threads, // lookup and expansion could be done in parallel threads,
@ -718,12 +720,16 @@ namespace Moses
else else
{ {
BitextSampler<Token> s(btfix.get(), mfix, context->bias, BitextSampler<Token> s(btfix.get(), mfix, context->bias,
m_min_sample_size, m_default_sample_size, m_sampling_method); m_min_sample_size,
m_default_sample_size,
m_sampling_method);
s(); s();
sfix = s.stats(); sfix = s.stats();
} }
} }
if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn);
if (mdyn.size() == sphrase.size())
sdyn = dyn->lookup(ttask, mdyn);
vector<PhrasePair<Token> > ppfix,ppdyn; vector<PhrasePair<Token> > ppfix,ppdyn;
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id; PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
@ -737,6 +743,7 @@ namespace Moses
expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log); expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log);
sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id); sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id);
} }
// now we have two lists of Phrase Pairs, let's merge them // now we have two lists of Phrase Pairs, let's merge them
PhrasePair<Token>::SortByTargetIdSeq sorter; PhrasePair<Token>::SortByTargetIdSeq sorter;
size_t i = 0; size_t k = 0; size_t i = 0; size_t k = 0;
@ -939,9 +946,10 @@ namespace Moses
return mdyn.size() == myphrase.size(); return mdyn.size() == myphrase.size();
} }
#if 0
void void
Mmsapt Mmsapt
::Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const ::Release(ttasksptr const& ttask, TargetPhraseCollection::shared_ptr*& tpc) const
{ {
if (!tpc) if (!tpc)
{ {
@ -957,6 +965,7 @@ namespace Moses
if (cache) cache->release(static_cast<TPCollWrapper const*>(tpc)); if (cache) cache->release(static_cast<TPCollWrapper const*>(tpc));
tpc = NULL; tpc = NULL;
} }
#endif
bool Mmsapt bool Mmsapt
::ProvidesPrefixCheck() const { return true; } ::ProvidesPrefixCheck() const { return true; }

View File

@ -179,7 +179,7 @@ namespace Moses
uint64_t const pid1, uint64_t const pid1,
sapt::pstats const& stats, sapt::pstats const& stats,
sapt::Bitext<Token> const & bt, sapt::Bitext<Token> const & bt,
TargetPhraseCollection* tpcoll TargetPhraseCollection::shared_ptr tpcoll
) const; ) const;
bool bool
@ -187,14 +187,14 @@ namespace Moses
(Phrase const& src, (Phrase const& src,
uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta, uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta,
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const; TargetPhraseCollection::shared_ptr tpcoll) const;
bool bool
combine_pstats combine_pstats
(Phrase const& src, (Phrase const& src,
uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta, uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta,
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const; TargetPhraseCollection::shared_ptr tpcoll) const;
void load_extra_data(std::string bname, bool locking); void load_extra_data(std::string bname, bool locking);
void load_bias(std::string bname); void load_bias(std::string bname);
@ -209,15 +209,15 @@ namespace Moses
std::string const& GetName() const; std::string const& GetName() const;
#ifndef NO_MOSES #ifndef NO_MOSES
TargetPhraseCollection const* TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const; GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
TargetPhraseCollection const* // TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(const Phrase& src) const; // GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
void void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask, GetTargetPhraseCollectionBatch
const InputPathList &inputPathQueue) const; (ttasksptr const& ttask, InputPathList const& inputPathQueue) const;
//! Create a sentence-specific manager for SCFG rule lookup. //! Create a sentence-specific manager for SCFG rule lookup.
ChartRuleLookupManager* ChartRuleLookupManager*
@ -234,7 +234,8 @@ namespace Moses
void setWeights(std::vector<float> const& w); void setWeights(std::vector<float> const& w);
void Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const; // void Release(ttasksptr const& ttask,
// TargetPhraseCollection const*& tpc) const;
// some consumer lets me know that *tpc isn't needed any more // some consumer lets me know that *tpc isn't needed any more

View File

@ -80,7 +80,8 @@ int main(int argc, char* argv[])
Phrase& p = *phrase; Phrase& p = *phrase;
cout << p << endl; cout << p << endl;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(ttask,p); TargetPhraseCollection::shared_ptr trg
= PT->GetTargetPhraseCollectionLEGACY(ttask,p);
if (!trg) continue; if (!trg) continue;
vector<size_t> order(trg->GetSize()); vector<size_t> order(trg->GetSize());
for (size_t i = 0; i < order.size(); ++i) order[i] = i; for (size_t i = 0; i < order.size(); ++i) order[i] = i;
@ -118,7 +119,7 @@ int main(int argc, char* argv[])
} }
cout << endl; cout << endl;
} }
PT->Release(ttask, trg); // PT->Release(ttask, trg);
} }
exit(0); exit(0);
} }

View File

@ -410,7 +410,7 @@ CreateTranslationOptionsForRange
const DecodeStep &dstep = **d; const DecodeStep &dstep = **d;
const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature(); const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature();
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict); TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
static_cast<const Tstep&>(dstep).ProcessInitialTranslation static_cast<const Tstep&>(dstep).ProcessInitialTranslation
(m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases); (m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases);
@ -431,7 +431,7 @@ CreateTranslationOptionsForRange
TranslationOption &inputPartialTranslOpt = **pto; TranslationOption &inputPartialTranslOpt = **pto;
if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep)) { if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep)) {
const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature(); const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict); TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc, tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
this, adhereTableLimit, targetPhrases); this, adhereTableLimit, targetPhrases);
} else { } else {

View File

@ -142,7 +142,8 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
for (size_t i = 0; i < m_inputPathQueue.size(); ++i) { for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
const InputPath &path = *m_inputPathQueue[i]; const InputPath &path = *m_inputPathQueue[i];
const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary); TargetPhraseCollection::shared_ptr tpColl
= path.GetTargetPhrases(phraseDictionary);
const WordsRange &range = path.GetWordsRange(); const WordsRange &range = path.GetWordsRange();
if (tpColl && tpColl->GetSize()) { if (tpColl && tpColl->GetSize()) {