Life cycle of TargetPhraseCollection is now managed via shared pointers.

This commit is contained in:
Ulrich Germann 2015-10-18 21:27:58 +01:00
parent 7a85126a92
commit bdb0227ee9
95 changed files with 896 additions and 810 deletions

View File

@ -249,16 +249,12 @@ size_t PhraseNode::ReadChild(Word &wordFound, uint64_t &childFilePos, const char
return memRead;
}
const TargetPhraseCollection *PhraseNode::GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
TargetPhraseCollection::shared_ptr
PhraseNode::
GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
if (m_value > 0)
ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
else {
}
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
if (m_value > 0) ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
return ret;
}

View File

@ -92,8 +92,11 @@ public:
}
const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const;
const TargetPhraseCollection *GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection(size_t tableLimit,
OnDiskWrapper &onDiskWrapper) const;
// Overwrite the stored counts (m_counts) with a copy of the supplied vector.
void AddCounts(const std::vector<float> &counts) {
m_counts = counts;
}

View File

@ -114,23 +114,22 @@ void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper)
}
Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
Moses::TargetPhraseCollection::shared_ptr TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, Vocab &vocab
, bool isSyntax) const
{
Moses::TargetPhraseCollection *ret = new Moses::TargetPhraseCollection();
Moses::TargetPhraseCollection::shared_ptr ret;
ret.reset(new Moses::TargetPhraseCollection);
CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
const TargetPhrase &tp = **iter;
Moses::TargetPhrase *mosesPhrase = tp.ConvertToMoses(inputFactors, outputFactors
, vocab
, phraseDict
, weightT
, isSyntax);
Moses::TargetPhrase *mosesPhrase
= tp.ConvertToMoses(inputFactors, outputFactors, vocab,
phraseDict, weightT, isSyntax);
/*
// debugging output

View File

@ -21,6 +21,8 @@
#include "TargetPhrase.h"
#include "Vocab.h"
#include "moses/TargetPhraseCollection.h"
#include <boost/shared_ptr.hpp>
namespace Moses
{
@ -50,6 +52,9 @@ protected:
std::string m_debugStr;
public:
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
static size_t s_sortScoreInd;
TargetPhraseCollection();
@ -69,7 +74,7 @@ public:
uint64_t GetFilePos() const;
Moses::TargetPhraseCollection *ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
Moses::TargetPhraseCollection::shared_ptr ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT

View File

@ -56,7 +56,7 @@ int main(int argc, char **argv)
if (node) {
// source phrase points to a bunch of rules
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
TargetPhraseCollection::shared_ptr coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
string str = coll->GetDebugStr();
cout << "Found " << coll->GetSize() << endl;

View File

@ -116,7 +116,7 @@ typedef
boost::unordered_set<TargetPhrase*,PhrasePtrHasher,PhrasePtrComparator> PhraseSet;
const TargetPhraseCollection*
TargetPhraseCollection::shared_ptr
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
{
@ -125,7 +125,7 @@ PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
PhraseSet allPhrases;
vector<PhraseSet> phrasesByTable(m_dictionaries.size());
for (size_t i = 0; i < m_dictionaries.size(); ++i) {
const TargetPhraseCollection* phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
TargetPhraseCollection::shared_ptr phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
if (phrases) {
for (TargetPhraseCollection::const_iterator j = phrases->begin();
j != phrases->end(); ++j) {

View File

@ -52,7 +52,7 @@ public:
, const LMList &languageModels
, float weightWP);
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const;
virtual TargetPhraseCollection::shared_ptr GetTargetPhraseCollection(const Phrase& src) const;
virtual void InitializeForInput(ttasksptr const& ttask);
virtual ChartRuleLookupManager *CreateRuleLookupManager(
const InputType &,
@ -65,7 +65,7 @@ private:
typedef boost::shared_ptr<PhraseDictionaryTreeAdaptor> DictionaryHandle;
std::vector<DictionaryHandle> m_dictionaries;
std::vector<std::vector<float> > m_weights; //feature x table
mutable TargetPhraseCollection* m_targetPhrases;
mutable TargetPhraseCollection::shared_ptr m_targetPhrases;
std::vector<float> m_weightT;
size_t m_tableLimit;
const LMList* m_languageModels;

View File

@ -44,7 +44,7 @@ ChartParserUnknown
ChartParserUnknown::~ChartParserUnknown()
{
RemoveAllInColl(m_unksrcs);
RemoveAllInColl(m_cacheTargetPhraseCollection);
// RemoveAllInColl(m_cacheTargetPhraseCollection);
}
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)

View File

@ -27,7 +27,7 @@
#include "WordsRange.h"
#include "StackVec.h"
#include "InputPath.h"
#include "TargetPhraseCollection.h"
namespace Moses
{
@ -38,7 +38,7 @@ class Sentence;
class ChartCellCollectionBase;
class Word;
class Phrase;
class TargetPhraseCollection;
// class TargetPhraseCollection;
class DecodeGraph;
class ChartParserUnknown
@ -56,7 +56,7 @@ public:
private:
std::vector<Phrase*> m_unksrcs;
std::list<TargetPhraseCollection*> m_cacheTargetPhraseCollection;
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
};
class ChartParser

View File

@ -3,6 +3,7 @@
#include "StackVec.h"
#include <list>
#include "TargetPhraseCollection.h"
namespace Moses
{
@ -23,7 +24,7 @@ public:
virtual bool Empty() const = 0;
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) = 0;
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range) = 0;
virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;

View File

@ -115,9 +115,13 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
}
}
void ChartTranslationOptionList::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range)
void
ChartTranslationOptionList::
AddPhraseOOV(TargetPhrase &phrase,
std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
const WordsRange &range)
{
TargetPhraseCollection *tpc = new TargetPhraseCollection();
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
tpc->Add(&phrase);
waste_memory.push_back(tpc);
StackVec empty;

View File

@ -55,7 +55,7 @@ public:
void Add(const TargetPhraseCollection &, const StackVec &,
const WordsRange &);
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
bool Empty() const {
return m_size == 0;

View File

@ -49,7 +49,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit
, const TargetPhraseCollection *phraseColl) const
, TargetPhraseCollection::shared_ptr phraseColl) const
{
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
// word deletion
@ -105,7 +105,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPath &inputPath
, const TargetPhraseCollection *phraseColl) const
, TargetPhraseCollection::shared_ptr phraseColl) const
{
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
const size_t tableLimit = phraseDictionary->GetTableLimit();
@ -147,7 +147,8 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
const size_t tableLimit = phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos);
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
= phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
if (phraseColl != NULL) {
IFVERBOSE(3) {
@ -237,8 +238,8 @@ ProcessLEGACY(TranslationOption const& in,
size_t const currSize = inPhrase.GetSize();
size_t const tableLimit = pdict->GetTableLimit();
TargetPhraseCollectionWithSourcePhrase const* phraseColl;
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
= pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
if (phraseColl != NULL) {
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;

View File

@ -48,7 +48,7 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit
, const TargetPhraseCollection *phraseColl) const;
, TargetPhraseCollection::shared_ptr phraseColl) const;
/*! initialize list of partial translation options by applying the first translation step
@ -58,7 +58,7 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPath &inputPath
, const TargetPhraseCollection *phraseColl) const;
, TargetPhraseCollection::shared_ptr phraseColl) const;
// legacy
void ProcessInitialTranslationLEGACY(const InputType &source

View File

@ -83,7 +83,7 @@ public:
void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored);
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
float GetBestScore(const ChartCellLabel *chartCell) const;
@ -160,7 +160,7 @@ template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targe
}
}
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &, const WordsRange &range)
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &, const WordsRange &range)
{
std::vector<lm::WordIndex> words;
UTIL_THROW_IF2(phrase.GetSize() > 1,

View File

@ -39,34 +39,40 @@ InputPath::~InputPath()
// std::cerr << "Deconstructing InputPath" << std::endl;
// Since there is no way for the Phrase Dictionaries to tell in
// which (sentence) context phrases were looked up, we tell them
// now that the phrase isn't needed any more by this inputPath
typedef std::pair<const TargetPhraseCollection*, const void* > entry;
std::map<const PhraseDictionary*, entry>::iterator iter;
ttasksptr theTask = this->ttask.lock();
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
{
// std::cerr << iter->second.first << " decommissioned." << std::endl;
iter->first->Release(theTask, iter->second.first);
}
// // NOT NEEDED ANY MORE SINCE THE SWITCH TO SHARED POINTERS
// // Since there is no way for the Phrase Dictionaries to tell in
// // which (sentence) context phrases were looked up, we tell them
// // now that the phrase isn't needed any more by this inputPath
// typedef std::pair<boost::shared_ptr<TargetPhraseCollection>, const void* > entry;
// std::map<const PhraseDictionary*, entry>::iterator iter;
// ttasksptr theTask = this->ttask.lock();
// for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
// {
// // std::cerr << iter->second.first << " decommissioned." << std::endl;
// iter->first->Release(theTask, iter->second.first);
// }
delete m_inputScore;
}
const TargetPhraseCollection *InputPath::GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
TargetPhraseCollection::shared_ptr
InputPath::
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
{
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
TargetPhrases::const_iterator iter;
iter = m_targetPhrases.find(&phraseDictionary);
if (iter == m_targetPhrases.end()) {
return NULL;
return TargetPhraseCollection::shared_ptr();
}
return iter->second.first;
}
const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
const void*
InputPath::
GetPtNode(const PhraseDictionary &phraseDictionary) const
{
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
TargetPhrases::const_iterator iter;
iter = m_targetPhrases.find(&phraseDictionary);
if (iter == m_targetPhrases.end()) {
return NULL;
@ -74,11 +80,14 @@ const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
return iter->second.second;
}
void InputPath::SetTargetPhrases(const PhraseDictionary &phraseDictionary
, const TargetPhraseCollection *targetPhrases
, const void *ptNode)
void
InputPath::
SetTargetPhrases(const PhraseDictionary &phraseDictionary,
TargetPhraseCollection::shared_ptr const& targetPhrases,
const void *ptNode)
{
std::pair<const TargetPhraseCollection*, const void*> value(targetPhrases, ptNode);
std::pair<TargetPhraseCollection::shared_ptr, const void*>
value(targetPhrases, ptNode);
m_targetPhrases[&phraseDictionary] = value;
}
@ -93,10 +102,10 @@ const Word &InputPath::GetLastWord() const
size_t InputPath::GetTotalRuleSize() const
{
size_t ret = 0;
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
TargetPhrases::const_iterator iter;
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
// const PhraseDictionary *pt = iter->first;
const TargetPhraseCollection *tpColl = iter->second.first;
TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
if (tpColl) {
ret += tpColl->GetSize();
@ -110,10 +119,10 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj)
{
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase();
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
InputPath::TargetPhrases::const_iterator iter;
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
const PhraseDictionary *pt = iter->first;
const TargetPhraseCollection *tpColl = iter->second.first;
boost::shared_ptr<TargetPhraseCollection const> tpColl = iter->second.first;
out << pt << "=";
if (tpColl) {

View File

@ -8,12 +8,12 @@
#include "WordsRange.h"
#include "NonTerminal.h"
#include "moses/FactorCollection.h"
#include <boost/shared_ptr.hpp>
#include "TargetPhraseCollection.h"
namespace Moses
{
class PhraseDictionary;
class TargetPhraseCollection;
class ScoreComponentCollection;
class TargetPhrase;
class InputPath;
@ -32,7 +32,12 @@ class InputPath
friend std::ostream& operator<<(std::ostream& out, const InputPath &obj);
public:
typedef std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> > TargetPhrases;
typedef std::pair<TargetPhraseCollection::shared_ptr, const void*>
TPCollStoreEntry;
typedef std::map<const PhraseDictionary*, TPCollStoreEntry>
TargetPhrases;
public:
ttaskwptr const ttask;
@ -96,10 +101,14 @@ public:
m_nextNode = nextNode;
}
void SetTargetPhrases(const PhraseDictionary &phraseDictionary
, const TargetPhraseCollection *targetPhrases
, const void *ptNode);
const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
void
SetTargetPhrases(const PhraseDictionary &phraseDictionary,
TargetPhraseCollection::shared_ptr const& targetPhrases,
const void *ptNode);
TargetPhraseCollection::shared_ptr
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
// Read-only access to the whole per-dictionary store (m_targetPhrases),
// as opposed to the overload that looks up a single PhraseDictionary.
const TargetPhrases &GetTargetPhrases() const {
return m_targetPhrases;
}

View File

@ -63,27 +63,29 @@ void PDTAimp::CleanUp()
{
assert(m_dict);
m_dict->FreeMemory();
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
// for(size_t i=0; i<m_tgtColls.size(); ++i) m_tgtColls[i].reset();
m_tgtColls.clear();
m_cache.clear();
m_rangeCache.clear();
uniqSrcPhr.clear();
}
TargetPhraseCollectionWithSourcePhrase const*
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
{
assert(m_dict);
if(src.GetSize()==0) return 0;
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
if(src.GetSize()==0) return ret;
std::pair<MapSrc2Tgt::iterator,bool> piter;
if(useCache) {
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
piter=m_cache.insert(std::make_pair(src, ret));
if(!piter.second) return piter.first->second;
} else if (m_cache.size()) {
MapSrc2Tgt::const_iterator i=m_cache.find(src);
return (i!=m_cache.end() ? i->second : 0);
return (i!=m_cache.end() ? i->second : ret);
}
std::vector<std::string> srcString(src.GetSize());
@ -97,7 +99,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
std::vector<std::string> wacands;
m_dict->GetTargetCandidates(srcString,cands,wacands);
if(cands.empty()) {
return 0;
return ret;
}
//TODO: Multiple models broken here
@ -140,16 +142,14 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
sourcePhrases.push_back(src);
}
TargetPhraseCollectionWithSourcePhrase *rv;
rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
if(rv->IsEmpty()) {
delete rv;
return 0;
ret = PruneTargetCandidates(tCands,costs, sourcePhrases);
if(ret->IsEmpty()) {
ret.reset();
} else {
if(useCache) piter.first->second=rv;
m_tgtColls.push_back(rv);
return rv;
if(useCache) piter.first->second = ret;
m_tgtColls.push_back(ret);
}
return ret;
}
@ -352,7 +352,8 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
pathExplored[len]+=exploredPaths[len];
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
// m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize()));
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
assert(i->first.first<m_rangeCache.size());
@ -386,10 +387,11 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
}
TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);
TargetPhraseCollectionWithSourcePhrase::shared_ptr
rv = PruneTargetCandidates(tCands, costs, sourcePhrases);
if(rv->IsEmpty())
delete rv;
rv.reset();
else {
m_rangeCache[i->first.first][i->first.second-1]=rv;
m_tgtColls.push_back(rv);
@ -428,7 +430,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
}
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PDTAimp::PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs,
const std::vector<Phrase> &sourcePhrases) const
@ -437,7 +440,8 @@ TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
"Number of target phrases must equal number of source phrases");
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
TargetPhraseCollectionWithSourcePhrase::shared_ptr rv;
rv.reset(new TargetPhraseCollectionWithSourcePhrase);
// set limit to tableLimit or actual size, whatever is smaller

View File

@ -44,10 +44,10 @@ public:
std::vector<FactorType> m_input,m_output;
PhraseDictionaryTree *m_dict;
const InputFeature *m_inputFeature;
typedef std::vector<TargetPhraseCollectionWithSourcePhrase const*> vTPC;
typedef std::vector<TargetPhraseCollectionWithSourcePhrase::shared_ptr> vTPC;
mutable vTPC m_tgtColls;
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase const*> MapSrc2Tgt;
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase::shared_ptr> MapSrc2Tgt;
mutable MapSrc2Tgt m_cache;
PhraseDictionaryTreeAdaptor *m_obj;
int useCache;
@ -69,7 +69,7 @@ public:
void CleanUp();
TargetPhraseCollectionWithSourcePhrase const*
TargetPhraseCollectionWithSourcePhrase::shared_ptr
GetTargetPhraseCollection(Phrase const &src) const;
void Create(const std::vector<FactorType> &input
@ -121,7 +121,7 @@ public:
const std::string *alignmentString,
Phrase const* srcPtr=0) const;
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates
TargetPhraseCollectionWithSourcePhrase::shared_ptr PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
std::vector<std::pair<float,size_t> >& costs,
const std::vector<Phrase> &sourcePhrases) const;

View File

@ -28,9 +28,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
HyperPath source;
SynthesizeHyperPath(e, source);
TargetPhrase *tp = SynthesizeTargetPhrase(e);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(m_hyperTree,
source);
tpc.Add(tp);
TargetPhraseCollection::shared_ptr tpc
= GetOrCreateTargetPhraseCollection(m_hyperTree, source);
tpc->Add(tp);
}
void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,

View File

@ -14,7 +14,7 @@ void HyperTree::Node::Prune(std::size_t tableLimit)
p->second.Prune(tableLimit);
}
// Prune TargetPhraseCollection at this node.
m_targetPhraseCollection.Prune(true, tableLimit);
m_targetPhraseCollection->Prune(true, tableLimit);
}
void HyperTree::Node::Sort(std::size_t tableLimit)
@ -24,7 +24,7 @@ void HyperTree::Node::Sort(std::size_t tableLimit)
p->second.Sort(tableLimit);
}
// Sort TargetPhraseCollection at this node.
m_targetPhraseCollection.Sort(true, tableLimit);
m_targetPhraseCollection->Sort(true, tableLimit);
}
HyperTree::Node *HyperTree::Node::GetOrCreateChild(
@ -40,7 +40,7 @@ const HyperTree::Node *HyperTree::Node::GetChild(
return (p == m_map.end()) ? NULL : &p->second;
}
TargetPhraseCollection &HyperTree::GetOrCreateTargetPhraseCollection(
TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection(
const HyperPath &hyperPath)
{
Node &node = GetOrCreateNode(hyperPath);

View File

@ -37,7 +37,7 @@ public:
}
bool HasRules() const {
return !m_targetPhraseCollection.IsEmpty();
return !m_targetPhraseCollection->IsEmpty();
}
void Prune(std::size_t tableLimit);
@ -47,11 +47,13 @@ public:
const Node *GetChild(const HyperPath::NodeSeq &) const;
const TargetPhraseCollection &GetTargetPhraseCollection() const {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() const {
return m_targetPhraseCollection;
}
TargetPhraseCollection &GetTargetPhraseCollection() {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() {
return m_targetPhraseCollection;
}
@ -59,12 +61,14 @@ public:
return m_map;
}
// Allocate a default-constructed collection up front so that the shared
// pointer is non-null from construction; members such as HasRules(),
// Prune() and Sort() dereference it without a null check.
Node() : m_targetPhraseCollection(new TargetPhraseCollection) { }
private:
Map m_map;
TargetPhraseCollection m_targetPhraseCollection;
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
};
HyperTree(const RuleTableFF *ff) : RuleTable(ff) {}
HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
const Node &GetRootNode() const {
return m_root;
@ -73,7 +77,8 @@ public:
private:
friend class HyperTreeCreator;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const HyperPath &);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const HyperPath &);
Node &GetOrCreateNode(const HyperPath &);

View File

@ -21,7 +21,7 @@ protected:
// Provide access to HyperTree's private GetOrCreateTargetPhraseCollection
// function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
HyperTree &trie, const HyperPath &fragment) {
return trie.GetOrCreateTargetPhraseCollection(fragment);
}

View File

@ -130,9 +130,9 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
ff.GetFeaturesToApply());
// Add rule to trie.
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
trie, sourceFragment);
phraseColl.Add(targetPhrase);
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(trie, sourceFragment);
phraseColl->Add(targetPhrase);
count++;
}

View File

@ -51,8 +51,8 @@ void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
m_hyperedge.label.inputWeight += (*p)->weight;
}
// Set the output hyperedge label's translation set pointer.
m_hyperedge.label.translations =
&(item.trieNode->GetTargetPhraseCollection());
m_hyperedge.label.translations
= item.trieNode->GetTargetPhraseCollection();
// Pass the output hyperedge to the callback.
callback(m_hyperedge);
}

View File

@ -9,7 +9,7 @@ namespace Syntax
struct PLabel {
float inputWeight;
const TargetPhraseCollection *translations;
TargetPhraseCollection::shared_ptr translations;
};
} // Syntax

View File

@ -32,9 +32,10 @@ boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
// TODO Check ownership and fix any leaks.
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
*trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
tpc.Add(tp);
TargetPhraseCollection::shared_ptr tpc;
tpc= GetOrCreateTargetPhraseCollection(*trie, *srcPhrase, *tp, NULL);
// TODO Check NULL is valid argument
tpc->Add(tp);
}
}

View File

@ -132,9 +132,9 @@ void RecursiveCYKPlusParser<Callback>::AddAndExtend(
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
// Add target phrase collection (except if rule is empty or unary).
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();
if (!tpc.IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
m_hyperedge.label.translations = &tpc;
TargetPhraseCollection::shared_ptr tpc = node.GetTargetPhraseCollection();
if (!tpc->IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
m_hyperedge.label.translations = tpc;
(*m_callback)(m_hyperedge, end);
}

View File

@ -38,8 +38,8 @@ Scope3Parser<Callback>::~Scope3Parser()
}
template<typename Callback>
void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
Callback &callback)
void Scope3Parser<Callback>::
EnumerateHyperedges(const WordsRange &range, Callback &callback)
{
const std::size_t start = range.GetStartPos();
const std::size_t end = range.GetEndPos();
@ -64,8 +64,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
// Ask the grammar for the mapping from label sequences to target phrase
// collections for this pattern.
const RuleTrie::Node::LabelMap &labelMap =
patNode->m_node->GetLabelMap();
const RuleTrie::Node::LabelMap &labelMap = patNode->m_node->GetLabelMap();
// For each label sequence, search the lattice for the set of PHyperedge
// tails.
@ -73,7 +72,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin();
for (; q != labelMap.end(); ++q) {
const std::vector<int> &labelSeq = q->first;
const TargetPhraseCollection &tpc = q->second;
TargetPhraseCollection::shared_ptr tpc = q->second;
// For many label sequences there won't be any corresponding paths through
// the lattice. As an optimisation, we use m_quickCheckTable to test
// for this and we don't begin a search if there are no paths to find.

View File

@ -6,7 +6,7 @@
#include "moses/Syntax/PHyperedge.h"
#include "TailLattice.h"
#include "moses/TargetPhraseCollection.h"
namespace Moses
{
namespace Syntax
@ -25,13 +25,14 @@ public:
, m_key(key)
, m_ranges(ranges) {}
void Search(const std::vector<int> &labels, const TargetPhraseCollection &tpc,
void Search(const std::vector<int> &labels,
const TargetPhraseCollection::shared_ptr tpc,
Callback &callback) {
m_labels = &labels;
m_matchCB = &callback;
m_hyperedge.head = 0;
m_hyperedge.tail.clear();
m_hyperedge.label.translations = &tpc;
m_hyperedge.label.translations = tpc;
SearchInner(0, 0, 0);
}

View File

@ -28,9 +28,10 @@ public:
private:
friend class RuleTrieCreator;
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS) = 0;
virtual TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS) = 0;
virtual void SortAndPrune(std::size_t) = 0;
};

View File

@ -33,7 +33,7 @@ void RuleTrieCYKPlus::Node::Prune(std::size_t tableLimit)
}
// prune TargetPhraseCollection in this node
m_targetPhraseCollection.Prune(true, tableLimit);
m_targetPhraseCollection->Prune(true, tableLimit);
}
void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
@ -49,7 +49,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
}
// sort TargetPhraseCollection in this node
m_targetPhraseCollection.Sort(true, tableLimit);
m_targetPhraseCollection->Sort(true, tableLimit);
}
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
@ -86,8 +86,11 @@ const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild(
return (p == m_nonTermMap.end()) ? NULL : &p->second;
}
TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
TargetPhraseCollection::shared_ptr
RuleTrieCYKPlus::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetTargetPhraseCollection();

View File

@ -38,7 +38,7 @@ public:
}
bool HasRules() const {
return !m_targetPhraseCollection.IsEmpty();
return !m_targetPhraseCollection->IsEmpty();
}
void Prune(std::size_t tableLimit);
@ -50,11 +50,13 @@ public:
const Node *GetChild(const Word &sourceTerm) const;
const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
const TargetPhraseCollection &GetTargetPhraseCollection() const {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() const {
return m_targetPhraseCollection;
}
TargetPhraseCollection &GetTargetPhraseCollection() {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() {
return m_targetPhraseCollection;
}
@ -66,10 +68,12 @@ public:
return m_nonTermMap;
}
// Allocate a default-constructed collection up front so that the shared
// pointer is non-null from construction; members such as HasRules(),
// Prune() and Sort() dereference it without a null check.
Node() : m_targetPhraseCollection(new TargetPhraseCollection) {}
private:
SymbolMap m_sourceTermMap;
SymbolMap m_nonTermMap;
TargetPhraseCollection m_targetPhraseCollection;
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
};
// Construct the trie, passing the RuleTableFF pointer through to RuleTrie.
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
@ -81,8 +85,9 @@ public:
bool HasPreterminalRule(const Word &) const;
private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS);

View File

@ -21,8 +21,9 @@ protected:
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
// function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection
( RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS) {
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
}

View File

@ -125,9 +125,10 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
trie, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(trie, sourcePhrase,
*targetPhrase, sourceLHS);
phraseColl->Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now
delete sourceLHS;

View File

@ -33,7 +33,7 @@ void RuleTrieScope3::Node::Prune(std::size_t tableLimit)
// Prune TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Prune(true, tableLimit);
p->second->Prune(true, tableLimit);
}
}
@ -50,7 +50,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit)
// Sort TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Sort(true, tableLimit);
p->second->Sort(true, tableLimit);
}
}
@ -75,9 +75,10 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
return m_gapNode;
}
TargetPhraseCollection &
RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
const TargetPhrase &target)
TargetPhraseCollection::shared_ptr
RuleTrieScope3::
Node::
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
{
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
const std::size_t rank = alignmentInfo.GetSize();
@ -94,12 +95,16 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
vec.push_back(InsertLabel(i++, targetNonTerm));
}
return m_labelMap[vec];
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
if (!ret) ret.reset(new TargetPhraseCollection);
return ret;
}
TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
TargetPhraseCollection::shared_ptr
RuleTrieScope3::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection(target);

View File

@ -35,7 +35,7 @@ public:
SymbolEqualityPred> TerminalMap;
typedef boost::unordered_map<std::vector<int>,
TargetPhraseCollection> LabelMap;
TargetPhraseCollection::shared_ptr> LabelMap;
~Node() {
delete m_gapNode;
@ -61,8 +61,8 @@ public:
Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const TargetPhrase &);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const TargetPhrase &);
bool IsLeaf() const {
return m_terminalMap.empty() && m_gapNode == NULL;
@ -106,8 +106,10 @@ public:
bool HasPreterminalRule(const Word &) const;
private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS);
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS);

View File

@ -17,7 +17,7 @@ struct PVertex;
struct SHyperedgeBundle {
float inputWeight;
std::vector<const SVertexStack*> stacks;
const TargetPhraseCollection *translations;
TargetPhraseCollection::shared_ptr translations;
friend void swap(SHyperedgeBundle &x, SHyperedgeBundle &y) {
using std::swap;

View File

@ -17,9 +17,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
const Word &sourceLhs = node.pvertex.symbol;
boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs);
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
m_ruleTrie, sourceLhs, *sourceRhs);
tpc.Add(tp);
TargetPhraseCollection::shared_ptr tpc
= GetOrCreateTargetPhraseCollection(m_ruleTrie, sourceLhs, *sourceRhs);
tpc->Add(tp);
}
Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)

View File

@ -48,11 +48,11 @@ public:
const Node *GetChild(const HyperPath::NodeSeq &) const;
const TargetPhraseCollection &GetTargetPhraseCollection() const
const TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() const
return m_targetPhraseCollection;
}
TargetPhraseCollection &GetTargetPhraseCollection()
TargetPhraseCollection::shared_ptr GetTargetPhraseCollection()
return m_targetPhraseCollection;
}
@ -76,7 +76,7 @@ const Node &GetRootNode() const
private:
friend class RuleTrieCreator;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
const Word &sourceLHS, const Phrase &sourceRHS);
Node &GetOrCreateNode(const Phrase &sourceRHS);

View File

@ -61,7 +61,7 @@ void RuleMatcherSCFG<Callback>::Match(const InputTree::Node &inNode,
if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) {
// Check if the trie node has any rules with a LHS that match inNode.
const Word &lhs = inNode.pvertex.symbol;
const TargetPhraseCollection *tpc =
TargetPhraseCollection::shared_ptr tpc =
newTrieNode.GetTargetPhraseCollection(lhs);
if (tpc) {
m_hyperedge.label.translations = tpc;

View File

@ -35,7 +35,7 @@ void RuleTrie::Node::Prune(std::size_t tableLimit)
// Prune TargetPhraseCollections at this node.
for (TPCMap::iterator p = m_targetPhraseCollections.begin();
p != m_targetPhraseCollections.end(); ++p) {
p->second.Prune(true, tableLimit);
p->second->Prune(true, tableLimit);
}
}
@ -54,17 +54,21 @@ void RuleTrie::Node::Sort(std::size_t tableLimit)
// Sort TargetPhraseCollections at this node.
for (TPCMap::iterator p = m_targetPhraseCollections.begin();
p != m_targetPhraseCollections.end(); ++p) {
p->second.Sort(true, tableLimit);
p->second->Sort(true, tableLimit);
}
}
RuleTrie::Node *RuleTrie::Node::GetOrCreateChild(
const Word &sourceTerm)
RuleTrie::Node*
RuleTrie::Node::
GetOrCreateChild(const Word &sourceTerm)
{
return &m_sourceTermMap[sourceTerm];
}
RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
RuleTrie::Node *
RuleTrie::
Node::
GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
"Not a non-terminal: " << targetNonTerm);
@ -72,42 +76,52 @@ RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNo
return &m_nonTermMap[targetNonTerm];
}
TargetPhraseCollection &RuleTrie::Node::GetOrCreateTargetPhraseCollection(
const Word &sourceLHS)
TargetPhraseCollection::shared_ptr
RuleTrie::
Node::
GetOrCreateTargetPhraseCollection(const Word &sourceLHS)
{
UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(),
"Not a non-terminal: " << sourceLHS);
return m_targetPhraseCollections[sourceLHS];
TargetPhraseCollection::shared_ptr& foo
= m_targetPhraseCollections[sourceLHS];
if (!foo) foo.reset(new TargetPhraseCollection);
return foo;
}
const RuleTrie::Node *RuleTrie::Node::GetChild(
const Word &sourceTerm) const
RuleTrie::Node const*
RuleTrie::
Node::
GetChild(const Word &sourceTerm) const
{
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
"Not a terminal: " << sourceTerm);
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), "Not a terminal: " << sourceTerm);
SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
return (p == m_sourceTermMap.end()) ? NULL : &p->second;
}
const RuleTrie::Node *RuleTrie::Node::GetNonTerminalChild(
const Word &targetNonTerm) const
RuleTrie::Node const*
RuleTrie::
Node::
GetNonTerminalChild(const Word &targetNonTerm) const
{
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
"Not a non-terminal: " << targetNonTerm);
SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
return (p == m_nonTermMap.end()) ? NULL : &p->second;
}
TargetPhraseCollection &RuleTrie::GetOrCreateTargetPhraseCollection(
const Word &sourceLHS, const Phrase &sourceRHS)
TargetPhraseCollection::shared_ptr
RuleTrie::
GetOrCreateTargetPhraseCollection
( const Word &sourceLHS, const Phrase &sourceRHS )
{
Node &currNode = GetOrCreateNode(sourceRHS);
return currNode.GetOrCreateTargetPhraseCollection(sourceLHS);
}
RuleTrie::Node &RuleTrie::GetOrCreateNode(const Phrase &sourceRHS)
RuleTrie::Node &
RuleTrie::
GetOrCreateNode(const Phrase &sourceRHS)
{
const std::size_t size = sourceRHS.GetSize();

View File

@ -32,7 +32,7 @@ public:
typedef boost::unordered_map<Word, Node, SymbolHasher,
SymbolEqualityPred> SymbolMap;
typedef boost::unordered_map<Word, TargetPhraseCollection,
typedef boost::unordered_map<Word, TargetPhraseCollection::shared_ptr,
SymbolHasher, SymbolEqualityPred> TPCMap;
bool IsLeaf() const {
@ -48,15 +48,18 @@ public:
Node *GetOrCreateChild(const Word &sourceTerm);
Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Word &);
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(const Word &);
const Node *GetChild(const Word &sourceTerm) const;
const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
const TargetPhraseCollection *GetTargetPhraseCollection(
const Word &sourceLHS) const {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection(const Word &sourceLHS) const {
TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS);
return p == m_targetPhraseCollections.end() ? 0 : &(p->second);
if (p != m_targetPhraseCollections.end())
return p->second;
else
return TargetPhraseCollection::shared_ptr();
}
// FIXME Is there any reason to distinguish these two for T2S?
@ -83,8 +86,9 @@ public:
private:
friend class RuleTrieCreator;
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Word &sourceLHS, const Phrase &sourceRHS);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection
(const Word &sourceLHS, const Phrase &sourceRHS);
Node &GetOrCreateNode(const Phrase &sourceRHS);

View File

@ -21,7 +21,7 @@ protected:
// Provide access to RuleTrie's private
// GetOrCreateTargetPhraseCollection function.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) {
return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS);
}

View File

@ -55,7 +55,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
std::vector<float> scoreVector;
StringPiece line;
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
int noflags = double_conversion::StringToDoubleConverter::NO_FLAGS;
double_conversion::StringToDoubleConverter
converter(noflags, NAN, NAN, "inf", "nan");
while(true) {
try {
@ -132,9 +134,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
trie, *sourceLHS, sourcePhrase);
phraseColl.Add(targetPhrase);
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(trie, *sourceLHS, sourcePhrase);
phraseColl->Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now
delete sourceLHS;

View File

@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream>
#include "TargetPhrase.h"
#include "Util.h"
#include <boost/shared_ptr.hpp>
namespace Moses
{
@ -43,6 +44,8 @@ public:
// iters
typedef CollType::iterator iterator;
typedef CollType::const_iterator const_iterator;
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
TargetPhrase const*
operator[](size_t const i) const {
@ -127,6 +130,9 @@ protected:
std::vector<Phrase> m_sourcePhrases;
public:
typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase> shared_ptr;
typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase const> shared_const_ptr;
const std::vector<Phrase> &GetSourcePhrases() const {
return m_sourcePhrases;
}

View File

@ -167,10 +167,10 @@ void ChartRuleLookupManagerMemory::AddAndExtend(
size_t endPos)
{
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
TargetPhraseCollection::shared_ptr tpc = node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or a unary non-terminal rule)
if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)

View File

@ -167,10 +167,11 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
size_t endPos)
{
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
TargetPhraseCollection::shared_ptr tpc
= node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or a unary non-terminal rule)
if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)

View File

@ -64,11 +64,12 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
{
std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache;
for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
delete iterCache->second;
}
m_cache.clear();
// not needed any more due to the switch to shared pointers
// std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache;
// for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
// iterCache->second.reset();
// }
// m_cache.clear();
RemoveAllInColl(m_expandableDottedRuleListVec);
RemoveAllInColl(m_sourcePhraseNode);
@ -236,14 +237,16 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
if (sourceLHSBerkeleyDb == NULL)
continue;
const TargetPhraseCollection *targetPhraseCollection = NULL;
const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
TargetPhraseCollection::shared_ptr targetPhraseCollection;
const OnDiskPt::PhraseNode *node
= prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
if (node) {
uint64_t tpCollFilePos = node->GetValue();
std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos);
std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache = m_cache.find(tpCollFilePos);
if (iterCache == m_cache.end()) {
const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
OnDiskPt::TargetPhraseCollection::shared_ptr tpcollBerkeleyDb
= node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
targetPhraseCollection
@ -254,7 +257,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
,m_dbWrapper.GetVocab()
,true);
delete tpcollBerkeleyDb;
tpcollBerkeleyDb.reset();
m_cache[tpCollFilePos] = targetPhraseCollection;
} else {
// just get out of cache

View File

@ -55,7 +55,7 @@ private:
const std::vector<FactorType> &m_inputFactorsVec;
const std::vector<FactorType> &m_outputFactorsVec;
std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec;
std::map<uint64_t, const TargetPhraseCollection*> m_cache;
std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache;
std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
};

View File

@ -48,7 +48,7 @@ ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton(
ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
{
RemoveAllInColl(m_tpColl);
// RemoveAllInColl(m_tpColl);
}
void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
@ -58,7 +58,7 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
{
//m_tpColl.push_back(TargetPhraseCollection());
//TargetPhraseCollection &tpColl = m_tpColl.back();
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
m_tpColl.push_back(tpColl);
const WordsRange &range = inputPath.GetWordsRange();
@ -73,7 +73,9 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
outColl.Add(*tpColl, m_stackVec, range);
}
TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sourceWord) const
TargetPhrase *
ChartRuleLookupManagerSkeleton::
CreateTargetPhrase(const Word &sourceWord) const
{
// create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:'
string str = sourceWord.GetFactor(0)->GetString().as_string();

View File

@ -49,7 +49,7 @@ private:
TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;
StackVec m_stackVec;
std::vector<TargetPhraseCollection*> m_tpColl;
std::vector<TargetPhraseCollection::shared_ptr > m_tpColl;
const SkeletonPT &m_skeletonPT;
};

View File

@ -119,4 +119,4 @@ private:
} // namespace Moses
#endif
#endif

View File

@ -107,14 +107,15 @@ void PhraseDictionaryCompact::Load()
// }
// };
const TargetPhraseCollection*
TargetPhraseCollection::shared_ptr
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
{
TargetPhraseCollection::shared_ptr ret;
// There is no such source phrase if source phrase is longer than longest
// observed source phrase during compilation
if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
return NULL;
return ret;
// Retrieve target phrase collection from phrase table
TargetPhraseVectorPtr decodedPhraseColl
@ -122,7 +123,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
TargetPhraseCollection* phraseColl = new TargetPhraseCollection();
TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
// Score phrases and if possible apply ttable_limit
TargetPhraseVector::iterator nth =
@ -139,7 +140,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s
return phraseColl;
} else
return NULL;
return ret;
}
TargetPhraseVectorPtr
@ -163,7 +164,7 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact()
//TO_STRING_BODY(PhraseDictionaryCompact)
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{
if(!m_sentenceCache.get())
m_sentenceCache.reset(new PhraseCache());
@ -179,12 +180,13 @@ void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &so
m_sentenceCache.reset(new PhraseCache());
m_phraseDecoder->PruneCache();
for(PhraseCache::iterator it = m_sentenceCache->begin();
it != m_sentenceCache->end(); it++)
delete *it;
// for(PhraseCache::iterator it = m_sentenceCache->begin();
// it != m_sentenceCache->end(); it++)
// it->reset();
PhraseCache temp;
temp.swap(*m_sentenceCache);
// PhraseCache temp;
// temp.swap(*m_sentenceCache);
m_sentenceCache->clear();
ReduceCache();
}

View File

@ -51,7 +51,7 @@ protected:
bool m_inMemory;
bool m_useAlignmentInfo;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
typedef boost::thread_specific_ptr<PhraseCache> SentenceCache;
static SentenceCache m_sentenceCache;
@ -69,12 +69,12 @@ public:
void Load();
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
void CacheForCleanup(TargetPhraseCollection* tpc);
void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
void CleanUpAfterSentenceProcessing(const InputType &source);
virtual ChartRuleLookupManager *CreateRuleLookupManager(

View File

@ -35,14 +35,15 @@ namespace Moses
{
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
CacheColl::~CacheColl()
{
for (iterator iter = begin(); iter != end(); ++iter) {
std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
const TargetPhraseCollection *tps = key.first;
delete tps;
}
}
// CacheColl::~CacheColl()
// {
// // not needed any more since the switch to shared pointers
// // for (iterator iter = begin(); iter != end(); ++iter) {
// // std::pair<TargetPhraseCollection::shared_ptr , clock_t> &key = iter->second;
// // TargetPhraseCollection::shared_ptr tps = key.first;
// // delete tps;
// // }
// }
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
: DecodeFeature(line, registerNow)
@ -60,9 +61,12 @@ ProvidesPrefixCheck() const
return false;
}
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
TargetPhraseCollection::shared_ptr
PhraseDictionary::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
const TargetPhraseCollection *ret;
TargetPhraseCollection::shared_ptr ret;
typedef std::pair<TargetPhraseCollection::shared_ptr , clock_t> entry;
if (m_maxCacheSize) {
CacheColl &cache = GetCache();
@ -74,18 +78,14 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
if (iter == cache.end()) {
// not in cache, need to look up from phrase table
ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
if (ret) {
ret = new TargetPhraseCollection(*ret);
if (ret) { // make a copy
ret.reset(new TargetPhraseCollection(*ret));
}
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
cache[hash] = value;
cache[hash] = entry(ret, clock());
} else {
// in cache. just use it
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
value.second = clock();
ret = value.first;
iter->second.second = clock();
ret = iter->second.first;
}
} else {
// don't use cache. look up from phrase table
@ -95,7 +95,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
return ret;
}
TargetPhraseCollection const *
TargetPhraseCollection::shared_ptr
PhraseDictionary::
GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
{
@ -103,7 +103,7 @@ GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
}
TargetPhraseCollectionWithSourcePhrase const*
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PhraseDictionary::
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const
{
@ -140,14 +140,14 @@ SetFeaturesToApply()
}
// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
void
PhraseDictionary::
Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
{
// do nothing by default
return;
}
// // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
// void
// PhraseDictionary::
// Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
// {
// // do nothing by default
// return;
// }
bool
PhraseDictionary::
@ -170,7 +170,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
}
const Phrase &phrase = inputPath.GetPhrase();
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
}
}
@ -180,7 +180,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
//void PhraseDictionary::SaveCache() const
//{
// CacheColl &cache = GetCache();
// for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter,
// for( std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter,
// iter != cache.end(),
// iter++ ) {
//
@ -191,10 +191,10 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
//void PhraseDictionary::LoadCache() const
//{
// CacheColl &cache = GetCache();
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter;
// iter = cache.begin();
// while( iter != cache.end() ) {
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iterRemove = iter++;
// delete iterRemove->second.first;
// cache.erase(iterRemove);
// }
@ -225,11 +225,12 @@ void PhraseDictionary::ReduceCache() const
while( iter != cache.end() ) {
if (iter->second.second < cutoffLastUsedTime) {
CacheColl::iterator iterRemove = iter++;
delete iterRemove->second.first;
// delete iterRemove->second.first;
cache.erase(iterRemove);
} else iter++;
}
VERBOSE(2,"Reduced persistent translation option cache in " << reduceCacheTime << " seconds." << std::endl);
VERBOSE(2,"Reduced persistent translation option cache in "
<< reduceCacheTime << " seconds." << std::endl);
}
CacheColl &PhraseDictionary::GetCache() const
@ -265,8 +266,8 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
// lookup translation only if no other translations
InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
const TargetPhraseCollection *tpCollPrev = temp.first;
const std::pair<TargetPhraseCollection::shared_ptr , const void*> &temp = iter->second;
TargetPhraseCollection::shared_ptr tpCollPrev = temp.first;
if (tpCollPrev && tpCollPrev->GetSize()) {
// already have translation from another pt. Don't create translations

View File

@ -55,15 +55,18 @@ class ChartCellCollectionBase;
class ChartRuleLookupManager;
class ChartParser;
class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >
{
// 1st = hash of source phrase/ address of phrase-table node
// 2nd = all translations
// 3rd = time of last access
// typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
// class CacheColl : public boost::unordered_map<size_t, TPCollLastUse>
// {
// // 1st = hash of source phrase/ address of phrase-table node
// // 2nd = all translations
// // 3rd = time of last access
public:
~CacheColl();
};
// public:
// ~CacheColl();
// };
/**
* Abstract base class for phrase dictionaries (tables).
@ -95,9 +98,9 @@ public:
return m_id;
}
virtual
void
Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
// virtual
// void
// Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
/// return true if phrase table entries starting with /phrase/
// exist in the table.
@ -111,24 +114,23 @@ public:
//! find list of translations that can translate src. Only for phrase input
public:
virtual
TargetPhraseCollection const *
virtual TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
virtual
TargetPhraseCollection const *
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const {
virtual TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
Phrase const& src) const
{
return GetTargetPhraseCollectionLEGACY(src);
}
virtual
void
virtual void
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
virtual
void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
const InputPathList &inputPathQueue) const {
virtual void
GetTargetPhraseCollectionBatch
(ttasksptr const& ttask, InputPathList const& inputPathQueue) const
{
GetTargetPhraseCollectionBatch(inputPathQueue);
}
@ -157,7 +159,9 @@ public:
// LEGACY
//! find list of translations that can translate a portion of src. Used by confusion network decoding
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
virtual
TargetPhraseCollectionWithSourcePhrase::shared_ptr
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
protected:
static std::vector<PhraseDictionary*> s_staticColl;
@ -184,7 +188,10 @@ protected:
mutable boost::scoped_ptr<CacheColl> m_cache;
#endif
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
virtual
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
void ReduceCache() const;
protected:

View File

@ -150,15 +150,15 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(ttasksptr const& ttas
ReduceCache();
}
const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
TargetPhraseCollection* tpc = NULL;
TargetPhraseCollection::shared_ptr tpc;
cacheMap::const_iterator it = m_cacheTM.find(source);
if(it != m_cacheTM.end()) {
tpc = new TargetPhraseCollection(*(it->second).first);
tpc.reset(new TargetPhraseCollection(*(it->second).first));
std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
@ -174,15 +174,15 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
return tpc;
}
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
{
const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
return ret;
}
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
{
const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
return ret;
}
@ -366,7 +366,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
// and then add new entry
TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first;
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second;
const Phrase* p_ptr = NULL;
TargetPhrase* tp_ptr = NULL;
@ -397,7 +397,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
if (tpc->GetSize() == 0) {
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
ac->clear();
delete tpc;
tpc.reset();
delete ac;
m_cacheTM.erase(sp);
}
@ -451,14 +451,14 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
//sp is found
TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first;
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second;
m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
ac->clear();
delete tpc;
tpc.reset();
delete ac;
m_cacheTM.erase(sp);
} else {
@ -558,7 +558,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
// and then add new entry
TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first;
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second;
// const TargetPhrase* p_ptr = NULL;
const Phrase* p_ptr = NULL;
@ -599,7 +599,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
// create target collection
// we have to create new target collection age pair and add new entry to target collection age pair
TargetPhraseCollection* tpc = new TargetPhraseCollection();
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
AgeCollection* ac = new AgeCollection();
m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
@ -629,13 +629,13 @@ void PhraseDictionaryDynamicCacheBased::Decay()
void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
{
VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
cacheMap::const_iterator it = m_cacheTM.find(sp);
cacheMap::iterator it = m_cacheTM.find(sp);
if (it != m_cacheTM.end()) {
VERBOSE(3,"found:|" << sp << "|" << std::endl);
//sp is found
TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first;
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second;
//loop in inverted order to allow a correct deletion of std::vectors tpc and ac
@ -661,7 +661,7 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
(((*it).second).second)->clear();
delete ((*it).second).second;
delete ((*it).second).first;
((*it).second).first.reset();
m_cacheTM.erase(sp);
}
} else {
@ -703,11 +703,11 @@ void PhraseDictionaryDynamicCacheBased::Clear()
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
cacheMap::const_iterator it;
cacheMap::iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
(((*it).second).second)->clear();
delete ((*it).second).second;
delete ((*it).second).first;
((*it).second).first.reset();
}
m_cacheTM.clear();
m_entries = 0;
@ -746,7 +746,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const
cacheMap::const_iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
std::string source = (it->first).ToString();
TargetPhraseCollection* tpc = (it->second).first;
TargetPhraseCollection::shared_ptr tpc = (it->second).first;
TargetPhraseCollection::iterator itr;
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
std::string target = (*itr)->ToString();

View File

@ -53,7 +53,7 @@ class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
{
typedef std::vector<unsigned int> AgeCollection;
typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair;
typedef std::pair<TargetPhraseCollection::shared_ptr , AgeCollection*> TargetCollectionAgePair;
typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;
// data structure for the cache
@ -111,9 +111,14 @@ public:
void Load();
void Load(const std::string files);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection(const Phrase &src) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
// for phrase-based model
// void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

View File

@ -86,29 +86,32 @@ void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
// Look up each input in each model
BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
const Phrase &phrase = inputPath->GetPhrase();
const TargetPhraseCollection* targetPhrases =
TargetPhraseCollection::shared_ptr targetPhrases =
this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
}
}
const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
TargetPhraseCollection::shared_ptr PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
const Phrase& src) const
{
UTIL_THROW2("Don't call me without the translation task.");
}
const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
const ttasksptr& ttask, const Phrase& src) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryGroup::
GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
{
TargetPhraseCollection* ret = CreateTargetPhraseCollection(ttask, src);
TargetPhraseCollection::shared_ptr ret
= CreateTargetPhraseCollection(ttask, src);
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
return ret;
}
TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
const ttasksptr& ttask, const Phrase& src) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryGroup::
CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
{
// Aggregation of phrases and the scores that will be applied to them
vector<TargetPhrase*> allPhrases;
@ -121,8 +124,8 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
// Collect phrases from this table
const PhraseDictionary& pd = *m_memberPDs[i];
const TargetPhraseCollection* ret_raw = pd.GetTargetPhraseCollectionLEGACY(
ttask, src);
TargetPhraseCollection::shared_ptr
ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
if (ret_raw != NULL) {
// Process each phrase from table
@ -162,7 +165,7 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
}
// Apply scores to phrases and add them to return collection
TargetPhraseCollection* ret = new TargetPhraseCollection();
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
BOOST_FOREACH(TargetPhrase* phrase, allPhrases) {
phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second);
@ -174,29 +177,33 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
return ret;
}
ChartRuleLookupManager *PhraseDictionaryGroup::CreateRuleLookupManager(
const ChartParser &, const ChartCellCollectionBase&, size_t)
ChartRuleLookupManager*
PhraseDictionaryGroup::
CreateRuleLookupManager(const ChartParser &,
const ChartCellCollectionBase&, size_t)
{
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection* tpc)
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{
PhraseCache &ref = GetPhraseCache();
ref.push_back(tpc);
}
void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing(
const InputType &source)
void
PhraseDictionaryGroup::
CleanUpAfterSentenceProcessing(const InputType &source)
{
PhraseCache &ref = GetPhraseCache();
for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
delete *it;
}
GetPhraseCache().clear();
// PhraseCache &ref = GetPhraseCache();
// for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
// delete *it;
// }
PhraseCache temp;
temp.swap(ref);
// PhraseCache temp;
// temp.swap(ref);
CleanUpComponentModels(source);
}

View File

@ -43,19 +43,20 @@ class PhraseDictionaryGroup: public PhraseDictionary
public:
PhraseDictionaryGroup(const std::string& line);
void Load();
TargetPhraseCollection* CreateTargetPhraseCollection(const ttasksptr& ttask,
TargetPhraseCollection::shared_ptr
CreateTargetPhraseCollection(const ttasksptr& ttask,
const Phrase& src) const;
std::vector<std::vector<float> > getWeights(size_t numWeights,
bool normalize) const;
void CacheForCleanup(TargetPhraseCollection* tpc);
void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
void CleanUpAfterSentenceProcessing(const InputType& source);
void CleanUpComponentModels(const InputType& source);
// functions below override the base class
void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
const InputPathList &inputPathQueue) const;
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
const Phrase& src) const;
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
const ttasksptr& ttask, const Phrase& src) const;
void InitializeForInput(ttasksptr const& ttask) {
/* Don't do anything source specific here as this object is shared between threads.*/
@ -71,7 +72,7 @@ protected:
bool m_restrict;
std::vector<FeatureFunction*> m_pdFeature;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
#ifdef WITH_THREADS
boost::shared_mutex m_lock_cache;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;

View File

@ -49,16 +49,17 @@ PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
}
TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection(
const Phrase &source
, const TargetPhrase &target
, const Word *sourceLHS)
TargetPhraseCollection::shared_ptr
PhraseDictionaryMemory::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{
PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetTargetPhraseCollection();
}
const TargetPhraseCollection*
TargetPhraseCollection::shared_ptr
PhraseDictionaryMemory::
GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
{
@ -73,10 +74,10 @@ GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
const Word& word = source.GetWord(pos);
currNode = currNode->GetChild(word);
if (currNode == NULL)
return NULL;
return TargetPhraseCollection::shared_ptr();
}
return &currNode->GetTargetPhraseCollection();
return currNode->GetTargetPhraseCollection();
}
PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
@ -168,12 +169,11 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
lastWord.OnlyTheseFactors(m_inputFactors);
const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
TargetPhraseCollection::shared_ptr targetPhrases;
if (ptNode) {
const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection();
inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode);
} else {
inputPath.SetTargetPhrases(*this, NULL, NULL);
}
targetPhrases = ptNode->GetTargetPhraseCollection();
}
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
}
}
}

View File

@ -56,19 +56,23 @@ public:
std::size_t);
// only used by multi-model phrase table, and other meta-features
const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
void
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
TO_STRING();
protected:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
PhraseDictionaryNodeMemory &GetOrCreateNode(const Phrase &source
, const TargetPhrase &target
, const Word *sourceLHS);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
PhraseDictionaryNodeMemory &
GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS);
void SortAndPrune();
PhraseDictionaryNodeMemory m_collection;

View File

@ -26,8 +26,10 @@ using namespace std;
namespace Moses
{
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
:PhraseDictionary(line, true)
PhraseDictionaryMultiModel::
PhraseDictionaryMultiModel(const std::string &line)
: PhraseDictionary(line, true)
{
ReadParameters();
@ -45,7 +47,8 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
}
}
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line)
PhraseDictionaryMultiModel::
PhraseDictionaryMultiModel(int type, const std::string &line)
:PhraseDictionary(line, true)
{
if (type == 1) {
@ -56,7 +59,9 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri
}
}
void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value)
void
PhraseDictionaryMultiModel::
SetParameter(const std::string& key, const std::string& value)
{
if (key == "mode") {
m_mode = value;
@ -70,9 +75,9 @@ void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std:
}
}
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
{
}
PhraseDictionaryMultiModel::
~PhraseDictionaryMultiModel()
{ }
void PhraseDictionaryMultiModel::Load()
{
@ -88,18 +93,21 @@ void PhraseDictionaryMultiModel::Load()
}
}
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryMultiModel::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
std::vector<std::vector<float> > multimodelweights = getWeights(m_numScoreComponents, true);
TargetPhraseCollection *ret = NULL;
std::vector<std::vector<float> > multimodelweights;
multimodelweights = getWeights(m_numScoreComponents, true);
TargetPhraseCollection::shared_ptr ret;
std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>);
std::map<std::string, multiModelStats*>* allStats;
allStats = new(std::map<std::string,multiModelStats*>);
CollectSufficientStatistics(src, allStats);
ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
RemoveAllInMap(*allStats);
delete allStats;
delete allStats; // ??? Why the detour through malloc? UG
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
@ -107,16 +115,19 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
return ret;
}
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
void
PhraseDictionaryMultiModel::
CollectSufficientStatistics
(const Phrase& src, std::map<std::string, multiModelStats*>* allStats) const
{
for(size_t i = 0; i < m_numModels; ++i) {
const PhraseDictionary &pd = *m_pd[i];
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
TargetPhraseCollection::shared_ptr ret_raw;
ret_raw = pd.GetTargetPhraseCollectionLEGACY(src);
if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast;
if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
iterLast = ret_raw->begin() + m_tableLimit;
} else {
@ -130,7 +141,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
std::string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) {
multiModelStatistics * statistics = new multiModelStatistics;
multiModelStats * statistics = new multiModelStats;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
statistics->p.resize(m_numScoreComponents);
for(size_t j = 0; j < m_numScoreComponents; ++j) {
@ -149,7 +160,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
(*allStats)[targetString] = statistics;
}
multiModelStatistics * statistics = (*allStats)[targetString];
multiModelStats * statistics = (*allStats)[targetString];
for(size_t j = 0; j < m_numScoreComponents; ++j) {
statistics->p[j][i] = UntransformScore(raw_scores[j]);
@ -161,12 +172,17 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
}
}
TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryMultiModel::
CreateTargetPhraseCollectionLinearInterpolation
( const Phrase& src,
std::map<std::string,multiModelStats*>* allStats,
std::vector<std::vector<float> > &multimodelweights) const
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelStatistics * statistics = iter->second;
multiModelStats * statistics = iter->second;
Scores scoreVector(m_numScoreComponents);
@ -188,7 +204,9 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
}
//TODO: is it worth caching the results as long as weights don't change?
std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const
std::vector<std::vector<float> >
PhraseDictionaryMultiModel::
getWeights(size_t numWeights, bool normalize) const
{
const std::vector<float>* weights_ptr;
std::vector<float> raw_weights;
@ -237,7 +255,9 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
return multimodelweights;
}
std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<float> &weights) const
std::vector<float>
PhraseDictionaryMultiModel::
normalizeWeights(std::vector<float> &weights) const
{
std::vector<float> ret (m_numModels);
float total = std::accumulate(weights.begin(),weights.end(),0.0);
@ -248,29 +268,36 @@ std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<floa
}
ChartRuleLookupManager *PhraseDictionaryMultiModel::CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t)
ChartRuleLookupManager *
PhraseDictionaryMultiModel::
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
std::size_t)
{
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc)
void
PhraseDictionaryMultiModel::
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{
PhraseCache &ref = GetPhraseCache();
ref.push_back(tpc);
GetPhraseCache().push_back(tpc);
}
void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source)
void
PhraseDictionaryMultiModel::
CleanUpAfterSentenceProcessing(const InputType &source)
{
PhraseCache &ref = GetPhraseCache();
for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
delete *it;
}
// PhraseCache &ref = GetPhraseCache();
// for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
// it->reset();
// }
PhraseCache temp;
temp.swap(ref);
// PhraseCache temp;
// temp.swap(ref);
GetPhraseCache().clear();
CleanUpComponentModels(source);
@ -279,14 +306,18 @@ void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType
}
void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source)
void
PhraseDictionaryMultiModel::
CleanUpComponentModels(const InputType &source)
{
for(size_t i = 0; i < m_numModels; ++i) {
m_pd[i]->CleanUpAfterSentenceProcessing(source);
}
}
const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const
const std::vector<float>*
PhraseDictionaryMultiModel::
GetTemporaryMultiModelWeightsVector() const
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
@ -300,7 +331,9 @@ const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeig
#endif
}
void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
void
PhraseDictionaryMultiModel::
SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
{
#ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
@ -311,7 +344,9 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector
}
#ifdef WITH_DLIB
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
vector<float>
PhraseDictionaryMultiModel::
MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
{
map<pair<string, string>, size_t> phrase_pair_map;
@ -320,7 +355,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
phrase_pair_map[*iter] += 1;
}
vector<multiModelStatisticsOptimization*> optimizerStats;
vector<multiModelStatsOptimization*> optimizerStats;
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
@ -329,7 +364,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
string target_string = phrase_pair.second;
vector<float> fs(m_numModels);
map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>);
map<string,multiModelStats*>* allStats = new(map<string,multiModelStats*>);
Phrase sourcePhrase(0);
sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
@ -343,7 +378,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
continue;
}
multiModelStatisticsOptimization* targetStatistics = new multiModelStatisticsOptimization();
multiModelStatsOptimization* targetStatistics = new multiModelStatsOptimization();
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStatistics->p = (*allStats)[target_string]->p;
targetStatistics->f = iter->second;
@ -383,7 +418,9 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
}
vector<float> PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
vector<float>
PhraseDictionaryMultiModel::
Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
{
dlib::matrix<double,0,1> starting_point;
@ -428,8 +465,8 @@ double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
weight_vector = m_model->normalizeWeights(weight_vector);
}
for ( std::vector<multiModelStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelStatisticsOptimization* statistics = *iter;
for ( std::vector<multiModelStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelStatsOptimization* statistics = *iter;
size_t f = statistics->f;
double score;

View File

@ -36,15 +36,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
struct multiModelStatistics {
struct multiModelStats {
TargetPhrase *targetPhrase;
std::vector<std::vector<float> > p;
~multiModelStatistics() {
~multiModelStats() {
delete targetPhrase;
};
};
struct multiModelStatisticsOptimization: multiModelStatistics {
struct multiModelStatsOptimization: multiModelStats {
size_t f;
};
@ -71,27 +71,59 @@ public:
PhraseDictionaryMultiModel(int type, const std::string &line);
~PhraseDictionaryMultiModel();
void Load();
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
std::vector<float> normalizeWeights(std::vector<float> &weights) const;
void CacheForCleanup(TargetPhraseCollection* tpc);
void CleanUpAfterSentenceProcessing(const InputType &source);
virtual void CleanUpComponentModels(const InputType &source);
virtual void
CollectSufficientStatistics
(const Phrase& src, std::map<std::string,multiModelStats*>* allStats)
const;
virtual TargetPhraseCollection::shared_ptr
CreateTargetPhraseCollectionLinearInterpolation
(const Phrase& src, std::map<std::string,multiModelStats*>* allStats,
std::vector<std::vector<float> > &multimodelweights) const;
std::vector<std::vector<float> >
getWeights(size_t numWeights, bool normalize) const;
std::vector<float>
normalizeWeights(std::vector<float> &weights) const;
void
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
void
CleanUpAfterSentenceProcessing(const InputType &source);
virtual void
CleanUpComponentModels(const InputType &source);
#ifdef WITH_DLIB
virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
#endif
// functions below required by base class
virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
virtual void InitializeForInput(ttasksptr const& ttask) {
/* Don't do anything source specific here as this object is shared between threads.*/
}
ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t);
void SetParameter(const std::string& key, const std::string& value);
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const;
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
// functions below required by base class
virtual TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
virtual void
InitializeForInput(ttasksptr const& ttask) {
// Don't do anything source specific here as this object is shared
// between threads.
}
ChartRuleLookupManager*
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
std::size_t);
void
SetParameter(const std::string& key, const std::string& value);
const std::vector<float>*
GetTemporaryMultiModelWeightsVector() const;
void
SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
protected:
std::string m_mode;
@ -100,7 +132,7 @@ protected:
size_t m_numModels;
std::vector<float> m_multimodelweights;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
#ifdef WITH_THREADS
boost::shared_mutex m_lock_cache;
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
@ -146,7 +178,7 @@ class CrossEntropy: public OptimizationObjective
public:
CrossEntropy (
std::vector<multiModelStatisticsOptimization*> &optimizerStats,
std::vector<multiModelStatsOptimization*> &optimizerStats,
PhraseDictionaryMultiModel * model,
size_t iFeature
) {
@ -158,7 +190,7 @@ public:
double operator() ( const dlib::matrix<double,0,1>& arg) const;
protected:
std::vector<multiModelStatisticsOptimization*> m_optimizerStats;
std::vector<multiModelStatsOptimization*> m_optimizerStats;
PhraseDictionaryMultiModel * m_model;
size_t m_iFeature;
};

View File

@ -120,7 +120,7 @@ void PhraseDictionaryMultiModelCounts::Load()
}
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
vector<vector<float> > multimodelweights;
bool normalize;
@ -130,11 +130,12 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
//source phrase frequency is shared among all phrase pairs
vector<float> fs(m_numModels);
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
CollectSufficientStatistics(src, fs, allStats);
CollectSufficientStats(src, fs, allStats);
TargetPhraseCollection *ret = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
TargetPhraseCollection::shared_ptr ret
= CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret);
@ -142,16 +143,17 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
}
void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const
void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats) const
//fill fs and allStats with statistics from models
{
for(size_t i = 0; i < m_numModels; ++i) {
const PhraseDictionary &pd = *m_pd[i];
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
TargetPhraseCollection::shared_ptr ret_raw
= pd.GetTargetPhraseCollectionLEGACY(src);
if (ret_raw != NULL) {
TargetPhraseCollection::iterator iterTargetPhrase;
TargetPhraseCollection::const_iterator iterTargetPhrase;
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
const TargetPhrase * targetPhrase = *iterTargetPhrase;
@ -160,7 +162,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
string targetString = targetPhrase->GetStringRep(m_output);
if (allStats->find(targetString) == allStats->end()) {
multiModelCountsStatistics * statistics = new multiModelCountsStatistics;
multiModelCountsStats * statistics = new multiModelCountsStats;
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
//correct future cost estimates and total score
@ -178,7 +180,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
(*allStats)[targetString] = statistics;
}
multiModelCountsStatistics * statistics = (*allStats)[targetString];
multiModelCountsStats * statistics = (*allStats)[targetString];
statistics->fst[i] = UntransformScore(raw_scores[0]);
statistics->ft[i] = UntransformScore(raw_scores[1]);
@ -189,8 +191,8 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
}
// get target phrase frequency for models which have not seen the phrase pair
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStatistics * statistics = iter->second;
for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStats * statistics = iter->second;
for (size_t i = 0; i < m_numModels; ++i) {
if (!statistics->ft[i]) {
@ -200,12 +202,14 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
}
}
TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats, vector<vector<float> > &multimodelweights) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryMultiModelCounts::
CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats, vector<vector<float> > &multimodelweights) const
{
TargetPhraseCollection *ret = new TargetPhraseCollection();
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
multiModelCountsStatistics * statistics = iter->second;
multiModelCountsStats * statistics = iter->second;
if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
@ -248,7 +252,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
{
const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
TargetPhraseCollection::shared_ptr ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
if (ret_raw && ret_raw->GetSize() > 0) {
@ -320,7 +324,7 @@ double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( cons
}
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
{
//do all the necessary lexical table lookups and get counts, but don't apply weights yet
@ -474,7 +478,7 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
phrase_pair_map[*iter] += 1;
}
vector<multiModelCountsStatisticsOptimization*> optimizerStats;
vector<multiModelCountsStatsOptimization*> optimizerStats;
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
@ -483,12 +487,12 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
string target_string = phrase_pair.second;
vector<float> fs(m_numModels);
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
Phrase sourcePhrase(0);
sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
CollectSufficientStats(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
//phrase pair not found; leave cache empty
if (allStats->find(target_string) == allStats->end()) {
@ -497,19 +501,19 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
continue;
}
multiModelCountsStatisticsOptimization * targetStatistics = new multiModelCountsStatisticsOptimization();
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStatistics->fs = fs;
targetStatistics->fst = (*allStats)[target_string]->fst;
targetStatistics->ft = (*allStats)[target_string]->ft;
targetStatistics->f = iter->second;
multiModelCountsStatsOptimization * targetStats = new multiModelCountsStatsOptimization();
targetStats->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
targetStats->fs = fs;
targetStats->fst = (*allStats)[target_string]->fst;
targetStats->ft = (*allStats)[target_string]->ft;
targetStats->f = iter->second;
try {
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), targetStatistics->targetPhrase->GetAlignTerm());
targetStatistics->lexCachee2f = CacheLexicalStatistics(static_cast<const Phrase&>(*targetStatistics->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
targetStatistics->lexCachef2e = CacheLexicalStatistics(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), alignment.first, m_lexTable_f2e, true );
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), targetStats->targetPhrase->GetAlignTerm());
targetStats->lexCachee2f = CacheLexicalStats(static_cast<const Phrase&>(*targetStats->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
targetStats->lexCachef2e = CacheLexicalStats(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), alignment.first, m_lexTable_f2e, true );
optimizerStats.push_back(targetStatistics);
optimizerStats.push_back(targetStats);
} catch (AlignmentException& e) {}
RemoveAllInMap(*allStats);
@ -561,8 +565,8 @@ double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) con
weight_vector = m_model->normalizeWeights(weight_vector);
}
for ( std::vector<multiModelCountsStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelCountsStatisticsOptimization* statistics = *iter;
for ( std::vector<multiModelCountsStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
multiModelCountsStatsOptimization* statistics = *iter;
size_t f = statistics->f;
double score;

View File

@ -37,11 +37,11 @@ typedef boost::unordered_map<Word, lexicalMap > lexicalMapJoint;
typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
typedef std::vector<std::vector<lexicalPair> > lexicalCache;
// Statistics record for one target phrase in the counts-based multi-model.
// Extends the base multi-model record with two float vectors that are indexed
// by model: the .cpp code fills fst[i] and ft[i] from the first and second raw
// score of model i. Presumably these are the joint (source,target) count and
// the target phrase count -- TODO confirm against the model documentation.
struct multiModelCountsStatistics : multiModelStatistics {
struct multiModelCountsStats : multiModelStats {
std::vector<float> fst, ft;
};
struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics {
struct multiModelCountsStatsOptimization: multiModelCountsStats {
std::vector<float> fs;
lexicalCache lexCachee2f, lexCachef2e;
size_t f;
@ -80,18 +80,18 @@ public:
PhraseDictionaryMultiModelCounts(const std::string &line);
~PhraseDictionaryMultiModelCounts();
void Load();
TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
void CollectSufficientStatistics(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats) const;
TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const;
float GetTargetCount(const Phrase& target, size_t modelIndex) const;
double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const;
double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const;
double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const;
std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const;
std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
#ifdef WITH_DLIB
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
#endif
@ -117,7 +117,7 @@ class CrossEntropyCounts: public OptimizationObjective
public:
CrossEntropyCounts (
std::vector<multiModelCountsStatisticsOptimization*> &optimizerStats,
std::vector<multiModelCountsStatsOptimization*> &optimizerStats,
PhraseDictionaryMultiModelCounts * model,
size_t iFeature
) {
@ -129,7 +129,7 @@ public:
double operator() ( const dlib::matrix<double,0,1>& arg) const;
private:
std::vector<multiModelCountsStatisticsOptimization*> m_optimizerStats;
std::vector<multiModelCountsStatsOptimization*> m_optimizerStats;
PhraseDictionaryMultiModelCounts * m_model;
size_t m_iFeature;
};

View File

@ -39,7 +39,7 @@ void PhraseDictionaryNodeMemory::Prune(size_t tableLimit)
}
// prune TargetPhraseCollection in this node
m_targetPhraseCollection.Prune(true, tableLimit);
m_targetPhraseCollection->Prune(true, tableLimit);
}
void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
@ -53,10 +53,11 @@ void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
}
// sort TargetPhraseCollection in this node
m_targetPhraseCollection.Sort(true, tableLimit);
m_targetPhraseCollection->Sort(true, tableLimit);
}
// Returns a pointer to the child node stored under sourceTerm in
// m_sourceTermMap. The lookup goes through operator[], so -- assuming the map
// type has standard-container semantics -- a missing child is
// default-constructed and inserted on first access.
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
PhraseDictionaryNodeMemory*
PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
{
return &m_sourceTermMap[sourceTerm];
}
@ -118,7 +119,7 @@ void PhraseDictionaryNodeMemory::Remove()
{
m_sourceTermMap.clear();
m_nonTermMap.clear();
m_targetPhraseCollection.Remove();
m_targetPhraseCollection->Remove();
}
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeMemory &node)

View File

@ -130,12 +130,13 @@ private:
TerminalMap m_sourceTermMap;
NonTerminalMap m_nonTermMap;
TargetPhraseCollection m_targetPhraseCollection;
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
public:
// Default constructor. With m_targetPhraseCollection held as a shared_ptr, an
// empty TargetPhraseCollection is allocated here so that the member is
// non-null right after construction (Prune/Sort/Remove dereference it with ->
// without a null check).
PhraseDictionaryNodeMemory() {}
PhraseDictionaryNodeMemory()
: m_targetPhraseCollection(new TargetPhraseCollection) { }
// True when this node has no children at all, i.e. both the terminal map and
// the non-terminal map are empty.
bool IsLeaf() const {
return m_sourceTermMap.empty() && m_nonTermMap.empty();
}
@ -152,10 +153,12 @@ public:
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
#endif
// Accessors for the target phrase collection attached to this node.
// The shared_ptr versions return a copy of the member pointer, so the caller
// shares ownership of the collection with the node.
// NOTE(review): the const overload returns the same pointer type as the
// non-const one. If TargetPhraseCollection::shared_ptr points to a non-const
// collection (other code calls Add() through it), a const node no longer
// protects its collection from modification -- confirm this is intended.
const TargetPhraseCollection &GetTargetPhraseCollection() const {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() const {
return m_targetPhraseCollection;
}
TargetPhraseCollection &GetTargetPhraseCollection() {
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection() {
return m_targetPhraseCollection;
}

View File

@ -54,7 +54,9 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
}
}
void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
void
PhraseDictionaryTransliteration::
GetTargetPhraseCollection(InputPath &inputPath) const
{
const Phrase &sourcePhrase = inputPath.GetPhrase();
size_t hash = hash_value(sourcePhrase);
@ -66,7 +68,7 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
if (iter != cache.end()) {
// already in cache
const TargetPhraseCollection *tpColl = iter->second.first;
TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
} else {
// TRANSLITERATE
@ -89,17 +91,15 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
int ret = system(cmd.c_str());
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.path());
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
vector<TargetPhrase*> targetPhrases
= CreateTargetPhrases(sourcePhrase, outDir.path());
vector<TargetPhrase*>::const_iterator iter;
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
TargetPhrase *tp = *iter;
tpColl->Add(tp);
}
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
cache[hash] = value;
cache[hash] = CacheCollEntry(tpColl, clock());
inputPath.SetTargetPhrases(*this, tpColl, NULL);
}
}

View File

@ -74,11 +74,10 @@ void PhraseDictionaryTreeAdaptor::CleanUpAfterSentenceProcessing(InputType const
obj.CleanUp();
}
// Looks up the translation candidates for the source phrase src by forwarding
// the request to the implementation object returned by GetImplementation().
// The result of that call is returned unchanged; this function itself adds no
// caching and no null handling.
TargetPhraseCollection const*
TargetPhraseCollection::shared_ptr
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
{
const TargetPhraseCollection *ret = GetImplementation().GetTargetPhraseCollection(src);
return ret;
return GetImplementation().GetTargetPhraseCollection(src);
}
void PhraseDictionaryTreeAdaptor::EnableCache()
@ -107,16 +106,17 @@ const PDTAimp& PhraseDictionaryTreeAdaptor::GetImplementation() const
}
// legacy
// Returns the target phrases (with their source phrases) for the span `range`
// of the input `src`.
//  - If the implementation's range cache is empty, the span is extracted with
//    src.GetSubString(range) and looked up directly.
//  - Otherwise the entry stored at m_rangeCache[start][end] is returned.
// NOTE(review): the cache is indexed without a bounds check here; presumably
// it is sized for the current input before this is called -- TODO confirm.
const TargetPhraseCollectionWithSourcePhrase*
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PhraseDictionaryTreeAdaptor::
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
{
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
if(GetImplementation().m_rangeCache.empty()) {
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
return tpColl;
ret = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
} else {
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
return tpColl;
ret = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
}
return ret;
}
}

View File

@ -59,7 +59,8 @@ public:
// get translation candidates for a given source phrase
// returns null pointer if nothing found
TargetPhraseCollection const* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
void InitializeForInput(ttasksptr const& ttask);
void CleanUpAfterSentenceProcessing(InputType const& source);
@ -73,7 +74,9 @@ public:
}
// legacy
const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const & srcRange) const;
TargetPhraseCollectionWithSourcePhrase::shared_ptr
GetTargetPhraseCollectionLEGACY(InputType const& src,
WordsRange const & srcRange) const;
};

View File

@ -79,11 +79,11 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
continue;
}
TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase);
TargetPhraseCollection::shared_ptr tpColl = CreateTargetPhrase(sourcePhrase);
// add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase);
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(tpColl, clock());
cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
@ -109,7 +109,7 @@ std::vector<uint64_t> ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour
return ret;
}
TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
TargetPhraseCollection::shared_ptr ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
{
// create a target phrase from the 1st word of the source, prefix with 'ProbingPT:'
assert(sourcePhrase.GetSize());
@ -124,7 +124,7 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
std::pair<bool, std::vector<target_text> > query_result;
TargetPhraseCollection *tpColl = NULL;
TargetPhraseCollection::shared_ptr tpColl = NULL;
//Actual lookup
query_result = m_engine->query(probingSource);

View File

@ -49,12 +49,14 @@ protected:
// Provide access to RuleTableTrie's private
// GetOrCreateTargetPhraseCollection function.
// All arguments after ruleTable are forwarded unchanged and the trie's result
// is returned as-is. Presumably this helper exists because only the loader
// base class is a friend of RuleTableTrie, so derived loaders must go through
// it -- TODO confirm.
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
RuleTableTrie &ruleTable
, const Phrase &source
, const TargetPhrase &target
, const Word *sourceLHS) {
return ruleTable.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(RuleTableTrie &ruleTable,
const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{
return ruleTable.GetOrCreateTargetPhraseCollection(source, target,
sourceLHS);
}
};

View File

@ -224,9 +224,10 @@ bool RuleTableLoaderCompact::LoadRuleSection(
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
// Insert rule into table.
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
coll.Add(targetPhrase);
TargetPhraseCollection::shared_ptr coll;
coll = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
*targetPhrase, &sourceLHS);
coll->Add(targetPhrase);
}
return true;

View File

@ -242,8 +242,10 @@ bool RuleTableLoaderStandard::Load(FormatType format
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
*targetPhrase, sourceLHS);
phraseColl->Add(targetPhrase);
// not implemented correctly in memory pt. just delete it for now
delete sourceLHS;

View File

@ -282,8 +282,10 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
phraseColl.Add(targetPhrase);
TargetPhraseCollection::shared_ptr phraseColl
= GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase,
*targetPhrase, sourceLHS);
phraseColl->Add(targetPhrase);
count++;
@ -301,7 +303,9 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
//removedirectoryrecursively(dirName);
}
TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
TargetPhraseCollection::shared_ptr
PhraseDictionaryFuzzyMatch::
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
, const Phrase &source
, const TargetPhrase &target
, const Word *sourceLHS)

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
@ -59,7 +60,8 @@ public:
TO_STRING();
protected:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
, const Phrase &source
, const TargetPhrase &target
, const Word *sourceLHS);

View File

@ -149,26 +149,26 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
lastWord.OnlyTheseFactors(m_inputFactors);
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
TargetPhraseCollection::shared_ptr tpc;
if (lastWordOnDisk == NULL) {
// OOV according to this phrase table. Not possible to extend
inputPath.SetTargetPhrases(*this, NULL, NULL);
inputPath.SetTargetPhrases(*this, tpc, NULL);
} else {
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
if (ptNode) {
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
} else {
inputPath.SetTargetPhrases(*this, NULL, NULL);
}
OnDiskPt::PhraseNode const* ptNode;
ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
if (ptNode) tpc = GetTargetPhraseCollection(ptNode);
inputPath.SetTargetPhrases(*this, tpc, ptNode);
delete lastWordOnDisk;
}
}
}
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryOnDisk::
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
{
const TargetPhraseCollection *ret;
TargetPhraseCollection::shared_ptr ret;
CacheColl &cache = GetCache();
size_t hash = (size_t) ptNode->GetFilePos();
@ -181,31 +181,34 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(
// not in cache, need to look up from phrase table
ret = GetTargetPhraseCollectionNonCache(ptNode);
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(ret, clock());
cache[hash] = value;
} else {
// in cache. just use it
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
value.second = clock();
ret = value.first;
iter->second.second = clock();
ret = iter->second.first;
}
return ret;
}
// Builds the Moses target phrase collection for the on-disk node ptNode
// without consulting the cache: the node's on-disk collection is read (passing
// m_tableLimit as the table limit) and converted with ConvertToMoses, using
// this dictionary's current feature weights. The last ConvertToMoses argument
// (isSyntax) is passed as false.
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
TargetPhraseCollection::shared_ptr
PhraseDictionaryOnDisk::
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
{
// const is cast away because the read call below takes the wrapper by
// non-const reference.
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
OnDiskPt::OnDiskWrapper& wrapper
= const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
vector<float> weightT = StaticData::Instance().GetWeights(this);
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
TargetPhraseCollection *targetPhrases
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
OnDiskPt::TargetPhraseCollection::shared_ptr targetPhrasesOnDisk
= ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
TargetPhraseCollection::shared_ptr targetPhrases
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this,
weightT, vocab, false);
// When the on-disk collection is held by a shared_ptr it is released
// automatically at the end of this scope, so no explicit delete is needed
// (an explicit delete on a shared_ptr-owned object would double free).
delete targetPhrasesOnDisk;
// delete targetPhrasesOnDisk;
return targetPhrases;
}

View File

@ -78,8 +78,11 @@ public:
virtual void InitializeForInput(ttasksptr const& ttask);
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
void SetParameter(const std::string& key, const std::string& value);

View File

@ -51,9 +51,10 @@ public:
private:
friend class RuleTableLoader;
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS) = 0;
virtual TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS) = 0;
virtual void SortAndPrune() = 0;

View File

@ -38,8 +38,11 @@
namespace Moses
{
TargetPhraseCollection &RuleTableUTrie::GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
TargetPhraseCollection::shared_ptr
RuleTableUTrie::
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS)
{
UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection(target);

View File

@ -21,13 +21,13 @@
#include "Trie.h"
#include "UTrieNode.h"
#include "moses/TargetPhraseCollection.h"
namespace Moses
{
class Phrase;
class TargetPhrase;
class TargetPhraseCollection;
class Word;
class ChartParser;
@ -57,8 +57,10 @@ public:
const ChartCellCollectionBase &, std::size_t);
private:
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const Phrase &source,
const TargetPhrase &target,
const Word *sourceLHS);
UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
const Word *sourceLHS);

View File

@ -49,7 +49,7 @@ void UTrieNode::Prune(size_t tableLimit)
// Prune TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Prune(true, tableLimit);
p->second->Prune(true, tableLimit);
}
}
@ -66,7 +66,7 @@ void UTrieNode::Sort(size_t tableLimit)
// Sort TargetPhraseCollections at this node.
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
p->second.Sort(true, tableLimit);
p->second->Sort(true, tableLimit);
}
}
@ -89,8 +89,9 @@ UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
return m_gapNode;
}
TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
const TargetPhrase &target)
TargetPhraseCollection::shared_ptr
UTrieNode::
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
{
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
const size_t rank = alignmentInfo.GetSize();
@ -107,8 +108,9 @@ TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
vec.push_back(InsertLabel(i++, targetNonTerm));
}
return m_labelMap[vec];
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
if (ret == NULL) ret.reset(new TargetPhraseCollection);
return ret;
}
} // namespace Moses

View File

@ -51,10 +51,10 @@ public:
TerminalEqualityPred> TerminalMap;
typedef boost::unordered_map<std::vector<int>,
TargetPhraseCollection> LabelMap;
TargetPhraseCollection::shared_ptr> LabelMap;
#else
typedef std::map<Word, UTrieNode> TerminalMap;
typedef std::map<std::vector<int>, TargetPhraseCollection> LabelMap;
typedef std::map<std::vector<int>, TargetPhraseCollection::shared_ptr> LabelMap;
#endif
~UTrieNode() {
@ -78,8 +78,8 @@ public:
UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm);
UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
const TargetPhrase &);
TargetPhraseCollection::shared_ptr
GetOrCreateTargetPhraseCollection(const TargetPhrase &);
bool IsLeaf() const {
return m_terminalMap.empty() && m_gapNode == NULL;

View File

@ -47,7 +47,8 @@ void Scope3Parser::GetChartRuleCollection(
const size_t start = range.GetStartPos();
const size_t end = range.GetEndPos();
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec = m_ruleApplications[start][end-start+1];
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec
= m_ruleApplications[start][end-start+1];
MatchCallback matchCB(range, outColl);
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
@ -58,8 +59,8 @@ void Scope3Parser::GetChartRuleCollection(
if (varSpanNode.m_rank == 0) { // Purely lexical rule.
assert(labelMap.size() == 1);
const TargetPhraseCollection &tpc = labelMap.begin()->second;
matchCB.m_tpc = &tpc;
TargetPhraseCollection::shared_ptr tpc = labelMap.begin()->second;
matchCB.m_tpc = tpc;
matchCB(m_emptyStackVec);
} else { // Rule has at least one non-terminal.
varSpanNode.CalculateRanges(start, end, m_ranges);
@ -70,7 +71,7 @@ void Scope3Parser::GetChartRuleCollection(
UTrieNode::LabelMap::const_iterator p = labelMap.begin();
for (; p != labelMap.end(); ++p) {
const std::vector<int> &labels = p->first;
const TargetPhraseCollection &tpc = p->second;
TargetPhraseCollection::shared_ptr tpc = p->second;
assert(labels.size() == varSpanNode.m_rank);
bool failCheck = false;
for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
@ -82,7 +83,7 @@ void Scope3Parser::GetChartRuleCollection(
if (failCheck) {
continue;
}
matchCB.m_tpc = &tpc;
matchCB.m_tpc = tpc;
searcher.Search(labels, matchCB);
}
}

View File

@ -66,17 +66,16 @@ private:
// Define a callback type for use by StackLatticeSearcher.
// Each invocation reports one match to the ChartParserCallback: the target
// phrase collection currently stored in m_tpc, the stack vector passed by the
// searcher, and the word range given at construction.
// m_range and m_out are references, so the referenced objects must outlive
// the callback.
struct MatchCallback {
public:
MatchCallback(const WordsRange &range,
ChartParserCallback &out)
: m_range(range)
, m_out(out)
, m_tpc(NULL) {}
// In the shared_ptr variant m_tpc is left to its default constructor, which
// yields an empty (null) pointer; the explicit NULL initialiser is commented out.
MatchCallback(const WordsRange &range, ChartParserCallback &out)
: m_range(range) , m_out(out) // , m_tpc(NULL)
{ }
// NOTE: m_tpc is dereferenced without a null check, so the caller must assign
// m_tpc before invoking the callback.
void operator()(const StackVec &stackVec) {
m_out.Add(*m_tpc, stackVec, m_range);
}
const WordsRange &m_range;
ChartParserCallback &m_out;
// Collection to report on the next call; set by the code driving the search.
const TargetPhraseCollection *m_tpc;
TargetPhraseCollection::shared_ptr m_tpc;
};
void Init();

View File

@ -32,12 +32,13 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
const Phrase &sourcePhrase = inputPath.GetPhrase();
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
tpColl->Add(tp);
// add target phrase to phrase-table cache
size_t hash = hash_value(sourcePhrase);
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
std::pair<TargetPhraseCollection::shared_ptr, clock_t>
value(tpColl, clock());
cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);

View File

@ -4,182 +4,52 @@ namespace Moses
{
using std::vector;
// Constructor: stores the requested capacity and resets the queue
// bookkeeping. Throws (via UTIL_THROW_IF2) when the capacity is 2 or less.
// NOTE(review): this span is a rendered diff hunk; two spellings of the
// constructor header and both the m_doomed_* and the m_qfirst/m_qlast
// initialisations appear together without +/- markers.
TPCollCache
::TPCollCache(size_t capacity)
TPCollCache::
TPCollCache(size_t capacity)
{
m_doomed_first = m_doomed_last = NULL;
m_doomed_count = 0;
// end() serves as the "no entry" marker for both ends of the queue
m_qfirst = m_qlast = m_cache.end();
m_capacity = capacity;
// NOTE(review): the condition rejects capacities <= 2 while the message
// says "> 1" -- confirm which bound is intended.
UTIL_THROW_IF2(m_capacity <= 2, "Cache capacity must be > 1!");
}
// Debugging aid for the linked queue of TPCollWrapper nodes.
//
// first, last: the queue's head and tail pointers as seen by the caller.
// count:       the number of nodes the caller believes are queued.
//
// When first is NULL, throws (via UTIL_THROW_IF2) if last is non-NULL or
// count is non-zero, and returns true otherwise. For a non-empty queue it
// walks the nodes via ->next and prints one line per node to std::cerr
// (running index, count, first, the node's prev, the node, its next, last),
// followed by a line of dashes. It always returns true in that case; the
// actual consistency checks further down are commented out.
bool
sancheck(TPCollWrapper const* first, TPCollWrapper const* last, size_t count)
{
if (first == NULL)
{
UTIL_THROW_IF2(last != NULL || count != 0, "queue error");
return true;
}
size_t s = 0;
// dump every node reachable from first; nothing is verified here
for (TPCollWrapper const* x = first; x; x = x->next)
{
std::cerr << ++s << "/" << count << " "
<< first << " "
<< x->prev << " " << x << " " << x->next << " "
<< last << std::endl;
}
std::cerr << std::string(80,'-') << std::endl;
// disabled: forward and backward traversal checks against last/first/count
// while (x != last && s < count)
// {
// UTIL_THROW_IF2(x->next == NULL, "queue error");
// x = x->next;
// ++s;
// std::cerr << x << " " << s << "/" << count << std::endl;
// }
// std::cerr << x << " " << s << "/" << count << std::endl;
// UTIL_THROW_IF2(x != last, "queue error");
// UTIL_THROW_IF2(s != count, "queue error");
// x = last; s = 1;
// while (x != first && s++ < count)
// {
// UTIL_THROW_IF2(x->prev == NULL, "queue error");
// x = x->prev;
// }
// UTIL_THROW_IF2(x != first, "queue error");
// UTIL_THROW_IF2(s != count, "queue error");
return true;
}
/// remove a TPC from the "doomed" queue
// Unlinks x from the list delimited by m_doomed_first / m_doomed_last,
// clears x's own prev/next pointers and decrements m_doomed_count.
// A node that is neither the head nor has a predecessor is treated as "not
// queued": the function then returns without changes, after throwing if
// such a node nevertheless has a successor.
// Also writes a queue dump (sancheck) and a "Removing" line to std::cerr.
void
TPCollCache
::remove_from_queue(TPCollWrapper* x)
{
// caller must lock!
if (m_doomed_first != x && x->prev == NULL)
{ // not in the queue
UTIL_THROW_IF2(x->next, "queue error");
return;
}
sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
std::cerr << "Removing " << x << std::endl;
// bypass x in the forward direction (or advance the head)
if (m_doomed_first == x)
m_doomed_first = x->next;
else x->prev->next = x->next;
// bypass x in the backward direction (or pull back the tail)
if (m_doomed_last == x)
m_doomed_last = x->prev;
else x->next->prev = x->prev;
x->next = x->prev = NULL;
--m_doomed_count;
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
}
// Appends x at the tail of the "doomed" queue: x's prev is set to the old
// tail, the old tail (if any) is linked forward to x, x becomes the head as
// well when the queue was empty, and m_doomed_count is incremented.
// NOTE(review): x->next is not written here; this presumably relies on it
// already being NULL for a node that is not queued -- confirm.
void
TPCollCache
::add_to_queue(TPCollWrapper* x)
{
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
// caller must lock!
x->prev = m_doomed_last;
if (!m_doomed_first)
m_doomed_first = x;
if (m_doomed_last) m_doomed_last->next = x;
m_doomed_last = x;
++m_doomed_count;
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
}
// Returns the cache entry for `key`, creating it when absent and replacing
// it when its stored revision differs from `revision`.
//
// NOTE(review): this span is a rendered diff hunk. A raw-pointer version
// (upgrade_lock, refCount, remove_from_queue) and a shared-pointer version
// (unique_lock, iterator-linked queue, use_count-based eviction) are
// interleaved without +/- markers, so the text is not one compilable body.
//
// In the shared-pointer lines visible here:
//  - an existing entry is first unlinked from the queue,
//  - the entry is then (re)attached behind m_qlast,
//  - while the map holds more than m_capacity entries and the head entry's
//    shared pointer has use_count() == 1, the head entry is erased.
TPCollWrapper*
TPCollCache
::get(uint64_t key, size_t revision)
SPTR<TPCollWrapper>
TPCollCache::
get(uint64_t key, size_t revision)
{
using namespace boost;
upgrade_lock<shared_mutex> rlock(m_lock);
cache_t::iterator m = m_cache.find(key);
if (m == m_cache.end()) // new
unique_lock<shared_mutex> lock(m_lock);
// insert() leaves an existing entry untouched, so foo.first addresses either
// the old entry or a fresh one holding an empty pointer
std::pair<uint64_t, SPTR<TPCollWrapper> > e(key, SPTR<TPCollWrapper>());
std::pair<cache_t::iterator, bool> foo = m_cache.insert(e);
// NOTE(review): prev/next/m_qfirst/m_qlast are stored cache_t iterators;
// confirm that this insert() cannot trigger a rehash that invalidates them.
SPTR<TPCollWrapper>& ret = foo.first->second;
if (ret)
{
std::pair<uint64_t,TPCollWrapper*> e(key,NULL);
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
std::pair<cache_t::iterator,bool> foo = m_cache.insert(e);
if (foo.second) foo.first->second = new TPCollWrapper(key, revision);
m = foo.first;
// ++m->second->refCount;
// unlink the existing entry from its current queue position
// NOTE(review): when the entry is already the tail, m_qlast is not moved
// back to ret->prev before the re-append below -- confirm that case.
if (m_qfirst == foo.first) m_qfirst = ret->next;
else ret->prev->second->next = ret->next;
if (m_qlast != foo.first)
ret->next->second->prev = ret->prev;
}
else
if (!ret || ret->revision != revision)
ret.reset(new TPCollWrapper(key,revision));
// append the entry at the tail of the queue
// NOTE(review): no line in this span assigns foo.first to m_qfirst; confirm
// m_qfirst cannot still be m_cache.end() when the loop below dereferences it.
ret->prev = m_qlast;
if (m_qlast != m_cache.end()) m_qlast->second->next = foo.first;
m_qlast = foo.first;
// evict from the head while over capacity and the head is held only by the
// cache itself
while (m_cache.size() > m_capacity && m_qfirst->second.use_count() == 1)
{
if (m->second->refCount == 0)
{
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
remove_from_queue(m->second);
}
if (m->second->revision != revision) // out of date
{
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
m->second = new TPCollWrapper(key, revision);
}
// advance the head first, then erase the old head through the new head's
// prev link
m_qfirst = m_qfirst->second->next;
m_cache.erase(m_qfirst->second->prev);
}
++m->second->refCount;
return m->second;
return ret;
} // TPCollCache::get(...)
// Called when a consumer no longer needs *ptr. A NULL ptr is ignored.
// Decrements ptr->refCount; when it reaches zero, takes a unique lock on
// m_lock and appends ptr to the "doomed" queue. If that queue already holds
// m_capacity nodes, its head is removed first.
// Writes "Releasing" / "Deleting" trace lines to std::cerr.
void
TPCollCache
::release(TPCollWrapper const* ptr)
{
if (!ptr) return;
std::cerr << "Releasing " << ptr->key << " (" << ptr->refCount << ")" << std::endl;
if (--ptr->refCount == 0)
{
boost::unique_lock<boost::shared_mutex> lock(m_lock);
if (m_doomed_count == m_capacity)
{
// queue is full: take out its oldest node
TPCollWrapper* x = m_doomed_first;
remove_from_queue(x);
UTIL_THROW_IF2(x->refCount || x == ptr, "TPC was doomed while still in use!");
// NOTE(review): the lookup and comparison below use ptr, while the node
// taken off the queue is x -- confirm which entry was meant to be erased.
cache_t::iterator m = m_cache.find(ptr->key);
if (m != m_cache.end() && m->second == ptr)
{ // the cache could have been updated with a new pointer
// for the same phrase already, so we need to check
// if the pointer we found is the one we want to get rid of,
// hence the second check
// boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
m_cache.erase(m);
}
std::cerr << "Deleting " << x->key << " " << x->refCount << std::endl;
// the actual deletion is disabled, so x is not freed here
// delete x;
}
// const is cast away because queueing rewrites the node's prev/next links
add_to_queue(const_cast<TPCollWrapper*>(ptr));
}
} // TPCollCache::release(...)
// Constructor: stores the phrase key and the revision it was created for.
// NOTE(review): this span is a rendered diff hunk; two initialiser lists
// appear back to back without +/- markers. One of them also zeroes refCount
// and sets prev/next to NULL, the other initialises only revision and key.
TPCollWrapper::
TPCollWrapper(uint64_t key_, size_t revision_)
: refCount(0), prev(NULL), next(NULL)
, revision(revision_), key(key_)
: revision(revision_), key(key_)
{ }
// Destructor.
// NOTE(review): this span is a rendered diff hunk; a body that checks
// refCount (throwing via UTIL_THROW_IF2, then asserting it is zero) and an
// empty body appear back to back without +/- markers.
TPCollWrapper::
~TPCollWrapper()
{
UTIL_THROW_IF2(this->refCount, "TPCollWrapper refCount > 0!");
assert(this->refCount == 0);
}
{ }
} // namespace

View File

@ -3,60 +3,44 @@
#include <time.h>
#include "moses/TargetPhraseCollection.h"
#include <boost/atomic.hpp>
#include "mm/ug_typedefs.h"
namespace Moses
{
class TPCollCache;
class TPCollWrapper;
// Cache of TPCollWrapper objects keyed by a 64-bit id and held through
// shared pointers. Entries are additionally threaded into a queue via
// iterators into the map (m_qfirst / m_qlast here, prev / next in each
// TPCollWrapper).
class TPCollCache
{
public:
typedef boost::unordered_map<uint64_t, SPTR<TPCollWrapper> > cache_t;
private:
// NOTE(review): stored as uint32_t although the constructor takes size_t --
// confirm that the narrowing is acceptable.
uint32_t m_capacity; // capacity of cache
cache_t m_cache; // maps from ids to items
cache_t::iterator m_qfirst, m_qlast; // head and tail of the entry queue
mutable boost::shared_mutex m_lock; // locked by get()
public:
TPCollCache(size_t capacity=10000);
// Returns the entry for key, creating or refreshing it for the given revision.
SPTR<TPCollWrapper>
get(uint64_t key, size_t revision);
};
// NOTE(review): this span is a rendered diff hunk; the class header appears
// twice and both raw-pointer and iterator-typed prev/next members are listed,
// without +/- markers. It is not a single compilable definition as shown.
class TPCollWrapper
// wrapper around TargetPhraseCollection with reference counting
// and additional members for caching purposes
class TPCollWrapper
: public TargetPhraseCollection
{
friend class TPCollCache;
friend class Mmsapt;
mutable boost::atomic<uint32_t> refCount; // reference count
public:
TPCollWrapper* prev; // ... in queue of TPCollWrappers used recently
TPCollWrapper* next; // ... in queue of TPCollWrappers used recently
// queue links expressed as iterators into the owning cache's map
TPCollCache::cache_t::iterator prev, next;
public:
mutable boost::shared_mutex lock;
size_t const revision; // rev. No. of the underlying corpus
uint64_t const key; // phrase key
// NOTE(review): defined() tests for a preprocessor macro; if timespec is only
// a struct type on the target platform, the #else branch is always chosen --
// confirm this is the intended detection.
#if defined(timespec) // timespec is better, but not available everywhere
timespec tstamp; // last use
#else
timeval tstamp; // last use
#endif
TPCollWrapper(uint64_t const key, size_t const rev);
~TPCollWrapper();
};
// Raw-pointer variant of the cache: entries are TPCollWrapper* values in an
// unordered map, and wrappers handed back through release() are kept in a
// linked "doomed" queue tracked by m_doomed_first / m_doomed_last /
// m_doomed_count.
class TPCollCache
{
typedef boost::unordered_map<uint64_t, TPCollWrapper*> cache_t;
typedef std::vector<TPCollWrapper*> history_t;
cache_t m_cache; // maps from phrase ids to target phrase collections
// mutable history_t m_history; // heap of live items, least recently used one on top
mutable boost::shared_mutex m_lock; // locks m_cache
TPCollWrapper* m_doomed_first; // head of the doomed queue
TPCollWrapper* m_doomed_last; // tail of the doomed queue
uint32_t m_doomed_count; // counter of doomed TPCs
uint32_t m_capacity; // capacity of cache
// Both helpers expect the caller to hold the lock.
void add_to_queue(TPCollWrapper* x);
void remove_from_queue(TPCollWrapper* x);
public:
TPCollCache(size_t capacity=10000);
// Looks up (or creates) the wrapper for key at the given revision.
TPCollWrapper*
get(uint64_t key, size_t revision);
// Signals that the caller no longer needs *tpc.
void
release(TPCollWrapper const* tpc);
};
}

View File

@ -627,30 +627,32 @@ namespace Moses
{
InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase();
const TargetPhraseCollection *targetPhrases
TargetPhraseCollection::shared_ptr targetPhrases
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
}
}
TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
UTIL_THROW2("Don't call me without the translation task.");
}
// TargetPhraseCollection::shared_ptr
// Mmsapt::
// GetTargetPhraseCollectionLEGACY(const Phrase& src) const
// {
// UTIL_THROW2("Don't call me without the translation task.");
// }
// This is not the most efficient way of phrase lookup!
TargetPhraseCollection const*
TargetPhraseCollection::shared_ptr
Mmsapt::
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
{
boost::unique_lock<boost::shared_mutex> xlock(m_lock);
SPTR<TPCollWrapper> ret;
// boost::unique_lock<boost::shared_mutex> xlock(m_lock);
// map from Moses Phrase to internal id sequence
vector<id_type> sphrase;
fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase);
if (sphrase.size() == 0) return NULL;
if (sphrase.size() == 0) return ret;
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
// is set to a new copy of the dynamic bitext every time a sentence pair
// is added. /dyn/ keeps the old bitext around as long as we need it.
@ -665,42 +667,42 @@ namespace Moses
// lookup phrases in both bitexts
TSA<Token>::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size());
TSA<Token>::tree_iterator mdyn(dyn->I1.get());
if (dyn->I1.get())
if (dyn->I1.get()) // we have a dynamic bitext
for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
mdyn.extend(sphrase[i]);
if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
return NULL; // phrase not found in either bitext
return ret; // phrase not found in either bitext
// do we have cached results for this phrase?
uint64_t phrasekey = (mfix.size() == sphrase.size()
? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1);
// std::cerr << "Phrasekey is " << phrasekey << " at " << HERE << std::endl;
? (mfix.getPid()<<1)
: (mdyn.getPid()<<1)+1);
// get context-specific cache of items previously looked up
SPTR<ContextScope> const& scope = ttask->GetScope();
SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
if (!cache) cache = m_cache;
TPCollWrapper* ret = cache->get(phrasekey, dyn->revision());
// TO DO: we should revise the revision mechanism: we take the length
// of the dynamic bitext (in sentences) at the time the PT entry
// was stored as the time stamp. For each word in the
if (!cache) cache = m_cache; // no context-specific cache, use global one
ret = cache->get(phrasekey, dyn->revision());
// TO DO: we should revise the revision mechanism: we take the
// length of the dynamic bitext (in sentences) at the time the PT
// entry was stored as the time stamp. For each word in the
// vocabulary, we also store its most recent occurrence in the
// bitext. Only if the timestamp of each word in the phrase is
// newer than the timestamp of the phrase itself we must update
// the entry.
// std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl;
std::cerr << ret << " with " << ret->refCount << " references at "
<< HERE << std::endl;
// std::cerr << ret << " with " << ret->refCount << " references at "
// << HERE << std::endl;
boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
if (ret->GetSize()) return ret;
// new TPC (not found or old one was not up to date)
boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
// maybe another thread did the work while we waited for the lock ?
if (ret->GetSize()) return ret;
// check again, another thread may have done the work already
// OK: pt entry NOT found or NOT up to date
// lookup and expansion could be done in parallel threads,
@ -718,12 +720,16 @@ namespace Moses
else
{
BitextSampler<Token> s(btfix.get(), mfix, context->bias,
m_min_sample_size, m_default_sample_size, m_sampling_method);
m_min_sample_size,
m_default_sample_size,
m_sampling_method);
s();
sfix = s.stats();
}
}
if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn);
if (mdyn.size() == sphrase.size())
sdyn = dyn->lookup(ttask, mdyn);
vector<PhrasePair<Token> > ppfix,ppdyn;
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
@ -737,6 +743,7 @@ namespace Moses
expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log);
sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id);
}
// now we have two lists of Phrase Pairs, let's merge them
PhrasePair<Token>::SortByTargetIdSeq sorter;
size_t i = 0; size_t k = 0;
@ -939,9 +946,10 @@ namespace Moses
return mdyn.size() == myphrase.size();
}
#if 0
void
Mmsapt
::Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
::Release(ttasksptr const& ttask, TargetPhraseCollection::shared_ptr*& tpc) const
{
if (!tpc)
{
@ -957,6 +965,7 @@ namespace Moses
if (cache) cache->release(static_cast<TPCollWrapper const*>(tpc));
tpc = NULL;
}
#endif
bool Mmsapt
::ProvidesPrefixCheck() const { return true; }

View File

@ -179,7 +179,7 @@ namespace Moses
uint64_t const pid1,
sapt::pstats const& stats,
sapt::Bitext<Token> const & bt,
TargetPhraseCollection* tpcoll
TargetPhraseCollection::shared_ptr tpcoll
) const;
bool
@ -187,14 +187,14 @@ namespace Moses
(Phrase const& src,
uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta,
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const;
TargetPhraseCollection::shared_ptr tpcoll) const;
bool
combine_pstats
(Phrase const& src,
uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta,
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const;
TargetPhraseCollection::shared_ptr tpcoll) const;
void load_extra_data(std::string bname, bool locking);
void load_bias(std::string bname);
@ -209,15 +209,15 @@ namespace Moses
std::string const& GetName() const;
#ifndef NO_MOSES
TargetPhraseCollection const*
TargetPhraseCollection::shared_ptr
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
TargetPhraseCollection const*
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
// TargetPhraseCollection::shared_ptr
// GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
const InputPathList &inputPathQueue) const;
GetTargetPhraseCollectionBatch
(ttasksptr const& ttask, InputPathList const& inputPathQueue) const;
//! Create a sentence-specific manager for SCFG rule lookup.
ChartRuleLookupManager*
@ -234,7 +234,8 @@ namespace Moses
void setWeights(std::vector<float> const& w);
void Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
// void Release(ttasksptr const& ttask,
// TargetPhraseCollection const*& tpc) const;
// some consumer lets me know that *tpc isn't needed any more

View File

@ -80,7 +80,8 @@ int main(int argc, char* argv[])
Phrase& p = *phrase;
cout << p << endl;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(ttask,p);
TargetPhraseCollection::shared_ptr trg
= PT->GetTargetPhraseCollectionLEGACY(ttask,p);
if (!trg) continue;
vector<size_t> order(trg->GetSize());
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
@ -118,7 +119,7 @@ int main(int argc, char* argv[])
}
cout << endl;
}
PT->Release(ttask, trg);
// PT->Release(ttask, trg);
}
exit(0);
}

View File

@ -410,7 +410,7 @@ CreateTranslationOptionsForRange
const DecodeStep &dstep = **d;
const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature();
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
static_cast<const Tstep&>(dstep).ProcessInitialTranslation
(m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases);
@ -431,7 +431,7 @@ CreateTranslationOptionsForRange
TranslationOption &inputPartialTranslOpt = **pto;
if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep)) {
const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
this, adhereTableLimit, targetPhrases);
} else {

View File

@ -142,7 +142,8 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
const InputPath &path = *m_inputPathQueue[i];
const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary);
TargetPhraseCollection::shared_ptr tpColl
= path.GetTargetPhrases(phraseDictionary);
const WordsRange &range = path.GetWordsRange();
if (tpColl && tpColl->GetSize()) {