mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
Life cycle of TargetPhraseCollection is now managed via shared pointers.
This commit is contained in:
parent
7a85126a92
commit
bdb0227ee9
@ -249,16 +249,12 @@ size_t PhraseNode::ReadChild(Word &wordFound, uint64_t &childFilePos, const char
|
||||
return memRead;
|
||||
}
|
||||
|
||||
// Diff view of PhraseNode::GetTargetPhraseCollection: two renderings of the
// same function are interleaved below (presumably the removed raw-pointer
// lines first, then their shared_ptr replacements -- the page carries no +/-
// markers, so confirm against the commit).
// Raw-pointer rendering: allocates a TargetPhraseCollection with `new`.
// shared_ptr rendering: allocates the collection into a
// TargetPhraseCollection::shared_ptr, calls
// ReadFromFile(tableLimit, m_value, onDiskWrapper) only when m_value > 0,
// and returns the pointer (to a default-constructed collection otherwise).
const TargetPhraseCollection *PhraseNode::GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseNode::
|
||||
GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
|
||||
{
|
||||
TargetPhraseCollection *ret = new TargetPhraseCollection();
|
||||
|
||||
if (m_value > 0)
|
||||
ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
|
||||
else {
|
||||
|
||||
}
|
||||
|
||||
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||
if (m_value > 0) ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -92,8 +92,11 @@ public:
|
||||
}
|
||||
|
||||
const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const;
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const;
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection(size_t tableLimit,
|
||||
OnDiskWrapper &onDiskWrapper) const;
|
||||
|
||||
// Assigns `counts` to the member m_counts, replacing whatever it held before.
void AddCounts(const std::vector<float> &counts) {
|
||||
m_counts = counts;
|
||||
}
|
||||
|
@ -114,23 +114,22 @@ void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper)
|
||||
|
||||
}
|
||||
|
||||
Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
||||
Moses::TargetPhraseCollection::shared_ptr TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
||||
, const std::vector<Moses::FactorType> &outputFactors
|
||||
, const Moses::PhraseDictionary &phraseDict
|
||||
, const std::vector<float> &weightT
|
||||
, Vocab &vocab
|
||||
, bool isSyntax) const
|
||||
{
|
||||
Moses::TargetPhraseCollection *ret = new Moses::TargetPhraseCollection();
|
||||
Moses::TargetPhraseCollection::shared_ptr ret;
|
||||
ret.reset(new Moses::TargetPhraseCollection);
|
||||
|
||||
CollType::const_iterator iter;
|
||||
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
|
||||
const TargetPhrase &tp = **iter;
|
||||
Moses::TargetPhrase *mosesPhrase = tp.ConvertToMoses(inputFactors, outputFactors
|
||||
, vocab
|
||||
, phraseDict
|
||||
, weightT
|
||||
, isSyntax);
|
||||
Moses::TargetPhrase *mosesPhrase
|
||||
= tp.ConvertToMoses(inputFactors, outputFactors, vocab,
|
||||
phraseDict, weightT, isSyntax);
|
||||
|
||||
/*
|
||||
// debugging output
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
#include "TargetPhrase.h"
|
||||
#include "Vocab.h"
|
||||
#include "moses/TargetPhraseCollection.h"
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -50,6 +52,9 @@ protected:
|
||||
std::string m_debugStr;
|
||||
|
||||
public:
|
||||
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
|
||||
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
|
||||
|
||||
static size_t s_sortScoreInd;
|
||||
|
||||
TargetPhraseCollection();
|
||||
@ -69,7 +74,7 @@ public:
|
||||
|
||||
uint64_t GetFilePos() const;
|
||||
|
||||
Moses::TargetPhraseCollection *ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
||||
Moses::TargetPhraseCollection::shared_ptr ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
||||
, const std::vector<Moses::FactorType> &outputFactors
|
||||
, const Moses::PhraseDictionary &phraseDict
|
||||
, const std::vector<float> &weightT
|
||||
|
@ -56,7 +56,7 @@ int main(int argc, char **argv)
|
||||
|
||||
if (node) {
|
||||
// source phrase points to a bunch of rules
|
||||
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
|
||||
TargetPhraseCollection::shared_ptr coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
|
||||
string str = coll->GetDebugStr();
|
||||
cout << "Found " << coll->GetSize() << endl;
|
||||
|
||||
|
@ -116,7 +116,7 @@ typedef
|
||||
boost::unordered_set<TargetPhrase*,PhrasePtrHasher,PhrasePtrComparator> PhraseSet;
|
||||
|
||||
|
||||
const TargetPhraseCollection*
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
|
||||
{
|
||||
|
||||
@ -125,7 +125,7 @@ PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
|
||||
PhraseSet allPhrases;
|
||||
vector<PhraseSet> phrasesByTable(m_dictionaries.size());
|
||||
for (size_t i = 0; i < m_dictionaries.size(); ++i) {
|
||||
const TargetPhraseCollection* phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
|
||||
TargetPhraseCollection::shared_ptr phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
|
||||
if (phrases) {
|
||||
for (TargetPhraseCollection::const_iterator j = phrases->begin();
|
||||
j != phrases->end(); ++j) {
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
, const LMList &languageModels
|
||||
, float weightWP);
|
||||
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const;
|
||||
virtual TargetPhraseCollection::shared_ptr GetTargetPhraseCollection(const Phrase& src) const;
|
||||
virtual void InitializeForInput(ttasksptr const& ttask);
|
||||
virtual ChartRuleLookupManager *CreateRuleLookupManager(
|
||||
const InputType &,
|
||||
@ -65,7 +65,7 @@ private:
|
||||
typedef boost::shared_ptr<PhraseDictionaryTreeAdaptor> DictionaryHandle;
|
||||
std::vector<DictionaryHandle> m_dictionaries;
|
||||
std::vector<std::vector<float> > m_weights; //feature x table
|
||||
mutable TargetPhraseCollection* m_targetPhrases;
|
||||
mutable TargetPhraseCollection::shared_ptr m_targetPhrases;
|
||||
std::vector<float> m_weightT;
|
||||
size_t m_tableLimit;
|
||||
const LMList* m_languageModels;
|
||||
|
@ -44,7 +44,7 @@ ChartParserUnknown
|
||||
// Destructor: calls RemoveAllInColl on m_unksrcs.
// The RemoveAllInColl call on m_cacheTargetPhraseCollection appears twice
// below, once live and once commented out; these are the two sides of the
// diff. NOTE(review): presumably the commented-out form is the post-commit
// one, because the header hunk in this diff re-declares that list as
// std::list<TargetPhraseCollection::shared_ptr> -- confirm against the commit.
ChartParserUnknown::~ChartParserUnknown()
|
||||
{
|
||||
RemoveAllInColl(m_unksrcs);
|
||||
RemoveAllInColl(m_cacheTargetPhraseCollection);
|
||||
// RemoveAllInColl(m_cacheTargetPhraseCollection);
|
||||
}
|
||||
|
||||
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)
|
||||
|
@ -27,7 +27,7 @@
|
||||
#include "WordsRange.h"
|
||||
#include "StackVec.h"
|
||||
#include "InputPath.h"
|
||||
|
||||
#include "TargetPhraseCollection.h"
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
@ -38,7 +38,7 @@ class Sentence;
|
||||
class ChartCellCollectionBase;
|
||||
class Word;
|
||||
class Phrase;
|
||||
class TargetPhraseCollection;
|
||||
// class TargetPhraseCollection;
|
||||
class DecodeGraph;
|
||||
|
||||
class ChartParserUnknown
|
||||
@ -56,7 +56,7 @@ public:
|
||||
|
||||
private:
|
||||
std::vector<Phrase*> m_unksrcs;
|
||||
std::list<TargetPhraseCollection*> m_cacheTargetPhraseCollection;
|
||||
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
|
||||
};
|
||||
|
||||
class ChartParser
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "StackVec.h"
|
||||
|
||||
#include <list>
|
||||
#include "TargetPhraseCollection.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -23,7 +24,7 @@ public:
|
||||
|
||||
virtual bool Empty() const = 0;
|
||||
|
||||
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) = 0;
|
||||
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range) = 0;
|
||||
|
||||
virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;
|
||||
|
||||
|
@ -115,9 +115,13 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
|
||||
}
|
||||
}
|
||||
|
||||
void ChartTranslationOptionList::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range)
|
||||
void
|
||||
ChartTranslationOptionList::
|
||||
AddPhraseOOV(TargetPhrase &phrase,
|
||||
std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
|
||||
const WordsRange &range)
|
||||
{
|
||||
TargetPhraseCollection *tpc = new TargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
|
||||
tpc->Add(&phrase);
|
||||
waste_memory.push_back(tpc);
|
||||
StackVec empty;
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
void Add(const TargetPhraseCollection &, const StackVec &,
|
||||
const WordsRange &);
|
||||
|
||||
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
|
||||
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
|
||||
|
||||
bool Empty() const {
|
||||
return m_size == 0;
|
||||
|
@ -49,7 +49,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit
|
||||
, const TargetPhraseCollection *phraseColl) const
|
||||
, TargetPhraseCollection::shared_ptr phraseColl) const
|
||||
{
|
||||
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
||||
// word deletion
|
||||
@ -105,7 +105,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
|
||||
,PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhraseCollection *phraseColl) const
|
||||
, TargetPhraseCollection::shared_ptr phraseColl) const
|
||||
{
|
||||
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
@ -147,7 +147,8 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
|
||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||
|
||||
const WordsRange wordsRange(startPos, endPos);
|
||||
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
||||
= phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
IFVERBOSE(3) {
|
||||
@ -237,8 +238,8 @@ ProcessLEGACY(TranslationOption const& in,
|
||||
size_t const currSize = inPhrase.GetSize();
|
||||
size_t const tableLimit = pdict->GetTableLimit();
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase const* phraseColl;
|
||||
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
||||
= pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
||||
|
||||
if (phraseColl != NULL) {
|
||||
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
||||
|
@ -48,7 +48,7 @@ public:
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, TranslationOptionCollection *toc
|
||||
, bool adhereTableLimit
|
||||
, const TargetPhraseCollection *phraseColl) const;
|
||||
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
||||
|
||||
|
||||
/*! initialize list of partial translation options by applying the first translation step
|
||||
@ -58,7 +58,7 @@ public:
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhraseCollection *phraseColl) const;
|
||||
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
||||
|
||||
// legacy
|
||||
void ProcessInitialTranslationLEGACY(const InputType &source
|
||||
|
@ -83,7 +83,7 @@ public:
|
||||
|
||||
void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored);
|
||||
|
||||
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
|
||||
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
|
||||
|
||||
float GetBestScore(const ChartCellLabel *chartCell) const;
|
||||
|
||||
@ -160,7 +160,7 @@ template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targe
|
||||
}
|
||||
}
|
||||
|
||||
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &, const WordsRange &range)
|
||||
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &, const WordsRange &range)
|
||||
{
|
||||
std::vector<lm::WordIndex> words;
|
||||
UTIL_THROW_IF2(phrase.GetSize() > 1,
|
||||
|
@ -39,34 +39,40 @@ InputPath::~InputPath()
|
||||
|
||||
// std::cerr << "Deconstructing InputPath" << std::endl;
|
||||
|
||||
// Since there is no way for the Phrase Dictionaries to tell in
|
||||
// which (sentence) context phrases were looked up, we tell them
|
||||
// now that the phrase isn't needed any more by this inputPath
|
||||
typedef std::pair<const TargetPhraseCollection*, const void* > entry;
|
||||
std::map<const PhraseDictionary*, entry>::iterator iter;
|
||||
ttasksptr theTask = this->ttask.lock();
|
||||
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
|
||||
{
|
||||
// std::cerr << iter->second.first << " decommissioned." << std::endl;
|
||||
iter->first->Release(theTask, iter->second.first);
|
||||
}
|
||||
|
||||
// // NOT NEEDED ANY MORE SINCE THE SWITCH TO SHARED POINTERS
|
||||
// // Since there is no way for the Phrase Dictionaries to tell in
|
||||
// // which (sentence) context phrases were looked up, we tell them
|
||||
// // now that the phrase isn't needed any more by this inputPath
|
||||
// typedef std::pair<boost::shared_ptr<TargetPhraseCollection>, const void* > entry;
|
||||
// std::map<const PhraseDictionary*, entry>::iterator iter;
|
||||
// ttasksptr theTask = this->ttask.lock();
|
||||
// for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
|
||||
// {
|
||||
// // std::cerr << iter->second.first << " decommissioned." << std::endl;
|
||||
// iter->first->Release(theTask, iter->second.first);
|
||||
// }
|
||||
|
||||
delete m_inputScore;
|
||||
}
|
||||
|
||||
// Looks up the entry stored for `phraseDictionary` (keyed by its address) in
// m_targetPhrases and returns the first member of the stored pair, i.e. the
// target phrase collection.
// When no entry exists the result is null: `NULL` in the raw-pointer
// rendering, a default-constructed TargetPhraseCollection::shared_ptr in the
// shared_ptr rendering. Both renderings are interleaved below (diff view
// without +/- markers).
const TargetPhraseCollection *InputPath::GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
InputPath::
|
||||
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
|
||||
{
|
||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
||||
TargetPhrases::const_iterator iter;
|
||||
iter = m_targetPhrases.find(&phraseDictionary);
|
||||
if (iter == m_targetPhrases.end()) {
|
||||
return NULL;
|
||||
return TargetPhraseCollection::shared_ptr();
|
||||
}
|
||||
return iter->second.first;
|
||||
}
|
||||
|
||||
const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
|
||||
const void*
|
||||
InputPath::
|
||||
GetPtNode(const PhraseDictionary &phraseDictionary) const
|
||||
{
|
||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
||||
TargetPhrases::const_iterator iter;
|
||||
iter = m_targetPhrases.find(&phraseDictionary);
|
||||
if (iter == m_targetPhrases.end()) {
|
||||
return NULL;
|
||||
@ -74,11 +80,14 @@ const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
|
||||
return iter->second.second;
|
||||
}
|
||||
|
||||
// Stores the pair (targetPhrases, ptNode) in m_targetPhrases under the key
// &phraseDictionary; the map's operator[] assignment overwrites any entry
// already stored for that dictionary.
// Two renderings are interleaved below (diff view without +/- markers): one
// taking a `const TargetPhraseCollection*`, one taking a
// `TargetPhraseCollection::shared_ptr const&` that is copied into the pair.
void InputPath::SetTargetPhrases(const PhraseDictionary &phraseDictionary
|
||||
, const TargetPhraseCollection *targetPhrases
|
||||
, const void *ptNode)
|
||||
void
|
||||
InputPath::
|
||||
SetTargetPhrases(const PhraseDictionary &phraseDictionary,
|
||||
TargetPhraseCollection::shared_ptr const& targetPhrases,
|
||||
const void *ptNode)
|
||||
{
|
||||
std::pair<const TargetPhraseCollection*, const void*> value(targetPhrases, ptNode);
|
||||
std::pair<TargetPhraseCollection::shared_ptr, const void*>
|
||||
value(targetPhrases, ptNode);
|
||||
m_targetPhrases[&phraseDictionary] = value;
|
||||
}
|
||||
|
||||
@ -93,10 +102,10 @@ const Word &InputPath::GetLastWord() const
|
||||
size_t InputPath::GetTotalRuleSize() const
|
||||
{
|
||||
size_t ret = 0;
|
||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
||||
TargetPhrases::const_iterator iter;
|
||||
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
|
||||
// const PhraseDictionary *pt = iter->first;
|
||||
const TargetPhraseCollection *tpColl = iter->second.first;
|
||||
TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
|
||||
|
||||
if (tpColl) {
|
||||
ret += tpColl->GetSize();
|
||||
@ -110,10 +119,10 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj)
|
||||
{
|
||||
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase();
|
||||
|
||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
||||
InputPath::TargetPhrases::const_iterator iter;
|
||||
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
|
||||
const PhraseDictionary *pt = iter->first;
|
||||
const TargetPhraseCollection *tpColl = iter->second.first;
|
||||
boost::shared_ptr<TargetPhraseCollection const> tpColl = iter->second.first;
|
||||
|
||||
out << pt << "=";
|
||||
if (tpColl) {
|
||||
|
@ -8,12 +8,12 @@
|
||||
#include "WordsRange.h"
|
||||
#include "NonTerminal.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include "TargetPhraseCollection.h"
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class PhraseDictionary;
|
||||
class TargetPhraseCollection;
|
||||
class ScoreComponentCollection;
|
||||
class TargetPhrase;
|
||||
class InputPath;
|
||||
@ -32,7 +32,12 @@ class InputPath
|
||||
friend std::ostream& operator<<(std::ostream& out, const InputPath &obj);
|
||||
|
||||
public:
|
||||
typedef std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> > TargetPhrases;
|
||||
|
||||
typedef std::pair<TargetPhraseCollection::shared_ptr, const void*>
|
||||
TPCollStoreEntry;
|
||||
|
||||
typedef std::map<const PhraseDictionary*, TPCollStoreEntry>
|
||||
TargetPhrases;
|
||||
|
||||
public:
|
||||
ttaskwptr const ttask;
|
||||
@ -96,10 +101,14 @@ public:
|
||||
m_nextNode = nextNode;
|
||||
}
|
||||
|
||||
void SetTargetPhrases(const PhraseDictionary &phraseDictionary
|
||||
, const TargetPhraseCollection *targetPhrases
|
||||
, const void *ptNode);
|
||||
const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
|
||||
void
|
||||
SetTargetPhrases(const PhraseDictionary &phraseDictionary,
|
||||
TargetPhraseCollection::shared_ptr const& targetPhrases,
|
||||
const void *ptNode);
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
|
||||
|
||||
// Read-only access to the whole m_targetPhrases map (one entry per
// PhraseDictionary*), returned by const reference.
const TargetPhrases &GetTargetPhrases() const {
|
||||
return m_targetPhrases;
|
||||
}
|
||||
|
@ -63,27 +63,29 @@ void PDTAimp::CleanUp()
|
||||
{
|
||||
assert(m_dict);
|
||||
m_dict->FreeMemory();
|
||||
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
|
||||
// for(size_t i=0; i<m_tgtColls.size(); ++i) m_tgtColls[i].reset();
|
||||
m_tgtColls.clear();
|
||||
m_cache.clear();
|
||||
m_rangeCache.clear();
|
||||
uniqSrcPhr.clear();
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase const*
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
||||
{
|
||||
|
||||
assert(m_dict);
|
||||
if(src.GetSize()==0) return 0;
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
|
||||
if(src.GetSize()==0) return ret;
|
||||
|
||||
std::pair<MapSrc2Tgt::iterator,bool> piter;
|
||||
if(useCache) {
|
||||
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
|
||||
piter=m_cache.insert(std::make_pair(src, ret));
|
||||
if(!piter.second) return piter.first->second;
|
||||
} else if (m_cache.size()) {
|
||||
MapSrc2Tgt::const_iterator i=m_cache.find(src);
|
||||
return (i!=m_cache.end() ? i->second : 0);
|
||||
return (i!=m_cache.end() ? i->second : ret);
|
||||
}
|
||||
|
||||
std::vector<std::string> srcString(src.GetSize());
|
||||
@ -97,7 +99,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
||||
std::vector<std::string> wacands;
|
||||
m_dict->GetTargetCandidates(srcString,cands,wacands);
|
||||
if(cands.empty()) {
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
//TODO: Multiple models broken here
|
||||
@ -140,16 +142,14 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
||||
sourcePhrases.push_back(src);
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv;
|
||||
rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
|
||||
if(rv->IsEmpty()) {
|
||||
delete rv;
|
||||
return 0;
|
||||
ret = PruneTargetCandidates(tCands,costs, sourcePhrases);
|
||||
if(ret->IsEmpty()) {
|
||||
ret.reset();
|
||||
} else {
|
||||
if(useCache) piter.first->second=rv;
|
||||
m_tgtColls.push_back(rv);
|
||||
return rv;
|
||||
if(useCache) piter.first->second = ret;
|
||||
m_tgtColls.push_back(ret);
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
@ -352,7 +352,8 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
||||
pathExplored[len]+=exploredPaths[len];
|
||||
|
||||
|
||||
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
|
||||
// m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
|
||||
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize()));
|
||||
|
||||
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
|
||||
assert(i->first.first<m_rangeCache.size());
|
||||
@ -386,10 +387,11 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
||||
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
rv = PruneTargetCandidates(tCands, costs, sourcePhrases);
|
||||
|
||||
if(rv->IsEmpty())
|
||||
delete rv;
|
||||
rv.reset();
|
||||
else {
|
||||
m_rangeCache[i->first.first][i->first.second-1]=rv;
|
||||
m_tgtColls.push_back(rv);
|
||||
@ -428,7 +430,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||
targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
PDTAimp::PruneTargetCandidates
|
||||
(const std::vector<TargetPhrase> & tCands,
|
||||
std::vector<std::pair<float,size_t> >& costs,
|
||||
const std::vector<Phrase> &sourcePhrases) const
|
||||
@ -437,7 +440,8 @@ TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
|
||||
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
|
||||
"Number of target phrases must equal number of source phrases");
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr rv;
|
||||
rv.reset(new TargetPhraseCollectionWithSourcePhrase);
|
||||
|
||||
|
||||
// set limit to tableLimit or actual size, whatever is smaller
|
||||
|
@ -44,10 +44,10 @@ public:
|
||||
std::vector<FactorType> m_input,m_output;
|
||||
PhraseDictionaryTree *m_dict;
|
||||
const InputFeature *m_inputFeature;
|
||||
typedef std::vector<TargetPhraseCollectionWithSourcePhrase const*> vTPC;
|
||||
typedef std::vector<TargetPhraseCollectionWithSourcePhrase::shared_ptr> vTPC;
|
||||
mutable vTPC m_tgtColls;
|
||||
|
||||
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase const*> MapSrc2Tgt;
|
||||
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase::shared_ptr> MapSrc2Tgt;
|
||||
mutable MapSrc2Tgt m_cache;
|
||||
PhraseDictionaryTreeAdaptor *m_obj;
|
||||
int useCache;
|
||||
@ -69,7 +69,7 @@ public:
|
||||
|
||||
void CleanUp();
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase const*
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
GetTargetPhraseCollection(Phrase const &src) const;
|
||||
|
||||
void Create(const std::vector<FactorType> &input
|
||||
@ -121,7 +121,7 @@ public:
|
||||
const std::string *alignmentString,
|
||||
Phrase const* srcPtr=0) const;
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr PruneTargetCandidates
|
||||
(const std::vector<TargetPhrase> & tCands,
|
||||
std::vector<std::pair<float,size_t> >& costs,
|
||||
const std::vector<Phrase> &sourcePhrases) const;
|
||||
|
@ -28,9 +28,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
|
||||
HyperPath source;
|
||||
SynthesizeHyperPath(e, source);
|
||||
TargetPhrase *tp = SynthesizeTargetPhrase(e);
|
||||
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(m_hyperTree,
|
||||
source);
|
||||
tpc.Add(tp);
|
||||
TargetPhraseCollection::shared_ptr tpc
|
||||
= GetOrCreateTargetPhraseCollection(m_hyperTree, source);
|
||||
tpc->Add(tp);
|
||||
}
|
||||
|
||||
void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
|
||||
|
@ -14,7 +14,7 @@ void HyperTree::Node::Prune(std::size_t tableLimit)
|
||||
p->second.Prune(tableLimit);
|
||||
}
|
||||
// Prune TargetPhraseCollection at this node.
|
||||
m_targetPhraseCollection.Prune(true, tableLimit);
|
||||
m_targetPhraseCollection->Prune(true, tableLimit);
|
||||
}
|
||||
|
||||
void HyperTree::Node::Sort(std::size_t tableLimit)
|
||||
@ -24,7 +24,7 @@ void HyperTree::Node::Sort(std::size_t tableLimit)
|
||||
p->second.Sort(tableLimit);
|
||||
}
|
||||
// Sort TargetPhraseCollection at this node.
|
||||
m_targetPhraseCollection.Sort(true, tableLimit);
|
||||
m_targetPhraseCollection->Sort(true, tableLimit);
|
||||
}
|
||||
|
||||
HyperTree::Node *HyperTree::Node::GetOrCreateChild(
|
||||
@ -40,7 +40,7 @@ const HyperTree::Node *HyperTree::Node::GetChild(
|
||||
return (p == m_map.end()) ? NULL : &p->second;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &HyperTree::GetOrCreateTargetPhraseCollection(
|
||||
TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection(
|
||||
const HyperPath &hyperPath)
|
||||
{
|
||||
Node &node = GetOrCreateNode(hyperPath);
|
||||
|
@ -37,7 +37,7 @@ public:
|
||||
}
|
||||
|
||||
// True when this node's target phrase collection is not empty according to
// IsEmpty(). The `.` and `->` return statements are the two sides of the diff
// (member object vs. shared_ptr member).
// NOTE(review): the `->` form dereferences m_targetPhraseCollection without a
// null check. The Node() constructor shown further down in this diff
// initialises it with a new collection -- confirm nothing else can reset it.
bool HasRules() const {
|
||||
return !m_targetPhraseCollection.IsEmpty();
|
||||
return !m_targetPhraseCollection->IsEmpty();
|
||||
}
|
||||
|
||||
void Prune(std::size_t tableLimit);
|
||||
@ -47,11 +47,13 @@ public:
|
||||
|
||||
const Node *GetChild(const HyperPath::NodeSeq &) const;
|
||||
|
||||
// Const accessor for this node's target phrase collection
// (m_targetPhraseCollection). Two signatures are interleaved below (diff view
// without +/- markers): one returning a const reference, one returning a
// TargetPhraseCollection::shared_ptr by value.
// NOTE(review): the shared_ptr signature hands out a pointer to a non-const
// collection from a const method, so callers could modify the collection
// through it. Whether a pointer-to-const typedef exists for this class is not
// visible here -- confirm before tightening.
const TargetPhraseCollection &GetTargetPhraseCollection() const {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection() const {
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
// Non-const accessor for this node's target phrase collection
// (m_targetPhraseCollection). Two signatures are interleaved below (diff view
// without +/- markers): one returning a mutable reference, one returning a
// TargetPhraseCollection::shared_ptr by value.
TargetPhraseCollection &GetTargetPhraseCollection() {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection() {
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
@ -59,12 +61,14 @@ public:
|
||||
return m_map;
|
||||
}
|
||||
|
||||
// Default constructor: initialises m_targetPhraseCollection with a newly
// allocated TargetPhraseCollection.
Node() : m_targetPhraseCollection(new TargetPhraseCollection) { }
|
||||
|
||||
private:
|
||||
Map m_map;
|
||||
TargetPhraseCollection m_targetPhraseCollection;
|
||||
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
||||
};
|
||||
|
||||
HyperTree(const RuleTableFF *ff) : RuleTable(ff) {}
|
||||
HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
|
||||
|
||||
const Node &GetRootNode() const {
|
||||
return m_root;
|
||||
@ -73,7 +77,8 @@ public:
|
||||
private:
|
||||
friend class HyperTreeCreator;
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const HyperPath &);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const HyperPath &);
|
||||
|
||||
Node &GetOrCreateNode(const HyperPath &);
|
||||
|
||||
|
@ -21,7 +21,7 @@ protected:
|
||||
|
||||
// Provide access to HyperTree's private GetOrCreateTargetPhraseCollection
|
||||
// function.
|
||||
// Forwards to trie.GetOrCreateTargetPhraseCollection(fragment) and returns its
// result unchanged. The two return-type lines below are the two sides of the
// diff (reference vs. TargetPhraseCollection::shared_ptr).
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
|
||||
HyperTree &trie, const HyperPath &fragment) {
|
||||
return trie.GetOrCreateTargetPhraseCollection(fragment);
|
||||
}
|
||||
|
@ -130,9 +130,9 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
||||
ff.GetFeaturesToApply());
|
||||
|
||||
// Add rule to trie.
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
|
||||
trie, sourceFragment);
|
||||
phraseColl.Add(targetPhrase);
|
||||
TargetPhraseCollection::shared_ptr phraseColl
|
||||
= GetOrCreateTargetPhraseCollection(trie, sourceFragment);
|
||||
phraseColl->Add(targetPhrase);
|
||||
|
||||
count++;
|
||||
}
|
||||
|
@ -51,8 +51,8 @@ void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
|
||||
m_hyperedge.label.inputWeight += (*p)->weight;
|
||||
}
|
||||
// Set the output hyperedge label's translation set pointer.
|
||||
m_hyperedge.label.translations =
|
||||
&(item.trieNode->GetTargetPhraseCollection());
|
||||
m_hyperedge.label.translations
|
||||
= item.trieNode->GetTargetPhraseCollection();
|
||||
// Pass the output hyperedge to the callback.
|
||||
callback(m_hyperedge);
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ namespace Syntax
|
||||
|
||||
// Label attached to a hyperedge (used as m_hyperedge.label elsewhere in this
// diff): an input weight plus the target phrase collection holding the
// translations. The two `translations` declarations are the two sides of the
// diff (raw const pointer vs. TargetPhraseCollection::shared_ptr).
struct PLabel {
|
||||
float inputWeight;
|
||||
const TargetPhraseCollection *translations;
|
||||
TargetPhraseCollection::shared_ptr translations;
|
||||
};
|
||||
|
||||
} // Syntax
|
||||
|
@ -32,9 +32,10 @@ boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
|
||||
// TODO Check ownership and fix any leaks.
|
||||
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
|
||||
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
|
||||
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
|
||||
*trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
|
||||
tpc.Add(tp);
|
||||
TargetPhraseCollection::shared_ptr tpc;
|
||||
tpc= GetOrCreateTargetPhraseCollection(*trie, *srcPhrase, *tp, NULL);
|
||||
// TODO Check NULL is valid argument
|
||||
tpc->Add(tp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -132,9 +132,9 @@ void RecursiveCYKPlusParser<Callback>::AddAndExtend(
|
||||
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
|
||||
|
||||
// Add target phrase collection (except if rule is empty or unary).
|
||||
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();
|
||||
if (!tpc.IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
|
||||
m_hyperedge.label.translations = &tpc;
|
||||
TargetPhraseCollection::shared_ptr tpc = node.GetTargetPhraseCollection();
|
||||
if (!tpc->IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
|
||||
m_hyperedge.label.translations = tpc;
|
||||
(*m_callback)(m_hyperedge, end);
|
||||
}
|
||||
|
||||
|
@ -38,8 +38,8 @@ Scope3Parser<Callback>::~Scope3Parser()
|
||||
}
|
||||
|
||||
template<typename Callback>
|
||||
void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
|
||||
Callback &callback)
|
||||
void Scope3Parser<Callback>::
|
||||
EnumerateHyperedges(const WordsRange &range, Callback &callback)
|
||||
{
|
||||
const std::size_t start = range.GetStartPos();
|
||||
const std::size_t end = range.GetEndPos();
|
||||
@ -64,8 +64,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
|
||||
|
||||
// Ask the grammar for the mapping from label sequences to target phrase
|
||||
// collections for this pattern.
|
||||
const RuleTrie::Node::LabelMap &labelMap =
|
||||
patNode->m_node->GetLabelMap();
|
||||
const RuleTrie::Node::LabelMap &labelMap = patNode->m_node->GetLabelMap();
|
||||
|
||||
// For each label sequence, search the lattice for the set of PHyperedge
|
||||
// tails.
|
||||
@ -73,7 +72,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
|
||||
RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin();
|
||||
for (; q != labelMap.end(); ++q) {
|
||||
const std::vector<int> &labelSeq = q->first;
|
||||
const TargetPhraseCollection &tpc = q->second;
|
||||
TargetPhraseCollection::shared_ptr tpc = q->second;
|
||||
// For many label sequences there won't be any corresponding paths through
|
||||
// the lattice. As an optimisation, we use m_quickCheckTable to test
|
||||
// for this and we don't begin a search if there are no paths to find.
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "moses/Syntax/PHyperedge.h"
|
||||
|
||||
#include "TailLattice.h"
|
||||
|
||||
#include "moses/TargetPhraseCollection.h"
|
||||
namespace Moses
|
||||
{
|
||||
namespace Syntax
|
||||
@ -25,13 +25,14 @@ public:
|
||||
, m_key(key)
|
||||
, m_ranges(ranges) {}
|
||||
|
||||
void Search(const std::vector<int> &labels, const TargetPhraseCollection &tpc,
|
||||
void Search(const std::vector<int> &labels,
|
||||
const TargetPhraseCollection::shared_ptr tpc,
|
||||
Callback &callback) {
|
||||
m_labels = &labels;
|
||||
m_matchCB = &callback;
|
||||
m_hyperedge.head = 0;
|
||||
m_hyperedge.tail.clear();
|
||||
m_hyperedge.label.translations = &tpc;
|
||||
m_hyperedge.label.translations = tpc;
|
||||
SearchInner(0, 0, 0);
|
||||
}
|
||||
|
||||
|
@ -28,9 +28,10 @@ public:
|
||||
private:
|
||||
friend class RuleTrieCreator;
|
||||
|
||||
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS) = 0;
|
||||
virtual TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS) = 0;
|
||||
|
||||
virtual void SortAndPrune(std::size_t) = 0;
|
||||
};
|
||||
|
@ -33,7 +33,7 @@ void RuleTrieCYKPlus::Node::Prune(std::size_t tableLimit)
|
||||
}
|
||||
|
||||
// prune TargetPhraseCollection in this node
|
||||
m_targetPhraseCollection.Prune(true, tableLimit);
|
||||
m_targetPhraseCollection->Prune(true, tableLimit);
|
||||
}
|
||||
|
||||
void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
|
||||
@ -49,7 +49,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
|
||||
}
|
||||
|
||||
// sort TargetPhraseCollection in this node
|
||||
m_targetPhraseCollection.Sort(true, tableLimit);
|
||||
m_targetPhraseCollection->Sort(true, tableLimit);
|
||||
}
|
||||
|
||||
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
|
||||
@ -86,8 +86,11 @@ const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild(
|
||||
return (p == m_nonTermMap.end()) ? NULL : &p->second;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
RuleTrieCYKPlus::
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS)
|
||||
{
|
||||
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
|
||||
return currNode.GetTargetPhraseCollection();
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
}
|
||||
|
||||
bool HasRules() const {
|
||||
return !m_targetPhraseCollection.IsEmpty();
|
||||
return !m_targetPhraseCollection->IsEmpty();
|
||||
}
|
||||
|
||||
void Prune(std::size_t tableLimit);
|
||||
@ -50,11 +50,13 @@ public:
|
||||
const Node *GetChild(const Word &sourceTerm) const;
|
||||
const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
|
||||
|
||||
const TargetPhraseCollection &GetTargetPhraseCollection() const {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection() const {
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &GetTargetPhraseCollection() {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection() {
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
@ -66,10 +68,12 @@ public:
|
||||
return m_nonTermMap;
|
||||
}
|
||||
|
||||
Node() : m_targetPhraseCollection(new TargetPhraseCollection) {}
|
||||
|
||||
private:
|
||||
SymbolMap m_sourceTermMap;
|
||||
SymbolMap m_nonTermMap;
|
||||
TargetPhraseCollection m_targetPhraseCollection;
|
||||
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
||||
};
|
||||
|
||||
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
|
||||
@ -81,8 +85,9 @@ public:
|
||||
bool HasPreterminalRule(const Word &) const;
|
||||
|
||||
private:
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection
|
||||
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
|
||||
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS);
|
||||
|
@ -21,8 +21,9 @@ protected:
|
||||
|
||||
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
|
||||
// function.
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection
|
||||
( RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS) {
|
||||
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
|
||||
}
|
||||
|
@ -125,9 +125,10 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
|
||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
|
||||
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
|
||||
trie, sourcePhrase, *targetPhrase, sourceLHS);
|
||||
phraseColl.Add(targetPhrase);
|
||||
TargetPhraseCollection::shared_ptr phraseColl
|
||||
= GetOrCreateTargetPhraseCollection(trie, sourcePhrase,
|
||||
*targetPhrase, sourceLHS);
|
||||
phraseColl->Add(targetPhrase);
|
||||
|
||||
// not implemented correctly in memory pt. just delete it for now
|
||||
delete sourceLHS;
|
||||
|
@ -33,7 +33,7 @@ void RuleTrieScope3::Node::Prune(std::size_t tableLimit)
|
||||
|
||||
// Prune TargetPhraseCollections at this node.
|
||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||
p->second.Prune(true, tableLimit);
|
||||
p->second->Prune(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
@ -50,7 +50,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit)
|
||||
|
||||
// Sort TargetPhraseCollections at this node.
|
||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||
p->second.Sort(true, tableLimit);
|
||||
p->second->Sort(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
@ -75,9 +75,10 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
|
||||
return m_gapNode;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &
|
||||
RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
|
||||
const TargetPhrase &target)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
RuleTrieScope3::
|
||||
Node::
|
||||
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
|
||||
{
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||
const std::size_t rank = alignmentInfo.GetSize();
|
||||
@ -94,12 +95,16 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
|
||||
const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
|
||||
vec.push_back(InsertLabel(i++, targetNonTerm));
|
||||
}
|
||||
|
||||
return m_labelMap[vec];
|
||||
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
|
||||
if (!ret) ret.reset(new TargetPhraseCollection);
|
||||
return ret;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
RuleTrieScope3::
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS)
|
||||
{
|
||||
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
|
||||
return currNode.GetOrCreateTargetPhraseCollection(target);
|
||||
|
@ -35,7 +35,7 @@ public:
|
||||
SymbolEqualityPred> TerminalMap;
|
||||
|
||||
typedef boost::unordered_map<std::vector<int>,
|
||||
TargetPhraseCollection> LabelMap;
|
||||
TargetPhraseCollection::shared_ptr> LabelMap;
|
||||
|
||||
~Node() {
|
||||
delete m_gapNode;
|
||||
@ -61,8 +61,8 @@ public:
|
||||
|
||||
Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const TargetPhrase &);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const TargetPhrase &);
|
||||
|
||||
bool IsLeaf() const {
|
||||
return m_terminalMap.empty() && m_gapNode == NULL;
|
||||
@ -106,8 +106,10 @@ public:
|
||||
bool HasPreterminalRule(const Word &) const;
|
||||
|
||||
private:
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS);
|
||||
|
||||
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS);
|
||||
|
@ -17,7 +17,7 @@ struct PVertex;
|
||||
struct SHyperedgeBundle {
|
||||
float inputWeight;
|
||||
std::vector<const SVertexStack*> stacks;
|
||||
const TargetPhraseCollection *translations;
|
||||
TargetPhraseCollection::shared_ptr translations;
|
||||
|
||||
friend void swap(SHyperedgeBundle &x, SHyperedgeBundle &y) {
|
||||
using std::swap;
|
||||
|
@ -17,9 +17,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
|
||||
const Word &sourceLhs = node.pvertex.symbol;
|
||||
boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
|
||||
TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs);
|
||||
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
|
||||
m_ruleTrie, sourceLhs, *sourceRhs);
|
||||
tpc.Add(tp);
|
||||
TargetPhraseCollection::shared_ptr tpc
|
||||
= GetOrCreateTargetPhraseCollection(m_ruleTrie, sourceLhs, *sourceRhs);
|
||||
tpc->Add(tp);
|
||||
}
|
||||
|
||||
Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
|
||||
|
@ -48,11 +48,11 @@ public:
|
||||
|
||||
const Node *GetChild(const HyperPath::NodeSeq &) const;
|
||||
|
||||
const TargetPhraseCollection &GetTargetPhraseCollection() const
|
||||
const TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() const
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &GetTargetPhraseCollection()
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollection()
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ const Node &GetRootNode() const
|
||||
private:
|
||||
friend class RuleTrieCreator;
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
|
||||
const Word &sourceLHS, const Phrase &sourceRHS);
|
||||
|
||||
Node &GetOrCreateNode(const Phrase &sourceRHS);
|
||||
|
@ -61,7 +61,7 @@ void RuleMatcherSCFG<Callback>::Match(const InputTree::Node &inNode,
|
||||
if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) {
|
||||
// Check if the trie node has any rules with a LHS that match inNode.
|
||||
const Word &lhs = inNode.pvertex.symbol;
|
||||
const TargetPhraseCollection *tpc =
|
||||
TargetPhraseCollection::shared_ptr tpc =
|
||||
newTrieNode.GetTargetPhraseCollection(lhs);
|
||||
if (tpc) {
|
||||
m_hyperedge.label.translations = tpc;
|
||||
|
@ -35,7 +35,7 @@ void RuleTrie::Node::Prune(std::size_t tableLimit)
|
||||
// Prune TargetPhraseCollections at this node.
|
||||
for (TPCMap::iterator p = m_targetPhraseCollections.begin();
|
||||
p != m_targetPhraseCollections.end(); ++p) {
|
||||
p->second.Prune(true, tableLimit);
|
||||
p->second->Prune(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
@ -54,17 +54,21 @@ void RuleTrie::Node::Sort(std::size_t tableLimit)
|
||||
// Sort TargetPhraseCollections at this node.
|
||||
for (TPCMap::iterator p = m_targetPhraseCollections.begin();
|
||||
p != m_targetPhraseCollections.end(); ++p) {
|
||||
p->second.Sort(true, tableLimit);
|
||||
p->second->Sort(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
RuleTrie::Node *RuleTrie::Node::GetOrCreateChild(
|
||||
const Word &sourceTerm)
|
||||
RuleTrie::Node*
|
||||
RuleTrie::Node::
|
||||
GetOrCreateChild(const Word &sourceTerm)
|
||||
{
|
||||
return &m_sourceTermMap[sourceTerm];
|
||||
}
|
||||
|
||||
RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
|
||||
RuleTrie::Node *
|
||||
RuleTrie::
|
||||
Node::
|
||||
GetOrCreateNonTerminalChild(const Word &targetNonTerm)
|
||||
{
|
||||
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
|
||||
"Not a non-terminal: " << targetNonTerm);
|
||||
@ -72,42 +76,52 @@ RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNo
|
||||
return &m_nonTermMap[targetNonTerm];
|
||||
}
|
||||
|
||||
TargetPhraseCollection &RuleTrie::Node::GetOrCreateTargetPhraseCollection(
|
||||
const Word &sourceLHS)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
RuleTrie::
|
||||
Node::
|
||||
GetOrCreateTargetPhraseCollection(const Word &sourceLHS)
|
||||
{
|
||||
UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(),
|
||||
"Not a non-terminal: " << sourceLHS);
|
||||
return m_targetPhraseCollections[sourceLHS];
|
||||
TargetPhraseCollection::shared_ptr& foo
|
||||
= m_targetPhraseCollections[sourceLHS];
|
||||
if (!foo) foo.reset(new TargetPhraseCollection);
|
||||
return foo;
|
||||
}
|
||||
|
||||
const RuleTrie::Node *RuleTrie::Node::GetChild(
|
||||
const Word &sourceTerm) const
|
||||
RuleTrie::Node const*
|
||||
RuleTrie::
|
||||
Node::
|
||||
GetChild(const Word &sourceTerm) const
|
||||
{
|
||||
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
|
||||
"Not a terminal: " << sourceTerm);
|
||||
|
||||
UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), "Not a terminal: " << sourceTerm);
|
||||
SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
|
||||
return (p == m_sourceTermMap.end()) ? NULL : &p->second;
|
||||
}
|
||||
|
||||
const RuleTrie::Node *RuleTrie::Node::GetNonTerminalChild(
|
||||
const Word &targetNonTerm) const
|
||||
RuleTrie::Node const*
|
||||
RuleTrie::
|
||||
Node::
|
||||
GetNonTerminalChild(const Word &targetNonTerm) const
|
||||
{
|
||||
UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
|
||||
"Not a non-terminal: " << targetNonTerm);
|
||||
|
||||
SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
|
||||
return (p == m_nonTermMap.end()) ? NULL : &p->second;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &RuleTrie::GetOrCreateTargetPhraseCollection(
|
||||
const Word &sourceLHS, const Phrase &sourceRHS)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
RuleTrie::
|
||||
GetOrCreateTargetPhraseCollection
|
||||
( const Word &sourceLHS, const Phrase &sourceRHS )
|
||||
{
|
||||
Node &currNode = GetOrCreateNode(sourceRHS);
|
||||
return currNode.GetOrCreateTargetPhraseCollection(sourceLHS);
|
||||
}
|
||||
|
||||
RuleTrie::Node &RuleTrie::GetOrCreateNode(const Phrase &sourceRHS)
|
||||
RuleTrie::Node &
|
||||
RuleTrie::
|
||||
GetOrCreateNode(const Phrase &sourceRHS)
|
||||
{
|
||||
const std::size_t size = sourceRHS.GetSize();
|
||||
|
||||
|
@ -32,7 +32,7 @@ public:
|
||||
typedef boost::unordered_map<Word, Node, SymbolHasher,
|
||||
SymbolEqualityPred> SymbolMap;
|
||||
|
||||
typedef boost::unordered_map<Word, TargetPhraseCollection,
|
||||
typedef boost::unordered_map<Word, TargetPhraseCollection::shared_ptr,
|
||||
SymbolHasher, SymbolEqualityPred> TPCMap;
|
||||
|
||||
bool IsLeaf() const {
|
||||
@ -48,15 +48,18 @@ public:
|
||||
|
||||
Node *GetOrCreateChild(const Word &sourceTerm);
|
||||
Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Word &);
|
||||
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(const Word &);
|
||||
|
||||
const Node *GetChild(const Word &sourceTerm) const;
|
||||
const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
|
||||
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection(
|
||||
const Word &sourceLHS) const {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection(const Word &sourceLHS) const {
|
||||
TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS);
|
||||
return p == m_targetPhraseCollections.end() ? 0 : &(p->second);
|
||||
if (p != m_targetPhraseCollections.end())
|
||||
return p->second;
|
||||
else
|
||||
return TargetPhraseCollection::shared_ptr();
|
||||
}
|
||||
|
||||
// FIXME Is there any reason to distinguish these two for T2S?
|
||||
@ -83,8 +86,9 @@ public:
|
||||
private:
|
||||
friend class RuleTrieCreator;
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Word &sourceLHS, const Phrase &sourceRHS);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection
|
||||
(const Word &sourceLHS, const Phrase &sourceRHS);
|
||||
|
||||
Node &GetOrCreateNode(const Phrase &sourceRHS);
|
||||
|
||||
|
@ -21,7 +21,7 @@ protected:
|
||||
|
||||
// Provide access to RuleTrie's private
|
||||
// GetOrCreateTargetPhraseCollection function.
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
|
||||
RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) {
|
||||
return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS);
|
||||
}
|
||||
|
@ -55,7 +55,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
std::vector<float> scoreVector;
|
||||
StringPiece line;
|
||||
|
||||
double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
|
||||
int noflags = double_conversion::StringToDoubleConverter::NO_FLAGS;
|
||||
double_conversion::StringToDoubleConverter
|
||||
converter(noflags, NAN, NAN, "inf", "nan");
|
||||
|
||||
while(true) {
|
||||
try {
|
||||
@ -132,9 +134,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
|
||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
|
||||
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
|
||||
trie, *sourceLHS, sourcePhrase);
|
||||
phraseColl.Add(targetPhrase);
|
||||
TargetPhraseCollection::shared_ptr phraseColl
|
||||
= GetOrCreateTargetPhraseCollection(trie, *sourceLHS, sourcePhrase);
|
||||
phraseColl->Add(targetPhrase);
|
||||
|
||||
// not implemented correctly in memory pt. just delete it for now
|
||||
delete sourceLHS;
|
||||
|
@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <iostream>
|
||||
#include "TargetPhrase.h"
|
||||
#include "Util.h"
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -43,6 +44,8 @@ public:
|
||||
// iters
|
||||
typedef CollType::iterator iterator;
|
||||
typedef CollType::const_iterator const_iterator;
|
||||
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
|
||||
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
|
||||
|
||||
TargetPhrase const*
|
||||
operator[](size_t const i) const {
|
||||
@ -127,6 +130,9 @@ protected:
|
||||
std::vector<Phrase> m_sourcePhrases;
|
||||
|
||||
public:
|
||||
typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase> shared_ptr;
|
||||
typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase const> shared_const_ptr;
|
||||
|
||||
const std::vector<Phrase> &GetSourcePhrases() const {
|
||||
return m_sourcePhrases;
|
||||
}
|
||||
|
@ -167,10 +167,10 @@ void ChartRuleLookupManagerMemory::AddAndExtend(
|
||||
size_t endPos)
|
||||
{
|
||||
|
||||
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr tpc = node->GetTargetPhraseCollection();
|
||||
// add target phrase collection (except if rule is empty or a unary non-terminal rule)
|
||||
if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
|
||||
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
|
||||
if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
|
||||
m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
|
||||
}
|
||||
|
||||
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
|
||||
|
@ -167,10 +167,11 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
|
||||
size_t endPos)
|
||||
{
|
||||
|
||||
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr tpc
|
||||
= node->GetTargetPhraseCollection();
|
||||
// add target phrase collection (except if rule is empty or a unary non-terminal rule)
|
||||
if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
|
||||
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
|
||||
if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
|
||||
m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
|
||||
}
|
||||
|
||||
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
|
||||
|
@ -64,11 +64,12 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
|
||||
|
||||
ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
|
||||
{
|
||||
std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache;
|
||||
for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
|
||||
delete iterCache->second;
|
||||
}
|
||||
m_cache.clear();
|
||||
// not needed any more due to the switch to shared pointers
|
||||
// std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache;
|
||||
// for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
|
||||
// iterCache->second.reset();
|
||||
// }
|
||||
// m_cache.clear();
|
||||
|
||||
RemoveAllInColl(m_expandableDottedRuleListVec);
|
||||
RemoveAllInColl(m_sourcePhraseNode);
|
||||
@ -236,14 +237,16 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
||||
if (sourceLHSBerkeleyDb == NULL)
|
||||
continue;
|
||||
|
||||
const TargetPhraseCollection *targetPhraseCollection = NULL;
|
||||
const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
|
||||
TargetPhraseCollection::shared_ptr targetPhraseCollection;
|
||||
const OnDiskPt::PhraseNode *node
|
||||
= prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
|
||||
if (node) {
|
||||
uint64_t tpCollFilePos = node->GetValue();
|
||||
std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos);
|
||||
std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache = m_cache.find(tpCollFilePos);
|
||||
if (iterCache == m_cache.end()) {
|
||||
|
||||
const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
|
||||
OnDiskPt::TargetPhraseCollection::shared_ptr tpcollBerkeleyDb
|
||||
= node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
|
||||
|
||||
std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
|
||||
targetPhraseCollection
|
||||
@ -254,7 +257,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
||||
,m_dbWrapper.GetVocab()
|
||||
,true);
|
||||
|
||||
delete tpcollBerkeleyDb;
|
||||
tpcollBerkeleyDb.reset();
|
||||
m_cache[tpCollFilePos] = targetPhraseCollection;
|
||||
} else {
|
||||
// just get out of cache
|
||||
|
@ -55,7 +55,7 @@ private:
|
||||
const std::vector<FactorType> &m_inputFactorsVec;
|
||||
const std::vector<FactorType> &m_outputFactorsVec;
|
||||
std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec;
|
||||
std::map<uint64_t, const TargetPhraseCollection*> m_cache;
|
||||
std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache;
|
||||
std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
|
||||
};
|
||||
|
||||
|
@ -48,7 +48,7 @@ ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton(
|
||||
|
||||
ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
|
||||
{
|
||||
RemoveAllInColl(m_tpColl);
|
||||
// RemoveAllInColl(m_tpColl);
|
||||
}
|
||||
|
||||
void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
|
||||
@ -58,7 +58,7 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
|
||||
{
|
||||
//m_tpColl.push_back(TargetPhraseCollection());
|
||||
//TargetPhraseCollection &tpColl = m_tpColl.back();
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
|
||||
m_tpColl.push_back(tpColl);
|
||||
|
||||
const WordsRange &range = inputPath.GetWordsRange();
|
||||
@ -73,7 +73,9 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
|
||||
outColl.Add(*tpColl, m_stackVec, range);
|
||||
}
|
||||
|
||||
TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sourceWord) const
|
||||
TargetPhrase *
|
||||
ChartRuleLookupManagerSkeleton::
|
||||
CreateTargetPhrase(const Word &sourceWord) const
|
||||
{
|
||||
// create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:'
|
||||
string str = sourceWord.GetFactor(0)->GetString().as_string();
|
||||
|
@ -49,7 +49,7 @@ private:
|
||||
TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;
|
||||
|
||||
StackVec m_stackVec;
|
||||
std::vector<TargetPhraseCollection*> m_tpColl;
|
||||
std::vector<TargetPhraseCollection::shared_ptr > m_tpColl;
|
||||
const SkeletonPT &m_skeletonPT;
|
||||
};
|
||||
|
||||
|
@ -119,4 +119,4 @@ private:
|
||||
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@ -107,14 +107,15 @@ void PhraseDictionaryCompact::Load()
|
||||
// }
|
||||
// };
|
||||
|
||||
const TargetPhraseCollection*
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
|
||||
{
|
||||
|
||||
TargetPhraseCollection::shared_ptr ret;
|
||||
// There is no such source phrase if source phrase is longer than longest
|
||||
// observed source phrase during compilation
|
||||
if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
|
||||
return NULL;
|
||||
return ret;
|
||||
|
||||
// Retrieve target phrase collection from phrase table
|
||||
TargetPhraseVectorPtr decodedPhraseColl
|
||||
@ -122,7 +123,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s
|
||||
|
||||
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
|
||||
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
|
||||
TargetPhraseCollection* phraseColl = new TargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
|
||||
|
||||
// Score phrases and if possible apply ttable_limit
|
||||
TargetPhraseVector::iterator nth =
|
||||
@ -139,7 +140,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s
|
||||
|
||||
return phraseColl;
|
||||
} else
|
||||
return NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
TargetPhraseVectorPtr
|
||||
@ -163,7 +164,7 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact()
|
||||
|
||||
//TO_STRING_BODY(PhraseDictionaryCompact)
|
||||
|
||||
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
|
||||
void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
||||
{
|
||||
if(!m_sentenceCache.get())
|
||||
m_sentenceCache.reset(new PhraseCache());
|
||||
@ -179,12 +180,13 @@ void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &so
|
||||
m_sentenceCache.reset(new PhraseCache());
|
||||
|
||||
m_phraseDecoder->PruneCache();
|
||||
for(PhraseCache::iterator it = m_sentenceCache->begin();
|
||||
it != m_sentenceCache->end(); it++)
|
||||
delete *it;
|
||||
// for(PhraseCache::iterator it = m_sentenceCache->begin();
|
||||
// it != m_sentenceCache->end(); it++)
|
||||
// it->reset();
|
||||
|
||||
PhraseCache temp;
|
||||
temp.swap(*m_sentenceCache);
|
||||
// PhraseCache temp;
|
||||
// temp.swap(*m_sentenceCache);
|
||||
m_sentenceCache->clear();
|
||||
|
||||
ReduceCache();
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ protected:
|
||||
bool m_inMemory;
|
||||
bool m_useAlignmentInfo;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection*> PhraseCache;
|
||||
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
|
||||
typedef boost::thread_specific_ptr<PhraseCache> SentenceCache;
|
||||
static SentenceCache m_sentenceCache;
|
||||
|
||||
@ -69,12 +69,12 @@ public:
|
||||
|
||||
void Load();
|
||||
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
|
||||
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
|
||||
|
||||
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
|
||||
|
||||
void CacheForCleanup(TargetPhraseCollection* tpc);
|
||||
void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
|
||||
void CleanUpAfterSentenceProcessing(const InputType &source);
|
||||
|
||||
virtual ChartRuleLookupManager *CreateRuleLookupManager(
|
||||
|
@ -35,14 +35,15 @@ namespace Moses
|
||||
{
|
||||
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
|
||||
|
||||
CacheColl::~CacheColl()
|
||||
{
|
||||
for (iterator iter = begin(); iter != end(); ++iter) {
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
|
||||
const TargetPhraseCollection *tps = key.first;
|
||||
delete tps;
|
||||
}
|
||||
}
|
||||
// CacheColl::~CacheColl()
|
||||
// {
|
||||
// // not needed any more since the switch to shared pointers
|
||||
// // for (iterator iter = begin(); iter != end(); ++iter) {
|
||||
// // std::pair<TargetPhraseCollection::shared_ptr , clock_t> &key = iter->second;
|
||||
// // TargetPhraseCollection::shared_ptr tps = key.first;
|
||||
// // delete tps;
|
||||
// // }
|
||||
// }
|
||||
|
||||
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
|
||||
: DecodeFeature(line, registerNow)
|
||||
@ -60,9 +61,12 @@ ProvidesPrefixCheck() const
|
||||
return false;
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionary::
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret;
|
||||
TargetPhraseCollection::shared_ptr ret;
|
||||
typedef std::pair<TargetPhraseCollection::shared_ptr , clock_t> entry;
|
||||
if (m_maxCacheSize) {
|
||||
CacheColl &cache = GetCache();
|
||||
|
||||
@ -74,18 +78,14 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
|
||||
if (iter == cache.end()) {
|
||||
// not in cache, need to look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
|
||||
if (ret) {
|
||||
ret = new TargetPhraseCollection(*ret);
|
||||
if (ret) { // make a copy
|
||||
ret.reset(new TargetPhraseCollection(*ret));
|
||||
}
|
||||
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
|
||||
cache[hash] = value;
|
||||
cache[hash] = entry(ret, clock());
|
||||
} else {
|
||||
// in cache. just use it
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
|
||||
ret = value.first;
|
||||
iter->second.second = clock();
|
||||
ret = iter->second.first;
|
||||
}
|
||||
} else {
|
||||
// don't use cache. look up from phrase table
|
||||
@ -95,7 +95,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
|
||||
return ret;
|
||||
}
|
||||
|
||||
TargetPhraseCollection const *
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionary::
|
||||
GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
|
||||
{
|
||||
@ -103,7 +103,7 @@ GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
|
||||
}
|
||||
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase const*
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
PhraseDictionary::
|
||||
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const
|
||||
{
|
||||
@ -140,14 +140,14 @@ SetFeaturesToApply()
|
||||
}
|
||||
|
||||
|
||||
// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
|
||||
void
|
||||
PhraseDictionary::
|
||||
Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
|
||||
{
|
||||
// do nothing by default
|
||||
return;
|
||||
}
|
||||
// // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
|
||||
// void
|
||||
// PhraseDictionary::
|
||||
// Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
|
||||
// {
|
||||
// // do nothing by default
|
||||
// return;
|
||||
// }
|
||||
|
||||
bool
|
||||
PhraseDictionary::
|
||||
@ -170,7 +170,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
}
|
||||
|
||||
const Phrase &phrase = inputPath.GetPhrase();
|
||||
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
|
||||
TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
|
||||
}
|
||||
}
|
||||
@ -180,7 +180,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
//void PhraseDictionary::SaveCache() const
|
||||
//{
|
||||
// CacheColl &cache = GetCache();
|
||||
// for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter,
|
||||
// for( std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter,
|
||||
// iter != cache.end(),
|
||||
// iter++ ) {
|
||||
//
|
||||
@ -191,10 +191,10 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
//void PhraseDictionary::LoadCache() const
|
||||
//{
|
||||
// CacheColl &cache = GetCache();
|
||||
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
|
||||
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter;
|
||||
// iter = cache.begin();
|
||||
// while( iter != cache.end() ) {
|
||||
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
|
||||
// std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iterRemove = iter++;
|
||||
// delete iterRemove->second.first;
|
||||
// cache.erase(iterRemove);
|
||||
// }
|
||||
@ -225,11 +225,12 @@ void PhraseDictionary::ReduceCache() const
|
||||
while( iter != cache.end() ) {
|
||||
if (iter->second.second < cutoffLastUsedTime) {
|
||||
CacheColl::iterator iterRemove = iter++;
|
||||
delete iterRemove->second.first;
|
||||
// delete iterRemove->second.first;
|
||||
cache.erase(iterRemove);
|
||||
} else iter++;
|
||||
}
|
||||
VERBOSE(2,"Reduced persistent translation option cache in " << reduceCacheTime << " seconds." << std::endl);
|
||||
VERBOSE(2,"Reduced persistent translation option cache in "
|
||||
<< reduceCacheTime << " seconds." << std::endl);
|
||||
}
|
||||
|
||||
CacheColl &PhraseDictionary::GetCache() const
|
||||
@ -265,8 +266,8 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
|
||||
// lookup translation only if no other translations
|
||||
InputPath::TargetPhrases::const_iterator iter;
|
||||
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
|
||||
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
|
||||
const TargetPhraseCollection *tpCollPrev = temp.first;
|
||||
const std::pair<TargetPhraseCollection::shared_ptr , const void*> &temp = iter->second;
|
||||
TargetPhraseCollection::shared_ptr tpCollPrev = temp.first;
|
||||
|
||||
if (tpCollPrev && tpCollPrev->GetSize()) {
|
||||
// already have translation from another pt. Don't create translations
|
||||
|
@ -55,15 +55,18 @@ class ChartCellCollectionBase;
|
||||
class ChartRuleLookupManager;
|
||||
class ChartParser;
|
||||
|
||||
class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >
|
||||
{
|
||||
// 1st = hash of source phrase/ address of phrase-table node
|
||||
// 2nd = all translations
|
||||
// 3rd = time of last access
|
||||
// typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
|
||||
typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
|
||||
typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
|
||||
// class CacheColl : public boost::unordered_map<size_t, TPCollLastUse>
|
||||
// {
|
||||
// // 1st = hash of source phrase/ address of phrase-table node
|
||||
// // 2nd = all translations
|
||||
// // 3rd = time of last access
|
||||
|
||||
public:
|
||||
~CacheColl();
|
||||
};
|
||||
// public:
|
||||
// ~CacheColl();
|
||||
// };
|
||||
|
||||
/**
|
||||
* Abstract base class for phrase dictionaries (tables).
|
||||
@ -95,9 +98,9 @@ public:
|
||||
return m_id;
|
||||
}
|
||||
|
||||
virtual
|
||||
void
|
||||
Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
|
||||
// virtual
|
||||
// void
|
||||
// Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
|
||||
|
||||
/// return true if phrase table entries starting with /phrase/
|
||||
// exist in the table.
|
||||
@ -111,24 +114,23 @@ public:
|
||||
//! find list of translations that can translates src. Only for phrase input
|
||||
|
||||
public:
|
||||
virtual
|
||||
TargetPhraseCollection const *
|
||||
virtual TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
|
||||
virtual
|
||||
TargetPhraseCollection const *
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const {
|
||||
virtual TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
|
||||
Phrase const& src) const
|
||||
{
|
||||
return GetTargetPhraseCollectionLEGACY(src);
|
||||
}
|
||||
|
||||
virtual
|
||||
void
|
||||
virtual void
|
||||
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
virtual
|
||||
void
|
||||
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
|
||||
const InputPathList &inputPathQueue) const {
|
||||
virtual void
|
||||
GetTargetPhraseCollectionBatch
|
||||
(ttasksptr const& ttask, InputPathList const& inputPathQueue) const
|
||||
{
|
||||
GetTargetPhraseCollectionBatch(inputPathQueue);
|
||||
}
|
||||
|
||||
@ -157,7 +159,9 @@ public:
|
||||
|
||||
// LEGACY
|
||||
//! find list of translations that can translates a portion of src. Used by confusion network decoding
|
||||
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
|
||||
virtual
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
|
||||
|
||||
protected:
|
||||
static std::vector<PhraseDictionary*> s_staticColl;
|
||||
@ -184,7 +188,10 @@ protected:
|
||||
mutable boost::scoped_ptr<CacheColl> m_cache;
|
||||
#endif
|
||||
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
|
||||
virtual
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
|
||||
|
||||
void ReduceCache() const;
|
||||
|
||||
protected:
|
||||
|
@ -150,15 +150,15 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(ttasksptr const& ttas
|
||||
ReduceCache();
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
|
||||
TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
|
||||
#endif
|
||||
TargetPhraseCollection* tpc = NULL;
|
||||
TargetPhraseCollection::shared_ptr tpc;
|
||||
cacheMap::const_iterator it = m_cacheTM.find(source);
|
||||
if(it != m_cacheTM.end()) {
|
||||
tpc = new TargetPhraseCollection(*(it->second).first);
|
||||
tpc.reset(new TargetPhraseCollection(*(it->second).first));
|
||||
|
||||
std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
|
||||
|
||||
@ -174,15 +174,15 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
|
||||
return tpc;
|
||||
}
|
||||
|
||||
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
|
||||
TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
|
||||
TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
|
||||
TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
|
||||
TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -366,7 +366,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
|
||||
// and then add new entry
|
||||
|
||||
TargetCollectionAgePair TgtCollAgePair = it->second;
|
||||
TargetPhraseCollection* tpc = TgtCollAgePair.first;
|
||||
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
|
||||
AgeCollection* ac = TgtCollAgePair.second;
|
||||
const Phrase* p_ptr = NULL;
|
||||
TargetPhrase* tp_ptr = NULL;
|
||||
@ -397,7 +397,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
|
||||
if (tpc->GetSize() == 0) {
|
||||
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
|
||||
ac->clear();
|
||||
delete tpc;
|
||||
tpc.reset();
|
||||
delete ac;
|
||||
m_cacheTM.erase(sp);
|
||||
}
|
||||
@ -451,14 +451,14 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
|
||||
//sp is found
|
||||
|
||||
TargetCollectionAgePair TgtCollAgePair = it->second;
|
||||
TargetPhraseCollection* tpc = TgtCollAgePair.first;
|
||||
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
|
||||
AgeCollection* ac = TgtCollAgePair.second;
|
||||
|
||||
m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache
|
||||
|
||||
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
|
||||
ac->clear();
|
||||
delete tpc;
|
||||
tpc.reset();
|
||||
delete ac;
|
||||
m_cacheTM.erase(sp);
|
||||
} else {
|
||||
@ -558,7 +558,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
|
||||
// and then add new entry
|
||||
|
||||
TargetCollectionAgePair TgtCollAgePair = it->second;
|
||||
TargetPhraseCollection* tpc = TgtCollAgePair.first;
|
||||
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
|
||||
AgeCollection* ac = TgtCollAgePair.second;
|
||||
// const TargetPhrase* p_ptr = NULL;
|
||||
const Phrase* p_ptr = NULL;
|
||||
@ -599,7 +599,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
|
||||
// create target collection
|
||||
// we have to create new target collection age pair and add new entry to target collection age pair
|
||||
|
||||
TargetPhraseCollection* tpc = new TargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
|
||||
AgeCollection* ac = new AgeCollection();
|
||||
m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
|
||||
|
||||
@ -629,13 +629,13 @@ void PhraseDictionaryDynamicCacheBased::Decay()
|
||||
void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
|
||||
{
|
||||
VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
|
||||
cacheMap::const_iterator it = m_cacheTM.find(sp);
|
||||
cacheMap::iterator it = m_cacheTM.find(sp);
|
||||
if (it != m_cacheTM.end()) {
|
||||
VERBOSE(3,"found:|" << sp << "|" << std::endl);
|
||||
//sp is found
|
||||
|
||||
TargetCollectionAgePair TgtCollAgePair = it->second;
|
||||
TargetPhraseCollection* tpc = TgtCollAgePair.first;
|
||||
TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
|
||||
AgeCollection* ac = TgtCollAgePair.second;
|
||||
|
||||
//loop in inverted order to allow a correct deletion of std::vectors tpc and ac
|
||||
@ -661,7 +661,7 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
|
||||
// delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
|
||||
(((*it).second).second)->clear();
|
||||
delete ((*it).second).second;
|
||||
delete ((*it).second).first;
|
||||
((*it).second).first.reset();
|
||||
m_cacheTM.erase(sp);
|
||||
}
|
||||
} else {
|
||||
@ -703,11 +703,11 @@ void PhraseDictionaryDynamicCacheBased::Clear()
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
|
||||
#endif
|
||||
cacheMap::const_iterator it;
|
||||
cacheMap::iterator it;
|
||||
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
|
||||
(((*it).second).second)->clear();
|
||||
delete ((*it).second).second;
|
||||
delete ((*it).second).first;
|
||||
((*it).second).first.reset();
|
||||
}
|
||||
m_cacheTM.clear();
|
||||
m_entries = 0;
|
||||
@ -746,7 +746,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const
|
||||
cacheMap::const_iterator it;
|
||||
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
|
||||
std::string source = (it->first).ToString();
|
||||
TargetPhraseCollection* tpc = (it->second).first;
|
||||
TargetPhraseCollection::shared_ptr tpc = (it->second).first;
|
||||
TargetPhraseCollection::iterator itr;
|
||||
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
|
||||
std::string target = (*itr)->ToString();
|
||||
|
@ -53,7 +53,7 @@ class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
|
||||
{
|
||||
|
||||
typedef std::vector<unsigned int> AgeCollection;
|
||||
typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair;
|
||||
typedef std::pair<TargetPhraseCollection::shared_ptr , AgeCollection*> TargetCollectionAgePair;
|
||||
typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;
|
||||
|
||||
// data structure for the cache
|
||||
@ -111,9 +111,14 @@ public:
|
||||
void Load();
|
||||
void Load(const std::string files);
|
||||
|
||||
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection(const Phrase &src) const;
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
||||
|
||||
// for phrase-based model
|
||||
// void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
@ -86,29 +86,32 @@ void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
|
||||
// Look up each input in each model
|
||||
BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
|
||||
const Phrase &phrase = inputPath->GetPhrase();
|
||||
const TargetPhraseCollection* targetPhrases =
|
||||
TargetPhraseCollection::shared_ptr targetPhrases =
|
||||
this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
|
||||
inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
|
||||
TargetPhraseCollection::shared_ptr PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
|
||||
const Phrase& src) const
|
||||
{
|
||||
UTIL_THROW2("Don't call me without the translation task.");
|
||||
}
|
||||
|
||||
const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
|
||||
const ttasksptr& ttask, const Phrase& src) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryGroup::
|
||||
GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
|
||||
{
|
||||
TargetPhraseCollection* ret = CreateTargetPhraseCollection(ttask, src);
|
||||
TargetPhraseCollection::shared_ptr ret
|
||||
= CreateTargetPhraseCollection(ttask, src);
|
||||
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
|
||||
const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
||||
const ttasksptr& ttask, const Phrase& src) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryGroup::
|
||||
CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
|
||||
{
|
||||
// Aggregation of phrases and the scores that will be applied to them
|
||||
vector<TargetPhrase*> allPhrases;
|
||||
@ -121,8 +124,8 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
||||
|
||||
// Collect phrases from this table
|
||||
const PhraseDictionary& pd = *m_memberPDs[i];
|
||||
const TargetPhraseCollection* ret_raw = pd.GetTargetPhraseCollectionLEGACY(
|
||||
ttask, src);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
|
||||
|
||||
if (ret_raw != NULL) {
|
||||
// Process each phrase from table
|
||||
@ -162,7 +165,7 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
||||
}
|
||||
|
||||
// Apply scores to phrases and add them to return collection
|
||||
TargetPhraseCollection* ret = new TargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
|
||||
BOOST_FOREACH(TargetPhrase* phrase, allPhrases) {
|
||||
phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second);
|
||||
@ -174,29 +177,33 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
||||
return ret;
|
||||
}
|
||||
|
||||
ChartRuleLookupManager *PhraseDictionaryGroup::CreateRuleLookupManager(
|
||||
const ChartParser &, const ChartCellCollectionBase&, size_t)
|
||||
ChartRuleLookupManager*
|
||||
PhraseDictionaryGroup::
|
||||
CreateRuleLookupManager(const ChartParser &,
|
||||
const ChartCellCollectionBase&, size_t)
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
|
||||
}
|
||||
|
||||
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
|
||||
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection* tpc)
|
||||
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
||||
{
|
||||
PhraseCache &ref = GetPhraseCache();
|
||||
ref.push_back(tpc);
|
||||
}
|
||||
|
||||
void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing(
|
||||
const InputType &source)
|
||||
void
|
||||
PhraseDictionaryGroup::
|
||||
CleanUpAfterSentenceProcessing(const InputType &source)
|
||||
{
|
||||
PhraseCache &ref = GetPhraseCache();
|
||||
for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
|
||||
delete *it;
|
||||
}
|
||||
GetPhraseCache().clear();
|
||||
// PhraseCache &ref = GetPhraseCache();
|
||||
// for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
|
||||
// delete *it;
|
||||
// }
|
||||
|
||||
PhraseCache temp;
|
||||
temp.swap(ref);
|
||||
// PhraseCache temp;
|
||||
// temp.swap(ref);
|
||||
|
||||
CleanUpComponentModels(source);
|
||||
}
|
||||
|
@ -43,19 +43,20 @@ class PhraseDictionaryGroup: public PhraseDictionary
|
||||
public:
|
||||
PhraseDictionaryGroup(const std::string& line);
|
||||
void Load();
|
||||
TargetPhraseCollection* CreateTargetPhraseCollection(const ttasksptr& ttask,
|
||||
TargetPhraseCollection::shared_ptr
|
||||
CreateTargetPhraseCollection(const ttasksptr& ttask,
|
||||
const Phrase& src) const;
|
||||
std::vector<std::vector<float> > getWeights(size_t numWeights,
|
||||
bool normalize) const;
|
||||
void CacheForCleanup(TargetPhraseCollection* tpc);
|
||||
void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
|
||||
void CleanUpAfterSentenceProcessing(const InputType& source);
|
||||
void CleanUpComponentModels(const InputType& source);
|
||||
// functions below override the base class
|
||||
void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
|
||||
const InputPathList &inputPathQueue) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
|
||||
const Phrase& src) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
|
||||
const ttasksptr& ttask, const Phrase& src) const;
|
||||
void InitializeForInput(ttasksptr const& ttask) {
|
||||
/* Don't do anything source specific here as this object is shared between threads.*/
|
||||
@ -71,7 +72,7 @@ protected:
|
||||
bool m_restrict;
|
||||
std::vector<FeatureFunction*> m_pdFeature;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection*> PhraseCache;
|
||||
typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_mutex m_lock_cache;
|
||||
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
|
||||
|
@ -49,16 +49,17 @@ PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
|
||||
|
||||
}
|
||||
|
||||
TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source
|
||||
, const TargetPhrase &target
|
||||
, const Word *sourceLHS)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryMemory::
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS)
|
||||
{
|
||||
PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
|
||||
return currNode.GetTargetPhraseCollection();
|
||||
}
|
||||
|
||||
const TargetPhraseCollection*
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryMemory::
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
|
||||
{
|
||||
@ -73,10 +74,10 @@ GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
|
||||
const Word& word = source.GetWord(pos);
|
||||
currNode = currNode->GetChild(word);
|
||||
if (currNode == NULL)
|
||||
return NULL;
|
||||
return TargetPhraseCollection::shared_ptr();
|
||||
}
|
||||
|
||||
return &currNode->GetTargetPhraseCollection();
|
||||
return currNode->GetTargetPhraseCollection();
|
||||
}
|
||||
|
||||
PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
|
||||
@ -168,12 +169,11 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
lastWord.OnlyTheseFactors(m_inputFactors);
|
||||
|
||||
const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
|
||||
TargetPhraseCollection::shared_ptr targetPhrases;
|
||||
if (ptNode) {
|
||||
const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection();
|
||||
inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode);
|
||||
} else {
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
}
|
||||
targetPhrases = ptNode->GetTargetPhraseCollection();
|
||||
}
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -56,19 +56,23 @@ public:
|
||||
std::size_t);
|
||||
|
||||
// only used by multi-model phrase table, and other meta-features
|
||||
const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
|
||||
void
|
||||
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
TO_STRING();
|
||||
|
||||
protected:
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
|
||||
PhraseDictionaryNodeMemory &GetOrCreateNode(const Phrase &source
|
||||
, const TargetPhrase &target
|
||||
, const Word *sourceLHS);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection
|
||||
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
|
||||
PhraseDictionaryNodeMemory &
|
||||
GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS);
|
||||
|
||||
void SortAndPrune();
|
||||
|
||||
PhraseDictionaryNodeMemory m_collection;
|
||||
|
@ -26,8 +26,10 @@ using namespace std;
|
||||
namespace Moses
|
||||
|
||||
{
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
|
||||
:PhraseDictionary(line, true)
|
||||
|
||||
PhraseDictionaryMultiModel::
|
||||
PhraseDictionaryMultiModel(const std::string &line)
|
||||
: PhraseDictionary(line, true)
|
||||
{
|
||||
ReadParameters();
|
||||
|
||||
@ -45,7 +47,8 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
|
||||
}
|
||||
}
|
||||
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line)
|
||||
PhraseDictionaryMultiModel::
|
||||
PhraseDictionaryMultiModel(int type, const std::string &line)
|
||||
:PhraseDictionary(line, true)
|
||||
{
|
||||
if (type == 1) {
|
||||
@ -56,7 +59,9 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value)
|
||||
void
|
||||
PhraseDictionaryMultiModel::
|
||||
SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "mode") {
|
||||
m_mode = value;
|
||||
@ -70,9 +75,9 @@ void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std:
|
||||
}
|
||||
}
|
||||
|
||||
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
|
||||
{
|
||||
}
|
||||
PhraseDictionaryMultiModel::
|
||||
~PhraseDictionaryMultiModel()
|
||||
{ }
|
||||
|
||||
void PhraseDictionaryMultiModel::Load()
|
||||
{
|
||||
@ -88,18 +93,21 @@ void PhraseDictionaryMultiModel::Load()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryMultiModel::
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
|
||||
std::vector<std::vector<float> > multimodelweights = getWeights(m_numScoreComponents, true);
|
||||
TargetPhraseCollection *ret = NULL;
|
||||
std::vector<std::vector<float> > multimodelweights;
|
||||
multimodelweights = getWeights(m_numScoreComponents, true);
|
||||
TargetPhraseCollection::shared_ptr ret;
|
||||
|
||||
std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>);
|
||||
std::map<std::string, multiModelStats*>* allStats;
|
||||
allStats = new(std::map<std::string,multiModelStats*>);
|
||||
CollectSufficientStatistics(src, allStats);
|
||||
ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
|
||||
RemoveAllInMap(*allStats);
|
||||
delete allStats;
|
||||
delete allStats; // ??? Why the detour through malloc? UG
|
||||
|
||||
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
|
||||
const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
|
||||
@ -107,16 +115,19 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
|
||||
void
|
||||
PhraseDictionaryMultiModel::
|
||||
CollectSufficientStatistics
|
||||
(const Phrase& src, std::map<std::string, multiModelStats*>* allStats) const
|
||||
{
|
||||
for(size_t i = 0; i < m_numModels; ++i) {
|
||||
const PhraseDictionary &pd = *m_pd[i];
|
||||
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
|
||||
TargetPhraseCollection::shared_ptr ret_raw;
|
||||
ret_raw = pd.GetTargetPhraseCollectionLEGACY(src);
|
||||
if (ret_raw != NULL) {
|
||||
|
||||
TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
|
||||
TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast;
|
||||
if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
|
||||
iterLast = ret_raw->begin() + m_tableLimit;
|
||||
} else {
|
||||
@ -130,7 +141,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
std::string targetString = targetPhrase->GetStringRep(m_output);
|
||||
if (allStats->find(targetString) == allStats->end()) {
|
||||
|
||||
multiModelStatistics * statistics = new multiModelStatistics;
|
||||
multiModelStats * statistics = new multiModelStats;
|
||||
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
|
||||
statistics->p.resize(m_numScoreComponents);
|
||||
for(size_t j = 0; j < m_numScoreComponents; ++j) {
|
||||
@ -149,7 +160,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
(*allStats)[targetString] = statistics;
|
||||
|
||||
}
|
||||
multiModelStatistics * statistics = (*allStats)[targetString];
|
||||
multiModelStats * statistics = (*allStats)[targetString];
|
||||
|
||||
for(size_t j = 0; j < m_numScoreComponents; ++j) {
|
||||
statistics->p[j][i] = UntransformScore(raw_scores[j]);
|
||||
@ -161,12 +172,17 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
|
||||
}
|
||||
}
|
||||
|
||||
TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryMultiModel::
|
||||
CreateTargetPhraseCollectionLinearInterpolation
|
||||
( const Phrase& src,
|
||||
std::map<std::string,multiModelStats*>* allStats,
|
||||
std::vector<std::vector<float> > &multimodelweights) const
|
||||
{
|
||||
TargetPhraseCollection *ret = new TargetPhraseCollection();
|
||||
for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
|
||||
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||
for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
|
||||
|
||||
multiModelStatistics * statistics = iter->second;
|
||||
multiModelStats * statistics = iter->second;
|
||||
|
||||
Scores scoreVector(m_numScoreComponents);
|
||||
|
||||
@ -188,7 +204,9 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
|
||||
}
|
||||
|
||||
//TODO: is it worth caching the results as long as weights don't change?
|
||||
std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const
|
||||
std::vector<std::vector<float> >
|
||||
PhraseDictionaryMultiModel::
|
||||
getWeights(size_t numWeights, bool normalize) const
|
||||
{
|
||||
const std::vector<float>* weights_ptr;
|
||||
std::vector<float> raw_weights;
|
||||
@ -237,7 +255,9 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
|
||||
return multimodelweights;
|
||||
}
|
||||
|
||||
std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<float> &weights) const
|
||||
std::vector<float>
|
||||
PhraseDictionaryMultiModel::
|
||||
normalizeWeights(std::vector<float> &weights) const
|
||||
{
|
||||
std::vector<float> ret (m_numModels);
|
||||
float total = std::accumulate(weights.begin(),weights.end(),0.0);
|
||||
@ -248,29 +268,36 @@ std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<floa
|
||||
}
|
||||
|
||||
|
||||
ChartRuleLookupManager *PhraseDictionaryMultiModel::CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t)
|
||||
ChartRuleLookupManager *
|
||||
PhraseDictionaryMultiModel::
|
||||
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
|
||||
std::size_t)
|
||||
{
|
||||
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
|
||||
}
|
||||
|
||||
|
||||
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
|
||||
void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc)
|
||||
void
|
||||
PhraseDictionaryMultiModel::
|
||||
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
||||
{
|
||||
PhraseCache &ref = GetPhraseCache();
|
||||
ref.push_back(tpc);
|
||||
GetPhraseCache().push_back(tpc);
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source)
|
||||
void
|
||||
PhraseDictionaryMultiModel::
|
||||
CleanUpAfterSentenceProcessing(const InputType &source)
|
||||
{
|
||||
PhraseCache &ref = GetPhraseCache();
|
||||
for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
|
||||
delete *it;
|
||||
}
|
||||
// PhraseCache &ref = GetPhraseCache();
|
||||
// for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
|
||||
// it->reset();
|
||||
// }
|
||||
|
||||
PhraseCache temp;
|
||||
temp.swap(ref);
|
||||
// PhraseCache temp;
|
||||
// temp.swap(ref);
|
||||
GetPhraseCache().clear();
|
||||
|
||||
CleanUpComponentModels(source);
|
||||
|
||||
@ -279,14 +306,18 @@ void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source)
|
||||
void
|
||||
PhraseDictionaryMultiModel::
|
||||
CleanUpComponentModels(const InputType &source)
|
||||
{
|
||||
for(size_t i = 0; i < m_numModels; ++i) {
|
||||
m_pd[i]->CleanUpAfterSentenceProcessing(source);
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const
|
||||
const std::vector<float>*
|
||||
PhraseDictionaryMultiModel::
|
||||
GetTemporaryMultiModelWeightsVector() const
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
|
||||
@ -300,7 +331,9 @@ const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeig
|
||||
#endif
|
||||
}
|
||||
|
||||
void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
|
||||
void
|
||||
PhraseDictionaryMultiModel::
|
||||
SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
|
||||
@ -311,7 +344,9 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector
|
||||
}
|
||||
|
||||
#ifdef WITH_DLIB
|
||||
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
|
||||
vector<float>
|
||||
PhraseDictionaryMultiModel::
|
||||
MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
|
||||
{
|
||||
|
||||
map<pair<string, string>, size_t> phrase_pair_map;
|
||||
@ -320,7 +355,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
|
||||
phrase_pair_map[*iter] += 1;
|
||||
}
|
||||
|
||||
vector<multiModelStatisticsOptimization*> optimizerStats;
|
||||
vector<multiModelStatsOptimization*> optimizerStats;
|
||||
|
||||
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
|
||||
|
||||
@ -329,7 +364,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
|
||||
string target_string = phrase_pair.second;
|
||||
|
||||
vector<float> fs(m_numModels);
|
||||
map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>);
|
||||
map<string,multiModelStats*>* allStats = new(map<string,multiModelStats*>);
|
||||
|
||||
Phrase sourcePhrase(0);
|
||||
sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
|
||||
@ -343,7 +378,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
|
||||
continue;
|
||||
}
|
||||
|
||||
multiModelStatisticsOptimization* targetStatistics = new multiModelStatisticsOptimization();
|
||||
multiModelStatsOptimization* targetStatistics = new multiModelStatsOptimization();
|
||||
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
|
||||
targetStatistics->p = (*allStats)[target_string]->p;
|
||||
targetStatistics->f = iter->second;
|
||||
@ -383,7 +418,9 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
|
||||
|
||||
}
|
||||
|
||||
vector<float> PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
|
||||
vector<float>
|
||||
PhraseDictionaryMultiModel::
|
||||
Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
|
||||
{
|
||||
|
||||
dlib::matrix<double,0,1> starting_point;
|
||||
@ -428,8 +465,8 @@ double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
|
||||
weight_vector = m_model->normalizeWeights(weight_vector);
|
||||
}
|
||||
|
||||
for ( std::vector<multiModelStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
|
||||
multiModelStatisticsOptimization* statistics = *iter;
|
||||
for ( std::vector<multiModelStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
|
||||
multiModelStatsOptimization* statistics = *iter;
|
||||
size_t f = statistics->f;
|
||||
|
||||
double score;
|
||||
|
@ -36,15 +36,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
struct multiModelStatistics {
|
||||
struct multiModelStats {
|
||||
TargetPhrase *targetPhrase;
|
||||
std::vector<std::vector<float> > p;
|
||||
~multiModelStatistics() {
|
||||
~multiModelStats() {
|
||||
delete targetPhrase;
|
||||
};
|
||||
};
|
||||
|
||||
struct multiModelStatisticsOptimization: multiModelStatistics {
|
||||
struct multiModelStatsOptimization: multiModelStats {
|
||||
size_t f;
|
||||
};
|
||||
|
||||
@ -71,27 +71,59 @@ public:
|
||||
PhraseDictionaryMultiModel(int type, const std::string &line);
|
||||
~PhraseDictionaryMultiModel();
|
||||
void Load();
|
||||
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
|
||||
virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
|
||||
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
|
||||
std::vector<float> normalizeWeights(std::vector<float> &weights) const;
|
||||
void CacheForCleanup(TargetPhraseCollection* tpc);
|
||||
void CleanUpAfterSentenceProcessing(const InputType &source);
|
||||
virtual void CleanUpComponentModels(const InputType &source);
|
||||
|
||||
virtual void
|
||||
CollectSufficientStatistics
|
||||
(const Phrase& src, std::map<std::string,multiModelStats*>* allStats)
|
||||
const;
|
||||
|
||||
virtual TargetPhraseCollection::shared_ptr
|
||||
CreateTargetPhraseCollectionLinearInterpolation
|
||||
(const Phrase& src, std::map<std::string,multiModelStats*>* allStats,
|
||||
std::vector<std::vector<float> > &multimodelweights) const;
|
||||
|
||||
std::vector<std::vector<float> >
|
||||
getWeights(size_t numWeights, bool normalize) const;
|
||||
|
||||
std::vector<float>
|
||||
normalizeWeights(std::vector<float> &weights) const;
|
||||
|
||||
void
|
||||
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
|
||||
|
||||
void
|
||||
CleanUpAfterSentenceProcessing(const InputType &source);
|
||||
|
||||
virtual void
|
||||
CleanUpComponentModels(const InputType &source);
|
||||
|
||||
#ifdef WITH_DLIB
|
||||
virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
|
||||
std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
|
||||
#endif
|
||||
// functions below required by base class
|
||||
virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
virtual void InitializeForInput(ttasksptr const& ttask) {
|
||||
/* Don't do anything source specific here as this object is shared between threads.*/
|
||||
}
|
||||
ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t);
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const;
|
||||
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
|
||||
// functions below required by base class
|
||||
virtual TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
|
||||
virtual void
|
||||
InitializeForInput(ttasksptr const& ttask) {
|
||||
// Don't do anything source specific here as this object is shared
|
||||
// between threads.
|
||||
}
|
||||
|
||||
ChartRuleLookupManager*
|
||||
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
|
||||
std::size_t);
|
||||
|
||||
void
|
||||
SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
const std::vector<float>*
|
||||
GetTemporaryMultiModelWeightsVector() const;
|
||||
|
||||
void
|
||||
SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
|
||||
|
||||
protected:
|
||||
std::string m_mode;
|
||||
@ -100,7 +132,7 @@ protected:
|
||||
size_t m_numModels;
|
||||
std::vector<float> m_multimodelweights;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection*> PhraseCache;
|
||||
typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_mutex m_lock_cache;
|
||||
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
|
||||
@ -146,7 +178,7 @@ class CrossEntropy: public OptimizationObjective
|
||||
public:
|
||||
|
||||
CrossEntropy (
|
||||
std::vector<multiModelStatisticsOptimization*> &optimizerStats,
|
||||
std::vector<multiModelStatsOptimization*> &optimizerStats,
|
||||
PhraseDictionaryMultiModel * model,
|
||||
size_t iFeature
|
||||
) {
|
||||
@ -158,7 +190,7 @@ public:
|
||||
double operator() ( const dlib::matrix<double,0,1>& arg) const;
|
||||
|
||||
protected:
|
||||
std::vector<multiModelStatisticsOptimization*> m_optimizerStats;
|
||||
std::vector<multiModelStatsOptimization*> m_optimizerStats;
|
||||
PhraseDictionaryMultiModel * m_model;
|
||||
size_t m_iFeature;
|
||||
};
|
||||
|
@ -120,7 +120,7 @@ void PhraseDictionaryMultiModelCounts::Load()
|
||||
}
|
||||
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
vector<vector<float> > multimodelweights;
|
||||
bool normalize;
|
||||
@ -130,11 +130,12 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
|
||||
//source phrase frequency is shared among all phrase pairs
|
||||
vector<float> fs(m_numModels);
|
||||
|
||||
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
|
||||
map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
|
||||
|
||||
CollectSufficientStatistics(src, fs, allStats);
|
||||
CollectSufficientStats(src, fs, allStats);
|
||||
|
||||
TargetPhraseCollection *ret = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
|
||||
TargetPhraseCollection::shared_ptr ret
|
||||
= CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
|
||||
|
||||
ret->NthElement(m_tableLimit); // sort the phrases for pruning later
|
||||
const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret);
|
||||
@ -142,16 +143,17 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const
|
||||
void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats) const
|
||||
//fill fs and allStats with statistics from models
|
||||
{
|
||||
for(size_t i = 0; i < m_numModels; ++i) {
|
||||
const PhraseDictionary &pd = *m_pd[i];
|
||||
|
||||
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
|
||||
TargetPhraseCollection::shared_ptr ret_raw
|
||||
= pd.GetTargetPhraseCollectionLEGACY(src);
|
||||
if (ret_raw != NULL) {
|
||||
|
||||
TargetPhraseCollection::iterator iterTargetPhrase;
|
||||
TargetPhraseCollection::const_iterator iterTargetPhrase;
|
||||
for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
|
||||
|
||||
const TargetPhrase * targetPhrase = *iterTargetPhrase;
|
||||
@ -160,7 +162,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
string targetString = targetPhrase->GetStringRep(m_output);
|
||||
if (allStats->find(targetString) == allStats->end()) {
|
||||
|
||||
multiModelCountsStatistics * statistics = new multiModelCountsStatistics;
|
||||
multiModelCountsStats * statistics = new multiModelCountsStats;
|
||||
statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
|
||||
|
||||
//correct future cost estimates and total score
|
||||
@ -178,7 +180,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
(*allStats)[targetString] = statistics;
|
||||
|
||||
}
|
||||
multiModelCountsStatistics * statistics = (*allStats)[targetString];
|
||||
multiModelCountsStats * statistics = (*allStats)[targetString];
|
||||
|
||||
statistics->fst[i] = UntransformScore(raw_scores[0]);
|
||||
statistics->ft[i] = UntransformScore(raw_scores[1]);
|
||||
@ -189,8 +191,8 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
}
|
||||
|
||||
// get target phrase frequency for models which have not seen the phrase pair
|
||||
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
|
||||
multiModelCountsStatistics * statistics = iter->second;
|
||||
for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
|
||||
multiModelCountsStats * statistics = iter->second;
|
||||
|
||||
for (size_t i = 0; i < m_numModels; ++i) {
|
||||
if (!statistics->ft[i]) {
|
||||
@ -200,12 +202,14 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
|
||||
}
|
||||
}
|
||||
|
||||
TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats, vector<vector<float> > &multimodelweights) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryMultiModelCounts::
|
||||
CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats, vector<vector<float> > &multimodelweights) const
|
||||
{
|
||||
TargetPhraseCollection *ret = new TargetPhraseCollection();
|
||||
for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
|
||||
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||
for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
|
||||
|
||||
multiModelCountsStatistics * statistics = iter->second;
|
||||
multiModelCountsStats * statistics = iter->second;
|
||||
|
||||
if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
|
||||
UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
|
||||
@ -248,7 +252,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
|
||||
{
|
||||
|
||||
const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
|
||||
const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
|
||||
TargetPhraseCollection::shared_ptr ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
|
||||
|
||||
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
|
||||
if (ret_raw && ret_raw->GetSize() > 0) {
|
||||
@ -320,7 +324,7 @@ double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( cons
|
||||
}
|
||||
|
||||
|
||||
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
|
||||
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
|
||||
{
|
||||
//do all the necessary lexical table lookups and get counts, but don't apply weights yet
|
||||
|
||||
@ -474,7 +478,7 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
|
||||
phrase_pair_map[*iter] += 1;
|
||||
}
|
||||
|
||||
vector<multiModelCountsStatisticsOptimization*> optimizerStats;
|
||||
vector<multiModelCountsStatsOptimization*> optimizerStats;
|
||||
|
||||
for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
|
||||
|
||||
@ -483,12 +487,12 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
|
||||
string target_string = phrase_pair.second;
|
||||
|
||||
vector<float> fs(m_numModels);
|
||||
map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
|
||||
map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
|
||||
|
||||
Phrase sourcePhrase(0);
|
||||
sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
|
||||
|
||||
CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
|
||||
CollectSufficientStats(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
|
||||
|
||||
//phrase pair not found; leave cache empty
|
||||
if (allStats->find(target_string) == allStats->end()) {
|
||||
@ -497,19 +501,19 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
|
||||
continue;
|
||||
}
|
||||
|
||||
multiModelCountsStatisticsOptimization * targetStatistics = new multiModelCountsStatisticsOptimization();
|
||||
targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
|
||||
targetStatistics->fs = fs;
|
||||
targetStatistics->fst = (*allStats)[target_string]->fst;
|
||||
targetStatistics->ft = (*allStats)[target_string]->ft;
|
||||
targetStatistics->f = iter->second;
|
||||
multiModelCountsStatsOptimization * targetStats = new multiModelCountsStatsOptimization();
|
||||
targetStats->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
|
||||
targetStats->fs = fs;
|
||||
targetStats->fst = (*allStats)[target_string]->fst;
|
||||
targetStats->ft = (*allStats)[target_string]->ft;
|
||||
targetStats->f = iter->second;
|
||||
|
||||
try {
|
||||
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), targetStatistics->targetPhrase->GetAlignTerm());
|
||||
targetStatistics->lexCachee2f = CacheLexicalStatistics(static_cast<const Phrase&>(*targetStatistics->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
|
||||
targetStatistics->lexCachef2e = CacheLexicalStatistics(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), alignment.first, m_lexTable_f2e, true );
|
||||
pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), targetStats->targetPhrase->GetAlignTerm());
|
||||
targetStats->lexCachee2f = CacheLexicalStats(static_cast<const Phrase&>(*targetStats->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
|
||||
targetStats->lexCachef2e = CacheLexicalStats(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), alignment.first, m_lexTable_f2e, true );
|
||||
|
||||
optimizerStats.push_back(targetStatistics);
|
||||
optimizerStats.push_back(targetStats);
|
||||
} catch (AlignmentException& e) {}
|
||||
|
||||
RemoveAllInMap(*allStats);
|
||||
@ -561,8 +565,8 @@ double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) con
|
||||
weight_vector = m_model->normalizeWeights(weight_vector);
|
||||
}
|
||||
|
||||
for ( std::vector<multiModelCountsStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
|
||||
multiModelCountsStatisticsOptimization* statistics = *iter;
|
||||
for ( std::vector<multiModelCountsStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
|
||||
multiModelCountsStatsOptimization* statistics = *iter;
|
||||
size_t f = statistics->f;
|
||||
|
||||
double score;
|
||||
|
@ -37,11 +37,11 @@ typedef boost::unordered_map<Word, lexicalMap > lexicalMapJoint;
|
||||
typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
|
||||
typedef std::vector<std::vector<lexicalPair> > lexicalCache;
|
||||
|
||||
struct multiModelCountsStatistics : multiModelStatistics {
|
||||
struct multiModelCountsStats : multiModelStats {
|
||||
std::vector<float> fst, ft;
|
||||
};
|
||||
|
||||
struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics {
|
||||
struct multiModelCountsStatsOptimization: multiModelCountsStats {
|
||||
std::vector<float> fs;
|
||||
lexicalCache lexCachee2f, lexCachef2e;
|
||||
size_t f;
|
||||
@ -80,18 +80,18 @@ public:
|
||||
PhraseDictionaryMultiModelCounts(const std::string &line);
|
||||
~PhraseDictionaryMultiModelCounts();
|
||||
void Load();
|
||||
TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
|
||||
void CollectSufficientStatistics(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats) const;
|
||||
TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
|
||||
void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const;
|
||||
float GetTargetCount(const Phrase& target, size_t modelIndex) const;
|
||||
double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const;
|
||||
double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const;
|
||||
double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const;
|
||||
std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const;
|
||||
std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
|
||||
std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
|
||||
void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
|
||||
void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
|
||||
void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
#ifdef WITH_DLIB
|
||||
std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
|
||||
#endif
|
||||
@ -117,7 +117,7 @@ class CrossEntropyCounts: public OptimizationObjective
|
||||
public:
|
||||
|
||||
CrossEntropyCounts (
|
||||
std::vector<multiModelCountsStatisticsOptimization*> &optimizerStats,
|
||||
std::vector<multiModelCountsStatsOptimization*> &optimizerStats,
|
||||
PhraseDictionaryMultiModelCounts * model,
|
||||
size_t iFeature
|
||||
) {
|
||||
@ -129,7 +129,7 @@ public:
|
||||
double operator() ( const dlib::matrix<double,0,1>& arg) const;
|
||||
|
||||
private:
|
||||
std::vector<multiModelCountsStatisticsOptimization*> m_optimizerStats;
|
||||
std::vector<multiModelCountsStatsOptimization*> m_optimizerStats;
|
||||
PhraseDictionaryMultiModelCounts * m_model;
|
||||
size_t m_iFeature;
|
||||
};
|
||||
|
@ -39,7 +39,7 @@ void PhraseDictionaryNodeMemory::Prune(size_t tableLimit)
|
||||
}
|
||||
|
||||
// prune TargetPhraseCollection in this node
|
||||
m_targetPhraseCollection.Prune(true, tableLimit);
|
||||
m_targetPhraseCollection->Prune(true, tableLimit);
|
||||
}
|
||||
|
||||
void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
|
||||
@ -53,10 +53,11 @@ void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
|
||||
}
|
||||
|
||||
// prune TargetPhraseCollection in this node
|
||||
m_targetPhraseCollection.Sort(true, tableLimit);
|
||||
m_targetPhraseCollection->Sort(true, tableLimit);
|
||||
}
|
||||
|
||||
PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
|
||||
PhraseDictionaryNodeMemory*
|
||||
PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
|
||||
{
|
||||
return &m_sourceTermMap[sourceTerm];
|
||||
}
|
||||
@ -118,7 +119,7 @@ void PhraseDictionaryNodeMemory::Remove()
|
||||
{
|
||||
m_sourceTermMap.clear();
|
||||
m_nonTermMap.clear();
|
||||
m_targetPhraseCollection.Remove();
|
||||
m_targetPhraseCollection->Remove();
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeMemory &node)
|
||||
|
@ -130,12 +130,13 @@ private:
|
||||
|
||||
TerminalMap m_sourceTermMap;
|
||||
NonTerminalMap m_nonTermMap;
|
||||
TargetPhraseCollection m_targetPhraseCollection;
|
||||
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
||||
|
||||
|
||||
public:
|
||||
PhraseDictionaryNodeMemory() {}
|
||||
|
||||
PhraseDictionaryNodeMemory()
|
||||
: m_targetPhraseCollection(new TargetPhraseCollection) { }
|
||||
|
||||
bool IsLeaf() const {
|
||||
return m_sourceTermMap.empty() && m_nonTermMap.empty();
|
||||
}
|
||||
@ -152,10 +153,12 @@ public:
|
||||
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
|
||||
#endif
|
||||
|
||||
const TargetPhraseCollection &GetTargetPhraseCollection() const {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection() const {
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
TargetPhraseCollection &GetTargetPhraseCollection() {
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection() {
|
||||
return m_targetPhraseCollection;
|
||||
}
|
||||
|
||||
|
@ -54,7 +54,9 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
|
||||
void
|
||||
PhraseDictionaryTransliteration::
|
||||
GetTargetPhraseCollection(InputPath &inputPath) const
|
||||
{
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
size_t hash = hash_value(sourcePhrase);
|
||||
@ -66,7 +68,7 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
|
||||
|
||||
if (iter != cache.end()) {
|
||||
// already in cache
|
||||
const TargetPhraseCollection *tpColl = iter->second.first;
|
||||
TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
|
||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||
} else {
|
||||
// TRANSLITERATE
|
||||
@ -89,17 +91,15 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
|
||||
int ret = system(cmd.c_str());
|
||||
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
|
||||
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.path());
|
||||
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
|
||||
vector<TargetPhrase*> targetPhrases
|
||||
= CreateTargetPhrases(sourcePhrase, outDir.path());
|
||||
vector<TargetPhrase*>::const_iterator iter;
|
||||
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
|
||||
TargetPhrase *tp = *iter;
|
||||
tpColl->Add(tp);
|
||||
}
|
||||
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
|
||||
cache[hash] = value;
|
||||
|
||||
cache[hash] = CacheCollEntry(tpColl, clock());
|
||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -74,11 +74,10 @@ void PhraseDictionaryTreeAdaptor::CleanUpAfterSentenceProcessing(InputType const
|
||||
obj.CleanUp();
|
||||
}
|
||||
|
||||
TargetPhraseCollection const*
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret = GetImplementation().GetTargetPhraseCollection(src);
|
||||
return ret;
|
||||
return GetImplementation().GetTargetPhraseCollection(src);
|
||||
}
|
||||
|
||||
void PhraseDictionaryTreeAdaptor::EnableCache()
|
||||
@ -107,16 +106,17 @@ const PDTAimp& PhraseDictionaryTreeAdaptor::GetImplementation() const
|
||||
}
|
||||
|
||||
// legacy
|
||||
const TargetPhraseCollectionWithSourcePhrase*
|
||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
PhraseDictionaryTreeAdaptor::
|
||||
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
|
||||
{
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
|
||||
if(GetImplementation().m_rangeCache.empty()) {
|
||||
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
|
||||
return tpColl;
|
||||
ret = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
|
||||
} else {
|
||||
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
|
||||
return tpColl;
|
||||
ret = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -59,7 +59,8 @@ public:
|
||||
|
||||
// get translation candidates for a given source phrase
|
||||
// returns null pointer if nothing found
|
||||
TargetPhraseCollection const* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
||||
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
void CleanUpAfterSentenceProcessing(InputType const& source);
|
||||
@ -73,7 +74,9 @@ public:
|
||||
}
|
||||
|
||||
// legacy
|
||||
const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const & srcRange) const;
|
||||
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(InputType const& src,
|
||||
WordsRange const & srcRange) const;
|
||||
|
||||
};
|
||||
|
||||
|
@ -79,11 +79,11 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
|
||||
continue;
|
||||
}
|
||||
|
||||
TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase);
|
||||
TargetPhraseCollection::shared_ptr tpColl = CreateTargetPhrase(sourcePhrase);
|
||||
|
||||
// add target phrase to phrase-table cache
|
||||
size_t hash = hash_value(sourcePhrase);
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
|
||||
std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(tpColl, clock());
|
||||
cache[hash] = value;
|
||||
|
||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||
@ -109,7 +109,7 @@ std::vector<uint64_t> ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour
|
||||
return ret;
|
||||
}
|
||||
|
||||
TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
|
||||
TargetPhraseCollection::shared_ptr ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
|
||||
{
|
||||
// create a target phrase from the 1st word of the source, prefix with 'ProbingPT:'
|
||||
assert(sourcePhrase.GetSize());
|
||||
@ -124,7 +124,7 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
|
||||
|
||||
std::pair<bool, std::vector<target_text> > query_result;
|
||||
|
||||
TargetPhraseCollection *tpColl = NULL;
|
||||
TargetPhraseCollection::shared_ptr tpColl = NULL;
|
||||
|
||||
//Actual lookup
|
||||
query_result = m_engine->query(probingSource);
|
||||
|
@ -49,12 +49,14 @@ protected:
|
||||
|
||||
// Provide access to RuleTableTrie's private
|
||||
// GetOrCreateTargetPhraseCollection function.
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
RuleTableTrie &ruleTable
|
||||
, const Phrase &source
|
||||
, const TargetPhrase &target
|
||||
, const Word *sourceLHS) {
|
||||
return ruleTable.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(RuleTableTrie &ruleTable,
|
||||
const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS)
|
||||
{
|
||||
return ruleTable.GetOrCreateTargetPhraseCollection(source, target,
|
||||
sourceLHS);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -224,9 +224,10 @@ bool RuleTableLoaderCompact::LoadRuleSection(
|
||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
|
||||
|
||||
// Insert rule into table.
|
||||
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
|
||||
ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
|
||||
coll.Add(targetPhrase);
|
||||
TargetPhraseCollection::shared_ptr coll;
|
||||
coll = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
|
||||
*targetPhrase, &sourceLHS);
|
||||
coll->Add(targetPhrase);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -242,8 +242,10 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
|
||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
|
||||
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
|
||||
phraseColl.Add(targetPhrase);
|
||||
TargetPhraseCollection::shared_ptr phraseColl
|
||||
= GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
|
||||
*targetPhrase, sourceLHS);
|
||||
phraseColl->Add(targetPhrase);
|
||||
|
||||
// not implemented correctly in memory pt. just delete it for now
|
||||
delete sourceLHS;
|
||||
|
@ -282,8 +282,10 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
|
||||
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
||||
targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
|
||||
|
||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
|
||||
phraseColl.Add(targetPhrase);
|
||||
TargetPhraseCollection::shared_ptr phraseColl
|
||||
= GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase,
|
||||
*targetPhrase, sourceLHS);
|
||||
phraseColl->Add(targetPhrase);
|
||||
|
||||
count++;
|
||||
|
||||
@ -301,7 +303,9 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
|
||||
//removedirectoryrecursively(dirName);
|
||||
}
|
||||
|
||||
TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryFuzzyMatch::
|
||||
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
||||
, const Phrase &source
|
||||
, const TargetPhrase &target
|
||||
, const Word *sourceLHS)
|
||||
|
@ -1,3 +1,4 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2011 University of Edinburgh
|
||||
@ -59,7 +60,8 @@ public:
|
||||
TO_STRING();
|
||||
|
||||
protected:
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
||||
, const Phrase &source
|
||||
, const TargetPhrase &target
|
||||
, const Word *sourceLHS);
|
||||
|
@ -149,26 +149,26 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
|
||||
lastWord.OnlyTheseFactors(m_inputFactors);
|
||||
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
|
||||
|
||||
TargetPhraseCollection::shared_ptr tpc;
|
||||
if (lastWordOnDisk == NULL) {
|
||||
// OOV according to this phrase table. Not possible to extend
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
inputPath.SetTargetPhrases(*this, tpc, NULL);
|
||||
} else {
|
||||
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
||||
if (ptNode) {
|
||||
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
||||
} else {
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
}
|
||||
|
||||
OnDiskPt::PhraseNode const* ptNode;
|
||||
ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
||||
if (ptNode) tpc = GetTargetPhraseCollection(ptNode);
|
||||
inputPath.SetTargetPhrases(*this, tpc, ptNode);
|
||||
|
||||
delete lastWordOnDisk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryOnDisk::
|
||||
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
|
||||
{
|
||||
const TargetPhraseCollection *ret;
|
||||
TargetPhraseCollection::shared_ptr ret;
|
||||
|
||||
CacheColl &cache = GetCache();
|
||||
size_t hash = (size_t) ptNode->GetFilePos();
|
||||
@ -181,31 +181,34 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(
|
||||
// not in cache, need to look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
||||
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
|
||||
std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(ret, clock());
|
||||
cache[hash] = value;
|
||||
} else {
|
||||
// in cache. just use it
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
|
||||
ret = value.first;
|
||||
iter->second.second = clock();
|
||||
ret = iter->second.first;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
|
||||
TargetPhraseCollection::shared_ptr
|
||||
PhraseDictionaryOnDisk::
|
||||
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
|
||||
{
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
OnDiskPt::OnDiskWrapper& wrapper
|
||||
= const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
|
||||
vector<float> weightT = StaticData::Instance().GetWeights(this);
|
||||
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
|
||||
|
||||
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
||||
TargetPhraseCollection *targetPhrases
|
||||
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
|
||||
OnDiskPt::TargetPhraseCollection::shared_ptr targetPhrasesOnDisk
|
||||
= ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
||||
TargetPhraseCollection::shared_ptr targetPhrases
|
||||
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this,
|
||||
weightT, vocab, false);
|
||||
|
||||
delete targetPhrasesOnDisk;
|
||||
// delete targetPhrasesOnDisk;
|
||||
|
||||
return targetPhrases;
|
||||
}
|
||||
|
@ -78,8 +78,11 @@ public:
|
||||
virtual void InitializeForInput(ttasksptr const& ttask);
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
|
||||
const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
|
@ -51,9 +51,10 @@ public:
|
||||
private:
|
||||
friend class RuleTableLoader;
|
||||
|
||||
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS) = 0;
|
||||
virtual TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS) = 0;
|
||||
|
||||
virtual void SortAndPrune() = 0;
|
||||
|
||||
|
@ -38,8 +38,11 @@
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
TargetPhraseCollection &RuleTableUTrie::GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
RuleTableUTrie::
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS)
|
||||
{
|
||||
UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS);
|
||||
return currNode.GetOrCreateTargetPhraseCollection(target);
|
||||
|
@ -21,13 +21,13 @@
|
||||
|
||||
#include "Trie.h"
|
||||
#include "UTrieNode.h"
|
||||
#include "moses/TargetPhraseCollection.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class Phrase;
|
||||
class TargetPhrase;
|
||||
class TargetPhraseCollection;
|
||||
class Word;
|
||||
class ChartParser;
|
||||
|
||||
@ -57,8 +57,10 @@ public:
|
||||
const ChartCellCollectionBase &, std::size_t);
|
||||
|
||||
private:
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||
const TargetPhrase &target,
|
||||
const Word *sourceLHS);
|
||||
|
||||
UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||
const Word *sourceLHS);
|
||||
|
@ -49,7 +49,7 @@ void UTrieNode::Prune(size_t tableLimit)
|
||||
|
||||
// Prune TargetPhraseCollections at this node.
|
||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||
p->second.Prune(true, tableLimit);
|
||||
p->second->Prune(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ void UTrieNode::Sort(size_t tableLimit)
|
||||
|
||||
// Sort TargetPhraseCollections at this node.
|
||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||
p->second.Sort(true, tableLimit);
|
||||
p->second->Sort(true, tableLimit);
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,8 +89,9 @@ UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
|
||||
return m_gapNode;
|
||||
}
|
||||
|
||||
TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
|
||||
const TargetPhrase &target)
|
||||
TargetPhraseCollection::shared_ptr
|
||||
UTrieNode::
|
||||
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
|
||||
{
|
||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||
const size_t rank = alignmentInfo.GetSize();
|
||||
@ -107,8 +108,9 @@ TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
|
||||
const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
|
||||
vec.push_back(InsertLabel(i++, targetNonTerm));
|
||||
}
|
||||
|
||||
return m_labelMap[vec];
|
||||
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
|
||||
if (ret == NULL) ret.reset(new TargetPhraseCollection);
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace Moses
|
||||
|
@ -51,10 +51,10 @@ public:
|
||||
TerminalEqualityPred> TerminalMap;
|
||||
|
||||
typedef boost::unordered_map<std::vector<int>,
|
||||
TargetPhraseCollection> LabelMap;
|
||||
TargetPhraseCollection::shared_ptr> LabelMap;
|
||||
#else
|
||||
typedef std::map<Word, UTrieNode> TerminalMap;
|
||||
typedef std::map<std::vector<int>, TargetPhraseCollection> LabelMap;
|
||||
typedef std::map<std::vector<int>, TargetPhraseCollection::shared_ptr> LabelMap;
|
||||
#endif
|
||||
|
||||
~UTrieNode() {
|
||||
@ -78,8 +78,8 @@ public:
|
||||
UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm);
|
||||
UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
|
||||
|
||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
||||
const TargetPhrase &);
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetOrCreateTargetPhraseCollection(const TargetPhrase &);
|
||||
|
||||
bool IsLeaf() const {
|
||||
return m_terminalMap.empty() && m_gapNode == NULL;
|
||||
|
@ -47,7 +47,8 @@ void Scope3Parser::GetChartRuleCollection(
|
||||
const size_t start = range.GetStartPos();
|
||||
const size_t end = range.GetEndPos();
|
||||
|
||||
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec = m_ruleApplications[start][end-start+1];
|
||||
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec
|
||||
= m_ruleApplications[start][end-start+1];
|
||||
|
||||
MatchCallback matchCB(range, outColl);
|
||||
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
|
||||
@ -58,8 +59,8 @@ void Scope3Parser::GetChartRuleCollection(
|
||||
|
||||
if (varSpanNode.m_rank == 0) { // Purely lexical rule.
|
||||
assert(labelMap.size() == 1);
|
||||
const TargetPhraseCollection &tpc = labelMap.begin()->second;
|
||||
matchCB.m_tpc = &tpc;
|
||||
TargetPhraseCollection::shared_ptr tpc = labelMap.begin()->second;
|
||||
matchCB.m_tpc = tpc;
|
||||
matchCB(m_emptyStackVec);
|
||||
} else { // Rule has at least one non-terminal.
|
||||
varSpanNode.CalculateRanges(start, end, m_ranges);
|
||||
@ -70,7 +71,7 @@ void Scope3Parser::GetChartRuleCollection(
|
||||
UTrieNode::LabelMap::const_iterator p = labelMap.begin();
|
||||
for (; p != labelMap.end(); ++p) {
|
||||
const std::vector<int> &labels = p->first;
|
||||
const TargetPhraseCollection &tpc = p->second;
|
||||
TargetPhraseCollection::shared_ptr tpc = p->second;
|
||||
assert(labels.size() == varSpanNode.m_rank);
|
||||
bool failCheck = false;
|
||||
for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
|
||||
@ -82,7 +83,7 @@ void Scope3Parser::GetChartRuleCollection(
|
||||
if (failCheck) {
|
||||
continue;
|
||||
}
|
||||
matchCB.m_tpc = &tpc;
|
||||
matchCB.m_tpc = tpc;
|
||||
searcher.Search(labels, matchCB);
|
||||
}
|
||||
}
|
||||
|
@ -66,17 +66,16 @@ private:
|
||||
// Define a callback type for use by StackLatticeSearcher.
|
||||
struct MatchCallback {
|
||||
public:
|
||||
MatchCallback(const WordsRange &range,
|
||||
ChartParserCallback &out)
|
||||
: m_range(range)
|
||||
, m_out(out)
|
||||
, m_tpc(NULL) {}
|
||||
MatchCallback(const WordsRange &range, ChartParserCallback &out)
|
||||
: m_range(range) , m_out(out) // , m_tpc(NULL)
|
||||
{ }
|
||||
|
||||
void operator()(const StackVec &stackVec) {
|
||||
m_out.Add(*m_tpc, stackVec, m_range);
|
||||
}
|
||||
const WordsRange &m_range;
|
||||
ChartParserCallback &m_out;
|
||||
const TargetPhraseCollection *m_tpc;
|
||||
TargetPhraseCollection::shared_ptr m_tpc;
|
||||
};
|
||||
|
||||
void Init();
|
||||
|
@ -32,12 +32,13 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
|
||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||
|
||||
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
|
||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
||||
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
|
||||
tpColl->Add(tp);
|
||||
|
||||
// add target phrase to phrase-table cache
|
||||
size_t hash = hash_value(sourcePhrase);
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
|
||||
std::pair<TargetPhraseCollection::shared_ptr, clock_t>
|
||||
value(tpColl, clock());
|
||||
cache[hash] = value;
|
||||
|
||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||
|
@ -4,182 +4,52 @@ namespace Moses
|
||||
{
|
||||
using std::vector;
|
||||
|
||||
TPCollCache
|
||||
::TPCollCache(size_t capacity)
|
||||
TPCollCache::
|
||||
TPCollCache(size_t capacity)
|
||||
{
|
||||
m_doomed_first = m_doomed_last = NULL;
|
||||
m_doomed_count = 0;
|
||||
m_qfirst = m_qlast = m_cache.end();
|
||||
m_capacity = capacity;
|
||||
UTIL_THROW_IF2(m_capacity <= 2, "Cache capacity must be > 1!");
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
sancheck(TPCollWrapper const* first, TPCollWrapper const* last, size_t count)
|
||||
{
|
||||
if (first == NULL)
|
||||
{
|
||||
UTIL_THROW_IF2(last != NULL || count != 0, "queue error");
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t s = 0;
|
||||
for (TPCollWrapper const* x = first; x; x = x->next)
|
||||
{
|
||||
std::cerr << ++s << "/" << count << " "
|
||||
<< first << " "
|
||||
<< x->prev << " " << x << " " << x->next << " "
|
||||
<< last << std::endl;
|
||||
}
|
||||
std::cerr << std::string(80,'-') << std::endl;
|
||||
// while (x != last && s < count)
|
||||
// {
|
||||
// UTIL_THROW_IF2(x->next == NULL, "queue error");
|
||||
// x = x->next;
|
||||
// ++s;
|
||||
// std::cerr << x << " " << s << "/" << count << std::endl;
|
||||
// }
|
||||
// std::cerr << x << " " << s << "/" << count << std::endl;
|
||||
|
||||
// UTIL_THROW_IF2(x != last, "queue error");
|
||||
// UTIL_THROW_IF2(s != count, "queue error");
|
||||
// x = last; s = 1;
|
||||
// while (x != first && s++ < count)
|
||||
// {
|
||||
// UTIL_THROW_IF2(x->prev == NULL, "queue error");
|
||||
// x = x->prev;
|
||||
// }
|
||||
// UTIL_THROW_IF2(x != first, "queue error");
|
||||
// UTIL_THROW_IF2(s != count, "queue error");
|
||||
return true;
|
||||
}
|
||||
|
||||
/// remove a TPC from the "doomed" queue
|
||||
void
|
||||
TPCollCache
|
||||
::remove_from_queue(TPCollWrapper* x)
|
||||
{
|
||||
// caller must lock!
|
||||
|
||||
if (m_doomed_first != x && x->prev == NULL)
|
||||
{ // not in the queue
|
||||
UTIL_THROW_IF2(x->next, "queue error");
|
||||
return;
|
||||
}
|
||||
|
||||
sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
||||
|
||||
std::cerr << "Removing " << x << std::endl;
|
||||
|
||||
if (m_doomed_first == x)
|
||||
m_doomed_first = x->next;
|
||||
else x->prev->next = x->next;
|
||||
|
||||
if (m_doomed_last == x)
|
||||
m_doomed_last = x->prev;
|
||||
else x->next->prev = x->prev;
|
||||
|
||||
x->next = x->prev = NULL;
|
||||
--m_doomed_count;
|
||||
|
||||
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
||||
}
|
||||
|
||||
void
|
||||
TPCollCache
|
||||
::add_to_queue(TPCollWrapper* x)
|
||||
{
|
||||
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
||||
|
||||
// caller must lock!
|
||||
x->prev = m_doomed_last;
|
||||
|
||||
if (!m_doomed_first)
|
||||
m_doomed_first = x;
|
||||
|
||||
if (m_doomed_last) m_doomed_last->next = x;
|
||||
m_doomed_last = x;
|
||||
|
||||
++m_doomed_count;
|
||||
|
||||
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
||||
}
|
||||
|
||||
TPCollWrapper*
|
||||
TPCollCache
|
||||
::get(uint64_t key, size_t revision)
|
||||
SPTR<TPCollWrapper>
|
||||
TPCollCache::
|
||||
get(uint64_t key, size_t revision)
|
||||
{
|
||||
using namespace boost;
|
||||
upgrade_lock<shared_mutex> rlock(m_lock);
|
||||
cache_t::iterator m = m_cache.find(key);
|
||||
if (m == m_cache.end()) // new
|
||||
unique_lock<shared_mutex> lock(m_lock);
|
||||
std::pair<uint64_t, SPTR<TPCollWrapper> > e(key, SPTR<TPCollWrapper>());
|
||||
std::pair<cache_t::iterator, bool> foo = m_cache.insert(e);
|
||||
SPTR<TPCollWrapper>& ret = foo.first->second;
|
||||
if (ret)
|
||||
{
|
||||
std::pair<uint64_t,TPCollWrapper*> e(key,NULL);
|
||||
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
|
||||
std::pair<cache_t::iterator,bool> foo = m_cache.insert(e);
|
||||
if (foo.second) foo.first->second = new TPCollWrapper(key, revision);
|
||||
m = foo.first;
|
||||
// ++m->second->refCount;
|
||||
if (m_qfirst == foo.first) m_qfirst = ret->next;
|
||||
else ret->prev->second->next = ret->next;
|
||||
if (m_qlast != foo.first)
|
||||
ret->next->second->prev = ret->prev;
|
||||
}
|
||||
else
|
||||
if (!ret || ret->revision != revision)
|
||||
ret.reset(new TPCollWrapper(key,revision));
|
||||
ret->prev = m_qlast;
|
||||
if (m_qlast != m_cache.end()) m_qlast->second->next = foo.first;
|
||||
m_qlast = foo.first;
|
||||
|
||||
while (m_cache.size() > m_capacity && m_qfirst->second.use_count() == 1)
|
||||
{
|
||||
if (m->second->refCount == 0)
|
||||
{
|
||||
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
|
||||
remove_from_queue(m->second);
|
||||
}
|
||||
if (m->second->revision != revision) // out of date
|
||||
{
|
||||
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
|
||||
m->second = new TPCollWrapper(key, revision);
|
||||
}
|
||||
m_qfirst = m_qfirst->second->next;
|
||||
m_cache.erase(m_qfirst->second->prev);
|
||||
}
|
||||
++m->second->refCount;
|
||||
return m->second;
|
||||
|
||||
return ret;
|
||||
} // TPCollCache::get(...)
|
||||
|
||||
void
|
||||
TPCollCache
|
||||
::release(TPCollWrapper const* ptr)
|
||||
{
|
||||
if (!ptr) return;
|
||||
std::cerr << "Releasing " << ptr->key << " (" << ptr->refCount << ")" << std::endl;
|
||||
if (--ptr->refCount == 0)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
||||
if (m_doomed_count == m_capacity)
|
||||
{
|
||||
TPCollWrapper* x = m_doomed_first;
|
||||
remove_from_queue(x);
|
||||
UTIL_THROW_IF2(x->refCount || x == ptr, "TPC was doomed while still in use!");
|
||||
cache_t::iterator m = m_cache.find(ptr->key);
|
||||
if (m != m_cache.end() && m->second == ptr)
|
||||
{ // the cache could have been updated with a new pointer
|
||||
// for the same phrase already, so we need to check
|
||||
// if the pointer we cound is the one we want to get rid of,
|
||||
// hence the second check
|
||||
// boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
|
||||
m_cache.erase(m);
|
||||
}
|
||||
|
||||
std::cerr << "Deleting " << x->key << " " << x->refCount << std::endl;
|
||||
|
||||
// delete x;
|
||||
}
|
||||
add_to_queue(const_cast<TPCollWrapper*>(ptr));
|
||||
}
|
||||
} // TPCollCache::release(...)
|
||||
|
||||
TPCollWrapper::
|
||||
TPCollWrapper(uint64_t key_, size_t revision_)
|
||||
: refCount(0), prev(NULL), next(NULL)
|
||||
, revision(revision_), key(key_)
|
||||
: revision(revision_), key(key_)
|
||||
{ }
|
||||
|
||||
TPCollWrapper::
|
||||
~TPCollWrapper()
|
||||
{
|
||||
UTIL_THROW_IF2(this->refCount, "TPCollWrapper refCount > 0!");
|
||||
assert(this->refCount == 0);
|
||||
}
|
||||
{ }
|
||||
|
||||
} // namespace
|
||||
|
@ -3,60 +3,44 @@
|
||||
#include <time.h>
|
||||
#include "moses/TargetPhraseCollection.h"
|
||||
#include <boost/atomic.hpp>
|
||||
|
||||
#include "mm/ug_typedefs.h"
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class TPCollCache;
|
||||
class TPCollWrapper;
|
||||
|
||||
class TPCollCache
|
||||
{
|
||||
public:
|
||||
typedef boost::unordered_map<uint64_t, SPTR<TPCollWrapper> > cache_t;
|
||||
private:
|
||||
uint32_t m_capacity; // capacity of cache
|
||||
cache_t m_cache; // maps from ids to items
|
||||
cache_t::iterator m_qfirst, m_qlast;
|
||||
mutable boost::shared_mutex m_lock;
|
||||
public:
|
||||
TPCollCache(size_t capacity=10000);
|
||||
|
||||
SPTR<TPCollWrapper>
|
||||
get(uint64_t key, size_t revision);
|
||||
|
||||
};
|
||||
|
||||
class TPCollWrapper
|
||||
// wrapper around TargetPhraseCollection with reference counting
|
||||
// and additional members for caching purposes
|
||||
class TPCollWrapper
|
||||
: public TargetPhraseCollection
|
||||
{
|
||||
friend class TPCollCache;
|
||||
friend class Mmsapt;
|
||||
mutable boost::atomic<uint32_t> refCount; // reference count
|
||||
public:
|
||||
TPCollWrapper* prev; // ... in queue of TPCollWrappers used recently
|
||||
TPCollWrapper* next; // ... in queue of TPCollWrappers used recently
|
||||
TPCollCache::cache_t::iterator prev, next;
|
||||
public:
|
||||
mutable boost::shared_mutex lock;
|
||||
size_t const revision; // rev. No. of the underlying corpus
|
||||
uint64_t const key; // phrase key
|
||||
#if defined(timespec) // timespec is better, but not available everywhere
|
||||
timespec tstamp; // last use
|
||||
#else
|
||||
timeval tstamp; // last use
|
||||
#endif
|
||||
TPCollWrapper(uint64_t const key, size_t const rev);
|
||||
~TPCollWrapper();
|
||||
};
|
||||
|
||||
class TPCollCache
|
||||
{
|
||||
typedef boost::unordered_map<uint64_t, TPCollWrapper*> cache_t;
|
||||
typedef std::vector<TPCollWrapper*> history_t;
|
||||
cache_t m_cache; // maps from phrase ids to target phrase collections
|
||||
// mutable history_t m_history; // heap of live items, least recently used one on top
|
||||
|
||||
mutable boost::shared_mutex m_lock; // locks m_cache
|
||||
|
||||
TPCollWrapper* m_doomed_first;
|
||||
TPCollWrapper* m_doomed_last;
|
||||
uint32_t m_doomed_count; // counter of doomed TPCs
|
||||
uint32_t m_capacity; // capacity of cache
|
||||
void add_to_queue(TPCollWrapper* x);
|
||||
void remove_from_queue(TPCollWrapper* x);
|
||||
public:
|
||||
TPCollCache(size_t capacity=10000);
|
||||
|
||||
TPCollWrapper*
|
||||
get(uint64_t key, size_t revision);
|
||||
|
||||
void
|
||||
release(TPCollWrapper const* tpc);
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -627,30 +627,32 @@ namespace Moses
|
||||
{
|
||||
InputPath &inputPath = **iter;
|
||||
const Phrase &phrase = inputPath.GetPhrase();
|
||||
const TargetPhraseCollection *targetPhrases
|
||||
TargetPhraseCollection::shared_ptr targetPhrases
|
||||
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
// Task-less overload of the phrase lookup. Its body consists solely of
// UTIL_THROW2 with the message below, so it never yields a collection;
// lookups are meant to go through the overload that also takes the
// translation task (ttasksptr).
TargetPhraseCollection const*
|
||||
Mmsapt::
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
{
|
||||
UTIL_THROW2("Don't call me without the translation task.");
|
||||
}
|
||||
// TargetPhraseCollection::shared_ptr
|
||||
// Mmsapt::
|
||||
// GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
// {
|
||||
// UTIL_THROW2("Don't call me without the translation task.");
|
||||
// }
|
||||
|
||||
// This is not the most efficient way of phrase lookup!
|
||||
TargetPhraseCollection const*
|
||||
TargetPhraseCollection::shared_ptr
|
||||
Mmsapt::
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> xlock(m_lock);
|
||||
SPTR<TPCollWrapper> ret;
|
||||
// boost::unique_lock<boost::shared_mutex> xlock(m_lock);
|
||||
|
||||
// map from Moses Phrase to internal id sequence
|
||||
vector<id_type> sphrase;
|
||||
fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase);
|
||||
if (sphrase.size() == 0) return NULL;
|
||||
|
||||
if (sphrase.size() == 0) return ret;
|
||||
|
||||
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
|
||||
// is set to a new copy of the dynamic bitext every time a sentence pair
|
||||
// is added. /dyn/ keeps the old bitext around as long as we need it.
|
||||
@ -665,42 +667,42 @@ namespace Moses
|
||||
// lookup phrases in both bitexts
|
||||
TSA<Token>::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size());
|
||||
TSA<Token>::tree_iterator mdyn(dyn->I1.get());
|
||||
if (dyn->I1.get())
|
||||
if (dyn->I1.get()) // we have a dynamic bitext
|
||||
for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
|
||||
mdyn.extend(sphrase[i]);
|
||||
|
||||
if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
|
||||
return NULL; // phrase not found in either bitext
|
||||
return ret; // phrase not found in either bitext
|
||||
|
||||
// do we have cached results for this phrase?
|
||||
uint64_t phrasekey = (mfix.size() == sphrase.size()
|
||||
? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1);
|
||||
|
||||
// std::cerr << "Phrasekey is " << phrasekey << " at " << HERE << std::endl;
|
||||
? (mfix.getPid()<<1)
|
||||
: (mdyn.getPid()<<1)+1);
|
||||
|
||||
// get context-specific cache of items previously looked up
|
||||
SPTR<ContextScope> const& scope = ttask->GetScope();
|
||||
SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
|
||||
if (!cache) cache = m_cache;
|
||||
TPCollWrapper* ret = cache->get(phrasekey, dyn->revision());
|
||||
// TO DO: we should revise the revision mechanism: we take the length
|
||||
// of the dynamic bitext (in sentences) at the time the PT entry
|
||||
// was stored as the time stamp. For each word in the
|
||||
if (!cache) cache = m_cache; // no context-specific cache, use global one
|
||||
|
||||
ret = cache->get(phrasekey, dyn->revision());
|
||||
// TO DO: we should revise the revision mechanism: we take the
|
||||
// length of the dynamic bitext (in sentences) at the time the PT
|
||||
// entry was stored as the time stamp. For each word in the
|
||||
// vocabulary, we also store its most recent occurrence in the
|
||||
// bitext. Only if the timestamp of each word in the phrase is
|
||||
// newer than the timestamp of the phrase itself we must update
|
||||
// the entry.
|
||||
|
||||
// std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl;
|
||||
std::cerr << ret << " with " << ret->refCount << " references at "
|
||||
<< HERE << std::endl;
|
||||
// std::cerr << ret << " with " << ret->refCount << " references at "
|
||||
// << HERE << std::endl;
|
||||
boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
|
||||
if (ret->GetSize()) return ret;
|
||||
|
||||
// new TPC (not found or old one was not up to date)
|
||||
boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
|
||||
// maybe another thread did the work while we waited for the lock ?
|
||||
if (ret->GetSize()) return ret;
|
||||
// check again, another thread may have done the work already
|
||||
|
||||
// OK: pt entry NOT found or NOT up to date
|
||||
// lookup and expansion could be done in parallel threads,
|
||||
@ -718,12 +720,16 @@ namespace Moses
|
||||
else
|
||||
{
|
||||
BitextSampler<Token> s(btfix.get(), mfix, context->bias,
|
||||
m_min_sample_size, m_default_sample_size, m_sampling_method);
|
||||
m_min_sample_size,
|
||||
m_default_sample_size,
|
||||
m_sampling_method);
|
||||
s();
|
||||
sfix = s.stats();
|
||||
}
|
||||
}
|
||||
if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn);
|
||||
|
||||
if (mdyn.size() == sphrase.size())
|
||||
sdyn = dyn->lookup(ttask, mdyn);
|
||||
|
||||
vector<PhrasePair<Token> > ppfix,ppdyn;
|
||||
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
|
||||
@ -737,6 +743,7 @@ namespace Moses
|
||||
expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log);
|
||||
sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id);
|
||||
}
|
||||
|
||||
// now we have two lists of Phrase Pairs, let's merge them
|
||||
PhrasePair<Token>::SortByTargetIdSeq sorter;
|
||||
size_t i = 0; size_t k = 0;
|
||||
@ -939,9 +946,10 @@ namespace Moses
|
||||
return mdyn.size() == myphrase.size();
|
||||
}
|
||||
|
||||
#if 0
|
||||
void
|
||||
Mmsapt
|
||||
::Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
|
||||
::Release(ttasksptr const& ttask, TargetPhraseCollection::shared_ptr*& tpc) const
|
||||
{
|
||||
if (!tpc)
|
||||
{
|
||||
@ -957,6 +965,7 @@ namespace Moses
|
||||
if (cache) cache->release(static_cast<TPCollWrapper const*>(tpc));
|
||||
tpc = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Unconditionally returns true.
// Presumably tells the caller that this phrase table can answer prefix
// queries (cf. the prefix test ending just above) -- confirm against the
// base-class contract.
bool Mmsapt
|
||||
::ProvidesPrefixCheck() const { return true; }
|
||||
|
@ -179,7 +179,7 @@ namespace Moses
|
||||
uint64_t const pid1,
|
||||
sapt::pstats const& stats,
|
||||
sapt::Bitext<Token> const & bt,
|
||||
TargetPhraseCollection* tpcoll
|
||||
TargetPhraseCollection::shared_ptr tpcoll
|
||||
) const;
|
||||
|
||||
bool
|
||||
@ -187,14 +187,14 @@ namespace Moses
|
||||
(Phrase const& src,
|
||||
uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta,
|
||||
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
|
||||
TargetPhraseCollection* tpcoll) const;
|
||||
TargetPhraseCollection::shared_ptr tpcoll) const;
|
||||
|
||||
bool
|
||||
combine_pstats
|
||||
(Phrase const& src,
|
||||
uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta,
|
||||
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
|
||||
TargetPhraseCollection* tpcoll) const;
|
||||
TargetPhraseCollection::shared_ptr tpcoll) const;
|
||||
|
||||
void load_extra_data(std::string bname, bool locking);
|
||||
void load_bias(std::string bname);
|
||||
@ -209,15 +209,15 @@ namespace Moses
|
||||
std::string const& GetName() const;
|
||||
|
||||
#ifndef NO_MOSES
|
||||
TargetPhraseCollection const*
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
|
||||
|
||||
TargetPhraseCollection const*
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
// TargetPhraseCollection::shared_ptr
|
||||
// GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
|
||||
void
|
||||
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
|
||||
const InputPathList &inputPathQueue) const;
|
||||
GetTargetPhraseCollectionBatch
|
||||
(ttasksptr const& ttask, InputPathList const& inputPathQueue) const;
|
||||
|
||||
//! Create a sentence-specific manager for SCFG rule lookup.
|
||||
ChartRuleLookupManager*
|
||||
@ -234,7 +234,8 @@ namespace Moses
|
||||
void setWeights(std::vector<float> const& w);
|
||||
|
||||
|
||||
void Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
|
||||
// void Release(ttasksptr const& ttask,
|
||||
// TargetPhraseCollection const*& tpc) const;
|
||||
// some consumer lets me know that *tpc isn't needed any more
|
||||
|
||||
|
||||
|
@ -80,7 +80,8 @@ int main(int argc, char* argv[])
|
||||
Phrase& p = *phrase;
|
||||
|
||||
cout << p << endl;
|
||||
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(ttask,p);
|
||||
TargetPhraseCollection::shared_ptr trg
|
||||
= PT->GetTargetPhraseCollectionLEGACY(ttask,p);
|
||||
if (!trg) continue;
|
||||
vector<size_t> order(trg->GetSize());
|
||||
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
|
||||
@ -118,7 +119,7 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
PT->Release(ttask, trg);
|
||||
// PT->Release(ttask, trg);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
@ -410,7 +410,7 @@ CreateTranslationOptionsForRange
|
||||
const DecodeStep &dstep = **d;
|
||||
|
||||
const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature();
|
||||
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
|
||||
TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
|
||||
|
||||
static_cast<const Tstep&>(dstep).ProcessInitialTranslation
|
||||
(m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases);
|
||||
@ -431,7 +431,7 @@ CreateTranslationOptionsForRange
|
||||
TranslationOption &inputPartialTranslOpt = **pto;
|
||||
if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep)) {
|
||||
const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
|
||||
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
|
||||
TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
|
||||
tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
|
||||
this, adhereTableLimit, targetPhrases);
|
||||
} else {
|
||||
|
@ -142,7 +142,8 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
|
||||
for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
|
||||
const InputPath &path = *m_inputPathQueue[i];
|
||||
|
||||
const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary);
|
||||
TargetPhraseCollection::shared_ptr tpColl
|
||||
= path.GetTargetPhrases(phraseDictionary);
|
||||
const WordsRange &range = path.GetWordsRange();
|
||||
|
||||
if (tpColl && tpColl->GetSize()) {
|
||||
|
Loading…
Reference in New Issue
Block a user