Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-27 22:14:57 +03:00)
Life cycle of TargetPhraseCollection is now managed via shared pointers.
This commit is contained in:
parent 7a85126a92
commit bdb0227ee9
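Before the diff, a minimal self-contained sketch of the ownership pattern this commit adopts. This is not code from the repository; the class below is only a stand-in for Moses' TargetPhraseCollection. Collections are handed around as boost::shared_ptr, an empty shared_ptr replaces NULL for "not found", and the object is freed automatically once the last holder drops it, which is why the manual delete calls disappear throughout the diff.

#include <boost/shared_ptr.hpp>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for Moses' TargetPhraseCollection (illustration only).
class TargetPhraseCollection
{
public:
  typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
  typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;

  void Add(const std::string &phrase) {
    m_phrases.push_back(phrase);
  }
  std::size_t GetSize() const {
    return m_phrases.size();
  }

private:
  std::vector<std::string> m_phrases;
};

// Old style: return a raw pointer (NULL if nothing found) that some caller
// eventually had to delete. New style, as in this commit: return a shared_ptr;
// an empty pointer means "not found" and deletion happens automatically.
TargetPhraseCollection::shared_ptr Lookup(bool found)
{
  TargetPhraseCollection::shared_ptr ret;   // empty == not found
  if (found) {
    ret.reset(new TargetPhraseCollection);  // shared_ptr takes ownership
    ret->Add("example target phrase");
  }
  return ret;
}

int main()
{
  TargetPhraseCollection::shared_ptr coll = Lookup(true);
  if (coll) std::cout << "Found " << coll->GetSize() << std::endl;
  return 0;  // coll released here; no explicit delete needed
}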
@ -249,16 +249,12 @@ size_t PhraseNode::ReadChild(Word &wordFound, uint64_t &childFilePos, const char
|
|||||||
return memRead;
|
return memRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetPhraseCollection *PhraseNode::GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
|
TargetPhraseCollection::shared_ptr
|
||||||
|
PhraseNode::
|
||||||
|
GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const
|
||||||
{
|
{
|
||||||
TargetPhraseCollection *ret = new TargetPhraseCollection();
|
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||||
|
if (m_value > 0) ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
|
||||||
if (m_value > 0)
|
|
||||||
ret->ReadFromFile(tableLimit, m_value, onDiskWrapper);
|
|
||||||
else {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +92,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const;
|
const PhraseNode *GetChild(const Word &wordSought, OnDiskWrapper &onDiskWrapper) const;
|
||||||
const TargetPhraseCollection *GetTargetPhraseCollection(size_t tableLimit, OnDiskWrapper &onDiskWrapper) const;
|
|
||||||
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection(size_t tableLimit,
|
||||||
|
OnDiskWrapper &onDiskWrapper) const;
|
||||||
|
|
||||||
void AddCounts(const std::vector<float> &counts) {
|
void AddCounts(const std::vector<float> &counts) {
|
||||||
m_counts = counts;
|
m_counts = counts;
|
||||||
|
@ -114,23 +114,22 @@ void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
Moses::TargetPhraseCollection::shared_ptr TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
||||||
, const std::vector<Moses::FactorType> &outputFactors
|
, const std::vector<Moses::FactorType> &outputFactors
|
||||||
, const Moses::PhraseDictionary &phraseDict
|
, const Moses::PhraseDictionary &phraseDict
|
||||||
, const std::vector<float> &weightT
|
, const std::vector<float> &weightT
|
||||||
, Vocab &vocab
|
, Vocab &vocab
|
||||||
, bool isSyntax) const
|
, bool isSyntax) const
|
||||||
{
|
{
|
||||||
Moses::TargetPhraseCollection *ret = new Moses::TargetPhraseCollection();
|
Moses::TargetPhraseCollection::shared_ptr ret;
|
||||||
|
ret.reset(new Moses::TargetPhraseCollection);
|
||||||
|
|
||||||
CollType::const_iterator iter;
|
CollType::const_iterator iter;
|
||||||
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
|
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
|
||||||
const TargetPhrase &tp = **iter;
|
const TargetPhrase &tp = **iter;
|
||||||
Moses::TargetPhrase *mosesPhrase = tp.ConvertToMoses(inputFactors, outputFactors
|
Moses::TargetPhrase *mosesPhrase
|
||||||
, vocab
|
= tp.ConvertToMoses(inputFactors, outputFactors, vocab,
|
||||||
, phraseDict
|
phraseDict, weightT, isSyntax);
|
||||||
, weightT
|
|
||||||
, isSyntax);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// debugging output
|
// debugging output
|
||||||
|
@ -21,6 +21,8 @@
|
|||||||
|
|
||||||
#include "TargetPhrase.h"
|
#include "TargetPhrase.h"
|
||||||
#include "Vocab.h"
|
#include "Vocab.h"
|
||||||
|
#include "moses/TargetPhraseCollection.h"
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
@ -50,6 +52,9 @@ protected:
|
|||||||
std::string m_debugStr;
|
std::string m_debugStr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;
|
||||||
|
typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
|
||||||
|
|
||||||
static size_t s_sortScoreInd;
|
static size_t s_sortScoreInd;
|
||||||
|
|
||||||
TargetPhraseCollection();
|
TargetPhraseCollection();
|
||||||
@ -69,7 +74,7 @@ public:
|
|||||||
|
|
||||||
uint64_t GetFilePos() const;
|
uint64_t GetFilePos() const;
|
||||||
|
|
||||||
Moses::TargetPhraseCollection *ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
Moses::TargetPhraseCollection::shared_ptr ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
|
||||||
, const std::vector<Moses::FactorType> &outputFactors
|
, const std::vector<Moses::FactorType> &outputFactors
|
||||||
, const Moses::PhraseDictionary &phraseDict
|
, const Moses::PhraseDictionary &phraseDict
|
||||||
, const std::vector<float> &weightT
|
, const std::vector<float> &weightT
|
||||||
|
@ -56,7 +56,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
if (node) {
|
if (node) {
|
||||||
// source phrase points to a bunch of rules
|
// source phrase points to a bunch of rules
|
||||||
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
|
TargetPhraseCollection::shared_ptr coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
|
||||||
string str = coll->GetDebugStr();
|
string str = coll->GetDebugStr();
|
||||||
cout << "Found " << coll->GetSize() << endl;
|
cout << "Found " << coll->GetSize() << endl;
|
||||||
|
|
||||||
|
@ -116,7 +116,7 @@ typedef
|
|||||||
boost::unordered_set<TargetPhrase*,PhrasePtrHasher,PhrasePtrComparator> PhraseSet;
|
boost::unordered_set<TargetPhrase*,PhrasePtrHasher,PhrasePtrComparator> PhraseSet;
|
||||||
|
|
||||||
|
|
||||||
const TargetPhraseCollection*
|
TargetPhraseCollection::shared_ptr
|
||||||
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
|
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -125,7 +125,7 @@ PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
|
|||||||
PhraseSet allPhrases;
|
PhraseSet allPhrases;
|
||||||
vector<PhraseSet> phrasesByTable(m_dictionaries.size());
|
vector<PhraseSet> phrasesByTable(m_dictionaries.size());
|
||||||
for (size_t i = 0; i < m_dictionaries.size(); ++i) {
|
for (size_t i = 0; i < m_dictionaries.size(); ++i) {
|
||||||
const TargetPhraseCollection* phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
|
TargetPhraseCollection::shared_ptr phrases = m_dictionaries[i]->GetTargetPhraseCollection(src);
|
||||||
if (phrases) {
|
if (phrases) {
|
||||||
for (TargetPhraseCollection::const_iterator j = phrases->begin();
|
for (TargetPhraseCollection::const_iterator j = phrases->begin();
|
||||||
j != phrases->end(); ++j) {
|
j != phrases->end(); ++j) {
|
||||||
|
@ -52,7 +52,7 @@ public:
|
|||||||
, const LMList &languageModels
|
, const LMList &languageModels
|
||||||
, float weightWP);
|
, float weightWP);
|
||||||
|
|
||||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const;
|
virtual TargetPhraseCollection::shared_ptr GetTargetPhraseCollection(const Phrase& src) const;
|
||||||
virtual void InitializeForInput(ttasksptr const& ttask);
|
virtual void InitializeForInput(ttasksptr const& ttask);
|
||||||
virtual ChartRuleLookupManager *CreateRuleLookupManager(
|
virtual ChartRuleLookupManager *CreateRuleLookupManager(
|
||||||
const InputType &,
|
const InputType &,
|
||||||
@ -65,7 +65,7 @@ private:
|
|||||||
typedef boost::shared_ptr<PhraseDictionaryTreeAdaptor> DictionaryHandle;
|
typedef boost::shared_ptr<PhraseDictionaryTreeAdaptor> DictionaryHandle;
|
||||||
std::vector<DictionaryHandle> m_dictionaries;
|
std::vector<DictionaryHandle> m_dictionaries;
|
||||||
std::vector<std::vector<float> > m_weights; //feature x table
|
std::vector<std::vector<float> > m_weights; //feature x table
|
||||||
mutable TargetPhraseCollection* m_targetPhrases;
|
mutable TargetPhraseCollection::shared_ptr m_targetPhrases;
|
||||||
std::vector<float> m_weightT;
|
std::vector<float> m_weightT;
|
||||||
size_t m_tableLimit;
|
size_t m_tableLimit;
|
||||||
const LMList* m_languageModels;
|
const LMList* m_languageModels;
|
||||||
|
@ -44,7 +44,7 @@ ChartParserUnknown
|
|||||||
ChartParserUnknown::~ChartParserUnknown()
|
ChartParserUnknown::~ChartParserUnknown()
|
||||||
{
|
{
|
||||||
RemoveAllInColl(m_unksrcs);
|
RemoveAllInColl(m_unksrcs);
|
||||||
RemoveAllInColl(m_cacheTargetPhraseCollection);
|
// RemoveAllInColl(m_cacheTargetPhraseCollection);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)
|
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to)
|
||||||
|
@ -27,7 +27,7 @@
|
|||||||
#include "WordsRange.h"
|
#include "WordsRange.h"
|
||||||
#include "StackVec.h"
|
#include "StackVec.h"
|
||||||
#include "InputPath.h"
|
#include "InputPath.h"
|
||||||
|
#include "TargetPhraseCollection.h"
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -38,7 +38,7 @@ class Sentence;
|
|||||||
class ChartCellCollectionBase;
|
class ChartCellCollectionBase;
|
||||||
class Word;
|
class Word;
|
||||||
class Phrase;
|
class Phrase;
|
||||||
class TargetPhraseCollection;
|
// class TargetPhraseCollection;
|
||||||
class DecodeGraph;
|
class DecodeGraph;
|
||||||
|
|
||||||
class ChartParserUnknown
|
class ChartParserUnknown
|
||||||
@ -56,7 +56,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<Phrase*> m_unksrcs;
|
std::vector<Phrase*> m_unksrcs;
|
||||||
std::list<TargetPhraseCollection*> m_cacheTargetPhraseCollection;
|
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ChartParser
|
class ChartParser
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include "StackVec.h"
|
#include "StackVec.h"
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
|
#include "TargetPhraseCollection.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
@ -23,7 +24,7 @@ public:
|
|||||||
|
|
||||||
virtual bool Empty() const = 0;
|
virtual bool Empty() const = 0;
|
||||||
|
|
||||||
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range) = 0;
|
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range) = 0;
|
||||||
|
|
||||||
virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;
|
virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;
|
||||||
|
|
||||||
|
@ -115,9 +115,13 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChartTranslationOptionList::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range)
|
void
|
||||||
|
ChartTranslationOptionList::
|
||||||
|
AddPhraseOOV(TargetPhrase &phrase,
|
||||||
|
std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
|
||||||
|
const WordsRange &range)
|
||||||
{
|
{
|
||||||
TargetPhraseCollection *tpc = new TargetPhraseCollection();
|
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
|
||||||
tpc->Add(&phrase);
|
tpc->Add(&phrase);
|
||||||
waste_memory.push_back(tpc);
|
waste_memory.push_back(tpc);
|
||||||
StackVec empty;
|
StackVec empty;
|
||||||
|
@ -55,7 +55,7 @@ public:
|
|||||||
void Add(const TargetPhraseCollection &, const StackVec &,
|
void Add(const TargetPhraseCollection &, const StackVec &,
|
||||||
const WordsRange &);
|
const WordsRange &);
|
||||||
|
|
||||||
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
|
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
|
||||||
|
|
||||||
bool Empty() const {
|
bool Empty() const {
|
||||||
return m_size == 0;
|
return m_size == 0;
|
||||||
|
@ -49,7 +49,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
|
|||||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||||
, TranslationOptionCollection *toc
|
, TranslationOptionCollection *toc
|
||||||
, bool adhereTableLimit
|
, bool adhereTableLimit
|
||||||
, const TargetPhraseCollection *phraseColl) const
|
, TargetPhraseCollection::shared_ptr phraseColl) const
|
||||||
{
|
{
|
||||||
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
||||||
// word deletion
|
// word deletion
|
||||||
@ -105,7 +105,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
|
|||||||
,PartialTranslOptColl &outputPartialTranslOptColl
|
,PartialTranslOptColl &outputPartialTranslOptColl
|
||||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||||
, const InputPath &inputPath
|
, const InputPath &inputPath
|
||||||
, const TargetPhraseCollection *phraseColl) const
|
, TargetPhraseCollection::shared_ptr phraseColl) const
|
||||||
{
|
{
|
||||||
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
||||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||||
@ -147,7 +147,8 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
|
|||||||
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
||||||
|
|
||||||
const WordsRange wordsRange(startPos, endPos);
|
const WordsRange wordsRange(startPos, endPos);
|
||||||
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
||||||
|
= phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
|
||||||
|
|
||||||
if (phraseColl != NULL) {
|
if (phraseColl != NULL) {
|
||||||
IFVERBOSE(3) {
|
IFVERBOSE(3) {
|
||||||
@ -237,8 +238,8 @@ ProcessLEGACY(TranslationOption const& in,
|
|||||||
size_t const currSize = inPhrase.GetSize();
|
size_t const currSize = inPhrase.GetSize();
|
||||||
size_t const tableLimit = pdict->GetTableLimit();
|
size_t const tableLimit = pdict->GetTableLimit();
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase const* phraseColl;
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
||||||
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
= pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
||||||
|
|
||||||
if (phraseColl != NULL) {
|
if (phraseColl != NULL) {
|
||||||
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
||||||
|
@ -48,7 +48,7 @@ public:
|
|||||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||||
, TranslationOptionCollection *toc
|
, TranslationOptionCollection *toc
|
||||||
, bool adhereTableLimit
|
, bool adhereTableLimit
|
||||||
, const TargetPhraseCollection *phraseColl) const;
|
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
||||||
|
|
||||||
|
|
||||||
/*! initialize list of partial translation options by applying the first translation step
|
/*! initialize list of partial translation options by applying the first translation step
|
||||||
@ -58,7 +58,7 @@ public:
|
|||||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||||
, size_t startPos, size_t endPos, bool adhereTableLimit
|
, size_t startPos, size_t endPos, bool adhereTableLimit
|
||||||
, const InputPath &inputPath
|
, const InputPath &inputPath
|
||||||
, const TargetPhraseCollection *phraseColl) const;
|
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
||||||
|
|
||||||
// legacy
|
// legacy
|
||||||
void ProcessInitialTranslationLEGACY(const InputType &source
|
void ProcessInitialTranslationLEGACY(const InputType &source
|
||||||
|
@ -83,7 +83,7 @@ public:
|
|||||||
|
|
||||||
void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored);
|
void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored);
|
||||||
|
|
||||||
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
|
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const WordsRange &range);
|
||||||
|
|
||||||
float GetBestScore(const ChartCellLabel *chartCell) const;
|
float GetBestScore(const ChartCellLabel *chartCell) const;
|
||||||
|
|
||||||
@ -160,7 +160,7 @@ template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &, const WordsRange &range)
|
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &, const WordsRange &range)
|
||||||
{
|
{
|
||||||
std::vector<lm::WordIndex> words;
|
std::vector<lm::WordIndex> words;
|
||||||
UTIL_THROW_IF2(phrase.GetSize() > 1,
|
UTIL_THROW_IF2(phrase.GetSize() > 1,
|
||||||
|
@ -39,34 +39,40 @@ InputPath::~InputPath()
|
|||||||
|
|
||||||
// std::cerr << "Deconstructing InputPath" << std::endl;
|
// std::cerr << "Deconstructing InputPath" << std::endl;
|
||||||
|
|
||||||
// Since there is no way for the Phrase Dictionaries to tell in
|
|
||||||
// which (sentence) context phrases were looked up, we tell them
|
// // NOT NEEDED ANY MORE SINCE THE SWITCH TO SHARED POINTERS
|
||||||
// now that the phrase isn't needed any more by this inputPath
|
// // Since there is no way for the Phrase Dictionaries to tell in
|
||||||
typedef std::pair<const TargetPhraseCollection*, const void* > entry;
|
// // which (sentence) context phrases were looked up, we tell them
|
||||||
std::map<const PhraseDictionary*, entry>::iterator iter;
|
// // now that the phrase isn't needed any more by this inputPath
|
||||||
ttasksptr theTask = this->ttask.lock();
|
// typedef std::pair<boost::shared_ptr<TargetPhraseCollection>, const void* > entry;
|
||||||
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
|
// std::map<const PhraseDictionary*, entry>::iterator iter;
|
||||||
{
|
// ttasksptr theTask = this->ttask.lock();
|
||||||
// std::cerr << iter->second.first << " decommissioned." << std::endl;
|
// for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter)
|
||||||
iter->first->Release(theTask, iter->second.first);
|
// {
|
||||||
}
|
// // std::cerr << iter->second.first << " decommissioned." << std::endl;
|
||||||
|
// iter->first->Release(theTask, iter->second.first);
|
||||||
|
// }
|
||||||
|
|
||||||
delete m_inputScore;
|
delete m_inputScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetPhraseCollection *InputPath::GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
|
TargetPhraseCollection::shared_ptr
|
||||||
|
InputPath::
|
||||||
|
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const
|
||||||
{
|
{
|
||||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
TargetPhrases::const_iterator iter;
|
||||||
iter = m_targetPhrases.find(&phraseDictionary);
|
iter = m_targetPhrases.find(&phraseDictionary);
|
||||||
if (iter == m_targetPhrases.end()) {
|
if (iter == m_targetPhrases.end()) {
|
||||||
return NULL;
|
return TargetPhraseCollection::shared_ptr();
|
||||||
}
|
}
|
||||||
return iter->second.first;
|
return iter->second.first;
|
||||||
}
|
}
|
||||||
|
|
||||||
const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
|
const void*
|
||||||
|
InputPath::
|
||||||
|
GetPtNode(const PhraseDictionary &phraseDictionary) const
|
||||||
{
|
{
|
||||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
TargetPhrases::const_iterator iter;
|
||||||
iter = m_targetPhrases.find(&phraseDictionary);
|
iter = m_targetPhrases.find(&phraseDictionary);
|
||||||
if (iter == m_targetPhrases.end()) {
|
if (iter == m_targetPhrases.end()) {
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -74,11 +80,14 @@ const void *InputPath::GetPtNode(const PhraseDictionary &phraseDictionary) const
|
|||||||
return iter->second.second;
|
return iter->second.second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void InputPath::SetTargetPhrases(const PhraseDictionary &phraseDictionary
|
void
|
||||||
, const TargetPhraseCollection *targetPhrases
|
InputPath::
|
||||||
, const void *ptNode)
|
SetTargetPhrases(const PhraseDictionary &phraseDictionary,
|
||||||
|
TargetPhraseCollection::shared_ptr const& targetPhrases,
|
||||||
|
const void *ptNode)
|
||||||
{
|
{
|
||||||
std::pair<const TargetPhraseCollection*, const void*> value(targetPhrases, ptNode);
|
std::pair<TargetPhraseCollection::shared_ptr, const void*>
|
||||||
|
value(targetPhrases, ptNode);
|
||||||
m_targetPhrases[&phraseDictionary] = value;
|
m_targetPhrases[&phraseDictionary] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,10 +102,10 @@ const Word &InputPath::GetLastWord() const
|
|||||||
size_t InputPath::GetTotalRuleSize() const
|
size_t InputPath::GetTotalRuleSize() const
|
||||||
{
|
{
|
||||||
size_t ret = 0;
|
size_t ret = 0;
|
||||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
TargetPhrases::const_iterator iter;
|
||||||
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
|
for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) {
|
||||||
// const PhraseDictionary *pt = iter->first;
|
// const PhraseDictionary *pt = iter->first;
|
||||||
const TargetPhraseCollection *tpColl = iter->second.first;
|
TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
|
||||||
|
|
||||||
if (tpColl) {
|
if (tpColl) {
|
||||||
ret += tpColl->GetSize();
|
ret += tpColl->GetSize();
|
||||||
@ -110,10 +119,10 @@ std::ostream& operator<<(std::ostream& out, const InputPath& obj)
|
|||||||
{
|
{
|
||||||
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase();
|
out << &obj << " " << obj.GetWordsRange() << " " << obj.GetPrevPath() << " " << obj.GetPhrase();
|
||||||
|
|
||||||
std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> >::const_iterator iter;
|
InputPath::TargetPhrases::const_iterator iter;
|
||||||
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
|
for (iter = obj.m_targetPhrases.begin(); iter != obj.m_targetPhrases.end(); ++iter) {
|
||||||
const PhraseDictionary *pt = iter->first;
|
const PhraseDictionary *pt = iter->first;
|
||||||
const TargetPhraseCollection *tpColl = iter->second.first;
|
boost::shared_ptr<TargetPhraseCollection const> tpColl = iter->second.first;
|
||||||
|
|
||||||
out << pt << "=";
|
out << pt << "=";
|
||||||
if (tpColl) {
|
if (tpColl) {
|
||||||
|
@ -8,12 +8,12 @@
|
|||||||
#include "WordsRange.h"
|
#include "WordsRange.h"
|
||||||
#include "NonTerminal.h"
|
#include "NonTerminal.h"
|
||||||
#include "moses/FactorCollection.h"
|
#include "moses/FactorCollection.h"
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include "TargetPhraseCollection.h"
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
class PhraseDictionary;
|
class PhraseDictionary;
|
||||||
class TargetPhraseCollection;
|
|
||||||
class ScoreComponentCollection;
|
class ScoreComponentCollection;
|
||||||
class TargetPhrase;
|
class TargetPhrase;
|
||||||
class InputPath;
|
class InputPath;
|
||||||
@ -32,7 +32,12 @@ class InputPath
|
|||||||
friend std::ostream& operator<<(std::ostream& out, const InputPath &obj);
|
friend std::ostream& operator<<(std::ostream& out, const InputPath &obj);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef std::map<const PhraseDictionary*, std::pair<const TargetPhraseCollection*, const void*> > TargetPhrases;
|
|
||||||
|
typedef std::pair<TargetPhraseCollection::shared_ptr, const void*>
|
||||||
|
TPCollStoreEntry;
|
||||||
|
|
||||||
|
typedef std::map<const PhraseDictionary*, TPCollStoreEntry>
|
||||||
|
TargetPhrases;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ttaskwptr const ttask;
|
ttaskwptr const ttask;
|
||||||
@ -96,10 +101,14 @@ public:
|
|||||||
m_nextNode = nextNode;
|
m_nextNode = nextNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetTargetPhrases(const PhraseDictionary &phraseDictionary
|
void
|
||||||
, const TargetPhraseCollection *targetPhrases
|
SetTargetPhrases(const PhraseDictionary &phraseDictionary,
|
||||||
, const void *ptNode);
|
TargetPhraseCollection::shared_ptr const& targetPhrases,
|
||||||
const TargetPhraseCollection *GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
|
const void *ptNode);
|
||||||
|
|
||||||
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhrases(const PhraseDictionary &phraseDictionary) const;
|
||||||
|
|
||||||
const TargetPhrases &GetTargetPhrases() const {
|
const TargetPhrases &GetTargetPhrases() const {
|
||||||
return m_targetPhrases;
|
return m_targetPhrases;
|
||||||
}
|
}
|
||||||
|
@ -63,27 +63,29 @@ void PDTAimp::CleanUp()
|
|||||||
{
|
{
|
||||||
assert(m_dict);
|
assert(m_dict);
|
||||||
m_dict->FreeMemory();
|
m_dict->FreeMemory();
|
||||||
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
|
// for(size_t i=0; i<m_tgtColls.size(); ++i) m_tgtColls[i].reset();
|
||||||
m_tgtColls.clear();
|
m_tgtColls.clear();
|
||||||
m_cache.clear();
|
m_cache.clear();
|
||||||
m_rangeCache.clear();
|
m_rangeCache.clear();
|
||||||
uniqSrcPhr.clear();
|
uniqSrcPhr.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase const*
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||||
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
||||||
{
|
{
|
||||||
|
|
||||||
assert(m_dict);
|
assert(m_dict);
|
||||||
if(src.GetSize()==0) return 0;
|
|
||||||
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
|
||||||
|
if(src.GetSize()==0) return ret;
|
||||||
|
|
||||||
std::pair<MapSrc2Tgt::iterator,bool> piter;
|
std::pair<MapSrc2Tgt::iterator,bool> piter;
|
||||||
if(useCache) {
|
if(useCache) {
|
||||||
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
|
piter=m_cache.insert(std::make_pair(src, ret));
|
||||||
if(!piter.second) return piter.first->second;
|
if(!piter.second) return piter.first->second;
|
||||||
} else if (m_cache.size()) {
|
} else if (m_cache.size()) {
|
||||||
MapSrc2Tgt::const_iterator i=m_cache.find(src);
|
MapSrc2Tgt::const_iterator i=m_cache.find(src);
|
||||||
return (i!=m_cache.end() ? i->second : 0);
|
return (i!=m_cache.end() ? i->second : ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> srcString(src.GetSize());
|
std::vector<std::string> srcString(src.GetSize());
|
||||||
@ -97,7 +99,7 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
|||||||
std::vector<std::string> wacands;
|
std::vector<std::string> wacands;
|
||||||
m_dict->GetTargetCandidates(srcString,cands,wacands);
|
m_dict->GetTargetCandidates(srcString,cands,wacands);
|
||||||
if(cands.empty()) {
|
if(cands.empty()) {
|
||||||
return 0;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: Multiple models broken here
|
//TODO: Multiple models broken here
|
||||||
@ -140,16 +142,14 @@ PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
|
|||||||
sourcePhrases.push_back(src);
|
sourcePhrases.push_back(src);
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase *rv;
|
ret = PruneTargetCandidates(tCands,costs, sourcePhrases);
|
||||||
rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
|
if(ret->IsEmpty()) {
|
||||||
if(rv->IsEmpty()) {
|
ret.reset();
|
||||||
delete rv;
|
|
||||||
return 0;
|
|
||||||
} else {
|
} else {
|
||||||
if(useCache) piter.first->second=rv;
|
if(useCache) piter.first->second = ret;
|
||||||
m_tgtColls.push_back(rv);
|
m_tgtColls.push_back(ret);
|
||||||
return rv;
|
|
||||||
}
|
}
|
||||||
|
return ret;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -352,7 +352,8 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
|||||||
pathExplored[len]+=exploredPaths[len];
|
pathExplored[len]+=exploredPaths[len];
|
||||||
|
|
||||||
|
|
||||||
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
|
// m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
|
||||||
|
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize()));
|
||||||
|
|
||||||
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
|
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
|
||||||
assert(i->first.first<m_rangeCache.size());
|
assert(i->first.first<m_rangeCache.size());
|
||||||
@ -386,10 +387,11 @@ void PDTAimp::CacheSource(ConfusionNet const& src)
|
|||||||
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
|
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||||
|
rv = PruneTargetCandidates(tCands, costs, sourcePhrases);
|
||||||
|
|
||||||
if(rv->IsEmpty())
|
if(rv->IsEmpty())
|
||||||
delete rv;
|
rv.reset();
|
||||||
else {
|
else {
|
||||||
m_rangeCache[i->first.first][i->first.second-1]=rv;
|
m_rangeCache[i->first.first][i->first.second-1]=rv;
|
||||||
m_tgtColls.push_back(rv);
|
m_tgtColls.push_back(rv);
|
||||||
@ -428,7 +430,8 @@ void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
|
|||||||
targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
|
targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||||
|
PDTAimp::PruneTargetCandidates
|
||||||
(const std::vector<TargetPhrase> & tCands,
|
(const std::vector<TargetPhrase> & tCands,
|
||||||
std::vector<std::pair<float,size_t> >& costs,
|
std::vector<std::pair<float,size_t> >& costs,
|
||||||
const std::vector<Phrase> &sourcePhrases) const
|
const std::vector<Phrase> &sourcePhrases) const
|
||||||
@ -437,7 +440,8 @@ TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
|
|||||||
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
|
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
|
||||||
"Number of target phrases must equal number of source phrases");
|
"Number of target phrases must equal number of source phrases");
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr rv;
|
||||||
|
rv.reset(new TargetPhraseCollectionWithSourcePhrase);
|
||||||
|
|
||||||
|
|
||||||
// set limit to tableLimit or actual size, whatever is smaller
|
// set limit to tableLimit or actual size, whatever is smaller
|
||||||
|
@ -44,10 +44,10 @@ public:
|
|||||||
std::vector<FactorType> m_input,m_output;
|
std::vector<FactorType> m_input,m_output;
|
||||||
PhraseDictionaryTree *m_dict;
|
PhraseDictionaryTree *m_dict;
|
||||||
const InputFeature *m_inputFeature;
|
const InputFeature *m_inputFeature;
|
||||||
typedef std::vector<TargetPhraseCollectionWithSourcePhrase const*> vTPC;
|
typedef std::vector<TargetPhraseCollectionWithSourcePhrase::shared_ptr> vTPC;
|
||||||
mutable vTPC m_tgtColls;
|
mutable vTPC m_tgtColls;
|
||||||
|
|
||||||
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase const*> MapSrc2Tgt;
|
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase::shared_ptr> MapSrc2Tgt;
|
||||||
mutable MapSrc2Tgt m_cache;
|
mutable MapSrc2Tgt m_cache;
|
||||||
PhraseDictionaryTreeAdaptor *m_obj;
|
PhraseDictionaryTreeAdaptor *m_obj;
|
||||||
int useCache;
|
int useCache;
|
||||||
@ -69,7 +69,7 @@ public:
|
|||||||
|
|
||||||
void CleanUp();
|
void CleanUp();
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase const*
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||||
GetTargetPhraseCollection(Phrase const &src) const;
|
GetTargetPhraseCollection(Phrase const &src) const;
|
||||||
|
|
||||||
void Create(const std::vector<FactorType> &input
|
void Create(const std::vector<FactorType> &input
|
||||||
@ -121,7 +121,7 @@ public:
|
|||||||
const std::string *alignmentString,
|
const std::string *alignmentString,
|
||||||
Phrase const* srcPtr=0) const;
|
Phrase const* srcPtr=0) const;
|
||||||
|
|
||||||
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr PruneTargetCandidates
|
||||||
(const std::vector<TargetPhrase> & tCands,
|
(const std::vector<TargetPhrase> & tCands,
|
||||||
std::vector<std::pair<float,size_t> >& costs,
|
std::vector<std::pair<float,size_t> >& costs,
|
||||||
const std::vector<Phrase> &sourcePhrases) const;
|
const std::vector<Phrase> &sourcePhrases) const;
|
||||||
|
@ -28,9 +28,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
|
|||||||
HyperPath source;
|
HyperPath source;
|
||||||
SynthesizeHyperPath(e, source);
|
SynthesizeHyperPath(e, source);
|
||||||
TargetPhrase *tp = SynthesizeTargetPhrase(e);
|
TargetPhrase *tp = SynthesizeTargetPhrase(e);
|
||||||
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(m_hyperTree,
|
TargetPhraseCollection::shared_ptr tpc
|
||||||
source);
|
= GetOrCreateTargetPhraseCollection(m_hyperTree, source);
|
||||||
tpc.Add(tp);
|
tpc->Add(tp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
|
void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
|
||||||
|
@ -14,7 +14,7 @@ void HyperTree::Node::Prune(std::size_t tableLimit)
|
|||||||
p->second.Prune(tableLimit);
|
p->second.Prune(tableLimit);
|
||||||
}
|
}
|
||||||
// Prune TargetPhraseCollection at this node.
|
// Prune TargetPhraseCollection at this node.
|
||||||
m_targetPhraseCollection.Prune(true, tableLimit);
|
m_targetPhraseCollection->Prune(true, tableLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HyperTree::Node::Sort(std::size_t tableLimit)
|
void HyperTree::Node::Sort(std::size_t tableLimit)
|
||||||
@ -24,7 +24,7 @@ void HyperTree::Node::Sort(std::size_t tableLimit)
|
|||||||
p->second.Sort(tableLimit);
|
p->second.Sort(tableLimit);
|
||||||
}
|
}
|
||||||
// Sort TargetPhraseCollection at this node.
|
// Sort TargetPhraseCollection at this node.
|
||||||
m_targetPhraseCollection.Sort(true, tableLimit);
|
m_targetPhraseCollection->Sort(true, tableLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
HyperTree::Node *HyperTree::Node::GetOrCreateChild(
|
HyperTree::Node *HyperTree::Node::GetOrCreateChild(
|
||||||
@ -40,7 +40,7 @@ const HyperTree::Node *HyperTree::Node::GetChild(
|
|||||||
return (p == m_map.end()) ? NULL : &p->second;
|
return (p == m_map.end()) ? NULL : &p->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection &HyperTree::GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection(
|
||||||
const HyperPath &hyperPath)
|
const HyperPath &hyperPath)
|
||||||
{
|
{
|
||||||
Node &node = GetOrCreateNode(hyperPath);
|
Node &node = GetOrCreateNode(hyperPath);
|
||||||
|
@ -37,7 +37,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool HasRules() const {
|
bool HasRules() const {
|
||||||
return !m_targetPhraseCollection.IsEmpty();
|
return !m_targetPhraseCollection->IsEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Prune(std::size_t tableLimit);
|
void Prune(std::size_t tableLimit);
|
||||||
@ -47,11 +47,13 @@ public:
|
|||||||
|
|
||||||
const Node *GetChild(const HyperPath::NodeSeq &) const;
|
const Node *GetChild(const HyperPath::NodeSeq &) const;
|
||||||
|
|
||||||
const TargetPhraseCollection &GetTargetPhraseCollection() const {
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection() const {
|
||||||
return m_targetPhraseCollection;
|
return m_targetPhraseCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection &GetTargetPhraseCollection() {
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection() {
|
||||||
return m_targetPhraseCollection;
|
return m_targetPhraseCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,9 +61,11 @@ public:
|
|||||||
return m_map;
|
return m_map;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Node() : m_targetPhraseCollection(new TargetPhraseCollection) { }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Map m_map;
|
Map m_map;
|
||||||
TargetPhraseCollection m_targetPhraseCollection;
|
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
||||||
};
|
};
|
||||||
|
|
||||||
HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
|
HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
|
||||||
@ -73,7 +77,8 @@ public:
|
|||||||
private:
|
private:
|
||||||
friend class HyperTreeCreator;
|
friend class HyperTreeCreator;
|
||||||
|
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const HyperPath &);
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetOrCreateTargetPhraseCollection(const HyperPath &);
|
||||||
|
|
||||||
Node &GetOrCreateNode(const HyperPath &);
|
Node &GetOrCreateNode(const HyperPath &);
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ protected:
|
|||||||
|
|
||||||
// Provide access to HyperTree's private GetOrCreateTargetPhraseCollection
|
// Provide access to HyperTree's private GetOrCreateTargetPhraseCollection
|
||||||
// function.
|
// function.
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
|
||||||
HyperTree &trie, const HyperPath &fragment) {
|
HyperTree &trie, const HyperPath &fragment) {
|
||||||
return trie.GetOrCreateTargetPhraseCollection(fragment);
|
return trie.GetOrCreateTargetPhraseCollection(fragment);
|
||||||
}
|
}
|
||||||
|
@ -130,9 +130,9 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
|||||||
ff.GetFeaturesToApply());
|
ff.GetFeaturesToApply());
|
||||||
|
|
||||||
// Add rule to trie.
|
// Add rule to trie.
|
||||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr phraseColl
|
||||||
trie, sourceFragment);
|
= GetOrCreateTargetPhraseCollection(trie, sourceFragment);
|
||||||
phraseColl.Add(targetPhrase);
|
phraseColl->Add(targetPhrase);
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
@ -51,8 +51,8 @@ void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
|
|||||||
m_hyperedge.label.inputWeight += (*p)->weight;
|
m_hyperedge.label.inputWeight += (*p)->weight;
|
||||||
}
|
}
|
||||||
// Set the output hyperedge label's translation set pointer.
|
// Set the output hyperedge label's translation set pointer.
|
||||||
m_hyperedge.label.translations =
|
m_hyperedge.label.translations
|
||||||
&(item.trieNode->GetTargetPhraseCollection());
|
= item.trieNode->GetTargetPhraseCollection();
|
||||||
// Pass the output hyperedge to the callback.
|
// Pass the output hyperedge to the callback.
|
||||||
callback(m_hyperedge);
|
callback(m_hyperedge);
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@ namespace Syntax
|
|||||||
|
|
||||||
struct PLabel {
|
struct PLabel {
|
||||||
float inputWeight;
|
float inputWeight;
|
||||||
const TargetPhraseCollection *translations;
|
TargetPhraseCollection::shared_ptr translations;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // Syntax
|
} // Syntax
|
||||||
|
@ -32,9 +32,10 @@ boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
|
|||||||
// TODO Check ownership and fix any leaks.
|
// TODO Check ownership and fix any leaks.
|
||||||
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
|
Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
|
||||||
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
|
TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
|
||||||
TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr tpc;
|
||||||
*trie, *srcPhrase, *tp, NULL); // TODO Check NULL is valid argument
|
tpc= GetOrCreateTargetPhraseCollection(*trie, *srcPhrase, *tp, NULL);
|
||||||
tpc.Add(tp);
|
// TODO Check NULL is valid argument
|
||||||
|
tpc->Add(tp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,9 +132,9 @@ void RecursiveCYKPlusParser<Callback>::AddAndExtend(
|
|||||||
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
|
m_hyperedge.tail.push_back(const_cast<PVertex *>(&vertex));
|
||||||
|
|
||||||
// Add target phrase collection (except if rule is empty or unary).
|
// Add target phrase collection (except if rule is empty or unary).
|
||||||
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();
|
TargetPhraseCollection::shared_ptr tpc = node.GetTargetPhraseCollection();
|
||||||
if (!tpc.IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
|
if (!tpc->IsEmpty() && !IsNonLexicalUnary(m_hyperedge)) {
|
||||||
m_hyperedge.label.translations = &tpc;
|
m_hyperedge.label.translations = tpc;
|
||||||
(*m_callback)(m_hyperedge, end);
|
(*m_callback)(m_hyperedge, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ Scope3Parser<Callback>::~Scope3Parser()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename Callback>
|
template<typename Callback>
|
||||||
void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
|
void Scope3Parser<Callback>::
|
||||||
Callback &callback)
|
EnumerateHyperedges(const WordsRange &range, Callback &callback)
|
||||||
{
|
{
|
||||||
const std::size_t start = range.GetStartPos();
|
const std::size_t start = range.GetStartPos();
|
||||||
const std::size_t end = range.GetEndPos();
|
const std::size_t end = range.GetEndPos();
|
||||||
@ -64,8 +64,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
|
|||||||
|
|
||||||
// Ask the grammar for the mapping from label sequences to target phrase
|
// Ask the grammar for the mapping from label sequences to target phrase
|
||||||
// collections for this pattern.
|
// collections for this pattern.
|
||||||
const RuleTrie::Node::LabelMap &labelMap =
|
const RuleTrie::Node::LabelMap &labelMap = patNode->m_node->GetLabelMap();
|
||||||
patNode->m_node->GetLabelMap();
|
|
||||||
|
|
||||||
// For each label sequence, search the lattice for the set of PHyperedge
|
// For each label sequence, search the lattice for the set of PHyperedge
|
||||||
// tails.
|
// tails.
|
||||||
@ -73,7 +72,7 @@ void Scope3Parser<Callback>::EnumerateHyperedges(const WordsRange &range,
|
|||||||
RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin();
|
RuleTrie::Node::LabelMap::const_iterator q = labelMap.begin();
|
||||||
for (; q != labelMap.end(); ++q) {
|
for (; q != labelMap.end(); ++q) {
|
||||||
const std::vector<int> &labelSeq = q->first;
|
const std::vector<int> &labelSeq = q->first;
|
||||||
const TargetPhraseCollection &tpc = q->second;
|
TargetPhraseCollection::shared_ptr tpc = q->second;
|
||||||
// For many label sequences there won't be any corresponding paths through
|
// For many label sequences there won't be any corresponding paths through
|
||||||
// the lattice. As an optimisation, we use m_quickCheckTable to test
|
// the lattice. As an optimisation, we use m_quickCheckTable to test
|
||||||
// for this and we don't begin a search if there are no paths to find.
|
// for this and we don't begin a search if there are no paths to find.
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#include "moses/Syntax/PHyperedge.h"
|
#include "moses/Syntax/PHyperedge.h"
|
||||||
|
|
||||||
#include "TailLattice.h"
|
#include "TailLattice.h"
|
||||||
|
#include "moses/TargetPhraseCollection.h"
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
namespace Syntax
|
namespace Syntax
|
||||||
@ -25,13 +25,14 @@ public:
|
|||||||
, m_key(key)
|
, m_key(key)
|
||||||
, m_ranges(ranges) {}
|
, m_ranges(ranges) {}
|
||||||
|
|
||||||
void Search(const std::vector<int> &labels, const TargetPhraseCollection &tpc,
|
void Search(const std::vector<int> &labels,
|
||||||
|
const TargetPhraseCollection::shared_ptr tpc,
|
||||||
Callback &callback) {
|
Callback &callback) {
|
||||||
m_labels = &labels;
|
m_labels = &labels;
|
||||||
m_matchCB = &callback;
|
m_matchCB = &callback;
|
||||||
m_hyperedge.head = 0;
|
m_hyperedge.head = 0;
|
||||||
m_hyperedge.tail.clear();
|
m_hyperedge.tail.clear();
|
||||||
m_hyperedge.label.translations = &tpc;
|
m_hyperedge.label.translations = tpc;
|
||||||
SearchInner(0, 0, 0);
|
SearchInner(0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,8 +28,9 @@ public:
|
|||||||
private:
|
private:
|
||||||
friend class RuleTrieCreator;
|
friend class RuleTrieCreator;
|
||||||
|
|
||||||
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
virtual TargetPhraseCollection::shared_ptr
|
||||||
const Phrase &source, const TargetPhrase &target,
|
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||||
|
const TargetPhrase &target,
|
||||||
const Word *sourceLHS) = 0;
|
const Word *sourceLHS) = 0;
|
||||||
|
|
||||||
virtual void SortAndPrune(std::size_t) = 0;
|
virtual void SortAndPrune(std::size_t) = 0;
|
||||||
|
@ -33,7 +33,7 @@ void RuleTrieCYKPlus::Node::Prune(std::size_t tableLimit)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// prune TargetPhraseCollection in this node
|
// prune TargetPhraseCollection in this node
|
||||||
m_targetPhraseCollection.Prune(true, tableLimit);
|
m_targetPhraseCollection->Prune(true, tableLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
|
void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
|
||||||
@ -49,7 +49,7 @@ void RuleTrieCYKPlus::Node::Sort(std::size_t tableLimit)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// prune TargetPhraseCollection in this node
|
// prune TargetPhraseCollection in this node
|
||||||
m_targetPhraseCollection.Sort(true, tableLimit);
|
m_targetPhraseCollection->Sort(true, tableLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
|
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateChild(
|
||||||
@ -86,8 +86,11 @@ const RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetNonTerminalChild(
|
|||||||
return (p == m_nonTermMap.end()) ? NULL : &p->second;
|
return (p == m_nonTermMap.end()) ? NULL : &p->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection &RuleTrieCYKPlus::GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
|
RuleTrieCYKPlus::
|
||||||
|
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||||
|
const TargetPhrase &target,
|
||||||
|
const Word *sourceLHS)
|
||||||
{
|
{
|
||||||
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
|
Node &currNode = GetOrCreateNode(source, target, sourceLHS);
|
||||||
return currNode.GetTargetPhraseCollection();
|
return currNode.GetTargetPhraseCollection();
|
||||||
|
@ -38,7 +38,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool HasRules() const {
|
bool HasRules() const {
|
||||||
return !m_targetPhraseCollection.IsEmpty();
|
return !m_targetPhraseCollection->IsEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Prune(std::size_t tableLimit);
|
void Prune(std::size_t tableLimit);
|
||||||
@ -50,11 +50,13 @@ public:
|
|||||||
const Node *GetChild(const Word &sourceTerm) const;
|
const Node *GetChild(const Word &sourceTerm) const;
|
||||||
const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
|
const Node *GetNonTerminalChild(const Word &targetNonTerm) const;
|
||||||
|
|
||||||
const TargetPhraseCollection &GetTargetPhraseCollection() const {
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection() const {
|
||||||
return m_targetPhraseCollection;
|
return m_targetPhraseCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection &GetTargetPhraseCollection() {
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection() {
|
||||||
return m_targetPhraseCollection;
|
return m_targetPhraseCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,10 +68,12 @@ public:
|
|||||||
return m_nonTermMap;
|
return m_nonTermMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Node() : m_targetPhraseCollection(new TargetPhraseCollection) {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SymbolMap m_sourceTermMap;
|
SymbolMap m_sourceTermMap;
|
||||||
SymbolMap m_nonTermMap;
|
SymbolMap m_nonTermMap;
|
||||||
TargetPhraseCollection m_targetPhraseCollection;
|
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
||||||
};
|
};
|
||||||
|
|
||||||
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
|
RuleTrieCYKPlus(const RuleTableFF *ff) : RuleTrie(ff) {}
|
||||||
@ -81,8 +85,9 @@ public:
|
|||||||
bool HasPreterminalRule(const Word &) const;
|
bool HasPreterminalRule(const Word &) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
GetOrCreateTargetPhraseCollection
|
||||||
|
(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
||||||
|
|
||||||
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||||
const Word *sourceLHS);
|
const Word *sourceLHS);
|
||||||
|
@ -21,8 +21,9 @@ protected:
|
|||||||
|
|
||||||
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
|
// Provide access to RuleTrie's private GetOrCreateTargetPhraseCollection
|
||||||
// function.
|
// function.
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
|
GetOrCreateTargetPhraseCollection
|
||||||
|
( RuleTrie &trie, const Phrase &source, const TargetPhrase &target,
|
||||||
const Word *sourceLHS) {
|
const Word *sourceLHS) {
|
||||||
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
|
return trie.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
|
||||||
}
|
}
|
||||||
|
@ -125,9 +125,10 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
|||||||
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
|
targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
|
||||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
|
targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());
|
||||||
|
|
||||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr phraseColl
|
||||||
trie, sourcePhrase, *targetPhrase, sourceLHS);
|
= GetOrCreateTargetPhraseCollection(trie, sourcePhrase,
|
||||||
phraseColl.Add(targetPhrase);
|
*targetPhrase, sourceLHS);
|
||||||
|
phraseColl->Add(targetPhrase);
|
||||||
|
|
||||||
// not implemented correctly in memory pt. just delete it for now
|
// not implemented correctly in memory pt. just delete it for now
|
||||||
delete sourceLHS;
|
delete sourceLHS;
|
||||||
|
@ -33,7 +33,7 @@ void RuleTrieScope3::Node::Prune(std::size_t tableLimit)
|
|||||||
|
|
||||||
// Prune TargetPhraseCollections at this node.
|
// Prune TargetPhraseCollections at this node.
|
||||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||||
p->second.Prune(true, tableLimit);
|
p->second->Prune(true, tableLimit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,7 +50,7 @@ void RuleTrieScope3::Node::Sort(std::size_t tableLimit)
|
|||||||
|
|
||||||
// Sort TargetPhraseCollections at this node.
|
// Sort TargetPhraseCollections at this node.
|
||||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||||
p->second.Sort(true, tableLimit);
|
p->second->Sort(true, tableLimit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,9 +75,10 @@ RuleTrieScope3::Node *RuleTrieScope3::Node::GetOrCreateNonTerminalChild(
   return m_gapNode;
 }

-TargetPhraseCollection &
-RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
-    const TargetPhrase &target)
+TargetPhraseCollection::shared_ptr
+RuleTrieScope3::
+Node::
+GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
 {
   const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
   const std::size_t rank = alignmentInfo.GetSize();
@@ -94,12 +95,16 @@ RuleTrieScope3::Node::GetOrCreateTargetPhraseCollection(
     const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
     vec.push_back(InsertLabel(i++, targetNonTerm));
   }
-  return m_labelMap[vec];
+  TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
+  if (!ret) ret.reset(new TargetPhraseCollection);
+  return ret;
 }

-TargetPhraseCollection &RuleTrieScope3::GetOrCreateTargetPhraseCollection(
-    const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
+TargetPhraseCollection::shared_ptr
+RuleTrieScope3::
+GetOrCreateTargetPhraseCollection(const Phrase &source,
+                                  const TargetPhrase &target,
+                                  const Word *sourceLHS)
 {
   Node &currNode = GetOrCreateNode(source, target, sourceLHS);
   return currNode.GetOrCreateTargetPhraseCollection(target);
@@ -35,7 +35,7 @@ public:
                         SymbolEqualityPred> TerminalMap;

   typedef boost::unordered_map<std::vector<int>,
-          TargetPhraseCollection> LabelMap;
+          TargetPhraseCollection::shared_ptr> LabelMap;

   ~Node() {
     delete m_gapNode;
@@ -61,8 +61,8 @@ public:

   Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);

-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
-      const TargetPhrase &);
+  TargetPhraseCollection::shared_ptr
+  GetOrCreateTargetPhraseCollection(const TargetPhrase &);

   bool IsLeaf() const {
     return m_terminalMap.empty() && m_gapNode == NULL;
@@ -106,8 +106,10 @@ public:
   bool HasPreterminalRule(const Word &) const;

 private:
-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
-      const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
+  TargetPhraseCollection::shared_ptr
+  GetOrCreateTargetPhraseCollection(const Phrase &source,
+                                    const TargetPhrase &target,
+                                    const Word *sourceLHS);

   Node &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
                         const Word *sourceLHS);
@@ -17,7 +17,7 @@ struct PVertex;
 struct SHyperedgeBundle {
   float inputWeight;
   std::vector<const SVertexStack*> stacks;
-  const TargetPhraseCollection *translations;
+  TargetPhraseCollection::shared_ptr translations;

   friend void swap(SHyperedgeBundle &x, SHyperedgeBundle &y) {
     using std::swap;
@@ -17,9 +17,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node)
   const Word &sourceLhs = node.pvertex.symbol;
   boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node));
   TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs);
-  TargetPhraseCollection &tpc = GetOrCreateTargetPhraseCollection(
-      m_ruleTrie, sourceLhs, *sourceRhs);
-  tpc.Add(tp);
+  TargetPhraseCollection::shared_ptr tpc
+  = GetOrCreateTargetPhraseCollection(m_ruleTrie, sourceLhs, *sourceRhs);
+  tpc->Add(tp);
 }

 Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node)
@@ -48,11 +48,11 @@ public:

   const Node *GetChild(const HyperPath::NodeSeq &) const;

-  const TargetPhraseCollection &GetTargetPhraseCollection() const {
+  const TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() const {
     return m_targetPhraseCollection;
   }

-  TargetPhraseCollection &GetTargetPhraseCollection() {
+  TargetPhraseCollection::shared_ptr GetTargetPhraseCollection() {
     return m_targetPhraseCollection;
   }

@@ -76,7 +76,7 @@ const Node &GetRootNode() const
 private:
   friend class RuleTrieCreator;

-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
+  TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
     const Word &sourceLHS, const Phrase &sourceRHS);

   Node &GetOrCreateNode(const Phrase &sourceRHS);
@@ -61,7 +61,7 @@ void RuleMatcherSCFG<Callback>::Match(const InputTree::Node &inNode,
     if (candidate.pvertex.span.GetEndPos() == inNode.pvertex.span.GetEndPos()) {
       // Check if the trie node has any rules with a LHS that match inNode.
       const Word &lhs = inNode.pvertex.symbol;
-      const TargetPhraseCollection *tpc =
+      TargetPhraseCollection::shared_ptr tpc =
         newTrieNode.GetTargetPhraseCollection(lhs);
       if (tpc) {
         m_hyperedge.label.translations = tpc;
@@ -35,7 +35,7 @@ void RuleTrie::Node::Prune(std::size_t tableLimit)
   // Prune TargetPhraseCollections at this node.
   for (TPCMap::iterator p = m_targetPhraseCollections.begin();
        p != m_targetPhraseCollections.end(); ++p) {
-    p->second.Prune(true, tableLimit);
+    p->second->Prune(true, tableLimit);
   }
 }

@@ -54,17 +54,21 @@ void RuleTrie::Node::Sort(std::size_t tableLimit)
   // Sort TargetPhraseCollections at this node.
   for (TPCMap::iterator p = m_targetPhraseCollections.begin();
        p != m_targetPhraseCollections.end(); ++p) {
-    p->second.Sort(true, tableLimit);
+    p->second->Sort(true, tableLimit);
   }
 }

-RuleTrie::Node *RuleTrie::Node::GetOrCreateChild(
-    const Word &sourceTerm)
+RuleTrie::Node*
+RuleTrie::Node::
+GetOrCreateChild(const Word &sourceTerm)
 {
   return &m_sourceTermMap[sourceTerm];
 }

-RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
+RuleTrie::Node *
+RuleTrie::
+Node::
+GetOrCreateNonTerminalChild(const Word &targetNonTerm)
 {
   UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
                  "Not a non-terminal: " << targetNonTerm);
@@ -72,42 +76,52 @@ RuleTrie::Node *RuleTrie::Node::GetOrCreateNonTerminalChild(const Word &targetNo
   return &m_nonTermMap[targetNonTerm];
 }

-TargetPhraseCollection &RuleTrie::Node::GetOrCreateTargetPhraseCollection(
-    const Word &sourceLHS)
+TargetPhraseCollection::shared_ptr
+RuleTrie::
+Node::
+GetOrCreateTargetPhraseCollection(const Word &sourceLHS)
 {
   UTIL_THROW_IF2(!sourceLHS.IsNonTerminal(),
                  "Not a non-terminal: " << sourceLHS);
-  return m_targetPhraseCollections[sourceLHS];
+  TargetPhraseCollection::shared_ptr& foo
+  = m_targetPhraseCollections[sourceLHS];
+  if (!foo) foo.reset(new TargetPhraseCollection);
+  return foo;
 }

-const RuleTrie::Node *RuleTrie::Node::GetChild(
-    const Word &sourceTerm) const
+RuleTrie::Node const*
+RuleTrie::
+Node::
+GetChild(const Word &sourceTerm) const
 {
-  UTIL_THROW_IF2(sourceTerm.IsNonTerminal(),
-                 "Not a terminal: " << sourceTerm);
+  UTIL_THROW_IF2(sourceTerm.IsNonTerminal(), "Not a terminal: " << sourceTerm);

   SymbolMap::const_iterator p = m_sourceTermMap.find(sourceTerm);
   return (p == m_sourceTermMap.end()) ? NULL : &p->second;
 }

-const RuleTrie::Node *RuleTrie::Node::GetNonTerminalChild(
-    const Word &targetNonTerm) const
+RuleTrie::Node const*
+RuleTrie::
+Node::
+GetNonTerminalChild(const Word &targetNonTerm) const
 {
   UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
                  "Not a non-terminal: " << targetNonTerm);

   SymbolMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
   return (p == m_nonTermMap.end()) ? NULL : &p->second;
 }

-TargetPhraseCollection &RuleTrie::GetOrCreateTargetPhraseCollection(
-    const Word &sourceLHS, const Phrase &sourceRHS)
+TargetPhraseCollection::shared_ptr
+RuleTrie::
+GetOrCreateTargetPhraseCollection
+( const Word &sourceLHS, const Phrase &sourceRHS )
 {
   Node &currNode = GetOrCreateNode(sourceRHS);
   return currNode.GetOrCreateTargetPhraseCollection(sourceLHS);
 }

-RuleTrie::Node &RuleTrie::GetOrCreateNode(const Phrase &sourceRHS)
+RuleTrie::Node &
+RuleTrie::
+GetOrCreateNode(const Phrase &sourceRHS)
 {
   const std::size_t size = sourceRHS.GetSize();

@@ -32,7 +32,7 @@ public:
   typedef boost::unordered_map<Word, Node, SymbolHasher,
                                SymbolEqualityPred> SymbolMap;

-  typedef boost::unordered_map<Word, TargetPhraseCollection,
+  typedef boost::unordered_map<Word, TargetPhraseCollection::shared_ptr,
                                SymbolHasher, SymbolEqualityPred> TPCMap;

   bool IsLeaf() const {
@@ -48,15 +48,18 @@ public:

   Node *GetOrCreateChild(const Word &sourceTerm);
   Node *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Word &);
+  TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(const Word &);

   const Node *GetChild(const Word &sourceTerm) const;
   const Node *GetNonTerminalChild(const Word &targetNonTerm) const;

-  const TargetPhraseCollection *GetTargetPhraseCollection(
-      const Word &sourceLHS) const {
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollection(const Word &sourceLHS) const {
     TPCMap::const_iterator p = m_targetPhraseCollections.find(sourceLHS);
-    return p == m_targetPhraseCollections.end() ? 0 : &(p->second);
+    if (p != m_targetPhraseCollections.end())
+      return p->second;
+    else
+      return TargetPhraseCollection::shared_ptr();
   }

   // FIXME IS there any reason to distinguish these two for T2S?
@@ -83,8 +86,9 @@ public:
 private:
   friend class RuleTrieCreator;

-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
-      const Word &sourceLHS, const Phrase &sourceRHS);
+  TargetPhraseCollection::shared_ptr
+  GetOrCreateTargetPhraseCollection
+  (const Word &sourceLHS, const Phrase &sourceRHS);

   Node &GetOrCreateNode(const Phrase &sourceRHS);

@@ -21,7 +21,7 @@ protected:

   // Provide access to RuleTrie's private
   // GetOrCreateTargetPhraseCollection function.
-  TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
+  TargetPhraseCollection::shared_ptr GetOrCreateTargetPhraseCollection(
     RuleTrie &trie, const Word &sourceLHS, const Phrase &sourceRHS) {
     return trie.GetOrCreateTargetPhraseCollection(sourceLHS, sourceRHS);
   }
@@ -55,7 +55,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
   std::vector<float> scoreVector;
   StringPiece line;

-  double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+  int noflags = double_conversion::StringToDoubleConverter::NO_FLAGS;
+  double_conversion::StringToDoubleConverter
+  converter(noflags, NAN, NAN, "inf", "nan");

   while(true) {
     try {
@@ -132,9 +134,9 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
     targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
     targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());

-    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(
-        trie, *sourceLHS, sourcePhrase);
-    phraseColl.Add(targetPhrase);
+    TargetPhraseCollection::shared_ptr phraseColl
+    = GetOrCreateTargetPhraseCollection(trie, *sourceLHS, sourcePhrase);
+    phraseColl->Add(targetPhrase);

     // not implemented correctly in memory pt. just delete it for now
     delete sourceLHS;
@@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #include <iostream>
 #include "TargetPhrase.h"
 #include "Util.h"
+#include <boost/shared_ptr.hpp>

 namespace Moses
 {
@@ -43,6 +44,8 @@ public:
   // iters
   typedef CollType::iterator iterator;
   typedef CollType::const_iterator const_iterator;
+  typedef boost::shared_ptr<TargetPhraseCollection> shared_ptr;
+  typedef boost::shared_ptr<TargetPhraseCollection const> shared_const_ptr;

   TargetPhrase const*
   operator[](size_t const i) const {
@@ -127,6 +130,9 @@ protected:
   std::vector<Phrase> m_sourcePhrases;

 public:
+  typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase> shared_ptr;
+  typedef boost::shared_ptr<TargetPhraseCollectionWithSourcePhrase const> shared_const_ptr;

   const std::vector<Phrase> &GetSourcePhrases() const {
     return m_sourcePhrases;
   }
@@ -167,10 +167,10 @@ void ChartRuleLookupManagerMemory::AddAndExtend(
   size_t endPos)
 {

-  const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
+  TargetPhraseCollection::shared_ptr tpc = node->GetTargetPhraseCollection();
   // add target phrase collection (except if rule is empty or a unary non-terminal rule)
-  if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
-    m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
+  if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
+    m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
   }

   // get all further extensions of rule (until reaching end of sentence or max-chart-span)
@@ -167,10 +167,11 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
   size_t endPos)
 {

-  const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
+  TargetPhraseCollection::shared_ptr tpc
+  = node->GetTargetPhraseCollection();
   // add target phrase collection (except if rule is empty or a unary non-terminal rule)
-  if (!tpc.IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
-    m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
+  if (!tpc->IsEmpty() && (m_stackVec.empty() || endPos != m_unaryPos)) {
+    m_completedRules[endPos].Add(*tpc, m_stackVec, m_stackScores, *m_outColl);
   }

   // get all further extensions of rule (until reaching end of sentence or max-chart-span)
@@ -64,11 +64,12 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(

 ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
 {
-  std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache;
-  for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
-    delete iterCache->second;
-  }
-  m_cache.clear();
+  // not needed any more due to the switch to shared pointers
+  // std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache;
+  // for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
+  // iterCache->second.reset();
+  // }
+  // m_cache.clear();

   RemoveAllInColl(m_expandableDottedRuleListVec);
   RemoveAllInColl(m_sourcePhraseNode);
@@ -236,14 +237,16 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
       if (sourceLHSBerkeleyDb == NULL)
         continue;

-      const TargetPhraseCollection *targetPhraseCollection = NULL;
-      const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
+      TargetPhraseCollection::shared_ptr targetPhraseCollection;
+      const OnDiskPt::PhraseNode *node
+      = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
       if (node) {
         uint64_t tpCollFilePos = node->GetValue();
-        std::map<uint64_t, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos);
+        std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache = m_cache.find(tpCollFilePos);
         if (iterCache == m_cache.end()) {

-          const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
+          OnDiskPt::TargetPhraseCollection::shared_ptr tpcollBerkeleyDb
+          = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);

           std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
           targetPhraseCollection
@@ -254,7 +257,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
             ,m_dbWrapper.GetVocab()
             ,true);

-          delete tpcollBerkeleyDb;
+          tpcollBerkeleyDb.reset();
           m_cache[tpCollFilePos] = targetPhraseCollection;
         } else {
           // just get out of cache
@@ -55,7 +55,7 @@ private:
   const std::vector<FactorType> &m_inputFactorsVec;
   const std::vector<FactorType> &m_outputFactorsVec;
   std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec;
-  std::map<uint64_t, const TargetPhraseCollection*> m_cache;
+  std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache;
   std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
 };

@@ -48,7 +48,7 @@ ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton(

 ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
 {
-  RemoveAllInColl(m_tpColl);
+  // RemoveAllInColl(m_tpColl);
 }

 void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
@@ -58,7 +58,7 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
 {
   //m_tpColl.push_back(TargetPhraseCollection());
   //TargetPhraseCollection &tpColl = m_tpColl.back();
-  TargetPhraseCollection *tpColl = new TargetPhraseCollection();
+  TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
   m_tpColl.push_back(tpColl);

   const WordsRange &range = inputPath.GetWordsRange();
@@ -73,7 +73,9 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
   outColl.Add(*tpColl, m_stackVec, range);
 }

-TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sourceWord) const
+TargetPhrase *
+ChartRuleLookupManagerSkeleton::
+CreateTargetPhrase(const Word &sourceWord) const
 {
   // create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:'
   string str = sourceWord.GetFactor(0)->GetString().as_string();
@@ -49,7 +49,7 @@ private:
   TargetPhrase *CreateTargetPhrase(const Word &sourceWord) const;

   StackVec m_stackVec;
-  std::vector<TargetPhraseCollection*> m_tpColl;
+  std::vector<TargetPhraseCollection::shared_ptr > m_tpColl;
   const SkeletonPT &m_skeletonPT;
 };

@@ -107,14 +107,15 @@ void PhraseDictionaryCompact::Load()
 //   }
 // };

-const TargetPhraseCollection*
+TargetPhraseCollection::shared_ptr
 PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
 {

+  TargetPhraseCollection::shared_ptr ret;
   // There is no souch source phrase if source phrase is longer than longest
   // observed source phrase during compilation
   if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
-    return NULL;
+    return ret;

   // Retrieve target phrase collection from phrase table
   TargetPhraseVectorPtr decodedPhraseColl
@@ -122,7 +123,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s

   if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
     TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
-    TargetPhraseCollection* phraseColl = new TargetPhraseCollection();
+    TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);

     // Score phrases and if possible apply ttable_limit
     TargetPhraseVector::iterator nth =
@@ -139,7 +140,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &s

     return phraseColl;
   } else
-    return NULL;
+    return ret;
 }

 TargetPhraseVectorPtr
@@ -163,7 +164,7 @@ PhraseDictionaryCompact::~PhraseDictionaryCompact()

 //TO_STRING_BODY(PhraseDictionaryCompact)

-void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
+void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
 {
   if(!m_sentenceCache.get())
     m_sentenceCache.reset(new PhraseCache());
@@ -179,12 +180,13 @@ void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &so
   m_sentenceCache.reset(new PhraseCache());

   m_phraseDecoder->PruneCache();
-  for(PhraseCache::iterator it = m_sentenceCache->begin();
-      it != m_sentenceCache->end(); it++)
-    delete *it;
+  // for(PhraseCache::iterator it = m_sentenceCache->begin();
+  //     it != m_sentenceCache->end(); it++)
+  //   it->reset();

-  PhraseCache temp;
-  temp.swap(*m_sentenceCache);
+  // PhraseCache temp;
+  // temp.swap(*m_sentenceCache);
+  m_sentenceCache->clear();

   ReduceCache();
 }
@@ -51,7 +51,7 @@ protected:
   bool m_inMemory;
   bool m_useAlignmentInfo;

-  typedef std::vector<TargetPhraseCollection*> PhraseCache;
+  typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
   typedef boost::thread_specific_ptr<PhraseCache> SentenceCache;
   static SentenceCache m_sentenceCache;

@@ -69,12 +69,12 @@ public:

   void Load();

-  const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
+  TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const;
   TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;

   void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);

-  void CacheForCleanup(TargetPhraseCollection* tpc);
+  void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
   void CleanUpAfterSentenceProcessing(const InputType &source);

   virtual ChartRuleLookupManager *CreateRuleLookupManager(
@@ -35,14 +35,15 @@ namespace Moses
 {
 std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;

-CacheColl::~CacheColl()
-{
-  for (iterator iter = begin(); iter != end(); ++iter) {
-    std::pair<const TargetPhraseCollection*, clock_t> &key = iter->second;
-    const TargetPhraseCollection *tps = key.first;
-    delete tps;
-  }
-}
+// CacheColl::~CacheColl()
+// {
+// // not needed any more since the switch to shared pointers
+// // for (iterator iter = begin(); iter != end(); ++iter) {
+// // std::pair<TargetPhraseCollection::shared_ptr , clock_t> &key = iter->second;
+// // TargetPhraseCollection::shared_ptr tps = key.first;
+// // delete tps;
+// // }
+// }

 PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
   : DecodeFeature(line, registerNow)
@@ -60,9 +61,12 @@ ProvidesPrefixCheck() const
   return false;
 }

-const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
+TargetPhraseCollection::shared_ptr
+PhraseDictionary::
+GetTargetPhraseCollectionLEGACY(const Phrase& src) const
 {
-  const TargetPhraseCollection *ret;
+  TargetPhraseCollection::shared_ptr ret;
+  typedef std::pair<TargetPhraseCollection::shared_ptr , clock_t> entry;
   if (m_maxCacheSize) {
     CacheColl &cache = GetCache();

@@ -74,18 +78,14 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
     if (iter == cache.end()) {
       // not in cache, need to look up from phrase table
       ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
-      if (ret) {
-        ret = new TargetPhraseCollection(*ret);
+      if (ret) { // make a copy
+        ret.reset(new TargetPhraseCollection(*ret));
       }
-
-      std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
-      cache[hash] = value;
+      cache[hash] = entry(ret, clock());
     } else {
       // in cache. just use it
-      std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
-      value.second = clock();
-
-      ret = value.first;
+      iter->second.second = clock();
+      ret = iter->second.first;
     }
   } else {
     // don't use cache. look up from phrase table
@@ -95,7 +95,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(
   return ret;
 }

-TargetPhraseCollection const *
+TargetPhraseCollection::shared_ptr
 PhraseDictionary::
 GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
 {
@@ -103,7 +103,7 @@ GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
 }


-TargetPhraseCollectionWithSourcePhrase const*
+TargetPhraseCollectionWithSourcePhrase::shared_ptr
 PhraseDictionary::
 GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const
 {
@@ -140,14 +140,14 @@ SetFeaturesToApply()
 }


-// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
-void
-PhraseDictionary::
-Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
-{
-  // do nothing by default
-  return;
-}
+// // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more
+// void
+// PhraseDictionary::
+// Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
+// {
+// // do nothing by default
+// return;
+// }

 bool
 PhraseDictionary::
@@ -170,7 +170,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
     }

     const Phrase &phrase = inputPath.GetPhrase();
-    const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
+    TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
     inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
   }
 }
@@ -180,7 +180,7 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
 //void PhraseDictionary::SaveCache() const
 //{
 //  CacheColl &cache = GetCache();
-//  for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter,
+//  for( std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter,
 //       iter != cache.end(),
 //       iter++ ) {
 //
@@ -191,10 +191,10 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
 //void PhraseDictionary::LoadCache() const
 //{
 //  CacheColl &cache = GetCache();
-//  std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
+//  std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iter;
 //  iter = cache.begin();
 //  while( iter != cache.end() ) {
-//    std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
+//    std::map<size_t, std::pair<TargetPhraseCollection::shared_ptr ,clock_t> >::iterator iterRemove = iter++;
 //    delete iterRemove->second.first;
 //    cache.erase(iterRemove);
 //  }
@@ -225,11 +225,12 @@ void PhraseDictionary::ReduceCache() const
   while( iter != cache.end() ) {
     if (iter->second.second < cutoffLastUsedTime) {
       CacheColl::iterator iterRemove = iter++;
-      delete iterRemove->second.first;
+      // delete iterRemove->second.first;
       cache.erase(iterRemove);
     } else iter++;
   }
-  VERBOSE(2,"Reduced persistent translation option cache in " << reduceCacheTime << " seconds." << std::endl);
+  VERBOSE(2,"Reduced persistent translation option cache in "
+          << reduceCacheTime << " seconds." << std::endl);
 }

 CacheColl &PhraseDictionary::GetCache() const
@@ -265,8 +266,8 @@ bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
   // lookup translation only if no other translations
   InputPath::TargetPhrases::const_iterator iter;
   for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
-    const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
-    const TargetPhraseCollection *tpCollPrev = temp.first;
+    const std::pair<TargetPhraseCollection::shared_ptr , const void*> &temp = iter->second;
+    TargetPhraseCollection::shared_ptr tpCollPrev = temp.first;

     if (tpCollPrev && tpCollPrev->GetSize()) {
       // already have translation from another pt. Don't create translations
@@ -55,15 +55,18 @@ class ChartCellCollectionBase;
 class ChartRuleLookupManager;
 class ChartParser;

-class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >
-{
-  // 1st = hash of source phrase/ address of phrase-table node
-  // 2nd = all translations
-  // 3rd = time of last access
+// typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> TPCollLastUse;
+typedef std::pair<TargetPhraseCollection::shared_ptr, clock_t> CacheCollEntry;
+typedef boost::unordered_map<size_t, CacheCollEntry> CacheColl;
+// class CacheColl : public boost::unordered_map<size_t, TPCollLastUse>
+// {
+// // 1st = hash of source phrase/ address of phrase-table node
+// // 2nd = all translations
+// // 3rd = time of last access

-public:
-  ~CacheColl();
-};
+// public:
+// // ~CacheColl();
+// };

 /**
  * Abstract base class for phrase dictionaries (tables).
@@ -95,9 +98,9 @@ public:
     return m_id;
   }

-  virtual
-  void
-  Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
+  // virtual
+  // void
+  // Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;

   /// return true if phrase table entries starting with /phrase/
   // exist in the table.
@@ -111,24 +114,23 @@ public:
   //! find list of translations that can translates src. Only for phrase input

 public:
-  virtual
-  TargetPhraseCollection const *
+  virtual TargetPhraseCollection::shared_ptr
   GetTargetPhraseCollectionLEGACY(const Phrase& src) const;

-  virtual
-  TargetPhraseCollection const *
-  GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const {
+  virtual TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
+                                  Phrase const& src) const
+  {
     return GetTargetPhraseCollectionLEGACY(src);
   }

-  virtual
-  void
+  virtual void
   GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

-  virtual
-  void
-  GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
-      const InputPathList &inputPathQueue) const {
+  virtual void
+  GetTargetPhraseCollectionBatch
+  (ttasksptr const& ttask, InputPathList const& inputPathQueue) const
+  {
     GetTargetPhraseCollectionBatch(inputPathQueue);
   }

@@ -157,7 +159,9 @@ public:

   // LEGACY
   //! find list of translations that can translates a portion of src. Used by confusion network decoding
-  virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
+  virtual
+  TargetPhraseCollectionWithSourcePhrase::shared_ptr
+  GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;

 protected:
   static std::vector<PhraseDictionary*> s_staticColl;
@@ -184,7 +188,10 @@ protected:
   mutable boost::scoped_ptr<CacheColl> m_cache;
 #endif

-  virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
+  virtual
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;

   void ReduceCache() const;

 protected:
@@ -150,15 +150,15 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(ttasksptr const& ttas
   ReduceCache();
 }

-const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
+TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
 {
 #ifdef WITH_THREADS
   boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
 #endif
-  TargetPhraseCollection* tpc = NULL;
+  TargetPhraseCollection::shared_ptr tpc;
   cacheMap::const_iterator it = m_cacheTM.find(source);
   if(it != m_cacheTM.end()) {
-    tpc = new TargetPhraseCollection(*(it->second).first);
+    tpc.reset(new TargetPhraseCollection(*(it->second).first));

     std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();

@@ -174,15 +174,15 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
   return tpc;
 }

-const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
+TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
 {
-  const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
+  TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
   return ret;
 }

-const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
+TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
 {
-  const TargetPhraseCollection *ret = GetTargetPhraseCollection(src);
+  TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
   return ret;
 }

@@ -366,7 +366,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
     // and then add new entry

     TargetCollectionAgePair TgtCollAgePair = it->second;
-    TargetPhraseCollection* tpc = TgtCollAgePair.first;
+    TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
     AgeCollection* ac = TgtCollAgePair.second;
     const Phrase* p_ptr = NULL;
     TargetPhrase* tp_ptr = NULL;
@@ -397,7 +397,7 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
     if (tpc->GetSize() == 0) {
       // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
       ac->clear();
-      delete tpc;
+      tpc.reset();
       delete ac;
       m_cacheTM.erase(sp);
     }
@@ -451,14 +451,14 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
     //sp is found

     TargetCollectionAgePair TgtCollAgePair = it->second;
-    TargetPhraseCollection* tpc = TgtCollAgePair.first;
+    TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
     AgeCollection* ac = TgtCollAgePair.second;

     m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache

     // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
     ac->clear();
-    delete tpc;
+    tpc.reset();
     delete ac;
     m_cacheTM.erase(sp);
   } else {
@@ -558,7 +558,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
     // and then add new entry

     TargetCollectionAgePair TgtCollAgePair = it->second;
-    TargetPhraseCollection* tpc = TgtCollAgePair.first;
+    TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
     AgeCollection* ac = TgtCollAgePair.second;
     // const TargetPhrase* p_ptr = NULL;
     const Phrase* p_ptr = NULL;
@@ -599,7 +599,7 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int a
     // create target collection
     // we have to create new target collection age pair and add new entry to target collection age pair

-    TargetPhraseCollection* tpc = new TargetPhraseCollection();
+    TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
     AgeCollection* ac = new AgeCollection();
     m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));

@@ -629,13 +629,13 @@ void PhraseDictionaryDynamicCacheBased::Decay()
 void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
 {
   VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
-  cacheMap::const_iterator it = m_cacheTM.find(sp);
+  cacheMap::iterator it = m_cacheTM.find(sp);
   if (it != m_cacheTM.end()) {
     VERBOSE(3,"found:|" << sp << "|" << std::endl);
     //sp is found

     TargetCollectionAgePair TgtCollAgePair = it->second;
-    TargetPhraseCollection* tpc = TgtCollAgePair.first;
+    TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
     AgeCollection* ac = TgtCollAgePair.second;

     //loop in inverted order to allow a correct deletion of std::vectors tpc and ac
@@ -661,7 +661,7 @@ void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
       // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
       (((*it).second).second)->clear();
       delete ((*it).second).second;
-      delete ((*it).second).first;
+      ((*it).second).first.reset();
       m_cacheTM.erase(sp);
     }
   } else {
@@ -703,11 +703,11 @@ void PhraseDictionaryDynamicCacheBased::Clear()
 #ifdef WITH_THREADS
   boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
 #endif
-  cacheMap::const_iterator it;
+  cacheMap::iterator it;
   for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
     (((*it).second).second)->clear();
     delete ((*it).second).second;
-    delete ((*it).second).first;
+    ((*it).second).first.reset();
   }
   m_cacheTM.clear();
   m_entries = 0;
@@ -746,7 +746,7 @@ void PhraseDictionaryDynamicCacheBased::Print() const
   cacheMap::const_iterator it;
   for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
     std::string source = (it->first).ToString();
-    TargetPhraseCollection* tpc = (it->second).first;
+    TargetPhraseCollection::shared_ptr tpc = (it->second).first;
     TargetPhraseCollection::iterator itr;
     for(itr = tpc->begin(); itr != tpc->end(); itr++) {
       std::string target = (*itr)->ToString();
@@ -53,7 +53,7 @@ class PhraseDictionaryDynamicCacheBased : public PhraseDictionary
 {

   typedef std::vector<unsigned int> AgeCollection;
-  typedef std::pair<TargetPhraseCollection*, AgeCollection*> TargetCollectionAgePair;
+  typedef std::pair<TargetPhraseCollection::shared_ptr , AgeCollection*> TargetCollectionAgePair;
   typedef std::map<Phrase, TargetCollectionAgePair> cacheMap;

   // data structure for the cache
@@ -111,9 +111,14 @@ public:
   void Load();
   void Load(const std::string files);

-  const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const;
-  const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
-  const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollection(const Phrase &src) const;
+
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollectionLEGACY(Phrase const &src) const;
+
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;

   // for phrase-based model
   // void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
@@ -86,29 +86,32 @@ void PhraseDictionaryGroup::GetTargetPhraseCollectionBatch(
   // Look up each input in each model
   BOOST_FOREACH(InputPath* inputPath, inputPathQueue) {
     const Phrase &phrase = inputPath->GetPhrase();
-    const TargetPhraseCollection* targetPhrases =
+    TargetPhraseCollection::shared_ptr targetPhrases =
       this->GetTargetPhraseCollectionLEGACY(ttask, phrase);
     inputPath->SetTargetPhrases(*this, targetPhrases, NULL);
   }
 }

-const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
+TargetPhraseCollection::shared_ptr PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
   const Phrase& src) const
 {
   UTIL_THROW2("Don't call me without the translation task.");
 }

-const TargetPhraseCollection* PhraseDictionaryGroup::GetTargetPhraseCollectionLEGACY(
-  const ttasksptr& ttask, const Phrase& src) const
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryGroup::
+GetTargetPhraseCollectionLEGACY(const ttasksptr& ttask, const Phrase& src) const
 {
-  TargetPhraseCollection* ret = CreateTargetPhraseCollection(ttask, src);
+  TargetPhraseCollection::shared_ptr ret
+  = CreateTargetPhraseCollection(ttask, src);
   ret->NthElement(m_tableLimit); // sort the phrases for pruning later
   const_cast<PhraseDictionaryGroup*>(this)->CacheForCleanup(ret);
   return ret;
 }

-TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
-  const ttasksptr& ttask, const Phrase& src) const
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryGroup::
+CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const
 {
   // Aggregation of phrases and the scores that will be applied to them
   vector<TargetPhrase*> allPhrases;
@ -121,8 +124,8 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
|||||||
|
|
||||||
// Collect phrases from this table
|
// Collect phrases from this table
|
||||||
const PhraseDictionary& pd = *m_memberPDs[i];
|
const PhraseDictionary& pd = *m_memberPDs[i];
|
||||||
const TargetPhraseCollection* ret_raw = pd.GetTargetPhraseCollectionLEGACY(
|
TargetPhraseCollection::shared_ptr
|
||||||
ttask, src);
|
ret_raw = pd.GetTargetPhraseCollectionLEGACY(ttask, src);
|
||||||
|
|
||||||
if (ret_raw != NULL) {
|
if (ret_raw != NULL) {
|
||||||
// Process each phrase from table
|
// Process each phrase from table
|
||||||
@ -162,7 +165,7 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Apply scores to phrases and add them to return collection
|
// Apply scores to phrases and add them to return collection
|
||||||
TargetPhraseCollection* ret = new TargetPhraseCollection();
|
TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
|
||||||
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
|
const vector<FeatureFunction*> pd_feature_const(m_pdFeature);
|
||||||
BOOST_FOREACH(TargetPhrase* phrase, allPhrases) {
|
BOOST_FOREACH(TargetPhrase* phrase, allPhrases) {
|
||||||
phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second);
|
phrase->GetScoreBreakdown().Assign(this, allScores.find(phrase)->second);
|
||||||
@ -174,29 +177,33 @@ TargetPhraseCollection* PhraseDictionaryGroup::CreateTargetPhraseCollection(
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
ChartRuleLookupManager *PhraseDictionaryGroup::CreateRuleLookupManager(
|
ChartRuleLookupManager*
|
||||||
const ChartParser &, const ChartCellCollectionBase&, size_t)
|
PhraseDictionaryGroup::
|
||||||
|
CreateRuleLookupManager(const ChartParser &,
|
||||||
|
const ChartCellCollectionBase&, size_t)
|
||||||
{
|
{
|
||||||
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
|
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
|
||||||
}
|
}
|
||||||
|
|
||||||
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
|
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
|
||||||
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection* tpc)
|
void PhraseDictionaryGroup::CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
|
||||||
{
|
{
|
||||||
PhraseCache &ref = GetPhraseCache();
|
PhraseCache &ref = GetPhraseCache();
|
||||||
ref.push_back(tpc);
|
ref.push_back(tpc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PhraseDictionaryGroup::CleanUpAfterSentenceProcessing(
|
void
|
||||||
const InputType &source)
|
PhraseDictionaryGroup::
|
||||||
|
CleanUpAfterSentenceProcessing(const InputType &source)
|
||||||
{
|
{
|
||||||
PhraseCache &ref = GetPhraseCache();
|
GetPhraseCache().clear();
|
||||||
for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
|
// PhraseCache &ref = GetPhraseCache();
|
||||||
delete *it;
|
// for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
|
||||||
}
|
// delete *it;
|
||||||
|
// }
|
||||||
|
|
||||||
PhraseCache temp;
|
// PhraseCache temp;
|
||||||
temp.swap(ref);
|
// temp.swap(ref);
|
||||||
|
|
||||||
CleanUpComponentModels(source);
|
CleanUpComponentModels(source);
|
||||||
}
|
}
|
||||||
|
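Editor's note: the PhraseDictionaryGroup hunks above replace owning raw TargetPhraseCollection pointers with reference-counted shared pointers, which is why CleanUpAfterSentenceProcessing() can shrink to a single clear() call. The following standalone sketch illustrates that idiom under the assumption that TargetPhraseCollection::shared_ptr wraps boost::shared_ptr; PhraseSet, CreateCollection and the cache below are invented stand-ins, not the Moses API.

// Illustrative sketch only, not part of this commit.
#include <boost/shared_ptr.hpp>
#include <iostream>
#include <vector>

struct PhraseSet {
  ~PhraseSet() { std::cout << "PhraseSet released\n"; }
};

typedef boost::shared_ptr<PhraseSet> PhraseSetPtr;
typedef std::vector<PhraseSetPtr> PhraseCache;

PhraseSetPtr CreateCollection() {
  // The factory hands ownership to the caller through the shared pointer.
  return PhraseSetPtr(new PhraseSet);
}

int main() {
  PhraseCache cache;
  PhraseSetPtr tpc = CreateCollection();
  cache.push_back(tpc);                      // the cache keeps the object alive
  std::cout << tpc.use_count() << std::endl; // 2: local copy + cache entry
  tpc.reset();                               // caller lets go of its copy
  cache.clear();                             // last owner gone, destructor runs
  return 0;
}

Clearing the cache drops the last reference and the destructor runs automatically; with the old raw-pointer cache, the explicit delete loop was the only thing standing between the decoder and a leak.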
@@ -43,19 +43,20 @@ class PhraseDictionaryGroup: public PhraseDictionary
 public:
 PhraseDictionaryGroup(const std::string& line);
 void Load();
-TargetPhraseCollection* CreateTargetPhraseCollection(const ttasksptr& ttask,
+TargetPhraseCollection::shared_ptr
+CreateTargetPhraseCollection(const ttasksptr& ttask,
 const Phrase& src) const;
 std::vector<std::vector<float> > getWeights(size_t numWeights,
 bool normalize) const;
-void CacheForCleanup(TargetPhraseCollection* tpc);
+void CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
 void CleanUpAfterSentenceProcessing(const InputType& source);
 void CleanUpComponentModels(const InputType& source);
 // functions below override the base class
 void GetTargetPhraseCollectionBatch(const ttasksptr& ttask,
 const InputPathList &inputPathQueue) const;
-const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(
+TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
 const Phrase& src) const;
-const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(
+TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(
 const ttasksptr& ttask, const Phrase& src) const;
 void InitializeForInput(ttasksptr const& ttask) {
 /* Don't do anything source specific here as this object is shared between threads.*/
@@ -71,7 +72,7 @@ protected:
 bool m_restrict;
 std::vector<FeatureFunction*> m_pdFeature;

-typedef std::vector<TargetPhraseCollection*> PhraseCache;
+typedef std::vector<TargetPhraseCollection::shared_ptr > PhraseCache;
 #ifdef WITH_THREADS
 boost::shared_mutex m_lock_cache;
 typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
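The header change above spells the pointer type as TargetPhraseCollection::shared_ptr, which suggests a nested typedef on the collection class. A hedged sketch of that pattern follows; Collection is an invented stand-in, and the real typedef lives elsewhere in the Moses headers and is assumed to wrap boost::shared_ptr.

// Sketch of the nested-typedef pattern; illustrative only.
#include <boost/shared_ptr.hpp>
#include <vector>

class Collection {
public:
  // Call sites can now write Collection::shared_ptr instead of repeating
  // the full template spelling everywhere.
  typedef boost::shared_ptr<Collection> shared_ptr;
};

typedef std::vector<Collection::shared_ptr> PhraseCache;

int main() {
  PhraseCache cache;
  cache.push_back(Collection::shared_ptr(new Collection));
  return 0; // destroying the cache releases the element automatically
}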
@@ -49,16 +49,17 @@ PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)

 }

-TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection(
-const Phrase &source
-, const TargetPhrase &target
-, const Word *sourceLHS)
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryMemory::
+GetOrCreateTargetPhraseCollection(const Phrase &source,
+const TargetPhrase &target,
+const Word *sourceLHS)
 {
 PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
 return currNode.GetTargetPhraseCollection();
 }

-const TargetPhraseCollection*
+TargetPhraseCollection::shared_ptr
 PhraseDictionaryMemory::
 GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
 {
@@ -73,10 +74,10 @@ GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
 const Word& word = source.GetWord(pos);
 currNode = currNode->GetChild(word);
 if (currNode == NULL)
-return NULL;
+return TargetPhraseCollection::shared_ptr();
 }

-return &currNode->GetTargetPhraseCollection();
+return currNode->GetTargetPhraseCollection();
 }

 PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
@@ -168,12 +169,11 @@ GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
 lastWord.OnlyTheseFactors(m_inputFactors);

 const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
+TargetPhraseCollection::shared_ptr targetPhrases;
 if (ptNode) {
-const TargetPhraseCollection &targetPhrases = ptNode->GetTargetPhraseCollection();
-inputPath.SetTargetPhrases(*this, &targetPhrases, ptNode);
-} else {
-inputPath.SetTargetPhrases(*this, NULL, NULL);
+targetPhrases = ptNode->GetTargetPhraseCollection();
 }
+inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
 }
 }
 }
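PhraseDictionaryMemory now signals "no phrases for this source span" by returning a default-constructed shared pointer instead of NULL. A small self-contained sketch of that convention; Lookup and the table below are invented for illustration and do not mirror the Moses lookup code.

// Sketch of the "empty shared_ptr instead of NULL" convention; illustrative only.
#include <boost/shared_ptr.hpp>
#include <iostream>
#include <map>
#include <string>

struct Collection { int size; };
typedef boost::shared_ptr<Collection> CollectionPtr;

static std::map<std::string, CollectionPtr> table;

CollectionPtr Lookup(const std::string &key) {
  std::map<std::string, CollectionPtr>::const_iterator it = table.find(key);
  if (it == table.end())
    return CollectionPtr();   // default-constructed: plays the role of NULL
  return it->second;          // shares ownership with the table entry
}

int main() {
  table["seen"] = CollectionPtr(new Collection());
  CollectionPtr hit = Lookup("seen");
  CollectionPtr miss = Lookup("unseen");
  std::cout << (hit ? "hit" : "no hit") << ", "
            << (miss ? "hit" : "no hit") << std::endl; // prints: hit, no hit
  return 0;
}

An empty shared_ptr tests false in a boolean context, so callers can keep the familiar null-check style while a successful lookup shares ownership with the container that produced it.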
@@ -56,18 +56,22 @@ public:
 std::size_t);

 // only used by multi-model phrase table, and other meta-features
-const TargetPhraseCollection *GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
-void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
+TargetPhraseCollection::shared_ptr
+GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
+
+void
+GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

 TO_STRING();

 protected:
-TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
-const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
+TargetPhraseCollection::shared_ptr
+GetOrCreateTargetPhraseCollection
+(const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);

-PhraseDictionaryNodeMemory &GetOrCreateNode(const Phrase &source
-, const TargetPhrase &target
-, const Word *sourceLHS);
+PhraseDictionaryNodeMemory &
+GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
+const Word *sourceLHS);

 void SortAndPrune();

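GetOrCreateTargetPhraseCollection now hands back a shared_ptr rather than a reference, so the returned collection is no longer tied to the lifetime of the dictionary node that created it. A minimal sketch of a get-or-create accessor in that style; Node, Collection and GetOrCreate are invented names, not the Moses API.

// Sketch of a get-or-create accessor returning a shared pointer; illustrative only.
#include <boost/shared_ptr.hpp>
#include <map>
#include <string>

struct Collection { /* payload omitted */ };
typedef boost::shared_ptr<Collection> CollectionPtr;

class Node {
  std::map<std::string, CollectionPtr> m_children;
public:
  CollectionPtr GetOrCreate(const std::string &key) {
    CollectionPtr &slot = m_children[key];   // inserts an empty pointer on a miss
    if (!slot) slot.reset(new Collection);   // lazily allocate on first use
    return slot;                             // the copy shares ownership
  }
};

int main() {
  Node node;
  CollectionPtr kept = node.GetOrCreate("rule");
  return 0; // 'kept' would stay valid even if the node were destroyed first
}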
@@ -26,7 +26,9 @@ using namespace std;
 namespace Moses

 {
-PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
+PhraseDictionaryMultiModel::
+PhraseDictionaryMultiModel(const std::string &line)
 : PhraseDictionary(line, true)
 {
 ReadParameters();
@@ -45,7 +47,8 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
 }
 }

-PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::string &line)
+PhraseDictionaryMultiModel::
+PhraseDictionaryMultiModel(int type, const std::string &line)
 :PhraseDictionary(line, true)
 {
 if (type == 1) {
@@ -56,7 +59,9 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(int type, const std::stri
 }
 }

-void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std::string& value)
+void
+PhraseDictionaryMultiModel::
+SetParameter(const std::string& key, const std::string& value)
 {
 if (key == "mode") {
 m_mode = value;
@@ -70,9 +75,9 @@ void PhraseDictionaryMultiModel::SetParameter(const std::string& key, const std:
 }
 }

-PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
-{
-}
+PhraseDictionaryMultiModel::
+~PhraseDictionaryMultiModel()
+{ }

 void PhraseDictionaryMultiModel::Load()
 {
@@ -88,18 +93,21 @@ void PhraseDictionaryMultiModel::Load()
 }
 }

-const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryMultiModel::
+GetTargetPhraseCollectionLEGACY(const Phrase& src) const
 {

-std::vector<std::vector<float> > multimodelweights = getWeights(m_numScoreComponents, true);
-TargetPhraseCollection *ret = NULL;
+std::vector<std::vector<float> > multimodelweights;
+multimodelweights = getWeights(m_numScoreComponents, true);
+TargetPhraseCollection::shared_ptr ret;

-std::map<std::string,multiModelStatistics*>* allStats = new(std::map<std::string,multiModelStatistics*>);
+std::map<std::string, multiModelStats*>* allStats;
+allStats = new(std::map<std::string,multiModelStats*>);
 CollectSufficientStatistics(src, allStats);
 ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
 RemoveAllInMap(*allStats);
-delete allStats;
+delete allStats; // ??? Why the detour through malloc? UG

 ret->NthElement(m_tableLimit); // sort the phrases for pruning later
 const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
@@ -107,16 +115,19 @@ const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollect
 return ret;
 }

-void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const
+void
+PhraseDictionaryMultiModel::
+CollectSufficientStatistics
+(const Phrase& src, std::map<std::string, multiModelStats*>* allStats) const
 {
 for(size_t i = 0; i < m_numModels; ++i) {
 const PhraseDictionary &pd = *m_pd[i];

-TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
+TargetPhraseCollection::shared_ptr ret_raw;
+ret_raw = pd.GetTargetPhraseCollectionLEGACY(src);
 if (ret_raw != NULL) {

-TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
+TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast;
 if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
 iterLast = ret_raw->begin() + m_tableLimit;
 } else {
@@ -130,7 +141,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
 std::string targetString = targetPhrase->GetStringRep(m_output);
 if (allStats->find(targetString) == allStats->end()) {

-multiModelStatistics * statistics = new multiModelStatistics;
+multiModelStats * statistics = new multiModelStats;
 statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info
 statistics->p.resize(m_numScoreComponents);
 for(size_t j = 0; j < m_numScoreComponents; ++j) {
@@ -149,7 +160,7 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
 (*allStats)[targetString] = statistics;

 }
-multiModelStatistics * statistics = (*allStats)[targetString];
+multiModelStats * statistics = (*allStats)[targetString];

 for(size_t j = 0; j < m_numScoreComponents; ++j) {
 statistics->p[j][i] = UntransformScore(raw_scores[j]);
@@ -161,12 +172,17 @@ void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase& src,
 }
 }

-TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryMultiModel::
+CreateTargetPhraseCollectionLinearInterpolation
+( const Phrase& src,
+std::map<std::string,multiModelStats*>* allStats,
+std::vector<std::vector<float> > &multimodelweights) const
 {
-TargetPhraseCollection *ret = new TargetPhraseCollection();
-for ( std::map< std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
+TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
+for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {

-multiModelStatistics * statistics = iter->second;
+multiModelStats * statistics = iter->second;

 Scores scoreVector(m_numScoreComponents);

@@ -188,7 +204,9 @@ TargetPhraseCollection* PhraseDictionaryMultiModel::CreateTargetPhraseCollection
 }

 //TODO: is it worth caching the results as long as weights don't change?
-std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const
+std::vector<std::vector<float> >
+PhraseDictionaryMultiModel::
+getWeights(size_t numWeights, bool normalize) const
 {
 const std::vector<float>* weights_ptr;
 std::vector<float> raw_weights;
@@ -237,7 +255,9 @@ std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t n
 return multimodelweights;
 }

-std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<float> &weights) const
+std::vector<float>
+PhraseDictionaryMultiModel::
+normalizeWeights(std::vector<float> &weights) const
 {
 std::vector<float> ret (m_numModels);
 float total = std::accumulate(weights.begin(),weights.end(),0.0);
@@ -248,29 +268,36 @@ std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<floa
 }


-ChartRuleLookupManager *PhraseDictionaryMultiModel::CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t)
+ChartRuleLookupManager *
+PhraseDictionaryMultiModel::
+CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
+std::size_t)
 {
 UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
 }


 //copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
-void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection* tpc)
+void
+PhraseDictionaryMultiModel::
+CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
 {
-PhraseCache &ref = GetPhraseCache();
-ref.push_back(tpc);
+GetPhraseCache().push_back(tpc);
 }


-void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source)
+void
+PhraseDictionaryMultiModel::
+CleanUpAfterSentenceProcessing(const InputType &source)
 {
-PhraseCache &ref = GetPhraseCache();
-for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
-delete *it;
-}
+// PhraseCache &ref = GetPhraseCache();
+// for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
+// it->reset();
+// }

-PhraseCache temp;
-temp.swap(ref);
+// PhraseCache temp;
+// temp.swap(ref);
+GetPhraseCache().clear();

 CleanUpComponentModels(source);

@@ -279,14 +306,18 @@ void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType
 }


-void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source)
+void
+PhraseDictionaryMultiModel::
+CleanUpComponentModels(const InputType &source)
 {
 for(size_t i = 0; i < m_numModels; ++i) {
 m_pd[i]->CleanUpAfterSentenceProcessing(source);
 }
 }

-const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector() const
+const std::vector<float>*
+PhraseDictionaryMultiModel::
+GetTemporaryMultiModelWeightsVector() const
 {
 #ifdef WITH_THREADS
 boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
@@ -300,7 +331,9 @@ const std::vector<float>* PhraseDictionaryMultiModel::GetTemporaryMultiModelWeig
 #endif
 }

-void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
+void
+PhraseDictionaryMultiModel::
+SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
 {
 #ifdef WITH_THREADS
 boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
@@ -311,7 +344,9 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector
 }

 #ifdef WITH_DLIB
-vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
+vector<float>
+PhraseDictionaryMultiModel::
+MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
 {

 map<pair<string, string>, size_t> phrase_pair_map;
@@ -320,7 +355,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
 phrase_pair_map[*iter] += 1;
 }

-vector<multiModelStatisticsOptimization*> optimizerStats;
+vector<multiModelStatsOptimization*> optimizerStats;

 for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {

@@ -329,7 +364,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
 string target_string = phrase_pair.second;

 vector<float> fs(m_numModels);
-map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>);
+map<string,multiModelStats*>* allStats = new(map<string,multiModelStats*>);

 Phrase sourcePhrase(0);
 sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
@@ -343,7 +378,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
 continue;
 }

-multiModelStatisticsOptimization* targetStatistics = new multiModelStatisticsOptimization();
+multiModelStatsOptimization* targetStatistics = new multiModelStatsOptimization();
 targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
 targetStatistics->p = (*allStats)[target_string]->p;
 targetStatistics->f = iter->second;
@@ -383,7 +418,9 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,

 }

-vector<float> PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
+vector<float>
+PhraseDictionaryMultiModel::
+Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
 {

 dlib::matrix<double,0,1> starting_point;
@@ -428,8 +465,8 @@ double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
 weight_vector = m_model->normalizeWeights(weight_vector);
 }

-for ( std::vector<multiModelStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
-multiModelStatisticsOptimization* statistics = *iter;
+for ( std::vector<multiModelStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
+multiModelStatsOptimization* statistics = *iter;
 size_t f = statistics->f;

 double score;
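Beyond the pointer plumbing, the functions above implement the multimodel combination itself: CollectSufficientStatistics gathers per-model feature values into statistics->p[j][i], and CreateTargetPhraseCollectionLinearInterpolation combines them with the weights from getWeights(). A toy sketch of that weighted combination follows, assuming a weights[feature][model] layout that mirrors the p[j][i] indexing; it is illustrative only and not the Moses implementation.

// Sketch of linear interpolation across component models; illustrative only.
#include <iostream>
#include <vector>

typedef std::vector<std::vector<float> > Matrix;  // [feature][model]

std::vector<float> Interpolate(const Matrix &p, const Matrix &weights) {
  std::vector<float> combined(p.size(), 0.0f);
  for (size_t j = 0; j < p.size(); ++j)           // features
    for (size_t i = 0; i < p[j].size(); ++i)      // component models
      combined[j] += weights[j][i] * p[j][i];     // weighted sum per feature
  return combined;
}

int main() {
  Matrix p(1), w(1);
  p[0].push_back(0.2f); p[0].push_back(0.6f);     // feature value under model 0 and 1
  w[0].push_back(0.5f); w[0].push_back(0.5f);     // uniform interpolation weights
  std::cout << Interpolate(p, w)[0] << std::endl; // prints 0.4
  return 0;
}

With two models scoring a phrase pair 0.2 and 0.6 and uniform weights, the interpolated value is 0.4.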
@@ -36,15 +36,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 namespace Moses
 {

-struct multiModelStatistics {
+struct multiModelStats {
 TargetPhrase *targetPhrase;
 std::vector<std::vector<float> > p;
-~multiModelStatistics() {
+~multiModelStats() {
 delete targetPhrase;
 };
 };

-struct multiModelStatisticsOptimization: multiModelStatistics {
+struct multiModelStatsOptimization: multiModelStats {
 size_t f;
 };

@@ -71,27 +71,59 @@ public:
 PhraseDictionaryMultiModel(int type, const std::string &line);
 ~PhraseDictionaryMultiModel();
 void Load();
-virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
-virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
-std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
-std::vector<float> normalizeWeights(std::vector<float> &weights) const;
-void CacheForCleanup(TargetPhraseCollection* tpc);
-void CleanUpAfterSentenceProcessing(const InputType &source);
-virtual void CleanUpComponentModels(const InputType &source);
+virtual void
+CollectSufficientStatistics
+(const Phrase& src, std::map<std::string,multiModelStats*>* allStats)
+const;
+
+virtual TargetPhraseCollection::shared_ptr
+CreateTargetPhraseCollectionLinearInterpolation
+(const Phrase& src, std::map<std::string,multiModelStats*>* allStats,
+std::vector<std::vector<float> > &multimodelweights) const;
+
+std::vector<std::vector<float> >
+getWeights(size_t numWeights, bool normalize) const;
+
+std::vector<float>
+normalizeWeights(std::vector<float> &weights) const;
+
+void
+CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
+
+void
+CleanUpAfterSentenceProcessing(const InputType &source);
+
+virtual void
+CleanUpComponentModels(const InputType &source);
+
 #ifdef WITH_DLIB
 virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
 std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
 #endif
-// functions below required by base class
-virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
-virtual void InitializeForInput(ttasksptr const& ttask) {
-/* Don't do anything source specific here as this object is shared between threads.*/
-}
-ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&, std::size_t);
-void SetParameter(const std::string& key, const std::string& value);

-const std::vector<float>* GetTemporaryMultiModelWeightsVector() const;
-void SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
+// functions below required by base class
+virtual TargetPhraseCollection::shared_ptr
+GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
+
+virtual void
+InitializeForInput(ttasksptr const& ttask) {
+// Don't do anything source specific here as this object is shared
+// between threads.
+}
+
+ChartRuleLookupManager*
+CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
+std::size_t);
+
+void
+SetParameter(const std::string& key, const std::string& value);
+
+const std::vector<float>*
+GetTemporaryMultiModelWeightsVector() const;
+
+void
+SetTemporaryMultiModelWeightsVector(std::vector<float> weights);

 protected:
 std::string m_mode;
@@ -100,7 +132,7 @@ protected:
 size_t m_numModels;
 std::vector<float> m_multimodelweights;

-typedef std::vector<TargetPhraseCollection*> PhraseCache;
+typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
 #ifdef WITH_THREADS
 boost::shared_mutex m_lock_cache;
 typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
@@ -146,7 +178,7 @@ class CrossEntropy: public OptimizationObjective
 public:

 CrossEntropy (
-std::vector<multiModelStatisticsOptimization*> &optimizerStats,
+std::vector<multiModelStatsOptimization*> &optimizerStats,
 PhraseDictionaryMultiModel * model,
 size_t iFeature
 ) {
@@ -158,7 +190,7 @@ public:
 double operator() ( const dlib::matrix<double,0,1>& arg) const;

 protected:
-std::vector<multiModelStatisticsOptimization*> m_optimizerStats;
+std::vector<multiModelStatsOptimization*> m_optimizerStats;
 PhraseDictionaryMultiModel * m_model;
 size_t m_iFeature;
 };
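One thing the rename does not change: multiModelStats still owns its TargetPhrase through a raw pointer and frees it in its destructor, while the collections themselves are now shared. A sketch contrasting the two ownership styles; the types below are invented for illustration, not the Moses classes.

// Sketch of raw-owning vs shared ownership; illustrative only.
#include <boost/shared_ptr.hpp>

struct Phrase { };

struct RawOwningStats {
  Phrase *phrase;
  RawOwningStats() : phrase(new Phrase) {}
  ~RawOwningStats() { delete phrase; }   // manual release, as in multiModelStats
private:
  RawOwningStats(const RawOwningStats&);            // copying would double-delete,
  RawOwningStats& operator=(const RawOwningStats&); // so it is disabled here
};

struct SharedOwningStats {
  boost::shared_ptr<Phrase> phrase;
  SharedOwningStats() : phrase(new Phrase) {}
  // no destructor needed; copies share the same Phrase safely
};

int main() {
  RawOwningStats a;
  SharedOwningStats b, c(b);   // b and c share one Phrase
  return 0;
}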
@@ -120,7 +120,7 @@ void PhraseDictionaryMultiModelCounts::Load()
 }


-const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
+TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
 {
 vector<vector<float> > multimodelweights;
 bool normalize;
@@ -130,11 +130,12 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
 //source phrase frequency is shared among all phrase pairs
 vector<float> fs(m_numModels);

-map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
+map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);

-CollectSufficientStatistics(src, fs, allStats);
+CollectSufficientStats(src, fs, allStats);

-TargetPhraseCollection *ret = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
+TargetPhraseCollection::shared_ptr ret
+= CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);

 ret->NthElement(m_tableLimit); // sort the phrases for pruning later
 const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret);
@@ -142,16 +143,17 @@ const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseC
 }


-void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats) const
+void PhraseDictionaryMultiModelCounts::CollectSufficientStats(const Phrase& src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats) const
 //fill fs and allStats with statistics from models
 {
 for(size_t i = 0; i < m_numModels; ++i) {
 const PhraseDictionary &pd = *m_pd[i];

-TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollectionLEGACY( src);
+TargetPhraseCollection::shared_ptr ret_raw
+= pd.GetTargetPhraseCollectionLEGACY(src);
 if (ret_raw != NULL) {

-TargetPhraseCollection::iterator iterTargetPhrase;
+TargetPhraseCollection::const_iterator iterTargetPhrase;
 for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {

 const TargetPhrase * targetPhrase = *iterTargetPhrase;
@@ -160,7 +162,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
 string targetString = targetPhrase->GetStringRep(m_output);
 if (allStats->find(targetString) == allStats->end()) {

-multiModelCountsStatistics * statistics = new multiModelCountsStatistics;
+multiModelCountsStats * statistics = new multiModelCountsStats;
 statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info

 //correct future cost estimates and total score
@@ -178,7 +180,7 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
 (*allStats)[targetString] = statistics;

 }
-multiModelCountsStatistics * statistics = (*allStats)[targetString];
+multiModelCountsStats * statistics = (*allStats)[targetString];

 statistics->fst[i] = UntransformScore(raw_scores[0]);
 statistics->ft[i] = UntransformScore(raw_scores[1]);
@@ -189,8 +191,8 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
 }

 // get target phrase frequency for models which have not seen the phrase pair
-for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
-multiModelCountsStatistics * statistics = iter->second;
+for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
+multiModelCountsStats * statistics = iter->second;

 for (size_t i = 0; i < m_numModels; ++i) {
 if (!statistics->ft[i]) {
@@ -200,12 +202,14 @@ void PhraseDictionaryMultiModelCounts::CollectSufficientStatistics(const Phrase&
 }
 }

-TargetPhraseCollection* PhraseDictionaryMultiModelCounts::CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStatistics*>* allStats, vector<vector<float> > &multimodelweights) const
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryMultiModelCounts::
+CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats, vector<vector<float> > &multimodelweights) const
 {
-TargetPhraseCollection *ret = new TargetPhraseCollection();
-for ( map< string, multiModelCountsStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
+TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
+for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {

-multiModelCountsStatistics * statistics = iter->second;
+multiModelCountsStats * statistics = iter->second;

 if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
 UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
@@ -248,7 +252,7 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
 {

 const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
-const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
+TargetPhraseCollection::shared_ptr ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);

 // in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
 if (ret_raw && ret_raw->GetSize() > 0) {
@@ -320,7 +324,7 @@ double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( cons
 }


-lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
+lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
 {
 //do all the necessary lexical table lookups and get counts, but don't apply weights yet

@@ -474,7 +478,7 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
 phrase_pair_map[*iter] += 1;
 }

-vector<multiModelCountsStatisticsOptimization*> optimizerStats;
+vector<multiModelCountsStatsOptimization*> optimizerStats;

 for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {

@@ -483,12 +487,12 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
 string target_string = phrase_pair.second;

 vector<float> fs(m_numModels);
-map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
+map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);

 Phrase sourcePhrase(0);
 sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);

-CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
+CollectSufficientStats(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase

 //phrase pair not found; leave cache empty
 if (allStats->find(target_string) == allStats->end()) {
@@ -497,19 +501,19 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
 continue;
 }

-multiModelCountsStatisticsOptimization * targetStatistics = new multiModelCountsStatisticsOptimization();
-targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
-targetStatistics->fs = fs;
-targetStatistics->fst = (*allStats)[target_string]->fst;
-targetStatistics->ft = (*allStats)[target_string]->ft;
-targetStatistics->f = iter->second;
+multiModelCountsStatsOptimization * targetStats = new multiModelCountsStatsOptimization();
+targetStats->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
+targetStats->fs = fs;
+targetStats->fst = (*allStats)[target_string]->fst;
+targetStats->ft = (*allStats)[target_string]->ft;
+targetStats->f = iter->second;

 try {
-pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), targetStatistics->targetPhrase->GetAlignTerm());
-targetStatistics->lexCachee2f = CacheLexicalStatistics(static_cast<const Phrase&>(*targetStatistics->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
-targetStatistics->lexCachef2e = CacheLexicalStatistics(sourcePhrase, static_cast<const Phrase&>(*targetStatistics->targetPhrase), alignment.first, m_lexTable_f2e, true );
+pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), targetStats->targetPhrase->GetAlignTerm());
+targetStats->lexCachee2f = CacheLexicalStats(static_cast<const Phrase&>(*targetStats->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
+targetStats->lexCachef2e = CacheLexicalStats(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), alignment.first, m_lexTable_f2e, true );

-optimizerStats.push_back(targetStatistics);
+optimizerStats.push_back(targetStats);
 } catch (AlignmentException& e) {}

 RemoveAllInMap(*allStats);
@@ -561,8 +565,8 @@ double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) con
 weight_vector = m_model->normalizeWeights(weight_vector);
 }

-for ( std::vector<multiModelCountsStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
-multiModelCountsStatisticsOptimization* statistics = *iter;
+for ( std::vector<multiModelCountsStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
+multiModelCountsStatsOptimization* statistics = *iter;
 size_t f = statistics->f;

 double score;
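The statistics collection above also switches to const_iterator and keeps the table-limit bound on how many phrases per source it visits. A small stand-alone sketch of that bounded iteration, using a plain vector in place of a TargetPhraseCollection; it only illustrates the pattern, not the Moses code.

// Sketch of table-limit bounded iteration; illustrative only.
#include <iostream>
#include <vector>

int main() {
  std::vector<int> collection;
  for (int i = 0; i < 10; ++i) collection.push_back(i);

  const size_t tableLimit = 3;  // 0 means "no limit" in the Moses convention
  std::vector<int>::const_iterator iterLast =
      (tableLimit != 0 && collection.size() > tableLimit)
          ? collection.begin() + tableLimit
          : collection.end();

  for (std::vector<int>::const_iterator it = collection.begin(); it != iterLast; ++it)
    std::cout << *it << ' ';   // prints: 0 1 2
  std::cout << std::endl;
  return 0;
}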
@@ -37,11 +37,11 @@ typedef boost::unordered_map<Word, lexicalMap > lexicalMapJoint;
 typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
 typedef std::vector<std::vector<lexicalPair> > lexicalCache;

-struct multiModelCountsStatistics : multiModelStatistics {
+struct multiModelCountsStats : multiModelStats {
 std::vector<float> fst, ft;
 };

-struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics {
+struct multiModelCountsStatsOptimization: multiModelCountsStats {
 std::vector<float> fs;
 lexicalCache lexCachee2f, lexCachef2e;
 size_t f;
@@ -80,18 +80,18 @@ public:
 PhraseDictionaryMultiModelCounts(const std::string &line);
 ~PhraseDictionaryMultiModelCounts();
 void Load();
-TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
-void CollectSufficientStatistics(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStatistics*>* allStats) const;
+TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
+void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const;
 float GetTargetCount(const Phrase& target, size_t modelIndex) const;
 double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const;
 double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const;
 double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const;
 std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const;
-std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
+std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
 void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
 void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
 void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
-const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
+TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
 #ifdef WITH_DLIB
 std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
 #endif
@@ -117,7 +117,7 @@ class CrossEntropyCounts: public OptimizationObjective
 public:

 CrossEntropyCounts (
-std::vector<multiModelCountsStatisticsOptimization*> &optimizerStats,
+std::vector<multiModelCountsStatsOptimization*> &optimizerStats,
 PhraseDictionaryMultiModelCounts * model,
 size_t iFeature
 ) {
@@ -129,7 +129,7 @@ public:
 double operator() ( const dlib::matrix<double,0,1>& arg) const;

 private:
-std::vector<multiModelCountsStatisticsOptimization*> m_optimizerStats;
+std::vector<multiModelCountsStatsOptimization*> m_optimizerStats;
 PhraseDictionaryMultiModelCounts * m_model;
 size_t m_iFeature;
 };
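The lexicalPair and lexicalCache typedefs kept by this header exist mainly to make the deeply nested cache type readable. A short sketch of how they fit together; the values below are made up for illustration.

// Sketch of the lexicalPair/lexicalCache typedefs; illustrative only.
#include <utility>
#include <vector>

typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
typedef std::vector<std::vector<lexicalPair> > lexicalCache;

int main() {
  lexicalCache cache(1);                 // one target word
  std::vector<float> joint(2, 0.5f);     // per-model joint counts (2 models)
  std::vector<float> marginal(2, 1.0f);  // per-model marginal counts
  cache[0].push_back(lexicalPair(joint, marginal));
  return 0;
}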
@@ -39,7 +39,7 @@ void PhraseDictionaryNodeMemory::Prune(size_t tableLimit)
 }

 // prune TargetPhraseCollection in this node
-m_targetPhraseCollection.Prune(true, tableLimit);
+m_targetPhraseCollection->Prune(true, tableLimit);
 }

 void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
@@ -53,10 +53,11 @@ void PhraseDictionaryNodeMemory::Sort(size_t tableLimit)
 }

 // prune TargetPhraseCollection in this node
-m_targetPhraseCollection.Sort(true, tableLimit);
+m_targetPhraseCollection->Sort(true, tableLimit);
 }

-PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
+PhraseDictionaryNodeMemory*
+PhraseDictionaryNodeMemory::GetOrCreateChild(const Word &sourceTerm)
 {
 return &m_sourceTermMap[sourceTerm];
 }
@@ -118,7 +119,7 @@ void PhraseDictionaryNodeMemory::Remove()
 {
 m_sourceTermMap.clear();
 m_nonTermMap.clear();
-m_targetPhraseCollection.Remove();
+m_targetPhraseCollection->Remove();
 }

 std::ostream& operator<<(std::ostream &out, const PhraseDictionaryNodeMemory &node)
|
|||||||
|
|
||||||
TerminalMap m_sourceTermMap;
|
TerminalMap m_sourceTermMap;
|
||||||
NonTerminalMap m_nonTermMap;
|
NonTerminalMap m_nonTermMap;
|
||||||
TargetPhraseCollection m_targetPhraseCollection;
|
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PhraseDictionaryNodeMemory() {}
|
PhraseDictionaryNodeMemory()
|
||||||
|
: m_targetPhraseCollection(new TargetPhraseCollection) { }
|
||||||
|
|
||||||
bool IsLeaf() const {
|
bool IsLeaf() const {
|
||||||
return m_sourceTermMap.empty() && m_nonTermMap.empty();
|
return m_sourceTermMap.empty() && m_nonTermMap.empty();
|
||||||
@ -152,10 +153,12 @@ public:
|
|||||||
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
|
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const TargetPhraseCollection &GetTargetPhraseCollection() const {
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection() const {
|
||||||
return m_targetPhraseCollection;
|
return m_targetPhraseCollection;
|
||||||
}
|
}
|
||||||
TargetPhraseCollection &GetTargetPhraseCollection() {
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollection() {
|
||||||
return m_targetPhraseCollection;
|
return m_targetPhraseCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
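PhraseDictionaryNodeMemory now allocates its collection eagerly in the constructor and exposes it as a shared_ptr, so '.' member calls become '->' and callers may hold the collection beyond the node's own lifetime. A compact sketch of that member layout; Node and Collection are invented stand-ins for the Moses classes.

// Sketch of a constructor-initialized shared_ptr member; illustrative only.
#include <boost/shared_ptr.hpp>

struct Collection {
  void Prune(bool /*sort*/, size_t /*limit*/) {}
};

class Node {
  boost::shared_ptr<Collection> m_coll;
public:
  Node() : m_coll(new Collection) {}        // eager allocation, as in the commit
  boost::shared_ptr<Collection> GetCollection() const { return m_coll; }
  void Prune(size_t limit) { m_coll->Prune(true, limit); }  // '.' became '->'
};

int main() {
  Node node;
  boost::shared_ptr<Collection> shared = node.GetCollection();
  // the copy keeps the collection alive even if the node goes away first
  node.Prune(20);
  return 0;
}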
@@ -54,7 +54,9 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const Input
 }
 }

-void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &inputPath) const
+void
+PhraseDictionaryTransliteration::
+GetTargetPhraseCollection(InputPath &inputPath) const
 {
 const Phrase &sourcePhrase = inputPath.GetPhrase();
 size_t hash = hash_value(sourcePhrase);
@@ -66,7 +68,7 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input

 if (iter != cache.end()) {
 // already in cache
-const TargetPhraseCollection *tpColl = iter->second.first;
+TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
 inputPath.SetTargetPhrases(*this, tpColl, NULL);
 } else {
 // TRANSLITERATE
@@ -89,17 +91,15 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
 int ret = system(cmd.c_str());
 UTIL_THROW_IF2(ret != 0, "Transliteration script error");

-TargetPhraseCollection *tpColl = new TargetPhraseCollection();
-vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.path());
+TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
+vector<TargetPhrase*> targetPhrases
+= CreateTargetPhrases(sourcePhrase, outDir.path());
 vector<TargetPhrase*>::const_iterator iter;
 for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
 TargetPhrase *tp = *iter;
 tpColl->Add(tp);
 }
-std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
-cache[hash] = value;
+cache[hash] = CacheCollEntry(tpColl, clock());

 inputPath.SetTargetPhrases(*this, tpColl, NULL);
 }
 }
@ -74,11 +74,10 @@ void PhraseDictionaryTreeAdaptor::CleanUpAfterSentenceProcessing(InputType const
|
|||||||
obj.CleanUp();
|
obj.CleanUp();
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection const*
|
TargetPhraseCollection::shared_ptr
|
||||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
|
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
|
||||||
{
|
{
|
||||||
const TargetPhraseCollection *ret = GetImplementation().GetTargetPhraseCollection(src);
|
return GetImplementation().GetTargetPhraseCollection(src);
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PhraseDictionaryTreeAdaptor::EnableCache()
|
void PhraseDictionaryTreeAdaptor::EnableCache()
|
||||||
@ -107,16 +106,17 @@ const PDTAimp& PhraseDictionaryTreeAdaptor::GetImplementation() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
// legacy
|
// legacy
|
||||||
const TargetPhraseCollectionWithSourcePhrase*
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
|
PhraseDictionaryTreeAdaptor::
|
||||||
|
GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const &range) const
|
||||||
{
|
{
|
||||||
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
|
||||||
if(GetImplementation().m_rangeCache.empty()) {
|
if(GetImplementation().m_rangeCache.empty()) {
|
||||||
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
|
ret = GetImplementation().GetTargetPhraseCollection(src.GetSubString(range));
|
||||||
return tpColl;
|
|
||||||
} else {
|
} else {
|
||||||
const TargetPhraseCollectionWithSourcePhrase *tpColl = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
|
ret = GetImplementation().m_rangeCache[range.GetStartPos()][range.GetEndPos()];
|
||||||
return tpColl;
|
|
||||||
}
|
}
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,8 @@ public:
|
|||||||
|
|
||||||
// get translation candidates for a given source phrase
|
// get translation candidates for a given source phrase
|
||||||
// returns null pointer if nothing found
|
// returns null pointer if nothing found
|
||||||
TargetPhraseCollection const* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const;
|
||||||
|
|
||||||
void InitializeForInput(ttasksptr const& ttask);
|
void InitializeForInput(ttasksptr const& ttask);
|
||||||
void CleanUpAfterSentenceProcessing(InputType const& source);
|
void CleanUpAfterSentenceProcessing(InputType const& source);
|
||||||
@ -73,7 +74,9 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// legacy
|
// legacy
|
||||||
const TargetPhraseCollectionWithSourcePhrase *GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const & srcRange) const;
|
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
||||||
|
GetTargetPhraseCollectionLEGACY(InputType const& src,
|
||||||
|
WordsRange const & srcRange) const;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -79,11 +79,11 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection *tpColl = CreateTargetPhrase(sourcePhrase);
|
TargetPhraseCollection::shared_ptr tpColl = CreateTargetPhrase(sourcePhrase);
|
||||||
|
|
||||||
// add target phrase to phrase-table cache
|
// add target phrase to phrase-table cache
|
||||||
size_t hash = hash_value(sourcePhrase);
|
size_t hash = hash_value(sourcePhrase);
|
||||||
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
|
std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(tpColl, clock());
|
||||||
cache[hash] = value;
|
cache[hash] = value;
|
||||||
|
|
||||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||||
@ -109,7 +109,7 @@ std::vector<uint64_t> ProbingPT::ConvertToProbingSourcePhrase(const Phrase &sour
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
|
TargetPhraseCollection::shared_ptr ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
|
||||||
{
|
{
|
||||||
// create a target phrase from the 1st word of the source, prefix with 'ProbingPT:'
|
// create a target phrase from the 1st word of the source, prefix with 'ProbingPT:'
|
||||||
assert(sourcePhrase.GetSize());
|
assert(sourcePhrase.GetSize());
|
||||||
@ -124,7 +124,7 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase
|
|||||||
|
|
||||||
std::pair<bool, std::vector<target_text> > query_result;
|
std::pair<bool, std::vector<target_text> > query_result;
|
||||||
|
|
||||||
TargetPhraseCollection *tpColl = NULL;
|
TargetPhraseCollection::shared_ptr tpColl = NULL;
|
||||||
|
|
||||||
//Actual lookup
|
//Actual lookup
|
||||||
query_result = m_engine->query(probingSource);
|
query_result = m_engine->query(probingSource);
|
||||||
|
@ -49,12 +49,14 @@ protected:
|
|||||||
|
|
||||||
// Provide access to RuleTableTrie's private
|
// Provide access to RuleTableTrie's private
|
||||||
// GetOrCreateTargetPhraseCollection function.
|
// GetOrCreateTargetPhraseCollection function.
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
RuleTableTrie &ruleTable
|
GetOrCreateTargetPhraseCollection(RuleTableTrie &ruleTable,
|
||||||
, const Phrase &source
|
const Phrase &source,
|
||||||
, const TargetPhrase &target
|
const TargetPhrase &target,
|
||||||
, const Word *sourceLHS) {
|
const Word *sourceLHS)
|
||||||
return ruleTable.GetOrCreateTargetPhraseCollection(source, target, sourceLHS);
|
{
|
||||||
|
return ruleTable.GetOrCreateTargetPhraseCollection(source, target,
|
||||||
|
sourceLHS);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -224,9 +224,10 @@ bool RuleTableLoaderCompact::LoadRuleSection(
|
|||||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
|
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
|
||||||
|
|
||||||
// Insert rule into table.
|
// Insert rule into table.
|
||||||
TargetPhraseCollection &coll = GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr coll;
|
||||||
ruleTable, sourcePhrase, *targetPhrase, &sourceLHS);
|
coll = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
|
||||||
coll.Add(targetPhrase);
|
*targetPhrase, &sourceLHS);
|
||||||
|
coll->Add(targetPhrase);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -242,8 +242,10 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
|||||||
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
|
targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
|
||||||
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
|
targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
|
||||||
|
|
||||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
|
TargetPhraseCollection::shared_ptr phraseColl
|
||||||
phraseColl.Add(targetPhrase);
|
= GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
|
||||||
|
*targetPhrase, sourceLHS);
|
||||||
|
phraseColl->Add(targetPhrase);
|
||||||
|
|
||||||
// not implemented correctly in memory pt. just delete it for now
|
// not implemented correctly in memory pt. just delete it for now
|
||||||
delete sourceLHS;
|
delete sourceLHS;
|
||||||
|
@ -282,8 +282,10 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
|
|||||||
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
|
||||||
targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
|
targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
|
||||||
|
|
||||||
TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
|
TargetPhraseCollection::shared_ptr phraseColl
|
||||||
phraseColl.Add(targetPhrase);
|
= GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase,
|
||||||
|
*targetPhrase, sourceLHS);
|
||||||
|
phraseColl->Add(targetPhrase);
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
@ -301,7 +303,9 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
|
|||||||
//removedirectoryrecursively(dirName);
|
//removedirectoryrecursively(dirName);
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
TargetPhraseCollection::shared_ptr
|
||||||
|
PhraseDictionaryFuzzyMatch::
|
||||||
|
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
||||||
, const Phrase &source
|
, const Phrase &source
|
||||||
, const TargetPhrase &target
|
, const TargetPhrase &target
|
||||||
, const Word *sourceLHS)
|
, const Word *sourceLHS)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
Moses - statistical machine translation system
|
Moses - statistical machine translation system
|
||||||
Copyright (C) 2006-2011 University of Edinburgh
|
Copyright (C) 2006-2011 University of Edinburgh
|
||||||
@ -59,7 +60,8 @@ public:
|
|||||||
TO_STRING();
|
TO_STRING();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode
|
||||||
, const Phrase &source
|
, const Phrase &source
|
||||||
, const TargetPhrase &target
|
, const TargetPhrase &target
|
||||||
, const Word *sourceLHS);
|
, const Word *sourceLHS);
|
||||||
|
@ -149,26 +149,26 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
|
|||||||
lastWord.OnlyTheseFactors(m_inputFactors);
|
lastWord.OnlyTheseFactors(m_inputFactors);
|
||||||
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
|
OnDiskPt::Word *lastWordOnDisk = wrapper.ConvertFromMoses(m_input, lastWord);
|
||||||
|
|
||||||
|
TargetPhraseCollection::shared_ptr tpc;
|
||||||
if (lastWordOnDisk == NULL) {
|
if (lastWordOnDisk == NULL) {
|
||||||
// OOV according to this phrase table. Not possible to extend
|
// OOV according to this phrase table. Not possible to extend
|
||||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
inputPath.SetTargetPhrases(*this, tpc, NULL);
|
||||||
} else {
|
} else {
|
||||||
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
OnDiskPt::PhraseNode const* ptNode;
|
||||||
if (ptNode) {
|
ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
||||||
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
|
if (ptNode) tpc = GetTargetPhraseCollection(ptNode);
|
||||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
inputPath.SetTargetPhrases(*this, tpc, ptNode);
|
||||||
} else {
|
|
||||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete lastWordOnDisk;
|
delete lastWordOnDisk;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
|
TargetPhraseCollection::shared_ptr
|
||||||
|
PhraseDictionaryOnDisk::
|
||||||
|
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
|
||||||
{
|
{
|
||||||
const TargetPhraseCollection *ret;
|
TargetPhraseCollection::shared_ptr ret;
|
||||||
|
|
||||||
CacheColl &cache = GetCache();
|
CacheColl &cache = GetCache();
|
||||||
size_t hash = (size_t) ptNode->GetFilePos();
|
size_t hash = (size_t) ptNode->GetFilePos();
|
||||||
@ -181,31 +181,34 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(
|
|||||||
// not in cache, need to look up from phrase table
|
// not in cache, need to look up from phrase table
|
||||||
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
ret = GetTargetPhraseCollectionNonCache(ptNode);
|
||||||
|
|
||||||
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
|
std::pair<TargetPhraseCollection::shared_ptr , clock_t> value(ret, clock());
|
||||||
cache[hash] = value;
|
cache[hash] = value;
|
||||||
} else {
|
} else {
|
||||||
// in cache. just use it
|
// in cache. just use it
|
||||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
iter->second.second = clock();
|
||||||
value.second = clock();
|
ret = iter->second.first;
|
||||||
|
|
||||||
ret = value.first;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
|
TargetPhraseCollection::shared_ptr
|
||||||
|
PhraseDictionaryOnDisk::
|
||||||
|
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
|
||||||
{
|
{
|
||||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
OnDiskPt::OnDiskWrapper& wrapper
|
||||||
|
= const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||||
|
|
||||||
vector<float> weightT = StaticData::Instance().GetWeights(this);
|
vector<float> weightT = StaticData::Instance().GetWeights(this);
|
||||||
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
|
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
|
||||||
|
|
||||||
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
OnDiskPt::TargetPhraseCollection::shared_ptr targetPhrasesOnDisk
|
||||||
TargetPhraseCollection *targetPhrases
|
= ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
||||||
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
|
TargetPhraseCollection::shared_ptr targetPhrases
|
||||||
|
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this,
|
||||||
|
weightT, vocab, false);
|
||||||
|
|
||||||
delete targetPhrasesOnDisk;
|
// delete targetPhrasesOnDisk;
|
||||||
|
|
||||||
return targetPhrases;
|
return targetPhrases;
|
||||||
}
|
}
|
||||||
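The caching changes in the PhraseDictionaryOnDisk hunks above share a simple shape: the cache maps a hash of the source phrase or phrase-table node to a (shared_ptr, timestamp) pair, and the shared_ptr is returned so the collection stays alive for the caller even if the cache entry is later refreshed or dropped. A rough sketch of that shape, assuming a plain std::map and std::shared_ptr; the `TPColl` and `Lookup` names below are illustrative, not Moses code.

#include <ctime>
#include <map>
#include <memory>
#include <utility>

struct TPColl {};  // stand-in for a target phrase collection

typedef std::map<size_t, std::pair<std::shared_ptr<TPColl>, clock_t> > Cache;

std::shared_ptr<TPColl> Lookup(Cache &cache, size_t hash) {
  Cache::iterator it = cache.find(hash);
  if (it == cache.end()) {
    // not cached yet: build the collection (read from disk in Moses) and remember it
    std::shared_ptr<TPColl> fresh(new TPColl);
    cache[hash] = std::make_pair(fresh, clock());
    return fresh;
  }
  it->second.second = clock();   // cache hit: refresh the timestamp
  return it->second.first;       // hand out the shared pointer
}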
|
@ -78,8 +78,11 @@ public:
|
|||||||
virtual void InitializeForInput(ttasksptr const& ttask);
|
virtual void InitializeForInput(ttasksptr const& ttask);
|
||||||
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||||
|
|
||||||
const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
|
TargetPhraseCollection::shared_ptr
|
||||||
const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
|
GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
|
||||||
|
|
||||||
|
TargetPhraseCollection::shared_ptr
|
||||||
|
GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
|
||||||
|
|
||||||
void SetParameter(const std::string& key, const std::string& value);
|
void SetParameter(const std::string& key, const std::string& value);
|
||||||
|
|
||||||
|
@ -51,8 +51,9 @@ public:
|
|||||||
private:
|
private:
|
||||||
friend class RuleTableLoader;
|
friend class RuleTableLoader;
|
||||||
|
|
||||||
virtual TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
virtual TargetPhraseCollection::shared_ptr
|
||||||
const Phrase &source, const TargetPhrase &target,
|
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||||
|
const TargetPhrase &target,
|
||||||
const Word *sourceLHS) = 0;
|
const Word *sourceLHS) = 0;
|
||||||
|
|
||||||
virtual void SortAndPrune() = 0;
|
virtual void SortAndPrune() = 0;
|
||||||
|
@ -38,8 +38,11 @@
|
|||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
TargetPhraseCollection &RuleTableUTrie::GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS)
|
RuleTableUTrie::
|
||||||
|
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||||
|
const TargetPhrase &target,
|
||||||
|
const Word *sourceLHS)
|
||||||
{
|
{
|
||||||
UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS);
|
UTrieNode &currNode = GetOrCreateNode(source, target, sourceLHS);
|
||||||
return currNode.GetOrCreateTargetPhraseCollection(target);
|
return currNode.GetOrCreateTargetPhraseCollection(target);
|
||||||
|
@ -21,13 +21,13 @@
|
|||||||
|
|
||||||
#include "Trie.h"
|
#include "Trie.h"
|
||||||
#include "UTrieNode.h"
|
#include "UTrieNode.h"
|
||||||
|
#include "moses/TargetPhraseCollection.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
class Phrase;
|
class Phrase;
|
||||||
class TargetPhrase;
|
class TargetPhrase;
|
||||||
class TargetPhraseCollection;
|
|
||||||
class Word;
|
class Word;
|
||||||
class ChartParser;
|
class ChartParser;
|
||||||
|
|
||||||
@ -57,8 +57,10 @@ public:
|
|||||||
const ChartCellCollectionBase &, std::size_t);
|
const ChartCellCollectionBase &, std::size_t);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
const Phrase &source, const TargetPhrase &target, const Word *sourceLHS);
|
GetOrCreateTargetPhraseCollection(const Phrase &source,
|
||||||
|
const TargetPhrase &target,
|
||||||
|
const Word *sourceLHS);
|
||||||
|
|
||||||
UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
|
||||||
const Word *sourceLHS);
|
const Word *sourceLHS);
|
||||||
|
@ -49,7 +49,7 @@ void UTrieNode::Prune(size_t tableLimit)
|
|||||||
|
|
||||||
// Prune TargetPhraseCollections at this node.
|
// Prune TargetPhraseCollections at this node.
|
||||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||||
p->second.Prune(true, tableLimit);
|
p->second->Prune(true, tableLimit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ void UTrieNode::Sort(size_t tableLimit)
|
|||||||
|
|
||||||
// Sort TargetPhraseCollections at this node.
|
// Sort TargetPhraseCollections at this node.
|
||||||
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
for (LabelMap::iterator p = m_labelMap.begin(); p != m_labelMap.end(); ++p) {
|
||||||
p->second.Sort(true, tableLimit);
|
p->second->Sort(true, tableLimit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,8 +89,9 @@ UTrieNode *UTrieNode::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
|
|||||||
return m_gapNode;
|
return m_gapNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
const TargetPhrase &target)
|
UTrieNode::
|
||||||
|
GetOrCreateTargetPhraseCollection(const TargetPhrase &target)
|
||||||
{
|
{
|
||||||
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
|
||||||
const size_t rank = alignmentInfo.GetSize();
|
const size_t rank = alignmentInfo.GetSize();
|
||||||
@ -107,8 +108,9 @@ TargetPhraseCollection &UTrieNode::GetOrCreateTargetPhraseCollection(
|
|||||||
const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
|
const Word &targetNonTerm = target.GetWord(targetNonTermIndex);
|
||||||
vec.push_back(InsertLabel(i++, targetNonTerm));
|
vec.push_back(InsertLabel(i++, targetNonTerm));
|
||||||
}
|
}
|
||||||
|
TargetPhraseCollection::shared_ptr& ret = m_labelMap[vec];
|
||||||
return m_labelMap[vec];
|
if (ret == NULL) ret.reset(new TargetPhraseCollection);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Moses
|
} // namespace Moses
|
||||||
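The UTrieNode change above relies on operator[] default-constructing an empty shared_ptr in the label map, which is then reset exactly once on first use. A condensed sketch of that lazy-create idiom, using std::shared_ptr instead of the Moses typedef; `GetOrCreate`, `TPColl` and `LabelMap` are illustrative names.

#include <map>
#include <memory>
#include <vector>

struct TPColl {};  // stand-in for a target phrase collection

typedef std::map<std::vector<int>, std::shared_ptr<TPColl> > LabelMap;

std::shared_ptr<TPColl> GetOrCreate(LabelMap &labelMap, const std::vector<int> &key) {
  std::shared_ptr<TPColl> &slot = labelMap[key];  // empty shared_ptr on first access
  if (!slot) slot.reset(new TPColl);              // allocate once, share thereafter
  return slot;
}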
|
@ -51,10 +51,10 @@ public:
|
|||||||
TerminalEqualityPred> TerminalMap;
|
TerminalEqualityPred> TerminalMap;
|
||||||
|
|
||||||
typedef boost::unordered_map<std::vector<int>,
|
typedef boost::unordered_map<std::vector<int>,
|
||||||
TargetPhraseCollection> LabelMap;
|
TargetPhraseCollection::shared_ptr> LabelMap;
|
||||||
#else
|
#else
|
||||||
typedef std::map<Word, UTrieNode> TerminalMap;
|
typedef std::map<Word, UTrieNode> TerminalMap;
|
||||||
typedef std::map<std::vector<int>, TargetPhraseCollection> LabelMap;
|
typedef std::map<std::vector<int>, TargetPhraseCollection::shared_ptr> LabelMap;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
~UTrieNode() {
|
~UTrieNode() {
|
||||||
@ -78,8 +78,8 @@ public:
|
|||||||
UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm);
|
UTrieNode *GetOrCreateTerminalChild(const Word &sourceTerm);
|
||||||
UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
|
UTrieNode *GetOrCreateNonTerminalChild(const Word &targetNonTerm);
|
||||||
|
|
||||||
TargetPhraseCollection &GetOrCreateTargetPhraseCollection(
|
TargetPhraseCollection::shared_ptr
|
||||||
const TargetPhrase &);
|
GetOrCreateTargetPhraseCollection(const TargetPhrase &);
|
||||||
|
|
||||||
bool IsLeaf() const {
|
bool IsLeaf() const {
|
||||||
return m_terminalMap.empty() && m_gapNode == NULL;
|
return m_terminalMap.empty() && m_gapNode == NULL;
|
||||||
|
@ -47,7 +47,8 @@ void Scope3Parser::GetChartRuleCollection(
|
|||||||
const size_t start = range.GetStartPos();
|
const size_t start = range.GetStartPos();
|
||||||
const size_t end = range.GetEndPos();
|
const size_t end = range.GetEndPos();
|
||||||
|
|
||||||
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec = m_ruleApplications[start][end-start+1];
|
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec
|
||||||
|
= m_ruleApplications[start][end-start+1];
|
||||||
|
|
||||||
MatchCallback matchCB(range, outColl);
|
MatchCallback matchCB(range, outColl);
|
||||||
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
|
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
|
||||||
@ -58,8 +59,8 @@ void Scope3Parser::GetChartRuleCollection(
|
|||||||
|
|
||||||
if (varSpanNode.m_rank == 0) { // Purely lexical rule.
|
if (varSpanNode.m_rank == 0) { // Purely lexical rule.
|
||||||
assert(labelMap.size() == 1);
|
assert(labelMap.size() == 1);
|
||||||
const TargetPhraseCollection &tpc = labelMap.begin()->second;
|
TargetPhraseCollection::shared_ptr tpc = labelMap.begin()->second;
|
||||||
matchCB.m_tpc = &tpc;
|
matchCB.m_tpc = tpc;
|
||||||
matchCB(m_emptyStackVec);
|
matchCB(m_emptyStackVec);
|
||||||
} else { // Rule has at least one non-terminal.
|
} else { // Rule has at least one non-terminal.
|
||||||
varSpanNode.CalculateRanges(start, end, m_ranges);
|
varSpanNode.CalculateRanges(start, end, m_ranges);
|
||||||
@ -70,7 +71,7 @@ void Scope3Parser::GetChartRuleCollection(
|
|||||||
UTrieNode::LabelMap::const_iterator p = labelMap.begin();
|
UTrieNode::LabelMap::const_iterator p = labelMap.begin();
|
||||||
for (; p != labelMap.end(); ++p) {
|
for (; p != labelMap.end(); ++p) {
|
||||||
const std::vector<int> &labels = p->first;
|
const std::vector<int> &labels = p->first;
|
||||||
const TargetPhraseCollection &tpc = p->second;
|
TargetPhraseCollection::shared_ptr tpc = p->second;
|
||||||
assert(labels.size() == varSpanNode.m_rank);
|
assert(labels.size() == varSpanNode.m_rank);
|
||||||
bool failCheck = false;
|
bool failCheck = false;
|
||||||
for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
|
for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
|
||||||
@ -82,7 +83,7 @@ void Scope3Parser::GetChartRuleCollection(
|
|||||||
if (failCheck) {
|
if (failCheck) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
matchCB.m_tpc = &tpc;
|
matchCB.m_tpc = tpc;
|
||||||
searcher.Search(labels, matchCB);
|
searcher.Search(labels, matchCB);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -66,17 +66,16 @@ private:
|
|||||||
// Define a callback type for use by StackLatticeSearcher.
|
// Define a callback type for use by StackLatticeSearcher.
|
||||||
struct MatchCallback {
|
struct MatchCallback {
|
||||||
public:
|
public:
|
||||||
MatchCallback(const WordsRange &range,
|
MatchCallback(const WordsRange &range, ChartParserCallback &out)
|
||||||
ChartParserCallback &out)
|
: m_range(range) , m_out(out) // , m_tpc(NULL)
|
||||||
: m_range(range)
|
{ }
|
||||||
, m_out(out)
|
|
||||||
, m_tpc(NULL) {}
|
|
||||||
void operator()(const StackVec &stackVec) {
|
void operator()(const StackVec &stackVec) {
|
||||||
m_out.Add(*m_tpc, stackVec, m_range);
|
m_out.Add(*m_tpc, stackVec, m_range);
|
||||||
}
|
}
|
||||||
const WordsRange &m_range;
|
const WordsRange &m_range;
|
||||||
ChartParserCallback &m_out;
|
ChartParserCallback &m_out;
|
||||||
const TargetPhraseCollection *m_tpc;
|
TargetPhraseCollection::shared_ptr m_tpc;
|
||||||
};
|
};
|
||||||
|
|
||||||
void Init();
|
void Init();
|
||||||
|
@ -32,12 +32,13 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
|
|||||||
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
const Phrase &sourcePhrase = inputPath.GetPhrase();
|
||||||
|
|
||||||
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
|
TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
|
||||||
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
|
TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
|
||||||
tpColl->Add(tp);
|
tpColl->Add(tp);
|
||||||
|
|
||||||
// add target phrase to phrase-table cache
|
// add target phrase to phrase-table cache
|
||||||
size_t hash = hash_value(sourcePhrase);
|
size_t hash = hash_value(sourcePhrase);
|
||||||
std::pair<const TargetPhraseCollection*, clock_t> value(tpColl, clock());
|
std::pair<TargetPhraseCollection::shared_ptr, clock_t>
|
||||||
|
value(tpColl, clock());
|
||||||
cache[hash] = value;
|
cache[hash] = value;
|
||||||
|
|
||||||
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
inputPath.SetTargetPhrases(*this, tpColl, NULL);
|
||||||
|
@ -4,182 +4,52 @@ namespace Moses
|
|||||||
{
|
{
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
TPCollCache
|
TPCollCache::
|
||||||
::TPCollCache(size_t capacity)
|
TPCollCache(size_t capacity)
|
||||||
{
|
{
|
||||||
m_doomed_first = m_doomed_last = NULL;
|
m_qfirst = m_qlast = m_cache.end();
|
||||||
m_doomed_count = 0;
|
|
||||||
m_capacity = capacity;
|
m_capacity = capacity;
|
||||||
|
UTIL_THROW_IF2(m_capacity <= 2, "Cache capacity must be > 1!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SPTR<TPCollWrapper>
|
||||||
bool
|
TPCollCache::
|
||||||
sancheck(TPCollWrapper const* first, TPCollWrapper const* last, size_t count)
|
get(uint64_t key, size_t revision)
|
||||||
{
|
|
||||||
if (first == NULL)
|
|
||||||
{
|
|
||||||
UTIL_THROW_IF2(last != NULL || count != 0, "queue error");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t s = 0;
|
|
||||||
for (TPCollWrapper const* x = first; x; x = x->next)
|
|
||||||
{
|
|
||||||
std::cerr << ++s << "/" << count << " "
|
|
||||||
<< first << " "
|
|
||||||
<< x->prev << " " << x << " " << x->next << " "
|
|
||||||
<< last << std::endl;
|
|
||||||
}
|
|
||||||
std::cerr << std::string(80,'-') << std::endl;
|
|
||||||
// while (x != last && s < count)
|
|
||||||
// {
|
|
||||||
// UTIL_THROW_IF2(x->next == NULL, "queue error");
|
|
||||||
// x = x->next;
|
|
||||||
// ++s;
|
|
||||||
// std::cerr << x << " " << s << "/" << count << std::endl;
|
|
||||||
// }
|
|
||||||
// std::cerr << x << " " << s << "/" << count << std::endl;
|
|
||||||
|
|
||||||
// UTIL_THROW_IF2(x != last, "queue error");
|
|
||||||
// UTIL_THROW_IF2(s != count, "queue error");
|
|
||||||
// x = last; s = 1;
|
|
||||||
// while (x != first && s++ < count)
|
|
||||||
// {
|
|
||||||
// UTIL_THROW_IF2(x->prev == NULL, "queue error");
|
|
||||||
// x = x->prev;
|
|
||||||
// }
|
|
||||||
// UTIL_THROW_IF2(x != first, "queue error");
|
|
||||||
// UTIL_THROW_IF2(s != count, "queue error");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// remove a TPC from the "doomed" queue
|
|
||||||
void
|
|
||||||
TPCollCache
|
|
||||||
::remove_from_queue(TPCollWrapper* x)
|
|
||||||
{
|
|
||||||
// caller must lock!
|
|
||||||
|
|
||||||
if (m_doomed_first != x && x->prev == NULL)
|
|
||||||
{ // not in the queue
|
|
||||||
UTIL_THROW_IF2(x->next, "queue error");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
|
||||||
|
|
||||||
std::cerr << "Removing " << x << std::endl;
|
|
||||||
|
|
||||||
if (m_doomed_first == x)
|
|
||||||
m_doomed_first = x->next;
|
|
||||||
else x->prev->next = x->next;
|
|
||||||
|
|
||||||
if (m_doomed_last == x)
|
|
||||||
m_doomed_last = x->prev;
|
|
||||||
else x->next->prev = x->prev;
|
|
||||||
|
|
||||||
x->next = x->prev = NULL;
|
|
||||||
--m_doomed_count;
|
|
||||||
|
|
||||||
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
TPCollCache
|
|
||||||
::add_to_queue(TPCollWrapper* x)
|
|
||||||
{
|
|
||||||
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
|
||||||
|
|
||||||
// caller must lock!
|
|
||||||
x->prev = m_doomed_last;
|
|
||||||
|
|
||||||
if (!m_doomed_first)
|
|
||||||
m_doomed_first = x;
|
|
||||||
|
|
||||||
if (m_doomed_last) m_doomed_last->next = x;
|
|
||||||
m_doomed_last = x;
|
|
||||||
|
|
||||||
++m_doomed_count;
|
|
||||||
|
|
||||||
// sancheck(m_doomed_first, m_doomed_last, m_doomed_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
TPCollWrapper*
|
|
||||||
TPCollCache
|
|
||||||
::get(uint64_t key, size_t revision)
|
|
||||||
{
|
{
|
||||||
using namespace boost;
|
using namespace boost;
|
||||||
upgrade_lock<shared_mutex> rlock(m_lock);
|
unique_lock<shared_mutex> lock(m_lock);
|
||||||
cache_t::iterator m = m_cache.find(key);
|
std::pair<uint64_t, SPTR<TPCollWrapper> > e(key, SPTR<TPCollWrapper>());
|
||||||
if (m == m_cache.end()) // new
|
|
||||||
{
|
|
||||||
std::pair<uint64_t,TPCollWrapper*> e(key,NULL);
|
|
||||||
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
|
|
||||||
std::pair<cache_t::iterator, bool> foo = m_cache.insert(e);
|
std::pair<cache_t::iterator, bool> foo = m_cache.insert(e);
|
||||||
if (foo.second) foo.first->second = new TPCollWrapper(key, revision);
|
SPTR<TPCollWrapper>& ret = foo.first->second;
|
||||||
m = foo.first;
|
if (ret)
|
||||||
// ++m->second->refCount;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
if (m->second->refCount == 0)
|
if (m_qfirst == foo.first) m_qfirst = ret->next;
|
||||||
|
else ret->prev->second->next = ret->next;
|
||||||
|
if (m_qlast != foo.first)
|
||||||
|
ret->next->second->prev = ret->prev;
|
||||||
|
}
|
||||||
|
if (!ret || ret->revision != revision)
|
||||||
|
ret.reset(new TPCollWrapper(key,revision));
|
||||||
|
ret->prev = m_qlast;
|
||||||
|
if (m_qlast != m_cache.end()) m_qlast->second->next = foo.first;
|
||||||
|
m_qlast = foo.first;
|
||||||
|
|
||||||
|
while (m_cache.size() > m_capacity && m_qfirst->second.use_count() == 1)
|
||||||
{
|
{
|
||||||
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
|
m_qfirst = m_qfirst->second->next;
|
||||||
remove_from_queue(m->second);
|
m_cache.erase(m_qfirst->second->prev);
|
||||||
}
|
}
|
||||||
if (m->second->revision != revision) // out of date
|
|
||||||
{
|
return ret;
|
||||||
upgrade_to_unique_lock<shared_mutex> wlock(rlock);
|
|
||||||
m->second = new TPCollWrapper(key, revision);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
++m->second->refCount;
|
|
||||||
return m->second;
|
|
||||||
} // TPCollCache::get(...)
|
} // TPCollCache::get(...)
|
||||||
|
|
||||||
void
|
|
||||||
TPCollCache
|
|
||||||
::release(TPCollWrapper const* ptr)
|
|
||||||
{
|
|
||||||
if (!ptr) return;
|
|
||||||
std::cerr << "Releasing " << ptr->key << " (" << ptr->refCount << ")" << std::endl;
|
|
||||||
if (--ptr->refCount == 0)
|
|
||||||
{
|
|
||||||
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
|
||||||
if (m_doomed_count == m_capacity)
|
|
||||||
{
|
|
||||||
TPCollWrapper* x = m_doomed_first;
|
|
||||||
remove_from_queue(x);
|
|
||||||
UTIL_THROW_IF2(x->refCount || x == ptr, "TPC was doomed while still in use!");
|
|
||||||
cache_t::iterator m = m_cache.find(ptr->key);
|
|
||||||
if (m != m_cache.end() && m->second == ptr)
|
|
||||||
{ // the cache could have been updated with a new pointer
|
|
||||||
// for the same phrase already, so we need to check
|
|
||||||
// if the pointer we cound is the one we want to get rid of,
(typo in the removed comment: "cound" should read "found")
|
|
||||||
// hence the second check
|
|
||||||
// boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
|
|
||||||
m_cache.erase(m);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cerr << "Deleting " << x->key << " " << x->refCount << std::endl;
|
|
||||||
|
|
||||||
// delete x;
|
|
||||||
}
|
|
||||||
add_to_queue(const_cast<TPCollWrapper*>(ptr));
|
|
||||||
}
|
|
||||||
} // TPCollCache::release(...)
|
|
||||||
|
|
||||||
TPCollWrapper::
|
TPCollWrapper::
|
||||||
TPCollWrapper(uint64_t key_, size_t revision_)
|
TPCollWrapper(uint64_t key_, size_t revision_)
|
||||||
: refCount(0), prev(NULL), next(NULL)
|
: revision(revision_), key(key_)
|
||||||
, revision(revision_), key(key_)
|
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
TPCollWrapper::
|
TPCollWrapper::
|
||||||
~TPCollWrapper()
|
~TPCollWrapper()
|
||||||
{
|
{ }
|
||||||
UTIL_THROW_IF2(this->refCount, "TPCollWrapper refCount > 0!");
|
|
||||||
assert(this->refCount == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
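The rewritten TPCollCache::get above replaces the hand-maintained doomed list and manual refCount with a recency queue plus shared_ptr use counts: an entry may be evicted only while the cache itself holds the last reference (use_count() == 1). Below is a simplified, hedged sketch of that eviction rule using std::list and std::unordered_map; the `Wrapper`/`Cache` names are illustrative, and the real code additionally tracks a corpus revision and guards the map with a boost lock, which is omitted here.

#include <cstdint>
#include <list>
#include <memory>
#include <unordered_map>
#include <utility>

struct Wrapper { uint64_t key; explicit Wrapper(uint64_t k) : key(k) {} };

class Cache {
  typedef std::list<uint64_t> Queue;   // least recently used at the front
  std::unordered_map<uint64_t,
      std::pair<std::shared_ptr<Wrapper>, Queue::iterator> > m_map;
  Queue m_queue;
  size_t m_capacity;
public:
  explicit Cache(size_t cap) : m_capacity(cap) {}

  std::shared_ptr<Wrapper> get(uint64_t key) {
    std::pair<std::shared_ptr<Wrapper>, Queue::iterator> &e = m_map[key];
    if (!e.first) {                                     // new entry
      e.first.reset(new Wrapper(key));
      e.second = m_queue.insert(m_queue.end(), key);
    } else {                                            // refresh recency
      m_queue.splice(m_queue.end(), m_queue, e.second);
    }
    std::shared_ptr<Wrapper> ret = e.first;             // keep our entry alive

    // evict from the front only while nobody outside the cache shares the entry
    while (m_map.size() > m_capacity &&
           m_map[m_queue.front()].first.use_count() == 1) {
      m_map.erase(m_queue.front());
      m_queue.pop_front();
    }
    return ret;
  }
};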
|
@ -3,60 +3,44 @@
|
|||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include "moses/TargetPhraseCollection.h"
|
#include "moses/TargetPhraseCollection.h"
|
||||||
#include <boost/atomic.hpp>
|
#include <boost/atomic.hpp>
|
||||||
|
#include "mm/ug_typedefs.h"
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
class TPCollCache;
|
class TPCollWrapper;
|
||||||
|
|
||||||
|
class TPCollCache
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef boost::unordered_map<uint64_t, SPTR<TPCollWrapper> > cache_t;
|
||||||
|
private:
|
||||||
|
uint32_t m_capacity; // capacity of cache
|
||||||
|
cache_t m_cache; // maps from ids to items
|
||||||
|
cache_t::iterator m_qfirst, m_qlast;
|
||||||
|
mutable boost::shared_mutex m_lock;
|
||||||
|
public:
|
||||||
|
TPCollCache(size_t capacity=10000);
|
||||||
|
|
||||||
|
SPTR<TPCollWrapper>
|
||||||
|
get(uint64_t key, size_t revision);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
class TPCollWrapper
|
|
||||||
// wrapper around TargetPhraseCollection with reference counting
|
// wrapper around TargetPhraseCollection with reference counting
|
||||||
// and additional members for caching purposes
|
// and additional members for caching purposes
|
||||||
|
class TPCollWrapper
|
||||||
: public TargetPhraseCollection
|
: public TargetPhraseCollection
|
||||||
{
|
{
|
||||||
friend class TPCollCache;
|
friend class TPCollCache;
|
||||||
friend class Mmsapt;
|
friend class Mmsapt;
|
||||||
mutable boost::atomic<uint32_t> refCount; // reference count
|
|
||||||
public:
|
public:
|
||||||
TPCollWrapper* prev; // ... in queue of TPCollWrappers used recently
|
TPCollCache::cache_t::iterator prev, next;
|
||||||
TPCollWrapper* next; // ... in queue of TPCollWrappers used recently
|
|
||||||
public:
|
public:
|
||||||
mutable boost::shared_mutex lock;
|
mutable boost::shared_mutex lock;
|
||||||
size_t const revision; // rev. No. of the underlying corpus
|
size_t const revision; // rev. No. of the underlying corpus
|
||||||
uint64_t const key; // phrase key
|
uint64_t const key; // phrase key
|
||||||
#if defined(timespec) // timespec is better, but not available everywhere
|
|
||||||
timespec tstamp; // last use
|
|
||||||
#else
|
|
||||||
timeval tstamp; // last use
|
|
||||||
#endif
|
|
||||||
TPCollWrapper(uint64_t const key, size_t const rev);
|
TPCollWrapper(uint64_t const key, size_t const rev);
|
||||||
~TPCollWrapper();
|
~TPCollWrapper();
|
||||||
};
|
};
|
||||||
|
|
||||||
class TPCollCache
|
|
||||||
{
|
|
||||||
typedef boost::unordered_map<uint64_t, TPCollWrapper*> cache_t;
|
|
||||||
typedef std::vector<TPCollWrapper*> history_t;
|
|
||||||
cache_t m_cache; // maps from phrase ids to target phrase collections
|
|
||||||
// mutable history_t m_history; // heap of live items, least recently used one on top
|
|
||||||
|
|
||||||
mutable boost::shared_mutex m_lock; // locks m_cache
|
|
||||||
|
|
||||||
TPCollWrapper* m_doomed_first;
|
|
||||||
TPCollWrapper* m_doomed_last;
|
|
||||||
uint32_t m_doomed_count; // counter of doomed TPCs
|
|
||||||
uint32_t m_capacity; // capacity of cache
|
|
||||||
void add_to_queue(TPCollWrapper* x);
|
|
||||||
void remove_from_queue(TPCollWrapper* x);
|
|
||||||
public:
|
|
||||||
TPCollCache(size_t capacity=10000);
|
|
||||||
|
|
||||||
TPCollWrapper*
|
|
||||||
get(uint64_t key, size_t revision);
|
|
||||||
|
|
||||||
void
|
|
||||||
release(TPCollWrapper const* tpc);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -627,29 +627,31 @@ namespace Moses
|
|||||||
{
|
{
|
||||||
InputPath &inputPath = **iter;
|
InputPath &inputPath = **iter;
|
||||||
const Phrase &phrase = inputPath.GetPhrase();
|
const Phrase &phrase = inputPath.GetPhrase();
|
||||||
const TargetPhraseCollection *targetPhrases
|
TargetPhraseCollection::shared_ptr targetPhrases
|
||||||
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
|
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
|
||||||
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
|
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseCollection const*
|
// TargetPhraseCollection::shared_ptr
|
||||||
Mmsapt::
|
// Mmsapt::
|
||||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
// GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||||
{
|
// {
|
||||||
UTIL_THROW2("Don't call me without the translation task.");
|
// UTIL_THROW2("Don't call me without the translation task.");
|
||||||
}
|
// }
|
||||||
|
|
||||||
// This is not the most efficient way of phrase lookup!
|
// This is not the most efficient way of phrase lookup!
|
||||||
TargetPhraseCollection const*
|
TargetPhraseCollection::shared_ptr
|
||||||
Mmsapt::
|
Mmsapt::
|
||||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
|
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
|
||||||
{
|
{
|
||||||
boost::unique_lock<boost::shared_mutex> xlock(m_lock);
|
SPTR<TPCollWrapper> ret;
|
||||||
|
// boost::unique_lock<boost::shared_mutex> xlock(m_lock);
|
||||||
|
|
||||||
// map from Moses Phrase to internal id sequence
|
// map from Moses Phrase to internal id sequence
|
||||||
vector<id_type> sphrase;
|
vector<id_type> sphrase;
|
||||||
fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase);
|
fillIdSeq(src, m_ifactor, *(btfix->V1), sphrase);
|
||||||
if (sphrase.size() == 0) return NULL;
|
if (sphrase.size() == 0) return ret;
|
||||||
|
|
||||||
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
|
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
|
||||||
// is set to a new copy of the dynamic bitext every time a sentence pair
|
// is set to a new copy of the dynamic bitext every time a sentence pair
|
||||||
@ -665,42 +667,42 @@ namespace Moses
|
|||||||
// lookup phrases in both bitexts
|
// lookup phrases in both bitexts
|
||||||
TSA<Token>::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size());
|
TSA<Token>::tree_iterator mfix(btfix->I1.get(), &sphrase[0], sphrase.size());
|
||||||
TSA<Token>::tree_iterator mdyn(dyn->I1.get());
|
TSA<Token>::tree_iterator mdyn(dyn->I1.get());
|
||||||
if (dyn->I1.get())
|
if (dyn->I1.get()) // we have a dynamic bitext
|
||||||
for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
|
for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
|
||||||
mdyn.extend(sphrase[i]);
|
mdyn.extend(sphrase[i]);
|
||||||
|
|
||||||
if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
|
if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
|
||||||
return NULL; // phrase not found in either bitext
|
return ret; // phrase not found in either bitext
|
||||||
|
|
||||||
// do we have cached results for this phrase?
|
// do we have cached results for this phrase?
|
||||||
uint64_t phrasekey = (mfix.size() == sphrase.size()
|
uint64_t phrasekey = (mfix.size() == sphrase.size()
|
||||||
? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1);
|
? (mfix.getPid()<<1)
|
||||||
|
: (mdyn.getPid()<<1)+1);
|
||||||
// std::cerr << "Phrasekey is " << phrasekey << " at " << HERE << std::endl;
|
|
||||||
|
|
||||||
// get context-specific cache of items previously looked up
|
// get context-specific cache of items previously looked up
|
||||||
SPTR<ContextScope> const& scope = ttask->GetScope();
|
SPTR<ContextScope> const& scope = ttask->GetScope();
|
||||||
SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
|
SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
|
||||||
if (!cache) cache = m_cache;
|
if (!cache) cache = m_cache; // no context-specific cache, use global one
|
||||||
TPCollWrapper* ret = cache->get(phrasekey, dyn->revision());
|
|
||||||
// TO DO: we should revise the revision mechanism: we take the length
|
ret = cache->get(phrasekey, dyn->revision());
|
||||||
// of the dynamic bitext (in sentences) at the time the PT entry
|
// TO DO: we should revise the revision mechanism: we take the
|
||||||
// was stored as the time stamp. For each word in the
|
// length of the dynamic bitext (in sentences) at the time the PT
|
||||||
|
// entry was stored as the time stamp. For each word in the
|
||||||
// vocabulary, we also store its most recent occurrence in the
|
// vocabulary, we also store its most recent occurrence in the
|
||||||
// bitext. Only if the timestamp of each word in the phrase is
|
// bitext. Only if the timestamp of each word in the phrase is
|
||||||
// newer than the timestamp of the phrase itself we must update
|
// newer than the timestamp of the phrase itself we must update
|
||||||
// the entry.
|
// the entry.
|
||||||
|
|
||||||
// std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl;
|
// std::cerr << "Phrasekey is " << ret->key << " at " << HERE << std::endl;
|
||||||
std::cerr << ret << " with " << ret->refCount << " references at "
|
// std::cerr << ret << " with " << ret->refCount << " references at "
|
||||||
<< HERE << std::endl;
|
// << HERE << std::endl;
|
||||||
boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
|
boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
|
||||||
if (ret->GetSize()) return ret;
|
if (ret->GetSize()) return ret;
|
||||||
|
|
||||||
// new TPC (not found or old one was not up to date)
|
// new TPC (not found or old one was not up to date)
|
||||||
boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
|
boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
|
||||||
|
// maybe another thread did the work while we waited for the lock ?
|
||||||
if (ret->GetSize()) return ret;
|
if (ret->GetSize()) return ret;
|
||||||
// check again, another thread may have done the work already
|
|
||||||
|
|
||||||
// OK: pt entry NOT found or NOT up to date
|
// OK: pt entry NOT found or NOT up to date
|
||||||
// lookup and expansion could be done in parallel threads,
|
// lookup and expansion could be done in parallel threads,
|
||||||
@ -718,12 +720,16 @@ namespace Moses
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
BitextSampler<Token> s(btfix.get(), mfix, context->bias,
|
BitextSampler<Token> s(btfix.get(), mfix, context->bias,
|
||||||
m_min_sample_size, m_default_sample_size, m_sampling_method);
|
m_min_sample_size,
|
||||||
|
m_default_sample_size,
|
||||||
|
m_sampling_method);
|
||||||
s();
|
s();
|
||||||
sfix = s.stats();
|
sfix = s.stats();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn);
|
|
||||||
|
if (mdyn.size() == sphrase.size())
|
||||||
|
sdyn = dyn->lookup(ttask, mdyn);
|
||||||
|
|
||||||
vector<PhrasePair<Token> > ppfix,ppdyn;
|
vector<PhrasePair<Token> > ppfix,ppdyn;
|
||||||
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
|
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
|
||||||
@ -737,6 +743,7 @@ namespace Moses
|
|||||||
expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log);
|
expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log);
|
||||||
sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id);
|
sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
// now we have two lists of Phrase Pairs, let's merge them
|
// now we have two lists of Phrase Pairs, let's merge them
|
||||||
PhrasePair<Token>::SortByTargetIdSeq sorter;
|
PhrasePair<Token>::SortByTargetIdSeq sorter;
|
||||||
size_t i = 0; size_t k = 0;
|
size_t i = 0; size_t k = 0;
|
||||||
@ -939,9 +946,10 @@ namespace Moses
|
|||||||
return mdyn.size() == myphrase.size();
|
return mdyn.size() == myphrase.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
void
|
void
|
||||||
Mmsapt
|
Mmsapt
|
||||||
::Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const
|
::Release(ttasksptr const& ttask, TargetPhraseCollection::shared_ptr*& tpc) const
|
||||||
{
|
{
|
||||||
if (!tpc)
|
if (!tpc)
|
||||||
{
|
{
|
||||||
@ -957,6 +965,7 @@ namespace Moses
|
|||||||
if (cache) cache->release(static_cast<TPCollWrapper const*>(tpc));
|
if (cache) cache->release(static_cast<TPCollWrapper const*>(tpc));
|
||||||
tpc = NULL;
|
tpc = NULL;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bool Mmsapt
|
bool Mmsapt
|
||||||
::ProvidesPrefixCheck() const { return true; }
|
::ProvidesPrefixCheck() const { return true; }
|
||||||
|
@ -179,7 +179,7 @@ namespace Moses
|
|||||||
uint64_t const pid1,
|
uint64_t const pid1,
|
||||||
sapt::pstats const& stats,
|
sapt::pstats const& stats,
|
||||||
sapt::Bitext<Token> const & bt,
|
sapt::Bitext<Token> const & bt,
|
||||||
TargetPhraseCollection* tpcoll
|
TargetPhraseCollection::shared_ptr tpcoll
|
||||||
) const;
|
) const;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@ -187,14 +187,14 @@ namespace Moses
|
|||||||
(Phrase const& src,
|
(Phrase const& src,
|
||||||
uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta,
|
uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta,
|
||||||
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
|
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
|
||||||
TargetPhraseCollection* tpcoll) const;
|
TargetPhraseCollection::shared_ptr tpcoll) const;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
combine_pstats
|
combine_pstats
|
||||||
(Phrase const& src,
|
(Phrase const& src,
|
||||||
uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta,
|
uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta,
|
||||||
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
|
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb,
|
||||||
TargetPhraseCollection* tpcoll) const;
|
TargetPhraseCollection::shared_ptr tpcoll) const;
|
||||||
|
|
||||||
void load_extra_data(std::string bname, bool locking);
|
void load_extra_data(std::string bname, bool locking);
|
||||||
void load_bias(std::string bname);
|
void load_bias(std::string bname);
|
||||||
@ -209,15 +209,15 @@ namespace Moses
|
|||||||
std::string const& GetName() const;
|
std::string const& GetName() const;
|
||||||
|
|
||||||
#ifndef NO_MOSES
|
#ifndef NO_MOSES
|
||||||
TargetPhraseCollection const*
|
TargetPhraseCollection::shared_ptr
|
||||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
|
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
|
||||||
|
|
||||||
TargetPhraseCollection const*
|
// TargetPhraseCollection::shared_ptr
|
||||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
// GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||||
|
|
||||||
void
|
void
|
||||||
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
|
GetTargetPhraseCollectionBatch
|
||||||
const InputPathList &inputPathQueue) const;
|
(ttasksptr const& ttask, InputPathList const& inputPathQueue) const;
|
||||||
|
|
||||||
//! Create a sentence-specific manager for SCFG rule lookup.
|
//! Create a sentence-specific manager for SCFG rule lookup.
|
||||||
ChartRuleLookupManager*
|
ChartRuleLookupManager*
|
||||||
@ -234,7 +234,8 @@ namespace Moses
|
|||||||
void setWeights(std::vector<float> const& w);
|
void setWeights(std::vector<float> const& w);
|
||||||
|
|
||||||
|
|
||||||
void Release(ttasksptr const& ttask, TargetPhraseCollection const*& tpc) const;
|
// void Release(ttasksptr const& ttask,
|
||||||
|
// TargetPhraseCollection const*& tpc) const;
|
||||||
// some consumer lets me know that *tpc isn't needed any more
|
// some consumer lets me know that *tpc isn't needed any more
|
||||||
|
|
||||||
|
|
||||||
|
@ -80,7 +80,8 @@ int main(int argc, char* argv[])
|
|||||||
Phrase& p = *phrase;
|
Phrase& p = *phrase;
|
||||||
|
|
||||||
cout << p << endl;
|
cout << p << endl;
|
||||||
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(ttask,p);
|
TargetPhraseCollection::shared_ptr trg
|
||||||
|
= PT->GetTargetPhraseCollectionLEGACY(ttask,p);
|
||||||
if (!trg) continue;
|
if (!trg) continue;
|
||||||
vector<size_t> order(trg->GetSize());
|
vector<size_t> order(trg->GetSize());
|
||||||
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
|
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
|
||||||
@ -118,7 +119,7 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
PT->Release(ttask, trg);
|
// PT->Release(ttask, trg);
|
||||||
}
|
}
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
@ -410,7 +410,7 @@ CreateTranslationOptionsForRange
|
|||||||
const DecodeStep &dstep = **d;
|
const DecodeStep &dstep = **d;
|
||||||
|
|
||||||
const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature();
|
const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature();
|
||||||
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
|
TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
|
||||||
|
|
||||||
static_cast<const Tstep&>(dstep).ProcessInitialTranslation
|
static_cast<const Tstep&>(dstep).ProcessInitialTranslation
|
||||||
(m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases);
|
(m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases);
|
||||||
@ -431,7 +431,7 @@ CreateTranslationOptionsForRange
|
|||||||
TranslationOption &inputPartialTranslOpt = **pto;
|
TranslationOption &inputPartialTranslOpt = **pto;
|
||||||
if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep)) {
|
if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep)) {
|
||||||
const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
|
const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
|
||||||
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
|
TargetPhraseCollection::shared_ptr targetPhrases = inputPath.GetTargetPhrases(pdict);
|
||||||
tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
|
tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
|
||||||
this, adhereTableLimit, targetPhrases);
|
this, adhereTableLimit, targetPhrases);
|
||||||
} else {
|
} else {
|
||||||
|
@ -142,7 +142,8 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
|
|||||||
for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
|
for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
|
||||||
const InputPath &path = *m_inputPathQueue[i];
|
const InputPath &path = *m_inputPathQueue[i];
|
||||||
|
|
||||||
const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary);
|
TargetPhraseCollection::shared_ptr tpColl
|
||||||
|
= path.GetTargetPhrases(phraseDictionary);
|
||||||
const WordsRange &range = path.GetWordsRange();
|
const WordsRange &range = path.GetWordsRange();
|
||||||
|
|
||||||
if (tpColl && tpColl->GetSize()) {
|
if (tpColl && tpColl->GetSize()) {
|
||||||