adding debugging to verify speed; not robust

This commit is contained in:
Nicola Bertoldi 2014-02-16 19:42:56 +01:00
parent fd01180568
commit 8d9bf2405d
7 changed files with 281 additions and 8 deletions

View File

@ -205,6 +205,28 @@ void DynamicCacheBasedLanguageModel::Update(std::vector<std::string> words, int
}
}
/**
 * Removes a batch of entries from the cache, given as one string.
 *
 * The string is split on the "||" separator and the pieces are forwarded
 * to the vector overload of ClearEntries. An empty string is ignored.
 *
 * @param entries "||"-separated list of entries to remove.
 */
void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries)
{
  // Nothing to do for an empty request.
  if (entries == "") {
    return;
  }

  VERBOSE(3,"entries:|" << entries << "|" << std::endl);
  std::vector<std::string> tokens = TokenizeMultiCharSeparator(entries, "||");
  VERBOSE(3,"elements.size() after:|" << tokens.size() << "|" << std::endl);
  ClearEntries(tokens);
}
/**
 * Removes the given entries from the cache.
 *
 * Each element is trimmed and then erased from m_cache. Erasing an entry
 * that is not present is a no-op.
 *
 * @param words entries to remove (taken by value; trimmed in place).
 */
void DynamicCacheBasedLanguageModel::ClearEntries(std::vector<std::string> words)
{
#ifdef WITH_THREADS
  // Erasing mutates m_cache, so exclusive ownership of the shared mutex is
  // required; a shared (reader) lock would let other threads read the cache
  // while it is being modified.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl);
  for (size_t j=0; j<words.size(); j++) {
    words[j] = Trim(words[j]);
    VERBOSE(3,"CacheBasedLanguageModel::ClearEntries word[" << j << "]:"<< words[j] << std::endl);
    m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist)
  }
}
void DynamicCacheBasedLanguageModel::Insert(std::string &entries)
{
if (entries != "") {

View File

@ -61,6 +61,8 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
void Decay();
void Update(std::vector<std::string> words, int age);
void ClearEntries(std::vector<std::string> entries);
void Execute(std::vector<std::string> commands);
void Execute_Single_Command(std::string command);
@ -73,7 +75,6 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction
void Print() const;
void Clear();
protected:
static DynamicCacheBasedLanguageModel *s_instance;
@ -98,7 +99,9 @@ public:
void Execute(std::string command);
void SetParameter(const std::string& key, const std::string& value);
void ClearEntries(std::string &entries);
void Insert(std::string &entries);
void Clear();
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase

View File

@ -131,15 +131,34 @@ public:
TargetPhraseCollectionWithSourcePhrase const*
GetTargetPhraseCollection(Phrase const &src) const {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection" << std::endl);
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: src:|" << src << "|" << std::endl);
assert(m_dict);
if(src.GetSize()==0) return 0;
std::pair<MapSrc2Tgt::iterator,bool> piter;
if(useCache) {
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
if(!piter.second) return piter.first->second;
if(!piter.second){
if (piter.first->second){
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << (piter.first->second)->GetSize() << std::endl);
}else{
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << 0 << std::endl);
}
return piter.first->second;
}
} else if (m_cache.size()) {
MapSrc2Tgt::const_iterator i=m_cache.find(src);
if (i!=m_cache.end()){
if (i->second){
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << (void*) (i->second) << std::endl);
}else{
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl);
}
}else{
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl);
}
return (i!=m_cache.end() ? i->second : 0);
}
@ -154,6 +173,7 @@ public:
std::vector<std::string> wacands;
m_dict->GetTargetCandidates(srcString,cands,wacands);
if(cands.empty()) {
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection ret->GetSize():" << 0 << std::endl);
return 0;
}
@ -197,13 +217,16 @@ public:
sourcePhrases.push_back(src);
}
TargetPhraseCollectionWithSourcePhrase *rv;
rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
if(rv->IsEmpty()) {
delete rv;
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection rv->GetSize():" << 0 << std::endl);
return 0;
} else {
if(useCache) piter.first->second=rv;
VERBOSE(1,"PDTAimp::GetTargetPhraseCollection rv->GetSize():" << rv->GetSize() << std::endl);
m_tgtColls.push_back(rv);
return rv;
}

View File

@ -146,6 +146,15 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
if ((*dlt_meta_it).find("cbtm-file") != (*dlt_meta_it).end()) {
if (&cbtm) cbtm.Load((*dlt_meta_it)["cbtm-file"]);
}
if ((*dlt_meta_it).find("cbtm-clear-source") != (*dlt_meta_it).end()) {
if (&cbtm) cbtm.ClearSource((*dlt_meta_it)["cbtm-clear-source"]);
}
if ((*dlt_meta_it).find("cbtm-clear-entries") != (*dlt_meta_it).end()) {
if (&cbtm) cbtm.ClearEntries((*dlt_meta_it)["cbtm-clear-entries"]);
}
if ((*dlt_meta_it).find("cbtm-clear-all") != (*dlt_meta_it).end()) {
if (&cbtm) cbtm.Clear();
}
if ((*dlt_meta_it).find("cblm") != (*dlt_meta_it).end()) {
if (&cblm) cblm.Insert((*dlt_meta_it)["cblm"]);
}
@ -155,6 +164,12 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
if ((*dlt_meta_it).find("cblm-file") != (*dlt_meta_it).end()) {
if (&cblm) cblm.Load((*dlt_meta_it)["cblm-file"]);
}
if ((*dlt_meta_it).find("cblm-clear-entries") != (*dlt_meta_it).end()) {
if (&cblm) cblm.ClearEntries((*dlt_meta_it)["cblm-clear-entries"]);
}
if ((*dlt_meta_it).find("cblm-clear-all") != (*dlt_meta_it).end()) {
if (&cblm) cblm.Clear();
}
}
// parse XML markup in translation line

View File

@ -108,6 +108,8 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(InputType const& sour
const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
{
VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection" << std::endl);
VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection src:|" << source << "|" << std::endl);
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
@ -116,7 +118,8 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
cacheMap::const_iterator it = m_cacheTM.find(source);
if(it != m_cacheTM.end()) {
VERBOSE(3,"source:|" << source << "| FOUND" << std::endl);
tpc = (it->second).first;
// tpc = (it->second).first;
tpc = new TargetPhraseCollection(*(it->second).first);
std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
@ -129,11 +132,17 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase
tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
}
if (tpc){
VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY tpc->size():" << tpc->GetSize() << std::endl);
}else{
VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY tpc->size():" << 0 << std::endl);
}
return tpc;
}
/**
 * Legacy lookup entry point.
 *
 * Logs the call at verbosity level 1 and forwards to
 * GetTargetPhraseCollection, returning its result unchanged.
 *
 * @param src source phrase to look up.
 * @return whatever GetTargetPhraseCollection(src) returns.
 */
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
{
  VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY" << std::endl);
  return GetTargetPhraseCollection(src);
}
@ -251,6 +260,177 @@ Scores PhraseDictionaryDynamicCacheBased::GetPreComputedScores(const unsigned in
}
}
/**
 * Removes a batch of source/target pairs from the cache, given as one string.
 *
 * The string is split on the "||||" separator and the pieces are forwarded
 * to the vector overload of ClearEntries. An empty string is ignored.
 *
 * @param entries "||||"-separated list of entries to remove.
 */
void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string &entries)
{
  // Nothing to do for an empty request.
  if (entries == "") {
    return;
  }

  VERBOSE(3,"entries:|" << entries << "|" << std::endl);
  std::vector<std::string> tokens = TokenizeMultiCharSeparator(entries, "||||");
  VERBOSE(3,"elements.size() after:|" << tokens.size() << "|" << std::endl);
  ClearEntries(tokens);
}
void PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)
{
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)" << std::endl);
std::vector<std::string> pp;
std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++) {
pp.clear();
pp = TokenizeMultiCharSeparator((*it), "|||");
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
ClearEntries(pp[0], pp[1]);
}
}
void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)
{
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
Phrase sourcePhrase(0);
Phrase targetPhrase(0);
//target
targetPhrase.Clear();
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL);
VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl);
//TODO: Would be better to reuse source phrases, but ownership has to be
//consistent across phrase table implementations
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, factorDelimiter, NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
ClearEntries(sourcePhrase, targetPhrase);
}
/**
 * Removes the target phrase tp from the cache entry of source phrase sp.
 *
 * If sp is not in m_cacheTM, or tp is not among its target phrases, nothing
 * is removed. Otherwise tp is removed from the target phrase collection, its
 * age is removed from the parallel age collection, and m_entries is
 * decremented. If the target phrase collection is empty afterwards, both
 * collections are deleted and sp is erased from m_cacheTM.
 *
 * @param sp source phrase whose cache entry is edited.
 * @param tp target phrase to remove from that entry.
 */
void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
{
  VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
#ifdef WITH_THREADS
  // This function modifies and may delete cache contents, so it needs
  // exclusive ownership of the shared mutex; a shared (reader) lock would
  // allow concurrent lookups to observe the cache mid-modification.
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);

  cacheMap::const_iterator it = m_cacheTM.find(sp);
  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
  if(it == m_cacheTM.end()) {
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    //do nothing
    return;
  }

  VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
  // sp is found: remove tp from both the target phrase collection and the
  // age collection, which are indexed in parallel.
  TargetPhraseCollection* tpc = it->second.first;
  AgeCollection* ac = it->second.second;

  // Linear search for the position of tp inside the collection.
  bool found = false;
  size_t tp_pos=0;
  while (tp_pos < tpc->GetSize()) {
    const Phrase* p_ptr = static_cast<const Phrase*>(tpc->GetTargetPhrase(tp_pos));
    if (tp == *p_ptr) {
      found = true;
      break;
    }
    ++tp_pos;
  }

  if (!found) {
    VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
    //do nothing
  } else {
    VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);

    tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
    ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
    m_entries--;
    VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
    VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
    VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
  }

  if (tpc->GetSize() == 0) {
    // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection
    ac->clear();
    delete tpc;
    delete ac;
    m_cacheTM.erase(sp);
  }
}
void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries)
{
if (entries != "") {
VERBOSE(3,"entries:|" << entries << "|" << std::endl);
std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
ClearEntries(elements);
}
}
void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> entries)
{
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
Phrase sourcePhrase(0);
std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++) {
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, factorDelimiter, NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
ClearSource(sourcePhrase);
}
IFVERBOSE(2) Print();
}
/**
 * Removes the whole cache entry of source phrase sp.
 *
 * If sp is present in m_cacheTM, m_entries is reduced by the number of
 * target phrases stored for it, the target phrase collection and the age
 * collection are deleted, and sp is erased from m_cacheTM. If sp is absent
 * nothing happens.
 *
 * NOTE(review): unlike ClearEntries(Phrase, Phrase), this function does not
 * acquire m_cacheLock itself - confirm whether callers are expected to
 * serialize access.
 *
 * @param sp source phrase whose entry is removed.
 */
void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
{
  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
  cacheMap::const_iterator hit = m_cacheTM.find(sp);
  VERBOSE(3,"searching:|" << sp << "|" << std::endl);

  // Unknown source phrase: nothing to clear.
  if (hit == m_cacheTM.end()) {
    return;
  }

  VERBOSE(3,"found:|" << sp << "|" << std::endl);
  TargetCollectionAgePair entry = hit->second;
  TargetPhraseCollection* targets = entry.first;
  AgeCollection* ages = entry.second;

  m_entries-=targets->GetSize(); //reduce the total amount of entries of the cache

  // Release both collections, then drop the map entry that pointed to them.
  ages->clear();
  delete targets;
  delete ages;
  m_cacheTM.erase(sp);
}
void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries)
{
if (entries != "") {
@ -323,19 +503,21 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age)
VERBOSE(3,"sp:|" << sp << "|" << std::endl);
if(it!=m_cacheTM.end()) {
VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
// p is found
// sp is found
// here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap
// and then add new entry
TargetCollectionAgePair TgtCollAgePair = it->second;
TargetPhraseCollection* tpc = TgtCollAgePair.first;
AgeCollection* ac = TgtCollAgePair.second;
const Phrase* tp_ptr = NULL;
const Phrase* p_ptr = NULL;
TargetPhrase* tp_ptr = NULL;
bool found = false;
size_t tp_pos=0;
while (!found && tp_pos < tpc->GetSize()) {
tp_ptr = (const Phrase*) tpc->GetTargetPhrase(tp_pos);
if (tp == *tp_ptr) {
tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
p_ptr = (const Phrase*) tp_ptr;
if (tp == *p_ptr) {
found = true;
continue;
}
@ -354,6 +536,14 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age)
VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
VERBOSE(3,"tp:|" << tp << "| INSERTED" << std::endl);
}
else{
VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);
tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
ac->at(tp_pos) = age;
VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
VERBOSE(3,"tp:|" << tp << "| UPDATED" << std::endl);
}
} else {
VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
// p is not found

View File

@ -101,7 +101,10 @@ public:
// }
void Print() const; // prints the cache
void Clear(); // clears the cache
void ClearEntries(std::string &entries);
void ClearSource(std::string &entries);
void Insert(std::string &entries);
void Execute(std::string command);
@ -120,11 +123,17 @@ protected:
void Update(std::string sourceString, std::string targetString, std::string ageString);
void Update(Phrase p, Phrase tp, int age);
void ClearEntries(std::vector<std::string> entries);
void ClearEntries(std::string sourceString, std::string targetString);
void ClearEntries(Phrase p, Phrase tp);
void ClearSource(std::vector<std::string> entries);
void ClearSource(Phrase sp);
void Execute(std::vector<std::string> commands);
void Execute_Single_Command(std::string command);
void Clear(); // clears the cache
void SetPreComputedScores(const unsigned int numScoreComponent);
Scores GetPreComputedScores(const unsigned int age);

View File

@ -384,24 +384,30 @@ void TranslationOptionCollection::CreateTranslationOptions()
const DecodeGraph &decodeGraph = *decodeGraphList[graphInd];
size_t backoff = decodeGraph.GetBackoff();
// generate phrases that start at startPos ...
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() graphInd:" << graphInd << endl);
for (size_t startPos = 0 ; startPos < size; startPos++) {
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() startPos:" << startPos << endl);
size_t maxSize = size - startPos; // don't go over end of sentence
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
maxSize = std::min(maxSize, maxSizePhrase);
// ... and that end at endPos
for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++) {
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() endPos:" << endPos << endl);
if (graphInd > 0 && // only skip subsequent graphs
backoff != 0 && // use of backoff specified
(endPos-startPos+1 >= backoff || // size exceeds backoff limit or ...
m_collection[startPos][endPos-startPos].size() > 0)) { // no phrases found so far
VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]" << endl);
// do not create more options
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() continue:" << endl);
continue;
}
// create translation options for that range
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() before CreateTranslationOptionsForRange" << endl);
CreateTranslationOptionsForRange( decodeGraph, startPos, endPos, true, graphInd);
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() after CreateTranslationOptionsForRange" << endl);
}
}
}
@ -432,6 +438,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
, size_t graphInd
, InputPath &inputPath)
{
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() START startPos:" << startPos << " endPos:" << endPos << endl);
if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
// partial trans opt stored in here
@ -445,10 +452,12 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature();
const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(phraseDictionary);
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() before ProcessInitialTranslation" << endl);
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
(m_source, *oldPtoc
, startPos, endPos, adhereTableLimit
, inputPath, targetPhrases);
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() after ProcessInitialTranslation" << endl);
SetInputScore(inputPath, *oldPtoc);
@ -511,9 +520,11 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() before CreateXmlOptionsForRange" << endl);
if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
CreateXmlOptionsForRange(startPos, endPos);
}
VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() after CreateXmlOptionsForRange" << endl);
}
void TranslationOptionCollection::SetInputScore(const InputPath &inputPath, PartialTranslOptColl &oldPtoc)