From 8d9bf2405d76c73a7bfbd0ea304afd68ab0725dc Mon Sep 17 00:00:00 2001 From: Nicola Bertoldi Date: Sun, 16 Feb 2014 19:42:56 +0100 Subject: [PATCH] adding debugging to verify speed; not robust --- moses/FF/DynamicCacheBasedLanguageModel.cpp | 22 ++ moses/FF/DynamicCacheBasedLanguageModel.h | 5 +- moses/PDTAimp.h | 25 ++- moses/Sentence.cpp | 15 ++ .../PhraseDictionaryDynamicCacheBased.cpp | 200 +++++++++++++++++- .../PhraseDictionaryDynamicCacheBased.h | 11 +- moses/TranslationOptionCollection.cpp | 11 + 7 files changed, 281 insertions(+), 8 deletions(-) diff --git a/moses/FF/DynamicCacheBasedLanguageModel.cpp b/moses/FF/DynamicCacheBasedLanguageModel.cpp index 877a56927..5d58a16be 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.cpp +++ b/moses/FF/DynamicCacheBasedLanguageModel.cpp @@ -205,6 +205,28 @@ void DynamicCacheBasedLanguageModel::Update(std::vector words, int } } +void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + ClearEntries(elements); + } +} + +void DynamicCacheBasedLanguageModel::ClearEntries(std::vector words) +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl); + for (size_t j=0; j words, int age); + void ClearEntries(std::vector entries); + void Execute(std::vector commands); void Execute_Single_Command(std::string command); @@ -73,7 +75,6 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction void Print() const; - void Clear(); protected: static DynamicCacheBasedLanguageModel *s_instance; @@ -98,7 +99,9 @@ public: void Execute(std::string command); void SetParameter(const std::string& key, const std::string& value); + void ClearEntries(std::string &entries); void Insert(std::string &entries); 
+ void Clear(); virtual void Evaluate(const Phrase &source , const TargetPhrase &targetPhrase diff --git a/moses/PDTAimp.h b/moses/PDTAimp.h index 156612998..f9bf0adf8 100644 --- a/moses/PDTAimp.h +++ b/moses/PDTAimp.h @@ -131,15 +131,34 @@ public: TargetPhraseCollectionWithSourcePhrase const* GetTargetPhraseCollection(Phrase const &src) const { + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection" << std::endl); + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: src:|" << src << "|" << std::endl); + assert(m_dict); if(src.GetSize()==0) return 0; std::pair piter; if(useCache) { piter=m_cache.insert(std::make_pair(src,static_cast(0))); - if(!piter.second) return piter.first->second; + if(!piter.second){ + if (piter.first->second){ + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << (piter.first->second)->GetSize() << std::endl); + }else{ + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << 0 << std::endl); + } + return piter.first->second; + } } else if (m_cache.size()) { MapSrc2Tgt::const_iterator i=m_cache.find(src); + if (i!=m_cache.end()){ + if (i->second){ + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << (void*) (i->second) << std::endl); + }else{ + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl); + } + }else{ + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl); + } return (i!=m_cache.end() ? 
i->second : 0); } @@ -154,6 +173,7 @@ public: std::vector wacands; m_dict->GetTargetCandidates(srcString,cands,wacands); if(cands.empty()) { + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection ret->GetSize():" << 0 << std::endl); return 0; } @@ -197,13 +217,16 @@ public: sourcePhrases.push_back(src); } + TargetPhraseCollectionWithSourcePhrase *rv; rv=PruneTargetCandidates(tCands,costs, sourcePhrases); if(rv->IsEmpty()) { delete rv; + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection rv->GetSize():" << 0 << std::endl); return 0; } else { if(useCache) piter.first->second=rv; + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection rv->GetSize():" << rv->GetSize() << std::endl); m_tgtColls.push_back(rv); return rv; } diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index ebfe64a5e..f9d405e54 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -146,6 +146,15 @@ int Sentence::Read(std::istream& in,const std::vector& factorOrder) if ((*dlt_meta_it).find("cbtm-file") != (*dlt_meta_it).end()) { if (&cbtm) cbtm.Load((*dlt_meta_it)["cbtm-file"]); } + if ((*dlt_meta_it).find("cbtm-clear-source") != (*dlt_meta_it).end()) { + if (&cbtm) cbtm.ClearSource((*dlt_meta_it)["cbtm-clear-source"]); + } + if ((*dlt_meta_it).find("cbtm-clear-entries") != (*dlt_meta_it).end()) { + if (&cbtm) cbtm.ClearEntries((*dlt_meta_it)["cbtm-clear-entries"]); + } + if ((*dlt_meta_it).find("cbtm-clear-all") != (*dlt_meta_it).end()) { + if (&cbtm) cbtm.Clear(); + } if ((*dlt_meta_it).find("cblm") != (*dlt_meta_it).end()) { if (&cblm) cblm.Insert((*dlt_meta_it)["cblm"]); } @@ -155,6 +164,12 @@ int Sentence::Read(std::istream& in,const std::vector& factorOrder) if ((*dlt_meta_it).find("cblm-file") != (*dlt_meta_it).end()) { if (&cblm) cblm.Load((*dlt_meta_it)["cblm-file"]); } + if ((*dlt_meta_it).find("cblm-clear-entries") != (*dlt_meta_it).end()) { + if (&cblm) cblm.ClearEntries((*dlt_meta_it)["cblm-clear-entries"]); + } + if ((*dlt_meta_it).find("cblm-clear-all") != (*dlt_meta_it).end()) { + if (&cblm) 
cblm.Clear(); + } } // parse XML markup in translation line diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp index da2d9a5ad..9b9ad9f85 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp @@ -108,6 +108,8 @@ void PhraseDictionaryDynamicCacheBased::InitializeForInput(InputType const& sour const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const { + VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection" << std::endl); + VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection src:|" << source << "|" << std::endl); #ifdef WITH_THREADS boost::shared_lock read_lock(m_cacheLock); #endif @@ -116,7 +118,8 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase cacheMap::const_iterator it = m_cacheTM.find(source); if(it != m_cacheTM.end()) { VERBOSE(3,"source:|" << source << "| FOUND" << std::endl); - tpc = (it->second).first; +// tpc = (it->second).first; + tpc = new TargetPhraseCollection(*(it->second).first); std::vector::const_iterator it2 = tpc->begin(); @@ -129,11 +132,17 @@ const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhrase tpc->NthElement(m_tableLimit); // sort the phrases for the decoder } + if (tpc){ + VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY tpc->size():" << tpc->GetSize() << std::endl); + }else{ + VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY tpc->size():" << 0 << std::endl); + } return tpc; } const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const { + VERBOSE(1,"PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY" << std::endl); const TargetPhraseCollection 
*ret = GetTargetPhraseCollection(src); return ret; } @@ -251,6 +260,177 @@ Scores PhraseDictionaryDynamicCacheBased::GetPreComputedScores(const unsigned in } } +void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string &entries) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + ClearEntries(elements); + } +} + +void PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector entries) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector entries)" << std::endl); + std::vector pp; + + std::vector::iterator it; + for(it = entries.begin(); it!=entries.end(); it++) { + pp.clear(); + pp = TokenizeMultiCharSeparator((*it), "|||"); + VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); + VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); + + ClearEntries(pp[0], pp[1]); + } +} + +void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl); + const StaticData &staticData = StaticData::Instance(); + const std::string& factorDelimiter = staticData.GetFactorDelimiter(); + Phrase sourcePhrase(0); + Phrase targetPhrase(0); + + //target + targetPhrase.Clear(); + VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); + targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL); + VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); + + //TODO: Would be better to reuse source phrases, but ownership has to be + //consistent across phrase table implementations + sourcePhrase.Clear(); + VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); + 
sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, factorDelimiter, NULL); + VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); + ClearEntries(sourcePhrase, targetPhrase); + +} + +void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl); +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl); + + cacheMap::const_iterator it = m_cacheTM.find(sp); + VERBOSE(3,"sp:|" << sp << "|" << std::endl); + if(it!=m_cacheTM.end()) { + VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); + // sp is found + // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap + // and then add new entry + + TargetCollectionAgePair TgtCollAgePair = it->second; + TargetPhraseCollection* tpc = TgtCollAgePair.first; + AgeCollection* ac = TgtCollAgePair.second; + const Phrase* p_ptr = NULL; + TargetPhrase* tp_ptr = NULL; + bool found = false; + size_t tp_pos=0; + while (!found && tp_pos < tpc->GetSize()) { + tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); + p_ptr = (const Phrase*) tp_ptr; + if (tp == *p_ptr) { + found = true; + continue; + } + tp_pos++; + } + if (!found) { + VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); + //do nothing + } + else{ + VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl); + + tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection + ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection + m_entries--; + VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl); + VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl); + VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl); + } + if (tpc->GetSize() == 0) { + // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and 
AgeCollection + ac->clear(); + delete tpc; + delete ac; + m_cacheTM.erase(sp); + } + + } else { + VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); + //do nothing + } +} + + + + +void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + ClearSource(elements); /* source-only entries: dispatch to the ClearSource overload, not ClearEntries (which expects "src ||| tgt" pairs) */ + } +} + +void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector entries) +{ + VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); + const StaticData &staticData = StaticData::Instance(); + const std::string& factorDelimiter = staticData.GetFactorDelimiter(); + Phrase sourcePhrase(0); + + std::vector::iterator it; + for(it = entries.begin(); it!=entries.end(); it++) { + + sourcePhrase.Clear(); + VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl); + sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, factorDelimiter, NULL); + VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); + + ClearSource(sourcePhrase); + } + + IFVERBOSE(2) Print(); +} + +void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) +{ + VERBOSE(3,"sp:|" << sp << "|" << std::endl); + cacheMap::const_iterator it = m_cacheTM.find(sp); + VERBOSE(3,"searching:|" << sp << "|" << std::endl); + if (it != m_cacheTM.end()) { + VERBOSE(3,"found:|" << sp << "|" << std::endl); + //sp is found + + TargetCollectionAgePair TgtCollAgePair = it->second; + TargetPhraseCollection* tpc = TgtCollAgePair.first; + AgeCollection* ac = TgtCollAgePair.second; + + m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache + + // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection + ac->clear(); + delete tpc; + delete ac; + m_cacheTM.erase(sp); + } + else + { + //do
nothing + } +} + void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries) { if (entries != "") { @@ -323,19 +503,21 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age) VERBOSE(3,"sp:|" << sp << "|" << std::endl); if(it!=m_cacheTM.end()) { VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); - // p is found + // sp is found // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap // and then add new entry TargetCollectionAgePair TgtCollAgePair = it->second; TargetPhraseCollection* tpc = TgtCollAgePair.first; AgeCollection* ac = TgtCollAgePair.second; - const Phrase* tp_ptr = NULL; + const Phrase* p_ptr = NULL; + TargetPhrase* tp_ptr = NULL; bool found = false; size_t tp_pos=0; while (!found && tp_pos < tpc->GetSize()) { - tp_ptr = (const Phrase*) tpc->GetTargetPhrase(tp_pos); - if (tp == *tp_ptr) { + tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); + p_ptr = (const Phrase*) tp_ptr; + if (tp == *p_ptr) { found = true; continue; } @@ -354,6 +536,14 @@ void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age) VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl); VERBOSE(3,"tp:|" << tp << "| INSERTED" << std::endl); } + else{ + VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl); + tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + ac->at(tp_pos) = age; + VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl); + VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl); + VERBOSE(3,"tp:|" << tp << "| UPDATED" << std::endl); + } } else { VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); // p is not found diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h index b9ef6c0d4..18f8aa1e3 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h @@ -101,7 +101,10 @@ public: // } void 
Print() const; // prints the cache + void Clear(); // clears the cache + void ClearEntries(std::string &entries); + void ClearSource(std::string &entries); void Insert(std::string &entries); void Execute(std::string command); @@ -120,11 +123,17 @@ protected: void Update(std::string sourceString, std::string targetString, std::string ageString); void Update(Phrase p, Phrase tp, int age); + void ClearEntries(std::vector entries); + void ClearEntries(std::string sourceString, std::string targetString); + void ClearEntries(Phrase p, Phrase tp); + + void ClearSource(std::vector entries); + void ClearSource(Phrase sp); + void Execute(std::vector commands); void Execute_Single_Command(std::string command); - void Clear(); // clears the cache void SetPreComputedScores(const unsigned int numScoreComponent); Scores GetPreComputedScores(const unsigned int age); diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index db041e199..6cb9f8c13 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -384,24 +384,30 @@ void TranslationOptionCollection::CreateTranslationOptions() const DecodeGraph &decodeGraph = *decodeGraphList[graphInd]; size_t backoff = decodeGraph.GetBackoff(); // generate phrases that start at startPos ... + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() graphInd:" << graphInd << endl); for (size_t startPos = 0 ; startPos < size; startPos++) { + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() startPos:" << startPos << endl); size_t maxSize = size - startPos; // don't go over end of sentence size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); maxSize = std::min(maxSize, maxSizePhrase); // ... 
and that end at endPos for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++) { + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() endPos:" << endPos << endl); if (graphInd > 0 && // only skip subsequent graphs backoff != 0 && // use of backoff specified (endPos-startPos+1 >= backoff || // size exceeds backoff limit or ... m_collection[startPos][endPos-startPos].size() > 0)) { // no phrases found so far VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]" << endl); // do not create more options + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() continue:" << endl); continue; } // create translation options for that range + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() before CreateTranslationOptionsForRange" << endl); CreateTranslationOptionsForRange( decodeGraph, startPos, endPos, true, graphInd); + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() after CreateTranslationOptionsForRange" << endl); } } } @@ -432,6 +438,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( , size_t graphInd , InputPath &inputPath) { +VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() START startPos:" << startPos << " endPos:" << endPos << endl); if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) { // partial trans opt stored in here @@ -445,10 +452,12 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature(); const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(phraseDictionary); + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() before ProcessInitialTranslation" << endl); static_cast(decodeStep).ProcessInitialTranslation (m_source, *oldPtoc , startPos, endPos, adhereTableLimit , inputPath, 
targetPhrases); + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() after ProcessInitialTranslation" << endl); SetInputScore(inputPath, *oldPtoc); @@ -511,9 +520,11 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl); } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() before CreateXmlOptionsForRange" << endl); if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) { CreateXmlOptionsForRange(startPos, endPos); } + VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() after CreateXmlOptionsForRange" << endl); } void TranslationOptionCollection::SetInputScore(const InputPath &inputPath, PartialTranslOptColl &oldPtoc)