diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 048981d04..3d9be2fa3 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -10,6 +10,7 @@ #include "moses/TranslationModel/PhraseDictionaryScope3.h" #include "moses/TranslationModel/PhraseDictionaryTransliteration.h" #include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h" +#include "moses/TranslationModel/PhraseDictionaryCache.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h" #include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h" @@ -234,6 +235,7 @@ FeatureRegistry::FeatureRegistry() // MOSES_FNAME(PhraseDictionaryDynSuffixArray); MOSES_FNAME(PhraseDictionaryTransliteration); MOSES_FNAME(PhraseDictionaryDynamicCacheBased); + MOSES_FNAME(PhraseDictionaryCache); MOSES_FNAME(PhraseDictionaryFuzzyMatch); MOSES_FNAME(ProbingPT); MOSES_FNAME(PhraseDictionaryMemoryPerSentence); diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index fc9ec6480..ada728919 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -793,6 +793,9 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName) case 15: // DCacheBased: ptType = "PhraseDictionaryDynamicCacheBased"; break; + case 16: // CachePT: + ptType = "PhraseDictionaryCache"; + break; default: break; } diff --git a/moses/TranslationModel/PhraseDictionaryCache.cpp b/moses/TranslationModel/PhraseDictionaryCache.cpp new file mode 100644 index 000000000..b3f8a4a7a --- /dev/null +++ b/moses/TranslationModel/PhraseDictionaryCache.cpp @@ -0,0 +1,582 @@ +// vim:tabstop=2 + +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2006 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+#include "util/exception.hh"
+
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationModel/PhraseDictionaryCache.h"
+#include "moses/FactorCollection.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/TargetPhrase.h"
+
+
+using namespace std;
+
+namespace Moses
+{
+std::map< const std::string, PhraseDictionaryCache * > PhraseDictionaryCache::s_instance_map; // instances keyed by m_name; the constructor registers `this` here
+PhraseDictionaryCache *PhraseDictionaryCache::s_instance = NULL; // overwritten by every constructor call, so it names the most recently built instance
+
+//! constructor
+PhraseDictionaryCache::PhraseDictionaryCache(const std::string &line)
+  : PhraseDictionary(line, true)
+{
+  std::cerr << "Initializing PhraseDictionaryCache feature..."
<< std::endl;
+
+  //disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0)
+  m_maxCacheSize = 0;
+
+  m_entries = 0;
+  m_name = "default"; // default name; SetParameter replaces it when a cache-name option is given
+  m_constant = false;
+
+  ReadParameters();
+
+  UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryCache feature named " + m_name + " is allowed"); // instance names must be unique
+  s_instance_map[m_name] = this;
+  s_instance = this; //for back compatibility
+  vector weight = StaticData::Instance().GetWeights(this); // NOTE(review): the element type of vector is missing in this copy of the patch
+  m_numscorecomponent = weight.size();
+  m_sentences=0; // NOTE(review): m_score_type and m_lower_score are not assigned anywhere in this constructor
+}
+/* Destructor: delegates all cleanup to the no-argument Clear(). */
+PhraseDictionaryCache::~PhraseDictionaryCache()
+{
+  Clear();
+}
+/* Applies one key/value option of the feature configuration.
+ * cache-name sets m_name; input-factor and output-factor fill the factor
+ * vectors from a comma-separated list; every other key is handed to
+ * PhraseDictionary::SetParameter.
+ */
+void PhraseDictionaryCache::SetParameter(const std::string& key, const std::string& value)
+{
+  VERBOSE(2, "PhraseDictionaryCache::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
+
+  if (key == "cache-name") {
+    m_name = Scan(value);
+  } else if (key == "input-factor") {
+    m_inputFactorsVec = Tokenize(value,",");
+  } else if (key == "output-factor") {
+    m_outputFactorsVec = Tokenize(value,",");
+  } else {
+    PhraseDictionary::SetParameter(key, value);
+  }
+}
+/* Per-input hook. Returns at once when nothing is cached for the translation
+ * id of the task; otherwise walks that cache, calls EvaluateInIsolation on
+ * target phrases and writes diagnostics to std::cerr.
+ * NOTE(review): the first statement of the loop body is corrupted in this copy
+ * of the patch (the text between a less-than and a greater-than sign was
+ * lost), so the assignment of tpc cannot be read here; restore it from
+ * upstream before relying on this function. No m_cacheLock is taken in the
+ * visible code.
+ */
+void PhraseDictionaryCache::InitializeForInput(ttasksptr const& ttask)
+{
+  long tID = ttask->GetSource()->GetTranslationId();
+  TargetPhraseCollection::shared_ptr tpc;
+  if (m_cacheTM.find(tID) == m_cacheTM.end()) return;
+  for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) {
+    std::cerr<<"Source : "<first<second).first));
+    std::cerr<<"TPC size : " << tpc->GetSize() << std::endl;
+    std::vector::const_iterator it2 = tpc->begin();
+
+    while (it2 != tpc->end()) {
+      ((TargetPhrase*) *it2)->EvaluateInIsolation(it->first, GetFeaturesToApply());
+      std::cerr<< "Target Phrase : "<<**it2 << std::endl;
+      it2++;
+    }
+  }
+  if (tpc) { // tpc is declared outside the loop, so only the collection left by the final iteration is pruned
+    tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
+  }
+}
+
+void PhraseDictionaryCache::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
+{
+
for (InputPathList::const_iterator iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
+    InputPath &inputPath = **iter;
+    const long tID = inputPath.ttask->GetSource()->GetTranslationId();
+    const sentCacheMap::const_iterator sentence = m_cacheTM.find(tID);
+    if (sentence == m_cacheTM.end()) continue; // nothing cached for this sentence
+
+    // Look up the source phrase of this path only. Assigning every cached
+    // entry in turn left each path holding whichever entry was iterated last,
+    // whether or not it translated the span covered by the path.
+    const cacheMap::const_iterator hit = sentence->second.find(inputPath.GetPhrase());
+    if (hit == sentence->second.end()) continue; // no cached translation for this span
+
+    TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection(*(hit->second).first));
+    inputPath.SetTargetPhrases(*this, tpc, NULL);
+  }
+}
+
+/* Returns a private copy of the translations cached for `source` in sentence
+ * `tID`, evaluated in isolation and pruned to m_tableLimit. Returns a null
+ * pointer when the sentence or the phrase is not cached. Takes the cache
+ * lock in shared (reader) mode.
+ */
+TargetPhraseCollection::shared_ptr PhraseDictionaryCache::GetTargetPhraseCollection(const Phrase &source, long tID) const
+{
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
+#endif
+  TargetPhraseCollection::shared_ptr tpc;
+
+  const sentCacheMap::const_iterator sentence = m_cacheTM.find(tID);
+  if (sentence == m_cacheTM.end()) return tpc;
+
+  const cacheMap::const_iterator it = sentence->second.find(source);
+  if (it == sentence->second.end()) return tpc;
+
+  // Work on a copy so that evaluation and pruning never touch the cached collection.
+  tpc.reset(new TargetPhraseCollection(*(it->second).first));
+  for (TargetPhraseCollection::iterator it2 = tpc->begin(); it2 != tpc->end(); ++it2) {
+    // The collection exposes const pointers; the copies are owned by tpc, so
+    // dropping const here only affects objects created a few lines above.
+    const_cast<TargetPhrase*>(*it2)->EvaluateInIsolation(source, GetFeaturesToApply());
+  }
+  tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
+
+  return tpc;
+}
+
+/* Chart decoding is not supported by this phrase table: always throws util::Exception. */
+ChartRuleLookupManager* PhraseDictionaryCache::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/)
+{
+  UTIL_THROW(util::Exception, "Not implemented for Chart Decoder");
+}
+
+// friend: writes nothing and returns the stream unchanged
+ostream& operator<<(ostream& out, const PhraseDictionaryCache& phraseDict)
+{
+  return out;
+}
+
+void PhraseDictionaryCache::ClearEntries(std::string &entries, long tID)
+{
+  if (entries != "" && m_cacheTM.find(tID) != m_cacheTM.end()) {
+    VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+    std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+    VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+
ClearEntries(elements, tID);
+  }
+}
+
+/* Removes a list of cached translation pairs from sentence `tID`.
+ * Each element must have the form  source ||| target ; elements without the
+ * separator are reported and skipped instead of being indexed out of range.
+ */
+void PhraseDictionaryCache::ClearEntries(std::vector<std::string> entries, long tID)
+{
+  VERBOSE(3,"PhraseDictionaryCache::ClearEntries(std::vector entries)" << std::endl);
+
+  for (std::vector<std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
+    const std::vector<std::string> pp = TokenizeMultiCharSeparator(*it, "|||");
+    if (pp.size() < 2) {
+      // The text comes from input markup, so a missing target side must not crash the decoder.
+      VERBOSE(1,"PhraseDictionaryCache::ClearEntries skipping malformed entry:|" << *it << "|" << std::endl);
+      continue;
+    }
+    VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
+    VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
+
+    ClearEntries(pp[0], pp[1], tID);
+  }
+}
+
+/* Parses both sides with the configured input/output factors and forwards
+ * the resulting phrases to the Phrase overload.
+ */
+void PhraseDictionaryCache::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString, long tID)
+{
+  VERBOSE(3,"PhraseDictionaryCache::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
+  Phrase sourcePhrase(0);
+  Phrase targetPhrase(0);
+
+  //target
+  targetPhrase.Clear();
+  VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
+  targetPhrase.CreateFromString(Output, m_outputFactorsVec,
+                                targetPhraseString, /*factorDelimiter,*/ NULL);
+  VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
+
+  //TODO: Would be better to reuse source phrases, but ownership has to be
+  //consistent across phrase table implementations
+  sourcePhrase.Clear();
+  VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
+  sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
+                                sourcePhraseString, /*factorDelimiter,*/ NULL);
+  VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+  ClearEntries(sourcePhrase, targetPhrase, tID);
+
+}
+
+/* Removes the pair (sp, tp) from the cache of sentence `tID`; when the last
+ * target of `sp` disappears the whole source entry is dropped as well.
+ * The function modifies the cache, so it takes the lock exclusively.
+ */
+void PhraseDictionaryCache::ClearEntries(Phrase sp, Phrase tp, long tID)
+{
+  VERBOSE(3,"PhraseDictionaryCache::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+  VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);
+
+  cacheMap::const_iterator it =
m_cacheTM.at(tID).find(sp);
+  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
+  if(it!=m_cacheTM.at(tID).end()) {
+    VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
+    // sp is found
+
+    TargetCollectionPair TgtCollPair = it->second;
+    TargetPhraseCollection::shared_ptr tpc = TgtCollPair.first;
+    Scores* sc = TgtCollPair.second;
+    const Phrase* p_ptr = NULL;
+    TargetPhrase* tp_ptr = NULL;
+    bool found = false;
+    size_t tp_pos=0;
+    // linear search for the position of tp inside the collection of sp
+    while (!found && tp_pos < tpc->GetSize()) {
+      tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
+      p_ptr = (const Phrase*) tp_ptr;
+      if (tp == *p_ptr) {
+        found = true;
+        continue;
+      }
+      tp_pos++;
+    }
+    if (!found) {
+      VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
+      //do nothing
+    } else {
+      VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);
+
+      tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
+//      sc->clear();
+      // no need to delete scores here
+      m_entries--;
+      VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
+      VERBOSE(3,"sc size:|" << sc->size() << "|" << std::endl);
+      VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
+    }
+    if (tpc->GetSize() == 0) {
+      // last target gone: release the score vector and drop the source entry
+      sc->clear();
+      tpc.reset();
+      delete sc;
+      m_cacheTM.at(tID).erase(sp);
+    }
+
+  } else {
+    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
+    //do nothing
+  }
+}
+
+/* Removes whole source phrases from the cache of sentence `tID`.
+ * `entries` is a list of source phrases separated by "||||". Nothing happens
+ * when the string is empty or when the sentence has no cache.
+ */
+void PhraseDictionaryCache::ClearSource(std::string &entries, long tID)
+{
+  if (entries != "" && m_cacheTM.find(tID) != m_cacheTM.end()) {
+    VERBOSE(3,"entries:|" << entries << "|" << std::endl);
+    std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+    VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+    // Dispatch to the source-only overload. ClearEntries expects
+    // source ||| target  pairs and would index past the end of a
+    // one-element split for the plain source phrases handled here.
+    ClearSource(elements, tID);
+  }
+}
+
+void PhraseDictionaryCache::ClearSource(std::vector<std::string> entries, long tID)
+{
+  VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
+  const StaticData &staticData = StaticData::Instance();
+  Phrase sourcePhrase(0);
+
+  std::vector<std::string>::iterator it;
+  for(it = entries.begin();
it!=entries.end(); it++) {
+
+    sourcePhrase.Clear();
+    VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
+    sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
+                                  *it, /*factorDelimiter,*/ NULL);
+    VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+
+    ClearSource(sourcePhrase, tID);
+  }
+
+  IFVERBOSE(2) Print();
+}
+
+/* Drops the source phrase `sp`, with all of its cached translations, from
+ * the cache of sentence `tID`. Unknown sentences and unknown phrases are
+ * ignored. The cache is modified, so the lock is taken exclusively, like the
+ * other mutating members of this class.
+ */
+void PhraseDictionaryCache::ClearSource(Phrase sp, long tID)
+{
+  VERBOSE(3,"void PhraseDictionaryCache::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl);
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
+#endif
+  const sentCacheMap::iterator sentence = m_cacheTM.find(tID);
+  if (sentence == m_cacheTM.end()) return; // no cache for this sentence; at() would have thrown here
+
+  cacheMap &cache = sentence->second;
+  const cacheMap::iterator it = cache.find(sp);
+  if (it == cache.end()) return; // nothing cached for sp
+
+  VERBOSE(3,"found:|" << sp << "|" << std::endl);
+  m_entries -= it->second.first->GetSize(); //reduce the total amount of entries of the cache
+  delete it->second.second; // the Scores object is owned by the map entry
+  cache.erase(it);          // releases the shared target collection as well
+}
+
+/* Inserts the "||||"-separated entries into the cache of sentence `tID`;
+ * an empty string is ignored.
+ */
+void PhraseDictionaryCache::Insert(std::string &entries, long tID)
+{
+  if (entries != "") {
+    VERBOSE(3,"entries:|" << entries << "|" << " tID | " << tID << std::endl);
+    std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
+    VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
+    Insert(elements, tID);
+  }
+}
+
+/* Vector form of Insert: forwards to Update and prints the cache at verbosity 3. */
+void PhraseDictionaryCache::Insert(std::vector<std::string> entries, long tID)
+{
+  VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
+  Update(tID, entries);
+  IFVERBOSE(3) Print();
+}
+
+
+/* Adds or refreshes one cache entry per element of `entries`. An element is
+ * source ||| target [ ||| scores [ ||| word-alignment ] ]; elements that lack
+ * the target side are reported and skipped instead of being indexed out of
+ * range.
+ */
+void PhraseDictionaryCache::Update(long tID, std::vector<std::string> entries)
+{
+  for (std::vector<std::string>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
+    const std::vector<std::string> pp = TokenizeMultiCharSeparator(*it, "|||");
+    if (pp.size() < 2) {
+      VERBOSE(1,"PhraseDictionaryCache::Update skipping malformed entry:|" << *it << "|" << std::endl);
+      continue;
+    }
+    VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
+    VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
+
+    if (pp.size() > 3) {
+      VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
+
VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl);
+      Update(tID,pp[0], pp[1], pp[2], pp[3]);
+    } else if (pp.size() > 2){
+      VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
+      Update(tID,pp[0], pp[1], pp[2]);
+    } else {
+      Update(tID,pp[0], pp[1]);
+    }
+  }
+}
+
+/* Parses a whitespace-separated list of numbers into a score vector.
+ * An empty string gives an empty vector; reading stops at the first token
+ * that is not a number.
+ */
+Scores PhraseDictionaryCache::Conv2VecFloats(std::string& s){
+  std::vector<float> n;
+  if (s.empty())
+    return n;
+  std::istringstream iss(s);
+  std::copy(std::istream_iterator<float>(iss),
+            std::istream_iterator<float>(),
+            std::back_inserter(n));
+  return n;
+}
+
+/* String form of Update: converts the score string and both phrase strings
+ * (using the configured input/output factors) and forwards to the Phrase
+ * overload.
+ */
+void PhraseDictionaryCache::Update(long tID, std::string sourcePhraseString, std::string targetPhraseString, std::string scoreString, std::string waString)
+{
+  Phrase sourcePhrase(0);
+  TargetPhrase targetPhrase(0);
+
+  Scores scores = Conv2VecFloats(scoreString);
+  //target
+  targetPhrase.Clear();
+  // change here for factored based CBTM
+  VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
+  targetPhrase.CreateFromString(Output, m_outputFactorsVec,
+                                targetPhraseString, /*factorDelimiter,*/ NULL);
+  VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
+
+  //TODO: Would be better to reuse source phrases, but ownership has to be
+  //consistent across phrase table implementations
+  sourcePhrase.Clear();
+  VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
+  sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL);
+  VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
+
+  if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl);
+
+  Update(tID, sourcePhrase, targetPhrase, scores, waString);
+}
+
+void PhraseDictionaryCache::Update(long tID, Phrase sp, TargetPhrase tp, Scores scores, std::string waString)
+{
+  VERBOSE(3,"PhraseDictionaryCache::Update(Phrase sp, TargetPhrase tp, Scores scores, std::string waString)" << std::endl);
+#ifdef
WITH_THREADS
+  boost::shared_lock lock(m_cacheLock); // NOTE(review): a shared_lock is taken although the code below inserts into m_cacheTM; its template argument is also missing in this copy of the patch
+#endif
+  VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| word-alignment |" << waString << "|" << std::endl);
+
+  cacheMap::const_iterator it = m_cacheTM[tID].find(sp); // operator[] creates an empty per-sentence cache for tID when none exists yet
+  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
+  if(it!=m_cacheTM.at(tID).end()) {
+    VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
+    // sp is found
+
+    TargetCollectionPair TgtCollPair = it->second;
+    TargetPhraseCollection::shared_ptr tpc = TgtCollPair.first;
+    Scores* sc = TgtCollPair.second;
+    const Phrase* p_ptr = NULL;
+    TargetPhrase* tp_ptr = NULL;
+    bool found = false;
+    size_t tp_pos=0;
+    while (!found && tp_pos < tpc->GetSize()) { // linear search for a cached target equal to tp
+      tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
+      p_ptr = (const TargetPhrase*) tp_ptr;
+      if ((Phrase) tp == *p_ptr) {
+        found = true;
+        continue;
+      }
+      tp_pos++;
+    }
+    if (!found) {
+      VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
+      std::auto_ptr targetPhrase(new TargetPhrase(tp));
+      Scores scoreVec;
+      for (unsigned int i=0; iGetScoreBreakdown().Assign(this, scoreVec); // NOTE(review): corrupted in this copy of the patch; the loop condition and body are missing
+      if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
+
+      tpc->Add(targetPhrase.release()); // the collection receives the raw pointer released by the auto_ptr
+
+      tp_pos = tpc->GetSize()-1;
+      sc = &scores; // rebinds the local pointer only; TgtCollPair is a copy, so the pointer stored in the map is unchanged
+      m_entries++;
+      VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
+    } else {
+      Scores scoreVec;
+      for (unsigned int i=0; iGetScoreBreakdown().Assign(this, scoreVec); // NOTE(review): corrupted in this copy of the patch; the loop condition and body are missing
+      if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
+      VERBOSE(1,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
+    }
+  } else {
+    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
+    // p is not found
+    // create target collection
+
+    TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
+    Scores* sc = new Scores();
+    m_cacheTM[tID].insert(make_pair(sp,std::make_pair(tpc,sc))); // the map entry now holds this heap-allocated Scores pointer
+
+    //tp is not found
+    std::auto_ptr targetPhrase(new TargetPhrase(tp));
+    // scoreVec is a composition of decay_score and the feature scores
+
Scores scoreVec;
+    for (unsigned int i=0; iGetScoreBreakdown().Assign(this, scoreVec); // NOTE(review): corrupted in this copy of the patch; the loop condition and body are missing
+    if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
+
+    tpc->Add(targetPhrase.release());
+    sc = &scores; // changes the local pointer only (it now points at the by-value parameter scores)
+    m_entries++;
+    VERBOSE(1,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
+  }
+}
+/* Splits command with Tokenize and forwards the pieces to the vector overload. */
+void PhraseDictionaryCache::Execute(std::string command, long tID)
+{
+  VERBOSE(2,"command:|" << command << "|" << std::endl);
+  std::vector commands = Tokenize(command, "||");
+  Execute(commands, tID);
+}
+/* NOTE(review): everything from the loop condition below up to the tail of a
+ * later function was lost in this copy of the patch; restore the missing
+ * definitions from upstream before applying it.
+ */
+void PhraseDictionaryCache::Execute(std::vector commands, long tID)
+{
+  for (size_t j=0; jfirst);
+  }
+}
+/* Frees the cache of sentence tID: for every entry the Scores object is
+ * deleted and the target collection pointer reset, then the per-sentence map
+ * is emptied. m_cacheTM.at(tID) throws std::out_of_range when the sentence
+ * has no cache.
+ * NOTE(review): m_entries is documented in the header as the total number of
+ * entries, yet it is set to 0 here although only one sentence is cleared;
+ * the writes are also guarded by a shared_lock. Confirm both.
+ */
+void PhraseDictionaryCache::Clear(long tID)
+{
+#ifdef WITH_THREADS
+  boost::shared_lock lock(m_cacheLock);
+#endif
+  cacheMap::iterator it;
+  for(it = m_cacheTM.at(tID).begin(); it!=m_cacheTM.at(tID).end(); it++) {
+    (((*it).second).second)->clear();
+    delete ((*it).second).second;
+    ((*it).second).first.reset();
+  }
+  m_cacheTM.at(tID).clear();
+  m_entries = 0;
+}
+
+/* Runs the cache actions requested by one DLT meta map for sentence tID.
+ * Keys are tested independently and in this order: cbtm (Insert),
+ * cbtm-command (Execute), cbtm-clear-source (ClearSource),
+ * cbtm-clear-entries (ClearEntries), cbtm-clear-all (Clear(), value unused).
+ */
+void PhraseDictionaryCache::ExecuteDlt(std::map dlt_meta, long tID)
+{
+  if (dlt_meta.find("cbtm") != dlt_meta.end()) {
+    Insert(dlt_meta["cbtm"], tID);
+  }
+  if (dlt_meta.find("cbtm-command") != dlt_meta.end()) {
+    Execute(dlt_meta["cbtm-command"], tID);
+  }
+  if (dlt_meta.find("cbtm-clear-source") != dlt_meta.end()) {
+    ClearSource(dlt_meta["cbtm-clear-source"], tID);
+  }
+  if (dlt_meta.find("cbtm-clear-entries") != dlt_meta.end()) {
+    ClearEntries(dlt_meta["cbtm-clear-entries"], tID);
+  }
+  if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) {
+    Clear(); // clears every sentence, not only tID
+  }
+}
+/* Writes every cached pair to std::cout while holding the cache lock in shared mode. */
+void PhraseDictionaryCache::Print() const
+{
+  VERBOSE(2,"PhraseDictionaryCache::Print()" << std::endl);
+#ifdef WITH_THREADS
+  boost::shared_lock read_lock(m_cacheLock);
+#endif
+  for(sentCacheMap::const_iterator itr = m_cacheTM.begin(); itr!=m_cacheTM.end(); itr++) {
+    cacheMap::const_iterator it;
+    for(it = (itr->second).begin(); it!=(itr->second).end(); it++) {
+      std::string source = (it->first).ToString();
+
TargetPhraseCollection::shared_ptr tpc = (it->second).first;
+      TargetPhraseCollection::iterator itr; // shadows the sentence-level iterator of the same name declared by the outermost for statement
+      for(itr = tpc->begin(); itr != tpc->end(); itr++) {
+        std::string target = (*itr)->ToString();
+        std::cout << source << " ||| " << target << std::endl; // one line per cached source/target pair
+      }
+      source.clear();
+    }
+  }
+}
+
+}// end namespace
diff --git a/moses/TranslationModel/PhraseDictionaryCache.h b/moses/TranslationModel/PhraseDictionaryCache.h
new file mode 100644
index 000000000..8103a12b4
--- /dev/null
+++ b/moses/TranslationModel/PhraseDictionaryCache.h
@@ -0,0 +1,184 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#ifndef moses_PhraseDictionaryCache_H
+#define moses_PhraseDictionaryCache_H
+
+#include "moses/TypeDef.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationTask.h"
+/* NOTE(review): the four #include directives below have lost their header names in this copy of the patch; restore them from upstream. */
+#include
+#include
+
+#ifdef WITH_THREADS
+#include
+#include
+#endif
+/* Score-type codes and PI. None of these macros is referenced by the code
+ * visible in this patch. NOTE(review): PI is an unprefixed macro defined in a
+ * header, so it can clash with other definitions of the same name.
+ */
+#define CBTM_SCORE_TYPE_UNDEFINED (-1)
+#define CBTM_SCORE_TYPE_HYPERBOLA 0
+#define CBTM_SCORE_TYPE_POWER 1
+#define CBTM_SCORE_TYPE_EXPONENTIAL 2
+#define CBTM_SCORE_TYPE_COSINE 3
+#define CBTM_SCORE_TYPE_HYPERBOLA_REWARD 10
+#define CBTM_SCORE_TYPE_POWER_REWARD 11
+#define CBTM_SCORE_TYPE_EXPONENTIAL_REWARD 12
+#define PI 3.14159265
+
+
+namespace Moses
+{
+class ChartParser;
+class ChartCellCollectionBase;
+class ChartRuleLookupManager;
+class TranslationTask;
+class PhraseDictionary;
+
+/** Implementation of a Cache-based phrase table.
+ * Entries are kept per translation id: m_cacheTM maps a sentence id to a
+ * hash map from source phrase to its (target collection, Scores*) pair.
+ */
+class PhraseDictionaryCache : public PhraseDictionary
+{
+
+  typedef std::pair<TargetPhraseCollection::shared_ptr, Scores*> TargetCollectionPair;
+  typedef boost::unordered_map<Phrase, TargetCollectionPair> cacheMap;
+  typedef std::map<long, cacheMap> sentCacheMap;
+
+  // factored translation
+  std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;
+
+  // data structure for the cache
+  sentCacheMap m_cacheTM;
+  long m_sentences;
+  unsigned int m_numscorecomponent;
+  size_t m_score_type; //scoring type of the match
+  size_t m_entries; //total number of entries in the cache
+  float m_lower_score; //lower_bound_score for no match
+  bool m_constant; //flag for setting a non-decaying cache
+  std::string m_initfiles; // vector of files loaded in the initialization phase
+  std::string m_name; // internal name to identify this instance of the Cache-based phrase table
+
+#ifdef WITH_THREADS
+  //multiple readers - single writer lock
+  mutable boost::shared_mutex m_cacheLock;
+#endif
+
+  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryCache&);
+
+public:
+  PhraseDictionaryCache(const std::string &line);
+  ~PhraseDictionaryCache();
+
+  //! name under which this instance is registered
+  inline const std::string GetName() {
+    return m_name;
+  };
+  inline void SetName(const std::string name) {
+    m_name = name;
+  }
+
+  //! instance registered under `name`, or NULL when there is none
+  static const PhraseDictionaryCache* Instance(const std::string& name) {
+    return InstanceNonConst(name);
+  }
+
+  //! mutable variant of Instance(name); a single lookup, never inserts into the registry
+  static PhraseDictionaryCache* InstanceNonConst(const std::string& name) {
+    std::map< const std::string, PhraseDictionaryCache * >::const_iterator found = s_instance_map.find(name);
+    return found == s_instance_map.end() ? NULL : found->second;
+  }
+
+
+  //! most recently constructed instance; must not be called before one exists (s_instance starts as NULL)
+  static const PhraseDictionaryCache& Instance() {
+    return *s_instance;
+  }
+
+  static PhraseDictionaryCache& InstanceNonConst() {
+    return *s_instance;
+  }
+
+  //! legacy lookup: forwards to GetTargetPhraseCollection with the translation id of the task
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
+                                  Phrase const& src) const{
+    // The result has to be returned: falling off the end of a function that
+    // returns a value is undefined behaviour.
+    return GetTargetPhraseCollection(src, ttask->GetSource()->GetTranslationId());
+  }
+
+
+  // for phrase-based model
+  void
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
+
+  TargetPhraseCollection::shared_ptr
+  GetTargetPhraseCollection(const Phrase &src, long tID) const; // copy of the collection cached for src in sentence tID, or a null pointer when nothing is cached
+
+  // for phrase-based model
+// virtual void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
+
+  // for syntax/hiero model (CKY+ decoding)
+  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t); // the definition in this patch always throws util::Exception
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  void InitializeForInput(ttasksptr const& ttask);
+
+  void Print() const; // prints the cache
+  void Clear(); // clears the cache
+  void Clear(long tID); // clears cache of a sentence
+
+  void ClearEntries(std::string &entries, long tID); // entries: source/target pairs, pairs separated by four bars, sides by three
+  void ClearSource(std::string &entries, long tID); // entries: source phrases separated by four bars
+  void Insert(std::string &entries, long tID); // entries: same pair format as ClearEntries, optionally followed by scores and word alignment
+  void Execute(std::string command, long tID);
+  void ExecuteDlt(std::map dlt_meta, long tID); // dispatches on the cbtm* keys of the map; NOTE(review): template arguments of std::map are missing in this copy
+
+protected:
+
+  static PhraseDictionaryCache *s_instance; // set by the constructor to the instance being built
+  static std::map< const std::string, PhraseDictionaryCache * > s_instance_map; // instances by name, filled by the constructor
+
+  Scores Conv2VecFloats(std::string&);
+  void Insert(std::vector entries, long tID);
+
+  void Update(long tID, std::vector entries);
+  void Update(long tID, std::string sourceString, std::string targetString, std::string ScoreString="", std::string waString="");
+  void Update(long tID, Phrase p, TargetPhrase tp, Scores scores, std::string waString="");
+
+  void ClearEntries(std::vector entries, long tID);
+  void ClearEntries(std::string sourceString, std::string targetString, long tID);
+  void ClearEntries(Phrase p, Phrase tp, long tID);
+
+  void ClearSource(std::vector entries, long tID);
+  void ClearSource(Phrase sp, long tID);
+
+  void Execute(std::vector commands, long tID);
+  void Execute_Single_Command(std::string command);
+
+
+  void SetPreComputedScores(const unsigned int numScoreComponent); // NOTE(review): no definition appears in the visible part of this patch
+  Scores GetPreComputedScores(const unsigned int age); // NOTE(review): no definition appears in the visible part of this patch
+
+  TargetPhrase
*CreateTargetPhrase(const Phrase &sourcePhrase) const;
+};
+
+} // namespace Moses
+
+#endif /* moses_PhraseDictionaryCache_H_ */
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 75df7443b..d0a44fb83 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -17,6 +17,8 @@
 #include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h"
 #include "moses/Syntax/T2S/RuleMatcherSCFG.h"
+#include "moses/TranslationModel/PhraseDictionaryCache.h"
+
 #include "util/exception.hh"
 using namespace std;
@@ -149,6 +151,13 @@ interpret_dlt()
   typedef std::map dltmap_t;
   BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) {
     dltmap_t::const_iterator i = M.find("type");
+    if (i != M.end() && i->second == "cache") { // find() may return end(); check before dereferencing
+      dltmap_t::const_iterator k = M.find("id");
+      string id = k == M.end() ? "default" : k->second; // tags without an id address the cache named "default"
+      PhraseDictionaryCache* cache;
+      cache = PhraseDictionaryCache::InstanceNonConst(id);
+      if (cache) cache->ExecuteDlt(M, this->GetSource()->GetTranslationId()); // unknown cache names are ignored
+    }
     if (i == M.end() || i->second != "adaptive-lm") continue;
     dltmap_t::const_iterator j = M.find("context-weights");
     if (j == M.end()) continue;