Add a new thread-safe, cache-based translation model

This commit is contained in:
Prashant Mathur 2016-05-14 13:21:55 +02:00
parent 02027c14e8
commit 6f75c31be2
5 changed files with 780 additions and 0 deletions

View File

@ -10,6 +10,7 @@
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
#include "moses/TranslationModel/PhraseDictionaryCache.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
@ -234,6 +235,7 @@ FeatureRegistry::FeatureRegistry()
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryCache);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME(ProbingPT);
MOSES_FNAME(PhraseDictionaryMemoryPerSentence);

View File

@ -793,6 +793,9 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName)
case 15: // DCacheBased:
ptType = "PhraseDictionaryDynamicCacheBased";
break;
case 16: // CachePT:
ptType = "PhraseDictionaryCache";
break;
default:
break;
}

View File

@ -0,0 +1,582 @@
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "util/exception.hh"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/TranslationModel/PhraseDictionaryCache.h"
#include "moses/FactorCollection.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
using namespace std;
namespace Moses
{
// Registry of constructed PhraseDictionaryCache objects, keyed by cache name (m_name);
// the constructor adds one entry per instance.
std::map< const std::string, PhraseDictionaryCache * > PhraseDictionaryCache::s_instance_map;
// Most recently constructed instance; the constructor sets it for backward compatibility.
PhraseDictionaryCache *PhraseDictionaryCache::s_instance = NULL;
//! Constructor: reads the feature-line parameters and registers this
//! instance under its cache name.
PhraseDictionaryCache::PhraseDictionaryCache(const std::string &line)
  : PhraseDictionary(line, true)
{
  std::cerr << "Initializing PhraseDictionaryCache feature..." << std::endl;

  // Defaults; ReadParameters() may override the name through SetParameter().
  m_name = "default";
  m_constant = false;
  m_entries = 0;
  m_sentences = 0;
  // disable the internal cache (provided by PhraseDictionary) for translation options
  m_maxCacheSize = 0;

  ReadParameters();

  // A cache name may be registered only once.
  UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryCache feature named " + m_name + " is allowed");
  s_instance_map[m_name] = this;
  s_instance = this; // for backward compatibility

  // Remember how many weights are configured for this feature.
  m_numscorecomponent = StaticData::Instance().GetWeights(this).size();
}
//! Destructor: empties every per-sentence cache through Clear().
PhraseDictionaryCache::~PhraseDictionaryCache()
{
  this->Clear();
}
/** Handle one key/value pair from the feature line.
 *  "cache-name", "input-factor" and "output-factor" are consumed here;
 *  every other key is forwarded to PhraseDictionary::SetParameter().
 */
void PhraseDictionaryCache::SetParameter(const std::string& key, const std::string& value)
{
  VERBOSE(2, "PhraseDictionaryCache::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);

  if (key == "cache-name") {
    m_name = Scan<std::string>(value);
    return;
  }
  if (key == "input-factor") {
    m_inputFactorsVec = Tokenize<FactorType>(value,",");
    return;
  }
  if (key == "output-factor") {
    m_outputFactorsVec = Tokenize<FactorType>(value,",");
    return;
  }

  // not one of ours: let the base class deal with it
  PhraseDictionary::SetParameter(key, value);
}
/** Per-input hook: for the sentence identified by the task's translation id,
 *  walk all cached source phrases, copy each target phrase collection,
 *  run EvaluateInIsolation() on its target phrases and trace them to stderr.
 *  Does nothing when no cache exists for that translation id.
 */
void PhraseDictionaryCache::InitializeForInput(ttasksptr const& ttask)
{
  const long tID = ttask->GetSource()->GetTranslationId();

  sentCacheMap::const_iterator sentence = m_cacheTM.find(tID);
  if (sentence == m_cacheTM.end()) return;

  const cacheMap &cached = sentence->second;
  TargetPhraseCollection::shared_ptr tpc;
  for (cacheMap::const_iterator entry = cached.begin(); entry != cached.end(); ++entry) {
    std::cerr<<"Source : "<<entry->first<<std::endl;
    tpc.reset(new TargetPhraseCollection(*(entry->second).first));
    std::cerr<<"TPC size : " << tpc->GetSize() << std::endl;

    for (std::vector<const TargetPhrase*>::const_iterator tgt = tpc->begin(); tgt != tpc->end(); ++tgt) {
      // the collection exposes const pointers; evaluation needs a mutable phrase
      const_cast<TargetPhrase*>(*tgt)->EvaluateInIsolation(entry->first, GetFeaturesToApply());
      std::cerr<< "Target Phrase : "<<**tgt << std::endl;
    }
  }

  // only the collection built last is still referenced here
  if (tpc) {
    tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
  }
}
void PhraseDictionaryCache::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
long tID = inputPath.ttask->GetSource()->GetTranslationId();
if (m_cacheTM.find(tID) == m_cacheTM.end()) continue;
TargetPhraseCollection::shared_ptr tpc;
for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) {
tpc.reset(new TargetPhraseCollection(*(it->second).first));
inputPath.SetTargetPhrases(*this, tpc, NULL);
}
}
}
/** Look up `source` in the cache of sentence `tID`.
 *
 *  @return a copy of the cached target phrase collection whose phrases have
 *          been passed through EvaluateInIsolation() and NthElement(), or an
 *          empty shared_ptr when the sentence or the phrase is not cached.
 */
TargetPhraseCollection::shared_ptr PhraseDictionaryCache::GetTargetPhraseCollection(const Phrase &source, long tID) const
{
#ifdef WITH_THREADS
  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
  TargetPhraseCollection::shared_ptr result;

  sentCacheMap::const_iterator sentence = m_cacheTM.find(tID);
  if (sentence == m_cacheTM.end()) return result;

  const cacheMap &cached = sentence->second;
  cacheMap::const_iterator match = cached.find(source);
  if (match == cached.end()) return result;

  result.reset(new TargetPhraseCollection(*(match->second).first));
  for (std::vector<const TargetPhrase*>::const_iterator tgt = result->begin(); tgt != result->end(); ++tgt) {
    // the collection exposes const pointers; evaluation needs a mutable phrase
    const_cast<TargetPhrase*>(*tgt)->EvaluateInIsolation(source, GetFeaturesToApply());
  }
  result->NthElement(m_tableLimit); // sort the phrases for the decoder
  return result;
}
/** Chart decoding is not supported by this phrase table: always throws
 *  util::Exception.
 */
ChartRuleLookupManager* PhraseDictionaryCache::CreateRuleLookupManager(const ChartParser & /*parser*/, const ChartCellCollectionBase & /*cellCollection*/, std::size_t /*maxChartSpan*/)
{
  UTIL_THROW(util::Exception, "Not implemented for Chart Decoder");
}
// friend: stream insertion is a placeholder; it writes nothing and hands
// the stream back unchanged.
ostream& operator<<(ostream& out, const PhraseDictionaryCache& /*phraseDict*/)
{
  return out;
}
/** Remove source/target pairs from the cache of sentence `tID`.
 *  `entries` holds the pairs separated by "||||"; nothing happens when it is
 *  empty or when no cache exists for `tID`.
 */
void PhraseDictionaryCache::ClearEntries(std::string &entries, long tID)
{
  if (entries.empty()) return;
  if (m_cacheTM.find(tID) == m_cacheTM.end()) return;

  VERBOSE(3,"entries:|" << entries << "|" << std::endl);
  std::vector<std::string> pairs = TokenizeMultiCharSeparator(entries, "||||");
  VERBOSE(3,"elements.size() after:|" << pairs.size() << "|" << std::endl);
  ClearEntries(pairs, tID);
}
void PhraseDictionaryCache::ClearEntries(std::vector<std::string> entries, long tID)
{
VERBOSE(3,"PhraseDictionaryCache::ClearEntries(std::vector<std::string> entries)" << std::endl);
std::vector<std::string> pp;
std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++) {
pp.clear();
pp = TokenizeMultiCharSeparator((*it), "|||");
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
ClearEntries(pp[0], pp[1], tID);
}
}
/** Remove one source/target pair, given as strings, from the cache of
 *  sentence `tID`. Both strings are converted to Phrase objects using the
 *  configured output/input factors and handed to the Phrase overload.
 *  (The unused StaticData reference of the earlier version was dropped.)
 */
void PhraseDictionaryCache::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString, long tID)
{
  VERBOSE(3,"PhraseDictionaryCache::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
  Phrase sourcePhrase(0);
  Phrase targetPhrase(0);

  //target
  targetPhrase.Clear();
  VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
  targetPhrase.CreateFromString(Output, m_outputFactorsVec,
                                targetPhraseString, /*factorDelimiter,*/ NULL);
  VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);

  //TODO: Would be better to reuse source phrases, but ownership has to be
  //consistent across phrase table implementations
  sourcePhrase.Clear();
  VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
  sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
                                sourcePhraseString, /*factorDelimiter,*/ NULL);
  VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);

  ClearEntries(sourcePhrase, targetPhrase, tID);
}
/** Remove the target phrase `tp` from the translations cached for source
 *  phrase `sp` in sentence `tID`.
 *
 *  When the collection for `sp` is empty afterwards, the whole source entry
 *  (collection and score vector) is dropped from the cache.
 *
 *  The function mutates the cache, so it takes the exclusive (writer) lock;
 *  the earlier shared (reader) lock allowed concurrent readers and writers.
 *
 *  @throws std::out_of_range (from map::at) when no cache exists for `tID`.
 */
void PhraseDictionaryCache::ClearEntries(Phrase sp, Phrase tp, long tID)
{
  VERBOSE(3,"PhraseDictionaryCache::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
#ifdef WITH_THREADS
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);

  cacheMap &sentenceCache = m_cacheTM.at(tID);
  cacheMap::iterator it = sentenceCache.find(sp);
  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
  if (it == sentenceCache.end()) {
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    return; //do nothing
  }
  VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);

  // sp is found; the local shared_ptr copy keeps the collection alive until
  // this function returns, even after the map entry is erased below
  TargetPhraseCollection::shared_ptr tpc = it->second.first;
  Scores* sc = it->second.second;

  // linear search for a target phrase equal to tp
  bool found = false;
  size_t tp_pos = 0;
  for (; tp_pos < tpc->GetSize(); ++tp_pos) {
    const Phrase* candidate = tpc->GetTargetPhrase(tp_pos);
    if (tp == *candidate) {
      found = true;
      break;
    }
  }

  if (!found) {
    VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
    //do nothing
  } else {
    VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);
    tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
    // no need to delete scores here
    m_entries--;
    VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
    VERBOSE(3,"sc size:|" << sc->size() << "|" << std::endl);
    VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
  }

  // no translations left for sp: drop the whole source entry
  if (tpc->GetSize() == 0) {
    delete sc;
    sentenceCache.erase(it);
  }
}
/** Remove complete source phrases (with all their translations) from the
 *  cache of sentence `tID`.
 *
 *  `entries` holds the source phrases separated by "||||"; nothing happens
 *  when it is empty or when no cache exists for `tID`.
 *
 *  The tokens are passed to the ClearSource() overload. The earlier version
 *  passed them to ClearEntries(), which expects "source ||| target" pairs
 *  rather than bare source phrases.
 */
void PhraseDictionaryCache::ClearSource(std::string &entries, long tID)
{
  if (entries != "" && m_cacheTM.find(tID) != m_cacheTM.end()) {
    VERBOSE(3,"entries:|" << entries << "|" << std::endl);
    std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
    VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
    ClearSource(elements, tID);
  }
}
/** Remove every listed source phrase from the cache of sentence `tID`.
 *  Each string is converted to a Phrase using the configured input factors
 *  and passed to the Phrase overload; the cache is printed afterwards at
 *  verbosity level 2.
 *  (The unused StaticData reference of the earlier version was dropped.)
 */
void PhraseDictionaryCache::ClearSource(std::vector<std::string> entries, long tID)
{
  VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
  Phrase sourcePhrase(0);
  for (std::vector<std::string>::iterator it = entries.begin(); it != entries.end(); ++it) {
    sourcePhrase.Clear(); // the same Phrase object is reused for every entry
    VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
    sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
                                  *it, /*factorDelimiter,*/ NULL);
    VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
    ClearSource(sourcePhrase, tID);
  }
  IFVERBOSE(2) Print();
}
/** Remove source phrase `sp` and all of its translations from the cache of
 *  sentence `tID`; the entry counter is reduced by the number of removed
 *  target phrases. Nothing happens when `sp` is not cached.
 *
 *  The cache is modified here, so the exclusive (writer) lock is taken, in
 *  line with the other mutating members; the earlier version did not lock.
 *
 *  @throws std::out_of_range (from map::at) when no cache exists for `tID`.
 */
void PhraseDictionaryCache::ClearSource(Phrase sp, long tID)
{
  VERBOSE(3,"void PhraseDictionaryCache::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl);
#ifdef WITH_THREADS
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  cacheMap &sentenceCache = m_cacheTM.at(tID);
  cacheMap::iterator it = sentenceCache.find(sp);
  if (it == sentenceCache.end()) {
    return; //do nothing
  }
  VERBOSE(3,"found:|" << sp << "|" << std::endl);

  //sp is found
  m_entries -= it->second.first->GetSize(); //reduce the total amount of entries of the cache
  delete it->second.second;                 // score vector owned by the entry
  sentenceCache.erase(it);                  // releases the map's reference to the collection
}
/** Insert entries into the cache of sentence `tID`.
 *  `entries` holds the individual entries separated by "||||"; an empty
 *  string is ignored.
 */
void PhraseDictionaryCache::Insert(std::string &entries, long tID)
{
  if (entries.empty()) return;

  VERBOSE(3,"entries:|" << entries << "|" << " tID | " << tID << std::endl);
  std::vector<std::string> items = TokenizeMultiCharSeparator(entries, "||||");
  VERBOSE(3,"elements.size() after:|" << items.size() << "|" << std::endl);
  Insert(items, tID);
}
void PhraseDictionaryCache::Insert(std::vector<std::string> entries, long tID)
{
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
Update(tID, entries);
IFVERBOSE(3) Print();
}
void PhraseDictionaryCache::Update(long tID, std::vector<std::string> entries)
{
std::vector<std::string> pp;
std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++) {
pp.clear();
pp = TokenizeMultiCharSeparator((*it), "|||");
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
if (pp.size() > 3) {
VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl);
Update(tID,pp[0], pp[1], pp[2], pp[3]);
} else if (pp.size() > 2){
VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
Update(tID,pp[0], pp[1], pp[2]);
} else {
Update(tID,pp[0], pp[1]);
}
}
}
/** Parse a whitespace-separated list of floats.
 *  Reading stops at the first token that cannot be extracted as a float; an
 *  empty string yields an empty result.
 */
Scores PhraseDictionaryCache::Conv2VecFloats(std::string& s)
{
  std::vector<float> parsed;
  if (!s.empty()) {
    std::istringstream tokens(s);
    float value;
    while (tokens >> value) {
      parsed.push_back(value);
    }
  }
  return parsed;
}
/** Insert or update one entry, given as strings, in the cache of sentence
 *  `tID`.
 *
 *  The score string is parsed with Conv2VecFloats(), the target and source
 *  strings are converted to phrases using the configured output/input
 *  factors, and everything is handed to the Phrase overload of Update().
 *  (The unused locals `staticData` and `err_ind_temp` of the earlier version
 *  were dropped.)
 */
void PhraseDictionaryCache::Update(long tID, std::string sourcePhraseString, std::string targetPhraseString, std::string scoreString, std::string waString)
{
  Phrase sourcePhrase(0);
  TargetPhrase targetPhrase(0);
  Scores scores = Conv2VecFloats(scoreString);

  //target
  targetPhrase.Clear();
  // change here for factored based CBTM
  VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
  targetPhrase.CreateFromString(Output, m_outputFactorsVec,
                                targetPhraseString, /*factorDelimiter,*/ NULL);
  VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);

  //TODO: Would be better to reuse source phrases, but ownership has to be
  //consistent across phrase table implementations
  sourcePhrase.Clear();
  VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
  sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL);
  VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);

  if (!waString.empty()) {
    VERBOSE(3, "waString:|" << waString << "|" << std::endl);
  }
  Update(tID, sourcePhrase, targetPhrase, scores, waString);
}
void PhraseDictionaryCache::Update(long tID, Phrase sp, TargetPhrase tp, Scores scores, std::string waString)
{
VERBOSE(3,"PhraseDictionaryCache::Update(Phrase sp, TargetPhrase tp, Scores scores, std::string waString)" << std::endl);
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| word-alignment |" << waString << "|" << std::endl);
cacheMap::const_iterator it = m_cacheTM[tID].find(sp);
VERBOSE(3,"sp:|" << sp << "|" << std::endl);
if(it!=m_cacheTM.at(tID).end()) {
VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
// sp is found
TargetCollectionPair TgtCollPair = it->second;
TargetPhraseCollection::shared_ptr tpc = TgtCollPair.first;
Scores* sc = TgtCollPair.second;
const Phrase* p_ptr = NULL;
TargetPhrase* tp_ptr = NULL;
bool found = false;
size_t tp_pos=0;
while (!found && tp_pos < tpc->GetSize()) {
tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
p_ptr = (const TargetPhrase*) tp_ptr;
if ((Phrase) tp == *p_ptr) {
found = true;
continue;
}
tp_pos++;
}
if (!found) {
VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
Scores scoreVec;
for (unsigned int i=0; i<scores.size(); i++){
scoreVec.push_back(scores[i]);
}
if(scoreVec.size() != m_numScoreComponents){
VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
VERBOSE(1, "I am ignoring this..." <<endl);
// std::cin.ignore();
}
targetPhrase->GetScoreBreakdown().Assign(this, scoreVec);
if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
tpc->Add(targetPhrase.release());
tp_pos = tpc->GetSize()-1;
sc = &scores;
m_entries++;
VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
} else {
Scores scoreVec;
for (unsigned int i=0; i<scores.size(); i++){
scoreVec.push_back(scores[i]);
}
if(scoreVec.size() != m_numScoreComponents){
VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
VERBOSE(1, "I am ignoring this..." <<endl);
// std::cin.ignore();
}
tp_ptr->GetScoreBreakdown().Assign(this, scoreVec);
if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
VERBOSE(1,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
}
} else {
VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
// p is not found
// create target collection
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
Scores* sc = new Scores();
m_cacheTM[tID].insert(make_pair(sp,std::make_pair(tpc,sc)));
//tp is not found
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
// scoreVec is a composition of decay_score and the feature scores
Scores scoreVec;
for (unsigned int i=0; i<scores.size(); i++){
scoreVec.push_back(scores[i]);
}
if(scoreVec.size() != m_numScoreComponents){
VERBOSE(1, "Scores do not match number of score components for phrase : "<< sp <<" ||| " << tp <<endl);
VERBOSE(1, "I am ignoring this..." <<endl);
// std::cin.ignore();
}
targetPhrase->GetScoreBreakdown().Assign(this, scoreVec);
if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
tpc->Add(targetPhrase.release());
sc = &scores;
m_entries++;
VERBOSE(1,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
}
}
/** Run a command string for sentence `tID`: the string is tokenized with the
 *  delimiter set "||" and the resulting commands are executed in order.
 */
void PhraseDictionaryCache::Execute(std::string command, long tID)
{
  VERBOSE(2,"command:|" << command << "|" << std::endl);
  std::vector<std::string> parts = Tokenize(command, "||");
  this->Execute(parts, tID);
}
/** Execute each command in order, then print the cache at verbosity level 2.
 *  `tID` is accepted for symmetry with the other overload but is not used.
 */
void PhraseDictionaryCache::Execute(std::vector<std::string> commands, long tID)
{
  for (std::vector<std::string>::iterator cmd = commands.begin(); cmd != commands.end(); ++cmd) {
    Execute_Single_Command(*cmd);
  }
  IFVERBOSE(2) {
    Print();
  }
}
/** Execute one command. Only "clear" is recognised (it empties the whole
 *  cache); any other command is reported and skipped.
 */
void PhraseDictionaryCache::Execute_Single_Command(std::string command)
{
  if (command != "clear") {
    VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
    return;
  }
  VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "|. Cache cleared." << std::endl);
  Clear();
}
void PhraseDictionaryCache::Clear(){
for(sentCacheMap::iterator it=m_cacheTM.begin(); it!=m_cacheTM.end(); it++){
Clear(it->first);
}
}
void PhraseDictionaryCache::Clear(long tID)
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
cacheMap::iterator it;
for(it = m_cacheTM.at(tID).begin(); it!=m_cacheTM.at(tID).end(); it++) {
(((*it).second).second)->clear();
delete ((*it).second).second;
((*it).second).first.reset();
}
m_cacheTM.at(tID).clear();
m_entries = 0;
}
/** Apply the cache-related keys of a dlt meta map to sentence `tID`.
 *  Keys are handled in this fixed order when present:
 *  "cbtm" (insert), "cbtm-command" (execute), "cbtm-clear-source",
 *  "cbtm-clear-entries" and "cbtm-clear-all" (clear everything).
 */
void PhraseDictionaryCache::ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID)
{
  typedef std::map<std::string, std::string>::iterator MetaIter;
  MetaIter hit;

  hit = dlt_meta.find("cbtm");
  if (hit != dlt_meta.end()) {
    Insert(hit->second, tID);
  }

  hit = dlt_meta.find("cbtm-command");
  if (hit != dlt_meta.end()) {
    Execute(hit->second, tID);
  }

  hit = dlt_meta.find("cbtm-clear-source");
  if (hit != dlt_meta.end()) {
    ClearSource(hit->second, tID);
  }

  hit = dlt_meta.find("cbtm-clear-entries");
  if (hit != dlt_meta.end()) {
    ClearEntries(hit->second, tID);
  }

  // only the presence of this key matters, its value is not read
  hit = dlt_meta.find("cbtm-clear-all");
  if (hit != dlt_meta.end()) {
    Clear();
  }
}
void PhraseDictionaryCache::Print() const
{
VERBOSE(2,"PhraseDictionaryCache::Print()" << std::endl);
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
for(sentCacheMap::const_iterator itr = m_cacheTM.begin(); itr!=m_cacheTM.end(); itr++) {
cacheMap::const_iterator it;
for(it = (itr->second).begin(); it!=(itr->second).end(); it++) {
std::string source = (it->first).ToString();
TargetPhraseCollection::shared_ptr tpc = (it->second).first;
TargetPhraseCollection::iterator itr;
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
std::string target = (*itr)->ToString();
std::cout << source << " ||| " << target << std::endl;
}
source.clear();
}
}
}
}// end namespace

View File

@ -0,0 +1,184 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef moses_PhraseDictionaryCache_H
#define moses_PhraseDictionaryCache_H
#include "moses/TypeDef.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/TranslationTask.h"
#include <boost/tuple/tuple.hpp>
#include <boost/tuple/tuple_io.hpp>
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#include <boost/thread/locks.hpp>
#endif
#define CBTM_SCORE_TYPE_UNDEFINED (-1)
#define CBTM_SCORE_TYPE_HYPERBOLA 0
#define CBTM_SCORE_TYPE_POWER 1
#define CBTM_SCORE_TYPE_EXPONENTIAL 2
#define CBTM_SCORE_TYPE_COSINE 3
#define CBTM_SCORE_TYPE_HYPERBOLA_REWARD 10
#define CBTM_SCORE_TYPE_POWER_REWARD 11
#define CBTM_SCORE_TYPE_EXPONENTIAL_REWARD 12
#define PI 3.14159265
namespace Moses
{
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
class TranslationTask;
class PhraseDictionary;
/** Implementation of a Cache-based phrase table.
 *
 *  Translations are cached per sentence: the outer map is keyed by the
 *  translation id, the inner map by the source phrase. Each instance
 *  registers itself under its cache name and can be retrieved through the
 *  static Instance()/InstanceNonConst() accessors.
 */
class PhraseDictionaryCache : public PhraseDictionary
{
  // target phrase collection of a source phrase plus an associated score vector
  typedef std::pair<TargetPhraseCollection::shared_ptr, Scores*> TargetCollectionPair;
  // source phrase -> cached translations (cache of one sentence)
  typedef boost::unordered_map<Phrase, TargetCollectionPair> cacheMap;
  // translation id -> cache of that sentence
  typedef std::map<long, cacheMap> sentCacheMap;

  // factored translation
  std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;

  // data structure for the cache
  sentCacheMap m_cacheTM;

  long m_sentences;
  unsigned int m_numscorecomponent;

  size_t m_score_type; //scoring type of the match
  size_t m_entries; //total number of entries in the cache
  float m_lower_score; //lower_bound_score for no match
  bool m_constant; //flag for setting a non-decaying cache
  std::string m_initfiles; // vector of files loaded in the initialization phase
  std::string m_name; // internal name to identify this instance of the Cache-based phrase table

#ifdef WITH_THREADS
  //multiple readers - single writer lock
  mutable boost::shared_mutex m_cacheLock;
#endif

  friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryCache&);

public:
  PhraseDictionaryCache(const std::string &line);
  ~PhraseDictionaryCache();

  //! name under which this instance is registered
  inline const std::string GetName() {
    return m_name;
  };
  inline void SetName(const std::string name) {
    m_name = name;
  }

  //! instance registered under `name`, or NULL when there is none
  static const PhraseDictionaryCache* Instance(const std::string& name) {
    if (s_instance_map.find(name) == s_instance_map.end()) {
      return NULL;
    }
    return s_instance_map[name];
  }

  //! mutable variant of Instance(name); NULL when there is none
  static PhraseDictionaryCache* InstanceNonConst(const std::string& name) {
    if (s_instance_map.find(name) == s_instance_map.end()) {
      return NULL;
    }
    return s_instance_map[name];
  }

  //! most recently constructed instance (s_instance must be set)
  static const PhraseDictionaryCache& Instance() {
    return *s_instance;
  }

  static PhraseDictionaryCache& InstanceNonConst() {
    return *s_instance;
  }

  /** Legacy lookup: forwards to GetTargetPhraseCollection() with the
   *  translation id of the task's source.
   *  The result is now returned; the earlier version dropped it and ran off
   *  the end of a value-returning function, which is undefined behaviour.
   */
  TargetPhraseCollection::shared_ptr
  GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
                                  Phrase const& src) const {
    return GetTargetPhraseCollection(src, ttask->GetSource()->GetTranslationId());
  }

  // for phrase-based model
  void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

  TargetPhraseCollection::shared_ptr
  GetTargetPhraseCollection(const Phrase &src, long tID) const;

  // for phrase-based model
  // virtual void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;

  // for syntax/hiero model (CKY+ decoding)
  ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);

  void SetParameter(const std::string& key, const std::string& value);

  void InitializeForInput(ttasksptr const& ttask);

  void Print() const; // prints the cache
  void Clear(); // clears the cache
  void Clear(long tID); // clears cache of a sentence

  void ClearEntries(std::string &entries, long tID);
  void ClearSource(std::string &entries, long tID);
  void Insert(std::string &entries, long tID);
  void Execute(std::string command, long tID);
  void ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID);

protected:
  static PhraseDictionaryCache *s_instance;
  static std::map< const std::string, PhraseDictionaryCache * > s_instance_map;

  Scores Conv2VecFloats(std::string&);
  void Insert(std::vector<std::string> entries, long tID);

  void Update(long tID, std::vector<std::string> entries);
  void Update(long tID, std::string sourceString, std::string targetString, std::string ScoreString="", std::string waString="");
  void Update(long tID, Phrase p, TargetPhrase tp, Scores scores, std::string waString="");

  void ClearEntries(std::vector<std::string> entries, long tID);
  void ClearEntries(std::string sourceString, std::string targetString, long tID);
  void ClearEntries(Phrase p, Phrase tp, long tID);

  void ClearSource(std::vector<std::string> entries, long tID);
  void ClearSource(Phrase sp, long tID);

  void Execute(std::vector<std::string> commands, long tID);
  void Execute_Single_Command(std::string command);

  void SetPreComputedScores(const unsigned int numScoreComponent);
  Scores GetPreComputedScores(const unsigned int age);

  TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
};
} // namespace Moses
#endif /* moses_PhraseDictionaryCache_H_ */

View File

@ -17,6 +17,8 @@
#include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h"
#include "moses/Syntax/T2S/RuleMatcherSCFG.h"
#include "moses/TranslationModel/PhraseDictionaryCache.h"
#include "util/exception.hh"
using namespace std;
@ -149,6 +151,13 @@ interpret_dlt()
typedef std::map<std::string,std::string> dltmap_t;
BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) {
dltmap_t::const_iterator i = M.find("type");
if (i->second == "cache") {
map<string, string>::const_iterator k = M.find("id");
string id = k == M.end() ? "default" : k->second;
PhraseDictionaryCache* cache;
cache = PhraseDictionaryCache::InstanceNonConst(id);
if (cache) cache->ExecuteDlt(M, this->GetSource()->GetTranslationId());
}
if (i == M.end() || i->second != "adaptive-lm") continue;
dltmap_t::const_iterator j = M.find("context-weights");
if (j == M.end()) continue;