mosesdecoder/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp
Ulrich Germann e4f5c69109 One step closer to eliminating the requirement to provide num-features=... in the config file.
Some FF (Mmsapt, LexicalReordering, Many single-value FF) provide this number during "registration";
when missing, a default weight vector of uniform 1.0 is automatically generated. This eliminates the
need for the user to figure out what the exact number of features is for each FF, which can get complicated,
e.g. in the case of Mmsapt/PhraseDictionaryBitextSampling.
2015-04-29 20:16:52 +01:00

760 lines
26 KiB
C++

// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "util/exception.hh"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
#include "moses/FactorCollection.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
using namespace std;
namespace Moses
{
// Registry of the instances created so far, keyed by feature name (m_name); filled by the constructor.
std::map< const std::string, PhraseDictionaryDynamicCacheBased * > PhraseDictionaryDynamicCacheBased::s_instance_map;
// Last instance constructed; the constructor keeps it up to date for backward compatibility.
PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance = NULL;
//! Constructor: installs the default configuration, lets ReadParameters()
//! override it, then registers the instance under its name.
//! Throws (via UTIL_THROW_IF2) if an instance with the same name is already registered.
PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line)
  : PhraseDictionary(line, true)
{
  std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl;

  // Defaults; they must be in place before ReadParameters() runs.
  m_name = "default";
  m_constant = false;
  m_entries = 0;
  m_maxAge = 1000;
  m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
  // a size of 0 disables the internal translation-option cache provided by PhraseDictionary
  m_maxCacheSize = 0;

  ReadParameters();

  // names must be unique among all instances
  UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed");

  s_instance = this; // kept for backward compatibility
  s_instance_map[m_name] = this;
}
//! Destructor: frees every cached target-phrase/age collection through Clear().
//! NOTE(review): the entry the constructor stored in s_instance_map (and s_instance)
//! is not removed here.
PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased()
{
Clear();
}
void PhraseDictionaryDynamicCacheBased::Load()
{
VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl);
SetFeaturesToApply();
vector<float> weight = StaticData::Instance().GetWeights(this);
SetPreComputedScores(weight.size());
Load(m_initfiles);
}
//! Loads the cache files listed in filestr.
//! filestr is split with Tokenize(filestr, "||") and every resulting name is loaded.
void PhraseDictionaryDynamicCacheBased::Load(const std::string filestr)
{
  VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load(const std::string filestr)" << std::endl);
  Load_Multiple_Files(Tokenize(filestr, "||"));
}
//! Loads every file of the list, in order, through Load_Single_File().
void PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector<std::string> files)
{
  VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector<std::string> files)" << std::endl);
  for (std::vector<std::string>::const_iterator fileIt = files.begin(); fileIt != files.end(); ++fileIt) {
    Load_Single_File(*fileIt);
  }
}
void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)
{
VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)" << std::endl);
//file format
//age |||| src_phr ||| trg_phr
//age |||| src_phr2 ||| trg_phr2 |||| src_phr3 ||| trg_phr3 |||| src_phr4 ||| trg_ph4
//....
//or
//age |||| src_phr ||| trg_phr ||| wa_align
//age |||| src_phr2 ||| trg_phr2 ||| wa_align2 |||| src_phr3 ||| trg_phr3 ||| wa_align3 |||| src_phr4 ||| trg_phr4 ||| wa_align4
//....
//each src_phr ad trg_phr are sequences of src and trg words, respectively, of any length
//if provided, wa_align is the alignment between src_phr and trg_phr
//
//there is no limit on the size of n
//
//entries can be repeated, but the last entry overwrites the previous
VERBOSE(2,"Loading data from the cache file " << file << std::endl);
InputFileStream cacheFile(file);
std::string line;
std::vector<std::string> words;
while (getline(cacheFile, line)) {
std::vector<std::string> vecStr = TokenizeMultiCharSeparator( line , "||||" );
if (vecStr.size() >= 2) {
std::string ageString = vecStr[0];
vecStr.erase(vecStr.begin());
Update(vecStr,ageString);
} else {
UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line);
}
}
IFVERBOSE(2) Print();
}
//! Handles the configuration keys specific to this feature
//! ("cbtm-score-type", "cbtm-max-age", "cbtm-file", "cbtm-name", "cbtm-constant");
//! every other key is forwarded to PhraseDictionary::SetParameter().
void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, const std::string& value)
{
  VERBOSE(2, "PhraseDictionaryDynamicCacheBased::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);

  if (key == "cbtm-score-type") {
    SetScoreType(Scan<size_t>(value));
    return;
  }
  if (key == "cbtm-max-age") {
    SetMaxAge(Scan<unsigned int>(value));
    return;
  }
  if (key == "cbtm-file") {
    m_initfiles = Scan<std::string>(value);
    return;
  }
  if (key == "cbtm-name") {
    m_name = Scan<std::string>(value);
    return;
  }
  if (key == "cbtm-constant") {
    m_constant = Scan<bool>(value);
    return;
  }

  // not one of ours: let the base class deal with it
  PhraseDictionary::SetParameter(key, value);
}
//! Hook invoked with the input to be processed; the input itself is not used,
//! the call only triggers ReduceCache().
void PhraseDictionaryDynamicCacheBased::InitializeForInput(InputType const& source)
{
// ReduceCache() is not defined in this file; presumably inherited from PhraseDictionary -- TODO confirm
ReduceCache();
}
//! Looks up the translation options cached for a source phrase.
//! Returns NULL when the phrase is not in the cache; otherwise a newly
//! allocated copy of the cached collection, whose target phrases have been
//! evaluated in isolation and which has been passed through NthElement(m_tableLimit).
//! NOTE(review): the returned collection is allocated with new here; who
//! releases it cannot be told from this file -- confirm against the callers.
const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
{
#ifdef WITH_THREADS
  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
  cacheMap::const_iterator entry = m_cacheTM.find(source);
  if (entry == m_cacheTM.end()) {
    return NULL;
  }

  TargetPhraseCollection* result = new TargetPhraseCollection(*(entry->second).first);
  for (std::vector<const TargetPhrase*>::const_iterator tpIt = result->begin(); tpIt != result->end(); tpIt++) {
    ((TargetPhrase*) *tpIt)->EvaluateInIsolation(source, GetFeaturesToApply());
  }
  result->NthElement(m_tableLimit); // sort the phrases for the decoder
  return result;
}
//! Legacy entry point: simply forwards to GetTargetPhraseCollection().
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
{
  return GetTargetPhraseCollection(src);
}
//! Legacy non-cached entry point: simply forwards to GetTargetPhraseCollection().
const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
{
  return GetTargetPhraseCollection(src);
}
//! Chart decoding is not supported by this phrase table: the call always
//! throws util::Exception and never returns a manager.
ChartRuleLookupManager* PhraseDictionaryDynamicCacheBased::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/)
{
UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}
//! Selects the decaying function used to score cached entries.
//! type must be one of the CBTM_SCORE_TYPE_* constants; any other value is
//! replaced by CBTM_SCORE_TYPE_HYPERBOLA (reported at verbosity 2).
void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type)
{
#ifdef WITH_THREADS
  // exclusive lock: m_score_type is written here, a shared (reader) lock is not enough
  boost::unique_lock<boost::shared_mutex> write_lock(m_cacheLock);
#endif
  m_score_type = type;
  if ( m_score_type != CBTM_SCORE_TYPE_HYPERBOLA
       && m_score_type != CBTM_SCORE_TYPE_POWER
       && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL
       && m_score_type != CBTM_SCORE_TYPE_COSINE
       && m_score_type != CBTM_SCORE_TYPE_HYPERBOLA_REWARD
       && m_score_type != CBTM_SCORE_TYPE_POWER_REWARD
       && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
    // unknown type: fall back to the default
    VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBTM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
    m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
  }
  VERBOSE(2, "PhraseDictionaryDynamicCacheBased ScoreType: " << m_score_type << std::endl);
}
//! Sets the maximum age of a cached entry; Decay() drops entries whose age
//! grows beyond this value.
void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age)
{
#ifdef WITH_THREADS
  // exclusive lock: m_maxAge is written here, a shared (reader) lock is not enough
  boost::unique_lock<boost::shared_mutex> write_lock(m_cacheLock);
#endif
  m_maxAge = age;
  VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl);
}
// friend
//! Stream output for the dictionary: currently writes nothing and returns
//! the stream unchanged.
ostream& operator<<(ostream& out, const PhraseDictionaryDynamicCacheBased& phraseDict)
{
return out;
}
//! Score of an entry of the given age under the configured score type.
//! The plain types return the decaying function minus 1 (0 at age 1 for the
//! hyperbola, power, exponential and cosine variants); the *_REWARD types
//! return the function itself. An unknown score type yields -1.
//! NOTE(review): SetPreComputedScores() also calls this with age 0, for which
//! 1.0/age divides by zero -- confirm that the age-0 row is never used.
float PhraseDictionaryDynamicCacheBased::decaying_score(const int age)
{
  switch(m_score_type) {
  case CBTM_SCORE_TYPE_HYPERBOLA:
    return (float) 1.0/age - 1.0;
  case CBTM_SCORE_TYPE_POWER:
    return (float) pow(age, -0.25) - 1.0;
  case CBTM_SCORE_TYPE_EXPONENTIAL:
    return (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0;
  case CBTM_SCORE_TYPE_COSINE:
    return (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0;
  case CBTM_SCORE_TYPE_HYPERBOLA_REWARD:
    return (float) 1.0/age;
  case CBTM_SCORE_TYPE_POWER_REWARD:
    return (float) pow(age, -0.25);
  case CBTM_SCORE_TYPE_EXPONENTIAL_REWARD:
    return (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0);
  default:
    break;
  }
  // unknown score type
  return -1.0;
}
//! Builds the table of precomputed score vectors, one row per age in
//! [0, m_maxAge]. Each row holds numScoreComponent copies of the same value:
//! decaying_score(age) / numScoreComponent. The last row (age == m_maxAge) is
//! the score of the oldest admissible entry: decaying_score(m_maxAge) for the
//! plain score types and 0 for the *_REWARD ones; its first component is
//! stored in m_lower_score.
//! The table is rebuilt from scratch on every call.
//! NOTE(review): numScoreComponent is used as a divisor and row element 0 is
//! read at the end, so it is expected to be greater than 0.
void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int numScoreComponent)
{
  VERBOSE(2, "PhraseDictionaryDynamicCacheBased SetPreComputedScores: " << m_maxAge << std::endl);
#ifdef WITH_THREADS
  // exclusive lock: the score table is rewritten here, a shared (reader) lock is not enough
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  // drop rows left by a previous call; otherwise indices 0..m_maxAge would keep stale values
  precomputedScores.clear();

  float sc;
  for (size_t i=0; i<=m_maxAge; i++) {
    if (i==m_maxAge) {
      if ( m_score_type == CBTM_SCORE_TYPE_HYPERBOLA
           || m_score_type == CBTM_SCORE_TYPE_POWER
           || m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL
           || m_score_type == CBTM_SCORE_TYPE_COSINE ) {
        sc = decaying_score(m_maxAge)/numScoreComponent;
      } else { // m_score_type = CBTM_SCORE_TYPE_XXXXXXXXX_REWARD
        sc = 0.0;
      }
    } else {
      sc = decaying_score(i)/numScoreComponent;
    }
    // the same value is replicated over all score components
    Scores sc_vec;
    for (size_t j=0; j<numScoreComponent; j++) {
      sc_vec.push_back(sc); //CHECK THIS SCORE
    }
    precomputedScores.push_back(sc_vec);
  }
  m_lower_score = precomputedScores[m_maxAge].at(0);
  VERBOSE(3, "SetPreComputedScores(const unsigned int): lower_age:|" << m_maxAge << "| lower_score:|" << m_lower_score << "|" << std::endl);
}
//! Returns the precomputed score vector for an age; ages equal to or larger
//! than m_maxAge all map to the row stored for m_maxAge.
Scores PhraseDictionaryDynamicCacheBased::GetPreComputedScores(const unsigned int age)
{
  if (age >= m_maxAge) {
    return precomputedScores.at(m_maxAge);
  }
  return precomputedScores.at(age);
}
//! Removes the (source, target) pairs listed in a single string.
//! The string is split on "||||" and the pieces are handed to
//! ClearEntries(std::vector<std::string>); an empty string does nothing.
void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string &entries)
{
  if (entries.empty()) {
    return;
  }
  VERBOSE(3,"entries:|" << entries << "|" << std::endl);
  std::vector<std::string> pieces = TokenizeMultiCharSeparator(entries, "||||");
  VERBOSE(3,"elements.size() after:|" << pieces.size() << "|" << std::endl);
  ClearEntries(pieces);
}
void PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)
{
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)" << std::endl);
std::vector<std::string> pp;
std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++) {
pp.clear();
pp = TokenizeMultiCharSeparator((*it), "|||");
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
ClearEntries(pp[0], pp[1]);
}
}
void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)
{
VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
const StaticData &staticData = StaticData::Instance();
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
Phrase sourcePhrase(0);
Phrase targetPhrase(0);
//target
targetPhrase.Clear();
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, /*factorDelimiter,*/ NULL);
VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl);
//TODO: Would be better to reuse source phrases, but ownership has to be
//consistent across phrase table implementations
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, /*factorDelimiter,*/ NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
ClearEntries(sourcePhrase, targetPhrase);
}
//! Removes the pair (sp, tp) from the cache, if present.
//! The target phrase is removed from the collection stored for sp together
//! with its age; when that collection becomes (or already is) empty, the
//! whole entry for sp is deleted. Unknown sp or tp is a no-op.
void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
{
  VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
#ifdef WITH_THREADS
  // exclusive lock: the cache is modified below, a shared (reader) lock is not enough
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);
  cacheMap::const_iterator it = m_cacheTM.find(sp);
  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
  if (it == m_cacheTM.end()) {
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    //do nothing
    return;
  }

  VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
  // sp is found: look for tp among its target phrases
  TargetCollectionAgePair TgtCollAgePair = it->second;
  TargetPhraseCollection* tpc = TgtCollAgePair.first;
  AgeCollection* ac = TgtCollAgePair.second;

  bool found = false;
  size_t tp_pos = 0;
  for (; tp_pos < tpc->GetSize(); tp_pos++) {
    const Phrase* p_ptr = (const Phrase*) tpc->GetTargetPhrase(tp_pos);
    if (tp == *p_ptr) {
      found = true;
      break; // tp_pos now indexes the matching target phrase
    }
  }

  if (!found) {
    VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
    //do nothing
  } else {
    VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);
    tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
    ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection
    m_entries--;
    VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
    VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
    VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
  }

  if (tpc->GetSize() == 0) {
    // the entry now points to an empty TargetPhraseCollection and AgeCollection: drop it
    ac->clear();
    delete tpc;
    delete ac;
    m_cacheTM.erase(sp);
  }
}
//! Removes all the translation options of the source phrases listed in a
//! single string. The string is split on "||||" and the resulting source
//! phrases are handed to ClearSource(std::vector<std::string>); an empty
//! string does nothing.
void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries)
{
  if (entries != "") {
    VERBOSE(3,"entries:|" << entries << "|" << std::endl);
    std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
    VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
    // elements are bare source phrases, not "src ||| trg" pairs: they must go
    // to ClearSource, not to ClearEntries (which expects both sides)
    ClearSource(elements);
  }
}
void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> entries)
{
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
const StaticData &staticData = StaticData::Instance();
// const std::string& factorDelimiter = staticData.GetFactorDelimiter();
Phrase sourcePhrase(0);
std::vector<std::string>::iterator it;
for(it = entries.begin(); it!=entries.end(); it++) {
sourcePhrase.Clear();
VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, /*factorDelimiter,*/ NULL);
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
ClearSource(sourcePhrase);
}
IFVERBOSE(2) Print();
}
//! Removes the whole cache entry of a source phrase: its target phrase
//! collection and age collection are deleted and the global entry counter is
//! reduced accordingly. An unknown source phrase is a no-op.
void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
{
  VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl);
  cacheMap::const_iterator entry = m_cacheTM.find(sp);
  if (entry == m_cacheTM.end()) {
    return; // nothing cached for this source phrase
  }

  VERBOSE(3,"found:|" << sp << "|" << std::endl);
  TargetCollectionAgePair collections = entry->second;
  TargetPhraseCollection* targets = collections.first;
  AgeCollection* ages = collections.second;

  // all the target phrases of this source disappear from the cache
  m_entries-=targets->GetSize();

  ages->clear();
  delete targets;
  delete ages;
  m_cacheTM.erase(sp);
}
//! Inserts the entries listed in a single string.
//! The string is split on "||||" and the pieces are handed to
//! Insert(std::vector<std::string>); an empty string does nothing.
void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries)
{
  if (entries.empty()) {
    return;
  }
  VERBOSE(3,"entries:|" << entries << "|" << std::endl);
  std::vector<std::string> pieces = TokenizeMultiCharSeparator(entries, "||||");
  VERBOSE(3,"elements.size() after:|" << pieces.size() << "|" << std::endl);
  Insert(pieces);
}
//! Inserts a list of entries with age 1.
//! Unless the cache is configured as constant, the entries already cached
//! are aged by one step (Decay()) before the new ones are added.
void PhraseDictionaryDynamicCacheBased::Insert(std::vector<std::string> entries)
{
  VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
  if (!m_constant) {
    Decay();
  }
  // new entries always start with age 1
  Update(entries, "1");
  IFVERBOSE(3) Print();
}
//! Inserts or refreshes a list of entries, all with the same age.
//! Every element must have the form "src_phr ||| trg_phr" or
//! "src_phr ||| trg_phr ||| wa_align". Elements that do not contain both a
//! source and a target part are reported at verbosity 1 and skipped (they
//! used to cause an out-of-range access).
void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, std::string ageString)
{
  VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, std::string ageString)" << std::endl);
  VERBOSE(3,"ageString:|" << ageString << "|" << std::endl);
  std::vector<std::string>::iterator it;
  for(it = entries.begin(); it!=entries.end(); it++) {
    std::vector<std::string> pp = TokenizeMultiCharSeparator((*it), "|||");
    if (pp.size() < 2) {
      // an entry without both sides cannot be cached
      VERBOSE(1,"PhraseDictionaryDynamicCacheBased::Update malformed entry:|" << (*it) << "|. Skipped." << std::endl);
      continue;
    }
    VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
    VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
    if (pp.size() > 2) {
      // third field, when present, is the word alignment
      VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
      Update(pp[0], pp[1], ageString, pp[2]);
    } else {
      Update(pp[0], pp[1], ageString);
    }
  }
}
//! Inserts or refreshes one entry given as strings.
//! The age string is trimmed and parsed with strtod, then truncated to int;
//! the target and source strings are converted into phrases (output and
//! input factor order, respectively) and everything is passed to
//! Update(Phrase, TargetPhrase, int, std::string).
//! NOTE(review): the strtod end pointer is never inspected, so an unparsable
//! age string silently becomes 0.
void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)
{
  VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
  const StaticData &config = StaticData::Instance();

  // age
  VERBOSE(3, "ageString:|" << ageString << "|" << std::endl);
  ageString = Trim(ageString);
  char *parseEnd;
  int age = strtod(ageString.c_str(), &parseEnd);
  VERBOSE(3, "age:|" << age << "|" << std::endl);

  // target side
  TargetPhrase trgPhrase(0);
  trgPhrase.Clear();
  VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
  trgPhrase.CreateFromString(Output, config.GetOutputFactorOrder(), targetPhraseString, /*factorDelimiter,*/ NULL);
  VERBOSE(3, "targetPhrase:|" << trgPhrase << "|" << std::endl);

  // source side
  //TODO: Would be better to reuse source phrases, but ownership has to be
  //consistent across phrase table implementations
  Phrase srcPhrase(0);
  srcPhrase.Clear();
  VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
  srcPhrase.CreateFromString(Input, config.GetInputFactorOrder(), sourcePhraseString, /*factorDelimiter,*/ NULL);
  VERBOSE(3, "sourcePhrase:|" << srcPhrase << "|" << std::endl);

  if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl);

  Update(srcPhrase, trgPhrase, age, waString);
}
//! Inserts the pair (sp, tp) with the given age, or refreshes it when it is
//! already cached.
//! - sp unknown: a new target phrase collection and age collection are created
//!   for it and tp is added.
//! - sp known, tp unknown: tp is appended to the collections of sp.
//! - sp and tp known: the score and age of the cached target phrase are
//!   overwritten.
//! The score assigned to the target phrase is GetPreComputedScores(age); a
//! non-empty waString is set as its alignment info.
void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)
{
  VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)" << std::endl);
#ifdef WITH_THREADS
  // exclusive lock: the cache is modified below, a shared (reader) lock is not enough
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| age:|" << age << "| word-alignment |" << waString << "|" << std::endl);
  cacheMap::const_iterator it = m_cacheTM.find(sp);
  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
  if(it!=m_cacheTM.end()) {
    VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
    // sp is found: look for tp among the target phrases already cached for it
    TargetCollectionAgePair TgtCollAgePair = it->second;
    TargetPhraseCollection* tpc = TgtCollAgePair.first;
    AgeCollection* ac = TgtCollAgePair.second;
    const Phrase* p_ptr = NULL;
    TargetPhrase* tp_ptr = NULL;
    bool found = false;
    size_t tp_pos=0;
    while (!found && tp_pos < tpc->GetSize()) {
      tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
      p_ptr = (const TargetPhrase*) tp_ptr;
      if ((Phrase) tp == *p_ptr) {
        // leave tp_pos and tp_ptr on the match
        found = true;
        continue;
      }
      tp_pos++;
    }
    if (!found) {
      // new target phrase for a known source phrase
      VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
      std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
      targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
      if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
      tpc->Add(targetPhrase.release());
      tp_pos = tpc->GetSize()-1;
      ac->push_back(age);
      m_entries++;
      VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
    } else {
      // known pair: refresh score, alignment and age in place
      tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
      if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
      ac->at(tp_pos) = age;
      VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
    }
  } else {
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    // sp is not found: create its target collection / age collection pair
    // and add the new target phrase to it
    TargetPhraseCollection* tpc = new TargetPhraseCollection();
    AgeCollection* ac = new AgeCollection();
    m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
    std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
    targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
    if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
    tpc->Add(targetPhrase.release());
    ac->push_back(age);
    m_entries++;
    VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
  }
}
void PhraseDictionaryDynamicCacheBased::Decay()
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
cacheMap::iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
Decay((*it).first);
}
}
//! Ages by one step all the target phrases cached for a source phrase.
//! A target phrase whose new age exceeds m_maxAge is removed together with
//! its age; the others get the score precomputed for the new age. When no
//! target phrase is left, the whole entry of sp is deleted from the cache.
//! No lock is taken here.
void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
{
  VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
  cacheMap::const_iterator entry = m_cacheTM.find(sp);
  if (entry == m_cacheTM.end()) {
    //do nothing
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    return;
  }

  VERBOSE(3,"found:|" << sp << "|" << std::endl);
  TargetCollectionAgePair collections = entry->second;
  TargetPhraseCollection* targets = collections.first;
  AgeCollection* ages = collections.second;

  // walk backwards so that removing an element does not shift the ones still to visit
  int pos = targets->GetSize() - 1;
  while (pos >= 0) {
    unsigned int newAge = ages->at(pos);
    newAge++; //increase the age by 1
    VERBOSE(3,"sp:|" << sp << "| " << " new tp_age:|" << newAge << "|" << std::endl);
    TargetPhrase* target = (TargetPhrase*) targets->GetTargetPhrase(pos);
    if (newAge > m_maxAge) {
      VERBOSE(3,"tp_age:|" << newAge << "| TOO BIG" << std::endl);
      targets->Remove(pos); //delete entry in the Target Phrase Collection
      ages->erase(ages->begin() + pos); //delete entry in the Age Collection
      m_entries--;
    } else {
      VERBOSE(3,"tp_age:|" << newAge << "| STILL GOOD" << std::endl);
      target->GetScoreBreakdown().Assign(this, GetPreComputedScores(newAge));
      ages->at(pos) = newAge;
    }
    pos--;
  }

  if (targets->GetSize() == 0) {
    // nothing left for this source phrase: release both collections and drop the entry
    ages->clear();
    delete ages;
    delete targets;
    m_cacheTM.erase(sp);
  }
}
//! Executes the commands contained in a string; the string is split with
//! Tokenize(command, "||") and the pieces are run one after the other.
void PhraseDictionaryDynamicCacheBased::Execute(std::string command)
{
  VERBOSE(2,"command:|" << command << "|" << std::endl);
  const std::vector<std::string> commandList = Tokenize(command, "||");
  Execute(commandList);
}
//! Executes a list of commands, in order, through Execute_Single_Command().
void PhraseDictionaryDynamicCacheBased::Execute(std::vector<std::string> commands)
{
  for (std::vector<std::string>::const_iterator cmd = commands.begin(); cmd != commands.end(); ++cmd) {
    Execute_Single_Command(*cmd);
  }
  IFVERBOSE(2) Print();
}
//! Executes one command. The only command understood is "clear", which
//! empties the cache; anything else is reported at verbosity 2 and skipped.
void PhraseDictionaryDynamicCacheBased::Execute_Single_Command(std::string command)
{
  if (command != "clear") {
    VERBOSE(2,"PhraseDictionaryDynamicCacheBased Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
    return;
  }
  VERBOSE(2,"PhraseDictionaryDynamicCacheBased Execute command:|"<< command << "|. Cache cleared." << std::endl);
  Clear();
}
//! Empties the cache: deletes the target phrase collection and the age
//! collection of every source phrase, clears the map and resets the entry
//! counter to 0.
void PhraseDictionaryDynamicCacheBased::Clear()
{
#ifdef WITH_THREADS
  // exclusive lock: the collections are deleted here, a shared (reader) lock is not enough
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  cacheMap::const_iterator it;
  for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
    // second = age collection, first = target phrase collection
    (((*it).second).second)->clear();
    delete ((*it).second).second;
    delete ((*it).second).first;
  }
  m_cacheTM.clear();
  m_entries = 0;
}
//! Applies the cache-related requests found in a map of meta-information.
//! The keys are checked, and their actions performed, in this order:
//! "cbtm" (insert entries), "cbtm-command" (execute commands), "cbtm-file"
//! (load files), "cbtm-clear-source", "cbtm-clear-entries" and
//! "cbtm-clear-all" (whose value is ignored).
void PhraseDictionaryDynamicCacheBased::ExecuteDlt(std::map<std::string, std::string> dlt_meta)
{
  typedef std::map<std::string, std::string>::iterator MetaIterator;
  MetaIterator hit;

  hit = dlt_meta.find("cbtm");
  if (hit != dlt_meta.end()) {
    Insert(hit->second);
  }

  hit = dlt_meta.find("cbtm-command");
  if (hit != dlt_meta.end()) {
    Execute(hit->second);
  }

  hit = dlt_meta.find("cbtm-file");
  if (hit != dlt_meta.end()) {
    Load(hit->second);
  }

  hit = dlt_meta.find("cbtm-clear-source");
  if (hit != dlt_meta.end()) {
    ClearSource(hit->second);
  }

  hit = dlt_meta.find("cbtm-clear-entries");
  if (hit != dlt_meta.end()) {
    ClearEntries(hit->second);
  }

  // only the presence of the key matters here
  if (dlt_meta.count("cbtm-clear-all") != 0) {
    Clear();
  }
}
void PhraseDictionaryDynamicCacheBased::Print() const
{
VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Print()" << std::endl);
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
cacheMap::const_iterator it;
for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
std::string source = (it->first).ToString();
TargetPhraseCollection* tpc = (it->second).first;
TargetPhraseCollection::iterator itr;
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
std::string target = (*itr)->ToString();
std::cout << source << " ||| " << target << std::endl;
}
source.clear();
}
}
}// end namespace