mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-07-14 14:50:41 +03:00
Adding a new cache based translation model (thread safe)
This commit is contained in:
parent
02027c14e8
commit
6f75c31be2
@ -10,6 +10,7 @@
|
||||
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryCache.h"
|
||||
|
||||
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
|
||||
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
|
||||
@ -234,6 +235,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
|
||||
MOSES_FNAME(PhraseDictionaryTransliteration);
|
||||
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
|
||||
MOSES_FNAME(PhraseDictionaryCache);
|
||||
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
|
||||
MOSES_FNAME(ProbingPT);
|
||||
MOSES_FNAME(PhraseDictionaryMemoryPerSentence);
|
||||
|
@ -793,6 +793,9 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName)
|
||||
case 15: // DCacheBased:
|
||||
ptType = "PhraseDictionaryDynamicCacheBased";
|
||||
break;
|
||||
case 16: // CachePT:
|
||||
ptType = "PhraseDictionaryCache";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
582
moses/TranslationModel/PhraseDictionaryCache.cpp
Normal file
582
moses/TranslationModel/PhraseDictionaryCache.cpp
Normal file
@ -0,0 +1,582 @@
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include "moses/TranslationModel/PhraseDictionary.h"
|
||||
#include "moses/TranslationModel/PhraseDictionaryCache.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TargetPhrase.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
std::map< const std::string, PhraseDictionaryCache * > PhraseDictionaryCache::s_instance_map;
|
||||
PhraseDictionaryCache *PhraseDictionaryCache::s_instance = NULL;
|
||||
|
||||
//! contructor
|
||||
PhraseDictionaryCache::PhraseDictionaryCache(const std::string &line)
|
||||
: PhraseDictionary(line, true)
|
||||
{
|
||||
std::cerr << "Initializing PhraseDictionaryCache feature..." << std::endl;
|
||||
|
||||
//disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0)
|
||||
m_maxCacheSize = 0;
|
||||
|
||||
m_entries = 0;
|
||||
m_name = "default";
|
||||
m_constant = false;
|
||||
|
||||
ReadParameters();
|
||||
|
||||
UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryCache feature named " + m_name + " is allowed");
|
||||
s_instance_map[m_name] = this;
|
||||
s_instance = this; //for back compatibility
|
||||
vector<float> weight = StaticData::Instance().GetWeights(this);
|
||||
m_numscorecomponent = weight.size();
|
||||
m_sentences=0;
|
||||
}
|
||||
|
||||
//! Destructor: empties the cache of every sentence.
PhraseDictionaryCache::~PhraseDictionaryCache()
{
  this->Clear();
}
|
||||
|
||||
void PhraseDictionaryCache::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
VERBOSE(2, "PhraseDictionaryCache::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
|
||||
|
||||
if (key == "cache-name") {
|
||||
m_name = Scan<std::string>(value);
|
||||
} else if (key == "input-factor") {
|
||||
m_inputFactorsVec = Tokenize<FactorType>(value,",");
|
||||
} else if (key == "output-factor") {
|
||||
m_outputFactorsVec = Tokenize<FactorType>(value,",");
|
||||
} else {
|
||||
PhraseDictionary::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Walks the cache of the sentence being translated: each stored collection is
// copied, the copy is scored in isolation and traced to stderr.  The copies are
// local to this function; only the last one is pruned before it goes out of scope.
void PhraseDictionaryCache::InitializeForInput(ttasksptr const& ttask)
{
  const long tID = ttask->GetSource()->GetTranslationId();
  TargetPhraseCollection::shared_ptr tpc;

  sentCacheMap::const_iterator sentence = m_cacheTM.find(tID);
  if (sentence == m_cacheTM.end()) return; // nothing cached for this sentence

  const cacheMap &cached = sentence->second;
  for (cacheMap::const_iterator entry = cached.begin(); entry != cached.end(); ++entry) {
    std::cerr<<"Source : "<<entry->first<<std::endl;
    tpc.reset(new TargetPhraseCollection(*(entry->second).first));
    std::cerr<<"TPC size : " << tpc->GetSize() << std::endl;

    for (std::vector<const TargetPhrase*>::const_iterator cur = tpc->begin(); cur != tpc->end(); ++cur) {
      // The collection exposes const pointers; scoring needs a mutable phrase.
      const_cast<TargetPhrase*>(*cur)->EvaluateInIsolation(entry->first, GetFeaturesToApply());
      std::cerr<< "Target Phrase : "<<**cur << std::endl;
    }
  }

  if (tpc) {
    tpc->NthElement(m_tableLimit); // sort the phrases for the decoder
  }
}
|
||||
|
||||
void PhraseDictionaryCache::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
{
|
||||
InputPathList::const_iterator iter;
|
||||
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
|
||||
InputPath &inputPath = **iter;
|
||||
long tID = inputPath.ttask->GetSource()->GetTranslationId();
|
||||
if (m_cacheTM.find(tID) == m_cacheTM.end()) continue;
|
||||
TargetPhraseCollection::shared_ptr tpc;
|
||||
for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) {
|
||||
tpc.reset(new TargetPhraseCollection(*(it->second).first));
|
||||
inputPath.SetTargetPhrases(*this, tpc, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a scored, pruned copy of the collection cached for `source` in the
// sentence identified by `tID`, or an empty pointer when either the sentence
// or the phrase is not in the cache.  Holds the reader lock while it runs.
TargetPhraseCollection::shared_ptr PhraseDictionaryCache::GetTargetPhraseCollection(const Phrase &source, long tID) const
{
#ifdef WITH_THREADS
  boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
#endif
  TargetPhraseCollection::shared_ptr result;

  const sentCacheMap::const_iterator sentence = m_cacheTM.find(tID);
  if (sentence == m_cacheTM.end()) return result;

  const cacheMap &cached = sentence->second;
  const cacheMap::const_iterator hit = cached.find(source);
  if (hit == cached.end()) return result;

  // Work on a copy so the stored collection is not modified by scoring.
  result.reset(new TargetPhraseCollection(*(hit->second).first));
  for (std::vector<const TargetPhrase*>::const_iterator cur = result->begin(); cur != result->end(); ++cur) {
    const_cast<TargetPhrase*>(*cur)->EvaluateInIsolation(source, GetFeaturesToApply());
  }
  result->NthElement(m_tableLimit); // sort the phrases for the decoder

  return result;
}
|
||||
|
||||
// Chart decoding is not supported by this phrase table: always throws.
ChartRuleLookupManager* PhraseDictionaryCache::CreateRuleLookupManager(const ChartParser &/*parser*/, const ChartCellCollectionBase &/*cellCollection*/, std::size_t /*maxChartSpan*/)
{
  UTIL_THROW(util::Exception, "Not implemented for Chart Decoder");
}
|
||||
|
||||
// friend
|
||||
ostream& operator<<(ostream& out, const PhraseDictionaryCache& phraseDict)
|
||||
{
|
||||
return out;
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::ClearEntries(std::string &entries, long tID)
|
||||
{
|
||||
if (entries != "" && m_cacheTM.find(tID) != m_cacheTM.end()) {
|
||||
VERBOSE(3,"entries:|" << entries << "|" << std::endl);
|
||||
std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
|
||||
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
|
||||
ClearEntries(elements, tID);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::ClearEntries(std::vector<std::string> entries, long tID)
|
||||
{
|
||||
VERBOSE(3,"PhraseDictionaryCache::ClearEntries(std::vector<std::string> entries)" << std::endl);
|
||||
std::vector<std::string> pp;
|
||||
|
||||
std::vector<std::string>::iterator it;
|
||||
for(it = entries.begin(); it!=entries.end(); it++) {
|
||||
pp.clear();
|
||||
pp = TokenizeMultiCharSeparator((*it), "|||");
|
||||
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
|
||||
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
|
||||
|
||||
ClearEntries(pp[0], pp[1], tID);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString, long tID)
|
||||
{
|
||||
VERBOSE(3,"PhraseDictionaryCache::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
Phrase sourcePhrase(0);
|
||||
Phrase targetPhrase(0);
|
||||
|
||||
//target
|
||||
targetPhrase.Clear();
|
||||
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
|
||||
targetPhrase.CreateFromString(Output, m_outputFactorsVec,
|
||||
targetPhraseString, /*factorDelimiter,*/ NULL);
|
||||
VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
|
||||
|
||||
//TODO: Would be better to reuse source phrases, but ownership has to be
|
||||
//consistent across phrase table implementations
|
||||
sourcePhrase.Clear();
|
||||
VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
|
||||
sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
|
||||
sourcePhraseString, /*factorDelimiter,*/ NULL);
|
||||
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
|
||||
ClearEntries(sourcePhrase, targetPhrase, tID);
|
||||
|
||||
}
|
||||
|
||||
// Removes target phrase `tp` from the collection cached for source phrase
// `sp` in sentence `tID`.  When the collection becomes empty the whole source
// entry (collection and score vector) is dropped as well.
//
// The cache is modified here, so the writer (exclusive) lock is taken.  An
// unknown `tID` is treated like an unknown source phrase: nothing is removed.
void PhraseDictionaryCache::ClearEntries(Phrase sp, Phrase tp, long tID)
{
  VERBOSE(3,"PhraseDictionaryCache::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
#ifdef WITH_THREADS
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);

  VERBOSE(3,"sp:|" << sp << "|" << std::endl);
  const sentCacheMap::iterator sentence = m_cacheTM.find(tID);
  if (sentence == m_cacheTM.end()) {
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    return;
  }

  cacheMap &cached = sentence->second;
  const cacheMap::iterator entry = cached.find(sp);
  if (entry == cached.end()) {
    VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
    return;
  }
  VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);

  TargetPhraseCollection::shared_ptr tpc = entry->second.first;
  Scores* sc = entry->second.second;

  // Linear search for the target phrase; comparison is done on the Phrase part.
  bool found = false;
  size_t tp_pos = 0;
  for (; tp_pos < tpc->GetSize(); ++tp_pos) {
    const Phrase* candidate = static_cast<const Phrase*>(tpc->GetTargetPhrase(tp_pos));
    if (tp == *candidate) {
      found = true;
      break;
    }
  }

  if (!found) {
    VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
  } else {
    VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);

    tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection
    // no need to delete scores here
    m_entries--;
    VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
    VERBOSE(3,"sc size:|" << sc->size() << "|" << std::endl);
    VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
  }

  if (tpc->GetSize() == 0) {
    // Last translation gone: release the score vector and forget the source phrase.
    delete sc;
    cached.erase(entry);
  }
}
|
||||
|
||||
void PhraseDictionaryCache::ClearSource(std::string &entries, long tID)
|
||||
{
|
||||
if (entries != "" && m_cacheTM.find(tID) != m_cacheTM.end()) {
|
||||
VERBOSE(3,"entries:|" << entries << "|" << std::endl);
|
||||
std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
|
||||
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
|
||||
ClearEntries(elements, tID);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::ClearSource(std::vector<std::string> entries, long tID)
|
||||
{
|
||||
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
Phrase sourcePhrase(0);
|
||||
|
||||
std::vector<std::string>::iterator it;
|
||||
for(it = entries.begin(); it!=entries.end(); it++) {
|
||||
|
||||
sourcePhrase.Clear();
|
||||
VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
|
||||
sourcePhrase.CreateFromString(Input, m_inputFactorsVec,
|
||||
*it, /*factorDelimiter,*/ NULL);
|
||||
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
|
||||
|
||||
ClearSource(sourcePhrase, tID);
|
||||
}
|
||||
|
||||
IFVERBOSE(2) Print();
|
||||
}
|
||||
|
||||
// Removes source phrase `sp` and all of its cached translations from the
// cache of sentence `tID`, and lowers the global entry counter accordingly.
//
// The cache is modified here, so the writer (exclusive) lock is taken.  An
// unknown `tID` or source phrase is a no-op.
void PhraseDictionaryCache::ClearSource(Phrase sp, long tID)
{
  VERBOSE(3,"void PhraseDictionaryCache::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl);
#ifdef WITH_THREADS
  boost::unique_lock<boost::shared_mutex> lock(m_cacheLock);
#endif
  const sentCacheMap::iterator sentence = m_cacheTM.find(tID);
  if (sentence == m_cacheTM.end()) return;

  cacheMap &cached = sentence->second;
  const cacheMap::iterator entry = cached.find(sp);
  if (entry == cached.end()) return;

  VERBOSE(3,"found:|" << sp << "|" << std::endl);

  m_entries -= entry->second.first->GetSize(); //reduce the total amount of entries of the cache

  // The score vector is owned by the cache entry through a raw pointer.
  delete entry->second.second;
  cached.erase(entry);
}
|
||||
|
||||
void PhraseDictionaryCache::Insert(std::string &entries, long tID)
|
||||
{
|
||||
if (entries != "") {
|
||||
VERBOSE(3,"entries:|" << entries << "|" << " tID | " << tID << std::endl);
|
||||
std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
|
||||
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
|
||||
Insert(elements, tID);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Insert(std::vector<std::string> entries, long tID)
|
||||
{
|
||||
VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
|
||||
Update(tID, entries);
|
||||
IFVERBOSE(3) Print();
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryCache::Update(long tID, std::vector<std::string> entries)
|
||||
{
|
||||
std::vector<std::string> pp;
|
||||
|
||||
std::vector<std::string>::iterator it;
|
||||
for(it = entries.begin(); it!=entries.end(); it++) {
|
||||
pp.clear();
|
||||
pp = TokenizeMultiCharSeparator((*it), "|||");
|
||||
VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
|
||||
VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
|
||||
|
||||
if (pp.size() > 3) {
|
||||
VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
|
||||
VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl);
|
||||
Update(tID,pp[0], pp[1], pp[2], pp[3]);
|
||||
} else if (pp.size() > 2){
|
||||
VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
|
||||
Update(tID,pp[0], pp[1], pp[2]);
|
||||
} else {
|
||||
Update(tID,pp[0], pp[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parses a whitespace-separated list of floats.  Reading stops at the first
// token that is not a float; an empty string yields an empty vector.
Scores PhraseDictionaryCache::Conv2VecFloats(std::string& s){
  std::vector<float> values;
  if (!s.empty()) {
    std::istringstream reader(s);
    float value;
    while (reader >> value) {
      values.push_back(value);
    }
  }
  return values;
}
|
||||
|
||||
void PhraseDictionaryCache::Update(long tID, std::string sourcePhraseString, std::string targetPhraseString, std::string scoreString, std::string waString)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
Phrase sourcePhrase(0);
|
||||
TargetPhrase targetPhrase(0);
|
||||
|
||||
char *err_ind_temp;
|
||||
Scores scores = Conv2VecFloats(scoreString);
|
||||
//target
|
||||
targetPhrase.Clear();
|
||||
// change here for factored based CBTM
|
||||
VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
|
||||
targetPhrase.CreateFromString(Output, m_outputFactorsVec,
|
||||
targetPhraseString, /*factorDelimiter,*/ NULL);
|
||||
VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
|
||||
|
||||
//TODO: Would be better to reuse source phrases, but ownership has to be
|
||||
//consistent across phrase table implementations
|
||||
sourcePhrase.Clear();
|
||||
VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
|
||||
sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL);
|
||||
VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
|
||||
|
||||
if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl);
|
||||
|
||||
Update(tID, sourcePhrase, targetPhrase, scores, waString);
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Update(long tID, Phrase sp, TargetPhrase tp, Scores scores, std::string waString)
|
||||
{
|
||||
VERBOSE(3,"PhraseDictionaryCache::Update(Phrase sp, TargetPhrase tp, Scores scores, std::string waString)" << std::endl);
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
|
||||
#endif
|
||||
VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| word-alignment |" << waString << "|" << std::endl);
|
||||
|
||||
cacheMap::const_iterator it = m_cacheTM[tID].find(sp);
|
||||
VERBOSE(3,"sp:|" << sp << "|" << std::endl);
|
||||
if(it!=m_cacheTM.at(tID).end()) {
|
||||
VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
|
||||
// sp is found
|
||||
|
||||
TargetCollectionPair TgtCollPair = it->second;
|
||||
TargetPhraseCollection::shared_ptr tpc = TgtCollPair.first;
|
||||
Scores* sc = TgtCollPair.second;
|
||||
const Phrase* p_ptr = NULL;
|
||||
TargetPhrase* tp_ptr = NULL;
|
||||
bool found = false;
|
||||
size_t tp_pos=0;
|
||||
while (!found && tp_pos < tpc->GetSize()) {
|
||||
tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
|
||||
p_ptr = (const TargetPhrase*) tp_ptr;
|
||||
if ((Phrase) tp == *p_ptr) {
|
||||
found = true;
|
||||
continue;
|
||||
}
|
||||
tp_pos++;
|
||||
}
|
||||
if (!found) {
|
||||
VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
|
||||
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
|
||||
Scores scoreVec;
|
||||
for (unsigned int i=0; i<scores.size(); i++){
|
||||
scoreVec.push_back(scores[i]);
|
||||
}
|
||||
if(scoreVec.size() != m_numScoreComponents){
|
||||
VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
|
||||
VERBOSE(1, "I am ignoring this..." <<endl);
|
||||
// std::cin.ignore();
|
||||
}
|
||||
targetPhrase->GetScoreBreakdown().Assign(this, scoreVec);
|
||||
if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
|
||||
|
||||
tpc->Add(targetPhrase.release());
|
||||
|
||||
tp_pos = tpc->GetSize()-1;
|
||||
sc = &scores;
|
||||
m_entries++;
|
||||
VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
|
||||
} else {
|
||||
Scores scoreVec;
|
||||
for (unsigned int i=0; i<scores.size(); i++){
|
||||
scoreVec.push_back(scores[i]);
|
||||
}
|
||||
if(scoreVec.size() != m_numScoreComponents){
|
||||
VERBOSE(1, "Scores does not match number of score components for phrase : "<< sp.ToString() <<" ||| " << tp.ToString() <<endl);
|
||||
VERBOSE(1, "I am ignoring this..." <<endl);
|
||||
// std::cin.ignore();
|
||||
}
|
||||
tp_ptr->GetScoreBreakdown().Assign(this, scoreVec);
|
||||
if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
|
||||
VERBOSE(1,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
|
||||
}
|
||||
} else {
|
||||
VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
|
||||
// p is not found
|
||||
// create target collection
|
||||
|
||||
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
|
||||
Scores* sc = new Scores();
|
||||
m_cacheTM[tID].insert(make_pair(sp,std::make_pair(tpc,sc)));
|
||||
|
||||
//tp is not found
|
||||
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
|
||||
// scoreVec is a composition of decay_score and the feature scores
|
||||
Scores scoreVec;
|
||||
for (unsigned int i=0; i<scores.size(); i++){
|
||||
scoreVec.push_back(scores[i]);
|
||||
}
|
||||
if(scoreVec.size() != m_numScoreComponents){
|
||||
VERBOSE(1, "Scores do not match number of score components for phrase : "<< sp <<" ||| " << tp <<endl);
|
||||
VERBOSE(1, "I am ignoring this..." <<endl);
|
||||
// std::cin.ignore();
|
||||
}
|
||||
targetPhrase->GetScoreBreakdown().Assign(this, scoreVec);
|
||||
if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
|
||||
|
||||
tpc->Add(targetPhrase.release());
|
||||
sc = &scores;
|
||||
m_entries++;
|
||||
VERBOSE(1,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Execute(std::string command, long tID)
|
||||
{
|
||||
VERBOSE(2,"command:|" << command << "|" << std::endl);
|
||||
std::vector<std::string> commands = Tokenize(command, "||");
|
||||
Execute(commands, tID);
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Execute(std::vector<std::string> commands, long tID)
|
||||
{
|
||||
for (size_t j=0; j<commands.size(); j++) {
|
||||
Execute_Single_Command(commands[j]);
|
||||
}
|
||||
IFVERBOSE(2) Print();
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Execute_Single_Command(std::string command)
|
||||
{
|
||||
if (command == "clear") {
|
||||
VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "|. Cache cleared." << std::endl);
|
||||
Clear();
|
||||
} else {
|
||||
VERBOSE(2,"PhraseDictionaryCache Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Clear(){
|
||||
for(sentCacheMap::iterator it=m_cacheTM.begin(); it!=m_cacheTM.end(); it++){
|
||||
Clear(it->first);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Clear(long tID)
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
|
||||
#endif
|
||||
cacheMap::iterator it;
|
||||
for(it = m_cacheTM.at(tID).begin(); it!=m_cacheTM.at(tID).end(); it++) {
|
||||
(((*it).second).second)->clear();
|
||||
delete ((*it).second).second;
|
||||
((*it).second).first.reset();
|
||||
}
|
||||
m_cacheTM.at(tID).clear();
|
||||
m_entries = 0;
|
||||
}
|
||||
|
||||
|
||||
void PhraseDictionaryCache::ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID)
|
||||
{
|
||||
if (dlt_meta.find("cbtm") != dlt_meta.end()) {
|
||||
Insert(dlt_meta["cbtm"], tID);
|
||||
}
|
||||
if (dlt_meta.find("cbtm-command") != dlt_meta.end()) {
|
||||
Execute(dlt_meta["cbtm-command"], tID);
|
||||
}
|
||||
if (dlt_meta.find("cbtm-clear-source") != dlt_meta.end()) {
|
||||
ClearSource(dlt_meta["cbtm-clear-source"], tID);
|
||||
}
|
||||
if (dlt_meta.find("cbtm-clear-entries") != dlt_meta.end()) {
|
||||
ClearEntries(dlt_meta["cbtm-clear-entries"], tID);
|
||||
}
|
||||
if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) {
|
||||
Clear();
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryCache::Print() const
|
||||
{
|
||||
VERBOSE(2,"PhraseDictionaryCache::Print()" << std::endl);
|
||||
#ifdef WITH_THREADS
|
||||
boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
|
||||
#endif
|
||||
for(sentCacheMap::const_iterator itr = m_cacheTM.begin(); itr!=m_cacheTM.end(); itr++) {
|
||||
cacheMap::const_iterator it;
|
||||
for(it = (itr->second).begin(); it!=(itr->second).end(); it++) {
|
||||
std::string source = (it->first).ToString();
|
||||
TargetPhraseCollection::shared_ptr tpc = (it->second).first;
|
||||
TargetPhraseCollection::iterator itr;
|
||||
for(itr = tpc->begin(); itr != tpc->end(); itr++) {
|
||||
std::string target = (*itr)->ToString();
|
||||
std::cout << source << " ||| " << target << std::endl;
|
||||
}
|
||||
source.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}// end namespace
|
184
moses/TranslationModel/PhraseDictionaryCache.h
Normal file
184
moses/TranslationModel/PhraseDictionaryCache.h
Normal file
@ -0,0 +1,184 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2011 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef moses_PhraseDictionaryCache_H
|
||||
#define moses_PhraseDictionaryCache_H
|
||||
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/TranslationModel/PhraseDictionary.h"
|
||||
#include "moses/TranslationTask.h"
|
||||
|
||||
#include <boost/tuple/tuple.hpp>
|
||||
#include <boost/tuple/tuple_io.hpp>
|
||||
|
||||
#ifdef WITH_THREADS
|
||||
#include <boost/thread/shared_mutex.hpp>
|
||||
#include <boost/thread/locks.hpp>
|
||||
#endif
|
||||
|
||||
#define CBTM_SCORE_TYPE_UNDEFINED (-1)
|
||||
#define CBTM_SCORE_TYPE_HYPERBOLA 0
|
||||
#define CBTM_SCORE_TYPE_POWER 1
|
||||
#define CBTM_SCORE_TYPE_EXPONENTIAL 2
|
||||
#define CBTM_SCORE_TYPE_COSINE 3
|
||||
#define CBTM_SCORE_TYPE_HYPERBOLA_REWARD 10
|
||||
#define CBTM_SCORE_TYPE_POWER_REWARD 11
|
||||
#define CBTM_SCORE_TYPE_EXPONENTIAL_REWARD 12
|
||||
#define PI 3.14159265
|
||||
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class ChartParser;
|
||||
class ChartCellCollectionBase;
|
||||
class ChartRuleLookupManager;
|
||||
class TranslationTask;
|
||||
class PhraseDictionary;
|
||||
|
||||
/** Implementation of a Cache-based phrase table.
|
||||
*/
|
||||
class PhraseDictionaryCache : public PhraseDictionary
|
||||
{
|
||||
|
||||
typedef std::pair<TargetPhraseCollection::shared_ptr, Scores*> TargetCollectionPair;
|
||||
typedef boost::unordered_map<Phrase, TargetCollectionPair> cacheMap;
|
||||
typedef std::map<long, cacheMap> sentCacheMap;
|
||||
|
||||
// factored translation
|
||||
std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;
|
||||
|
||||
// data structure for the cache
|
||||
sentCacheMap m_cacheTM;
|
||||
long m_sentences;
|
||||
unsigned int m_numscorecomponent;
|
||||
size_t m_score_type; //scoring type of the match
|
||||
size_t m_entries; //total number of entries in the cache
|
||||
float m_lower_score; //lower_bound_score for no match
|
||||
bool m_constant; //flag for setting a non-decaying cache
|
||||
std::string m_initfiles; // vector of files loaded in the initialization phase
|
||||
std::string m_name; // internal name to identify this instance of the Cache-based phrase table
|
||||
|
||||
#ifdef WITH_THREADS
|
||||
//multiple readers - single writer lock
|
||||
mutable boost::shared_mutex m_cacheLock;
|
||||
#endif
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryCache&);
|
||||
|
||||
public:
|
||||
PhraseDictionaryCache(const std::string &line);
|
||||
~PhraseDictionaryCache();
|
||||
|
||||
inline const std::string GetName() {
|
||||
return m_name;
|
||||
};
|
||||
inline void SetName(const std::string name) {
|
||||
m_name = name;
|
||||
}
|
||||
|
||||
static const PhraseDictionaryCache* Instance(const std::string& name) {
|
||||
if (s_instance_map.find(name) == s_instance_map.end()) {
|
||||
return NULL;
|
||||
}
|
||||
return s_instance_map[name];
|
||||
}
|
||||
|
||||
static PhraseDictionaryCache* InstanceNonConst(const std::string& name) {
|
||||
if (s_instance_map.find(name) == s_instance_map.end()) {
|
||||
return NULL;
|
||||
}
|
||||
return s_instance_map[name];
|
||||
}
|
||||
|
||||
|
||||
static const PhraseDictionaryCache& Instance() {
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
static PhraseDictionaryCache& InstanceNonConst() {
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask,
|
||||
Phrase const& src) const{
|
||||
GetTargetPhraseCollection(src, ttask->GetSource()->GetTranslationId());
|
||||
}
|
||||
|
||||
|
||||
// for phrase-based model
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
TargetPhraseCollection::shared_ptr
|
||||
GetTargetPhraseCollection(const Phrase &src, long tID) const;
|
||||
|
||||
// for phrase-based model
|
||||
// virtual void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
// for syntax/hiero model (CKY+ decoding)
|
||||
ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t);
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void InitializeForInput(ttasksptr const& ttask);
|
||||
|
||||
void Print() const; // prints the cache
|
||||
void Clear(); // clears the cache
|
||||
void Clear(long tID); // clears cache of a sentence
|
||||
|
||||
void ClearEntries(std::string &entries, long tID);
|
||||
void ClearSource(std::string &entries, long tID);
|
||||
void Insert(std::string &entries, long tID);
|
||||
void Execute(std::string command, long tID);
|
||||
void ExecuteDlt(std::map<std::string, std::string> dlt_meta, long tID);
|
||||
|
||||
protected:
|
||||
|
||||
static PhraseDictionaryCache *s_instance;
|
||||
static std::map< const std::string, PhraseDictionaryCache * > s_instance_map;
|
||||
|
||||
Scores Conv2VecFloats(std::string&);
|
||||
void Insert(std::vector<std::string> entries, long tID);
|
||||
|
||||
void Update(long tID, std::vector<std::string> entries);
|
||||
void Update(long tID, std::string sourceString, std::string targetString, std::string ScoreString="", std::string waString="");
|
||||
void Update(long tID, Phrase p, TargetPhrase tp, Scores scores, std::string waString="");
|
||||
|
||||
void ClearEntries(std::vector<std::string> entries, long tID);
|
||||
void ClearEntries(std::string sourceString, std::string targetString, long tID);
|
||||
void ClearEntries(Phrase p, Phrase tp, long tID);
|
||||
|
||||
void ClearSource(std::vector<std::string> entries, long tID);
|
||||
void ClearSource(Phrase sp, long tID);
|
||||
|
||||
void Execute(std::vector<std::string> commands, long tID);
|
||||
void Execute_Single_Command(std::string command);
|
||||
|
||||
|
||||
void SetPreComputedScores(const unsigned int numScoreComponent);
|
||||
Scores GetPreComputedScores(const unsigned int age);
|
||||
|
||||
TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
|
||||
};
|
||||
|
||||
} // namespace Moses
|
||||
|
||||
#endif /* moses_PhraseDictionaryCache_H_ */
|
@ -17,6 +17,8 @@
|
||||
#include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h"
|
||||
#include "moses/Syntax/T2S/RuleMatcherSCFG.h"
|
||||
|
||||
#include "moses/TranslationModel/PhraseDictionaryCache.h"
|
||||
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
@ -149,6 +151,13 @@ interpret_dlt()
|
||||
typedef std::map<std::string,std::string> dltmap_t;
|
||||
BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) {
|
||||
dltmap_t::const_iterator i = M.find("type");
|
||||
if (i->second == "cache") {
|
||||
map<string, string>::const_iterator k = M.find("id");
|
||||
string id = k == M.end() ? "default" : k->second;
|
||||
PhraseDictionaryCache* cache;
|
||||
cache = PhraseDictionaryCache::InstanceNonConst(id);
|
||||
if (cache) cache->ExecuteDlt(M, this->GetSource()->GetTranslationId());
|
||||
}
|
||||
if (i == M.end() || i->second != "adaptive-lm") continue;
|
||||
dltmap_t::const_iterator j = M.find("context-weights");
|
||||
if (j == M.end()) continue;
|
||||
|
Loading…
Reference in New Issue
Block a user