mosesdecoder/moses/TranslationModel/PhraseDictionary.cpp

282 lines
7.6 KiB
C++
Raw Normal View History

// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <queue>
#include "moses/TranslationModel/PhraseDictionary.h"
#include "moses/StaticData.h"
#include "moses/InputType.h"
#include "moses/TranslationOption.h"
#include "moses/DecodeStep.h"
#include "moses/DecodeGraph.h"
#include "moses/InputPath.h"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
// Definition of the static collection of PhraseDictionary instances.
// The constructor in this file appends each new instance to it and records
// the instance's position in the collection as its m_id.
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
// Destroys the cache, deleting the TargetPhraseCollection stored in every
// entry. Each entry's value is a (collection, last-used clock) pair; only the
// collection pointer needs freeing (deleting a NULL pointer is a no-op).
CacheColl::~CacheColl()
{
  iterator entry = begin();
  while (entry != end()) {
    delete entry->second.first;
    ++entry;
  }
}
// Constructs the dictionary from its configuration line (forwarded to
// DecodeFeature), sets the default table limit and cache size, and registers
// the instance in s_staticColl. m_id is the instance's index in s_staticColl.
PhraseDictionary::PhraseDictionary(const std::string &line)
  : DecodeFeature(line)
  , m_tableLimit(20) // default
  , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
  s_staticColl.push_back(this);
  m_id = s_staticColl.size() - 1;
}
2013-02-21 22:06:03 +04:00
// Reports whether this dictionary provides a prefix check.
// This implementation always answers false.
bool PhraseDictionary::ProvidesPrefixCheck() const
{
  return false;
}
// Returns the target phrase collection for a source phrase.
//
// When m_maxCacheSize is zero the phrase table is queried directly.
// Otherwise the cache (keyed by hash_value(src)) is consulted:
//  - on a hit, the entry's last-used time is refreshed and the cached
//    collection is returned;
//  - on a miss, the phrase table is queried, a non-NULL result is copied,
//    and the copy (or NULL) is stored with the current clock() value.
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
  if (!m_maxCacheSize) {
    // don't use cache. look up from phrase table
    return GetTargetPhraseCollectionNonCacheLEGACY(src);
  }

  CacheColl &cache = GetCache();
  const size_t key = hash_value(src);
  CacheColl::iterator found = cache.find(key);

  if (found != cache.end()) {
    // in cache: refresh the last-used time, then hand back the stored pointer
    found->second.second = clock();
    return found->second.first;
  }

  // not in cache, need to look up from phrase table
  const TargetPhraseCollection *result = GetTargetPhraseCollectionNonCacheLEGACY(src);
  if (result) {
    // the cache stores its own copy of the collection
    result = new TargetPhraseCollection(*result);
  }

  std::pair<const TargetPhraseCollection*, clock_t> entry(result, clock());
  cache[key] = entry;
  return result;
}
2013-09-25 02:56:23 +04:00
// Uncached legacy lookup of the target phrases for a source phrase.
// This implementation does not perform a lookup: it always throws
// util::Exception with the message "Legacy method not implemented".
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
{
  UTIL_THROW(util::Exception, "Legacy method not implemented");
}
2013-02-22 23:17:57 +04:00
2013-09-27 12:35:24 +04:00
// Legacy lookup by input and word range.
// This implementation does not perform a lookup: it always throws
// util::Exception with the message "Legacy method not implemented".
const TargetPhraseCollectionWithSourcePhrase *PhraseDictionary::GetTargetPhraseCollectionLEGACY(InputType const& src, WordsRange const& range) const
{
  UTIL_THROW(util::Exception, "Legacy method not implemented");
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 02:56:23 +04:00
PhraseDictionary::
SetParameter(const std::string& key, const std::string& value)
{
2013-08-16 18:05:36 +04:00
if (key == "cache-size") {
2013-08-23 16:53:30 +04:00
m_maxCacheSize = Scan<size_t>(value);
} else if (key == "path") {
m_filePath = value;
} else if (key == "table-limit") {
m_tableLimit = Scan<size_t>(value);
} else {
DecodeFeature::SetParameter(key, value);
}
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 02:56:23 +04:00
PhraseDictionary::
SetFeaturesToApply()
{
// find out which feature function can be applied in this decode step
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < allFeatures.size(); ++i) {
FeatureFunction *feature = allFeatures[i];
if (feature->IsUseable(m_outputFactors)) {
m_featuresToApply.push_back(feature);
}
}
}
2015-01-14 14:07:42 +03:00
// Notifies the phrase dictionary that the given TargetPhraseCollection is no
// longer needed. This implementation takes no action.
void PhraseDictionary::Release(TargetPhraseCollection const* tpc) const
{
  // intentionally empty: nothing to release by default
}
// Reports whether the given phrase exists as a prefix.
// This implementation ignores both arguments and always answers true.
bool PhraseDictionary::PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const
{
  return true;
}
2013-09-27 12:35:24 +04:00
void
2013-09-25 02:56:23 +04:00
PhraseDictionary::
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
// backoff
if (!SatisfyBackoff(inputPath)) {
2015-01-14 14:07:42 +03:00
continue;
}
const Phrase &phrase = inputPath.GetPhrase();
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
}
}
// persistent cache handling
// saving persistent cache to disk
//void PhraseDictionary::SaveCache() const
//{
// CacheColl &cache = GetCache();
// for( std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter,
// iter != cache.end(),
// iter++ ) {
2015-01-14 14:07:42 +03:00
//
// }
//}
// loading persistent cache from disk
//void PhraseDictionary::LoadCache() const
//{
// CacheColl &cache = GetCache();
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
// iter = cache.begin();
// while( iter != cache.end() ) {
// std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
// delete iterRemove->second.first;
// cache.erase(iterRemove);
// }
//}
// reduce presistent cache by half of maximum size
2013-08-16 18:05:36 +04:00
void PhraseDictionary::ReduceCache() const
{
Timer reduceCacheTime;
reduceCacheTime.start();
2013-08-23 16:53:30 +04:00
CacheColl &cache = GetCache();
if (cache.size() <= m_maxCacheSize) return; // not full
2013-08-16 18:05:36 +04:00
// find cutoff for last used time
priority_queue< clock_t > lastUsedTimes;
CacheColl::iterator iter;
iter = cache.begin();
while( iter != cache.end() ) {
2013-08-16 18:05:36 +04:00
lastUsedTimes.push( iter->second.second );
iter++;
}
for( size_t i=0; i < lastUsedTimes.size()-m_maxCacheSize/2; i++ )
lastUsedTimes.pop();
clock_t cutoffLastUsedTime = lastUsedTimes.top();
// remove all old entries
iter = cache.begin();
while( iter != cache.end() ) {
2013-08-16 18:05:36 +04:00
if (iter->second.second < cutoffLastUsedTime) {
CacheColl::iterator iterRemove = iter++;
2013-08-16 18:05:36 +04:00
delete iterRemove->second.first;
cache.erase(iterRemove);
2013-08-16 18:05:36 +04:00
} else iter++;
}
VERBOSE(2,"Reduced persistent translation option cache in " << reduceCacheTime << " seconds." << std::endl);
2013-08-16 18:05:36 +04:00
}
// Returns the cache held by m_cache, creating an empty CacheColl and handing
// it to m_cache on first use.
CacheColl &PhraseDictionary::GetCache() const
{
  CacheColl *existing = m_cache.get();
  if (existing != NULL) {
    return *existing;
  }

  // first access: allocate the cache and store it in m_cache
  CacheColl *created = new CacheColl;
  m_cache.reset(created);
  assert(created);
  return *created;
}
2014-05-12 18:40:18 +04:00
bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
{
2014-05-12 18:40:18 +04:00
const Phrase &sourcePhrase = inputPath.GetPhrase();
assert(m_container);
const DecodeGraph &decodeGraph = GetDecodeGraph();
size_t backoff = decodeGraph.GetBackoff();
if (backoff == 0) {
2015-01-14 14:07:42 +03:00
// ie. don't backoff. Collect ALL translations
return true;
2014-05-12 18:40:18 +04:00
}
if (sourcePhrase.GetSize() > backoff) {
2015-01-14 14:07:42 +03:00
// source phrase too big
return false;
2014-05-12 18:40:18 +04:00
}
// lookup translation only if no other translations
InputPath::TargetPhrases::const_iterator iter;
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
2015-01-14 14:07:42 +03:00
const std::pair<const TargetPhraseCollection*, const void*> &temp = iter->second;
const TargetPhraseCollection *tpCollPrev = temp.first;
2014-05-12 18:40:18 +04:00
2015-01-14 14:07:42 +03:00
if (tpCollPrev && tpCollPrev->GetSize()) {
// already have translation from another pt. Don't create translations
return false;
}
2014-05-12 18:40:18 +04:00
}
return true;
}
} // namespace