2008-06-11 14:52:57 +04:00
|
|
|
// vim:tabstop=2
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
2012-11-27 19:08:31 +04:00
|
|
|
#include "moses/TranslationModel/PhraseDictionary.h"
|
|
|
|
#include "moses/StaticData.h"
|
|
|
|
#include "moses/InputType.h"
|
|
|
|
#include "moses/TranslationOption.h"
|
|
|
|
#include "moses/UserMessage.h"
|
2013-07-07 01:42:52 +04:00
|
|
|
#include "moses/InputPath.h"
|
2013-07-03 19:57:23 +04:00
|
|
|
#include "util/exception.hh"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2010-08-10 17:51:20 +04:00
|
|
|
using namespace std;
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-11-15 21:26:26 +04:00
|
|
|
// Definition of the static registry of phrase dictionaries; the
// PhraseDictionary constructor in this file appends `this` to it.
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2013-10-29 22:20:55 +04:00
|
|
|
/**
 * Build a phrase dictionary from its configuration line.
 *
 * The line is handed to the DecodeFeature base. The table limit starts at 20
 * and the cache size at DEFAULT_MAX_TRANS_OPT_CACHE_SIZE. The new object
 * also appends itself to the static collection s_staticColl.
 *
 * @param line configuration line, forwarded unchanged to DecodeFeature
 */
PhraseDictionary::PhraseDictionary(const std::string &line)
  : DecodeFeature(line),
    m_tableLimit(20), // default
    m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
{
  s_staticColl.push_back(this);
}
|
2013-02-21 22:06:03 +04:00
|
|
|
|
2013-08-24 00:02:03 +04:00
|
|
|
/**
 * Look up the target phrases for a source phrase, going through the cache
 * when caching is enabled (m_maxCacheSize != 0).
 *
 * On a cache miss the result of GetTargetPhraseCollectionNonCacheLEGACY() is
 * copied (when non-NULL) and the copy is stored in the cache together with
 * the current clock() value; a NULL result is cached as NULL. On a hit the
 * entry's timestamp is refreshed and the stored pointer is returned.
 *
 * @param src source phrase to look up
 * @return the cached collection when caching is on, otherwise whatever the
 *         non-cached lookup returned; may be NULL
 */
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
  typedef std::pair<const TargetPhraseCollection*, clock_t> CacheEntry;

  // caching disabled: go straight to the phrase table
  if (!m_maxCacheSize) {
    return GetTargetPhraseCollectionNonCacheLEGACY(src);
  }

  CacheColl &cache = GetCache();

  // NOTE(review): the hash alone is the key, so two phrases with equal
  // hash_value() would share an entry - confirm this is acceptable.
  const size_t key = hash_value(src);

  std::map<size_t, CacheEntry>::iterator found = cache.find(key);
  if (found != cache.end()) {
    // cache hit: refresh the last-used time and hand back the stored pointer
    CacheEntry &entry = found->second;
    entry.second = clock();
    return entry.first;
  }

  // cache miss: query the phrase table and store a private copy
  const TargetPhraseCollection *result = GetTargetPhraseCollectionNonCacheLEGACY(src);
  if (result) {
    result = new TargetPhraseCollection(*result);
  }

  const CacheEntry fresh(result, clock());
  cache[key] = fresh;
  return result;
}
|
|
|
|
|
2013-09-25 02:56:23 +04:00
|
|
|
/**
 * Uncached phrase-table lookup for a source phrase.
 *
 * This implementation never returns: it unconditionally raises
 * util::Exception through UTIL_THROW.
 * NOTE(review): presumably concrete dictionaries supply their own version -
 * confirm against the class declaration.
 *
 * @param src source phrase (unused here)
 * @throws util::Exception always
 */
TargetPhraseCollection const *
PhraseDictionary::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
{
  UTIL_THROW(util::Exception, "Legacy method not implemented");
}
|
|
|
|
|
2013-02-22 23:17:57 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
/**
 * Lookup of target phrases for a span of an input.
 *
 * This implementation never returns: it unconditionally raises
 * util::Exception through UTIL_THROW.
 *
 * @param src   input being translated (unused here)
 * @param range span of the input (unused here)
 * @throws util::Exception always
 */
TargetPhraseCollectionWithSourcePhrase const*
PhraseDictionary::GetTargetPhraseCollectionLEGACY(InputType const& src, WordsRange const& range) const
{
  UTIL_THROW(util::Exception, "Legacy method not implemented");
}
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
void
|
2013-09-25 02:56:23 +04:00
|
|
|
PhraseDictionary::
|
|
|
|
SetParameter(const std::string& key, const std::string& value)
|
2013-06-07 20:32:01 +04:00
|
|
|
{
|
2013-08-16 18:05:36 +04:00
|
|
|
if (key == "cache-size") {
|
2013-08-23 16:53:30 +04:00
|
|
|
m_maxCacheSize = Scan<size_t>(value);
|
2013-08-15 23:50:22 +04:00
|
|
|
} else if (key == "path") {
|
2013-06-10 21:11:55 +04:00
|
|
|
m_filePath = value;
|
|
|
|
} else if (key == "table-limit") {
|
|
|
|
m_tableLimit = Scan<size_t>(value);
|
|
|
|
} else {
|
2013-06-20 16:25:02 +04:00
|
|
|
DecodeFeature::SetParameter(key, value);
|
2013-06-07 20:32:01 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
void
|
2013-09-25 02:56:23 +04:00
|
|
|
PhraseDictionary::
|
|
|
|
SetFeaturesToApply()
|
2013-06-14 21:34:47 +04:00
|
|
|
{
|
|
|
|
// find out which feature function can be applied in this decode step
|
|
|
|
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
|
|
|
|
for (size_t i = 0; i < allFeatures.size(); ++i) {
|
|
|
|
FeatureFunction *feature = allFeatures[i];
|
|
|
|
if (feature->IsUseable(m_outputFactors)) {
|
|
|
|
m_featuresToApply.push_back(feature);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
void
|
2013-09-25 02:56:23 +04:00
|
|
|
PhraseDictionary::
|
2013-10-03 21:58:45 +04:00
|
|
|
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
2013-07-03 19:57:23 +04:00
|
|
|
{
|
2013-07-09 17:19:35 +04:00
|
|
|
InputPathList::const_iterator iter;
|
2013-10-03 21:58:45 +04:00
|
|
|
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
|
2013-07-09 17:19:35 +04:00
|
|
|
InputPath &node = **iter;
|
2013-07-03 19:57:23 +04:00
|
|
|
|
2013-07-03 22:07:36 +04:00
|
|
|
const Phrase &phrase = node.GetPhrase();
|
2013-08-24 00:02:03 +04:00
|
|
|
const TargetPhraseCollection *targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
|
2013-07-05 02:38:18 +04:00
|
|
|
node.SetTargetPhrases(*this, targetPhrases, NULL);
|
2013-07-03 19:57:23 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-16 18:05:36 +04:00
|
|
|
void PhraseDictionary::ReduceCache() const
|
|
|
|
{
|
2013-08-23 16:53:30 +04:00
|
|
|
CacheColl &cache = GetCache();
|
2013-08-21 18:58:17 +04:00
|
|
|
if (cache.size() <= m_maxCacheSize) return; // not full
|
2013-08-16 18:05:36 +04:00
|
|
|
|
|
|
|
// find cutoff for last used time
|
|
|
|
priority_queue< clock_t > lastUsedTimes;
|
|
|
|
std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
|
2013-08-21 18:58:17 +04:00
|
|
|
iter = cache.begin();
|
|
|
|
while( iter != cache.end() ) {
|
2013-08-16 18:05:36 +04:00
|
|
|
lastUsedTimes.push( iter->second.second );
|
|
|
|
iter++;
|
|
|
|
}
|
|
|
|
for( size_t i=0; i < lastUsedTimes.size()-m_maxCacheSize/2; i++ )
|
|
|
|
lastUsedTimes.pop();
|
|
|
|
clock_t cutoffLastUsedTime = lastUsedTimes.top();
|
2013-08-21 13:15:29 +04:00
|
|
|
clock_t t = clock();
|
2013-08-16 18:05:36 +04:00
|
|
|
|
|
|
|
// remove all old entries
|
2013-08-21 18:58:17 +04:00
|
|
|
iter = cache.begin();
|
|
|
|
while( iter != cache.end() ) {
|
2013-08-16 18:05:36 +04:00
|
|
|
if (iter->second.second < cutoffLastUsedTime) {
|
|
|
|
std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
|
|
|
|
delete iterRemove->second.first;
|
2013-08-21 18:58:17 +04:00
|
|
|
cache.erase(iterRemove);
|
2013-08-16 18:05:36 +04:00
|
|
|
} else iter++;
|
|
|
|
}
|
|
|
|
VERBOSE(2,"Reduced persistent translation option cache in " << ((clock()-t)/(float)CLOCKS_PER_SEC) << " seconds." << std::endl);
|
|
|
|
}
|
2013-08-21 18:58:17 +04:00
|
|
|
|
|
|
|
/**
 * Return the cache collection held by m_cache, creating it on first use.
 *
 * If m_cache.get() yields NULL, a new CacheColl is allocated and handed to
 * m_cache.reset(); the same object is then returned by reference.
 *
 * @return reference to the cache collection
 */
PhraseDictionary::CacheColl &PhraseDictionary::GetCache() const
{
  CacheColl *existing = m_cache.get();
  if (existing == NULL) {
    // nothing stored yet: allocate and let m_cache hold it
    existing = new CacheColl;
    m_cache.reset(existing);
  }
  assert(existing);
  return *existing;
}
|
|
|
|
|
2013-08-21 18:58:17 +04:00
|
|
|
} // namespace
|
|
|
|
|