mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Merge branch 'hieu_opt_input' of ../hh
This commit is contained in:
commit
51ac2d6567
@ -131,7 +131,6 @@ Parameter::Parameter()
|
||||
AddParam("rule-limit", "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
|
||||
AddParam("source-label-overlap", "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
|
||||
AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
|
||||
AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
|
||||
AddParam("show-weights", "print feature weights and exit");
|
||||
AddParam("start-translation-id", "Id of 1st input. Default = 0");
|
||||
AddParam("output-unknowns", "Output the unknown (OOV) words to the given file, one line per sentence");
|
||||
|
@ -107,7 +107,7 @@ struct CompareTargetPhrase {
|
||||
};
|
||||
|
||||
const TargetPhraseCollection*
|
||||
PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) const
|
||||
PhraseDictionaryCompact::GetTargetPhraseCollectionNonCache(const Phrase &sourcePhrase) const
|
||||
{
|
||||
|
||||
// There is no such source phrase if source phrase is longer than longest
|
||||
@ -171,6 +171,8 @@ void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
|
||||
PhraseCache &ref = m_sentenceCache;
|
||||
#endif
|
||||
ref.push_back(tpc);
|
||||
|
||||
ReduceCache();
|
||||
}
|
||||
|
||||
void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
|
||||
|
@ -74,7 +74,7 @@ public:
|
||||
|
||||
void Load();
|
||||
|
||||
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &source) const;
|
||||
const TargetPhraseCollection* GetTargetPhraseCollectionNonCache(const Phrase &source) const;
|
||||
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
|
||||
|
||||
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
|
||||
|
@ -35,17 +35,17 @@ namespace Moses
|
||||
PhraseDictionary::PhraseDictionary(const std::string &description, const std::string &line)
|
||||
:DecodeFeature(description, line)
|
||||
,m_tableLimit(20) // default
|
||||
,m_useCache(666)
|
||||
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
|
||||
{
|
||||
}
|
||||
|
||||
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const Phrase& src) const
|
||||
{
|
||||
const TargetPhraseCollection *ret;
|
||||
if (m_useCache) {
|
||||
if (m_maxCacheSize) {
|
||||
size_t hash = hash_value(src);
|
||||
|
||||
std::map<size_t, const TargetPhraseCollection*>::const_iterator iter;
|
||||
std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;
|
||||
|
||||
{
|
||||
// scope of read lock
|
||||
@ -56,19 +56,26 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
|
||||
}
|
||||
|
||||
if (iter == m_cache.end()) {
|
||||
// not in cache, need to look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(src);
|
||||
if (ret) {
|
||||
ret = new TargetPhraseCollection(*ret);
|
||||
}
|
||||
|
||||
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
|
||||
#ifdef WITH_THREADS
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
#endif
|
||||
m_cache[hash] = ret;
|
||||
m_cache[hash] = value;
|
||||
} else {
|
||||
ret = iter->second;
|
||||
// in cache. just use it
|
||||
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
value.second = clock();
|
||||
|
||||
ret = value.first;
|
||||
}
|
||||
} else {
|
||||
// don't use cache. look up from phrase table
|
||||
ret = GetTargetPhraseCollectionNonCache(src);
|
||||
}
|
||||
|
||||
@ -91,8 +98,8 @@ GetTargetPhraseCollectionLegacy(InputType const& src,WordsRange const& range) co
|
||||
|
||||
void PhraseDictionary::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "use-cache") {
|
||||
m_useCache = Scan<int>(value);
|
||||
if (key == "cache-size") {
|
||||
m_maxCacheSize = Scan<size_t>(value);
|
||||
} else if (key == "path") {
|
||||
m_filePath = value;
|
||||
} else if (key == "table-limit") {
|
||||
@ -126,5 +133,37 @@ void PhraseDictionary::GetTargetPhraseCollectionBatch(const InputPathList &phras
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionary::ReduceCache() const
|
||||
{
|
||||
if (m_cache.size() <= m_maxCacheSize) return; // not full
|
||||
clock_t t = clock();
|
||||
|
||||
// find cutoff for last used time
|
||||
priority_queue< clock_t > lastUsedTimes;
|
||||
std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
|
||||
iter = m_cache.begin();
|
||||
while( iter != m_cache.end() ) {
|
||||
lastUsedTimes.push( iter->second.second );
|
||||
iter++;
|
||||
}
|
||||
for( size_t i=0; i < lastUsedTimes.size()-m_maxCacheSize/2; i++ )
|
||||
lastUsedTimes.pop();
|
||||
clock_t cutoffLastUsedTime = lastUsedTimes.top();
|
||||
|
||||
// remove all old entries
|
||||
#ifdef WITH_THREADS
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
#endif
|
||||
|
||||
iter = m_cache.begin();
|
||||
while( iter != m_cache.end() ) {
|
||||
if (iter->second.second < cutoffLastUsedTime) {
|
||||
std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
|
||||
delete iterRemove->second.first;
|
||||
m_cache.erase(iterRemove);
|
||||
} else iter++;
|
||||
}
|
||||
VERBOSE(2,"Reduced persistent translation option cache in " << ((clock()-t)/(float)CLOCKS_PER_SEC) << " seconds." << std::endl);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,15 +115,15 @@ protected:
|
||||
void SetFeaturesToApply();
|
||||
|
||||
// cache
|
||||
int m_useCache; // 666=not yet set, otherwise act like a bool
|
||||
mutable std::map<size_t, const TargetPhraseCollection*> m_cache;
|
||||
size_t m_maxCacheSize; // 0 = no caching
|
||||
mutable std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> > m_cache;
|
||||
#ifdef WITH_THREADS
|
||||
//reader-writer lock
|
||||
mutable boost::shared_mutex m_accessLock;
|
||||
#endif
|
||||
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const Phrase& src) const;
|
||||
|
||||
void ReduceCache() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -44,6 +44,10 @@ PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
|
||||
: RuleTableTrie("PhraseDictionaryMemory", line)
|
||||
{
|
||||
ReadParameters();
|
||||
|
||||
// caching for memory pt is pointless
|
||||
m_maxCacheSize = 0;
|
||||
|
||||
}
|
||||
|
||||
TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollection(
|
||||
|
@ -34,9 +34,10 @@ PhraseDictionaryTreeAdaptor(const std::string &line)
|
||||
|
||||
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
|
||||
{
|
||||
std::map<size_t, const TargetPhraseCollection*>::const_iterator iter;
|
||||
std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::const_iterator iter;
|
||||
for (iter = m_cache.begin(); iter != m_cache.end(); ++iter) {
|
||||
const TargetPhraseCollection *coll = iter->second;
|
||||
const std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
|
||||
const TargetPhraseCollection *coll = value.first;
|
||||
delete coll;
|
||||
}
|
||||
}
|
||||
@ -50,6 +51,8 @@ void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
ReduceCache();
|
||||
|
||||
PDTAimp *obj = new PDTAimp(this);
|
||||
|
||||
vector<float> weight = staticData.GetWeights(this);
|
||||
|
@ -79,6 +79,8 @@ void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
ReduceCache();
|
||||
|
||||
OnDiskPt::OnDiskWrapper *obj = new OnDiskPt::OnDiskWrapper();
|
||||
if (!obj->BeginLoad(m_filePath))
|
||||
return;
|
||||
@ -91,13 +93,18 @@ void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source)
|
||||
m_implementation.reset(obj);
|
||||
}
|
||||
|
||||
void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList &phraseDictionaryQueue) const
|
||||
void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
|
||||
{
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
|
||||
InputPathList::const_iterator iter;
|
||||
for (iter = phraseDictionaryQueue.begin(); iter != phraseDictionaryQueue.end(); ++iter) {
|
||||
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
|
||||
InputPath &inputPath = **iter;
|
||||
GetTargetPhraseCollectionBatch(inputPath);
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath) const
|
||||
{
|
||||
OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
|
||||
const Phrase &phrase = inputPath.GetPhrase();
|
||||
const InputPath *prevInputPath = inputPath.GetPrevNode();
|
||||
|
||||
@ -122,17 +129,8 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList
|
||||
} else {
|
||||
const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
|
||||
if (ptNode) {
|
||||
vector<float> weightT = StaticData::Instance().GetWeights(this);
|
||||
OnDiskPt::Vocab &vocab = wrapper.GetVocab();
|
||||
|
||||
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
|
||||
TargetPhraseCollection *targetPhrases
|
||||
= targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
|
||||
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
||||
|
||||
delete targetPhrasesOnDisk;
|
||||
|
||||
const TargetPhraseCollection *targetPhrases = GetTargetPhraseCollection(ptNode);
|
||||
inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
|
||||
} else {
|
||||
inputPath.SetTargetPhrases(*this, NULL, NULL);
|
||||
}
|
||||
@ -140,8 +138,67 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList
|
||||
delete lastWordOnDisk;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the target phrases stored at an on-disk phrase-table node, going
 * through the shared cache when caching is enabled.
 *
 * @param ptNode  node of the on-disk phrase table; its file position is
 *                used as the cache key.
 * @return the collection for this node. With caching enabled
 *         (m_maxCacheSize != 0) the pointer is the one stored in m_cache.
 *         With caching disabled it is the object produced by
 *         GetTargetPhraseCollectionNonCache().
 *
 * Cache entries carry a last-used timestamp that is refreshed on every hit.
 */
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
{
  if (!m_maxCacheSize) {
    // caching switched off: always read from the phrase table
    return GetTargetPhraseCollectionNonCache(ptNode);
  }

  typedef std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> > CacheMap;

  const size_t hash = (size_t) ptNode->GetFilePos();

  {
    // scope of lookup lock
    // A hit writes the entry's timestamp, so the exclusive lock is needed;
    // the entry is only touched while the lock is held.
#ifdef WITH_THREADS
    boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
    CacheMap::iterator hit = m_cache.find(hash);
    if (hit != m_cache.end()) {
      // in cache. just use it
      hit->second.second = clock();
      return hit->second.first;
    }
  }

  // not in cache, need to look up from phrase table (done without the lock
  // so other threads are not blocked during the disk read)
  const TargetPhraseCollection *ret = GetTargetPhraseCollectionNonCache(ptNode);

  // The freshly built collection is cached as-is. Copying it first would
  // leave the original object with no owner.
  CacheMap::value_type entry(hash, std::pair<const TargetPhraseCollection*, clock_t>(ret, clock()));

#ifdef WITH_THREADS
  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
  std::pair<CacheMap::iterator, bool> inserted = m_cache.insert(entry);
  if (!inserted.second) {
    // Another thread cached this node while we were reading it. Keep the
    // existing entry (it may already be in use) and drop our duplicate,
    // which nobody else has seen.
    delete ret;
    inserted.first->second.second = clock();
    ret = inserted.first->second.first;
  }

  return ret;
}
|
||||
|
||||
/**
 * Read the target phrases of one on-disk phrase-table node and convert them
 * to the in-memory representation, bypassing the cache.
 *
 * @param ptNode  node of the on-disk phrase table to read from.
 * @return the collection produced by ConvertToMoses() for this node.
 */
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
{
  // GetImplementation() const yields a const wrapper; the calls below are
  // made on a non-const one, hence the cast.
  OnDiskPt::OnDiskWrapper &diskWrapper =
    const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());

  vector<float> featureWeights = StaticData::Instance().GetWeights(this);
  OnDiskPt::Vocab &diskVocab = diskWrapper.GetVocab();

  // fetch at most m_tableLimit entries in their on-disk form ...
  const OnDiskPt::TargetPhraseCollection *onDiskPhrases =
    ptNode->GetTargetPhraseCollection(m_tableLimit, diskWrapper);

  // ... and turn them into decoder target phrases
  TargetPhraseCollection *converted =
    onDiskPhrases->ConvertToMoses(m_input, m_output, *this, featureWeights, diskVocab, false);

  // the on-disk form is not needed past this point
  delete onDiskPhrases;

  return converted;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -60,6 +60,8 @@ protected:
|
||||
OnDiskPt::OnDiskWrapper &GetImplementation();
|
||||
const OnDiskPt::OnDiskWrapper &GetImplementation() const;
|
||||
|
||||
void GetTargetPhraseCollectionBatch(InputPath &inputPath) const;
|
||||
|
||||
public:
|
||||
PhraseDictionaryOnDisk(const std::string &line);
|
||||
~PhraseDictionaryOnDisk();
|
||||
@ -75,7 +77,10 @@ public:
|
||||
const ChartCellCollectionBase &);
|
||||
|
||||
virtual void InitializeForInput(InputType const& source);
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &phraseDictionaryQueue) const;
|
||||
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
||||
|
||||
const TargetPhraseCollection *GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const;
|
||||
const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const;
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user