Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2013-08-22 09:24:58 +01:00
commit 5accd5f2e5
7 changed files with 58 additions and 52 deletions

15
Jamroot
View File

@ -19,7 +19,8 @@
#--with-cmph=/path/to/cmph
#
#Thread-caching malloc (if present, used for multi-threaded builds by default)
#--without-tcmalloc
#--without-tcmalloc does not compile with tcmalloc even if present
#--full-tcmalloc links against the full version (useful for memory profiling)
#
#REGRESSION TESTING
#--with-regtest=/path/to/moses-reg-test-data
@ -76,8 +77,16 @@ boost 103600 ;
external-lib z ;
# Thread-caching malloc selection: this branch is taken only when
# --without-tcmalloc was not given and test_library reports "tcmalloc_minimal".
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
external-lib tcmalloc_minimal ;
requirements += <threading>multi:<library>tcmalloc_minimal ;
# --full-tcmalloc: declare the full tcmalloc library together with unwind and
# build with -fno-omit-frame-pointer for both C and C++ sources.
if [ option.get "full-tcmalloc" : : "yes" ] {
tcmalloc = "tcmalloc" ;
external-lib unwind ;
external-lib tcmalloc : : unwind ;
requirements += <library>tcmalloc <library>unwind <cflags>-fno-omit-frame-pointer <cxxflags>-fno-omit-frame-pointer ;
} else {
# Default: tcmalloc_minimal, added to the requirements for <threading>multi only.
tcmalloc = "tcmalloc_minimal" ;
external-lib tcmalloc_minimal ;
requirements += <threading>multi:<library>$(tcmalloc) ;
}
} else {
# Either tcmalloc was disabled or the library test failed: print a hint and carry on.
echo "Tip: install tcmalloc for faster threading. See BUILD-INSTRUCTIONS.txt for more information." ;
}

View File

@ -184,8 +184,8 @@ rule boost ( min-version ) {
}
#Link normally to a library, but sometimes static isn't installed so fall back to dynamic.
# name: library to declare; search-path: optional directories, passed both as
# "-L" arguments to auto-shared and as <search> properties of the lib target.
rule external-lib ( name : search-path * ) {
lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] <search>$(search-path) ;
# Form with a third, optional argument: each entry of deps is attached to the
# lib target as a <use> property.
rule external-lib ( name : search-path * : deps * ) {
lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] <search>$(search-path) <use>$(deps) ;
}
#Write the current command line to previous.sh. This does not do shell escaping.

View File

@ -171,8 +171,6 @@ void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc)
PhraseCache &ref = m_sentenceCache;
#endif
ref.push_back(tpc);
ReduceCache();
}
void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
@ -197,6 +195,8 @@ void PhraseDictionaryCompact::CleanUpAfterSentenceProcessing(const InputType &so
PhraseCache temp;
temp.swap(ref);
ReduceCache();
}
}

View File

@ -43,19 +43,15 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
{
const TargetPhraseCollection *ret;
if (m_maxCacheSize) {
CacheColl &cache = GetCache();
size_t hash = hash_value(src);
std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;
{
// scope of read lock
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif
iter = m_cache.find(hash);
}
iter = cache.find(hash);
if (iter == m_cache.end()) {
if (iter == cache.end()) {
// not in cache, need to look up from phrase table
ret = GetTargetPhraseCollectionNonCache(src);
if (ret) {
@ -63,10 +59,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
}
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
#ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
m_cache[hash] = value;
cache[hash] = value;
} else {
// in cache. just use it
std::pair<const TargetPhraseCollection*, clock_t> &value = iter->second;
@ -135,35 +128,45 @@ void PhraseDictionary::GetTargetPhraseCollectionBatch(const InputPathList &phras
// Shrinks the translation option cache once it holds more than m_maxCacheSize
// entries: a cutoff is derived from the entries' last-used clock() values and
// every entry older than the cutoff is deleted and erased.
// NOTE(review): several statements below appear twice, once written against
// m_cache and once against the collection returned by GetCache() -- confirm
// which form is meant to be live.
void PhraseDictionary::ReduceCache() const
{
if (m_cache.size() <= m_maxCacheSize) return; // not full
clock_t t = clock();
CacheColl &cache = GetCache();
if (cache.size() <= m_maxCacheSize) return; // not full
// find cutoff for last used time
// (the default priority_queue keeps the largest, i.e. most recent, time on top)
priority_queue< clock_t > lastUsedTimes;
std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iter;
iter = m_cache.begin();
while( iter != m_cache.end() ) {
iter = cache.begin();
while( iter != cache.end() ) {
lastUsedTimes.push( iter->second.second );
iter++;
}
// discard the most recent times; what remains on top becomes the cutoff
// NOTE(review): lastUsedTimes.size() is re-evaluated after every pop(), so the
// loop bound shrinks while i grows -- confirm the intended number of pops.
for( size_t i=0; i < lastUsedTimes.size()-m_maxCacheSize/2; i++ )
lastUsedTimes.pop();
clock_t cutoffLastUsedTime = lastUsedTimes.top();
// t is the start time for the duration reported by VERBOSE at the end
clock_t t = clock();
// remove all old entries
#ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
iter = m_cache.begin();
while( iter != m_cache.end() ) {
iter = cache.begin();
while( iter != cache.end() ) {
if (iter->second.second < cutoffLastUsedTime) {
// advance iter before erasing so it never refers to the removed element
std::map<size_t, std::pair<const TargetPhraseCollection*,clock_t> >::iterator iterRemove = iter++;
// the cached collection itself is deleted here, then its map entry is dropped
delete iterRemove->second.first;
m_cache.erase(iterRemove);
cache.erase(iterRemove);
} else iter++;
}
VERBOSE(2,"Reduced persistent translation option cache in " << ((clock()-t)/(float)CLOCKS_PER_SEC) << " seconds." << std::endl);
}
// Returns the cache collection currently held by m_cache, allocating an empty
// collection and storing it in m_cache the first time none is present.
PhraseDictionary::CacheColl &PhraseDictionary::GetCache() const
{
  CacheColl *coll = m_cache.get();
  if (!coll) {
    // nothing stored yet: create the collection and hand it over to m_cache
    coll = new CacheColl;
    m_cache.reset(coll);
  }
  CHECK(coll);
  return *coll;
}
} // namespace

View File

@ -32,7 +32,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#include <boost/thread/shared_mutex.hpp>
#else
#include <boost/scoped_ptr.hpp>
#endif
#include "moses/Phrase.h"
@ -116,14 +117,21 @@ protected:
// cache
size_t m_maxCacheSize; // 0 = no caching
mutable std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> > m_cache;
typedef std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> > CacheColl;
#ifdef WITH_THREADS
//reader-writer lock
mutable boost::shared_mutex m_accessLock;
mutable boost::thread_specific_ptr<CacheColl> m_cache;
#else
mutable boost::scoped_ptr<CacheColl> m_cache;
#endif
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCache(const Phrase& src) const;
void ReduceCache() const;
protected:
CacheColl &GetCache() const;
};
}

View File

@ -34,12 +34,6 @@ PhraseDictionaryTreeAdaptor(const std::string &line)
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
{
  // Free every TargetPhraseCollection still referenced from a cache entry.
  typedef std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> > CacheMap;
  CacheMap::const_iterator entry = m_cache.begin();
  while (entry != m_cache.end()) {
    delete entry->second.first;
    ++entry;
  }
}
void PhraseDictionaryTreeAdaptor::Load()

View File

@ -143,20 +143,16 @@ void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath
const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
{
const TargetPhraseCollection *ret;
if (m_maxCacheSize) {
CacheColl &cache = GetCache();
size_t hash = (size_t) ptNode->GetFilePos();
std::map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >::iterator iter;
{
// scope of read lock
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif
iter = m_cache.find(hash);
}
iter = cache.find(hash);
if (iter == m_cache.end()) {
if (iter == cache.end()) {
// not in cache, need to look up from phrase table
ret = GetTargetPhraseCollectionNonCache(ptNode);
if (ret) {
@ -164,11 +160,7 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollection(
}
std::pair<const TargetPhraseCollection*, clock_t> value(ret, clock());
#ifdef WITH_THREADS
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
m_cache[hash] = value;
cache[hash] = value;
}
else {
// in cache. just use it