mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
remove locking. Make wordIndex variable local
This commit is contained in:
parent
5fd9cbb529
commit
7abb3c878a
@ -49,35 +49,13 @@ namespace tmmt
|
||||
cerr << "loading completed" << endl;
|
||||
}
|
||||
|
||||
/**
 * Look up the WordIndex registered under the given translation id.
 *
 * @param translationId key into m_wordIndex; an entry for it must already
 *                      exist (asserted in debug builds).
 * @return reference to the WordIndex stored in m_wordIndex for that id.
 *
 * The shared (reader) lock only covers the map lookup: it is released when
 * this function returns, so the caller uses the returned reference without
 * holding m_accessLock.
 */
FuzzyMatchWrapper::WordIndex &FuzzyMatchWrapper::GetWordIndex(long translationId)
{
#ifdef WITH_THREADS
  // m_accessLock is only declared when WITH_THREADS is defined, so the lock
  // must be compiled out together with it.
  boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif
  std::map<long, WordIndex>::iterator iter = m_wordIndex.find(translationId);
  assert(iter != m_wordIndex.end());

  return iter->second;
}
|
||||
|
||||
void FuzzyMatchWrapper::AddWordIndex(long translationId)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
WordIndex &ret = m_wordIndex[translationId];
|
||||
}
|
||||
|
||||
void FuzzyMatchWrapper::DeleteWordIndex(long translationId)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
|
||||
size_t ret = m_wordIndex.erase(translationId);
|
||||
CHECK(ret == 1);
|
||||
}
|
||||
|
||||
string FuzzyMatchWrapper::Extract(long translationId, const string &dirNameStr)
|
||||
{
|
||||
const Moses::StaticData &staticData = Moses::StaticData::Instance();
|
||||
|
||||
AddWordIndex(translationId);
|
||||
|
||||
string fuzzyMatchFile = ExtractTM(translationId, dirNameStr);
|
||||
WordIndex wordIndex;
|
||||
|
||||
string fuzzyMatchFile = ExtractTM(wordIndex, translationId, dirNameStr);
|
||||
|
||||
// create extract files
|
||||
create_xml(fuzzyMatchFile);
|
||||
@ -104,12 +82,11 @@ namespace tmmt
|
||||
+ " -phrase-translation-table " + fuzzyMatchFile + ".pt";
|
||||
system(cmd.c_str());
|
||||
|
||||
DeleteWordIndex(translationId);
|
||||
|
||||
return fuzzyMatchFile + ".pt.gz";
|
||||
}
|
||||
|
||||
string FuzzyMatchWrapper::ExtractTM(long translationId, const string &dirNameStr)
|
||||
string FuzzyMatchWrapper::ExtractTM(WordIndex &wordIndex, long translationId, const string &dirNameStr)
|
||||
{
|
||||
const std::vector< std::vector< WORD_ID > > &source = suffixArray->GetCorpus();
|
||||
|
||||
@ -277,7 +254,7 @@ namespace tmmt
|
||||
int pruned_match_count = 0;
|
||||
if (short_match_max_length( input_length ))
|
||||
{
|
||||
init_short_matches(translationId, input[sentenceInd] );
|
||||
init_short_matches(wordIndex, translationId, input[sentenceInd] );
|
||||
}
|
||||
vector< int > best_tm;
|
||||
typedef map< int, vector< Match > >::iterator I;
|
||||
@ -289,7 +266,7 @@ namespace tmmt
|
||||
int tmID = tm->first;
|
||||
int tm_length = suffixArray->GetSentenceLength(tmID);
|
||||
vector< Match > &match = tm->second;
|
||||
add_short_matches( translationId, match, source[tmID], input_length, best_cost );
|
||||
add_short_matches(wordIndex, translationId, match, source[tmID], input_length, best_cost );
|
||||
|
||||
//cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
|
||||
|
||||
@ -838,13 +815,12 @@ int FuzzyMatchWrapper::short_match_max_length( int input_length )
|
||||
(to be used by the next function)
|
||||
(done here, because this has to be done only once for an input sentence) */
|
||||
|
||||
void FuzzyMatchWrapper::init_short_matches(long translationId, const vector< WORD_ID > &input )
|
||||
void FuzzyMatchWrapper::init_short_matches(WordIndex &wordIndex, long translationId, const vector< WORD_ID > &input )
|
||||
{
|
||||
int max_length = short_match_max_length( input.size() );
|
||||
if (max_length == 0)
|
||||
return;
|
||||
|
||||
WordIndex &wordIndex = GetWordIndex(translationId);
|
||||
wordIndex.clear();
|
||||
|
||||
// store input words and their positions in hash map
|
||||
@ -861,14 +837,12 @@ void FuzzyMatchWrapper::init_short_matches(long translationId, const vector< WOR
|
||||
|
||||
/* add all short matches to list of matches for a sentence */
|
||||
|
||||
void FuzzyMatchWrapper::add_short_matches(long translationId, vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
|
||||
void FuzzyMatchWrapper::add_short_matches(WordIndex &wordIndex, long translationId, vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
|
||||
{
|
||||
int max_length = short_match_max_length( input_length );
|
||||
if (max_length == 0)
|
||||
return;
|
||||
|
||||
WordIndex &wordIndex = GetWordIndex(translationId);
|
||||
|
||||
int tm_length = tm.size();
|
||||
map< WORD_ID,vector< int > >::iterator input_word_hit;
|
||||
for(int t_pos=0; t_pos<tm.size(); t_pos++)
|
||||
|
@ -9,9 +9,6 @@
|
||||
#ifndef moses_FuzzyMatchWrapper_h
|
||||
#define moses_FuzzyMatchWrapper_h
|
||||
|
||||
#ifdef WITH_THREADS
|
||||
#include <boost/thread/shared_mutex.hpp>
|
||||
#endif
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include "SuffixArray.h"
|
||||
@ -46,12 +43,6 @@ protected:
|
||||
int multiple_max;
|
||||
|
||||
typedef std::map< WORD_ID,std::vector< int > > WordIndex;
|
||||
std::map<long, WordIndex> m_wordIndex;
|
||||
//WordIndex m_wordIndex;
|
||||
#ifdef WITH_THREADS
|
||||
//reader-writer lock
|
||||
mutable boost::shared_mutex m_accessLock;
|
||||
#endif
|
||||
|
||||
// global cache for word pairs
|
||||
std::map< std::pair< WORD_ID, WORD_ID >, unsigned int > lsed;
|
||||
@ -69,21 +60,18 @@ protected:
|
||||
unsigned int compute_length( const std::vector< tmmt::WORD_ID > &sentence );
|
||||
unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx );
|
||||
unsigned int sed( const std::vector< WORD_ID > &a, const std::vector< WORD_ID > &b, std::string &best_path, bool use_letter_sed );
|
||||
void init_short_matches(long translationId, const std::vector< WORD_ID > &input );
|
||||
void init_short_matches(WordIndex &wordIndex, long translationId, const std::vector< WORD_ID > &input );
|
||||
int short_match_max_length( int input_length );
|
||||
void add_short_matches(long translationId, std::vector< Match > &match, const std::vector< WORD_ID > &tm, int input_length, int best_cost );
|
||||
void add_short_matches(WordIndex &wordIndex, long translationId, std::vector< Match > &match, const std::vector< WORD_ID > &tm, int input_length, int best_cost );
|
||||
std::vector< Match > prune_matches( const std::vector< Match > &match, int best_cost );
|
||||
int parse_matches( std::vector< Match > &match, int input_length, int tm_length, int &best_cost );
|
||||
|
||||
void create_extract(int sentenceInd, int cost, const std::vector< WORD_ID > &sourceSentence, const std::vector<SentenceAlignment> &targets, const std::string &inputStr, const std::string &path, std::ofstream &outputFile);
|
||||
|
||||
std::string ExtractTM(long translationId, const std::string &inputPath);
|
||||
std::string ExtractTM(WordIndex &wordIndex, long translationId, const std::string &inputPath);
|
||||
Vocabulary &GetVocabulary()
|
||||
{ return suffixArray->GetVocabulary(); }
|
||||
|
||||
WordIndex &GetWordIndex(long translationId);
|
||||
void AddWordIndex(long translationId);
|
||||
void DeleteWordIndex(long translationId);
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user