From 7dc6ad425561384c8e2d258da3a3486c0b4ba80b Mon Sep 17 00:00:00 2001 From: Nicola Bertoldi Date: Sat, 14 Sep 2013 10:13:24 +0200 Subject: [PATCH] beautify --- moses/FF/DynamicCacheBasedLanguageModel.cpp | 713 ++++++++++---------- moses/FF/DynamicCacheBasedLanguageModel.h | 18 +- moses/StaticData.h | 16 +- moses/TypeDef.h | 2 +- moses/Util.cpp | 80 +-- 5 files changed, 404 insertions(+), 425 deletions(-) diff --git a/moses/FF/DynamicCacheBasedLanguageModel.cpp b/moses/FF/DynamicCacheBasedLanguageModel.cpp index 4ebd63010..999bc202d 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.cpp +++ b/moses/FF/DynamicCacheBasedLanguageModel.cpp @@ -6,391 +6,370 @@ namespace Moses { - - DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line) - : StatelessFeatureFunction("DynamicCacheBasedLanguageModel", line) - { - std::cerr << "Initializing DynamicCacheBasedLanguageModel feature.." << std::endl; - query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; - score_type = CBLM_SCORE_TYPE_HYPERBOLA; - maxAge = 1000; +DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line) + : StatelessFeatureFunction("DynamicCacheBasedLanguageModel", line) +{ + std::cerr << "Initializing DynamicCacheBasedLanguageModel feature.." << std::endl; - ReadParameters(); - } + query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; + score_type = CBLM_SCORE_TYPE_HYPERBOLA; + maxAge = 1000; - DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel(){}; - - void DynamicCacheBasedLanguageModel::SetPreComputedScores() - { + ReadParameters(); +} + +DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel() {}; + +void DynamicCacheBasedLanguageModel::SetPreComputedScores() +{ #ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - precomputedScores.clear(); - for (size_t i=0; i lock(m_cacheLock); +#endif + precomputedScores.clear(); + for (size_t i=0; i(value); - } - else if (key == "cblm-score-type") { - score_type = Scan(value); - } - else if (key == "cblm-file") { - m_initfiles = Scan(value); - } else { - StatelessFeatureFunction::SetParameter(key, value); - } - } +void DynamicCacheBasedLanguageModel::SetParameter(const std::string& key, const std::string& value) +{ + std::cerr << "DynamicCacheBasedLanguageModel::SetParameter" << std::endl; + if (key == "cblm-query-type") { + query_type = Scan(value); + } else if (key == "cblm-score-type") { + score_type = Scan(value); + } else if (key == "cblm-file") { + m_initfiles = Scan(value); + } else { + StatelessFeatureFunction::SetParameter(key, value); + } +} - void DynamicCacheBasedLanguageModel::Evaluate(const Phrase &sp - , const TargetPhrase &tp - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const - { - float score; - switch(query_type){ - case CBLM_QUERY_TYPE_WHOLESTRING: - score = Evaluate_Whole_String(tp); - break; - case CBLM_QUERY_TYPE_ALLSUBSTRINGS: - score = Evaluate_All_Substrings(tp); - break; - default: - CHECK(false); - } +void DynamicCacheBasedLanguageModel::Evaluate(const Phrase &sp + , const TargetPhrase &tp + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const +{ + float score; + switch(query_type) { + case CBLM_QUERY_TYPE_WHOLESTRING: + score = Evaluate_Whole_String(tp); + break; + case CBLM_QUERY_TYPE_ALLSUBSTRINGS: + score = Evaluate_All_Substrings(tp); + break; + default: + CHECK(false); + } - VERBOSE(2,"cblm::Evaluate: score:|" << score << "|" << std::endl); - scoreBreakdown.Assign(this, score); - } + VERBOSE(2,"cblm::Evaluate: score:|" << score << "|" << std::endl); + scoreBreakdown.Assign(this, score); +} - float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const - { - //consider all words in the TargetPhrase as one n-gram - // and compute the decaying_score for all words - // and return their sum +float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const +{ + //consider all words in the TargetPhrase as one n-gram + // and compute the decaying_score for all words + // and return their sum - decaying_cache_t::const_iterator it; - float score = 0.0; + decaying_cache_t::const_iterator it; + float score = 0.0; - std::string w = ""; - size_t endpos = tp.GetSize(); - for (size_t pos = 0 ; pos < endpos ; ++pos) { - w += tp.GetWord(pos).GetFactor(0)->GetString().as_string(); - if ((pos == 0) && (endpos > 1)){ - w += " "; - } - } - it = m_cache.find(w); + std::string w = ""; + size_t endpos = tp.GetSize(); + for (size_t pos = 0 ; pos < endpos ; ++pos) { + w += tp.GetWord(pos).GetFactor(0)->GetString().as_string(); + if ((pos == 0) && (endpos > 1)) { + w += " "; + } + } + it = m_cache.find(w); // VERBOSE(1,"cblm::Evaluate: cheching cache for w:|" << w << "|" << std::endl); - if (it != m_cache.end()) //found! - { - score += ((*it).second).second; - VERBOSE(3,"cblm::Evaluate_Whole_String: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << -score << "|" << std::endl); - } + if (it != m_cache.end()) { //found! + score += ((*it).second).second; + VERBOSE(3,"cblm::Evaluate_Whole_String: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << + score << "|" << std::endl); + } - VERBOSE(3,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl); - return score; - } + VERBOSE(3,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl); + return score; +} - float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const - { - //loop over all n-grams in the TargetPhrase (no matter of n) - // and compute the decaying_score for all words - // and return their sum +float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const +{ + //loop over all n-grams in the TargetPhrase (no matter of n) + // and compute the decaying_score for all words + // and return their sum - decaying_cache_t::const_iterator it; - float score = 0.0; - for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) { - std::string w = ""; - for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) { - w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string(); - it = m_cache.find(w); + decaying_cache_t::const_iterator it; + float score = 0.0; + for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) { + std::string w = ""; + for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) { + w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string(); + it = m_cache.find(w); // VERBOSE(1,"cblm::Evaluate_All_Substrings: cheching cache for w:|" << w << "|" << std::endl); - if (it != m_cache.end()) //found! - { - score += ((*it).second).second; - VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl); - } + if (it != m_cache.end()) { //found! + score += ((*it).second).second; + VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl); + } - if (endpos == startpos){ - w += " "; - } + if (endpos == startpos) { + w += " "; + } - } - } - VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl); - return score; - } - - void DynamicCacheBasedLanguageModel::Print() const - { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - decaying_cache_t::const_iterator it; - std::cout << "Content of the cache of Cache-Based Language Model" << std::endl; - for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) - { - std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl; - } - } - - void DynamicCacheBasedLanguageModel::Decay() - { -#ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - decaying_cache_t::iterator it; - - int age; - float score; - for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) - { - age=((*it).second).first + 1; - if (age > 1000) - { - m_cache.erase(it); - it--; - } - else - { - score = decaying_score(age); - decaying_cache_value_t p (age, score); - (*it).second = p; - } - } - } - - void DynamicCacheBasedLanguageModel::Update(std::vector words, int age) - { -#ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - for (size_t j=0; j e (words[j],p); - m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist) - m_cache.insert(e); //insert the entry - } - } - - void DynamicCacheBasedLanguageModel::Insert(std::string &entries) - { - if (entries != "") - { - VERBOSE(1,"entries:|" << entries << "|" << std::endl); - std::vector elements = TokenizeMultiCharSeparator(entries, "||"); - VERBOSE(1,"elements.size() after:|" << elements.size() << "|" << std::endl); - Insert(elements); - } - } - - void DynamicCacheBasedLanguageModel::Insert(std::vector ngrams) - { - VERBOSE(1,"CacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl); - Decay(); - Update(ngrams,1); - IFVERBOSE(2) Print(); - } - - void DynamicCacheBasedLanguageModel::Execute(std::string command) - { - VERBOSE(1,"CacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl); - std::vector commands = Tokenize(command, "||"); - Execute(commands); - } - - void DynamicCacheBasedLanguageModel::Execute(std::vector commands) - { - for (size_t j=0; j lock(m_cacheLock); -#endif - m_cache.clear(); - } - - void DynamicCacheBasedLanguageModel::Load() - { - VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl); - Load(m_initfiles); - } - - void DynamicCacheBasedLanguageModel::Load(const std::string file) - { - VERBOSE(2,"DynamicCacheBasedLanguageModel::Loadconst std::string file()" << std::endl); - std::vector files = Tokenize(m_initfiles, "||"); - Load(files); - } - - - void DynamicCacheBasedLanguageModel::Load(std::vector files) - { - for(size_t j = 0; j < files.size(); ++j) - { - Load_Single_File(files[j]); - } - } - - void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file) - { - //file format - //age || n-gram - //age || n-gram || n-gram || n-gram || ... - //.... - //each n-gram is a sequence of n words (no matter of n) - // - //there is no limit on the size of n - // - //entries can be repeated, but the last entry overwrites the previous - - - VERBOSE(2,"Loading data from the cache file " << file << std::endl); - InputFileStream cacheFile(file); - - std::string line; - int age; - std::vector words; - - while (getline(cacheFile, line)) { - std::vector vecStr = TokenizeMultiCharSeparator( line , "||" ); - if (vecStr.size() >= 2) { - age = Scan(vecStr[0]); - vecStr.erase(vecStr.begin()); - Update(vecStr,age); - } else { - TRACE_ERR("ERROR: The format of the loaded file is wrong: " << line << std::endl); - CHECK(false); - } - } - IFVERBOSE(2) Print(); - } - - void DynamicCacheBasedLanguageModel::SetQueryType(size_t type) { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - - query_type = type; - if ( query_type != CBLM_QUERY_TYPE_WHOLESTRING - && query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) - { - VERBOSE(2, "This query type " << query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." << std::endl); - query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; - } - VERBOSE(2, "CacheBasedLanguageModel QueryType: " << query_type << std::endl); - - }; - - void DynamicCacheBasedLanguageModel::SetScoreType(size_t type) { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - score_type = type; - if ( score_type != CBLM_SCORE_TYPE_HYPERBOLA - && score_type != CBLM_SCORE_TYPE_POWER - && score_type != CBLM_SCORE_TYPE_EXPONENTIAL - && score_type != CBLM_SCORE_TYPE_COSINE - && score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD - && score_type != CBLM_SCORE_TYPE_POWER_REWARD - && score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) - { - VERBOSE(2, "This score type " << score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." << std::endl); - score_type = CBLM_SCORE_TYPE_HYPERBOLA; - } - VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << score_type << std::endl); - }; - - void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age) { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - maxAge = age; - VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << maxAge << std::endl); - }; - - float DynamicCacheBasedLanguageModel::decaying_score(const int age) - { - float sc; - switch(score_type){ - case CBLM_SCORE_TYPE_HYPERBOLA: - sc = (float) 1.0/age - 1.0; - break; - case CBLM_SCORE_TYPE_POWER: - sc = (float) pow(age, -0.25) - 1.0; - break; - case CBLM_SCORE_TYPE_EXPONENTIAL: - sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0; - break; - case CBLM_SCORE_TYPE_COSINE: - sc = (float) cos( (age-1) * (PI/2) / maxAge ) - 1.0; - break; - case CBLM_SCORE_TYPE_HYPERBOLA_REWARD: - sc = (float) 1.0/age; - break; - case CBLM_SCORE_TYPE_POWER_REWARD: - sc = (float) pow(age, -0.25); - break; - case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD: - sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0); - break; - default: - sc = -1.0; - } - return sc; - } + } + } + VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl); + return score; +} + +void DynamicCacheBasedLanguageModel::Print() const +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + decaying_cache_t::const_iterator it; + std::cout << "Content of the cache of Cache-Based Language Model" << std::endl; + for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) { + std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl; + } +} + +void DynamicCacheBasedLanguageModel::Decay() +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + decaying_cache_t::iterator it; + + int age; + float score; + for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) { + age=((*it).second).first + 1; + if (age > 1000) { + m_cache.erase(it); + it--; + } else { + score = decaying_score(age); + decaying_cache_value_t p (age, score); + (*it).second = p; + } + } +} + +void DynamicCacheBasedLanguageModel::Update(std::vector words, int age) +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + for (size_t j=0; j e (words[j],p); + m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist) + m_cache.insert(e); //insert the entry + } +} + +void DynamicCacheBasedLanguageModel::Insert(std::string &entries) +{ + if (entries != "") { + VERBOSE(1,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||"); + VERBOSE(1,"elements.size() after:|" << elements.size() << "|" << std::endl); + Insert(elements); + } +} + +void DynamicCacheBasedLanguageModel::Insert(std::vector ngrams) +{ + VERBOSE(1,"CacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl); + Decay(); + Update(ngrams,1); + IFVERBOSE(2) Print(); +} + +void DynamicCacheBasedLanguageModel::Execute(std::string command) +{ + VERBOSE(1,"CacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl); + std::vector commands = Tokenize(command, "||"); + Execute(commands); +} + +void DynamicCacheBasedLanguageModel::Execute(std::vector commands) +{ + for (size_t j=0; j lock(m_cacheLock); +#endif + m_cache.clear(); +} + +void DynamicCacheBasedLanguageModel::Load() +{ + VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl); + Load(m_initfiles); +} + +void DynamicCacheBasedLanguageModel::Load(const std::string file) +{ + VERBOSE(2,"DynamicCacheBasedLanguageModel::Loadconst std::string file()" << std::endl); + std::vector files = Tokenize(m_initfiles, "||"); + Load(files); +} + + +void DynamicCacheBasedLanguageModel::Load(std::vector files) +{ + for(size_t j = 0; j < files.size(); ++j) { + Load_Single_File(files[j]); + } +} + +void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file) +{ + //file format + //age || n-gram + //age || n-gram || n-gram || n-gram || ... + //.... + //each n-gram is a sequence of n words (no matter of n) + // + //there is no limit on the size of n + // + //entries can be repeated, but the last entry overwrites the previous + + + VERBOSE(2,"Loading data from the cache file " << file << std::endl); + InputFileStream cacheFile(file); + + std::string line; + int age; + std::vector words; + + while (getline(cacheFile, line)) { + std::vector vecStr = TokenizeMultiCharSeparator( line , "||" ); + if (vecStr.size() >= 2) { + age = Scan(vecStr[0]); + vecStr.erase(vecStr.begin()); + Update(vecStr,age); + } else { + TRACE_ERR("ERROR: The format of the loaded file is wrong: " << line << std::endl); + CHECK(false); + } + } + IFVERBOSE(2) Print(); +} + +void DynamicCacheBasedLanguageModel::SetQueryType(size_t type) +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + + query_type = type; + if ( query_type != CBLM_QUERY_TYPE_WHOLESTRING + && query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) { + VERBOSE(2, "This query type " << query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." << std::endl); + query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; + } + VERBOSE(2, "CacheBasedLanguageModel QueryType: " << query_type << std::endl); + +}; + +void DynamicCacheBasedLanguageModel::SetScoreType(size_t type) +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + score_type = type; + if ( score_type != CBLM_SCORE_TYPE_HYPERBOLA + && score_type != CBLM_SCORE_TYPE_POWER + && score_type != CBLM_SCORE_TYPE_EXPONENTIAL + && score_type != CBLM_SCORE_TYPE_COSINE + && score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD + && score_type != CBLM_SCORE_TYPE_POWER_REWARD + && score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) { + VERBOSE(2, "This score type " << score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." << std::endl); + score_type = CBLM_SCORE_TYPE_HYPERBOLA; + } + VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << score_type << std::endl); +}; + +void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age) +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + maxAge = age; + VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << maxAge << std::endl); +}; + +float DynamicCacheBasedLanguageModel::decaying_score(const int age) +{ + float sc; + switch(score_type) { + case CBLM_SCORE_TYPE_HYPERBOLA: + sc = (float) 1.0/age - 1.0; + break; + case CBLM_SCORE_TYPE_POWER: + sc = (float) pow(age, -0.25) - 1.0; + break; + case CBLM_SCORE_TYPE_EXPONENTIAL: + sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0; + break; + case CBLM_SCORE_TYPE_COSINE: + sc = (float) cos( (age-1) * (PI/2) / maxAge ) - 1.0; + break; + case CBLM_SCORE_TYPE_HYPERBOLA_REWARD: + sc = (float) 1.0/age; + break; + case CBLM_SCORE_TYPE_POWER_REWARD: + sc = (float) pow(age, -0.25); + break; + case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD: + sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0); + break; + default: + sc = -1.0; + } + return sc; +} } diff --git a/moses/FF/DynamicCacheBasedLanguageModel.h b/moses/FF/DynamicCacheBasedLanguageModel.h index 2768e4481..31ab8eba8 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.h +++ b/moses/FF/DynamicCacheBasedLanguageModel.h @@ -10,8 +10,8 @@ #include #endif -typedef std::pair decaying_cache_value_t; -typedef std::map decaying_cache_t; +typedef std::pair decaying_cache_value_t; +typedef std::map decaying_cache_t; #define CBLM_QUERY_TYPE_ALLSUBSTRINGS 0 #define CBLM_QUERY_TYPE_WHOLESTRING 1 @@ -57,10 +57,10 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction void Decay(); void Update(std::vector words, int age); - + void Execute(std::vector commands); void Execute_Single_Command(std::string command); - + void Load(std::vector files); void Load_Single_File(const std::string file); @@ -91,9 +91,9 @@ public: void Insert(std::string &entries); void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const; + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const; }; @@ -116,7 +116,7 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction void Decay(); void Update(std::vector words, int age); - + void Execute(std::vector commands); void Execute_Single_Command(std::string command); @@ -130,7 +130,7 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction void Evaluate( const TargetPhrase&, ScoreComponentCollection* ) const; public: - + DynamicCacheBasedLanguageModel(const std::string &line); ~DynamicCacheBasedLanguageModel(); diff --git a/moses/StaticData.h b/moses/StaticData.h index a29aaf663..d3eaf80a7 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -480,15 +480,15 @@ public: return m_dynamicCBLM; } -/* - DynamicCacheBasedPhraseDictionary *GetDynamicCacheBasedPhraseDictionary() const { - return m_dynamicCBPD; - } + /* + DynamicCacheBasedPhraseDictionary *GetDynamicCacheBasedPhraseDictionary() const { + return m_dynamicCBPD; + } - const DynamicCacheBasedPhraseDictionary *GetDynamicCacheBasedPhraseDictionary() { // for mira - return m_dynamicCBPD; - } -*/ + const DynamicCacheBasedPhraseDictionary *GetDynamicCacheBasedPhraseDictionary() { // for mira + return m_dynamicCBPD; + } + */ const ScoreComponentCollection& GetAllWeights() const { return m_allWeights; diff --git a/moses/TypeDef.h b/moses/TypeDef.h index bba99e077..a1eb3084d 100644 --- a/moses/TypeDef.h +++ b/moses/TypeDef.h @@ -122,7 +122,7 @@ enum PhraseTableImplementation { ,Compact = 12 ,Interpolated = 13 ,DSuffixArray = 14 - ,DCacheBased = 32 + ,DCacheBased = 32 }; enum InputTypeEnum { diff --git a/moses/Util.cpp b/moses/Util.cpp index 9e21d2406..f94c05f54 100644 --- a/moses/Util.cpp +++ b/moses/Util.cpp @@ -112,8 +112,8 @@ std::vector< std::map > ProcessAndStripDLT(std::string std::vector< std::map > meta; std::string lline = ToLower(line); bool check_dlt = true; - - std::cerr << "GLOBAL START" << endl; + + std::cerr << "GLOBAL START" << endl; while (check_dlt) { size_t start = lline.find(" > ProcessAndStripDLT(std::string line.erase(start,close-start+2); lline.erase(start,close-start+2); - if (dlt != ""){ + if (dlt != "") { - std::map tmp_meta; - for (size_t i = 1; i < dlt.size(); i++) { - if (dlt[i] == '=') { - std::string label = dlt.substr(0, i); - std::string val = dlt.substr(i+1); - std::cerr << "label:|" << label << "|" << endl; - std::cerr << "val:|" << val << "|" << endl; - if (val[0] == '"') { - val = val.substr(1); - // it admits any double quotation mark in the value of the attribute - // it assumes that just one attribute is present in the tag, - // it assumes that the value starts and ends with double quotation mark - size_t close = val.rfind('"'); - if (close == std::string::npos) { - TRACE_ERR("SGML parse error: missing \"\n"); - dlt = ""; - i = 0; + std::map tmp_meta; + for (size_t i = 1; i < dlt.size(); i++) { + if (dlt[i] == '=') { + std::string label = dlt.substr(0, i); + std::string val = dlt.substr(i+1); + std::cerr << "label:|" << label << "|" << endl; + std::cerr << "val:|" << val << "|" << endl; + if (val[0] == '"') { + val = val.substr(1); + // it admits any double quotation mark in the value of the attribute + // it assumes that just one attribute is present in the tag, + // it assumes that the value starts and ends with double quotation mark + size_t close = val.rfind('"'); + if (close == std::string::npos) { + TRACE_ERR("SGML parse error: missing \"\n"); + dlt = ""; + i = 0; + } else { + dlt = val.substr(close+1); + val = val.substr(0, close); + i = 0; + } } else { - dlt = val.substr(close+1); - val = val.substr(0, close); - i = 0; - } - } else { - size_t close = val.find(' '); - if (close == std::string::npos) { - dlt = ""; - i = 0; - } else { - dlt = val.substr(close+1); - val = val.substr(0, close); + size_t close = val.find(' '); + if (close == std::string::npos) { + dlt = ""; + i = 0; + } else { + dlt = val.substr(close+1); + val = val.substr(0, close); + } } + label = Trim(label); + dlt = Trim(dlt); + + tmp_meta[label] = val; + std::cerr << "tmp_meta:|" << tmp_meta[label] << "|" << endl; } - label = Trim(label); - dlt = Trim(dlt); - - tmp_meta[label] = val; - std::cerr << "tmp_meta:|" << tmp_meta[label] << "|" << endl; } - } - meta.push_back(tmp_meta); - } + meta.push_back(tmp_meta); + } } std::cerr << "GLOBAL END" << endl; return meta;