diff --git a/moses/ChartCellLabelSet.h b/moses/ChartCellLabelSet.h index 2b497b957..2c1e8b50f 100644 --- a/moses/ChartCellLabelSet.h +++ b/moses/ChartCellLabelSet.h @@ -45,9 +45,9 @@ public: typedef MapType::iterator iterator; ChartCellLabelSet(const WordsRange &coverage) - : m_coverage(coverage) - , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL) - , m_size(0) { } + : m_coverage(coverage) + , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL) + , m_size(0) { } ~ChartCellLabelSet() { RemoveAllInColl(m_map); @@ -82,8 +82,7 @@ public: if (ChartCellExists(idx)) { ChartCellLabel::Stack & s = m_map[idx]->MutableStack(); s.cube = stack; - } - else { + } else { ChartCellLabel::Stack s; s.cube = stack; m_size++; @@ -97,8 +96,7 @@ public: if (m_map.at(idx) != NULL) { return true; } - } - catch (const std::out_of_range& oor) { + } catch (const std::out_of_range& oor) { m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL); } return false; @@ -116,8 +114,7 @@ public: size_t idx = w[0]->GetId(); try { return m_map.at(idx); - } - catch (const std::out_of_range& oor) { + } catch (const std::out_of_range& oor) { return NULL; } } diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp index 3a16198fc..bcbd07c03 100644 --- a/moses/ChartKBestExtractor.cpp +++ b/moses/ChartKBestExtractor.cpp @@ -32,8 +32,8 @@ namespace Moses // Extract the k-best list from the search graph. void ChartKBestExtractor::Extract( - const std::vector &topLevelHypos, std::size_t k, - KBestVec &kBestList) + const std::vector &topLevelHypos, std::size_t k, + KBestVec &kBestList) { kBestList.clear(); if (topLevelHypos.empty()) { @@ -45,7 +45,7 @@ void ChartKBestExtractor::Extract( std::vector::const_iterator p = topLevelHypos.begin(); const ChartHypothesis &bestTopLevelHypo = **p; boost::scoped_ptr supremeHypo( - new ChartHypothesis(bestTopLevelHypo, *this)); + new ChartHypothesis(bestTopLevelHypo, *this)); // Do the same for each alternative top-level hypothesis, but add the new // ChartHypothesis objects as arcs from supremeHypo, as if they had been @@ -68,8 +68,8 @@ void ChartKBestExtractor::Extract( // each derivation. kBestList.reserve(targetVertex->kBestList.size()); for (std::vector >::const_iterator - q = targetVertex->kBestList.begin(); - q != targetVertex->kBestList.end(); ++q) { + q = targetVertex->kBestList.begin(); + q != targetVertex->kBestList.end(); ++q) { const boost::shared_ptr d(*q); assert(d); assert(d->subderivations.size() == 1); @@ -124,7 +124,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d) // Create an unweighted hyperarc corresponding to the given ChartHypothesis. 
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge( - const ChartHypothesis &h) + const ChartHypothesis &h) { UnweightedHyperarc edge; edge.head = FindOrCreateVertex(h); diff --git a/moses/ChartKBestExtractor.h b/moses/ChartKBestExtractor.h index 05b016d50..416d425b2 100644 --- a/moses/ChartKBestExtractor.h +++ b/moses/ChartKBestExtractor.h @@ -70,8 +70,8 @@ public: struct Vertex { typedef std::priority_queue, - std::vector >, - DerivationOrderer> DerivationQueue; + std::vector >, + DerivationOrderer> DerivationQueue; Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {} @@ -92,7 +92,7 @@ public: private: typedef boost::unordered_map > VertexMap; + boost::shared_ptr > VertexMap; struct DerivationHasher { std::size_t operator()(const boost::shared_ptr &d) const { @@ -114,7 +114,7 @@ private: }; typedef boost::unordered_set, DerivationHasher, - DerivationEqualityPred> DerivationSet; + DerivationEqualityPred> DerivationSet; UnweightedHyperarc CreateEdge(const ChartHypothesis &); boost::shared_ptr FindOrCreateVertex(const ChartHypothesis &); diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp index 623968dfc..2ba2c44aa 100644 --- a/moses/ChartManager.cpp +++ b/moses/ChartManager.cpp @@ -269,9 +269,9 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi * \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths) */ void ChartManager::CalcNBest( - std::size_t n, - std::vector > &nBestList, - bool onlyDistinct) const + std::size_t n, + std::vector > &nBestList, + bool onlyDistinct) const { nBestList.clear(); if (n == 0 || m_source.GetSize() == 0) { @@ -282,7 +282,7 @@ void ChartManager::CalcNBest( WordsRange range(0, m_source.GetSize()-1); const ChartCell &lastCell = m_hypoStackColl.Get(range); boost::scoped_ptr > topLevelHypos( - lastCell.GetAllSortedHypotheses()); + lastCell.GetAllSortedHypotheses()); if (!topLevelHypos) { return; } diff --git a/moses/ChartManager.h b/moses/ChartManager.h index 27914e207..926f23903 100644 --- a/moses/ChartManager.h +++ b/moses/ChartManager.h @@ -108,7 +108,9 @@ public: return m_hypothesisId++; } - const ChartParser &GetParser() const { return m_parser; } + const ChartParser &GetParser() const { + return m_parser; + } }; } diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index 3c95b074f..36bc9476c 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -183,7 +183,7 @@ void ChartParser::Create(const WordsRange &wordsRange, ChartParserCallback &to) size_t maxSpan = decodeGraph.GetMaxChartSpan(); size_t last = m_source.GetSize()-1; if (maxSpan != 0) { - last = min(last, wordsRange.GetStartPos()+maxSpan); + last = min(last, wordsRange.GetStartPos()+maxSpan); } if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) { ruleLookupManager.GetChartRuleCollection(wordsRange, last, to); diff --git a/moses/ChartParser.h b/moses/ChartParser.h index acd30179d..b9e82d2e7 100644 --- a/moses/ChartParser.h +++ b/moses/ChartParser.h @@ -48,7 +48,9 @@ public: void Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to); - const std::vector &GetUnknownSources() const { return m_unksrcs; } + const std::vector &GetUnknownSources() const { + return m_unksrcs; + } private: std::vector m_unksrcs; @@ -69,7 +71,9 @@ public: size_t GetSize() const; const InputPath &GetInputPath(size_t startPos, size_t endPos) const; const InputPath &GetInputPath(WordsRange &range) const; - const std::vector 
&GetUnknownSources() const { return m_unknown.GetUnknownSources(); } + const std::vector &GetUnknownSources() const { + return m_unknown.GetUnknownSources(); + } private: ChartParserUnknown m_unknown; diff --git a/moses/ChartTranslationOptionList.cpp b/moses/ChartTranslationOptionList.cpp index 586ecb40d..32ee8b18d 100644 --- a/moses/ChartTranslationOptionList.cpp +++ b/moses/ChartTranslationOptionList.cpp @@ -161,11 +161,11 @@ void ChartTranslationOptionList::ApplyThreshold() float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell) const { - const HypoList *stack = chartCell->GetStack().cube; - assert(stack); - assert(!stack->empty()); - const ChartHypothesis &bestHypo = **(stack->begin()); - return bestHypo.GetTotalScore(); + const HypoList *stack = chartCell->GetStack().cube; + assert(stack); + assert(!stack->empty()); + const ChartHypothesis &bestHypo = **(stack->begin()); + return bestHypo.GetTotalScore(); } void ChartTranslationOptionList::Evaluate(const InputType &input, const InputPath &inputPath) diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index 5861ee5f1..d18d78ad6 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -14,299 +14,299 @@ namespace Moses { - struct CNStats { - size_t created,destr,read,colls,words; - - CNStats() : created(0),destr(0),read(0),colls(0),words(0) {} - ~CNStats() { - print(std::cerr); - } +struct CNStats { + size_t created,destr,read,colls,words; - void createOne() { - ++created; - } - void destroyOne() { - ++destr; - } - - void collect(const ConfusionNet& cn) { - ++read; - colls+=cn.GetSize(); - for(size_t i=0; i0) { - out<<"confusion net statistics:\n" - " created:\t"< temp = std::make_pair(s.GetWord(i), scorePair); - data[i].push_back(temp); + void print(std::ostream& out) const { + if(created>0) { + out<<"confusion net statistics:\n" + " created:\t"<& factorOrder, int format) - { - VERBOSE(2, "read confusion net with format "<& factorOrder) - { - int rv=ReadF(in,factorOrder,0); - if(rv) stats.collect(*this); - return rv; +size_t +ConfusionNet:: +GetColumnIncrement(size_t i, size_t j) const +{ + (void) i; + (void) j; + return 1; +} + +ConfusionNet:: +ConfusionNet() + : InputType() +{ + stats.createOne(); + + const StaticData& staticData = StaticData::Instance(); + if (staticData.IsChart()) { + m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal()); } + UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified"); +} + +ConfusionNet:: +~ConfusionNet() +{ + stats.destroyOne(); +} + +ConfusionNet:: +ConfusionNet(Sentence const& s) +{ + data.resize(s.GetSize()); + for(size_t i=0; i temp = std::make_pair(s.GetWord(i), scorePair); + data[i].push_back(temp); + } +} + +bool +ConfusionNet:: +ReadF(std::istream& in, const std::vector& factorOrder, int format) +{ + VERBOSE(2, "read confusion net with format "<& factorOrder) +{ + int rv=ReadF(in,factorOrder,0); + if(rv) stats.collect(*this); + return rv; +} #if 0 - // Deprecated due to code duplication; - // use Word::CreateFromString() instead - void - ConfusionNet:: - String2Word(const std::string& s,Word& w, - const std::vector& factorOrder) - { - std::vector factorStrVector = Tokenize(s, "|"); - for(size_t i=0; i& factorOrder) +{ + std::vector factorStrVector = Tokenize(s, "|"); + for(size_t i=0; i& factorOrder) - { - Clear(); +bool +ConfusionNet:: +ReadFormat0(std::istream& in, const std::vector& factorOrder) +{ + Clear(); - const StaticData &staticData = StaticData::Instance(); - const InputFeature 
&inputFeature = InputFeature::Instance(); - size_t numInputScores = inputFeature.GetNumInputScores(); - size_t numRealWordCount = inputFeature.GetNumRealWordsInInput(); + const StaticData &staticData = StaticData::Instance(); + const InputFeature &inputFeature = InputFeature::Instance(); + size_t numInputScores = inputFeature.GetNumInputScores(); + size_t numRealWordCount = inputFeature.GetNumRealWordsInInput(); - size_t totalCount = numInputScores + numRealWordCount; - bool addRealWordCount = (numRealWordCount > 0); + size_t totalCount = numInputScores + numRealWordCount; + bool addRealWordCount = (numRealWordCount > 0); - std::string line; - while(getline(in,line)) { - std::istringstream is(line); - std::string word; + std::string line; + while(getline(in,line)) { + std::istringstream is(line); + std::string word; - Column col; - while(is>>word) { - Word w; - // String2Word(word,w,factorOrder); - w.CreateFromString(Input,factorOrder,StringPiece(word),false,false); - std::vector probs(totalCount, 0.0); - for(size_t i=0; i < numInputScores; i++) { - double prob; - if (!(is>>prob)) { - TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n"); - return false; - } - if(prob<0.0) { - VERBOSE(1, "WARN: negative prob: "<set to 0.0\n"); - prob=0.0; - } else if (prob>1.0) { - VERBOSE(1, "WARN: prob > 1.0 : "< set to 1.0\n"); - prob=1.0; - } - probs[i] = (std::max(static_cast(log(prob)),LOWEST_SCORE)); + Column col; + while(is>>word) { + Word w; + // String2Word(word,w,factorOrder); + w.CreateFromString(Input,factorOrder,StringPiece(word),false,false); + std::vector probs(totalCount, 0.0); + for(size_t i=0; i < numInputScores; i++) { + double prob; + if (!(is>>prob)) { + TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n"); + return false; + } + if(prob<0.0) { + VERBOSE(1, "WARN: negative prob: "<set to 0.0\n"); + prob=0.0; + } else if (prob>1.0) { + VERBOSE(1, "WARN: prob > 1.0 : "< set to 1.0\n"); + prob=1.0; + } + probs[i] = (std::max(static_cast(log(prob)),LOWEST_SCORE)); - } - //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon - if (addRealWordCount && word!=EPSILON && word!="") - probs.back() = -1.0; - - ScorePair scorePair(probs); - - col.push_back(std::make_pair(w,scorePair)); } - if(col.size()) { - data.push_back(col); - ShrinkToFit(data.back()); - } else break; - } - return !data.empty(); - } + //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon + if (addRealWordCount && word!=EPSILON && word!="") + probs.back() = -1.0; - bool - ConfusionNet:: - ReadFormat1(std::istream& in, const std::vector& factorOrder) - { - Clear(); - std::string line; + ScorePair scorePair(probs); + + col.push_back(std::make_pair(w,scorePair)); + } + if(col.size()) { + data.push_back(col); + ShrinkToFit(data.back()); + } else break; + } + return !data.empty(); +} + +bool +ConfusionNet:: +ReadFormat1(std::istream& in, const std::vector& factorOrder) +{ + Clear(); + std::string line; + if(!getline(in,line)) return 0; + size_t s; + if(getline(in,line)) s=atoi(line.c_str()); + else return 0; + data.resize(s); + for(size_t i=0; i>s)) return 0; - std::string word; - double prob; - data[i].resize(s); - for(size_t j=0; j>word>>prob) { - //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. 
-JS - data[i][j].second.denseScores = std::vector (1); - data[i][j].second.denseScores.push_back((float) log(prob)); - if(data[i][j].second.denseScores[0]<0) { - VERBOSE(1, "WARN: neg costs: "< set to 0\n"); - data[i][j].second.denseScores[0]=0.0; - } - // String2Word(word,data[i][j].first,factorOrder); - Word& w = data[i][j].first; - w.CreateFromString(Input,factorOrder,StringPiece(word),false,false); - } else return 0; - } - return !data.empty(); + std::istringstream is(line); + if(!(is>>s)) return 0; + std::string word; + double prob; + data[i].resize(s); + for(size_t j=0; j>word>>prob) { + //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS + data[i][j].second.denseScores = std::vector (1); + data[i][j].second.denseScores.push_back((float) log(prob)); + if(data[i][j].second.denseScores[0]<0) { + VERBOSE(1, "WARN: neg costs: "< set to 0\n"); + data[i][j].second.denseScores[0]=0.0; + } + // String2Word(word,data[i][j].first,factorOrder); + Word& w = data[i][j].first; + w.CreateFromString(Input,factorOrder,StringPiece(word),false,false); + } else return 0; } + return !data.empty(); +} - void ConfusionNet::Print(std::ostream& out) const - { - out<<"conf net: "<::const_iterator iterDense; - for(iterDense = data[i][j].second.denseScores.begin(); - iterDense < data[i][j].second.denseScores.end(); - ++iterDense) { - out<<", "<<*iterDense; - } - - // sparse - std::map::const_iterator iterSparse; - for(iterSparse = data[i][j].second.sparseScores.begin(); - iterSparse != data[i][j].second.sparseScores.end(); - ++iterSparse) { - out << ", " << iterSparse->first << "=" << iterSparse->second; - } - - out<<") "; + // dense + std::vector::const_iterator iterDense; + for(iterDense = data[i][j].second.denseScores.begin(); + iterDense < data[i][j].second.denseScores.end(); + ++iterDense) { + out<<", "<<*iterDense; } - out<<"\n"; + + // sparse + std::map::const_iterator iterSparse; + for(iterSparse = data[i][j].second.sparseScores.begin(); + iterSparse != data[i][j].second.sparseScores.end(); + ++iterSparse) { + out << ", " << iterSparse->first << "=" << iterSparse->second; + } + + out<<") "; } - out<<"\n\n"; + out<<"\n"; } + out<<"\n\n"; +} #ifdef _WIN32 #pragma warning(disable:4716) #endif - Phrase - ConfusionNet:: - GetSubString(const WordsRange&) const - { - UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n"); - //return Phrase(Input); - } +Phrase +ConfusionNet:: +GetSubString(const WordsRange&) const +{ + UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n"); + //return Phrase(Input); +} - std::string - ConfusionNet:: - GetStringRep(const std::vector /* factorsToPrint */) const //not well defined yet - { - TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n"); - return ""; - } +std::string +ConfusionNet:: +GetStringRep(const std::vector /* factorsToPrint */) const //not well defined yet +{ + TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n"); + return ""; +} #ifdef _WIN32 #pragma warning(disable:4716) #endif - const Word& ConfusionNet::GetWord(size_t) const - { - UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n"); - } +const Word& ConfusionNet::GetWord(size_t) const +{ + UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n"); +} #ifdef _WIN32 #pragma warning(default:4716) #endif - std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn) - { - cn.Print(out); - return out; - } +std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn) +{ + cn.Print(out); + return out; +} - 
TranslationOptionCollection* - ConfusionNet:: - CreateTranslationOptionCollection() const - { - size_t maxNoTransOptPerCoverage - = StaticData::Instance().GetMaxNoTransOptPerCoverage(); - float translationOptionThreshold - = StaticData::Instance().GetTranslationOptionThreshold(); - TranslationOptionCollection *rv - = new TranslationOptionCollectionConfusionNet - (*this, maxNoTransOptPerCoverage, translationOptionThreshold); - assert(rv); - return rv; - } +TranslationOptionCollection* +ConfusionNet:: +CreateTranslationOptionCollection() const +{ + size_t maxNoTransOptPerCoverage + = StaticData::Instance().GetMaxNoTransOptPerCoverage(); + float translationOptionThreshold + = StaticData::Instance().GetTranslationOptionThreshold(); + TranslationOptionCollection *rv + = new TranslationOptionCollectionConfusionNet + (*this, maxNoTransOptPerCoverage, translationOptionThreshold); + assert(rv); + return rv; +} } diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp index 92b79cd5d..5f876e9e4 100644 --- a/moses/FF/CountNonTerms.cpp +++ b/moses/FF/CountNonTerms.cpp @@ -8,18 +8,18 @@ using namespace std; namespace Moses { CountNonTerms::CountNonTerms(const std::string &line) -:StatelessFeatureFunction(line) -,m_all(true) -,m_sourceSyntax(false) -,m_targetSyntax(false) + :StatelessFeatureFunction(line) + ,m_all(true) + ,m_sourceSyntax(false) + ,m_targetSyntax(false) { ReadParameters(); } void CountNonTerms::Evaluate(const Phrase &sourcePhrase - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const { const StaticData &staticData = StaticData::Instance(); @@ -27,33 +27,33 @@ void CountNonTerms::Evaluate(const Phrase &sourcePhrase size_t indScore = 0; if (m_all) { - for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { - const Word &word = targetPhrase.GetWord(i); - if (word.IsNonTerminal()) { - ++scores[indScore]; - } - } - ++indScore; + for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { + const Word &word = targetPhrase.GetWord(i); + if (word.IsNonTerminal()) { + ++scores[indScore]; + } + } + ++indScore; } if (m_targetSyntax) { - for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { - const Word &word = targetPhrase.GetWord(i); - if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) { - ++scores[indScore]; - } - } - ++indScore; + for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { + const Word &word = targetPhrase.GetWord(i); + if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) { + ++scores[indScore]; + } + } + ++indScore; } if (m_sourceSyntax) { - for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) { - const Word &word = sourcePhrase.GetWord(i); - if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) { - ++scores[indScore]; - } - } - ++indScore; + for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) { + const Word &word = sourcePhrase.GetWord(i); + if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) { + ++scores[indScore]; + } + } + ++indScore; } scoreBreakdown.PlusEquals(this, scores); @@ -64,9 +64,9 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu if (key == "all") { m_all = Scan(value); } else if (key == "source-syntax") { - m_sourceSyntax = Scan(value); + m_sourceSyntax = Scan(value); } else if (key == "target-syntax") { - 
m_targetSyntax = Scan(value); + m_targetSyntax = Scan(value); } else { StatelessFeatureFunction::SetParameter(key, value); } diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h index 1877c8f2c..765157a9c 100644 --- a/moses/FF/CountNonTerms.h +++ b/moses/FF/CountNonTerms.h @@ -9,8 +9,9 @@ class CountNonTerms : public StatelessFeatureFunction { public: CountNonTerms(const std::string &line); - bool IsUseable(const FactorMask &mask) const - { return true; } + bool IsUseable(const FactorMask &mask) const { + return true; + } void Evaluate(const Phrase &source , const TargetPhrase &targetPhrase diff --git a/moses/FF/DynamicCacheBasedLanguageModel.cpp b/moses/FF/DynamicCacheBasedLanguageModel.cpp index d3438cb90..eae1adc05 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.cpp +++ b/moses/FF/DynamicCacheBasedLanguageModel.cpp @@ -5,439 +5,440 @@ namespace Moses { - - std::map< const std::string, DynamicCacheBasedLanguageModel * > DynamicCacheBasedLanguageModel::s_instance_map; - DynamicCacheBasedLanguageModel *DynamicCacheBasedLanguageModel::s_instance = NULL; - - DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line) + +std::map< const std::string, DynamicCacheBasedLanguageModel * > DynamicCacheBasedLanguageModel::s_instance_map; +DynamicCacheBasedLanguageModel *DynamicCacheBasedLanguageModel::s_instance = NULL; + +DynamicCacheBasedLanguageModel::DynamicCacheBasedLanguageModel(const std::string &line) : StatelessFeatureFunction(1, line) - { - VERBOSE(2,"Initializing DynamicCacheBasedLanguageModel feature..." << std::endl); - - m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; - m_score_type = CBLM_SCORE_TYPE_HYPERBOLA; - m_maxAge = 1000; - m_name = "default"; - - ReadParameters(); - UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 DynamicCacheBasedLanguageModel feature named " + m_name + " is allowed"); - s_instance_map[m_name] = this; - s_instance = this; //for back compatibility - } - - DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel() {}; - - void DynamicCacheBasedLanguageModel::SetPreComputedScores() - { -#ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - precomputedScores.clear(); - for (unsigned int i=0; i(value)); - } else if (key == "cblm-score-type") { - SetScoreType(Scan(value)); - } else if (key == "cblm-max-age") { - SetMaxAge(Scan(value)); - } else if (key == "cblm-file") { - m_initfiles = Scan(value); - } else if (key == "cblm-name") { - m_name = Scan(value); - } else { - StatelessFeatureFunction::SetParameter(key, value); - } - } - - void DynamicCacheBasedLanguageModel::Evaluate(const Phrase &sp - , const TargetPhrase &tp - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const - { - float score = m_lower_score; - switch(m_query_type) { - case CBLM_QUERY_TYPE_WHOLESTRING: - score = Evaluate_Whole_String(tp); - break; - case CBLM_QUERY_TYPE_ALLSUBSTRINGS: - score = Evaluate_All_Substrings(tp); - break; - default: - UTIL_THROW_IF2(false, "This score type (" << m_query_type << ") is unknown."); - } - - scoreBreakdown.Assign(this, score); - } - - float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const - { - //consider all words in the TargetPhrase as one n-gram - // and compute the decaying_score for the whole n-gram - // and return this value - - decaying_cache_t::const_iterator it; - float score = m_lower_score; - - std::string w = ""; - size_t endpos = tp.GetSize(); - for (size_t pos = 0 ; pos < 
endpos ; ++pos) { - w += tp.GetWord(pos).GetFactor(0)->GetString().as_string(); - if ((pos == 0) && (endpos > 1)) { - w += " "; - } - } - it = m_cache.find(w); - - VERBOSE(4,"cblm::Evaluate_Whole_String: searching w:|" << w << "|" << std::endl); - if (it != m_cache.end()) { //found! - score = ((*it).second).second; - VERBOSE(4,"cblm::Evaluate_Whole_String: found w:|" << w << "|" << std::endl); - } - - VERBOSE(4,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl); - return score; - } - - float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const - { - //loop over all n-grams in the TargetPhrase (no matter of n) - //and compute the decaying_score for all words - //and return their sum - - decaying_cache_t::const_iterator it; - float score = 0.0; - - for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) { - std::string w = ""; - for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) { - w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string(); - it = m_cache.find(w); - - if (it != m_cache.end()) { //found! - score += ((*it).second).second; - VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl); - } else { - score += m_lower_score; - } - - if (endpos == startpos) { - w += " "; - } - - } - } - VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl); - return score; - } - - void DynamicCacheBasedLanguageModel::Print() const - { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - decaying_cache_t::const_iterator it; - std::cout << "Content of the cache of Cache-Based Language Model" << std::endl; - std::cout << "Size of the cache of Cache-Based Language Model:|" << m_cache.size() << "|" << std::endl; - for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) { - std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl; - } - } - - void DynamicCacheBasedLanguageModel::Decay() - { -#ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - decaying_cache_t::iterator it; - - unsigned int age; - float score; - for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) { - age=((*it).second).first + 1; - if (age > m_maxAge) { - m_cache.erase(it); - it--; - } else { - score = decaying_score(age); - decaying_cache_value_t p (age, score); - (*it).second = p; - } - } - } - - void DynamicCacheBasedLanguageModel::Update(std::vector words, int age) - { -#ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl); - for (size_t j=0; j e (words[j],p); - m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist) - m_cache.insert(e); //insert the entry - } - } - - void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries) - { - if (entries != "") { - VERBOSE(3,"entries:|" << entries << "|" << std::endl); - std::vector elements = TokenizeMultiCharSeparator(entries, "||"); - VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); - ClearEntries(elements); - } - } - - void DynamicCacheBasedLanguageModel::ClearEntries(std::vector words) - { -#ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); -#endif - VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl); - for (size_t j=0; j elements = TokenizeMultiCharSeparator(entries, "||"); - 
VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); - Insert(elements); - } - } - - void DynamicCacheBasedLanguageModel::Insert(std::vector ngrams) - { - VERBOSE(3,"DynamicCacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl); - Decay(); - Update(ngrams,1); - // Print(); - IFVERBOSE(2) Print(); - } - - void DynamicCacheBasedLanguageModel::ExecuteDlt(std::map dlt_meta) - { - if (dlt_meta.find("cblm") != dlt_meta.end()) { - Insert(dlt_meta["cblm"]); - } - if (dlt_meta.find("cblm-command") != dlt_meta.end()) { - Execute(dlt_meta["cblm-command"]); - } - if (dlt_meta.find("cblm-file") != dlt_meta.end()) { - Load(dlt_meta["cblm-file"]); - } - if (dlt_meta.find("cblm-clear-entries") != dlt_meta.end()) { - ClearEntries(dlt_meta["cblm-clear-entries"]); - } - if (dlt_meta.find("cblm-clear-all") != dlt_meta.end()) { - Clear(); - } - - } - - void DynamicCacheBasedLanguageModel::Execute(std::string command) - { - VERBOSE(2,"DynamicCacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl); - std::vector commands = Tokenize(command, "||"); - Execute(commands); - } - - void DynamicCacheBasedLanguageModel::Execute(std::vector commands) - { - for (size_t j=0; j lock(m_cacheLock); -#endif - m_cache.clear(); - } - - void DynamicCacheBasedLanguageModel::Load() - { - SetPreComputedScores(); - VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl); - Load(m_initfiles); - } - - void DynamicCacheBasedLanguageModel::Load(const std::string file) - { - VERBOSE(2,"DynamicCacheBasedLanguageModel::Load(const std::string file)" << std::endl); - std::vector files = Tokenize(m_initfiles, "||"); - Load_Multiple_Files(files); - } - - - void DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector files) - { - VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector files)" << std::endl); - for(size_t j = 0; j < files.size(); ++j) { - Load_Single_File(files[j]); - } - } - - void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file) - { - VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)" << std::endl); - //file format - //age || n-gram - //age || n-gram || n-gram || n-gram || ... - //.... - //each n-gram is a sequence of n words (no matter of n) - // - //there is no limit on the size of n - // - //entries can be repeated, but the last entry overwrites the previous - - - VERBOSE(2,"Loading data from the cache file " << file << std::endl); - InputFileStream cacheFile(file); - - std::string line; - int age; - std::vector words; - - while (getline(cacheFile, line)) { - std::vector vecStr = TokenizeMultiCharSeparator( line , "||" ); - if (vecStr.size() >= 2) { - age = Scan(vecStr[0]); - vecStr.erase(vecStr.begin()); - Update(vecStr,age); - } else { - UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line); - } - } - IFVERBOSE(2) Print(); - } - - void DynamicCacheBasedLanguageModel::SetQueryType(size_t type) - { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - - m_query_type = type; - if ( m_query_type != CBLM_QUERY_TYPE_WHOLESTRING - && m_query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) { - VERBOSE(2, "This query type " << m_query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." 
<< std::endl); - m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; - } - VERBOSE(2, "CacheBasedLanguageModel QueryType: " << m_query_type << std::endl); - - }; - - void DynamicCacheBasedLanguageModel::SetScoreType(size_t type) - { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - m_score_type = type; - if ( m_score_type != CBLM_SCORE_TYPE_HYPERBOLA - && m_score_type != CBLM_SCORE_TYPE_POWER - && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL - && m_score_type != CBLM_SCORE_TYPE_COSINE - && m_score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD - && m_score_type != CBLM_SCORE_TYPE_POWER_REWARD - && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) { - VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." << std::endl); - m_score_type = CBLM_SCORE_TYPE_HYPERBOLA; - } - VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << m_score_type << std::endl); - }; - - void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age) - { -#ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); -#endif - m_maxAge = age; - VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << m_maxAge << std::endl); - }; - - float DynamicCacheBasedLanguageModel::decaying_score(const unsigned int age) - { - float sc; - switch(m_score_type) { - case CBLM_SCORE_TYPE_HYPERBOLA: - sc = (float) 1.0/age - 1.0; - break; - case CBLM_SCORE_TYPE_POWER: - sc = (float) pow(age, -0.25) - 1.0; - break; - case CBLM_SCORE_TYPE_EXPONENTIAL: - sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0; - break; - case CBLM_SCORE_TYPE_COSINE: - sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0; - break; - case CBLM_SCORE_TYPE_HYPERBOLA_REWARD: - sc = (float) 1.0/age; - break; - case CBLM_SCORE_TYPE_POWER_REWARD: - sc = (float) pow(age, -0.25); - break; - case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD: - sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0); - break; - default: - sc = -1.0; - } - return sc; - } +{ + VERBOSE(2,"Initializing DynamicCacheBasedLanguageModel feature..." 
<< std::endl); + + m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; + m_score_type = CBLM_SCORE_TYPE_HYPERBOLA; + m_maxAge = 1000; + m_name = "default"; + + ReadParameters(); + UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 DynamicCacheBasedLanguageModel feature named " + m_name + " is allowed"); + s_instance_map[m_name] = this; + s_instance = this; //for back compatibility +} + +DynamicCacheBasedLanguageModel::~DynamicCacheBasedLanguageModel() {}; + +void DynamicCacheBasedLanguageModel::SetPreComputedScores() +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + precomputedScores.clear(); + for (unsigned int i=0; i(value)); + } else if (key == "cblm-score-type") { + SetScoreType(Scan(value)); + } else if (key == "cblm-max-age") { + SetMaxAge(Scan(value)); + } else if (key == "cblm-file") { + m_initfiles = Scan(value); + } else if (key == "cblm-name") { + m_name = Scan(value); + } else { + StatelessFeatureFunction::SetParameter(key, value); + } +} + +void DynamicCacheBasedLanguageModel::Evaluate(const Phrase &sp + , const TargetPhrase &tp + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const +{ + float score = m_lower_score; + switch(m_query_type) { + case CBLM_QUERY_TYPE_WHOLESTRING: + score = Evaluate_Whole_String(tp); + break; + case CBLM_QUERY_TYPE_ALLSUBSTRINGS: + score = Evaluate_All_Substrings(tp); + break; + default: + UTIL_THROW_IF2(false, "This score type (" << m_query_type << ") is unknown."); + } + + scoreBreakdown.Assign(this, score); +} + +float DynamicCacheBasedLanguageModel::Evaluate_Whole_String(const TargetPhrase& tp) const +{ + //consider all words in the TargetPhrase as one n-gram + // and compute the decaying_score for the whole n-gram + // and return this value + + decaying_cache_t::const_iterator it; + float score = m_lower_score; + + std::string w = ""; + size_t endpos = tp.GetSize(); + for (size_t pos = 0 ; pos < endpos ; ++pos) { + w += tp.GetWord(pos).GetFactor(0)->GetString().as_string(); + if ((pos == 0) && (endpos > 1)) { + w += " "; + } + } + it = m_cache.find(w); + + VERBOSE(4,"cblm::Evaluate_Whole_String: searching w:|" << w << "|" << std::endl); + if (it != m_cache.end()) { //found! + score = ((*it).second).second; + VERBOSE(4,"cblm::Evaluate_Whole_String: found w:|" << w << "|" << std::endl); + } + + VERBOSE(4,"cblm::Evaluate_Whole_String: returning score:|" << score << "|" << std::endl); + return score; +} + +float DynamicCacheBasedLanguageModel::Evaluate_All_Substrings(const TargetPhrase& tp) const +{ + //loop over all n-grams in the TargetPhrase (no matter of n) + //and compute the decaying_score for all words + //and return their sum + + decaying_cache_t::const_iterator it; + float score = 0.0; + + for (size_t startpos = 0 ; startpos < tp.GetSize() ; ++startpos) { + std::string w = ""; + for (size_t endpos = startpos; endpos < tp.GetSize() ; ++endpos) { + w += tp.GetWord(endpos).GetFactor(0)->GetString().as_string(); + it = m_cache.find(w); + + if (it != m_cache.end()) { //found! 
+ score += ((*it).second).second; + VERBOSE(3,"cblm::Evaluate_All_Substrings: found w:|" << w << "| actual score:|" << ((*it).second).second << "| score:|" << score << "|" << std::endl); + } else { + score += m_lower_score; + } + + if (endpos == startpos) { + w += " "; + } + + } + } + VERBOSE(3,"cblm::Evaluate_All_Substrings: returning score:|" << score << "|" << std::endl); + return score; +} + +void DynamicCacheBasedLanguageModel::Print() const +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + decaying_cache_t::const_iterator it; + std::cout << "Content of the cache of Cache-Based Language Model" << std::endl; + std::cout << "Size of the cache of Cache-Based Language Model:|" << m_cache.size() << "|" << std::endl; + for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) { + std::cout << "word:|" << (*it).first << "| age:|" << ((*it).second).first << "| score:|" << ((*it).second).second << "|" << std::endl; + } +} + +void DynamicCacheBasedLanguageModel::Decay() +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + decaying_cache_t::iterator it; + + unsigned int age; + float score; + for ( it=m_cache.begin() ; it != m_cache.end(); it++ ) { + age=((*it).second).first + 1; + if (age > m_maxAge) { + m_cache.erase(it); + it--; + } else { + score = decaying_score(age); + decaying_cache_value_t p (age, score); + (*it).second = p; + } + } +} + +void DynamicCacheBasedLanguageModel::Update(std::vector words, int age) +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl); + for (size_t j=0; j e (words[j],p); + m_cache.erase(words[j]); //always erase the element (do nothing if the entry does not exist) + m_cache.insert(e); //insert the entry + } +} + +void DynamicCacheBasedLanguageModel::ClearEntries(std::string &entries) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + ClearEntries(elements); + } +} + +void DynamicCacheBasedLanguageModel::ClearEntries(std::vector words) +{ +#ifdef WITH_THREADS + boost::shared_lock lock(m_cacheLock); +#endif + VERBOSE(3,"words.size():|" << words.size() << "|" << std::endl); + for (size_t j=0; j elements = TokenizeMultiCharSeparator(entries, "||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + Insert(elements); + } +} + +void DynamicCacheBasedLanguageModel::Insert(std::vector ngrams) +{ + VERBOSE(3,"DynamicCacheBasedLanguageModel Insert ngrams.size():|" << ngrams.size() << "|" << std::endl); + Decay(); + Update(ngrams,1); + // Print(); + IFVERBOSE(2) Print(); +} + +void DynamicCacheBasedLanguageModel::ExecuteDlt(std::map dlt_meta) +{ + if (dlt_meta.find("cblm") != dlt_meta.end()) { + Insert(dlt_meta["cblm"]); + } + if (dlt_meta.find("cblm-command") != dlt_meta.end()) { + Execute(dlt_meta["cblm-command"]); + } + if (dlt_meta.find("cblm-file") != dlt_meta.end()) { + Load(dlt_meta["cblm-file"]); + } + if (dlt_meta.find("cblm-clear-entries") != dlt_meta.end()) { + ClearEntries(dlt_meta["cblm-clear-entries"]); + } + if (dlt_meta.find("cblm-clear-all") != dlt_meta.end()) { + Clear(); + } + +} + +void DynamicCacheBasedLanguageModel::Execute(std::string command) +{ + VERBOSE(2,"DynamicCacheBasedLanguageModel::Execute(std::string command:|" << command << "|" << std::endl); + std::vector commands = Tokenize(command, "||"); + 
Execute(commands); +} + +void DynamicCacheBasedLanguageModel::Execute(std::vector commands) +{ + for (size_t j=0; j lock(m_cacheLock); +#endif + m_cache.clear(); +} + +void DynamicCacheBasedLanguageModel::Load() +{ + SetPreComputedScores(); + VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl); + Load(m_initfiles); +} + +void DynamicCacheBasedLanguageModel::Load(const std::string file) +{ + VERBOSE(2,"DynamicCacheBasedLanguageModel::Load(const std::string file)" << std::endl); + std::vector files = Tokenize(m_initfiles, "||"); + Load_Multiple_Files(files); +} + + +void DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector files) +{ + VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Multiple_Files(std::vector files)" << std::endl); + for(size_t j = 0; j < files.size(); ++j) { + Load_Single_File(files[j]); + } +} + +void DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file) +{ + VERBOSE(2,"DynamicCacheBasedLanguageModel::Load_Single_File(const std::string file)" << std::endl); + //file format + //age || n-gram + //age || n-gram || n-gram || n-gram || ... + //.... + //each n-gram is a sequence of n words (no matter of n) + // + //there is no limit on the size of n + // + //entries can be repeated, but the last entry overwrites the previous + + + VERBOSE(2,"Loading data from the cache file " << file << std::endl); + InputFileStream cacheFile(file); + + std::string line; + int age; + std::vector words; + + while (getline(cacheFile, line)) { + std::vector vecStr = TokenizeMultiCharSeparator( line , "||" ); + if (vecStr.size() >= 2) { + age = Scan(vecStr[0]); + vecStr.erase(vecStr.begin()); + Update(vecStr,age); + } else { + UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line); + } + } + IFVERBOSE(2) Print(); +} + +void DynamicCacheBasedLanguageModel::SetQueryType(size_t type) +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + + m_query_type = type; + if ( m_query_type != CBLM_QUERY_TYPE_WHOLESTRING + && m_query_type != CBLM_QUERY_TYPE_ALLSUBSTRINGS ) { + VERBOSE(2, "This query type " << m_query_type << " is unknown. Instead used " << CBLM_QUERY_TYPE_ALLSUBSTRINGS << "." << std::endl); + m_query_type = CBLM_QUERY_TYPE_ALLSUBSTRINGS; + } + VERBOSE(2, "CacheBasedLanguageModel QueryType: " << m_query_type << std::endl); + +}; + +void DynamicCacheBasedLanguageModel::SetScoreType(size_t type) +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + m_score_type = type; + if ( m_score_type != CBLM_SCORE_TYPE_HYPERBOLA + && m_score_type != CBLM_SCORE_TYPE_POWER + && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL + && m_score_type != CBLM_SCORE_TYPE_COSINE + && m_score_type != CBLM_SCORE_TYPE_HYPERBOLA_REWARD + && m_score_type != CBLM_SCORE_TYPE_POWER_REWARD + && m_score_type != CBLM_SCORE_TYPE_EXPONENTIAL_REWARD ) { + VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBLM_SCORE_TYPE_HYPERBOLA << "." 
<< std::endl); + m_score_type = CBLM_SCORE_TYPE_HYPERBOLA; + } + VERBOSE(2, "CacheBasedLanguageModel ScoreType: " << m_score_type << std::endl); +}; + +void DynamicCacheBasedLanguageModel::SetMaxAge(unsigned int age) +{ +#ifdef WITH_THREADS + boost::shared_lock read_lock(m_cacheLock); +#endif + m_maxAge = age; + VERBOSE(2, "CacheBasedLanguageModel MaxAge: " << m_maxAge << std::endl); +}; + +float DynamicCacheBasedLanguageModel::decaying_score(const unsigned int age) +{ + float sc; + switch(m_score_type) { + case CBLM_SCORE_TYPE_HYPERBOLA: + sc = (float) 1.0/age - 1.0; + break; + case CBLM_SCORE_TYPE_POWER: + sc = (float) pow(age, -0.25) - 1.0; + break; + case CBLM_SCORE_TYPE_EXPONENTIAL: + sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0; + break; + case CBLM_SCORE_TYPE_COSINE: + sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0; + break; + case CBLM_SCORE_TYPE_HYPERBOLA_REWARD: + sc = (float) 1.0/age; + break; + case CBLM_SCORE_TYPE_POWER_REWARD: + sc = (float) pow(age, -0.25); + break; + case CBLM_SCORE_TYPE_EXPONENTIAL_REWARD: + sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0); + break; + default: + sc = -1.0; + } + return sc; +} } diff --git a/moses/FF/DynamicCacheBasedLanguageModel.h b/moses/FF/DynamicCacheBasedLanguageModel.h index dc77560f7..f95c5515c 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.h +++ b/moses/FF/DynamicCacheBasedLanguageModel.h @@ -43,7 +43,7 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction size_t m_query_type; //way of querying the cache size_t m_score_type; //way of scoring entries of the cache std::string m_initfiles; // vector of files loaded in the initialization phase - std::string m_name; // internal name to identify this instance of the Cache-based pseudo LM + std::string m_name; // internal name to identify this instance of the Cache-based pseudo LM float m_lower_score; //lower_bound_score for no match std::vector precomputedScores; unsigned int m_maxAge; @@ -64,7 +64,7 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction void Update(std::vector words, int age); void ClearEntries(std::vector entries); - + void Execute(std::vector commands); void Execute_Single_Command(std::string command); @@ -80,24 +80,28 @@ class DynamicCacheBasedLanguageModel : public StatelessFeatureFunction protected: static DynamicCacheBasedLanguageModel *s_instance; - static std::map< const std::string, DynamicCacheBasedLanguageModel * > s_instance_map; + static std::map< const std::string, DynamicCacheBasedLanguageModel * > s_instance_map; public: DynamicCacheBasedLanguageModel(const std::string &line); ~DynamicCacheBasedLanguageModel(); - - inline const std::string GetName() { return m_name; }; - inline void SetName(const std::string name){ m_name = name; } - static const DynamicCacheBasedLanguageModel& Instance(const std::string name) { - UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!"); - return *(s_instance_map[name]); - } - - static DynamicCacheBasedLanguageModel& InstanceNonConst(const std::string name) { - UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!"); - return *(s_instance_map[name]); - } + inline const std::string GetName() { + return m_name; + }; + inline void SetName(const std::string name) { + m_name = name; + } + + static const DynamicCacheBasedLanguageModel& Instance(const std::string name) { 
+ UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!"); + return *(s_instance_map[name]); + } + + static DynamicCacheBasedLanguageModel& InstanceNonConst(const std::string name) { + UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The DynamicCacheBasedLanguageModel feature named " + name + " does not exist!"); + return *(s_instance_map[name]); + } static const DynamicCacheBasedLanguageModel& Instance() { return *s_instance; } @@ -113,7 +117,7 @@ public: void Load(const std::string file); void Execute(std::string command); void SetParameter(const std::string& key, const std::string& value); - void ExecuteDlt(std::map dlt_meta); + void ExecuteDlt(std::map dlt_meta); void ClearEntries(std::string &entries); void Insert(std::string &entries); diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 60fb19ed8..6462504fd 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -250,13 +250,13 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line void FeatureRegistry::PrintFF() const { - std::cerr << "Available feature functions:" << std::endl; - Map::const_iterator iter; - for (iter = registry_.begin(); iter != registry_.end(); ++iter) { - const string &ffName = iter->first; - std::cerr << ffName << " "; - } - std::cerr << std::endl; + std::cerr << "Available feature functions:" << std::endl; + Map::const_iterator iter; + for (iter = registry_.begin(); iter != registry_.end(); ++iter) { + const string &ffName = iter->first; + std::cerr << ffName << " "; + } + std::cerr << std::endl; } } // namespace Moses diff --git a/moses/FF/HyperParameterAsWeight.cpp b/moses/FF/HyperParameterAsWeight.cpp index 2fd0f2acb..a2c068530 100644 --- a/moses/FF/HyperParameterAsWeight.cpp +++ b/moses/FF/HyperParameterAsWeight.cpp @@ -7,7 +7,7 @@ namespace Moses { HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line) -:StatelessFeatureFunction(2, line) + :StatelessFeatureFunction(2, line) { ReadParameters(); diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h index d27b3cd2a..7d953343f 100644 --- a/moses/FF/HyperParameterAsWeight.h +++ b/moses/FF/HyperParameterAsWeight.h @@ -14,8 +14,9 @@ class HyperParameterAsWeight : public StatelessFeatureFunction public: HyperParameterAsWeight(const std::string &line); - virtual bool IsUseable(const FactorMask &mask) const - { return true; } + virtual bool IsUseable(const FactorMask &mask) const { + return true; + } virtual void Evaluate(const Phrase &source , const TargetPhrase &targetPhrase diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp index cd3541d34..bd75de391 100644 --- a/moses/FF/InputFeature.cpp +++ b/moses/FF/InputFeature.cpp @@ -18,14 +18,14 @@ InputFeature::InputFeature(const std::string &line) { m_numInputScores = this->m_numScoreComponents; ReadParameters(); - + UTIL_THROW_IF2(s_instance, "Can only have 1 input feature"); s_instance = this; } void InputFeature::Load() { - + const PhraseDictionary *pt = PhraseDictionary::GetColl()[0]; const PhraseDictionaryTreeAdaptor *ptBin = dynamic_cast(pt); diff --git a/moses/FF/ReferenceComparison.cpp b/moses/FF/ReferenceComparison.cpp index b11d133c2..80dcbd234 100644 --- a/moses/FF/ReferenceComparison.cpp +++ b/moses/FF/ReferenceComparison.cpp @@ -3,7 +3,7 @@ namespace Moses { ReferenceComparison::ReferenceComparison(const std::string &line) -:StatelessFeatureFunction(0, line) + :StatelessFeatureFunction(0, 
line) { } diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h index aef7be493..2c722afbc 100644 --- a/moses/FF/ReferenceComparison.h +++ b/moses/FF/ReferenceComparison.h @@ -10,34 +10,36 @@ namespace Moses class ReferenceComparison : public StatelessFeatureFunction { public: - ReferenceComparison(const std::string &line); + ReferenceComparison(const std::string &line); - virtual bool IsUseable(const FactorMask &mask) const - { return true; } + virtual bool IsUseable(const FactorMask &mask) const { + return true; + } - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const - {} + virtual void Evaluate(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const + {} - virtual void Evaluate(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const - {} + virtual void Evaluate(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore = NULL) const + {} - virtual void Evaluate(const Hypothesis& hypo, - ScoreComponentCollection* accumulator) const - {} + virtual void Evaluate(const Hypothesis& hypo, + ScoreComponentCollection* accumulator) const + {} - virtual void EvaluateChart(const ChartHypothesis &hypo, - ScoreComponentCollection* accumulator) const - {} + virtual void EvaluateChart(const ChartHypothesis &hypo, + ScoreComponentCollection* accumulator) const + {} - std::vector DefaultWeights() const - { return std::vector(); } + std::vector DefaultWeights() const { + return std::vector(); + } protected: diff --git a/moses/FF/RuleAmbiguity.cpp b/moses/FF/RuleAmbiguity.cpp index 8f8760d28..4197230e0 100644 --- a/moses/FF/RuleAmbiguity.cpp +++ b/moses/FF/RuleAmbiguity.cpp @@ -5,8 +5,8 @@ namespace Moses { RuleAmbiguity::RuleAmbiguity(const std::string &line) -:StatelessFeatureFunction(1, line) -,m_sourceSyntax(true) + :StatelessFeatureFunction(1, line) + ,m_sourceSyntax(true) { } @@ -17,32 +17,31 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax) } void RuleAmbiguity::Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const { // source can't be empty, right? 
float score = 0; int count = 0; for (size_t i = 0; i < source.GetSize() - 0; ++i) { - const Word &word = source.GetWord(i); - bool ambiguous = IsAmbiguous(word, m_sourceSyntax); - if (ambiguous) { - ++count; - } - else { - if (count > 0) { - score += count; - } - count = -1; - } + const Word &word = source.GetWord(i); + bool ambiguous = IsAmbiguous(word, m_sourceSyntax); + if (ambiguous) { + ++count; + } else { + if (count > 0) { + score += count; + } + count = -1; + } } // 1st & last always adjacent to ambiguity ++count; if (count > 0) { - score += count; + score += count; } scoreBreakdown.PlusEquals(this, score); @@ -51,7 +50,7 @@ void RuleAmbiguity::Evaluate(const Phrase &source void RuleAmbiguity::SetParameter(const std::string& key, const std::string& value) { if (key == "source-syntax") { - m_sourceSyntax = Scan(value); + m_sourceSyntax = Scan(value); } else { StatelessFeatureFunction::SetParameter(key, value); } diff --git a/moses/FF/RuleAmbiguity.h b/moses/FF/RuleAmbiguity.h index 436e2fa58..c954239e0 100644 --- a/moses/FF/RuleAmbiguity.h +++ b/moses/FF/RuleAmbiguity.h @@ -9,32 +9,33 @@ namespace Moses class RuleAmbiguity : public StatelessFeatureFunction { public: - RuleAmbiguity(const std::string &line); + RuleAmbiguity(const std::string &line); - virtual bool IsUseable(const FactorMask &mask) const - { return true; } + virtual bool IsUseable(const FactorMask &mask) const { + return true; + } - virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const; + virtual void Evaluate(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const; - virtual void Evaluate(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const - {} + virtual void Evaluate(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore = NULL) const + {} - virtual void Evaluate(const Hypothesis& hypo, - ScoreComponentCollection* accumulator) const - {} + virtual void Evaluate(const Hypothesis& hypo, + ScoreComponentCollection* accumulator) const + {} - virtual void EvaluateChart(const ChartHypothesis &hypo, - ScoreComponentCollection* accumulator) const - {} + virtual void EvaluateChart(const ChartHypothesis &hypo, + ScoreComponentCollection* accumulator) const + {} - void SetParameter(const std::string& key, const std::string& value); + void SetParameter(const std::string& key, const std::string& value); protected: bool m_sourceSyntax; diff --git a/moses/FF/SetSourcePhrase.cpp b/moses/FF/SetSourcePhrase.cpp index 757b3f25b..b0f152b18 100644 --- a/moses/FF/SetSourcePhrase.cpp +++ b/moses/FF/SetSourcePhrase.cpp @@ -4,18 +4,18 @@ namespace Moses { SetSourcePhrase::SetSourcePhrase(const std::string &line) -:StatelessFeatureFunction(1, line) + :StatelessFeatureFunction(1, line) { m_tuneable = false; ReadParameters(); } void SetSourcePhrase::Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection 
&estimatedFutureScore) const { - targetPhrase.SetRuleSource(source); + targetPhrase.SetRuleSource(source); } } diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h index 6b391baa4..f475b79e5 100644 --- a/moses/FF/SetSourcePhrase.h +++ b/moses/FF/SetSourcePhrase.h @@ -11,19 +11,20 @@ class SetSourcePhrase : public StatelessFeatureFunction public: SetSourcePhrase(const std::string &line); - virtual bool IsUseable(const FactorMask &mask) const - { return true; } + virtual bool IsUseable(const FactorMask &mask) const { + return true; + } virtual void Evaluate(const Phrase &source - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection &estimatedFutureScore) const; + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const; virtual void Evaluate(const InputType &input - , const InputPath &inputPath - , const TargetPhrase &targetPhrase - , ScoreComponentCollection &scoreBreakdown - , ScoreComponentCollection *estimatedFutureScore = NULL) const + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore = NULL) const {} virtual void Evaluate(const Hypothesis& hypo, @@ -34,8 +35,9 @@ public: ScoreComponentCollection* accumulator) const {} - std::vector DefaultWeights() const - { return std::vector(); } + std::vector DefaultWeights() const { + return std::vector(); + } }; diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp index 017e551c4..3e4e9db43 100644 --- a/moses/FF/SoftMatchingFeature.cpp +++ b/moses/FF/SoftMatchingFeature.cpp @@ -24,8 +24,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string m_tuneable = Scan(value); } else if (key == "filterable") { //ignore } else if (key == "path") { - const std::string filePath = value; - Load(filePath); + const std::string filePath = value; + Load(filePath); } else { UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value); } @@ -35,34 +35,34 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string bool SoftMatchingFeature::Load(const std::string& filePath) { - StaticData &staticData = StaticData::InstanceNonConst(); + StaticData &staticData = StaticData::InstanceNonConst(); - InputFileStream inStream(filePath); - std::string line; - while(getline(inStream, line)) { - std::vector tokens = Tokenize(line); - UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line"); + InputFileStream inStream(filePath); + std::string line; + while(getline(inStream, line)) { + std::vector tokens = Tokenize(line); + UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line"); - // no soft matching necessary if LHS and RHS are the same - if (tokens[0] == tokens[1]) { - continue; - } - - Word LHS, RHS; - LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true); - RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true); - - m_softMatches[RHS[0]->GetId()].push_back(LHS); - GetOrSetFeatureName(RHS, LHS); + // no soft matching necessary if LHS and RHS are the same + if (tokens[0] == tokens[1]) { + continue; } - staticData.SetSoftMatches(m_softMatches); + Word LHS, RHS; + LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true); + 
RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true); - return true; + m_softMatches[RHS[0]->GetId()].push_back(LHS); + GetOrSetFeatureName(RHS, LHS); + } + + staticData.SetSoftMatches(m_softMatches); + + return true; } void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo, - ScoreComponentCollection* accumulator) const + ScoreComponentCollection* accumulator) const { const TargetPhrase& target = hypo.GetCurrTargetPhrase(); @@ -87,7 +87,8 @@ void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo, } // when loading, or when we notice that non-terminals have been added after loading, we resize vectors -void SoftMatchingFeature::ResizeCache() const { +void SoftMatchingFeature::ResizeCache() const +{ FactorCollection& fc = FactorCollection::Instance(); size_t numNonTerminals = fc.GetNumNonTerminals(); @@ -98,7 +99,8 @@ void SoftMatchingFeature::ResizeCache() const { } -const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const { +const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const +{ try { #ifdef WITH_THREADS //try read-only lock boost::shared_lock read_lock(m_accessLock); @@ -107,23 +109,22 @@ const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, con if (!name.empty()) { return name; } - } - catch (const std::out_of_range& oor) { + } catch (const std::out_of_range& oor) { #ifdef WITH_THREADS //need to resize cache; write lock boost::unique_lock lock(m_accessLock); #endif ResizeCache(); } #ifdef WITH_THREADS //need to update cache; write lock - boost::unique_lock lock(m_accessLock); + boost::unique_lock lock(m_accessLock); #endif - std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()]; - const std::vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - std::string LHS_string = LHS.GetString(outputFactorOrder, false); - std::string RHS_string = RHS.GetString(outputFactorOrder, false); - name = LHS_string + "->" + RHS_string; - return name; - } + std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()]; + const std::vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); + std::string LHS_string = LHS.GetString(outputFactorOrder, false); + std::string RHS_string = RHS.GetString(outputFactorOrder, false); + name = LHS_string + "->" + RHS_string; + return name; +} } diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp index aa879fe0e..31f884108 100644 --- a/moses/FF/TreeStructureFeature.cpp +++ b/moses/FF/TreeStructureFeature.cpp @@ -13,230 +13,237 @@ namespace Moses { InternalTree::InternalTree(const std::string & line, const bool terminal): - m_value_nt(0), - m_isTerminal(terminal) - { + m_value_nt(0), + m_isTerminal(terminal) +{ - size_t found = line.find_first_of("[] "); + size_t found = line.find_first_of("[] "); - if (found == line.npos) { - m_value = line; - } + if (found == line.npos) { + m_value = line; + } - else { - AddSubTree(line, 0); - } + else { + AddSubTree(line, 0); + } } -size_t InternalTree::AddSubTree(const std::string & line, size_t pos) { +size_t InternalTree::AddSubTree(const std::string & line, size_t pos) +{ - std::string value = ""; - char token = 0; + std::string value = ""; + char token = 0; - while (token != ']' && pos != std::string::npos) - { - size_t oldpos = pos; - pos = line.find_first_of("[] ", pos); - if (pos == std::string::npos) break; - token = line[pos]; - value = line.substr(oldpos,pos-oldpos); + while 
(token != ']' && pos != std::string::npos) { + size_t oldpos = pos; + pos = line.find_first_of("[] ", pos); + if (pos == std::string::npos) break; + token = line[pos]; + value = line.substr(oldpos,pos-oldpos); - if (token == '[') { - if (m_value.size() > 0) { - TreePointer child(new InternalTree(value, false)); - m_children.push_back(child); - pos = child->AddSubTree(line, pos+1); - } - else { - if (value.size() > 0) { - m_value = value; - } - pos = AddSubTree(line, pos+1); - } - } - else if (token == ' ' || token == ']') { - if (value.size() > 0 && ! m_value.size() > 0) { - m_value = value; - } - else if (value.size() > 0) { - m_isTerminal = false; - TreePointer child(new InternalTree(value, true)); - m_children.push_back(child); - } - if (token == ' ') { - pos++; - } - } - - if (m_children.size() > 0) { - m_isTerminal = false; + if (token == '[') { + if (m_value.size() > 0) { + TreePointer child(new InternalTree(value, false)); + m_children.push_back(child); + pos = child->AddSubTree(line, pos+1); + } else { + if (value.size() > 0) { + m_value = value; } + pos = AddSubTree(line, pos+1); + } + } else if (token == ' ' || token == ']') { + if (value.size() > 0 && ! m_value.size() > 0) { + m_value = value; + } else if (value.size() > 0) { + m_isTerminal = false; + TreePointer child(new InternalTree(value, true)); + m_children.push_back(child); + } + if (token == ' ') { + pos++; + } } - if (pos == std::string::npos) { - return line.size(); + if (m_children.size() > 0) { + m_isTerminal = false; } - return min(line.size(),pos+1); + } + + if (pos == std::string::npos) { + return line.size(); + } + return min(line.size(),pos+1); } -std::string InternalTree::GetString() const { +std::string InternalTree::GetString() const +{ - std::string ret = " "; + std::string ret = " "; - if (!m_isTerminal) { - ret += "["; - } + if (!m_isTerminal) { + ret += "["; + } - ret += m_value; - for (std::vector::const_iterator it = m_children.begin(); it != m_children.end(); ++it) - { - ret += (*it)->GetString(); - } + ret += m_value; + for (std::vector::const_iterator it = m_children.begin(); it != m_children.end(); ++it) { + ret += (*it)->GetString(); + } - if (!m_isTerminal) { - ret += "]"; - } - return ret; + if (!m_isTerminal) { + ret += "]"; + } + return ret; } -void InternalTree::Combine(const std::vector &previous) { +void InternalTree::Combine(const std::vector &previous) +{ - std::vector::iterator it; - bool found = false; - leafNT next_leafNT(this); - for (std::vector::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) { - found = next_leafNT(it); - if (found) { - *it = *it_prev; - } - else { - std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n"; - } + std::vector::iterator it; + bool found = false; + leafNT next_leafNT(this); + for (std::vector::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) { + found = next_leafNT(it); + if (found) { + *it = *it_prev; + } else { + std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n"; } + } } -bool InternalTree::FlatSearch(const std::string & label, std::vector::const_iterator & it) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if ((*it)->GetLabel() == label) { - return true; - } +bool InternalTree::FlatSearch(const std::string & label, std::vector::const_iterator & it) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if ((*it)->GetLabel() == label) { + return true; } - return false; + } + return 
false; } -bool InternalTree::RecursiveSearch(const std::string & label, std::vector::const_iterator & it) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if ((*it)->GetLabel() == label) { - return true; - } - std::vector::const_iterator it2; - if ((*it)->RecursiveSearch(label, it2)) { - it = it2; - return true; - } +bool InternalTree::RecursiveSearch(const std::string & label, std::vector::const_iterator & it) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if ((*it)->GetLabel() == label) { + return true; } - return false; + std::vector::const_iterator it2; + if ((*it)->RecursiveSearch(label, it2)) { + it = it2; + return true; + } + } + return false; } -bool InternalTree::RecursiveSearch(const std::string & label, std::vector::const_iterator & it, InternalTree const* &parent) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if ((*it)->GetLabel() == label) { - parent = this; - return true; - } - std::vector::const_iterator it2; - if ((*it)->RecursiveSearch(label, it2, parent)) { - it = it2; - return true; - } +bool InternalTree::RecursiveSearch(const std::string & label, std::vector::const_iterator & it, InternalTree const* &parent) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if ((*it)->GetLabel() == label) { + parent = this; + return true; } - return false; + std::vector::const_iterator it2; + if ((*it)->RecursiveSearch(label, it2, parent)) { + it = it2; + return true; + } + } + return false; } -bool InternalTree::FlatSearch(const NTLabel & label, std::vector::const_iterator & it) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if ((*it)->GetNTLabel() == label) { - return true; - } +bool InternalTree::FlatSearch(const NTLabel & label, std::vector::const_iterator & it) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if ((*it)->GetNTLabel() == label) { + return true; } - return false; + } + return false; } -bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if ((*it)->GetNTLabel() == label) { - return true; - } - std::vector::const_iterator it2; - if ((*it)->RecursiveSearch(label, it2)) { - it = it2; - return true; - } +bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if ((*it)->GetNTLabel() == label) { + return true; } - return false; + std::vector::const_iterator it2; + if ((*it)->RecursiveSearch(label, it2)) { + it = it2; + return true; + } + } + return false; } -bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it, InternalTree const* &parent) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if ((*it)->GetNTLabel() == label) { - parent = this; - return true; - } - std::vector::const_iterator it2; - if ((*it)->RecursiveSearch(label, it2, parent)) { - it = it2; - return true; - } +bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it, InternalTree const* &parent) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if ((*it)->GetNTLabel() == label) { + parent = this; + return true; } - return false; + std::vector::const_iterator it2; + if ((*it)->RecursiveSearch(label, it2, parent)) { + it = it2; + return true; + } + } + return false; } -bool InternalTree::FlatSearch(const 
std::vector & labels, std::vector::const_iterator & it) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { - return true; - } +bool InternalTree::FlatSearch(const std::vector & labels, std::vector::const_iterator & it) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { + return true; } - return false; + } + return false; } -bool InternalTree::RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { - return true; - } - std::vector::const_iterator it2; - if ((*it)->RecursiveSearch(labels, it2)) { - it = it2; - return true; - } +bool InternalTree::RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { + return true; } - return false; + std::vector::const_iterator it2; + if ((*it)->RecursiveSearch(labels, it2)) { + it = it2; + return true; + } + } + return false; } -bool InternalTree::RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it, InternalTree const* &parent) const { - for (it = m_children.begin(); it != m_children.end(); ++it) { - if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { - parent = this; - return true; - } - std::vector::const_iterator it2; - if ((*it)->RecursiveSearch(labels, it2, parent)) { - it = it2; - return true; - } +bool InternalTree::RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it, InternalTree const* &parent) const +{ + for (it = m_children.begin(); it != m_children.end(); ++it) { + if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) { + parent = this; + return true; } - return false; + std::vector::const_iterator it2; + if ((*it)->RecursiveSearch(labels, it2, parent)) { + it = it2; + return true; + } + } + return false; } -void TreeStructureFeature::Load() { +void TreeStructureFeature::Load() +{ // syntactic constraints can be hooked in here. m_constraints = NULL; @@ -248,27 +255,28 @@ void TreeStructureFeature::Load() { // define NT labels (ints) that are mapped from strings for quicker comparison. 
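The InternalTree constructor and AddSubTree above implement a small recursive-descent parser for bracketed tree strings such as "[S [NP [DT the] [NN cat]] [VP [VBD sat]]]", and GetString serializes a tree back into the same notation. As a rough, self-contained illustration of that round trip: the simplified Node type and function names below are illustrative only, std::shared_ptr (C++11) stands in for the boost-based TreePointer, and the error handling of the real parser is omitted.

#include <cctype>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Minimal tree node: a label plus children; leaves with no children are terminals.
struct Node {
  std::string label;
  std::vector<std::shared_ptr<Node> > children;
};

// Parse one node starting at pos; expects either "[LABEL ...]" or a bare token.
static std::shared_ptr<Node> ParseNode(const std::string &s, size_t &pos) {
  std::shared_ptr<Node> node(new Node());
  while (pos < s.size() && std::isspace(static_cast<unsigned char>(s[pos]))) ++pos;
  if (pos < s.size() && s[pos] == '[') {
    ++pos;                                          // consume '['
    size_t start = pos;
    while (pos < s.size() && s[pos] != ' ' && s[pos] != ']') ++pos;
    node->label = s.substr(start, pos - start);     // nonterminal label
    while (pos < s.size() && s[pos] != ']') {
      if (s[pos] == ' ') { ++pos; continue; }
      node->children.push_back(ParseNode(s, pos));  // recurse on each child
    }
    if (pos < s.size()) ++pos;                      // consume ']'
  } else {
    size_t start = pos;
    while (pos < s.size() && s[pos] != ' ' && s[pos] != ']') ++pos;
    node->label = s.substr(start, pos - start);     // terminal token
  }
  return node;
}

// Serialize back to the same bracketed notation (with a leading space, like GetString).
static std::string ToString(const Node &n) {
  if (n.children.empty()) return " " + n.label;
  std::string out = " [" + n.label;
  for (size_t i = 0; i < n.children.size(); ++i) out += ToString(*n.children[i]);
  return out + "]";
}

int main() {
  std::string line = "[S [NP [DT the] [NN cat]] [VP [VBD sat]]]";
  size_t pos = 0;
  std::shared_ptr<Node> root = ParseNode(line, pos);
  std::cout << ToString(*root) << "\n";   // prints the same tree, with a leading space
}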
-void TreeStructureFeature::AddNTLabels(TreePointer root) const { - std::string label = root->GetLabel(); +void TreeStructureFeature::AddNTLabels(TreePointer root) const +{ + std::string label = root->GetLabel(); - if (root->IsTerminal()) { - return; - } + if (root->IsTerminal()) { + return; + } - std::map::const_iterator it = m_labelset->string_to_label.find(label); - if (it != m_labelset->string_to_label.end()) { - root->SetNTLabel(it->second); - } + std::map::const_iterator it = m_labelset->string_to_label.find(label); + if (it != m_labelset->string_to_label.end()) { + root->SetNTLabel(it->second); + } - std::vector children = root->GetChildren(); - for (std::vector::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) { - AddNTLabels(*it2); - } + std::vector children = root->GetChildren(); + for (std::vector::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) { + AddNTLabels(*it2); + } } FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo - , int featureID /* used to index the state in the previous hypotheses */ - , ScoreComponentCollection* accumulator) const + , int featureID /* used to index the state in the previous hypotheses */ + , ScoreComponentCollection* accumulator) const { std::string tree; bool found = 0; @@ -277,7 +285,7 @@ FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo TreePointer mytree (new InternalTree(tree)); if (m_labelset) { - AddNTLabels(mytree); + AddNTLabels(mytree); } //get subtrees (in target order) @@ -304,8 +312,7 @@ FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo accumulator->PlusEquals(this, *feature, 1); } return new TreeState(mytree); - } - else { + } else { UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found"); } diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index 1a5b8b5e3..f4cdf00d2 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -17,92 +17,91 @@ typedef int NTLabel; class InternalTree { -std::string m_value; -NTLabel m_value_nt; -std::vector m_children; -bool m_isTerminal; + std::string m_value; + NTLabel m_value_nt; + std::vector m_children; + bool m_isTerminal; public: - InternalTree(const std::string & line, const bool terminal = false); - InternalTree(const InternalTree & tree): - m_value(tree.m_value), - m_isTerminal(tree.m_isTerminal) { - const std::vector & children = tree.m_children; - for (std::vector::const_iterator it = children.begin(); it != children.end(); it++) { - TreePointer child (new InternalTree(**it)); - m_children.push_back(child); - } - } - size_t AddSubTree(const std::string & line, size_t start); - - std::string GetString() const; - void Combine(const std::vector &previous); - const std::string & GetLabel() const { - return m_value; + InternalTree(const std::string & line, const bool terminal = false); + InternalTree(const InternalTree & tree): + m_value(tree.m_value), + m_isTerminal(tree.m_isTerminal) { + const std::vector & children = tree.m_children; + for (std::vector::const_iterator it = children.begin(); it != children.end(); it++) { + TreePointer child (new InternalTree(**it)); + m_children.push_back(child); } + } + size_t AddSubTree(const std::string & line, size_t start); - // optionally identify label by int instead of string; - // allows abstraction if multiple nonterminal strings should map to same label. 
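AddNTLabels above looks each node's string label up in the label set, stores the resulting integer id on the node, and recurses over the children, so that later comparisons are integer rather than string comparisons and synonymous labels can share one id. A minimal sketch of that annotation pass on a simplified node type; the names below are illustrative, not the Moses API.

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node {
  std::string label;            // surface label, e.g. "NP"
  int nt_label;                 // integer id, 0 if unknown
  std::vector<Node*> children;
  Node(const std::string &l) : label(l), nt_label(0) {}
};

// Walk the tree and annotate every node whose label is in the table.
void Annotate(Node &node, const std::map<std::string, int> &labelset) {
  std::map<std::string, int>::const_iterator it = labelset.find(node.label);
  if (it != labelset.end()) node.nt_label = it->second;
  for (size_t i = 0; i < node.children.size(); ++i) Annotate(*node.children[i], labelset);
}

int main() {
  std::map<std::string, int> labelset;
  labelset["NP"] = 1;
  labelset["NP-SBJ"] = 1;       // synonymous labels share one id
  labelset["VP"] = 2;

  Node root("S"), np("NP"), vp("VP");
  root.children.push_back(&np);
  root.children.push_back(&vp);
  Annotate(root, labelset);
  std::cout << np.nt_label << " " << vp.nt_label << "\n";   // 1 2
}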
- const NTLabel & GetNTLabel() const { - return m_value_nt; - } + std::string GetString() const; + void Combine(const std::vector &previous); + const std::string & GetLabel() const { + return m_value; + } - void SetNTLabel(NTLabel value) { - m_value_nt = value; - } + // optionally identify label by int instead of string; + // allows abstraction if multiple nonterminal strings should map to same label. + const NTLabel & GetNTLabel() const { + return m_value_nt; + } - size_t GetLength() const { - return m_children.size(); - } - std::vector & GetChildren() { - return m_children; - } - void AddChild(TreePointer child) { - m_children.push_back(child); - } + void SetNTLabel(NTLabel value) { + m_value_nt = value; + } - bool IsTerminal() const { - return m_isTerminal; - } + size_t GetLength() const { + return m_children.size(); + } + std::vector & GetChildren() { + return m_children; + } + void AddChild(TreePointer child) { + m_children.push_back(child); + } - bool IsLeafNT() const { - return (!m_isTerminal && m_children.size() == 0); - } + bool IsTerminal() const { + return m_isTerminal; + } - // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents. - // can be used for formulating syntax constraints. + bool IsLeafNT() const { + return (!m_isTerminal && m_children.size() == 0); + } - // if found, 'it' is iterator to first tree node that matches search string - bool FlatSearch(const std::string & label, std::vector::const_iterator & it) const; - bool RecursiveSearch(const std::string & label, std::vector::const_iterator & it) const; + // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents. + // can be used for formulating syntax constraints. 
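On the search helpers declared here: FlatSearch inspects only the direct children of a node, while RecursiveSearch walks all descendants and can also report the parent of the match, which is what syntax constraints typically need. A simplified sketch of the two patterns, using plain pointers in place of the const_iterator out-parameters of the real interface; the Node type and names are illustrative only.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Node {
  std::string label;
  std::vector<Node*> children;
  Node(const std::string &l) : label(l) {}
};

// Flat search: only the direct children of 'root' are inspected.
Node* FlatSearch(Node &root, const std::string &label) {
  for (size_t i = 0; i < root.children.size(); ++i)
    if (root.children[i]->label == label) return root.children[i];
  return NULL;
}

// Recursive search: depth-first over all descendants; also reports the parent
// of the first match.
Node* RecursiveSearch(Node &root, const std::string &label, Node *&parent) {
  for (size_t i = 0; i < root.children.size(); ++i) {
    Node *child = root.children[i];
    if (child->label == label) { parent = &root; return child; }
    Node *found = RecursiveSearch(*child, label, parent);
    if (found) return found;
  }
  return NULL;
}

int main() {
  Node s("S"), np("NP"), vp("VP"), nn("NN");
  s.children.push_back(&np);
  s.children.push_back(&vp);
  vp.children.push_back(&nn);
  Node *parent = NULL;
  Node *hit = RecursiveSearch(s, "NN", parent);
  std::cout << (hit ? hit->label : "none") << " under "
            << (parent ? parent->label : "none") << "\n";   // NN under VP
}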
- // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node - bool RecursiveSearch(const std::string & label, std::vector::const_iterator & it, InternalTree const* &parent) const; + // if found, 'it' is iterator to first tree node that matches search string + bool FlatSearch(const std::string & label, std::vector::const_iterator & it) const; + bool RecursiveSearch(const std::string & label, std::vector::const_iterator & it) const; - // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels - // if found, 'it' is iterator to first tree node that matches search string - bool FlatSearch(const NTLabel & label, std::vector::const_iterator & it) const; - bool RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it) const; + // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node + bool RecursiveSearch(const std::string & label, std::vector::const_iterator & it, InternalTree const* &parent) const; - // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node - bool RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it, InternalTree const* &parent) const; + // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels + // if found, 'it' is iterator to first tree node that matches search string + bool FlatSearch(const NTLabel & label, std::vector::const_iterator & it) const; + bool RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it) const; - // pass vector of possible labels to search - // if found, 'it' is iterator to first tree node that matches search string - bool FlatSearch(const std::vector & labels, std::vector::const_iterator & it) const; - bool RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it) const; + // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node + bool RecursiveSearch(const NTLabel & label, std::vector::const_iterator & it, InternalTree const* &parent) const; - // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node - bool RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it, InternalTree const* &parent) const; + // pass vector of possible labels to search + // if found, 'it' is iterator to first tree node that matches search string + bool FlatSearch(const std::vector & labels, std::vector::const_iterator & it) const; + bool RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it) const; + + // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node + bool RecursiveSearch(const std::vector & labels, std::vector::const_iterator & it, InternalTree const* &parent) const; }; // mapping from string nonterminal label to int representation. // allows abstraction if multiple nonterminal strings should map to same label. 
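The leafNT generator defined near the end of TreeStructureFeature.h (further below) enumerates the leaf nonterminals of a tree so that InternalTree::Combine can splice in the subtrees of the previous hypotheses, one per gap. Without the coroutine-style $generator macros, the same left-to-right enumeration can be written with an explicit stack; the sketch below is such a rewrite over a simplified node type and is not the Moses implementation.

#include <iostream>
#include <string>
#include <vector>

struct Node {
  std::string label;
  bool terminal;                 // true for surface words
  std::vector<Node*> children;
  Node(const std::string &l, bool t) : label(l), terminal(t) {}
};

// Collect pointers to every leaf nonterminal (nonterminal node with no
// children), left to right -- the slots that a Combine-style pass fills with
// the subtrees of the previous hypotheses.
std::vector<Node*> LeafNonTerminals(Node &root) {
  std::vector<Node*> leaves;
  std::vector<Node*> stack(1, &root);
  while (!stack.empty()) {
    Node *n = stack.back();
    stack.pop_back();
    if (!n->terminal && n->children.empty()) {
      leaves.push_back(n);
      continue;
    }
    // push children in reverse so they are visited left to right
    for (size_t i = n->children.size(); i > 0; --i) stack.push_back(n->children[i - 1]);
  }
  return leaves;
}

int main() {
  Node s("S", false), np("NP", false), vp("VP", false), sat("sat", true);
  s.children.push_back(&np);
  s.children.push_back(&vp);
  vp.children.push_back(&sat);
  std::vector<Node*> gaps = LeafNonTerminals(s);
  for (size_t i = 0; i < gaps.size(); ++i) std::cout << gaps[i]->label << "\n";  // NP
}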
-struct LabelSet -{ +struct LabelSet { public: - std::map string_to_label; + std::map string_to_label; }; @@ -111,8 +110,8 @@ public: class SyntaxConstraints { public: - virtual std::vector SyntacticRules(TreePointer root, const std::vector &previous) = 0; - virtual ~SyntaxConstraints() {}; + virtual std::vector SyntacticRules(TreePointer root, const std::vector &previous) = 0; + virtual ~SyntaxConstraints() {}; }; @@ -125,10 +124,12 @@ public: {} TreePointer GetTree() const { - return m_tree; + return m_tree; } - int Compare(const FFState& other) const {return 0;}; + int Compare(const FFState& other) const { + return 0; + }; }; class TreeStructureFeature : public StatefulFeatureFunction @@ -138,9 +139,11 @@ class TreeStructureFeature : public StatefulFeatureFunction public: TreeStructureFeature(const std::string &line) :StatefulFeatureFunction(0, line) { - ReadParameters(); - } - ~TreeStructureFeature() {delete m_constraints;}; + ReadParameters(); + } + ~TreeStructureFeature() { + delete m_constraints; + }; virtual const FFState* EmptyHypothesisState(const InputType &input) const { return new TreeState(TreePointer()); @@ -164,7 +167,9 @@ public: FFState* Evaluate( const Hypothesis& cur_hypo, const FFState* prev_state, - ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");}; + ScoreComponentCollection* accumulator) const { + UTIL_THROW(util::Exception, "Not implemented"); + }; FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, @@ -174,42 +179,42 @@ public: }; // Python-like generator that yields next nonterminal leaf on every call -$generator(leafNT) { - std::vector::iterator it; - InternalTree* tree; - leafNT(InternalTree* root = 0): tree(root) {} - $emit(std::vector::iterator) - for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { - if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { - $yield(it); - } - else if ((*it)->GetLength() > 0) { - if (&(**it)) { // normal pointer to same object that TreePointer points to - $restart(tree = &(**it)); - } - } +$generator(leafNT) +{ + std::vector::iterator it; + InternalTree* tree; + leafNT(InternalTree* root = 0): tree(root) {} + $emit(std::vector::iterator) + for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { + if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { + $yield(it); + } else if ((*it)->GetLength() > 0) { + if (&(**it)) { // normal pointer to same object that TreePointer points to + $restart(tree = &(**it)); + } } - $stop; + } + $stop; }; // Python-like generator that yields the parent of the next nonterminal leaf on every call -$generator(leafNTParent) { - std::vector::iterator it; - InternalTree* tree; - leafNTParent(InternalTree* root = 0): tree(root) {} - $emit(InternalTree*) - for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { - if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { - $yield(tree); - } - else if ((*it)->GetLength() > 0) { - if (&(**it)) { // normal pointer to same object that TreePointer points to - $restart(tree = &(**it)); - } - } +$generator(leafNTParent) +{ + std::vector::iterator it; + InternalTree* tree; + leafNTParent(InternalTree* root = 0): tree(root) {} + $emit(InternalTree*) + for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) { + if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) { + $yield(tree); + } else if ((*it)->GetLength() > 0) { + if (&(**it)) { // normal 
pointer to same object that TreePointer points to + $restart(tree = &(**it)); + } } - $stop; + } + $stop; }; diff --git a/moses/FactorCollection.cpp b/moses/FactorCollection.cpp index 5013da417..93edeff51 100644 --- a/moses/FactorCollection.cpp +++ b/moses/FactorCollection.cpp @@ -59,8 +59,7 @@ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool if (isNonTerminal) { m_factorIdNonTerminal++; UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile"); - } - else { + } else { m_factorId++; } } diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index b13378f66..d72573df0 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -182,9 +182,9 @@ template void Fill::AddPhraseOOV(TargetPhrase &phrase, std: // for pruning template float Fill::GetBestScore(const ChartCellLabel *chartCell) const { - search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate(); - UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack"); - return vertex.Bound(); + search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate(); + UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack"); + return vertex.Bound(); } // TODO: factors (but chart doesn't seem to support factors anyway). diff --git a/moses/InputPath.cpp b/moses/InputPath.cpp index b7b85fa6f..37380ffda 100644 --- a/moses/InputPath.cpp +++ b/moses/InputPath.cpp @@ -33,14 +33,14 @@ InputPath(const Phrase &phrase, const NonTerminalSet &sourceNonTerms, InputPath::~InputPath() { - // Since there is no way for the Phrase Dictionaries to tell in - // which (sentence) context phrases were looked up, we tell them + // Since there is no way for the Phrase Dictionaries to tell in + // which (sentence) context phrases were looked up, we tell them // now that the phrase isn't needed any more by this inputPath typedef std::pair entry; std::map::const_iterator iter; for (iter = m_targetPhrases.begin(); iter != m_targetPhrases.end(); ++iter) iter->first->Release(iter->second.first); - + delete m_inputScore; } diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp index b6c9764de..1c2910a21 100644 --- a/moses/LM/DALMWrapper.cpp +++ b/moses/LM/DALMWrapper.cpp @@ -59,22 +59,22 @@ public: delete state; } - void reset(const DALMState &from){ - delete state; - state = new DALM::State(*from.state); - } + void reset(const DALMState &from) { + delete state; + state = new DALM::State(*from.state); + } - void reset(DALM::State *s){ - delete state; - state = s; - } + void reset(DALM::State *s) { + delete state; + state = s; + } - virtual int Compare(const FFState& other) const{ - const DALMState &o = static_cast(other); - if(state->get_count() < o.state->get_count()) return -1; - else if(state->get_count() > o.state->get_count()) return 1; - else return state->compare(o.state); - } + virtual int Compare(const FFState& other) const { + const DALMState &o = static_cast(other); + if(state->get_count() < o.state->get_count()) return -1; + else if(state->get_count() > o.state->get_count()) return 1; + else return state->compare(o.state); + } DALM::State *get_state() const { return state; @@ -88,78 +88,78 @@ public: class DALMChartState : public FFState { private: - const ChartHypothesis &hypo; - DALM::Fragment *prefixFragments; - unsigned short prefixLength; - float prefixScore; - DALMState *rightContext; - bool isLarge; + const ChartHypothesis &hypo; + DALM::Fragment 
*prefixFragments; + unsigned short prefixLength; + float prefixScore; + DALMState *rightContext; + bool isLarge; public: - DALMChartState( - const ChartHypothesis &hypo, - DALM::Fragment *prefixFragments, - unsigned short prefixLength, - float prefixScore, - DALMState *rightContext, - bool isLarge) - : hypo(hypo), - prefixFragments(prefixFragments), - prefixLength(prefixLength), - prefixScore(prefixScore), - rightContext(rightContext), - isLarge(isLarge) - {} + DALMChartState( + const ChartHypothesis &hypo, + DALM::Fragment *prefixFragments, + unsigned short prefixLength, + float prefixScore, + DALMState *rightContext, + bool isLarge) + : hypo(hypo), + prefixFragments(prefixFragments), + prefixLength(prefixLength), + prefixScore(prefixScore), + rightContext(rightContext), + isLarge(isLarge) + {} - virtual ~DALMChartState(){ - delete [] prefixFragments; - delete rightContext; - } + virtual ~DALMChartState() { + delete [] prefixFragments; + delete rightContext; + } - unsigned short GetPrefixLength() const{ - return prefixLength; - } + unsigned short GetPrefixLength() const { + return prefixLength; + } - const DALM::Fragment *GetPrefixFragments() const{ - return prefixFragments; - } + const DALM::Fragment *GetPrefixFragments() const { + return prefixFragments; + } - float GetPrefixScore() const{ - return prefixScore; - } + float GetPrefixScore() const { + return prefixScore; + } - const DALMState *GetRightContext() const{ - return rightContext; - } + const DALMState *GetRightContext() const { + return rightContext; + } - bool LargeEnough() const{ - return isLarge; - } + bool LargeEnough() const { + return isLarge; + } - virtual int Compare(const FFState& other) const{ - const DALMChartState &o = static_cast(other); - // prefix + virtual int Compare(const FFState& other) const { + const DALMChartState &o = static_cast(other); + // prefix if (hypo.GetCurrSourceRange().GetStartPos() > 0) { // not for " ..." - if (prefixLength != o.prefixLength){ - return (prefixLength < o.prefixLength)?-1:1; - } else { - if(prefixLength > 0){ - DALM::Fragment &f = prefixFragments[prefixLength-1]; - DALM::Fragment &of = o.prefixFragments[prefixLength-1]; - int ret = DALM::compare_fragments(f, of); - if(ret != 0) return ret; - } - } + if (prefixLength != o.prefixLength) { + return (prefixLength < o.prefixLength)?-1:1; + } else { + if(prefixLength > 0) { + DALM::Fragment &f = prefixFragments[prefixLength-1]; + DALM::Fragment &of = o.prefixFragments[prefixLength-1]; + int ret = DALM::compare_fragments(f, of); + if(ret != 0) return ret; + } + } } // suffix - size_t inputSize = hypo.GetManager().GetSource().GetSize(); + size_t inputSize = hypo.GetManager().GetSource().GetSize(); if (hypo.GetCurrSourceRange().GetEndPos() < inputSize - 1) { // not for "... " - int ret = o.rightContext->Compare(*rightContext); + int ret = o.rightContext->Compare(*rightContext); if (ret != 0) return ret; } - return 0; - } + return 0; + } }; LanguageModelDALM::LanguageModelDALM(const std::string &line) @@ -181,18 +181,18 @@ LanguageModelDALM::~LanguageModelDALM() void LanguageModelDALM::Load() { - ///////////////////// - // READING INIFILE // - ///////////////////// - string inifile= m_filePath + "/dalm.ini"; + ///////////////////// + // READING INIFILE // + ///////////////////// + string inifile= m_filePath + "/dalm.ini"; UTIL_THROW_IF(model.empty() || words.empty() || wordstxt.empty(), util::FileOpenException, "Failed to read DALM ini file " << m_filePath << ". 
Probably doesn't exist"); - model = m_filePath + "/" + model; - words = m_filePath + "/" + words; - wordstxt = m_filePath + "/" + wordstxt; + model = m_filePath + "/" + model; + words = m_filePath + "/" + words; + wordstxt = m_filePath + "/" + wordstxt; // Preparing a logger object. m_logger = new DALM::Logger(stderr); @@ -233,14 +233,14 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float size_t currPos = 0; size_t hist_count = 0; DALMState *dalm_state = new DALMState(m_nGramOrder); - DALM::State *state = dalm_state->get_state(); + DALM::State *state = dalm_state->get_state(); + + if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor) { + m_lm->init_state(*state); + currPos++; + hist_count++; + } - if(phrase.GetWord(0).GetFactor(m_factorType) == m_beginSentenceFactor){ - m_lm->init_state(*state); - currPos++; - hist_count++; - } - while (currPos < phraseSize) { const Word &word = phrase.GetWord(currPos); hist_count++; @@ -249,9 +249,9 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float state->refresh(); hist_count = 0; } else { - DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); - float score = m_lm->query(wid, *state); - fullScore += score; + DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); + float score = m_lm->query(wid, *state); + fullScore += score; if (hist_count >= m_nGramOrder) ngramScore += score; if (wid==m_vocab->unk()) ++oovCount; } @@ -259,9 +259,9 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float currPos++; } - fullScore = TransformLMScore(fullScore); - ngramScore = TransformLMScore(ngramScore); - delete dalm_state; + fullScore = TransformLMScore(fullScore); + ngramScore = TransformLMScore(ngramScore); + delete dalm_state; } FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const @@ -283,11 +283,11 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, const std::size_t adjust_end = std::min(end, begin + m_nGramOrder - 1); DALMState *dalm_state = new DALMState(*dalm_ps); - DALM::State *state = dalm_state->get_state(); - + DALM::State *state = dalm_state->get_state(); + float score = 0.0; - for(std::size_t position=begin; position < adjust_end; position++){ - score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), *state); + for(std::size_t position=begin; position < adjust_end; position++) { + score += m_lm->query(GetVocabId(hypo.GetWord(position).GetFactor(m_factorType)), *state); } if (hypo.IsSourceCompleted()) { @@ -295,8 +295,8 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, std::vector indices(m_nGramOrder-1); const DALM::VocabId *last = LastIDs(hypo, &indices.front()); m_lm->set_state(&indices.front(), (last-&indices.front()), *state); - - score += m_lm->query(wid_end, *state); + + score += m_lm->query(wid_end, *state); } else if (adjust_end < end) { // Get state after adding a long phrase. 
std::vector indices(m_nGramOrder-1); @@ -304,7 +304,7 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, m_lm->set_state(&indices.front(), (last-&indices.front()), *state); } - score = TransformLMScore(score); + score = TransformLMScore(score); if (OOVFeatureEnabled()) { std::vector scores(2); scores[0] = score; @@ -317,73 +317,74 @@ FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, return dalm_state; } -FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{ +FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const +{ // initialize language model context state - DALMState *dalm_state = new DALMState(m_nGramOrder); - DALM::State *state = dalm_state->get_state(); + DALMState *dalm_state = new DALMState(m_nGramOrder); + DALM::State *state = dalm_state->get_state(); - size_t contextSize = m_nGramOrder-1; - DALM::Fragment *prefixFragments = new DALM::Fragment[contextSize]; - unsigned short prefixLength = 0; - bool isLarge = false; + size_t contextSize = m_nGramOrder-1; + DALM::Fragment *prefixFragments = new DALM::Fragment[contextSize]; + unsigned short prefixLength = 0; + bool isLarge = false; // initial language model scores float prefixScore = 0.0; // not yet final for initial words (lack context) float hypoScore = 0.0; // total hypothesis score. - const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase(); - size_t hypoSize = targetPhrase.GetSize(); + const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase(); + size_t hypoSize = targetPhrase.GetSize(); // get index map for underlying hypotheses const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = targetPhrase.GetAlignNonTerm().GetNonTermIndexMap(); - size_t phrasePos = 0; - - // begginig of sentence. - if(hypoSize > 0){ - const Word &word = targetPhrase.GetWord(0); - if(!word.IsNonTerminal()){ - DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); - if(word.GetFactor(m_factorType) == m_beginSentenceFactor){ - m_lm->init_state(*state); - // state is finalized. - isLarge = true; - }else{ - if(isLarge){ - float score = m_lm->query(wid, *state); - hypoScore += score; - }else{ - float score = m_lm->query(wid, *state, prefixFragments[prefixLength]); + size_t phrasePos = 0; - prefixScore += score; - hypoScore += score; - prefixLength++; - if(prefixLength >= contextSize) isLarge = true; - } - } - }else{ + // begginig of sentence. + if(hypoSize > 0) { + const Word &word = targetPhrase.GetWord(0); + if(!word.IsNonTerminal()) { + DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); + if(word.GetFactor(m_factorType) == m_beginSentenceFactor) { + m_lm->init_state(*state); + // state is finalized. 
+ isLarge = true; + } else { + if(isLarge) { + float score = m_lm->query(wid, *state); + hypoScore += score; + } else { + float score = m_lm->query(wid, *state, prefixFragments[prefixLength]); + + prefixScore += score; + hypoScore += score; + prefixLength++; + if(prefixLength >= contextSize) isLarge = true; + } + } + } else { // special case: rule starts with non-terminal -> copy everything size_t nonTermIndex = nonTermIndexMap[0]; const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex); const DALMChartState* prevState = static_cast(prevHypo->GetFFState(featureID)); - + // get prefixScore and hypoScore prefixScore = prevState->GetPrefixScore(); - hypoScore = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]); + hypoScore = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]); // get language model state - dalm_state->reset(*prevState->GetRightContext()); - state = dalm_state->get_state(); + dalm_state->reset(*prevState->GetRightContext()); + state = dalm_state->get_state(); - prefixLength = prevState->GetPrefixLength(); - const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments(); - std::memcpy(prefixFragments, prevPrefixFragments, sizeof(DALM::Fragment)*prefixLength); - isLarge = prevState->LargeEnough(); - } - phrasePos++; + prefixLength = prevState->GetPrefixLength(); + const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments(); + std::memcpy(prefixFragments, prevPrefixFragments, sizeof(DALM::Fragment)*prefixLength); + isLarge = prevState->LargeEnough(); + } + phrasePos++; } // loop over rule @@ -393,16 +394,16 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu // regular word if (!word.IsNonTerminal()) { - DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); - if (isLarge) { - hypoScore += m_lm->query(wid, *state); - }else{ - float score = m_lm->query(wid, *state, prefixFragments[prefixLength]); - prefixScore += score; - hypoScore += score; - prefixLength++; - if(prefixLength >= contextSize) isLarge = true; - } + DALM::VocabId wid = GetVocabId(word.GetFactor(m_factorType)); + if (isLarge) { + hypoScore += m_lm->query(wid, *state); + } else { + float score = m_lm->query(wid, *state, prefixFragments[prefixLength]); + prefixScore += score; + hypoScore += score; + prefixLength++; + if(prefixLength >= contextSize) isLarge = true; + } } // non-terminal, add phrase from underlying hypothesis @@ -414,40 +415,40 @@ FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featu const DALMChartState* prevState = static_cast(prevHypo->GetFFState(featureID)); - + size_t prevPrefixLength = prevState->GetPrefixLength(); - const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments(); - DALM::Gap gap(*state); + const DALM::Fragment *prevPrefixFragments = prevState->GetPrefixFragments(); + DALM::Gap gap(*state); // score its prefix for(size_t prefixPos = 0; prefixPos < prevPrefixLength; prefixPos++) { - const DALM::Fragment &f = prevPrefixFragments[prefixPos]; + const DALM::Fragment &f = prevPrefixFragments[prefixPos]; - if (isLarge) { - hypoScore += m_lm->query(f, *state, gap); - } else { - float score = m_lm->query(f, *state, gap, prefixFragments[prefixLength]); - prefixScore += score; - hypoScore += score; - prefixLength++; - if(prefixLength >= contextSize) isLarge = true; - } - gap.succ(); + if (isLarge) { + hypoScore += m_lm->query(f, *state, gap); + } else { + float score = m_lm->query(f, *state, gap, 
prefixFragments[prefixLength]); + prefixScore += score; + hypoScore += score; + prefixLength++; + if(prefixLength >= contextSize) isLarge = true; + } + gap.succ(); } // check if we are dealing with a large sub-phrase if (prevState->LargeEnough()) { // add its language model score - hypoScore += UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]); + hypoScore += UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]); hypoScore -= prevState->GetPrefixScore(); // remove overwrapped score. - // copy language model state - dalm_state->reset(*prevState->GetRightContext()); - state = dalm_state->get_state(); + // copy language model state + dalm_state->reset(*prevState->GetRightContext()); + state = dalm_state->get_state(); } else { - DALM::State *state_new = new DALM::State(*prevState->GetRightContext()->get_state()); - m_lm->set_state(*state_new, *state, gap); - dalm_state->reset(state_new); - state = dalm_state->get_state(); - } + DALM::State *state_new = new DALM::State(*prevState->GetRightContext()->get_state()); + m_lm->set_state(*state_new, *state, gap); + dalm_state->reset(state_new); + state = dalm_state->get_state(); + } } } @@ -466,36 +467,36 @@ void LanguageModelDALM::CreateVocabMapping(const std::string &wordstxt) { InputFileStream vocabStrm(wordstxt); - std::vector< std::pair > vlist; + std::vector< std::pair > vlist; string line; - std::size_t max_fid = 0; + std::size_t max_fid = 0; while(getline(vocabStrm, line)) { - const Factor *factor = FactorCollection::Instance().AddFactor(line); - std::size_t fid = factor->GetId(); - DALM::VocabId wid = m_vocab->lookup(line.c_str()); + const Factor *factor = FactorCollection::Instance().AddFactor(line); + std::size_t fid = factor->GetId(); + DALM::VocabId wid = m_vocab->lookup(line.c_str()); - vlist.push_back(std::pair(fid, wid)); - if(max_fid < fid) max_fid = fid; + vlist.push_back(std::pair(fid, wid)); + if(max_fid < fid) max_fid = fid; } - for(std::size_t i = 0; i < m_vocabMap.size(); i++){ - m_vocabMap[i] = m_vocab->unk(); - } + for(std::size_t i = 0; i < m_vocabMap.size(); i++) { + m_vocabMap[i] = m_vocab->unk(); + } - m_vocabMap.resize(max_fid+1, m_vocab->unk()); - std::vector< std::pair >::iterator it = vlist.begin(); - while(it != vlist.end()){ - std::pair &entry = *it; - m_vocabMap[entry.first] = entry.second; + m_vocabMap.resize(max_fid+1, m_vocab->unk()); + std::vector< std::pair >::iterator it = vlist.begin(); + while(it != vlist.end()) { + std::pair &entry = *it; + m_vocabMap[entry.first] = entry.second; - ++it; - } + ++it; + } } DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const { - std::size_t fid = factor->GetId(); - return (m_vocabMap.size() > fid)? m_vocabMap[fid] : m_vocab->unk(); + std::size_t fid = factor->GetId(); + return (m_vocabMap.size() > fid)? 
m_vocabMap[fid] : m_vocab->unk(); } void LanguageModelDALM::SetParameter(const std::string& key, const std::string& value) diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 38154e6f9..3d484e4f2 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -182,11 +182,11 @@ void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hyp } -void +void Manager:: -printThisHypothesis(long translationId, const Hypothesis* hypo, - const vector & remainingPhrases, - float remainingScore, ostream& outputStream) const +printThisHypothesis(long translationId, const Hypothesis* hypo, + const vector & remainingPhrases, + float remainingScore, ostream& outputStream) const { outputStream << translationId << " ||| "; diff --git a/moses/PDTAimp.h b/moses/PDTAimp.h index f9bf0adf8..33a30e3ef 100644 --- a/moses/PDTAimp.h +++ b/moses/PDTAimp.h @@ -140,23 +140,23 @@ public: std::pair piter; if(useCache) { piter=m_cache.insert(std::make_pair(src,static_cast(0))); - if(!piter.second){ - if (piter.first->second){ - VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << (piter.first->second)->GetSize() << std::endl); - }else{ - VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << 0 << std::endl); - } + if(!piter.second) { + if (piter.first->second) { + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << (piter.first->second)->GetSize() << std::endl); + } else { + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: piter.first->second->GetSize():" << 0 << std::endl); + } return piter.first->second; } } else if (m_cache.size()) { MapSrc2Tgt::const_iterator i=m_cache.find(src); - if (i!=m_cache.end()){ - if (i->second){ - VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << (void*) (i->second) << std::endl); - }else{ + if (i!=m_cache.end()) { + if (i->second) { + VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << (void*) (i->second) << std::endl); + } else { VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl); } - }else{ + } else { VERBOSE(1,"PDTAimp::GetTargetPhraseCollection: i->second->GetSize():" << 0 << std::endl); } return (i!=m_cache.end() ? 
i->second : 0); diff --git a/moses/Parameter.h b/moses/Parameter.h index 97f54ee40..a087b40c6 100644 --- a/moses/Parameter.h +++ b/moses/Parameter.h @@ -117,7 +117,7 @@ public: std::vector GetWeights(const std::string &name); std::map > GetAllWeights() const { - return m_weights; + return m_weights; } std::set GetWeightNames() const; diff --git a/moses/Phrase.cpp b/moses/Phrase.cpp index 05531a2bc..cc4c134de 100644 --- a/moses/Phrase.cpp +++ b/moses/Phrase.cpp @@ -381,7 +381,7 @@ void Phrase::InitStartEndWord() size_t Phrase::Find(const Phrase &sought, int maxUnknown) const { if (GetSize() < sought.GetSize()) { - // sought phrase too big + // sought phrase too big return NOT_FOUND; } diff --git a/moses/PrefixTreeMap.cpp b/moses/PrefixTreeMap.cpp index 6e375c76a..1719ebdba 100644 --- a/moses/PrefixTreeMap.cpp +++ b/moses/PrefixTreeMap.cpp @@ -65,7 +65,8 @@ void Candidates::readBin(FILE* f) const LabelId PrefixTreeMap::MagicWord = std::numeric_limits::max() - 1; ////////////////////////////////////////////////////////////////// -PrefixTreeMap::~PrefixTreeMap() { +PrefixTreeMap::~PrefixTreeMap() +{ if(m_FileSrc) { fClose(m_FileSrc); } @@ -99,8 +100,7 @@ WordVoc &ReadVoc(std::map &vocs, const std::string& filenam WordVoc &voc = vocs[filename]; voc.Read(filename); return voc; - } - else { + } else { return vi->second; } } diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp index 49ca22645..eeb11d164 100644 --- a/moses/SearchCubePruning.cpp +++ b/moses/SearchCubePruning.cpp @@ -251,8 +251,8 @@ bool SearchCubePruning::CheckDistortion(const WordsBitmap &hypoBitmap, const Wor } if (StaticData::Instance().AdjacentOnly() && - !hypoBitmap.IsAdjacent(range.GetStartPos(), range.GetEndPos())) { - return false; + !hypoBitmap.IsAdjacent(range.GetStartPos(), range.GetEndPos())) { + return false; } bool leftMostEdge = (hypoFirstGapPos == startPos); diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp index 8ac0eca13..0df179e13 100644 --- a/moses/SearchNormal.cpp +++ b/moses/SearchNormal.cpp @@ -254,8 +254,8 @@ void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t star } if (StaticData::Instance().AdjacentOnly() && - !hypothesis.GetWordsBitmap().IsAdjacent(startPos, endPos)) { - return; + !hypothesis.GetWordsBitmap().IsAdjacent(startPos, endPos)) { + return; } // loop through all translation options diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index ae9e3781e..271dec11c 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -130,30 +130,30 @@ int Sentence::Read(std::istream& in,const std::vector& factorOrder) std::vector< std::map > dlt_meta = ProcessAndStripDLT(line); - PhraseDictionaryDynamicCacheBased* cbtm = NULL; - DynamicCacheBasedLanguageModel* cblm = NULL; + PhraseDictionaryDynamicCacheBased* cbtm = NULL; + DynamicCacheBasedLanguageModel* cblm = NULL; std::vector< std::map >::iterator dlt_meta_it = dlt_meta.begin(); for (dlt_meta_it = dlt_meta.begin(); dlt_meta_it != dlt_meta.end(); ++dlt_meta_it) { - - if ((*dlt_meta_it).find("type") != (*dlt_meta_it).end()) { - if ((*dlt_meta_it)["type"] == "cbtm") { - std::string id = "default"; - if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) { - id = (*dlt_meta_it)["id"]; - } - cbtm = &PhraseDictionaryDynamicCacheBased::InstanceNonConst(id); - if (cbtm) cbtm->ExecuteDlt(*dlt_meta_it); - } - if ((*dlt_meta_it)["type"] == "cblm") { - std::string id = "default"; - if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) { - id = (*dlt_meta_it)["id"]; - } - cblm = 
&DynamicCacheBasedLanguageModel::InstanceNonConst(id); - if (cblm) cblm->ExecuteDlt(*dlt_meta_it); - } + + if ((*dlt_meta_it).find("type") != (*dlt_meta_it).end()) { + if ((*dlt_meta_it)["type"] == "cbtm") { + std::string id = "default"; + if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) { + id = (*dlt_meta_it)["id"]; + } + cbtm = &PhraseDictionaryDynamicCacheBased::InstanceNonConst(id); + if (cbtm) cbtm->ExecuteDlt(*dlt_meta_it); + } + if ((*dlt_meta_it)["type"] == "cblm") { + std::string id = "default"; + if ((*dlt_meta_it).find("id") != (*dlt_meta_it).end()) { + id = (*dlt_meta_it)["id"]; + } + cblm = &DynamicCacheBasedLanguageModel::InstanceNonConst(id); + if (cblm) cblm->ExecuteDlt(*dlt_meta_it); + } } - } + } // parse XML markup in translation line std::vector< size_t > xmlWalls; diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 2f0940739..a20e284fe 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -537,21 +537,21 @@ bool StaticData::LoadData(Parameter *parameter) NoCache(); OverrideFeatures(); -std::cerr <<"After StaticData::LoadDataStatic" << std::endl; + std::cerr <<"After StaticData::LoadDataStatic" << std::endl; -/* -std::cerr <<"Before ShowWeights" << std::endl; - // setting "-show-weights" -> just dump out weights and exit - if (m_parameter->isParamSpecified("show-weights")) { - MosesCmd::ShowWeights(); - exit(0); - } -std::cerr <<"After ShowWeights" << std::endl; -*/ + /* + std::cerr <<"Before ShowWeights" << std::endl; + // setting "-show-weights" -> just dump out weights and exit + if (m_parameter->isParamSpecified("show-weights")) { + MosesCmd::ShowWeights(); + exit(0); + } + std::cerr <<"After ShowWeights" << std::endl; + */ -std::cerr <<"Before LoadFeatureFunctions" << std::endl; + std::cerr <<"Before LoadFeatureFunctions" << std::endl; LoadFeatureFunctions(); -std::cerr <<"After LoadFeatureFunctions" << std::endl; + std::cerr <<"After LoadFeatureFunctions" << std::endl; if (!LoadDecodeGraphs()) return false; @@ -982,8 +982,7 @@ bool StaticData::CheckWeights() const cerr << fname << "\n"; if (featureNames.find(fname) != featureNames.end()) { weightNames.erase(iter++); - } - else { + } else { ++iter; } } @@ -1002,7 +1001,8 @@ bool StaticData::CheckWeights() const } -void StaticData::LoadSparseWeightsFromConfig() { +void StaticData::LoadSparseWeightsFromConfig() +{ set featureNames; const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); for (size_t i = 0; i < ffs.size(); ++i) { @@ -1017,7 +1017,7 @@ void StaticData::LoadSparseWeightsFromConfig() { // this indicates that it is sparse feature if (featureNames.find(iter->first) == featureNames.end()) { UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first); - m_allWeights.Assign(iter->first, iter->second[0]); + m_allWeights.Assign(iter->first, iter->second[0]); } } @@ -1211,24 +1211,24 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string vector weights; vector toks = Tokenize(denseWeights); for (size_t i = 0; i < toks.size(); ++i) { - const string &tok = toks[i]; + const string &tok = toks[i]; - if (tok.substr(tok.size() - 1, 1) == "=") { - // start of new feature + if (tok.substr(tok.size() - 1, 1) == "=") { + // start of new feature - if (name != "") { - // save previous ff - const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name); - m_allWeights.Assign(&ff, weights); - weights.clear(); - } + if (name != "") { + // save previous ff + const FeatureFunction &ff = 
FeatureFunction::FindFeatureFunction(name); + m_allWeights.Assign(&ff, weights); + weights.clear(); + } - name = tok.substr(0, tok.size() - 1); - } else { - // a weight for curr ff - float weight = Scan(toks[i]); - weights.push_back(weight); - } + name = tok.substr(0, tok.size() - 1); + } else { + // a weight for curr ff + float weight = Scan(toks[i]); + weights.push_back(weight); + } } const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name); @@ -1238,14 +1238,14 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string InputFileStream sparseStrme(sparseFile); string line; while (getline(sparseStrme, line)) { - vector toks = Tokenize(line); - UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight"); + vector toks = Tokenize(line); + UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight"); - vector names = Tokenize(toks[0], "_"); - UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName"); + vector names = Tokenize(toks[0], "_"); + UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName"); - const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]); - m_allWeights.Assign(&ff, names[1], Scan(toks[1])); + const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]); + m_allWeights.Assign(&ff, names[1], Scan(toks[1])); } } diff --git a/moses/StaticData.h b/moses/StaticData.h index 687b7d77c..5c49892f2 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -760,8 +760,9 @@ public: } - bool AdjacentOnly() const - { return m_adjacentOnly; } + bool AdjacentOnly() const { + return m_adjacentOnly; + } void ResetWeights(const std::string &denseWeights, const std::string &sparseFile); @@ -769,11 +770,11 @@ public: // need global access for output of tree structure const StatefulFeatureFunction* GetTreeStructure() const { - return m_treeStructure; + return m_treeStructure; } void SetTreeStructure(const StatefulFeatureFunction* treeStructure) { - m_treeStructure = treeStructure; + m_treeStructure = treeStructure; } }; diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index a6ea7d7c1..5d9c4f89c 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -3,17 +3,17 @@ /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
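Returning to the StaticData::ResetWeights hunk a little further up: the dense weight string is tokenized so that any token ending in '=' opens a new feature function and the following numeric tokens are its weights, while the sparse file holds one "FFName_sparseName weight" pair per line. Below is a small sketch of the dense-string parsing only; the example weight line and feature names are placeholders, not a statement about any particular moses.ini.

#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Parse a dense weight string of the form "FF0= 0.3 0.2 FF1= 1.0", where a
// token ending in '=' opens a new feature and the following numbers are its
// weights.
std::map<std::string, std::vector<float> > ParseDenseWeights(const std::string &line) {
  std::map<std::string, std::vector<float> > weights;
  std::istringstream in(line);
  std::string tok, current;
  while (in >> tok) {
    if (!tok.empty() && tok[tok.size() - 1] == '=') {
      current = tok.substr(0, tok.size() - 1);     // start of a new feature
    } else if (!current.empty()) {
      weights[current].push_back(static_cast<float>(std::atof(tok.c_str())));
    }
  }
  return weights;
}

int main() {
  std::map<std::string, std::vector<float> > w =
      ParseDenseWeights("LM0= 0.5 Distortion0= 0.3 TranslationModel0= 0.2 0.2 0.2 0.2");
  std::cout << w["TranslationModel0"].size() << "\n";   // 4
}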
- + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA @@ -38,7 +38,7 @@ using namespace std; namespace Moses { - TargetPhrase::TargetPhrase( std::string out_string) +TargetPhrase::TargetPhrase( std::string out_string) :Phrase(0) , m_fullScore(0.0) , m_futureScore(0.0) @@ -46,14 +46,14 @@ namespace Moses , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) , m_lhsTarget(NULL) , m_ruleSource(NULL) - { - - //ACAT - const StaticData &staticData = StaticData::Instance(); - CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, staticData.GetFactorDelimiter(), NULL); - } - - TargetPhrase::TargetPhrase() +{ + + //ACAT + const StaticData &staticData = StaticData::Instance(); + CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, staticData.GetFactorDelimiter(), NULL); +} + +TargetPhrase::TargetPhrase() :Phrase() , m_fullScore(0.0) , m_futureScore(0.0) @@ -61,10 +61,10 @@ namespace Moses , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) , m_lhsTarget(NULL) , m_ruleSource(NULL) - { - } - - TargetPhrase::TargetPhrase(const Phrase &phrase) +{ +} + +TargetPhrase::TargetPhrase(const Phrase &phrase) : Phrase(phrase) , m_fullScore(0.0) , m_futureScore(0.0) @@ -72,223 +72,223 @@ namespace Moses , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) , m_lhsTarget(NULL) , m_ruleSource(NULL) - { - } - - TargetPhrase::TargetPhrase(const TargetPhrase ©) +{ +} + +TargetPhrase::TargetPhrase(const TargetPhrase ©) : Phrase(copy) , m_fullScore(copy.m_fullScore) , m_futureScore(copy.m_futureScore) , m_scoreBreakdown(copy.m_scoreBreakdown) , m_alignTerm(copy.m_alignTerm) , m_alignNonTerm(copy.m_alignNonTerm) - { - if (copy.m_lhsTarget) { - m_lhsTarget = new Word(*copy.m_lhsTarget); - } else { - m_lhsTarget = NULL; - } - - if (copy.m_ruleSource) { - m_ruleSource = new Phrase(*copy.m_ruleSource); - } else { - m_ruleSource = NULL; - } - } - - TargetPhrase::~TargetPhrase() - { - //cerr << "m_lhsTarget=" << m_lhsTarget << endl; - - delete m_lhsTarget; - delete m_ruleSource; - } - -#ifdef HAVE_PROTOBUF - void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const - { - pb->add_trg_words("[X,1]"); - for (size_t pos = 0 ; pos < GetSize() ; pos++) - pb->add_trg_words(GetWord(pos)[0]->GetString()); - } -#endif - - void TargetPhrase::Evaluate(const Phrase &source) - { - const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); - Evaluate(source, ffs); - } - - void TargetPhrase::Evaluate(const Phrase &source, const std::vector &ffs) - { - if (ffs.size()) { - const StaticData &staticData = StaticData::Instance(); - ScoreComponentCollection futureScoreBreakdown; - for (size_t i = 0; i < ffs.size(); ++i) { - const FeatureFunction &ff = *ffs[i]; - if (! 
staticData.IsFeatureFunctionIgnored( ff )) { - ff.Evaluate(source, *this, m_scoreBreakdown, futureScoreBreakdown); - } - } - - float weightedScore = m_scoreBreakdown.GetWeightedScore(); - m_futureScore += futureScoreBreakdown.GetWeightedScore(); - m_fullScore = weightedScore + m_futureScore; - } - } - - void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath) - { - const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); - const StaticData &staticData = StaticData::Instance(); - ScoreComponentCollection futureScoreBreakdown; - for (size_t i = 0; i < ffs.size(); ++i) { - const FeatureFunction &ff = *ffs[i]; - if (! staticData.IsFeatureFunctionIgnored( ff )) { - ff.Evaluate(input, inputPath, *this, m_scoreBreakdown, &futureScoreBreakdown); - } - } - float weightedScore = m_scoreBreakdown.GetWeightedScore(); - m_futureScore += futureScoreBreakdown.GetWeightedScore(); - m_fullScore = weightedScore + m_futureScore; - } - - void TargetPhrase::SetXMLScore(float score) - { - const FeatureFunction* prod = PhraseDictionary::GetColl()[0]; - size_t numScores = prod->GetNumScoreComponents(); - vector scoreVector(numScores,score/numScores); - - m_scoreBreakdown.Assign(prod, scoreVector); - } - - void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) - { - AlignmentInfo::CollType alignTerm, alignNonTerm; - for (util::TokenIter token(alignString, util::AnyCharacter(" \t")); token; ++token) { - util::TokenIter dash(*token, util::SingleCharacter('-')); - - char *endptr; - size_t sourcePos = strtoul(dash->data(), &endptr, 10); - UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash); - ++dash; - size_t targetPos = strtoul(dash->data(), &endptr, 10); - UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash); - UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token); - - if (GetWord(targetPos).IsNonTerminal()) { - alignNonTerm.insert(std::pair(sourcePos, targetPos)); - } else { - alignTerm.insert(std::pair(sourcePos, targetPos)); - } - } - SetAlignTerm(alignTerm); - SetAlignNonTerm(alignNonTerm); - // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n"; - } - - void TargetPhrase::SetAlignTerm(const AlignmentInfo::CollType &coll) - { - const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll); - m_alignTerm = alignmentInfo; - - } - - void TargetPhrase::SetAlignNonTerm(const AlignmentInfo::CollType &coll) - { - const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll); - m_alignNonTerm = alignmentInfo; - } - - void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString) - { - m_scoreBreakdown.Assign(translationScoreProducer, sparseString.as_string()); - } - - void TargetPhrase::Merge(const TargetPhrase ©, const std::vector& factorVec) - { - Phrase::MergeFactors(copy, factorVec); - m_scoreBreakdown.Merge(copy.GetScoreBreakdown()); - m_futureScore += copy.m_futureScore; - m_fullScore += copy.m_fullScore; - } - - void TargetPhrase::SetProperties(const StringPiece &str) - { - if (str.size() == 0) { - return; - } - - vector toks; - TokenizeMultiCharSeparator(toks, str.as_string(), "{{"); - for (size_t i = 0; i < toks.size(); ++i) { - string &tok = toks[i]; - if (tok.empty()) { - continue; - } - size_t endPos = tok.rfind("}"); - - tok = tok.substr(0, endPos - 1); - - vector keyValue = TokenizeFirstOnly(tok, " "); 
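// A sketch of the property string this loop expects, using hypothetical key
// names (the real keys are produced by the rule extractor and are not shown
// in this diff):
//   {{Count 5}} {{Tree [X [X a] [X b]]}}
// Each "{{key value}}" block is isolated by splitting on "{{", the closing
// braces are trimmed by the rfind/substr step above, and TokenizeFirstOnly
// separates the key ("Count") from the value ("5"), which may itself contain
// spaces; the size check below rejects blocks that lack either part.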
- UTIL_THROW_IF2(keyValue.size() != 2, - "Incorrect format of property: " << str); - SetProperty(keyValue[0], keyValue[1]); - } - } - - void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const - { - std::map::const_iterator iter; - iter = m_properties.find(key); - if (iter == m_properties.end()) { - found = false; - } else { - found = true; - value = iter->second; - } - } - - void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const - { - if (m_ruleSource == NULL) { - m_ruleSource = new Phrase(ruleSource); - } - } - - void swap(TargetPhrase &first, TargetPhrase &second) - { - first.SwapWords(second); - std::swap(first.m_fullScore, second.m_fullScore); - std::swap(first.m_futureScore, second.m_futureScore); - swap(first.m_scoreBreakdown, second.m_scoreBreakdown); - std::swap(first.m_alignTerm, second.m_alignTerm); - std::swap(first.m_alignNonTerm, second.m_alignNonTerm); - std::swap(first.m_lhsTarget, second.m_lhsTarget); - } - - TO_STRING_BODY(TargetPhrase); - - std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp) - { - if (tp.m_lhsTarget) { - os << *tp.m_lhsTarget<< " -> "; - } +{ + if (copy.m_lhsTarget) { + m_lhsTarget = new Word(*copy.m_lhsTarget); + } else { + m_lhsTarget = NULL; + } + + if (copy.m_ruleSource) { + m_ruleSource = new Phrase(*copy.m_ruleSource); + } else { + m_ruleSource = NULL; + } +} + +TargetPhrase::~TargetPhrase() +{ + //cerr << "m_lhsTarget=" << m_lhsTarget << endl; + + delete m_lhsTarget; + delete m_ruleSource; +} + +#ifdef HAVE_PROTOBUF +void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const +{ + pb->add_trg_words("[X,1]"); + for (size_t pos = 0 ; pos < GetSize() ; pos++) + pb->add_trg_words(GetWord(pos)[0]->GetString()); +} +#endif + +void TargetPhrase::Evaluate(const Phrase &source) +{ + const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); + Evaluate(source, ffs); +} + +void TargetPhrase::Evaluate(const Phrase &source, const std::vector &ffs) +{ + if (ffs.size()) { + const StaticData &staticData = StaticData::Instance(); + ScoreComponentCollection futureScoreBreakdown; + for (size_t i = 0; i < ffs.size(); ++i) { + const FeatureFunction &ff = *ffs[i]; + if (! staticData.IsFeatureFunctionIgnored( ff )) { + ff.Evaluate(source, *this, m_scoreBreakdown, futureScoreBreakdown); + } + } + + float weightedScore = m_scoreBreakdown.GetWeightedScore(); + m_futureScore += futureScoreBreakdown.GetWeightedScore(); + m_fullScore = weightedScore + m_futureScore; + } +} + +void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath) +{ + const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); + const StaticData &staticData = StaticData::Instance(); + ScoreComponentCollection futureScoreBreakdown; + for (size_t i = 0; i < ffs.size(); ++i) { + const FeatureFunction &ff = *ffs[i]; + if (! 
staticData.IsFeatureFunctionIgnored( ff )) { + ff.Evaluate(input, inputPath, *this, m_scoreBreakdown, &futureScoreBreakdown); + } + } + float weightedScore = m_scoreBreakdown.GetWeightedScore(); + m_futureScore += futureScoreBreakdown.GetWeightedScore(); + m_fullScore = weightedScore + m_futureScore; +} + +void TargetPhrase::SetXMLScore(float score) +{ + const FeatureFunction* prod = PhraseDictionary::GetColl()[0]; + size_t numScores = prod->GetNumScoreComponents(); + vector scoreVector(numScores,score/numScores); + + m_scoreBreakdown.Assign(prod, scoreVector); +} + +void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) +{ + AlignmentInfo::CollType alignTerm, alignNonTerm; + for (util::TokenIter token(alignString, util::AnyCharacter(" \t")); token; ++token) { + util::TokenIter dash(*token, util::SingleCharacter('-')); + + char *endptr; + size_t sourcePos = strtoul(dash->data(), &endptr, 10); + UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash); + ++dash; + size_t targetPos = strtoul(dash->data(), &endptr, 10); + UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash); + UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token); + + if (GetWord(targetPos).IsNonTerminal()) { + alignNonTerm.insert(std::pair(sourcePos, targetPos)); + } else { + alignTerm.insert(std::pair(sourcePos, targetPos)); + } + } + SetAlignTerm(alignTerm); + SetAlignNonTerm(alignNonTerm); + // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n"; +} + +void TargetPhrase::SetAlignTerm(const AlignmentInfo::CollType &coll) +{ + const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll); + m_alignTerm = alignmentInfo; + +} + +void TargetPhrase::SetAlignNonTerm(const AlignmentInfo::CollType &coll) +{ + const AlignmentInfo *alignmentInfo = AlignmentInfoCollection::Instance().Add(coll); + m_alignNonTerm = alignmentInfo; +} + +void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString) +{ + m_scoreBreakdown.Assign(translationScoreProducer, sparseString.as_string()); +} + +void TargetPhrase::Merge(const TargetPhrase ©, const std::vector& factorVec) +{ + Phrase::MergeFactors(copy, factorVec); + m_scoreBreakdown.Merge(copy.GetScoreBreakdown()); + m_futureScore += copy.m_futureScore; + m_fullScore += copy.m_fullScore; +} + +void TargetPhrase::SetProperties(const StringPiece &str) +{ + if (str.size() == 0) { + return; + } + + vector toks; + TokenizeMultiCharSeparator(toks, str.as_string(), "{{"); + for (size_t i = 0; i < toks.size(); ++i) { + string &tok = toks[i]; + if (tok.empty()) { + continue; + } + size_t endPos = tok.rfind("}"); + + tok = tok.substr(0, endPos - 1); + + vector keyValue = TokenizeFirstOnly(tok, " "); + UTIL_THROW_IF2(keyValue.size() != 2, + "Incorrect format of property: " << str); + SetProperty(keyValue[0], keyValue[1]); + } +} + +void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const +{ + std::map::const_iterator iter; + iter = m_properties.find(key); + if (iter == m_properties.end()) { + found = false; + } else { + found = true; + value = iter->second; + } +} + +void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const +{ + if (m_ruleSource == NULL) { + m_ruleSource = new Phrase(ruleSource); + } +} + +void swap(TargetPhrase &first, TargetPhrase &second) +{ + first.SwapWords(second); + std::swap(first.m_fullScore, 
second.m_fullScore); + std::swap(first.m_futureScore, second.m_futureScore); + swap(first.m_scoreBreakdown, second.m_scoreBreakdown); + std::swap(first.m_alignTerm, second.m_alignTerm); + std::swap(first.m_alignNonTerm, second.m_alignNonTerm); + std::swap(first.m_lhsTarget, second.m_lhsTarget); +} + +TO_STRING_BODY(TargetPhrase); + +std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp) +{ + if (tp.m_lhsTarget) { + os << *tp.m_lhsTarget<< " -> "; + } + + os << static_cast(tp) << ":" << flush; + // os << tp.GetAlignNonTerm() << flush; + os << ": term=" << tp.GetAlignTerm() << flush; + os << ": nonterm=" << tp.GetAlignNonTerm() << flush; + os << ": c=" << tp.m_fullScore << flush; + os << " " << tp.m_scoreBreakdown << flush; + + const Phrase *sourcePhrase = tp.GetRuleSource(); + if (sourcePhrase) { + os << " sourcePhrase=" << *sourcePhrase << flush; + } + + return os; +} - os << static_cast(tp) << ":" << flush; - // os << tp.GetAlignNonTerm() << flush; - os << ": term=" << tp.GetAlignTerm() << flush; - os << ": nonterm=" << tp.GetAlignNonTerm() << flush; - os << ": c=" << tp.m_fullScore << flush; - os << " " << tp.m_scoreBreakdown << flush; - - const Phrase *sourcePhrase = tp.GetRuleSource(); - if (sourcePhrase) { - os << " sourcePhrase=" << *sourcePhrase << flush; - } - - return os; - } - } diff --git a/moses/TranslationModel/PhraseDictionary.cpp b/moses/TranslationModel/PhraseDictionary.cpp index d12dc483d..af0b754fe 100644 --- a/moses/TranslationModel/PhraseDictionary.cpp +++ b/moses/TranslationModel/PhraseDictionary.cpp @@ -136,22 +136,22 @@ SetFeaturesToApply() } } - - // tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more - void - PhraseDictionary:: - Release(TargetPhraseCollection const* tpc) const - { - // do nothing by default - return; - } - bool - PhraseDictionary:: - PrefixExists(Phrase const& phrase) const - { - return true; - } +// tell the Phrase Dictionary that the TargetPhraseCollection is not needed any more +void +PhraseDictionary:: +Release(TargetPhraseCollection const* tpc) const +{ + // do nothing by default + return; +} + +bool +PhraseDictionary:: +PrefixExists(Phrase const& phrase) const +{ + return true; +} void PhraseDictionary:: diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h index 0f63581fa..f616ae1a6 100644 --- a/moses/TranslationModel/PhraseDictionary.h +++ b/moses/TranslationModel/PhraseDictionary.h @@ -91,7 +91,7 @@ public: void Release(TargetPhraseCollection const* tpc) const; - /// return true if phrase table entries starting with /phrase/ + /// return true if phrase table entries starting with /phrase/ // exist in the table. virtual bool diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp index e0f90a7f1..eea1e8b04 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp @@ -3,17 +3,17 @@ /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. 
- + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA @@ -31,679 +31,675 @@ using namespace std; namespace Moses { - std::map< const std::string, PhraseDictionaryDynamicCacheBased * > PhraseDictionaryDynamicCacheBased::s_instance_map; - PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance = NULL; - - //! contructor - PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line) +std::map< const std::string, PhraseDictionaryDynamicCacheBased * > PhraseDictionaryDynamicCacheBased::s_instance_map; +PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance = NULL; + +//! contructor +PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line) : PhraseDictionary(line) - { - std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl; - - //disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0) - m_maxCacheSize = 0; - - m_score_type = CBTM_SCORE_TYPE_HYPERBOLA; - m_maxAge = 1000; - m_entries = 0; - m_name = "default"; - ReadParameters(); - - UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed"); - s_instance_map[m_name] = this; - s_instance = this; //for back compatibility - } - - PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased() - { - Clear(); - } - - void PhraseDictionaryDynamicCacheBased::Load() - { - std::cerr << "PhraseDictionaryDynamicCacheBased::Load()" << std::endl; - VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl); - SetFeaturesToApply(); - vector weight = StaticData::Instance().GetWeights(this); - SetPreComputedScores(weight.size()); - Load(m_initfiles); - } - - void PhraseDictionaryDynamicCacheBased::Load(const std::string file) - { - VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load(const std::string file)" << std::endl); - std::vector files = Tokenize(m_initfiles, "||"); - Load_Multiple_Files(files); - } - - void PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector files) - { - VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector files)" << std::endl); - for(size_t j = 0; j < files.size(); ++j) { - Load_Single_File(files[j]); - } - } - - void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file) - { - VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)" << std::endl); - } - - void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, const std::string& value) - { - VERBOSE(2, "PhraseDictionaryDynamicCacheBased::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl); - - if(key == "cbtm-score-type") { - SetScoreType(Scan(value)); - } else if (key == "cbtm-max-age") { - SetMaxAge(Scan(value)); - } else if (key == "cbtm-file") { - m_initfiles = Scan(value); - Load(m_initfiles); - } else if (key == "cbtm-name") { - m_name = Scan(value); - } else { - PhraseDictionary::SetParameter(key, value); - } - } - - void 
PhraseDictionaryDynamicCacheBased::InitializeForInput(InputType const& source) - { - ReduceCache(); - } - - const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const - { +{ + std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl; + + //disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0) + m_maxCacheSize = 0; + + m_score_type = CBTM_SCORE_TYPE_HYPERBOLA; + m_maxAge = 1000; + m_entries = 0; + m_name = "default"; + ReadParameters(); + + UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed"); + s_instance_map[m_name] = this; + s_instance = this; //for back compatibility +} + +PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased() +{ + Clear(); +} + +void PhraseDictionaryDynamicCacheBased::Load() +{ + std::cerr << "PhraseDictionaryDynamicCacheBased::Load()" << std::endl; + VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl); + SetFeaturesToApply(); + vector weight = StaticData::Instance().GetWeights(this); + SetPreComputedScores(weight.size()); + Load(m_initfiles); +} + +void PhraseDictionaryDynamicCacheBased::Load(const std::string file) +{ + VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load(const std::string file)" << std::endl); + std::vector files = Tokenize(m_initfiles, "||"); + Load_Multiple_Files(files); +} + +void PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector files) +{ + VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector files)" << std::endl); + for(size_t j = 0; j < files.size(); ++j) { + Load_Single_File(files[j]); + } +} + +void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file) +{ + VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)" << std::endl); +} + +void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, const std::string& value) +{ + VERBOSE(2, "PhraseDictionaryDynamicCacheBased::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl); + + if(key == "cbtm-score-type") { + SetScoreType(Scan(value)); + } else if (key == "cbtm-max-age") { + SetMaxAge(Scan(value)); + } else if (key == "cbtm-file") { + m_initfiles = Scan(value); + Load(m_initfiles); + } else if (key == "cbtm-name") { + m_name = Scan(value); + } else { + PhraseDictionary::SetParameter(key, value); + } +} + +void PhraseDictionaryDynamicCacheBased::InitializeForInput(InputType const& source) +{ + ReduceCache(); +} + +const TargetPhraseCollection *PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const +{ #ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); + boost::shared_lock read_lock(m_cacheLock); #endif - TargetPhraseCollection* tpc = NULL; - cacheMap::const_iterator it = m_cacheTM.find(source); - if(it != m_cacheTM.end()) { - tpc = new TargetPhraseCollection(*(it->second).first); - - std::vector::const_iterator it2 = tpc->begin(); - - while (it2 != tpc->end()) { - ((TargetPhrase*) *it2)->Evaluate(source, GetFeaturesToApply()); - it2++; - } - } - if (tpc) { - tpc->NthElement(m_tableLimit); // sort the phrases for the decoder - } - - return tpc; - } - - const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const - { - const TargetPhraseCollection *ret = GetTargetPhraseCollection(src); 
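// Note on the legacy path: it forwards to GetTargetPhraseCollection above,
// which copies the cached TargetPhraseCollection for the source phrase,
// re-runs Evaluate on every copied TargetPhrase with GetFeaturesToApply(),
// and calls NthElement(m_tableLimit) to order the phrases for the decoder.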
- return ret; - } - - ChartRuleLookupManager* PhraseDictionaryDynamicCacheBased::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/) - { - UTIL_THROW(util::Exception, "Phrase table used in chart decoder"); - } - - void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type) - { + TargetPhraseCollection* tpc = NULL; + cacheMap::const_iterator it = m_cacheTM.find(source); + if(it != m_cacheTM.end()) { + tpc = new TargetPhraseCollection(*(it->second).first); + + std::vector::const_iterator it2 = tpc->begin(); + + while (it2 != tpc->end()) { + ((TargetPhrase*) *it2)->Evaluate(source, GetFeaturesToApply()); + it2++; + } + } + if (tpc) { + tpc->NthElement(m_tableLimit); // sort the phrases for the decoder + } + + return tpc; +} + +const TargetPhraseCollection* PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const +{ + const TargetPhraseCollection *ret = GetTargetPhraseCollection(src); + return ret; +} + +ChartRuleLookupManager* PhraseDictionaryDynamicCacheBased::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/) +{ + UTIL_THROW(util::Exception, "Phrase table used in chart decoder"); +} + +void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type) +{ #ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); + boost::shared_lock read_lock(m_cacheLock); #endif - - m_score_type = type; - if ( m_score_type != CBTM_SCORE_TYPE_HYPERBOLA - && m_score_type != CBTM_SCORE_TYPE_POWER - && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL - && m_score_type != CBTM_SCORE_TYPE_COSINE - && m_score_type != CBTM_SCORE_TYPE_HYPERBOLA_REWARD - && m_score_type != CBTM_SCORE_TYPE_POWER_REWARD - && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD ) { - VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBTM_SCORE_TYPE_HYPERBOLA << "." << std::endl); - m_score_type = CBTM_SCORE_TYPE_HYPERBOLA; - } - - VERBOSE(2, "PhraseDictionaryDynamicCacheBased ScoreType: " << m_score_type << std::endl); - } - - - void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age) - { + + m_score_type = type; + if ( m_score_type != CBTM_SCORE_TYPE_HYPERBOLA + && m_score_type != CBTM_SCORE_TYPE_POWER + && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL + && m_score_type != CBTM_SCORE_TYPE_COSINE + && m_score_type != CBTM_SCORE_TYPE_HYPERBOLA_REWARD + && m_score_type != CBTM_SCORE_TYPE_POWER_REWARD + && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD ) { + VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBTM_SCORE_TYPE_HYPERBOLA << "." 
<< std::endl); + m_score_type = CBTM_SCORE_TYPE_HYPERBOLA; + } + + VERBOSE(2, "PhraseDictionaryDynamicCacheBased ScoreType: " << m_score_type << std::endl); +} + + +void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age) +{ #ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); + boost::shared_lock read_lock(m_cacheLock); #endif - m_maxAge = age; - VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl); - } - - - // friend - ostream& operator<<(ostream& out, const PhraseDictionaryDynamicCacheBased& phraseDict) - { - return out; - } - - float PhraseDictionaryDynamicCacheBased::decaying_score(const int age) - { - float sc; - switch(m_score_type) { - case CBTM_SCORE_TYPE_HYPERBOLA: - sc = (float) 1.0/age - 1.0; - break; - case CBTM_SCORE_TYPE_POWER: - sc = (float) pow(age, -0.25) - 1.0; - break; - case CBTM_SCORE_TYPE_EXPONENTIAL: - sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0; - break; - case CBTM_SCORE_TYPE_COSINE: - sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0; - break; - case CBTM_SCORE_TYPE_HYPERBOLA_REWARD: - sc = (float) 1.0/age; - break; - case CBTM_SCORE_TYPE_POWER_REWARD: - sc = (float) pow(age, -0.25); - break; - case CBTM_SCORE_TYPE_EXPONENTIAL_REWARD: - sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0); - break; - default: - sc = -1.0; - } - return sc; - } - - void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int numScoreComponent) - { + m_maxAge = age; + VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl); +} + + +// friend +ostream& operator<<(ostream& out, const PhraseDictionaryDynamicCacheBased& phraseDict) +{ + return out; +} + +float PhraseDictionaryDynamicCacheBased::decaying_score(const int age) +{ + float sc; + switch(m_score_type) { + case CBTM_SCORE_TYPE_HYPERBOLA: + sc = (float) 1.0/age - 1.0; + break; + case CBTM_SCORE_TYPE_POWER: + sc = (float) pow(age, -0.25) - 1.0; + break; + case CBTM_SCORE_TYPE_EXPONENTIAL: + sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0; + break; + case CBTM_SCORE_TYPE_COSINE: + sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0; + break; + case CBTM_SCORE_TYPE_HYPERBOLA_REWARD: + sc = (float) 1.0/age; + break; + case CBTM_SCORE_TYPE_POWER_REWARD: + sc = (float) pow(age, -0.25); + break; + case CBTM_SCORE_TYPE_EXPONENTIAL_REWARD: + sc = (age == 1) ? 
1.0 : (float) exp( 1.0/age ) / exp(1.0); + break; + default: + sc = -1.0; + } + return sc; +} + +void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int numScoreComponent) +{ #ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); + boost::shared_lock lock(m_cacheLock); #endif - float sc; - for (size_t i=0; i<=m_maxAge; i++) { - if (i==m_maxAge) { - if ( m_score_type == CBTM_SCORE_TYPE_HYPERBOLA - || m_score_type == CBTM_SCORE_TYPE_POWER - || m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL - || m_score_type == CBTM_SCORE_TYPE_COSINE ) { - sc = decaying_score(m_maxAge)/numScoreComponent; - } else { // m_score_type = CBTM_SCORE_TYPE_XXXXXXXXX_REWARD - sc = 0.0; - } - } else { - sc = decaying_score(i)/numScoreComponent; - } - Scores sc_vec; - for (size_t j=0; j elements = TokenizeMultiCharSeparator(entries, "||||"); - VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); - ClearEntries(elements); - } - } - - void PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector entries) - { - VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector entries)" << std::endl); - std::vector pp; - - std::vector::iterator it; - for(it = entries.begin(); it!=entries.end(); it++) { - pp.clear(); - pp = TokenizeMultiCharSeparator((*it), "|||"); - VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); - VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); - - ClearEntries(pp[0], pp[1]); - } - } - - void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString) - { - VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl); - const StaticData &staticData = StaticData::Instance(); - const std::string& factorDelimiter = staticData.GetFactorDelimiter(); - Phrase sourcePhrase(0); - Phrase targetPhrase(0); - - //target - targetPhrase.Clear(); - VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); - targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL); - VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); - - //TODO: Would be better to reuse source phrases, but ownership has to be - //consistent across phrase table implementations - sourcePhrase.Clear(); - VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, factorDelimiter, NULL); - VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); - ClearEntries(sourcePhrase, targetPhrase); - - } - - void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) - { - VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl); + float sc; + for (size_t i=0; i<=m_maxAge; i++) { + if (i==m_maxAge) { + if ( m_score_type == CBTM_SCORE_TYPE_HYPERBOLA + || m_score_type == CBTM_SCORE_TYPE_POWER + || m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL + || m_score_type == CBTM_SCORE_TYPE_COSINE ) { + sc = decaying_score(m_maxAge)/numScoreComponent; + } else { // m_score_type = CBTM_SCORE_TYPE_XXXXXXXXX_REWARD + sc = 0.0; + } + } else { + sc = decaying_score(i)/numScoreComponent; + } + Scores sc_vec; + for (size_t j=0; j elements = TokenizeMultiCharSeparator(entries, "||||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + ClearEntries(elements); + } +} + +void 
PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector entries) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector entries)" << std::endl); + std::vector pp; + + std::vector::iterator it; + for(it = entries.begin(); it!=entries.end(); it++) { + pp.clear(); + pp = TokenizeMultiCharSeparator((*it), "|||"); + VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); + VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); + + ClearEntries(pp[0], pp[1]); + } +} + +void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl); + const StaticData &staticData = StaticData::Instance(); + const std::string& factorDelimiter = staticData.GetFactorDelimiter(); + Phrase sourcePhrase(0); + Phrase targetPhrase(0); + + //target + targetPhrase.Clear(); + VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); + targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL); + VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); + + //TODO: Would be better to reuse source phrases, but ownership has to be + //consistent across phrase table implementations + sourcePhrase.Clear(); + VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); + sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, factorDelimiter, NULL); + VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); + ClearEntries(sourcePhrase, targetPhrase); + +} + +void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl); #ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); + boost::shared_lock lock(m_cacheLock); #endif - VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl); - - cacheMap::const_iterator it = m_cacheTM.find(sp); - VERBOSE(3,"sp:|" << sp << "|" << std::endl); - if(it!=m_cacheTM.end()) { - VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); - // sp is found - // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap - // and then add new entry - - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; - const Phrase* p_ptr = NULL; - TargetPhrase* tp_ptr = NULL; - bool found = false; - size_t tp_pos=0; - while (!found && tp_pos < tpc->GetSize()) { - tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); - p_ptr = (const Phrase*) tp_ptr; - if (tp == *p_ptr) { - found = true; - continue; - } - tp_pos++; - } - if (!found) { - VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); - //do nothing - } - else{ - VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl); - - tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection - ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection - m_entries--; - VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl); - VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl); - VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl); - } - if (tpc->GetSize() == 0) { - // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection - ac->clear(); - delete tpc; - delete ac; - 
m_cacheTM.erase(sp); - } - - } else { - VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); - //do nothing - } - } - - - - - void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries) - { - if (entries != "") { - VERBOSE(3,"entries:|" << entries << "|" << std::endl); - std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); - VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); - ClearEntries(elements); - } - } - - void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector entries) - { - VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); - const StaticData &staticData = StaticData::Instance(); - const std::string& factorDelimiter = staticData.GetFactorDelimiter(); - Phrase sourcePhrase(0); - - std::vector::iterator it; - for(it = entries.begin(); it!=entries.end(); it++) { - - sourcePhrase.Clear(); - VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, factorDelimiter, NULL); - VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); - - ClearSource(sourcePhrase); - } - - IFVERBOSE(2) Print(); - } - - void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) - { - VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl); - cacheMap::const_iterator it = m_cacheTM.find(sp); - if (it != m_cacheTM.end()) { - VERBOSE(3,"found:|" << sp << "|" << std::endl); - //sp is found - - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; - - m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache - - // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection - ac->clear(); - delete tpc; - delete ac; - m_cacheTM.erase(sp); - } - else - { - //do nothing - } - } - - void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries) - { - if (entries != "") { - VERBOSE(3,"entries:|" << entries << "|" << std::endl); - std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); - VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); - Insert(elements); - } - } - - void PhraseDictionaryDynamicCacheBased::Insert(std::vector entries) - { - VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); - Decay(); - Update(entries, "1"); - IFVERBOSE(2) Print(); - } - - - void PhraseDictionaryDynamicCacheBased::Update(std::vector entries, std::string ageString) - { - VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::vector entries, std::string ageString)" << std::endl); - std::vector pp; - - std::vector::iterator it; - for(it = entries.begin(); it!=entries.end(); it++) { - pp.clear(); - pp = TokenizeMultiCharSeparator((*it), "|||"); - VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); - VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); - - if (pp.size() > 2){ - VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); - Update(pp[0], pp[1], ageString, pp[2]); - }else{ - Update(pp[0], pp[1], ageString); - } - } - } - - void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString) - { - VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl); - const StaticData &staticData = 
StaticData::Instance(); - const std::string& factorDelimiter = staticData.GetFactorDelimiter(); - Phrase sourcePhrase(0); - Phrase targetPhrase(0); - - char *err_ind_temp; - int age = strtod(ageString.c_str(), &err_ind_temp); - //target - targetPhrase.Clear(); - VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); - targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL); - VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); - - //TODO: Would be better to reuse source phrases, but ownership has to be - //consistent across phrase table implementations - sourcePhrase.Clear(); - VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, factorDelimiter, NULL); - VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); - - if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl); - - Update(sourcePhrase, targetPhrase, age, waString); - } - - void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age, std::string waString) - { - VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age, std::string waString)" << std::endl); + VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl); + + cacheMap::const_iterator it = m_cacheTM.find(sp); + VERBOSE(3,"sp:|" << sp << "|" << std::endl); + if(it!=m_cacheTM.end()) { + VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); + // sp is found + // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap + // and then add new entry + + TargetCollectionAgePair TgtCollAgePair = it->second; + TargetPhraseCollection* tpc = TgtCollAgePair.first; + AgeCollection* ac = TgtCollAgePair.second; + const Phrase* p_ptr = NULL; + TargetPhrase* tp_ptr = NULL; + bool found = false; + size_t tp_pos=0; + while (!found && tp_pos < tpc->GetSize()) { + tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); + p_ptr = (const Phrase*) tp_ptr; + if (tp == *p_ptr) { + found = true; + continue; + } + tp_pos++; + } + if (!found) { + VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); + //do nothing + } else { + VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl); + + tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection + ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection + m_entries--; + VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl); + VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl); + VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl); + } + if (tpc->GetSize() == 0) { + // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection + ac->clear(); + delete tpc; + delete ac; + m_cacheTM.erase(sp); + } + + } else { + VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); + //do nothing + } +} + + + + +void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + ClearEntries(elements); + } +} + +void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector entries) +{ + VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); + const StaticData &staticData = 
StaticData::Instance(); + const std::string& factorDelimiter = staticData.GetFactorDelimiter(); + Phrase sourcePhrase(0); + + std::vector::iterator it; + for(it = entries.begin(); it!=entries.end(); it++) { + + sourcePhrase.Clear(); + VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl); + sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, factorDelimiter, NULL); + VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); + + ClearSource(sourcePhrase); + } + + IFVERBOSE(2) Print(); +} + +void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) +{ + VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl); + cacheMap::const_iterator it = m_cacheTM.find(sp); + if (it != m_cacheTM.end()) { + VERBOSE(3,"found:|" << sp << "|" << std::endl); + //sp is found + + TargetCollectionAgePair TgtCollAgePair = it->second; + TargetPhraseCollection* tpc = TgtCollAgePair.first; + AgeCollection* ac = TgtCollAgePair.second; + + m_entries-=tpc->GetSize(); //reduce the total amount of entries of the cache + + // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection + ac->clear(); + delete tpc; + delete ac; + m_cacheTM.erase(sp); + } else { + //do nothing + } +} + +void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries) +{ + if (entries != "") { + VERBOSE(3,"entries:|" << entries << "|" << std::endl); + std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); + VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); + Insert(elements); + } +} + +void PhraseDictionaryDynamicCacheBased::Insert(std::vector entries) +{ + VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); + Decay(); + Update(entries, "1"); + IFVERBOSE(2) Print(); +} + + +void PhraseDictionaryDynamicCacheBased::Update(std::vector entries, std::string ageString) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::vector entries, std::string ageString)" << std::endl); + std::vector pp; + + std::vector::iterator it; + for(it = entries.begin(); it!=entries.end(); it++) { + pp.clear(); + pp = TokenizeMultiCharSeparator((*it), "|||"); + VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); + VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); + + if (pp.size() > 2) { + VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); + Update(pp[0], pp[1], ageString, pp[2]); + } else { + Update(pp[0], pp[1], ageString); + } + } +} + +void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl); + const StaticData &staticData = StaticData::Instance(); + const std::string& factorDelimiter = staticData.GetFactorDelimiter(); + Phrase sourcePhrase(0); + Phrase targetPhrase(0); + + char *err_ind_temp; + int age = strtod(ageString.c_str(), &err_ind_temp); + //target + targetPhrase.Clear(); + VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); + targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, factorDelimiter, NULL); + VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); + + //TODO: Would be better to reuse source phrases, but ownership has to be + //consistent across phrase table implementations + 
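// For reference, the strings reaching this point come from Insert()/Update():
// entries are separated by "||||" and fields within an entry by "|||", i.e.
// "source ||| target" with an optional third word-alignment field that ends
// up in waString. An illustrative entry (the words are made up):
//   una casa rossa ||| a red house ||| 0-0 2-1 1-2
// Insert() passes ageString "1" for fresh entries; here the age string is
// converted to an int and the two phrases are built from their factor strings.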
sourcePhrase.Clear(); + VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); + sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, factorDelimiter, NULL); + VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); + + if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl); + + Update(sourcePhrase, targetPhrase, age, waString); +} + +void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age, std::string waString) +{ + VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, Phrase tp, int age, std::string waString)" << std::endl); #ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); + boost::shared_lock lock(m_cacheLock); #endif - VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| age:|" << age << "| word-alignment |" << waString << "|" << std::endl); - - cacheMap::const_iterator it = m_cacheTM.find(sp); - VERBOSE(3,"sp:|" << sp << "|" << std::endl); - if(it!=m_cacheTM.end()) { - VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); - // sp is found - // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap - // and then add new entry - - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; - const Phrase* p_ptr = NULL; - TargetPhrase* tp_ptr = NULL; - bool found = false; - size_t tp_pos=0; - while (!found && tp_pos < tpc->GetSize()) { - tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); - p_ptr = (const Phrase*) tp_ptr; - if (tp == *p_ptr) { - found = true; - continue; - } - tp_pos++; - } - if (!found) { - VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); - std::auto_ptr targetPhrase(new TargetPhrase(tp)); - - targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); - if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); - - tpc->Add(targetPhrase.release()); - - tp_pos = tpc->GetSize()-1; - ac->push_back(age); - m_entries++; - VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl); - } - else{ - tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); - if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString); - ac->at(tp_pos) = age; - VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl); - } - } else { - VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); - // p is not found - // create target collection - // we have to create new target collection age pair and add new entry to target collection age pair - - TargetPhraseCollection* tpc = new TargetPhraseCollection(); - AgeCollection* ac = new AgeCollection(); - m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac))); - - //tp is not found - std::auto_ptr targetPhrase(new TargetPhrase(tp)); - - targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); - if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); - - tpc->Add(targetPhrase.release()); - ac->push_back(age); - m_entries++; - VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl); - } - } - - void PhraseDictionaryDynamicCacheBased::Decay() - { + VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| age:|" << age << "| word-alignment |" << waString << "|" << std::endl); + + cacheMap::const_iterator it = m_cacheTM.find(sp); + VERBOSE(3,"sp:|" << sp << "|" << std::endl); + if(it!=m_cacheTM.end()) { + VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); 
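// The cache maps each source Phrase to a TargetCollectionAgePair: a
// TargetPhraseCollection plus a parallel AgeCollection holding one age per
// target phrase. When the source phrase is found (this branch), the target
// phrase is either refreshed in place or appended, and its age is mapped to
// a feature score via GetPreComputedScores(age). Illustrative values,
// assuming the defaults set in the constructor (hyperbola score type, max
// age 1000) and a single score component: decaying_score(age) = 1/age - 1,
// so a fresh entry (age 1) scores 0.0, age 2 gives -0.5, age 4 gives -0.75,
// and the score approaches -1.0 until Decay() drops the entry once its age
// exceeds m_maxAge.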
+ // sp is found + // here we have to remove the target phrase from targetphrasecollection and from the TargetAgeMap + // and then add new entry + + TargetCollectionAgePair TgtCollAgePair = it->second; + TargetPhraseCollection* tpc = TgtCollAgePair.first; + AgeCollection* ac = TgtCollAgePair.second; + const Phrase* p_ptr = NULL; + TargetPhrase* tp_ptr = NULL; + bool found = false; + size_t tp_pos=0; + while (!found && tp_pos < tpc->GetSize()) { + tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); + p_ptr = (const Phrase*) tp_ptr; + if (tp == *p_ptr) { + found = true; + continue; + } + tp_pos++; + } + if (!found) { + VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); + std::auto_ptr targetPhrase(new TargetPhrase(tp)); + + targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); + + tpc->Add(targetPhrase.release()); + + tp_pos = tpc->GetSize()-1; + ac->push_back(age); + m_entries++; + VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl); + } else { + tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString); + ac->at(tp_pos) = age; + VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl); + } + } else { + VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); + // p is not found + // create target collection + // we have to create new target collection age pair and add new entry to target collection age pair + + TargetPhraseCollection* tpc = new TargetPhraseCollection(); + AgeCollection* ac = new AgeCollection(); + m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac))); + + //tp is not found + std::auto_ptr targetPhrase(new TargetPhrase(tp)); + + targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age)); + if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); + + tpc->Add(targetPhrase.release()); + ac->push_back(age); + m_entries++; + VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl); + } +} + +void PhraseDictionaryDynamicCacheBased::Decay() +{ #ifdef WITH_THREADS - boost::shared_lock lock(m_cacheLock); + boost::shared_lock lock(m_cacheLock); #endif - cacheMap::iterator it; - for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { - Decay((*it).first); - } - } - - void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) - { - VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl); - cacheMap::const_iterator it = m_cacheTM.find(sp); - if (it != m_cacheTM.end()) { - VERBOSE(3,"found:|" << sp << "|" << std::endl); - //sp is found - - TargetCollectionAgePair TgtCollAgePair = it->second; - TargetPhraseCollection* tpc = TgtCollAgePair.first; - AgeCollection* ac = TgtCollAgePair.second; - - //loop in inverted order to allow a correct deletion of std::vectors tpc and ac - for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) { - unsigned int tp_age = ac->at(tp_pos); //increase the age by 1 - tp_age++; //increase the age by 1 - VERBOSE(3,"sp:|" << sp << "| " << " new tp_age:|" << tp_age << "|" << std::endl); - - TargetPhrase* tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); - - if (tp_age > m_maxAge) { - VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl); - tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection - ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection - m_entries--; - } else { - VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl); - 
tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age)); - ac->at(tp_pos) = tp_age; - } - } - if (tpc->GetSize() == 0) { - // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection - (((*it).second).second)->clear(); - delete ((*it).second).second; - delete ((*it).second).first; - m_cacheTM.erase(sp); - } - } else { - //do nothing - VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); - } - - //put here the removal of entries with age greater than m_maxAge - } - - void PhraseDictionaryDynamicCacheBased::Execute(std::string command) - { - VERBOSE(2,"command:|" << command << "|" << std::endl); - std::vector commands = Tokenize(command, "||"); - Execute(commands); - } - - void PhraseDictionaryDynamicCacheBased::Execute(std::vector commands) - { - for (size_t j=0; jsecond; + TargetPhraseCollection* tpc = TgtCollAgePair.first; + AgeCollection* ac = TgtCollAgePair.second; + + //loop in inverted order to allow a correct deletion of std::vectors tpc and ac + for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) { + unsigned int tp_age = ac->at(tp_pos); //increase the age by 1 + tp_age++; //increase the age by 1 + VERBOSE(3,"sp:|" << sp << "| " << " new tp_age:|" << tp_age << "|" << std::endl); + + TargetPhrase* tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); + + if (tp_age > m_maxAge) { + VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl); + tpc->Remove(tp_pos); //delete entry in the Target Phrase Collection + ac->erase(ac->begin() + tp_pos); //delete entry in the Age Collection + m_entries--; + } else { + VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl); + tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age)); + ac->at(tp_pos) = tp_age; + } + } + if (tpc->GetSize() == 0) { + // delete the entry from m_cacheTM in case it points to an empty TargetPhraseCollection and AgeCollection + (((*it).second).second)->clear(); + delete ((*it).second).second; + delete ((*it).second).first; + m_cacheTM.erase(sp); + } + } else { + //do nothing + VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); + } + + //put here the removal of entries with age greater than m_maxAge +} + +void PhraseDictionaryDynamicCacheBased::Execute(std::string command) +{ + VERBOSE(2,"command:|" << command << "|" << std::endl); + std::vector commands = Tokenize(command, "||"); + Execute(commands); +} + +void PhraseDictionaryDynamicCacheBased::Execute(std::vector commands) +{ + for (size_t j=0; j lock(m_cacheLock); + boost::shared_lock lock(m_cacheLock); #endif - cacheMap::const_iterator it; - for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { - (((*it).second).second)->clear(); - delete ((*it).second).second; - delete ((*it).second).first; - } - m_cacheTM.clear(); - m_entries = 0; - } - - - void PhraseDictionaryDynamicCacheBased::ExecuteDlt(std::map dlt_meta) - { - if (dlt_meta.find("cbtm") != dlt_meta.end()) { - Insert(dlt_meta["cbtm"]); - } - if (dlt_meta.find("cbtm-command") != dlt_meta.end()) { - Execute(dlt_meta["cbtm-command"]); - } - if (dlt_meta.find("cbtm-file") != dlt_meta.end()) { - Load(dlt_meta["cbtm-file"]); - } - if (dlt_meta.find("cbtm-clear-source") != dlt_meta.end()) { - ClearSource(dlt_meta["cbtm-clear-source"]); - } - if (dlt_meta.find("cbtm-clear-entries") != dlt_meta.end()) { - ClearEntries(dlt_meta["cbtm-clear-entries"]); - } - if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) { - Clear(); - } - - } - - void PhraseDictionaryDynamicCacheBased::Print() const - { - 
VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Print()" << std::endl); + cacheMap::const_iterator it; + for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { + (((*it).second).second)->clear(); + delete ((*it).second).second; + delete ((*it).second).first; + } + m_cacheTM.clear(); + m_entries = 0; +} + + +void PhraseDictionaryDynamicCacheBased::ExecuteDlt(std::map dlt_meta) +{ + if (dlt_meta.find("cbtm") != dlt_meta.end()) { + Insert(dlt_meta["cbtm"]); + } + if (dlt_meta.find("cbtm-command") != dlt_meta.end()) { + Execute(dlt_meta["cbtm-command"]); + } + if (dlt_meta.find("cbtm-file") != dlt_meta.end()) { + Load(dlt_meta["cbtm-file"]); + } + if (dlt_meta.find("cbtm-clear-source") != dlt_meta.end()) { + ClearSource(dlt_meta["cbtm-clear-source"]); + } + if (dlt_meta.find("cbtm-clear-entries") != dlt_meta.end()) { + ClearEntries(dlt_meta["cbtm-clear-entries"]); + } + if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) { + Clear(); + } + +} + +void PhraseDictionaryDynamicCacheBased::Print() const +{ + VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Print()" << std::endl); #ifdef WITH_THREADS - boost::shared_lock read_lock(m_cacheLock); + boost::shared_lock read_lock(m_cacheLock); #endif - cacheMap::const_iterator it; - for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { - std::string source = (it->first).ToString(); - TargetPhraseCollection* tpc = (it->second).first; - TargetPhraseCollection::iterator itr; - for(itr = tpc->begin(); itr != tpc->end(); itr++) { - std::string target = (*itr)->ToString(); - std::cout << source << " ||| " << target << std::endl; - } - source.clear(); - } - } - + cacheMap::const_iterator it; + for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) { + std::string source = (it->first).ToString(); + TargetPhraseCollection* tpc = (it->second).first; + TargetPhraseCollection::iterator itr; + for(itr = tpc->begin(); itr != tpc->end(); itr++) { + std::string target = (*itr)->ToString(); + std::cout << source << " ||| " << target << std::endl; + } + source.clear(); + } +} + }// end namespace diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h index 02f6ff7ef..26a070a4b 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h @@ -1,17 +1,17 @@ /*********************************************************************** Moses - statistical machine translation system Copyright (C) 2006-2011 University of Edinburgh - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA @@ -43,126 +43,130 @@ namespace Moses { - class ChartParser; - class ChartCellCollectionBase; - class ChartRuleLookupManager; - - /** Implementation of a Cache-based phrase table. 
- */ - class PhraseDictionaryDynamicCacheBased : public PhraseDictionary - { - - typedef std::vector AgeCollection; - typedef std::pair TargetCollectionAgePair; - typedef std::map cacheMap; - - // data structure for the cache - cacheMap m_cacheTM; - std::vector precomputedScores; - unsigned int m_maxAge; - size_t m_score_type; //scoring type of the match - size_t m_entries; //total number of entries in the cache - float m_lower_score; //lower_bound_score for no match - std::string m_initfiles; // vector of files loaded in the initialization phase - std::string m_name; // internal name to identify this instance of the Cache-based phrase table - +class ChartParser; +class ChartCellCollectionBase; +class ChartRuleLookupManager; + +/** Implementation of a Cache-based phrase table. + */ +class PhraseDictionaryDynamicCacheBased : public PhraseDictionary +{ + + typedef std::vector AgeCollection; + typedef std::pair TargetCollectionAgePair; + typedef std::map cacheMap; + + // data structure for the cache + cacheMap m_cacheTM; + std::vector precomputedScores; + unsigned int m_maxAge; + size_t m_score_type; //scoring type of the match + size_t m_entries; //total number of entries in the cache + float m_lower_score; //lower_bound_score for no match + std::string m_initfiles; // vector of files loaded in the initialization phase + std::string m_name; // internal name to identify this instance of the Cache-based phrase table + #ifdef WITH_THREADS - //multiple readers - single writer lock - mutable boost::shared_mutex m_cacheLock; + //multiple readers - single writer lock + mutable boost::shared_mutex m_cacheLock; #endif - - friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryDynamicCacheBased&); - - public: - PhraseDictionaryDynamicCacheBased(const std::string &line); - ~PhraseDictionaryDynamicCacheBased(); - - inline const std::string GetName() { return m_name; }; - inline void SetName(const std::string name){ m_name = name; } - - static const PhraseDictionaryDynamicCacheBased& Instance(const std::string name) { - UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!"); - return *(s_instance_map[name]); - } - - static PhraseDictionaryDynamicCacheBased& InstanceNonConst(const std::string name) { - UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!"); - return *(s_instance_map[name]); - } - static const PhraseDictionaryDynamicCacheBased& Instance() { - return *s_instance; - } - - static PhraseDictionaryDynamicCacheBased& InstanceNonConst() { - return *s_instance; - } + friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryDynamicCacheBased&); + +public: + PhraseDictionaryDynamicCacheBased(const std::string &line); + ~PhraseDictionaryDynamicCacheBased(); + + inline const std::string GetName() { + return m_name; + }; + inline void SetName(const std::string name) { + m_name = name; + } + + static const PhraseDictionaryDynamicCacheBased& Instance(const std::string name) { + UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not exist!"); + return *(s_instance_map[name]); + } + + static PhraseDictionaryDynamicCacheBased& InstanceNonConst(const std::string name) { + UTIL_THROW_IF2(s_instance_map.find(name) == s_instance_map.end(), "The PhraseDictionaryDynamicCacheBased feature named " + name + " does not 
exist!"); + return *(s_instance_map[name]); + } + + static const PhraseDictionaryDynamicCacheBased& Instance() { + return *s_instance; + } + + static PhraseDictionaryDynamicCacheBased& InstanceNonConst() { + return *s_instance; + } + + void Load(); + void Load(const std::string file); + + const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const; + const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; + + // for phrase-based model + // void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; + + // for syntax/hiero model (CKY+ decoding) + ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t); + + void SetParameter(const std::string& key, const std::string& value); + + void InitializeForInput(InputType const& source); + + // virtual void InitializeForInput(InputType const&) { + // /* Don't do anything source specific here as this object is shared between threads.*/ + // } + + void Print() const; // prints the cache + void Clear(); // clears the cache + + void ClearEntries(std::string &entries); + void ClearSource(std::string &entries); + void Insert(std::string &entries); + void Execute(std::string command); + void ExecuteDlt(std::map dlt_meta); + + void SetScoreType(size_t type); + void SetMaxAge(unsigned int age); + +protected: + static PhraseDictionaryDynamicCacheBased *s_instance; + static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map; + + float decaying_score(const int age); // calculates the decay score given the age + void Insert(std::vector entries); + + void Decay(); // traverse through the cache and decay each entry + void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase + void Update(std::vector entries, std::string ageString); + void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString=""); + void Update(Phrase p, Phrase tp, int age, std::string waString=""); + + void ClearEntries(std::vector entries); + void ClearEntries(std::string sourceString, std::string targetString); + void ClearEntries(Phrase p, Phrase tp); + + void ClearSource(std::vector entries); + void ClearSource(Phrase sp); + + void Execute(std::vector commands); + void Execute_Single_Command(std::string command); + + + void SetPreComputedScores(const unsigned int numScoreComponent); + Scores GetPreComputedScores(const unsigned int age); + + void Load_Multiple_Files(std::vector files); + void Load_Single_File(const std::string file); + + TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const; +}; - void Load(); - void Load(const std::string file); - - const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &src) const; - const TargetPhraseCollection* GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const; - - // for phrase-based model - // void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; - - // for syntax/hiero model (CKY+ decoding) - ChartRuleLookupManager* CreateRuleLookupManager(const ChartParser&, const ChartCellCollectionBase&, std::size_t); - - void SetParameter(const std::string& key, const std::string& value); - - void InitializeForInput(InputType const& source); - - // virtual void InitializeForInput(InputType const&) { - // /* Don't do anything source specific here as this object is shared between threads.*/ - // } - - void Print() const; // prints the cache - void Clear(); 
// clears the cache
-
-    void ClearEntries(std::string &entries);
-    void ClearSource(std::string &entries);
-    void Insert(std::string &entries);
-    void Execute(std::string command);
-    void ExecuteDlt(std::map dlt_meta);
-
-    void SetScoreType(size_t type);
-    void SetMaxAge(unsigned int age);
-
-  protected:
-    static PhraseDictionaryDynamicCacheBased *s_instance;
-    static std::map< const std::string, PhraseDictionaryDynamicCacheBased * > s_instance_map;
-
-    float decaying_score(const int age); // calculates the decay score given the age
-    void Insert(std::vector entries);
-
-    void Decay(); // traverse through the cache and decay each entry
-    void Decay(Phrase p); // traverse through the cache and decay each entry for a given Phrase
-    void Update(std::vector entries, std::string ageString);
-    void Update(std::string sourceString, std::string targetString, std::string ageString, std::string waString="");
-    void Update(Phrase p, Phrase tp, int age, std::string waString="");
-
-    void ClearEntries(std::vector entries);
-    void ClearEntries(std::string sourceString, std::string targetString);
-    void ClearEntries(Phrase p, Phrase tp);
-
-    void ClearSource(std::vector entries);
-    void ClearSource(Phrase sp);
-
-    void Execute(std::vector commands);
-    void Execute_Single_Command(std::string command);
-
-
-    void SetPreComputedScores(const unsigned int numScoreComponent);
-    Scores GetPreComputedScores(const unsigned int age);
-
-    void Load_Multiple_Files(std::vector files);
-    void Load_Single_File(const std::string file);
-
-    TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
-  };
-
 } // namespace Moses
 #endif /* moses_PhraseDictionaryDynamicCacheBased_H_ */
diff --git a/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp b/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp
index 2c6666c52..84639a737 100644
--- a/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp
+++ b/moses/TranslationModel/PhraseDictionaryNodeMemory.cpp
@@ -65,7 +65,7 @@ PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateChild(const W
 PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
 {
   UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
-		  "Not a non-terminal: " << targetNonTerm);
+                 "Not a non-terminal: " << targetNonTerm);
 
   return &m_nonTermMap[targetNonTerm];
 }
@@ -95,7 +95,7 @@ const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetChild(const Wor
 const PhraseDictionaryNodeMemory *PhraseDictionaryNodeMemory::GetNonTerminalChild(const Word &targetNonTerm) const
 {
   UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
-		  "Not a non-terminal: " << targetNonTerm);
+                 "Not a non-terminal: " << targetNonTerm);
 
   NonTerminalMap::const_iterator p = m_nonTermMap.find(targetNonTerm);
   return (p == m_nonTermMap.end()) ? NULL : &p->second;
diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
index 4f9e03e46..1934fa184 100644
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@@ -59,8 +59,8 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
 
   CacheColl &cache = GetCache();
 
-	CacheColl::iterator iter;
-	iter = cache.find(hash);
+  CacheColl::iterator iter;
+  iter = cache.find(hash);
 
   if (iter != cache.end()) {
     // already in cache
diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp
index 98a4440c7..e03f074b0 100644
--- a/moses/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/TranslationOptionCollectionConfusionNet.cpp
@@ -20,11 +20,11 @@ namespace Moses
 
 /** constructor; just initialize the base class */
 TranslationOptionCollectionConfusionNet::
-TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
-					size_t maxNoTransOptPerCoverage,
-					float translationOptionThreshold)
-  : TranslationOptionCollection(input, maxNoTransOptPerCoverage,
-				translationOptionThreshold)
+TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
+    size_t maxNoTransOptPerCoverage,
+    float translationOptionThreshold)
+  : TranslationOptionCollection(input, maxNoTransOptPerCoverage,
+                                translationOptionThreshold)
 {
   // Prefix checkers are phrase dictionaries that provide a prefix check
   // to indicate that a phrase table entry with a given prefix exists.
@@ -32,8 +32,8 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
   // expanding it further.
   vector<PhraseDictionary*> prefixCheckers;
   BOOST_FOREACH(PhraseDictionary* pd, PhraseDictionary::GetColl())
-  if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
-
+    if (pd->ProvidesPrefixCheck()) prefixCheckers.push_back(pd);
+
   const InputFeature &inputFeature = InputFeature::Instance();
   UTIL_THROW_IF2(&inputFeature == NULL, "Input feature must be specified");
 
@@ -103,10 +103,10 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
           Phrase subphrase(prevPhrase);
           subphrase.AddWord(word);
 
-	  bool OK = prefixCheckers.size() == 0;
-	  for (size_t k = 0; !OK && k < prefixCheckers.size(); ++k)
-	    OK = prefixCheckers[k]->PrefixExists(subphrase);
-	  if (!OK) continue;
+          bool OK = prefixCheckers.size() == 0;
+          for (size_t k = 0; !OK && k < prefixCheckers.size(); ++k)
+            OK = prefixCheckers[k]->PrefixExists(subphrase);
+          if (!OK) continue;
 
           const ScorePair &scores = col[i].second;
           ScorePair *inputScore = new ScorePair(*prevInputScore);
@@ -122,8 +122,8 @@ TranslationOptionCollectionConfusionNet(const ConfusionNet &input,
       } // for (iterPath = prevPaths.begin(); iterPath != prevPaths.end(); ++iterPath) {
     }
   }
-  // cerr << "HAVE " << m_inputPathQueue.size()
-  //      << " input paths of max. length "
  //      << maxSizePhrase << "."
<< endl; } @@ -249,9 +249,9 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLE // go thru each intermediate trans opt just created const vector& partTransOptList = oldPtoc->GetList(); vector::const_iterator iterPartialTranslOpt; - for (iterPartialTranslOpt = partTransOptList.begin(); - iterPartialTranslOpt != partTransOptList.end(); - ++iterPartialTranslOpt) { + for (iterPartialTranslOpt = partTransOptList.begin(); + iterPartialTranslOpt != partTransOptList.end(); + ++iterPartialTranslOpt) { TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt; if (transStep) { diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp index cfdfa1707..cedae925d 100644 --- a/moses/TranslationOptionCollectionLattice.cpp +++ b/moses/TranslationOptionCollectionLattice.cpp @@ -136,12 +136,12 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions() const WordsRange &range = path.GetWordsRange(); if (tpColl && tpColl->GetSize()) { - TargetPhraseCollection::const_iterator iter; - for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) { - const TargetPhrase &tp = **iter; - TranslationOption *transOpt = new TranslationOption(range, tp); - transOpt->SetInputPath(path); - transOpt->Evaluate(m_source); + TargetPhraseCollection::const_iterator iter; + for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) { + const TargetPhrase &tp = **iter; + TranslationOption *transOpt = new TranslationOption(range, tp); + transOpt->SetInputPath(path); + transOpt->Evaluate(m_source); Add(transOpt); } diff --git a/moses/Util.cpp b/moses/Util.cpp index 323df7d5a..79690668f 100644 --- a/moses/Util.cpp +++ b/moses/Util.cpp @@ -3,17 +3,17 @@ /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA @@ -42,305 +42,301 @@ using namespace std; namespace Moses { - - //global variable - Timer g_timer; - - string GetTempFolder() - { -#ifdef _WIN32 - char *tmpPath = getenv("TMP"); - string str(tmpPath); - if (str.substr(str.size() - 1, 1) != "\\") - str += "\\"; - return str; -#else - return "/tmp/"; -#endif - } - - const std::string ToLower(const std::string& str) - { - std::string lc(str); - std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower); - return lc; - } - - class BoolValueException : public util::Exception {}; - - template<> - bool Scan(const std::string &input) - { - std::string lc = ToLower(input); - if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") - return true; - if (lc == "no" || lc == "n" || lc =="false" || lc == "0") - return false; - UTIL_THROW(BoolValueException, "Could not interpret " << input << " as a boolean. 
After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0."); - } - - bool FileExists(const std::string& filePath) - { - ifstream ifs(filePath.c_str()); - return !ifs.fail(); - } - - const std::string Trim(const std::string& str, const std::string dropChars) - { - std::string res = str; - res.erase(str.find_last_not_of(dropChars)+1); - return res.erase(0, res.find_first_not_of(dropChars)); - } - - void ResetUserTime() - { - g_timer.start(); - }; - - void PrintUserTime(const std::string &message) - { - g_timer.check(message.c_str()); - } - - double GetUserTime() - { - return g_timer.get_elapsed_time(); - } - - std::vector< std::map > ProcessAndStripDLT(std::string &line) - { - std::vector< std::map > meta; - std::string lline = ToLower(line); - bool check_dlt = true; - - //allowed format of dlt tag - // - //the type attribute is mandatory; the name should not contain any double quotation mark - //the id attribute is optional; if present, the name should not contain any double quotation mark - //only one additional attribute is possible; value can contain double quotation marks - //both name and value must be surrounded by double quotation mark - -// std::cerr << "GLOBAL START" << endl; - while (check_dlt) { - size_t start = lline.find(""); - if (close == std::string::npos) { - // error: dlt tag is not ended - check_dlt = false; - continue; - } - //std::string dlt = Trim(lline.substr(start+4, close-start-4)); - std::string dlt = Trim(line.substr(start+4, close-start-4)); - - line.erase(start,close-start+2); - lline.erase(start,close-start+2); - - if (dlt != "") { - std::map tmp_meta; - - //check if type is present and store it - size_t start_type = dlt.find("type="); - size_t len_type=4; - if (start_type != std::string::npos) { - //type is present - //required format type="value" - //double quotation mark is required - - std::string val_type; - std::string label_type = dlt.substr(start_type, len_type); - if (dlt[start_type+len_type+1] == '"'){ - val_type = dlt.substr(start_type+len_type+2); - size_t close_type = val_type.find('"'); - val_type = val_type.substr(0, close_type); - dlt.erase(start_type,start_type+len_type+2+close_type+1); - } - else{ - TRACE_ERR("DLT parse error: missing character \" for type \n"); - } - label_type = Trim(label_type); - dlt = Trim(dlt); - - tmp_meta[label_type] = val_type; - } - else{ - //type is not present - UTIL_THROW(util::Exception, "ProcessAndStripDLT(std::string &line): Attribute type for dlt tag is mandatory."); - } - - //check if id is present and store it - size_t start_id = dlt.find("id="); - size_t len_id=2; - if (start_id != std::string::npos) { - //id is present - //required format id="name" - //double quotation mark is required - std::string val_id; - std::string label_id = dlt.substr(start_id, len_id); - if (dlt[start_id+len_id+1] == '"'){ - val_id = dlt.substr(start_id+len_id+2); - size_t close_id = val_id.find('"'); - val_id = val_id.substr(0, close_id); - dlt.erase(start_id,start_id+len_id+2+close_id+1); - } - else{ - TRACE_ERR("DLT parse error: missing character \" for id \n"); - } - label_id = Trim(label_id); - dlt = Trim(dlt); - - tmp_meta[label_id] = val_id; - } - else{ - //id is not present - //do nothing - } - - for (size_t i = 1; i < dlt.size(); i++) { - if (dlt[i] == '=') { - std::string label = dlt.substr(0, i); - std::string val = dlt.substr(i+1); - if (val[0] == '"') { - - val = val.substr(1); - // it admits any double quotation mark (but is attribute) in the value of the attribute - // it assumes that just one 
attribute (besides id attribute) is present in the tag, - // it assumes that the value starts and ends with double quotation mark - size_t close = val.rfind('"'); - if (close == std::string::npos) { - TRACE_ERR("SGML parse error: missing \"\n"); - dlt = ""; - i = 0; - } else { - dlt = val.substr(close+1); - val = val.substr(0, close); - i = 0; - } - } else { - size_t close = val.find(' '); - if (close == std::string::npos) { - dlt = ""; - i = 0; - } else { - dlt = val.substr(close+1); - val = val.substr(0, close); - } - } - label = Trim(label); - dlt = Trim(dlt); - - tmp_meta[label] = val; - } - } - - meta.push_back(tmp_meta); - } - } +//global variable +Timer g_timer; + +string GetTempFolder() +{ +#ifdef _WIN32 + char *tmpPath = getenv("TMP"); + string str(tmpPath); + if (str.substr(str.size() - 1, 1) != "\\") + str += "\\"; + return str; +#else + return "/tmp/"; +#endif +} + +const std::string ToLower(const std::string& str) +{ + std::string lc(str); + std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower); + return lc; +} + +class BoolValueException : public util::Exception {}; + +template<> +bool Scan(const std::string &input) +{ + std::string lc = ToLower(input); + if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") + return true; + if (lc == "no" || lc == "n" || lc =="false" || lc == "0") + return false; + UTIL_THROW(BoolValueException, "Could not interpret " << input << " as a boolean. After lowercasing, valid values are yes, y, true, 1, no, n, false, and 0."); +} + +bool FileExists(const std::string& filePath) +{ + ifstream ifs(filePath.c_str()); + return !ifs.fail(); +} + +const std::string Trim(const std::string& str, const std::string dropChars) +{ + std::string res = str; + res.erase(str.find_last_not_of(dropChars)+1); + return res.erase(0, res.find_first_not_of(dropChars)); +} + +void ResetUserTime() +{ + g_timer.start(); +}; + +void PrintUserTime(const std::string &message) +{ + g_timer.check(message.c_str()); +} + +double GetUserTime() +{ + return g_timer.get_elapsed_time(); +} + +std::vector< std::map > ProcessAndStripDLT(std::string &line) +{ + std::vector< std::map > meta; + std::string lline = ToLower(line); + bool check_dlt = true; + + //allowed format of dlt tag + // + //the type attribute is mandatory; the name should not contain any double quotation mark + //the id attribute is optional; if present, the name should not contain any double quotation mark + //only one additional attribute is possible; value can contain double quotation marks + //both name and value must be surrounded by double quotation mark + +// std::cerr << "GLOBAL START" << endl; + while (check_dlt) { + size_t start = lline.find(""); + if (close == std::string::npos) { + // error: dlt tag is not ended + check_dlt = false; + continue; + } + //std::string dlt = Trim(lline.substr(start+4, close-start-4)); + std::string dlt = Trim(line.substr(start+4, close-start-4)); + + line.erase(start,close-start+2); + lline.erase(start,close-start+2); + + if (dlt != "") { + std::map tmp_meta; + + //check if type is present and store it + size_t start_type = dlt.find("type="); + size_t len_type=4; + if (start_type != std::string::npos) { + //type is present + //required format type="value" + //double quotation mark is required + + std::string val_type; + std::string label_type = dlt.substr(start_type, len_type); + if (dlt[start_type+len_type+1] == '"') { + val_type = dlt.substr(start_type+len_type+2); + size_t close_type = val_type.find('"'); + val_type = val_type.substr(0, close_type); + 
dlt.erase(start_type,start_type+len_type+2+close_type+1); + } else { + TRACE_ERR("DLT parse error: missing character \" for type \n"); + } + label_type = Trim(label_type); + dlt = Trim(dlt); + + tmp_meta[label_type] = val_type; + } else { + //type is not present + UTIL_THROW(util::Exception, "ProcessAndStripDLT(std::string &line): Attribute type for dlt tag is mandatory."); + } + + //check if id is present and store it + size_t start_id = dlt.find("id="); + size_t len_id=2; + if (start_id != std::string::npos) { + //id is present + //required format id="name" + //double quotation mark is required + + std::string val_id; + std::string label_id = dlt.substr(start_id, len_id); + if (dlt[start_id+len_id+1] == '"') { + val_id = dlt.substr(start_id+len_id+2); + size_t close_id = val_id.find('"'); + val_id = val_id.substr(0, close_id); + dlt.erase(start_id,start_id+len_id+2+close_id+1); + } else { + TRACE_ERR("DLT parse error: missing character \" for id \n"); + } + label_id = Trim(label_id); + dlt = Trim(dlt); + + tmp_meta[label_id] = val_id; + } else { + //id is not present + //do nothing + } + + for (size_t i = 1; i < dlt.size(); i++) { + if (dlt[i] == '=') { + std::string label = dlt.substr(0, i); + std::string val = dlt.substr(i+1); + if (val[0] == '"') { + + val = val.substr(1); + // it admits any double quotation mark (but is attribute) in the value of the attribute + // it assumes that just one attribute (besides id attribute) is present in the tag, + // it assumes that the value starts and ends with double quotation mark + size_t close = val.rfind('"'); + if (close == std::string::npos) { + TRACE_ERR("SGML parse error: missing \"\n"); + dlt = ""; + i = 0; + } else { + dlt = val.substr(close+1); + val = val.substr(0, close); + i = 0; + } + } else { + size_t close = val.find(' '); + if (close == std::string::npos) { + dlt = ""; + i = 0; + } else { + dlt = val.substr(close+1); + val = val.substr(0, close); + } + } + label = Trim(label); + dlt = Trim(dlt); + + tmp_meta[label] = val; + } + } + + meta.push_back(tmp_meta); + } + } // std::cerr << "GLOBAL END" << endl; - return meta; - } - - std::map ProcessAndStripSGML(std::string &line) - { - std::map meta; - std::string lline = ToLower(line); - if (lline.find(""); - if (close == std::string::npos) return meta; // error - size_t end = lline.find(""); - std::string seg = Trim(lline.substr(4, close-4)); - std::string text = line.substr(close+1, end - close - 1); - for (size_t i = 1; i < seg.size(); i++) { - if (seg[i] == '=' && seg[i-1] == ' ') { - std::string less = seg.substr(0, i-1) + seg.substr(i); - seg = less; - i = 0; - continue; - } - if (seg[i] == '=' && seg[i+1] == ' ') { - std::string less = seg.substr(0, i+1); - if (i+2 < seg.size()) less += seg.substr(i+2); - seg = less; - i = 0; - continue; - } - } - line = Trim(text); - if (seg == "") return meta; - for (size_t i = 1; i < seg.size(); i++) { - if (seg[i] == '=') { - std::string label = seg.substr(0, i); - std::string val = seg.substr(i+1); - if (val[0] == '"') { - val = val.substr(1); - size_t close = val.find('"'); - if (close == std::string::npos) { - TRACE_ERR("SGML parse error: missing \"\n"); - seg = ""; - i = 0; - } else { - seg = val.substr(close+1); - val = val.substr(0, close); - i = 0; - } - } else { - size_t close = val.find(' '); - if (close == std::string::npos) { - seg = ""; - i = 0; - } else { - seg = val.substr(close+1); - val = val.substr(0, close); - } - } - label = Trim(label); - seg = Trim(seg); - meta[label] = val; - } - } - return meta; - } - - std::string 
PassthroughSGML(std::string &line, const std::string tagName, const std::string& lbrackStr, const std::string& rbrackStr) - { - string lbrack = lbrackStr; // = "<"; - string rbrack = rbrackStr; // = ">"; - - std::string meta = ""; - - std::string lline = ToLower(line); - size_t open = lline.find(lbrack+tagName); - //check whether the tag exists; if not return the empty string - if (open == std::string::npos) return meta; - - size_t close = lline.find(rbrack, open); - //check whether the tag is closed with '/>'; if not return the empty string - if (close == std::string::npos) { - TRACE_ERR("PassthroughSGML error: the tag does not end properly\n"); - return meta; - } - // extract the tag - std::string tmp = line.substr(open, close - open + 1); - meta = line.substr(open, close - open + 1); - - // strip the tag from the line - line = line.substr(0, open) + line.substr(close + 1, std::string::npos); - - TRACE_ERR("The input contains a tag:" << meta << std::endl); - - lline = ToLower(line); - open = lline.find(lbrack+tagName); - if (open != std::string::npos) { - TRACE_ERR("PassthroughSGML error: there are two tags\n"); - } - return meta; - } - + return meta; +} + +std::map ProcessAndStripSGML(std::string &line) +{ + std::map meta; + std::string lline = ToLower(line); + if (lline.find(""); + if (close == std::string::npos) return meta; // error + size_t end = lline.find(""); + std::string seg = Trim(lline.substr(4, close-4)); + std::string text = line.substr(close+1, end - close - 1); + for (size_t i = 1; i < seg.size(); i++) { + if (seg[i] == '=' && seg[i-1] == ' ') { + std::string less = seg.substr(0, i-1) + seg.substr(i); + seg = less; + i = 0; + continue; + } + if (seg[i] == '=' && seg[i+1] == ' ') { + std::string less = seg.substr(0, i+1); + if (i+2 < seg.size()) less += seg.substr(i+2); + seg = less; + i = 0; + continue; + } + } + line = Trim(text); + if (seg == "") return meta; + for (size_t i = 1; i < seg.size(); i++) { + if (seg[i] == '=') { + std::string label = seg.substr(0, i); + std::string val = seg.substr(i+1); + if (val[0] == '"') { + val = val.substr(1); + size_t close = val.find('"'); + if (close == std::string::npos) { + TRACE_ERR("SGML parse error: missing \"\n"); + seg = ""; + i = 0; + } else { + seg = val.substr(close+1); + val = val.substr(0, close); + i = 0; + } + } else { + size_t close = val.find(' '); + if (close == std::string::npos) { + seg = ""; + i = 0; + } else { + seg = val.substr(close+1); + val = val.substr(0, close); + } + } + label = Trim(label); + seg = Trim(seg); + meta[label] = val; + } + } + return meta; +} + +std::string PassthroughSGML(std::string &line, const std::string tagName, const std::string& lbrackStr, const std::string& rbrackStr) +{ + string lbrack = lbrackStr; // = "<"; + string rbrack = rbrackStr; // = ">"; + + std::string meta = ""; + + std::string lline = ToLower(line); + size_t open = lline.find(lbrack+tagName); + //check whether the tag exists; if not return the empty string + if (open == std::string::npos) return meta; + + size_t close = lline.find(rbrack, open); + //check whether the tag is closed with '/>'; if not return the empty string + if (close == std::string::npos) { + TRACE_ERR("PassthroughSGML error: the tag does not end properly\n"); + return meta; + } + // extract the tag + std::string tmp = line.substr(open, close - open + 1); + meta = line.substr(open, close - open + 1); + + // strip the tag from the line + line = line.substr(0, open) + line.substr(close + 1, std::string::npos); + + TRACE_ERR("The input contains a tag:" << 
meta << std::endl); + + lline = ToLower(line); + open = lline.find(lbrack+tagName); + if (open != std::string::npos) { + TRACE_ERR("PassthroughSGML error: there are two tags\n"); + } + return meta; +} + } diff --git a/moses/Word.cpp b/moses/Word.cpp index 5311d1362..3cf545980 100644 --- a/moses/Word.cpp +++ b/moses/Word.cpp @@ -98,45 +98,44 @@ StringPiece Word::GetString(FactorType factorType) const class StrayFactorException : public util::Exception {}; -void +void Word:: CreateFromString(FactorDirection direction - , const std::vector &factorOrder - , const StringPiece &str - , bool isNonTerminal - , bool strict) + , const std::vector &factorOrder + , const StringPiece &str + , bool isNonTerminal + , bool strict) { FactorCollection &factorCollection = FactorCollection::Instance(); vector bits(MAX_NUM_FACTORS); - util::TokenIter - fit(str, StaticData::Instance().GetFactorDelimiter()); + util::TokenIter + fit(str, StaticData::Instance().GetFactorDelimiter()); size_t i = 0; for (; i < MAX_NUM_FACTORS && fit; ++i,++fit) bits[i] = *fit; if (i == MAX_NUM_FACTORS) - UTIL_THROW_IF(fit, StrayFactorException, - "The hard limit for factors is " << MAX_NUM_FACTORS - << ". The word " << str << " contains factor delimiter " - << StaticData::Instance().GetFactorDelimiter() - << " too many times."); + UTIL_THROW_IF(fit, StrayFactorException, + "The hard limit for factors is " << MAX_NUM_FACTORS + << ". The word " << str << " contains factor delimiter " + << StaticData::Instance().GetFactorDelimiter() + << " too many times."); if (strict) - UTIL_THROW_IF(fit, StrayFactorException, - "You have configured " << factorOrder.size() - << " factors but the word " << str - << " contains factor delimiter " - << StaticData::Instance().GetFactorDelimiter() - << " too many times."); - - UTIL_THROW_IF(i < factorOrder.size(),util::Exception, - "Too few factors in string '" << str << "'."); - - for (size_t k = 0; k < factorOrder.size(); ++k) - { - UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception, - "Factor order out of bounds."); - m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal); - } - + UTIL_THROW_IF(fit, StrayFactorException, + "You have configured " << factorOrder.size() + << " factors but the word " << str + << " contains factor delimiter " + << StaticData::Instance().GetFactorDelimiter() + << " too many times."); + + UTIL_THROW_IF(i < factorOrder.size(),util::Exception, + "Too few factors in string '" << str << "'."); + + for (size_t k = 0; k < factorOrder.size(); ++k) { + UTIL_THROW_IF(factorOrder[k] >= MAX_NUM_FACTORS, util::Exception, + "Factor order out of bounds."); + m_factorArray[factorOrder[k]] = factorCollection.AddFactor(bits[k], isNonTerminal); + } + // assume term/non-term same for all factors m_isNonTerminal = isNonTerminal; } diff --git a/moses/Word.h b/moses/Word.h index 47df6e141..efdebddfa 100644 --- a/moses/Word.h +++ b/moses/Word.h @@ -152,7 +152,7 @@ public: , const std::vector &factorOrder , const StringPiece &str , bool isNonTerminal - , bool strict = true); + , bool strict = true); void CreateUnknownWord(const Word &sourceWord); diff --git a/moses/WordLattice.cpp b/moses/WordLattice.cpp index cc91345cb..269ad46ca 100644 --- a/moses/WordLattice.cpp +++ b/moses/WordLattice.cpp @@ -49,11 +49,11 @@ void WordLattice::Print(std::ostream& out) const out<<"\n\n"; } -int +int WordLattice:: InitializeFromPCNDataType -(const PCN::CN& cn, - const std::vector& factorOrder, +(const PCN::CN& cn, + const std::vector& factorOrder, const std::string& 
debug_line)
 {
   const StaticData &staticData = StaticData::Instance();
@@ -78,20 +78,20 @@ InitializeFromPCNDataType
 
       //check for correct number of link parameters
       if (alt.m_denseFeatures.size() != numInputScores) {
-	TRACE_ERR("ERROR: need " << numInputScores
-		  << " link parameters, found "
-		  << alt.m_denseFeatures.size()
-		  << " while reading column " << i
-		  << " from " << debug_line << "\n");
+        TRACE_ERR("ERROR: need " << numInputScores
+                  << " link parameters, found "
+                  << alt.m_denseFeatures.size()
+                  << " while reading column " << i
+                  << " from " << debug_line << "\n");
         return false;
       }
 
       //check each element for bounds
       std::vector<float>::const_iterator probsIterator;
      data[i][j].second = std::vector<float>(0);
-      for(probsIterator = alt.m_denseFeatures.begin();
-	  probsIterator < alt.m_denseFeatures.end();
-	  probsIterator++) {
+      for(probsIterator = alt.m_denseFeatures.begin();
+          probsIterator < alt.m_denseFeatures.end();
+          probsIterator++) {
         IFVERBOSE(1) {
           if (*probsIterator < 0.0f) {
             TRACE_ERR("WARN: neg probability: " << *probsIterator << "\n");
diff --git a/moses/WordsBitmap.cpp b/moses/WordsBitmap.cpp
index 0866846ed..53c263cb5 100644
--- a/moses/WordsBitmap.cpp
+++ b/moses/WordsBitmap.cpp
@@ -66,7 +66,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const
 bool WordsBitmap::IsAdjacent(size_t startPos, size_t endPos) const
 {
   if (GetNumWordsCovered() == 0) {
-	return true;
+    return true;
   }
 
   size_t first = GetFirstGapPos();