diff --git a/moses/src/ScoreComponentCollection.cpp b/moses/src/ScoreComponentCollection.cpp index a16f706af..07e57e0a2 100644 --- a/moses/src/ScoreComponentCollection.cpp +++ b/moses/src/ScoreComponentCollection.cpp @@ -64,7 +64,7 @@ void ScoreComponentCollection::MultiplyEquals(float scalar) // Multiply all weights of this sparse producer by a given scalar void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar) { assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited); - std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP; + std::string prefix = sp->GetScoreProducerDescription() + FName::SEP; for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) { std::stringstream name; name << i->first; diff --git a/moses/src/TargetNgramFeature.cpp b/moses/src/TargetNgramFeature.cpp index 2a64026d1..3fefdfba2 100644 --- a/moses/src/TargetNgramFeature.cpp +++ b/moses/src/TargetNgramFeature.cpp @@ -60,11 +60,6 @@ string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const return "dlm"; } -string TargetNgramFeature::GetShortNameWithSEP() const -{ - return "dlm_"; -} - size_t TargetNgramFeature::GetNumInputScores() const { return 0; @@ -187,8 +182,8 @@ void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const { - vector contextFactor; - contextFactor.reserve(m_n); + vector contextFactor; + contextFactor.reserve(m_n); // get index map for underlying hypotheses const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = @@ -219,18 +214,21 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int if (phrasePos==cur_hypo.GetCurrTargetPhrase().GetSize()-1 || prev_is_NT) makeSuffix = true; - // beginning of sentence symbol ? - string w = word.GetString(m_factorType); - if (w.compare("") == 0) + // beginning/end of sentence symbol ,? + string factorZero = word.GetString(0); + if (factorZero.compare("") == 0) prefixTerminals++; // end of sentence symbol ? - else if (w.compare("") == 0) + else if (factorZero.compare("") == 0) suffixTerminals++; // everything else else { stringstream ngram; - ngram << GetShortNameWithSEP(); - ngram << word.GetString(m_factorType); + ngram << m_baseName; + if (m_factorType == 0) + ngram << factorZero; + else + ngram << word.GetString(m_factorType); accumulator->SparsePlusEquals(ngram.str(), 1); if (collectForPrefix) @@ -346,7 +344,7 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int // remove duplicates stringstream curr_ngram; - curr_ngram << GetShortNameWithSEP(); + curr_ngram << m_baseName; curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType); curr_ngram << ":"; curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType); @@ -373,7 +371,7 @@ FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int size_t size = contextFactor.size(); if (makePrefix && makeSuffix && (size <= m_n)) { stringstream curr_ngram; - curr_ngram << GetShortNameWithSEP(); + curr_ngram << m_baseName; for (size_t i = 0; i < size; ++i) { curr_ngram << (*contextFactor[i]).GetString(m_factorType); if (i < size-1) @@ -394,11 +392,15 @@ void TargetNgramFeature::MakePrefixNgrams(std::vector &contextFacto for (size_t k = 0; k < numberOfStartPos; ++k) { size_t max_end = (size < m_n+k+offset)? size: m_n+k+offset; for (size_t end_pos = 1+k+offset; end_pos < max_end; ++end_pos) { - ngram << GetShortNameWithSEP(); + ngram << m_baseName; for (size_t i=k+offset; i <= end_pos; ++i) { if (i > k+offset) ngram << ":"; - ngram << (*contextFactor[i]).GetString(m_factorType); + string factorZero = (*contextFactor[i]).GetString(0); + if (m_factorType == 0 || factorZero.compare("") == 0 || factorZero.compare("") == 0) + ngram << factorZero; + else + ngram << (*contextFactor[i]).GetString(m_factorType); const Word w = *contextFactor[i]; } // cerr << "p-ngram: " << ngram.str() << endl; @@ -413,12 +415,16 @@ void TargetNgramFeature::MakeSuffixNgrams(std::vector &contextFacto for (size_t k = 0; k < numberOfEndPos; ++k) { size_t end_pos = contextFactor.size()-1-k-offset; for (int start_pos=end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) { - ngram << GetShortNameWithSEP(); + ngram << m_baseName; for (size_t j=start_pos; j <= end_pos; ++j){ - ngram << (*contextFactor[j]).GetString(m_factorType); - if (j < end_pos) + string factorZero = (*contextFactor[j]).GetString(0); + if (m_factorType == 0 || factorZero.compare("") == 0 || factorZero.compare("") == 0) + ngram << factorZero; + else + ngram << (*contextFactor[j]).GetString(m_factorType); + if (j < end_pos) ngram << ":"; - } + } // cerr << "s-ngram: " << ngram.str() << endl; accumulator->SparsePlusEquals(ngram.str(), 1); ngram.str(""); diff --git a/moses/src/TargetNgramFeature.h b/moses/src/TargetNgramFeature.h index 40688567b..bc6d5ba66 100644 --- a/moses/src/TargetNgramFeature.h +++ b/moses/src/TargetNgramFeature.h @@ -190,12 +190,13 @@ public: FactorCollection& factorCollection = FactorCollection::Instance(); const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_); m_bos.SetFactor(m_factorType,bosFactor); + m_baseName = GetScoreProducerDescription(); + m_baseName.append("_"); } bool Load(const std::string &filePath); std::string GetScoreProducerWeightShortName(unsigned) const; - std::string GetShortNameWithSEP() const; size_t GetNumInputScores() const; void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; } @@ -219,6 +220,8 @@ private: // additional weight that all sparse weights are scaled with float m_sparseProducerWeight; + std::string m_baseName; + void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const; void MakePrefixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos = 1, size_t offset = 0) const; diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp index 0505122be..43629e033 100644 --- a/moses/src/Word.cpp +++ b/moses/src/Word.cpp @@ -103,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction vector wordVec; Tokenize(wordVec, str, "|"); - assert(wordVec.size() == factorOrder.size()); + if (!isNonTerminal) + assert(wordVec.size() == factorOrder.size()); const Factor *factor; for (size_t ind = 0; ind < wordVec.size(); ++ind) {