diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp index 3c7e381c4..8a04522b9 100644 --- a/mira/Decoder.cpp +++ b/mira/Decoder.cpp @@ -103,7 +103,7 @@ namespace Mira { staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight); m_bleuScoreFeature->SetCurrentSourceLength((*m_sentence).GetSize()); - m_bleuScoreFeature->SetCurrentReference(sentenceid); + m_bleuScoreFeature->SetCurrentShortestReference(sentenceid); //run the decoder m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system); @@ -192,12 +192,12 @@ namespace Mira { m_bleuScoreFeature->PrintHistory(out); } - void MosesDecoder::printReferenceLength(const vector& ref_ids) { +/* void MosesDecoder::printReferenceLength(const vector& ref_ids) { m_bleuScoreFeature->PrintReferenceLength(ref_ids); - } + }*/ - size_t MosesDecoder::getReferenceLength(size_t ref_id) { - return m_bleuScoreFeature->GetReferenceLength(ref_id); + size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength) { + return m_bleuScoreFeature->GetClosestReferenceLength(ref_id, hypoLength); } void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, diff --git a/mira/Decoder.h b/mira/Decoder.h index 067f1cdcb..52ce46af6 100644 --- a/mira/Decoder.h +++ b/mira/Decoder.h @@ -66,8 +66,8 @@ class MosesDecoder { void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); // void loadReferenceSentences(const std::vector >& refs); void printBleuFeatureHistory(std::ostream& out); - void printReferenceLength(const std::vector& ref_ids); - size_t getReferenceLength(size_t ref_id); +// void printReferenceLength(const std::vector& ref_ids); + size_t getClosestReferenceLength(size_t ref_id, int hypoLength); void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend, float scaleByX, float historySmoothing, size_t scheme, float relax_BP); diff --git a/mira/Main.cpp b/mira/Main.cpp index a96e02ac4..836bf6bb0 100644 --- a/mira/Main.cpp +++ b/mira/Main.cpp @@ -352,6 +352,8 @@ int main(int argc, char** argv) { cerr << "Error: Need to select an one of parameters --hope-fear/--model-hope-fear for mira update." << endl; return 1; } + if (historyOf1best || historyOfOracles) + sentenceLevelBleu = false; if (!sentenceLevelBleu) { if (!historyOf1best && !historyOfOracles) { historyOf1best = true; @@ -495,7 +497,8 @@ int main(int argc, char** argv) { } } - size_t reference_length = decoder->getReferenceLength(*sid); + size_t ref_length; + float avg_ref_length; if (hope_fear || perceptron_update) { // HOPE cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n << "best hope translations" << endl; @@ -504,7 +507,9 @@ int main(int argc, char** argv) { distinctNbest, rank, epoch); size_t current_input_length = decoder->getCurrentInputLength(); decoder->cleanup(); - float hope_length_ratio = (float)oracle.size()/reference_length; + ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + avg_ref_length = ref_length; + float hope_length_ratio = (float)oracle.size()/ref_length; cerr << ", l-ratio hope: " << hope_length_ratio << endl; vector bestModel; @@ -516,8 +521,9 @@ int main(int argc, char** argv) { distinctNbest, rank, epoch); decoder->cleanup(); cerr << endl; + ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); dev_hypothesis_length += bestModel.size(); - dev_reference_length += reference_length; + dev_reference_length += ref_length; } // FEAR @@ -526,7 +532,10 @@ int main(int argc, char** argv) { featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true, distinctNbest, rank, epoch); decoder->cleanup(); - float fear_length_ratio = (float)fear.size()/reference_length; + ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + avg_ref_length += ref_length; + avg_ref_length /= 2; + float fear_length_ratio = (float)fear.size()/ref_length; cerr << ", l-ratio fear: " << fear_length_ratio << endl; for (size_t i = 0; i < fear.size(); ++i) { delete fear[i]; @@ -541,7 +550,7 @@ int main(int argc, char** argv) { bool skip = false; if (max_length_dev_reference != -1 && (length_diff_hope > max_length_dev_reference || length_diff_fear > max_length_dev_reference)) skip = true; - if (max_length_dev_hypos != -1 && (length_diff_hope_fear > reference_length * max_length_dev_hypos)) + if (max_length_dev_hypos != -1 && (length_diff_hope_fear > avg_ref_length * max_length_dev_hypos)) skip = true; if (skip) { cerr << "Rank " << rank << ", epoch " << epoch << ", skip example (" << hope_length_ratio << ", " << fear_length_ratio << ", " << length_diff_hope_fear << ").. " << endl; @@ -579,7 +588,8 @@ int main(int argc, char** argv) { ref_ids.push_back(*sid); decoder->cleanup(); oracles.push_back(oracle); - float hope_length_ratio = (float)oracle.size()/reference_length; + ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + float hope_length_ratio = (float)oracle.size()/ref_length; cerr << ", l-ratio hope: " << hope_length_ratio << endl; oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]); @@ -592,11 +602,12 @@ int main(int argc, char** argv) { distinctNbest, rank, epoch); decoder->cleanup(); oneBests.push_back(bestModel); - float model_length_ratio = (float)bestModel.size()/reference_length; + ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + float model_length_ratio = (float)bestModel.size()/ref_length; cerr << ", l-ratio model: " << model_length_ratio << endl; if (stabiliseLength) { dev_hypothesis_length += bestModel.size(); - dev_reference_length += reference_length; + dev_reference_length += ref_length; } // FEAR @@ -606,7 +617,8 @@ int main(int argc, char** argv) { featureValues[batchPosition], bleuScores[batchPosition], true, distinctNbest, rank, epoch); decoder->cleanup(); - float fear_length_ratio = (float)fear.size()/reference_length; + ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + float fear_length_ratio = (float)fear.size()/ref_length; cerr << ", l-ratio fear: " << fear_length_ratio << endl; for (size_t i = 0; i < fear.size(); ++i) { delete fear[i]; diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp index c315f0a84..45993b6ff 100644 --- a/moses/src/BleuScoreFeature.cpp +++ b/moses/src/BleuScoreFeature.cpp @@ -98,37 +98,47 @@ void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRe void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs) { m_refs.clear(); - FactorCollection& fc = FactorCollection::Instance(); - cerr << "Number of reference files: " << refs.size() << endl; - for (size_t file_id = 0; file_id < refs.size(); file_id++) { - for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) { - const string& ref = refs[file_id][ref_id]; - vector refTokens = Tokenize(ref); - m_refs[ref_id] = pair(); - pair& ref_pair = m_refs[ref_id]; - ref_pair.first = refTokens.size(); - for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) { - for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) { - Phrase ngram(Output,1); - for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) { - const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]); - Word w; - w.SetFactor(0, f); - ngram.AddWord(w); - } - ref_pair.second[ngram] += 1; - } - } - } - } + FactorCollection& fc = FactorCollection::Instance(); + for (size_t file_id = 0; file_id < refs.size(); file_id++) { + for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) { + const string& ref = refs[file_id][ref_id]; + vector refTokens = Tokenize(ref); + if (file_id == 0) + m_refs[ref_id] = pair,NGrams>(); + pair,NGrams>& ref_pair = m_refs[ref_id]; + (ref_pair.first).push_back(refTokens.size()); + for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) { + for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) { + Phrase ngram(Output,1); + for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) { + const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]); + Word w; + w.SetFactor(0, f); + ngram.AddWord(w); + } + ref_pair.second[ngram] += 1; + } + } + } + } + +// for (size_t i = 0; i < m_refs.size(); ++i) { +// cerr << "ref id " << i << ", number of entries: " << (m_refs[i].first).size() << endl; +// } } void BleuScoreFeature::SetCurrentSourceLength(size_t source_length) { m_cur_source_length = source_length; } -void BleuScoreFeature::SetCurrentReference(size_t ref_id) { - m_cur_ref_length = m_refs[ref_id].first; +void BleuScoreFeature::SetCurrentShortestReference(size_t ref_id) { + // look for shortest reference + int shortestRef = -1; + for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) { + if (shortestRef == -1 || (m_refs[ref_id].first)[i] < shortestRef) + shortestRef = (m_refs[ref_id].first)[i]; + } + m_cur_ref_length = shortestRef; m_cur_ref_ngrams = m_refs[ref_id].second; } @@ -163,15 +173,16 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) { * Update history with a batch of translations */ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypos, vector& sourceLengths, vector& ref_ids, size_t rank, size_t epoch) { - for (size_t batchPosition = 0; batchPosition < hypos.size(); ++batchPosition){ - Phrase phrase(Output, hypos[batchPosition]); + for (size_t ref_id = 0; ref_id < hypos.size(); ++ref_id){ + Phrase phrase(Output, hypos[ref_id]); std::vector< size_t > ngram_counts(BleuScoreState::bleu_order); std::vector< size_t > ngram_matches(BleuScoreState::bleu_order); // set current source and reference information for each oracle in the batch - size_t cur_source_length = sourceLengths[batchPosition]; - size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first; - NGrams cur_ref_ngrams = m_refs[ref_ids[batchPosition]].second; + size_t cur_source_length = sourceLengths[ref_id]; + size_t hypo_length = hypos[ref_id].size(); + size_t cur_ref_length = GetClosestReferenceLength(ref_ids[ref_id], hypo_length); + NGrams cur_ref_ngrams = m_refs[ref_ids[ref_id]].second; cerr << "reference length: " << cur_ref_length << endl; // compute vector c(e;{r_k}): @@ -184,7 +195,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo m_match_history[i] += ngram_matches[i]; // do this for last position in batch - if (batchPosition == hypos.size() - 1) { + if (ref_id == hypos.size() - 1) { m_count_history[i] *= m_historySmoothing; m_match_history[i] *= m_historySmoothing; } @@ -192,11 +203,11 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo // update counts for reference and target length m_source_length_history += cur_source_length; - m_target_length_history += hypos[batchPosition].size(); + m_target_length_history += hypos[ref_id].size(); m_ref_length_history += cur_ref_length; // do this for last position in batch - if (batchPosition == hypos.size() - 1) { + if (ref_id == hypos.size() - 1) { cerr << "Rank " << rank << ", epoch " << epoch << " ,source length history: " << m_source_length_history << " --> " << m_source_length_history * m_historySmoothing << endl; cerr << "Rank " << rank << ", epoch " << epoch << " ,target length history: " << m_target_length_history << " --> " << m_target_length_history * m_historySmoothing << endl; m_source_length_history *= m_historySmoothing; @@ -209,15 +220,24 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo /* * Print batch of reference translations */ -void BleuScoreFeature::PrintReferenceLength(const vector& ref_ids) { - for (size_t batchPosition = 0; batchPosition < ref_ids.size(); ++batchPosition){ - size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first; +/*void BleuScoreFeature::PrintReferenceLength(const vector& ref_ids) { + for (size_t ref_id = 0; ref_id < ref_ids.size(); ++ref_id){ + size_t cur_ref_length = (m_refs[ref_ids[ref_id]].first)[0]; // TODO!! cerr << "reference length: " << cur_ref_length << endl; } -} +}*/ -size_t BleuScoreFeature::GetReferenceLength(size_t ref_id) { - size_t cur_ref_length = m_refs[ref_id].first; +size_t BleuScoreFeature::GetClosestReferenceLength(size_t ref_id, int hypoLength) { + // look for closest reference + int currentDist = -1; + int closestRef = -1; + for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) { + if (closestRef == -1 || abs(hypoLength - (int)(m_refs[ref_id].first)[i]) < currentDist) { + closestRef = (m_refs[ref_id].first)[i]; + currentDist = abs(hypoLength - (int)(m_refs[ref_id].first)[i]); + } + } + size_t cur_ref_length = closestRef; return cur_ref_length; } diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h index 038c9692d..d572a7c37 100644 --- a/moses/src/BleuScoreFeature.h +++ b/moses/src/BleuScoreFeature.h @@ -44,7 +44,7 @@ class BleuScoreFeature : public StatefulFeatureFunction { public: typedef boost::unordered_map< Phrase, size_t > NGrams; - typedef boost::unordered_map > RefCounts; + typedef boost::unordered_map,NGrams> > RefCounts; typedef boost::unordered_map Matches; BleuScoreFeature(): @@ -75,11 +75,11 @@ public: void PrintHistory(std::ostream& out) const; void LoadReferences(const std::vector< std::vector< std::string > > &); void SetCurrentSourceLength(size_t); - void SetCurrentReference(size_t); + void SetCurrentShortestReference(size_t); void UpdateHistory(const std::vector< const Word* >&); void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); - void PrintReferenceLength(const std::vector& ref_ids); - size_t GetReferenceLength(size_t ref_id); +// void PrintReferenceLength(const std::vector& ref_ids); + size_t GetClosestReferenceLength(size_t ref_id, int hypoLength); void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend, float scaleByX, float historySmoothing, size_t scheme, float relaxBP);