fix reference length selection/collection of ngrams for multiple references

This commit is contained in:
Eva Hasler 2012-01-16 11:22:58 +00:00
parent a050992abd
commit f989267b6c
5 changed files with 91 additions and 59 deletions

View File

@ -103,7 +103,7 @@ namespace Mira {
staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight);
m_bleuScoreFeature->SetCurrentSourceLength((*m_sentence).GetSize());
m_bleuScoreFeature->SetCurrentReference(sentenceid);
m_bleuScoreFeature->SetCurrentShortestReference(sentenceid);
//run the decoder
m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
@ -192,12 +192,12 @@ namespace Mira {
m_bleuScoreFeature->PrintHistory(out);
}
void MosesDecoder::printReferenceLength(const vector<size_t>& ref_ids) {
/* void MosesDecoder::printReferenceLength(const vector<size_t>& ref_ids) {
m_bleuScoreFeature->PrintReferenceLength(ref_ids);
}
}*/
size_t MosesDecoder::getReferenceLength(size_t ref_id) {
return m_bleuScoreFeature->GetReferenceLength(ref_id);
// Forwards to the BLEU score feature and hands back whatever
// GetClosestReferenceLength reports for (ref_id, hypoLength).
size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength) {
  const size_t closestLength =
      m_bleuScoreFeature->GetClosestReferenceLength(ref_id, hypoLength);
  return closestLength;
}
void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,

View File

@ -66,8 +66,8 @@ class MosesDecoder {
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
// void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
void printBleuFeatureHistory(std::ostream& out);
void printReferenceLength(const std::vector<size_t>& ref_ids);
size_t getReferenceLength(size_t ref_id);
// void printReferenceLength(const std::vector<size_t>& ref_ids);
size_t getClosestReferenceLength(size_t ref_id, int hypoLength);
void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
float scaleByX, float historySmoothing, size_t scheme, float relax_BP);

View File

@ -352,6 +352,8 @@ int main(int argc, char** argv) {
cerr << "Error: Need to select an one of parameters --hope-fear/--model-hope-fear for mira update." << endl;
return 1;
}
if (historyOf1best || historyOfOracles)
sentenceLevelBleu = false;
if (!sentenceLevelBleu) {
if (!historyOf1best && !historyOfOracles) {
historyOf1best = true;
@ -495,7 +497,8 @@ int main(int argc, char** argv) {
}
}
size_t reference_length = decoder->getReferenceLength(*sid);
size_t ref_length;
float avg_ref_length;
if (hope_fear || perceptron_update) {
// HOPE
cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n << "best hope translations" << endl;
@ -504,7 +507,9 @@ int main(int argc, char** argv) {
distinctNbest, rank, epoch);
size_t current_input_length = decoder->getCurrentInputLength();
decoder->cleanup();
float hope_length_ratio = (float)oracle.size()/reference_length;
ref_length = decoder->getClosestReferenceLength(*sid, oracle.size());
avg_ref_length = ref_length;
float hope_length_ratio = (float)oracle.size()/ref_length;
cerr << ", l-ratio hope: " << hope_length_ratio << endl;
vector<const Word*> bestModel;
@ -516,8 +521,9 @@ int main(int argc, char** argv) {
distinctNbest, rank, epoch);
decoder->cleanup();
cerr << endl;
ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
dev_hypothesis_length += bestModel.size();
dev_reference_length += reference_length;
dev_reference_length += ref_length;
}
// FEAR
@ -526,7 +532,10 @@ int main(int argc, char** argv) {
featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
distinctNbest, rank, epoch);
decoder->cleanup();
float fear_length_ratio = (float)fear.size()/reference_length;
ref_length = decoder->getClosestReferenceLength(*sid, fear.size());
avg_ref_length += ref_length;
avg_ref_length /= 2;
float fear_length_ratio = (float)fear.size()/ref_length;
cerr << ", l-ratio fear: " << fear_length_ratio << endl;
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
@ -541,7 +550,7 @@ int main(int argc, char** argv) {
bool skip = false;
if (max_length_dev_reference != -1 && (length_diff_hope > max_length_dev_reference || length_diff_fear > max_length_dev_reference))
skip = true;
if (max_length_dev_hypos != -1 && (length_diff_hope_fear > reference_length * max_length_dev_hypos))
if (max_length_dev_hypos != -1 && (length_diff_hope_fear > avg_ref_length * max_length_dev_hypos))
skip = true;
if (skip) {
cerr << "Rank " << rank << ", epoch " << epoch << ", skip example (" << hope_length_ratio << ", " << fear_length_ratio << ", " << length_diff_hope_fear << ").. " << endl;
@ -579,7 +588,8 @@ int main(int argc, char** argv) {
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
float hope_length_ratio = (float)oracle.size()/reference_length;
ref_length = decoder->getClosestReferenceLength(*sid, oracle.size());
float hope_length_ratio = (float)oracle.size()/ref_length;
cerr << ", l-ratio hope: " << hope_length_ratio << endl;
oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
@ -592,11 +602,12 @@ int main(int argc, char** argv) {
distinctNbest, rank, epoch);
decoder->cleanup();
oneBests.push_back(bestModel);
float model_length_ratio = (float)bestModel.size()/reference_length;
ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
float model_length_ratio = (float)bestModel.size()/ref_length;
cerr << ", l-ratio model: " << model_length_ratio << endl;
if (stabiliseLength) {
dev_hypothesis_length += bestModel.size();
dev_reference_length += reference_length;
dev_reference_length += ref_length;
}
// FEAR
@ -606,7 +617,8 @@ int main(int argc, char** argv) {
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank, epoch);
decoder->cleanup();
float fear_length_ratio = (float)fear.size()/reference_length;
ref_length = decoder->getClosestReferenceLength(*sid, fear.size());
float fear_length_ratio = (float)fear.size()/ref_length;
cerr << ", l-ratio fear: " << fear_length_ratio << endl;
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];

View File

@ -98,37 +98,47 @@ void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRe
// Rebuilds m_refs from the given reference translations.
// refs is indexed as refs[file_id][ref_id]: the outer index is the reference
// file, the inner index the sentence within that file. For every sentence the
// token count and the counts of all n-grams of order 1..BleuScoreState::bleu_order
// are stored under key ref_id.
//
// NOTE(review): this span contains two loop nests over refs that use
// incompatible entry types (pair<size_t,NGrams> vs pair<vector<size_t>,NGrams>)
// and two declarations of fc. It looks like the removed and the added side of a
// change shown together - confirm against the real file which one is live.
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
// drop everything loaded by a previous call
m_refs.clear();
FactorCollection& fc = FactorCollection::Instance();
cerr << "Number of reference files: " << refs.size() << endl;
// First loop nest: the entry for ref_id is reset for every file, so only the
// data of the last file that contains ref_id survives.
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
const string& ref = refs[file_id][ref_id];
vector<string> refTokens = Tokenize(ref);
m_refs[ref_id] = pair<size_t,NGrams>();
pair<size_t,NGrams>& ref_pair = m_refs[ref_id];
// reference length = number of tokens
ref_pair.first = refTokens.size();
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
// build the n-gram made of tokens [end_idx - order, end_idx)
Phrase ngram(Output,1);
for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) {
const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]);
Word w;
w.SetFactor(0, f);
ngram.AddWord(w);
}
ref_pair.second[ngram] += 1;
}
}
}
}
FactorCollection& fc = FactorCollection::Instance();
// Second loop nest: the entry for ref_id is created only while reading the
// first file; later files append their length and add to the same n-gram map.
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
const string& ref = refs[file_id][ref_id];
vector<string> refTokens = Tokenize(ref);
if (file_id == 0)
m_refs[ref_id] = pair<vector<size_t>,NGrams>();
pair<vector<size_t>,NGrams>& ref_pair = m_refs[ref_id];
// one length per reference file, in file order
(ref_pair.first).push_back(refTokens.size());
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
// build the n-gram made of tokens [end_idx - order, end_idx)
Phrase ngram(Output,1);
for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) {
const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]);
Word w;
w.SetFactor(0, f);
ngram.AddWord(w);
}
// NOTE(review): counts from all reference files are summed into one map.
// BLEU normally clips against the maximum count found in any single
// reference - confirm that summing is intended.
ref_pair.second[ngram] += 1;
}
}
}
}
// for (size_t i = 0; i < m_refs.size(); ++i) {
// cerr << "ref id " << i << ", number of entries: " << (m_refs[i].first).size() << endl;
// }
}
void BleuScoreFeature::SetCurrentSourceLength(size_t source_length) {
m_cur_source_length = source_length;
}
void BleuScoreFeature::SetCurrentReference(size_t ref_id) {
m_cur_ref_length = m_refs[ref_id].first;
// Makes the references stored under ref_id the current ones:
// m_cur_ref_length receives the smallest length recorded for ref_id and
// m_cur_ref_ngrams receives a copy of the n-gram counts stored for ref_id.
void BleuScoreFeature::SetCurrentShortestReference(size_t ref_id) {
  const std::vector<size_t>& lengths = m_refs[ref_id].first;

  // -1 marks "nothing seen yet"; it is also the value assigned below when no
  // length is recorded for ref_id.
  int shortest = -1;
  for (std::vector<size_t>::const_iterator it = lengths.begin(); it != lengths.end(); ++it) {
    if (shortest == -1 || *it < static_cast<size_t>(shortest))
      shortest = *it;
  }

  m_cur_ref_length = shortest;
  m_cur_ref_ngrams = m_refs[ref_id].second;
}
@ -163,15 +173,16 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
* Update history with a batch of translations
*/
void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypos, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch) {
for (size_t batchPosition = 0; batchPosition < hypos.size(); ++batchPosition){
Phrase phrase(Output, hypos[batchPosition]);
for (size_t ref_id = 0; ref_id < hypos.size(); ++ref_id){
Phrase phrase(Output, hypos[ref_id]);
std::vector< size_t > ngram_counts(BleuScoreState::bleu_order);
std::vector< size_t > ngram_matches(BleuScoreState::bleu_order);
// set current source and reference information for each oracle in the batch
size_t cur_source_length = sourceLengths[batchPosition];
size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first;
NGrams cur_ref_ngrams = m_refs[ref_ids[batchPosition]].second;
size_t cur_source_length = sourceLengths[ref_id];
size_t hypo_length = hypos[ref_id].size();
size_t cur_ref_length = GetClosestReferenceLength(ref_ids[ref_id], hypo_length);
NGrams cur_ref_ngrams = m_refs[ref_ids[ref_id]].second;
cerr << "reference length: " << cur_ref_length << endl;
// compute vector c(e;{r_k}):
@ -184,7 +195,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
m_match_history[i] += ngram_matches[i];
// do this for last position in batch
if (batchPosition == hypos.size() - 1) {
if (ref_id == hypos.size() - 1) {
m_count_history[i] *= m_historySmoothing;
m_match_history[i] *= m_historySmoothing;
}
@ -192,11 +203,11 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
// update counts for reference and target length
m_source_length_history += cur_source_length;
m_target_length_history += hypos[batchPosition].size();
m_target_length_history += hypos[ref_id].size();
m_ref_length_history += cur_ref_length;
// do this for last position in batch
if (batchPosition == hypos.size() - 1) {
if (ref_id == hypos.size() - 1) {
cerr << "Rank " << rank << ", epoch " << epoch << " ,source length history: " << m_source_length_history << " --> " << m_source_length_history * m_historySmoothing << endl;
cerr << "Rank " << rank << ", epoch " << epoch << " ,target length history: " << m_target_length_history << " --> " << m_target_length_history * m_historySmoothing << endl;
m_source_length_history *= m_historySmoothing;
@ -209,15 +220,24 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
/*
* Print batch of reference translations
*/
void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
for (size_t batchPosition = 0; batchPosition < ref_ids.size(); ++batchPosition){
size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first;
/*void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
for (size_t ref_id = 0; ref_id < ref_ids.size(); ++ref_id){
size_t cur_ref_length = (m_refs[ref_ids[ref_id]].first)[0]; // TODO!!
cerr << "reference length: " << cur_ref_length << endl;
}
}
}*/
size_t BleuScoreFeature::GetReferenceLength(size_t ref_id) {
size_t cur_ref_length = m_refs[ref_id].first;
// Returns the length recorded for ref_id whose absolute difference to
// hypoLength is smallest. When several lengths are equally close, the one
// stored first is kept. If no length is recorded, the -1 sentinel is returned
// converted to size_t.
size_t BleuScoreFeature::GetClosestReferenceLength(size_t ref_id, int hypoLength) {
  const std::vector<size_t>& lengths = m_refs[ref_id].first;

  int bestLength = -1;  // -1: no candidate chosen yet
  int bestDist = -1;
  for (size_t k = 0; k < lengths.size(); ++k) {
    const int dist = abs(hypoLength - (int)lengths[k]);
    // strict '<' keeps the earlier candidate on a tie
    if (bestLength == -1 || dist < bestDist) {
      bestLength = lengths[k];
      bestDist = dist;
    }
  }
  return bestLength;
}

View File

@ -44,7 +44,7 @@ class BleuScoreFeature : public StatefulFeatureFunction {
public:
typedef boost::unordered_map< Phrase, size_t > NGrams;
typedef boost::unordered_map<size_t, std::pair<size_t,NGrams> > RefCounts;
typedef boost::unordered_map<size_t, std::pair<std::vector<size_t>,NGrams> > RefCounts;
typedef boost::unordered_map<size_t, NGrams> Matches;
BleuScoreFeature():
@ -75,11 +75,11 @@ public:
void PrintHistory(std::ostream& out) const;
void LoadReferences(const std::vector< std::vector< std::string > > &);
void SetCurrentSourceLength(size_t);
void SetCurrentReference(size_t);
void SetCurrentShortestReference(size_t);
void UpdateHistory(const std::vector< const Word* >&);
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void PrintReferenceLength(const std::vector<size_t>& ref_ids);
size_t GetReferenceLength(size_t ref_id);
// void PrintReferenceLength(const std::vector<size_t>& ref_ids);
size_t GetClosestReferenceLength(size_t ref_id, int hypoLength);
void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP);