Change BLEU smoothing, change the handling of multiple oracles, and add a parameter for increasing the brevity penalty (BP)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3770 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
evahasler 2010-12-10 16:34:43 +00:00
parent cd62ffc021
commit 2f1b959302
8 changed files with 95 additions and 55 deletions

View File

@ -66,14 +66,26 @@ namespace Mira {
delete[] mosesargv;
}
-MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing)
+MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary
const StaticData &staticData = StaticData::Instance();
// is this needed?
//m_sentence = new Sentence(Input);
//stringstream in("Initialising decoder..\n");
//const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
//m_sentence->Read(in,inputFactorOrder);
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
// is this needed?
//(TranslationSystem::DEFAULT);
//m_manager = new Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
//m_manager->ProcessSentence();
// Add the bleu feature
-m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
+m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
m_bleuScoreFeature->LoadReferences(refs);
}
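
The constructor now forwards a continuous BPfactor instead of the old boolean increaseBP switch. Below is a minimal sketch (not part of this changeset) of setting up the Bleu feature with the new argument; the wrapper function name and the concrete values are illustrative, and the usual Moses headers plus <vector>/<string> are assumed to be included. A BPfactor of 1.0 keeps the standard brevity penalty, while the old increaseBP=true behaviour corresponds to the previously hard-coded factor 1.1 (see the CalculateBleu changes further down).

// Illustrative sketch only: wiring the new float BPfactor through to the feature.
void setupBleuFeature(const std::vector<std::vector<std::string> >& refs) {
  BleuScoreFeature* bleu = new BleuScoreFeature(/*useScaledReference=*/ true,
                                                /*scaleByInputLength=*/ true,
                                                /*BPfactor=*/ 1.1,
                                                /*historySmoothing=*/ 0.9);
  bleu->LoadReferences(refs);  // one vector of reference strings per sentence
}
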

View File

@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc=0, char** a
**/
class MosesDecoder {
public:
-MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
+MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
//returns the best sentence
std::vector<const Moses::Word*> getNBest(const std::string& source,

View File

@ -92,9 +92,9 @@ int main(int argc, char** argv) {
float historySmoothing;
bool useScaledReference;
bool scaleByInputLength;
-bool increaseBP;
-bool regulariseHildrethUpdates;
-bool accumulateOracles;
+float BPfactor;
+float slack;
+size_t maxNumberOracles;
bool accumulateMostViolatedConstraints;
bool pastAndCurrentConstraints;
bool suppressConvergence;
@ -124,9 +124,9 @@ int main(int argc, char** argv) {
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("increase-BP", po::value<bool>(&increaseBP)->default_value(false), "Increase penalty for short translations")
("regularise-hildreth-updates", po::value<bool>(&regulariseHildrethUpdates)->default_value(false), "Regularise Hildreth updates with the value set for clipping")
("accumulate-oracles", po::value<bool>(&accumulateOracles)->default_value(false), "Accumulate oracle translations over epochs")
("BP-factor", po::value<float>(&BPfactor)->default_value(1.0), "Increase penalty for short translations")
("slack", po::value<float>(&slack)->default_value(0), "Use slack in optimization problem")
("max-number-oracles", po::value<size_t>(&maxNumberOracles)->default_value(1), "Set a maximum number of oracles to use per example")
("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false), "Accumulate most violated constraint per example")
("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
("suppress-convergence", po::value<bool>(&suppressConvergence)->default_value(false), "Suppress convergence, fixed number of epochs")
@ -184,7 +184,7 @@ int main(int argc, char** argv) {
// initialise Moses
initMoses(mosesConfigFile, verbosity);//, argc, argv);
-MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
+MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
ScoreComponentCollection startWeights = decoder->getWeights();
startWeights.L1Normalise();
decoder->setWeights(startWeights);
@ -222,16 +222,15 @@ int main(int argc, char** argv) {
cerr << "Nbest list size: " << n << endl;
cerr << "Distinct translations in nbest list? " << distinctNbest << endl;
cerr << "Batch size: " << batchSize << endl;
cerr << "Accumulate oracles? " << accumulateOracles << endl;
cerr << "Maximum number of oracles: " << maxNumberOracles << endl;
cerr << "Accumulate most violated constraints? " << accumulateMostViolatedConstraints << endl;
cerr << "Margin scale factor: " << marginScaleFactor << endl;
cerr << "Add only violated constraints? " << onlyViolatedConstraints << endl;
float slack = regulariseHildrethUpdates ? clipping : 0;
cerr << "Using slack? " << slack << endl;
cerr << "Increase BP? " << increaseBP << endl;
cerr << "BP factor: " << BPfactor << endl;
if (learner == "mira") {
cerr << "Optimising using Mira" << endl;
optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, regulariseHildrethUpdates, weightedLossFunction, accumulateOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, slack, weightedLossFunction, maxNumberOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
if (hildreth) {
cerr << "Using Hildreth's optimisation algorithm.." << endl;
}
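
Since the optimiser is now constructed with twelve positional arguments, here is the same call with each argument labelled, purely as a readability sketch; the comments are ours and the argument order follows the declaration in Optimiser.h shown below.

// Sketch: the new MiraOptimiser constructor call, arguments labelled.
optimiser = new MiraOptimiser(
    n,                                  // n-best list size
    hildreth,                           // solve the update with Hildreth's algorithm
    marginScaleFactor,
    onlyViolatedConstraints,
    clipping,
    fixedClipping,
    slack,                              // replaces bool regulariseHildrethUpdates
    weightedLossFunction,
    maxNumberOracles,                   // replaces bool accumulateOracles
    accumulateMostViolatedConstraints,
    pastAndCurrentConstraints,
    order.size());                      // number of training examples (sizes the per-sentence oracle lists)
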
@ -401,7 +400,7 @@ int main(int argc, char** argv) {
// run optimiser on batch
cerr << "\nRun optimiser.." << endl;
ScoreComponentCollection oldWeights(mosesWeights);
-int constraintChange = optimiser->updateWeights(mosesWeights, featureValues, losses, bleuScores, oracleFeatureValues, ref_ids);
+int constraintChange = optimiser->updateWeights(mosesWeights, featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids);
// update Moses weights
mosesWeights.L1Normalise();
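
For reference, the extended call now threads the oracle BLEU scores through to the optimiser alongside the oracle feature vectors. This is a restatement of the call above with the arguments labelled; the comments are ours.

int constraintChange = optimiser->updateWeights(
    mosesWeights,            // current weights, updated in place
    featureValues,           // n-best feature vectors per input sentence
    losses,                  // per-hypothesis losses
    bleuScores,              // per-hypothesis BLEU scores
    oracleFeatureValues,     // one oracle feature vector per input sentence
    oracleBleuScores,        // new: BLEU score of each oracle translation
    ref_ids);                // sentence ids of the batch
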

View File

@ -11,12 +11,45 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
const vector< vector<float> >& losses,
const vector<std::vector<float> >& bleuScores,
const vector< ScoreComponentCollection>& oracleFeatureValues,
+const vector< float> oracleBleuScores,
const vector< size_t> sentenceIds) {
-// add every oracle in batch to list of oracles
+// add every oracle in batch to list of oracles (under certain conditions)
for (size_t i = 0; i < oracleFeatureValues.size(); ++i) {
+float newWeightedScore = oracleFeatureValues[i].GetWeightedScore();
size_t sentenceId = sentenceIds[i];
-m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
+// compare new oracle with existing oracles:
+// if same translation exists, just update the bleu score
+// if not, add the oracle
+bool updated = false;
+size_t indexOfWorst = 0;
+float worstWeightedScore = 0;
+for (size_t j = 0; j < m_oracles[sentenceId].size(); ++j) {
+float currentWeightedScore = m_oracles[sentenceId][j].GetWeightedScore();
+if (currentWeightedScore == newWeightedScore) {
+cerr << "updated.." << endl;
+m_bleu_of_oracles[sentenceId][j] = oracleBleuScores[i];
+updated = true;
+break;
+}
+else if (worstWeightedScore == 0 || currentWeightedScore > worstWeightedScore){
+worstWeightedScore = currentWeightedScore;
+indexOfWorst = j;
+}
+}
+if (!updated) {
+// add if number of maximum oracles not exceeded, otherwise override the worst
+if (m_max_number_oracles > m_oracles[sentenceId].size()) {
+m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
+m_bleu_of_oracles[sentenceId].push_back(oracleBleuScores[i]);
+}
+else {
+m_oracles[sentenceId][indexOfWorst] = oracleFeatureValues[i];
+m_bleu_of_oracles[sentenceId][indexOfWorst] = oracleBleuScores[i];
+}
+}
}
if (m_hildreth) {
@ -38,6 +71,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// iterate over all available oracles (1 if not accumulating, otherwise one per started epoch)
for (size_t k = 0; k < m_oracles[sentenceId].size(); ++k) {
cerr << "Oracle " << k << ": " << m_oracles[sentenceId][k] << " (BLEU: " << m_bleu_of_oracles[sentenceId][k] << ", model score: " << m_oracles[sentenceId][k].GetWeightedScore() << ")" << endl;
ScoreComponentCollection featureValueDiff = m_oracles[sentenceId][k];
featureValueDiff.MinusEquals(featureValues[i][j]);
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
@ -87,7 +121,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}
-if (!m_accumulateOracles) {
+if (m_max_number_oracles == 1) {
for (size_t k = 0; k < sentenceIds.size(); ++k) {
size_t sentenceId = sentenceIds[k];
m_oracles[sentenceId].clear();
@ -101,8 +135,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
m_lossMarginDistances.push_back(maxViolationLossMarginDistance);
cerr << "Number of constraints passed to optimiser: " << m_featureValueDiffs.size() << endl;
-if (m_regulariseHildrethUpdates) {
-alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances, m_c);
+if (m_slack != 0) {
+alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances, m_slack);
}
else {
alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances);
@ -134,8 +168,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
//cerr << "Number of violated constraints before optimisation: " << violatedConstraintsBefore << endl;
cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
-if (m_regulariseHildrethUpdates) {
-alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances, m_c);
+if (m_slack != 0) {
+alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances, m_slack);
}
else {
alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances);
@ -251,7 +285,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}
-if (!m_accumulateOracles) {
+if (m_max_number_oracles == 1) {
for (size_t k = 0; k < sentenceIds.size(); ++k) {
size_t sentenceId = sentenceIds[k];
m_oracles[sentenceId].clear();
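
The loop added above keeps at most max-number-oracles oracle translations per sentence, together with their BLEU scores. The following standalone sketch restates that policy with simplified types; the helper function and the Oracle struct are illustrative, not part of the Moses code.

#include <cstddef>
#include <vector>

// Sketch of the oracle bookkeeping introduced in this commit:
//  - an oracle with the same model score as the candidate is treated as the
//    same translation and only gets its BLEU score refreshed;
//  - otherwise the candidate is appended while fewer than maxOracles are
//    stored, and replaces the stored oracle with the highest ("worst")
//    model score once the cap is reached.
struct Oracle { float modelScore; float bleu; };

void updateOracles(std::vector<Oracle>& oracles, const Oracle& candidate,
                   std::size_t maxOracles) {
  std::size_t worst = 0;
  for (std::size_t j = 0; j < oracles.size(); ++j) {
    if (oracles[j].modelScore == candidate.modelScore) {
      oracles[j].bleu = candidate.bleu;        // same translation: refresh BLEU only
      return;
    }
    if (oracles[j].modelScore > oracles[worst].modelScore)
      worst = j;                               // remember the highest-scoring entry
  }
  if (oracles.size() < maxOracles)
    oracles.push_back(candidate);              // cap not reached yet
  else
    oracles[worst] = candidate;                // cap reached: overwrite the worst
}

With max-number-oracles left at its default of 1, the stored oracle is simply cleared after every update (as in the branch just above), which reproduces the previous non-accumulating behaviour.
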

View File

@ -34,6 +34,7 @@ namespace Mira {
const std::vector<std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy) = 0;
};
@ -44,6 +45,7 @@ namespace Mira {
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy)
{ return 0; }
};
@ -57,6 +59,7 @@ namespace Mira {
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy);
};
@ -65,7 +68,7 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
-MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float clipping, bool fixedClipping, bool regulariseHildrethUpdates, bool weightedLossFunction, bool accumulateOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
+MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float clipping, bool fixedClipping, float slack, bool weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
Optimiser(),
m_n(n),
m_hildreth(hildreth),
@ -73,12 +76,13 @@ namespace Mira {
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_c(clipping),
m_fixedClipping(fixedClipping),
-m_regulariseHildrethUpdates(regulariseHildrethUpdates),
+m_slack(slack),
m_weightedLossFunction(weightedLossFunction),
-m_accumulateOracles(accumulateOracles),
+m_max_number_oracles(maxNumberOracles),
m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
m_pastAndCurrentConstraints(pastAndCurrentConstraints),
-m_oracles(exampleSize) { }
+m_oracles(exampleSize),
+m_bleu_of_oracles(exampleSize) { }
~MiraOptimiser() {}
@ -87,6 +91,7 @@ namespace Mira {
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> sentenceId);
float computeDelta(Moses::ScoreComponentCollection& currWeights,
const Moses::ScoreComponentCollection featureValuesDiff,
@ -120,7 +125,7 @@ namespace Mira {
bool m_fixedClipping;
// regularise Hildreth updates
-bool m_regulariseHildrethUpdates;
+float m_slack;
bool m_weightedLossFunction;
@ -130,7 +135,9 @@ namespace Mira {
// keep a list of oracle translations over epochs
std::vector < std::vector< Moses::ScoreComponentCollection> > m_oracles;
-bool m_accumulateOracles;
+std::vector < std::vector< float> > m_bleu_of_oracles;
+size_t m_max_number_oracles;
// accumulate most violated constraints for every example
std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
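
The former regularise-hildreth-updates switch (which reused the clipping value) is replaced by an explicit slack parameter that is handed to Hildreth::optimise as the upper bound on the returned step sizes. Under the usual MIRA formulation this plays the role of the aggressiveness constant C; the following is a sketch of that assumed interpretation, in our own notation rather than anything taken from the code.

% Assumed interpretation of m_slack: the MIRA aggressiveness bound C.
\min_{\mathbf{w},\,\boldsymbol{\xi}\ge 0}\;
  \tfrac{1}{2}\,\lVert \mathbf{w}-\mathbf{w}_t\rVert^{2} + C\sum_j \xi_j
\quad\text{s.t.}\quad
\mathbf{w}^{\top}\!\bigl(\mathbf{f}_{\text{oracle}}-\mathbf{f}_j\bigr)\;\ge\;\ell_j-\xi_j .

The dual step sizes (the alphas returned by Hildreth::optimise) are then confined to [0, C] with C = slack; a slack of 0 falls back to the unconstrained two-argument call, as in the updateWeights changes above.
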

View File

@ -27,8 +27,9 @@ namespace Mira {
int Perceptron::updateWeights(ScoreComponentCollection& currWeights,
const vector< vector<ScoreComponentCollection> >& featureValues,
const vector< vector<float> >& losses,
-const vector<std::vector<float> >& bleuScores,
+const vector< vector<float> >& bleuScores,
const vector< ScoreComponentCollection>& oracleFeatureValues,
+const vector< float> oracleBleuScores,
const vector< size_t> dummy)
{
for (size_t i = 0; i < featureValues.size(); ++i) {

View File

@ -80,10 +80,10 @@ BleuScoreFeature::BleuScoreFeature():
m_ref_length_history(0),
m_use_scaled_reference(true),
m_scale_by_input_length(true),
-m_increase_BP(false),
+m_BP_factor(1.0),
m_historySmoothing(0.9) {}
-BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing):
+BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing):
StatefulFeatureFunction("BleuScore"),
m_count_history(BleuScoreState::bleu_order),
m_match_history(BleuScoreState::bleu_order),
@ -92,7 +92,7 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
m_ref_length_history(0),
m_use_scaled_reference(useScaledReference),
m_scale_by_input_length(scaleByInputLength),
-m_increase_BP(increaseBP),
+m_BP_factor(BPfactor),
m_historySmoothing(historySmoothing) {}
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
@ -317,8 +317,10 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_ngram_counts[i]) {
smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
-if (smoothed_matches == 0) {
-smoothed_matches = 0.0001;
+if (i > 0) {
+// smoothing for all n > 1
+smoothed_matches += 1;
+smoothed_count += 1;
}
precision *= smoothed_matches / smoothed_count;
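
The precision smoothing changes here: instead of clamping a zero match count to 0.0001, one is added to both matches and counts for every n-gram order above unigrams, on top of the exponentially smoothed history statistics. In formula form (our notation, not from the code), with orders that have no counts skipped as before:

% Add-one smoothing of the modified n-gram precisions for n > 1.
p_n \;=\; \frac{H^{\mathrm{match}}_n + m_n + 1}{H^{\mathrm{count}}_n + c_n + 1}
\quad (n > 1),
\qquad
p_1 \;=\; \frac{H^{\mathrm{match}}_1 + m_1}{H^{\mathrm{count}}_1 + c_1},

where m_n and c_n are the current state's n-gram matches and counts and H^match_n, H^count_n are the history values kept by the feature (decayed with the history-smoothing factor).
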
@ -337,12 +339,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_target_length < state->m_scaled_ref_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
-if (m_increase_BP) {
-precision *= exp(1 - ((1.1 * smoothed_ref_length)/ smoothed_target_length));
-}
-else{
-precision *= exp(1 - (smoothed_ref_length / smoothed_target_length));
-}
+precision *= exp(1 - ((m_BP_factor * smoothed_ref_length)/ smoothed_target_length));
}
}
else {
@ -351,12 +348,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_target_length < state->m_scaled_ref_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
-if (m_increase_BP) {
-precision *= exp(1 - ((1.1 * smoothed_ref_length)/ smoothed_target_length));
-}
-else{
-precision *= exp(1 - (smoothed_ref_length / smoothed_target_length));
-}
+precision *= exp(1 - ((m_BP_factor * smoothed_ref_length)/ smoothed_target_length));
}
}
else {
@ -364,12 +356,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_target_length < state->m_source_phrase_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
-if (m_increase_BP) {
-precision *= exp(1 - ((1.1 * smoothed_ref_length)/ smoothed_target_length));
-}
-else{
-precision *= exp(1 - (smoothed_ref_length / smoothed_target_length));
-}
+precision *= exp(1 - ((m_BP_factor * smoothed_ref_length)/ smoothed_target_length));
}
}
}
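
All three branches of the brevity penalty now share one expression, with the old hard-coded factor of 1.1 (used when increase-BP was set) generalised to the configurable BP-factor; its default of 1.0 reproduces the standard penalty. With the smoothed pseudo-lengths used above, the penalty applied when the target is shorter than the (scaled) reference is, in our notation:

% Brevity penalty with the configurable factor lambda = BP-factor.
\mathrm{BP} \;=\; \exp\!\Bigl(1 - \frac{\lambda\,\tilde r}{\tilde c}\Bigr),
\qquad
\tilde r = H^{\mathrm{ref}} + r_{\mathrm{scaled}},
\qquad
\tilde c = H^{\mathrm{tgt}} + c,

where r_scaled and c are the scaled reference length and target length of the current state and H^ref, H^tgt the corresponding length histories. Values of lambda above 1 make the exponent more negative and thus penalise short translations more strongly than standard BLEU.
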

View File

@ -45,7 +45,7 @@ typedef std::map< Phrase, size_t > NGrams;
class BleuScoreFeature : public StatefulFeatureFunction {
public:
BleuScoreFeature();
-BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
+BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
std::string GetScoreProducerDescription() const
{
@ -92,7 +92,7 @@ private:
bool m_scale_by_input_length;
// increase penalty for short translations
-bool m_increase_BP;
+float m_BP_factor;
float m_historySmoothing;