introduce parameter --history-smoothing, add mpi_finalize statement

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3752 1f5c12ca-751b-0410-a591-d2e778427230
evahasler 2010-12-01 18:09:49 +00:00
parent da712f0eeb
commit 406f55584e
7 changed files with 81 additions and 32 deletions

View File

@@ -66,14 +66,14 @@ namespace Mira {
delete[] mosesargv;
}
-MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP)
+MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary
const StaticData &staticData = StaticData::Instance();
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
// Add the bleu feature
-m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, increaseBP);
+m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
m_bleuScoreFeature->LoadReferences(refs);
}

View File

@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc=0, char** a
**/
class MosesDecoder {
public:
-MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP);
+MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
//returns the best sentence
std::vector<const Moses::Word*> getNBest(const std::string& source,

View File

@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/program_options.hpp>
#ifdef MPI_ENABLE
+#include "mpi.h"
#include <boost/mpi.hpp>
namespace mpi = boost::mpi;
#endif
@@ -88,6 +89,7 @@ int main(int argc, char** argv) {
bool distinctNbest;
bool onlyViolatedConstraints;
bool accumulateWeights;
+float historySmoothing;
bool useScaledReference;
bool scaleByInputLength;
bool increaseBP;
@@ -114,6 +116,7 @@ int main(int argc, char** argv) {
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(false), "Use nbest list with distinct translations in inference step")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
+("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("increase-BP", po::value<bool>(&increaseBP)->default_value(false), "Increase penalty for short translations")
@@ -173,7 +176,7 @@ int main(int argc, char** argv) {
// initialise Moses
initMoses(mosesConfigFile, verbosity);//, argc, argv);
-MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP);
+MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
ScoreComponentCollection startWeights = decoder->getWeights();
startWeights.L1Normalise();
decoder->setWeights(startWeights);
@@ -243,6 +246,8 @@ int main(int argc, char** argv) {
ScoreComponentCollection averageTotalWeights;
// TODO: scaling of feature values for probabilistic features
+vector< ScoreComponentCollection> list_of_delta_h; // collect delta_h and loss for all examples of an epoch
+vector< float> list_of_losses;
for (size_t epoch = 0; epoch < epochs; ++epoch) {
cerr << "\nEpoch " << epoch << endl;
// Sum up weights over one epoch, final average uses weights from last epoch
@@ -385,6 +390,9 @@ int main(int argc, char** argv) {
decoder->setWeights(mosesWeights);
// update history (for approximate document bleu)
+for (size_t i = 0; i < oracles.size(); ++i) {
+cerr << "oracle length: " << oracles[i].size() << " ";
+}
decoder->updateHistory(oracles, inputLengths, ref_ids);
// clean up oracle translations after updating history
@@ -410,6 +418,9 @@ int main(int argc, char** argv) {
// new weights
margin = featureDiff.InnerProduct(mosesWeights);
lossMinusMargin_new += (losses[batchPosition][j] - margin);
+list_of_delta_h.push_back(featureDiff);
+list_of_losses.push_back(losses[batchPosition][j]);
}
}
@@ -431,13 +442,18 @@ int main(int argc, char** argv) {
#ifdef MPI_ENABLE
if (shardPosition % (shard.size() / mixFrequency) == 0) {
ScoreComponentCollection averageWeights;
-VERBOSE(1, "\nRank: " << rank << " \nBefore mixing: " << mosesWeights << endl);
+if (rank == 0) {
+cerr << "Rank 0, before mixing: " << mosesWeights << endl;
+}
+//VERBOSE(1, "\nRank: " << rank << " \nBefore mixing: " << mosesWeights << endl);
// collect all weights in averageWeights and divide by number of processes
mpi::reduce(world, mosesWeights, averageWeights, SCCPlus(), 0);
if (rank == 0) {
averageWeights.DivideEquals(size);
-VERBOSE(1, "After mixing: " << averageWeights << endl);
+//VERBOSE(1, "After mixing: " << averageWeights << endl);
+cerr << "Rank 0, after mixing: " << averageWeights << endl;
// normalise weights after averaging
averageWeights.L1Normalise();
@@ -446,6 +462,14 @@ int main(int argc, char** argv) {
// broadcast average weights from process 0
mpi::broadcast(world, averageWeights, 0);
decoder->setWeights(averageWeights);
+// compute summed error after mixing weights
+float summedError = 0.0;
+for (size_t i = 0; i < list_of_delta_h.size(); ++i) {
+summedError += (list_of_losses[i] - list_of_delta_h[i].InnerProduct(averageWeights));
+}
+cerr << "summed error after mixing weights: " << summedError << " (" << list_of_delta_h.size() << " examples)" << endl;
}
#endif
@@ -458,6 +482,13 @@ int main(int argc, char** argv) {
else
totalWeights.DivideEquals(iterationsThisEpoch);
+#ifdef MPI_ENABLE
+if (rank == 0) {
+cerr << "Rank 0, cumulative weights: " << cumulativeWeights << endl;
+cerr << "Rank 0, total weights: " << totalWeights << endl;
+}
+#endif
// average across processes
#ifdef MPI_ENABLE
mpi::reduce(world, totalWeights, averageTotalWeights, SCCPlus(), 0);
@@ -465,6 +496,7 @@ int main(int argc, char** argv) {
// average and normalise weights
averageTotalWeights.DivideEquals(size);
averageTotalWeights.L1Normalise();
+cerr << "Rank 0, average total weights: " << averageTotalWeights << endl;
}
#endif
#ifndef MPI_ENABLE
@@ -482,13 +514,24 @@ int main(int argc, char** argv) {
averageTotalWeights.Save(filename.str());
++weightEpochDump;
}
+// compute summed error after dumping weights
+float summedError = 0.0;
+for (size_t i = 0; i < list_of_delta_h.size(); ++i) {
+summedError += (list_of_losses[i] - list_of_delta_h[i].InnerProduct(averageTotalWeights));
+}
+cerr << "summed error after dumping weights: " << summedError << " (" << list_of_delta_h.size() << " examples)" << endl;
}
}
+list_of_delta_h.clear();
+list_of_losses.clear();
}
/*#ifdef MPI_ENABLE
mpi::finalize();
#endif*/
+#ifdef MPI_ENABLE
+MPI_Finalize();
+#endif
cerr << "Average total weights: " << averageTotalWeights << endl;

View File

@@ -23,8 +23,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
featureValueDiff.MinusEquals(featureValues[i][j]);
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
-cerr << "loss of hypothesis: " << losses[i][j] << endl;
-cerr << "model score difference: " << modelScoreDiff << endl;
+//cerr << "loss of hypothesis: " << losses[i][j] << endl;
+//cerr << "model score difference: " << modelScoreDiff << endl;
float loss = losses[i][j] * m_marginScaleFactor;
bool addConstraint = true;
@@ -49,12 +49,12 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}
-cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
+//cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
if (violatedConstraintsBefore > 0) {
// TODO: slack?
// run optimisation
-cerr << "\nNumber of violated constraints: " << violatedConstraintsBefore << endl;
+//cerr << "\nNumber of violated constraints: " << violatedConstraintsBefore << endl;
// compute deltas for all given constraints
vector< float> alphas;
if (m_regulariseHildrethUpdates) {
@@ -68,7 +68,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// * w' = w' + delta * Dh_ij ---> w' = w' + delta * (h(e*) - h(e_ij))
float sumOfAlphas = 0;
for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
-cerr << "alpha " << k << ": " << alphas[k] << endl;
+//cerr << "alpha " << k << ": " << alphas[k] << endl;
sumOfAlphas += alphas[k];
// compute update
@@ -78,7 +78,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
currWeights.PlusEquals(featureValueDiffs[k]);
}
-cerr << "sum of alphas: " << sumOfAlphas << endl;
+//cerr << "sum of alphas: " << sumOfAlphas << endl;
// sanity check: how many constraints violated after optimisation?
size_t violatedConstraintsAfter = 0;
@@ -92,11 +92,11 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
++violatedConstraintsAfter;
}
-cerr << "New model score difference: " << modelScoreDiff << endl;
+//cerr << "New model score difference: " << modelScoreDiff << endl;
}
}
-cerr << "Number of violated constraints after optimisation: " << violatedConstraintsAfter << endl;
+//cerr << "Number of violated constraints after optimisation: " << violatedConstraintsAfter << endl;
if (violatedConstraintsAfter > violatedConstraintsBefore) {
cerr << "Increase: " << violatedConstraintsAfter - violatedConstraintsBefore << endl << endl;
}

View File

@@ -41,7 +41,7 @@ namespace Mira {
const std::vector< std::vector<Moses::ScoreComponentCollection> >& scores,
const std::vector< std::vector<float> >& losses,
const std::vector<Moses::ScoreComponentCollection>& oracleScores)
-{/* do nothing */}
+{ return 0; }
};
class Perceptron : public Optimiser {
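A side note on the { return 0; } change above: updateWeights returns int (see int MiraOptimiser::updateWeights in the previous file), and letting control flow off the end of a value-returning function is undefined behaviour in C++, so the old {/* do nothing */} body was a genuine bug, not just style. A reduced illustration with hypothetical names, not the Moses classes:

// Reduced illustration of the fix (hypothetical names).
struct Optimiser {
  // The old body {/* do nothing */} fell off the end of an int-returning
  // function: undefined behaviour as soon as a caller reads the result.
  virtual int updateWeights() { return 0; }  // a dummy value makes the no-op safe
  virtual ~Optimiser() {}
};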

View File

@@ -80,9 +80,10 @@ BleuScoreFeature::BleuScoreFeature():
m_ref_length_history(0),
m_use_scaled_reference(true),
m_scale_by_input_length(true),
-m_increase_BP(false) {}
+m_increase_BP(false),
+m_historySmoothing(0.9) {}
-BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP):
+BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing):
StatefulFeatureFunction("BleuScore"),
m_count_history(BleuScoreState::bleu_order),
m_match_history(BleuScoreState::bleu_order),
@@ -91,7 +92,8 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
m_ref_length_history(0),
m_use_scaled_reference(useScaledReference),
m_scale_by_input_length(scaleByInputLength),
-m_increase_BP(increaseBP) {}
+m_increase_BP(increaseBP),
+m_historySmoothing(historySmoothing) {}
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
@@ -147,15 +149,15 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
// update counts and matches for every ngram length with counts from hypo
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
-m_count_history[i] = 0.9 * (m_count_history[i] + ngram_counts[i]);
-m_match_history[i] = 0.9 * (m_match_history[i] + ngram_matches[i]);
+m_count_history[i] = m_historySmoothing * (m_count_history[i] + ngram_counts[i]);
+m_match_history[i] = m_historySmoothing * (m_match_history[i] + ngram_matches[i]);
//cerr << "precisionHistory " << i + 1 << ": " << (m_match_history[i]/m_count_history[i]) << " (" << m_match_history[i] << "/" << m_count_history[i] << ")" << endl;
}
// update counts for reference and target length
-m_source_length_history = 0.9 * (m_source_length_history + m_cur_source_length);
-m_target_length_history = 0.9 * (m_target_length_history + hypo.size());
-m_ref_length_history = 0.9 * (m_ref_length_history + m_cur_ref_length);
+m_source_length_history = m_historySmoothing * (m_source_length_history + m_cur_source_length);
+m_target_length_history = m_historySmoothing * (m_target_length_history + hypo.size());
+m_ref_length_history = m_historySmoothing * (m_ref_length_history + m_cur_ref_length);
}
/*
@@ -171,6 +173,8 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
size_t cur_source_length = sourceLengths[batchPosition];
size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first;
NGrams cur_ref_ngrams = m_refs[ref_ids[batchPosition]].second;
+cerr << "reference length: " << cur_ref_length << endl;
// compute vector c(e;{r_k}):
// vector of effective reference length, number of ngrams in e, number of ngram matches between e and r_k
GetNgramMatchCounts(phrase, cur_ref_ngrams, ngram_counts, ngram_matches, 0);
@@ -182,8 +186,8 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
// do this for last position in batch
if (batchPosition == hypos.size() - 1) {
-m_count_history[i] *= 0.9;
-m_match_history[i] *= 0.9;
+m_count_history[i] *= m_historySmoothing;
+m_match_history[i] *= m_historySmoothing;
}
}
@@ -194,9 +198,9 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
// do this for last position in batch
if (batchPosition == hypos.size() - 1) {
-m_source_length_history *= 0.9;
-m_target_length_history *= 0.9;
-m_ref_length_history *= 0.9;
+m_source_length_history *= m_historySmoothing;
+m_target_length_history *= m_historySmoothing;
+m_ref_length_history *= m_historySmoothing;
}
}
}
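Every change in this file replaces the hard-coded 0.9 with the new m_historySmoothing factor. The recurrence is an exponential decay of the pseudo-document statistics: history = gamma * (history + contribution), so each oracle sentence's counts fade geometrically in later updates, and --history-smoothing controls how quickly (small gamma forgets old sentences fast, gamma near 1 keeps a long history). A minimal standalone sketch of the recurrence, with illustrative values rather than Moses code:

#include <cstddef>
#include <cstdio>
#include <vector>

// One smoothing step: history <- gamma * (history + contribution).
// gamma plays the role of m_historySmoothing (--history-smoothing, default 0.9).
void smoothHistory(std::vector<float>& history,
                   const std::vector<float>& contribution, float gamma) {
  for (std::size_t i = 0; i < history.size(); ++i)
    history[i] = gamma * (history[i] + contribution[i]);
}

int main() {
  // hypothetical n-gram match counts (orders 1..4) of one oracle translation
  float newMatches[] = {12.0f, 8.0f, 5.0f, 3.0f};
  std::vector<float> matchHistory(4, 0.0f);
  std::vector<float> contribution(newMatches, newMatches + 4);
  for (int sentence = 0; sentence < 3; ++sentence) {
    smoothHistory(matchHistory, contribution, 0.9f);
    std::printf("after sentence %d: unigram match history = %.3f\n",
                sentence + 1, matchHistory[0]);
  }
  return 0;
}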

View File

@@ -45,7 +45,7 @@ typedef std::map< Phrase, size_t > NGrams;
class BleuScoreFeature : public StatefulFeatureFunction {
public:
BleuScoreFeature();
-BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP);
+BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
std::string GetScoreProducerDescription() const
{
@@ -94,6 +94,8 @@ private:
// increase penalty for short translations
bool m_increase_BP;
+float m_historySmoothing;
// counts for pseudo-document big_O
std::vector< float > m_count_history;
std::vector< float > m_match_history;
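For completeness: the smoothed ratios m_match_history[i] / m_count_history[i] are the n-gram precisions of the pseudo-document (the commented precisionHistory line in the .cpp diff prints exactly that), and the length histories feed the brevity penalty. Below is a hedged sketch of how such histories plausibly combine into an approximate document BLEU; the real combination lives in BleuScoreFeature code outside this diff.

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Sketch only: standard BLEU over the smoothed pseudo-document statistics.
float approximateBleu(const std::vector<float>& matchHistory,
                      const std::vector<float>& countHistory,
                      float refLengthHistory, float targetLengthHistory) {
  float logPrecisionSum = 0.0f;
  for (std::size_t i = 0; i < matchHistory.size(); ++i)
    logPrecisionSum += std::log(matchHistory[i] / countHistory[i]);
  // geometric mean of the n-gram precisions
  float precision = std::exp(logPrecisionSum / matchHistory.size());
  // brevity penalty from the smoothed target/reference length histories
  float bp = targetLengthHistory < refLengthHistory
                 ? std::exp(1.0f - refLengthHistory / targetLengthHistory)
                 : 1.0f;
  return bp * precision;
}

int main() {
  // hypothetical smoothed histories for n-gram orders 1..4
  float matches[] = {9.0f, 6.0f, 4.0f, 2.5f};
  float counts[] = {10.0f, 9.0f, 8.0f, 7.0f};
  std::vector<float> m(matches, matches + 4), c(counts, counts + 4);
  std::printf("approximate BLEU = %.4f\n", approximateBleu(m, c, 28.0f, 26.0f));
  return 0;
}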