Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2025-01-05 02:22:21 +03:00)

Commit 406f55584e (parent da712f0eeb)

introduce parameter --history-smoothing, add mpi_finalize statement

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3752 1f5c12ca-751b-0410-a591-d2e778427230
@@ -66,14 +66,14 @@ namespace Mira {
 delete[] mosesargv;
 }
 
-MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP)
+MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing)
 : m_manager(NULL) {
 // force initialisation of the phrase dictionary
 const StaticData &staticData = StaticData::Instance();
 const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
 
 // Add the bleu feature
-m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, increaseBP);
+m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
 (const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
 m_bleuScoreFeature->LoadReferences(refs);
 }
@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc=0, char** a
 **/
 class MosesDecoder {
 public:
-MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP);
+MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
 
 //returns the best sentence
 std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 #include <boost/program_options.hpp>
 #ifdef MPI_ENABLE
+#include "mpi.h"
 #include <boost/mpi.hpp>
 namespace mpi = boost::mpi;
 #endif
@@ -88,6 +89,7 @@ int main(int argc, char** argv) {
 bool distinctNbest;
 bool onlyViolatedConstraints;
 bool accumulateWeights;
+float historySmoothing;
 bool useScaledReference;
 bool scaleByInputLength;
 bool increaseBP;
@@ -114,6 +116,7 @@ int main(int argc, char** argv) {
 ("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(false), "Use nbest list with distinct translations in inference step")
 ("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
 ("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
+("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
 ("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
 ("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
 ("increase-BP", po::value<bool>(&increaseBP)->default_value(false), "Increase penalty for short translations")
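For context, the option table above is built with boost::program_options: each entry binds a command-line flag to a variable, and the new --history-smoothing entry binds a float with default 0.9. A minimal, self-contained sketch of the same pattern (variable names are illustrative, not taken from this commit):

#include <boost/program_options.hpp>
#include <iostream>

namespace po = boost::program_options;

int main(int argc, char** argv) {
    float historySmoothing;
    po::options_description desc("Allowed options");
    desc.add_options()
        ("history-smoothing",
         po::value<float>(&historySmoothing)->default_value(0.9f),
         "Adjust the factor for history smoothing");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm); // writes the parsed value into historySmoothing

    std::cout << "history smoothing: " << historySmoothing << std::endl;
    return 0;
}

Invoked as ./a.out --history-smoothing 0.7 this prints 0.7; without the flag it prints the default 0.9.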
@@ -173,7 +176,7 @@ int main(int argc, char** argv) {
 
 // initialise Moses
 initMoses(mosesConfigFile, verbosity);//, argc, argv);
-MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP);
+MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
 ScoreComponentCollection startWeights = decoder->getWeights();
 startWeights.L1Normalise();
 decoder->setWeights(startWeights);
@@ -243,6 +246,8 @@ int main(int argc, char** argv) {
 ScoreComponentCollection averageTotalWeights;
 
 // TODO: scaling of feature values for probabilistic features
+vector< ScoreComponentCollection> list_of_delta_h; // collect delta_h and loss for all examples of an epoch
+vector< float> list_of_losses;
 for (size_t epoch = 0; epoch < epochs; ++epoch) {
 cerr << "\nEpoch " << epoch << endl;
 // Sum up weights over one epoch, final average uses weights from last epoch
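These two vectors record, for every example seen in an epoch, the oracle-minus-hypothesis feature difference Delta h and its loss. The "summed error" reported later in this diff is then, in LaTeX (my transcription of the code, not notation from the commit):

\mathrm{summedError} \;=\; \sum_i \bigl( \ell_i - \langle \Delta h_i, \mathbf{w} \rangle \bigr)

i.e. the total amount by which the model-score margin still falls short of each loss under the weights w being evaluated.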
@@ -385,6 +390,9 @@ int main(int argc, char** argv) {
 decoder->setWeights(mosesWeights);
 
 // update history (for approximate document bleu)
+for (size_t i = 0; i < oracles.size(); ++i) {
+cerr << "oracle length: " << oracles[i].size() << " ";
+}
 decoder->updateHistory(oracles, inputLengths, ref_ids);
 
 // clean up oracle translations after updating history
@@ -410,6 +418,9 @@ int main(int argc, char** argv) {
 // new weights
 margin = featureDiff.InnerProduct(mosesWeights);
 lossMinusMargin_new += (losses[batchPosition][j] - margin);
+
+list_of_delta_h.push_back(featureDiff);
+list_of_losses.push_back(losses[batchPosition][j]);
 }
 }
 
@@ -431,13 +442,18 @@ int main(int argc, char** argv) {
 #ifdef MPI_ENABLE
 if (shardPosition % (shard.size() / mixFrequency) == 0) {
 ScoreComponentCollection averageWeights;
-VERBOSE(1, "\nRank: " << rank << " \nBefore mixing: " << mosesWeights << endl);
+if (rank == 0) {
+cerr << "Rank 0, before mixing: " << mosesWeights << endl;
+}
+
+//VERBOSE(1, "\nRank: " << rank << " \nBefore mixing: " << mosesWeights << endl);
 
 // collect all weights in averageWeights and divide by number of processes
 mpi::reduce(world, mosesWeights, averageWeights, SCCPlus(), 0);
 if (rank == 0) {
 averageWeights.DivideEquals(size);
-VERBOSE(1, "After mixing: " << averageWeights << endl);
+//VERBOSE(1, "After mixing: " << averageWeights << endl);
+cerr << "Rank 0, after mixing: " << averageWeights << endl;
 
 // normalise weights after averaging
 averageWeights.L1Normalise();
@@ -446,6 +462,14 @@ int main(int argc, char** argv) {
 // broadcast average weights from process 0
 mpi::broadcast(world, averageWeights, 0);
 decoder->setWeights(averageWeights);
+
+// compute summed error after mixing weights
+float summedError = 0.0;
+for (size_t i = 0; i < list_of_delta_h.size(); ++i) {
+summedError += (list_of_losses[i] - list_of_delta_h[i].InnerProduct(averageWeights));
+}
+
+cerr << "summed error after mixing weights: " << summedError << " (" << list_of_delta_h.size() << " examples)" << endl;
 }
 #endif
 
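The mixing step above is the standard reduce-then-broadcast averaging pattern from Boost.MPI. A runnable sketch with a plain float standing in for ScoreComponentCollection (all names illustrative, not from the commit):

#include <boost/mpi.hpp>
#include <functional>
#include <iostream>

namespace mpi = boost::mpi;

int main(int argc, char** argv) {
    mpi::environment env(argc, argv); // MPI_Init here, MPI_Finalize in its destructor
    mpi::communicator world;

    float localWeight = static_cast<float>(world.rank() + 1);
    float sum = 0.0f;

    // collect all local values on rank 0 ...
    mpi::reduce(world, localWeight, sum, std::plus<float>(), 0);

    float average = 0.0f;
    if (world.rank() == 0) {
        average = sum / world.size(); // ... and divide by the number of processes
    }

    // send the mixed value back to every process
    mpi::broadcast(world, average, 0);
    std::cout << "rank " << world.rank() << ": " << average << std::endl;
    return 0;
}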
@@ -458,6 +482,13 @@ int main(int argc, char** argv) {
 else
 totalWeights.DivideEquals(iterationsThisEpoch);
+
+#ifdef MPI_ENABLE
+if (rank == 0) {
+cerr << "Rank 0, cumulative weights: " << cumulativeWeights << endl;
+cerr << "Rank 0, total weights: " << totalWeights << endl;
+}
+#endif
 
 // average across processes
 #ifdef MPI_ENABLE
 mpi::reduce(world, totalWeights, averageTotalWeights, SCCPlus(), 0);
@@ -465,6 +496,7 @@ int main(int argc, char** argv) {
 // average and normalise weights
 averageTotalWeights.DivideEquals(size);
 averageTotalWeights.L1Normalise();
+cerr << "Rank 0, average total weights: " << averageTotalWeights << endl;
 }
 #endif
 #ifndef MPI_ENABLE
@@ -482,13 +514,24 @@ int main(int argc, char** argv) {
 averageTotalWeights.Save(filename.str());
 ++weightEpochDump;
 }
+
+// compute summed error after dumping weights
+float summedError = 0.0;
+for (size_t i = 0; i < list_of_delta_h.size(); ++i) {
+summedError += (list_of_losses[i] - list_of_delta_h[i].InnerProduct(averageTotalWeights));
+}
+
+cerr << "summed error after dumping weights: " << summedError << " (" << list_of_delta_h.size() << " examples)" << endl;
 }
 }
+
+list_of_delta_h.clear();
+list_of_losses.clear();
 }
 
 /*#ifdef MPI_ENABLE
 mpi::finalize();
 #endif*/
+#ifdef MPI_ENABLE
+MPI_Finalize();
+#endif
 
 cerr << "Average total weights: " << averageTotalWeights << endl;
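A note on the finalisation change: as far as I know Boost.MPI exposes no free mpi::finalize() function — finalisation normally happens in the destructor of boost::mpi::environment — which would explain why the call above was commented out and the commit falls back to the C API's MPI_Finalize(), declared in the newly included "mpi.h". The C-API pairing looks like this (illustrative sketch, not from the commit):

#include <mpi.h>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv); // must be called once before any other MPI call
    // ... distributed training work ...
    MPI_Finalize();         // must be the last MPI call on every process
    return 0;
}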
@@ -23,8 +23,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
 featureValueDiff.MinusEquals(featureValues[i][j]);
 float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
-cerr << "loss of hypothesis: " << losses[i][j] << endl;
-cerr << "model score difference: " << modelScoreDiff << endl;
+//cerr << "loss of hypothesis: " << losses[i][j] << endl;
+//cerr << "model score difference: " << modelScoreDiff << endl;
 float loss = losses[i][j] * m_marginScaleFactor;
 
 bool addConstraint = true;
@@ -49,12 +49,12 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 }
 }
 
-cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
+//cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
 
 if (violatedConstraintsBefore > 0) {
 // TODO: slack?
 // run optimisation
-cerr << "\nNumber of violated constraints: " << violatedConstraintsBefore << endl;
+//cerr << "\nNumber of violated constraints: " << violatedConstraintsBefore << endl;
 // compute deltas for all given constraints
 vector< float> alphas;
 if (m_regulariseHildrethUpdates) {
@@ -68,7 +68,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 // * w' = w' + delta * Dh_ij ---> w' = w' + delta * (h(e*) - h(e_ij))
 float sumOfAlphas = 0;
 for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
-cerr << "alpha " << k << ": " << alphas[k] << endl;
+//cerr << "alpha " << k << ": " << alphas[k] << endl;
 sumOfAlphas += alphas[k];
 
 // compute update
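The comment at the top of this hunk compresses the MIRA-style weight update; written out in LaTeX (my transcription):

\mathbf{w}' \;\leftarrow\; \mathbf{w}' + \sum_{i,j} \delta_{ij} \, \Delta h_{ij},
\qquad \Delta h_{ij} = h(e^{*}_{i}) - h(e_{ij})

where the delta values (the "alphas" in the loop) come from the Hildreth optimiser and Delta h is the feature difference between the oracle translation e* and hypothesis e_ij.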
@@ -78,7 +78,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 currWeights.PlusEquals(featureValueDiffs[k]);
 }
 
-cerr << "sum of alphas: " << sumOfAlphas << endl;
+//cerr << "sum of alphas: " << sumOfAlphas << endl;
 
 // sanity check: how many constraints violated after optimisation?
 size_t violatedConstraintsAfter = 0;
@@ -92,11 +92,11 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 ++violatedConstraintsAfter;
 }
 
-cerr << "New model score difference: " << modelScoreDiff << endl;
+//cerr << "New model score difference: " << modelScoreDiff << endl;
 }
 }
 
-cerr << "Number of violated constraints after optimisation: " << violatedConstraintsAfter << endl;
+//cerr << "Number of violated constraints after optimisation: " << violatedConstraintsAfter << endl;
 if (violatedConstraintsAfter > violatedConstraintsBefore) {
 cerr << "Increase: " << violatedConstraintsAfter - violatedConstraintsBefore << endl << endl;
 }
@@ -41,7 +41,7 @@ namespace Mira {
 const std::vector< std::vector<Moses::ScoreComponentCollection> >& scores,
 const std::vector< std::vector<float> >& losses,
 const std::vector<Moses::ScoreComponentCollection>& oracleScores)
-{/* do nothing */}
+{ return 0; }
 };
 
 class Perceptron : public Optimiser {
@@ -80,9 +80,10 @@ BleuScoreFeature::BleuScoreFeature():
 m_ref_length_history(0),
 m_use_scaled_reference(true),
 m_scale_by_input_length(true),
-m_increase_BP(false) {}
+m_increase_BP(false),
+m_historySmoothing(0.9) {}
 
-BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP):
+BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing):
 StatefulFeatureFunction("BleuScore"),
 m_count_history(BleuScoreState::bleu_order),
 m_match_history(BleuScoreState::bleu_order),
@@ -91,7 +92,8 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
 m_ref_length_history(0),
 m_use_scaled_reference(useScaledReference),
 m_scale_by_input_length(scaleByInputLength),
-m_increase_BP(increaseBP) {}
+m_increase_BP(increaseBP),
+m_historySmoothing(historySmoothing) {}
 
 void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
 {
@@ -147,15 +149,15 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
 
 // update counts and matches for every ngram length with counts from hypo
 for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
-m_count_history[i] = 0.9 * (m_count_history[i] + ngram_counts[i]);
-m_match_history[i] = 0.9 * (m_match_history[i] + ngram_matches[i]);
+m_count_history[i] = m_historySmoothing * (m_count_history[i] + ngram_counts[i]);
+m_match_history[i] = m_historySmoothing * (m_match_history[i] + ngram_matches[i]);
 //cerr << "precisionHistory " << i + 1 << ": " << (m_match_history[i]/m_count_history[i]) << " (" << m_match_history[i] << "/" << m_count_history[i] << ")" << endl;
 }
 
 // update counts for reference and target length
-m_source_length_history = 0.9 * (m_source_length_history + m_cur_source_length);
-m_target_length_history = 0.9 * (m_target_length_history + hypo.size());
-m_ref_length_history = 0.9 * (m_ref_length_history + m_cur_ref_length);
+m_source_length_history = m_historySmoothing * (m_source_length_history + m_cur_source_length);
+m_target_length_history = m_historySmoothing * (m_target_length_history + hypo.size());
+m_ref_length_history = m_historySmoothing * (m_ref_length_history + m_cur_ref_length);
 }
 
 /*
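Replacing the hard-coded 0.9 makes the smoothing factor gamma = m_historySmoothing explicit. The recurrence implemented here is an exponential decay of pseudo-document statistics, in LaTeX (my transcription):

c_i \;\leftarrow\; \gamma \,(c_i + \hat{c}_i), \qquad
m_i \;\leftarrow\; \gamma \,(m_i + \hat{m}_i)

where \hat{c}_i and \hat{m}_i are the n-gram counts and matches of the current hypothesis; a sentence processed k updates ago thus contributes with geometrically decaying weight gamma^{k+1} to the approximate document BLEU.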
@@ -171,6 +173,8 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
 size_t cur_source_length = sourceLengths[batchPosition];
 size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first;
 NGrams cur_ref_ngrams = m_refs[ref_ids[batchPosition]].second;
+cerr << "reference length: " << cur_ref_length << endl;
+
 // compute vector c(e;{r_k}):
 // vector of effective reference length, number of ngrams in e, number of ngram matches between e and r_k
 GetNgramMatchCounts(phrase, cur_ref_ngrams, ngram_counts, ngram_matches, 0);
@@ -182,8 +186,8 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
 
 // do this for last position in batch
 if (batchPosition == hypos.size() - 1) {
-m_count_history[i] *= 0.9;
-m_match_history[i] *= 0.9;
+m_count_history[i] *= m_historySmoothing;
+m_match_history[i] *= m_historySmoothing;
 }
 }
 
@@ -194,9 +198,9 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
 
 // do this for last position in batch
 if (batchPosition == hypos.size() - 1) {
-m_source_length_history *= 0.9;
-m_target_length_history *= 0.9;
-m_ref_length_history *= 0.9;
+m_source_length_history *= m_historySmoothing;
+m_target_length_history *= m_historySmoothing;
+m_ref_length_history *= m_historySmoothing;
 }
 }
 }
@@ -45,7 +45,7 @@ typedef std::map< Phrase, size_t > NGrams;
 class BleuScoreFeature : public StatefulFeatureFunction {
 public:
 BleuScoreFeature();
-BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP);
+BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
 
 std::string GetScoreProducerDescription() const
 {
@@ -94,6 +94,8 @@ private:
 // increase penalty for short translations
 bool m_increase_BP;
 
+float m_historySmoothing;
+
 // counts for pseudo-document big_O
 std::vector< float > m_count_history;
 std::vector< float > m_match_history;