From 42333388b4f177c2626ca4e6dc498335b89aa280 Mon Sep 17 00:00:00 2001 From: ehasler Date: Sun, 26 Jun 2011 19:12:46 +0000 Subject: [PATCH] change verbosity for cerr messages, remove some unwanted options, introduce --margin-slack, --margin-incr git-svn-id: http://svn.statmt.org/repository/mira@3913 cc96ff50-19ce-11e0-b349-13d7f0bd23df --- mira/Decoder.cpp | 22 ++- mira/Decoder.h | 15 +- mira/Main.cpp | 409 +++++++++++++---------------------------- mira/MiraOptimiser.cpp | 221 +++++++++------------- mira/Optimiser.h | 44 ++--- mira/Perceptron.cpp | 4 +- 6 files changed, 257 insertions(+), 458 deletions(-) diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp index 1181ee8be..4a7f116f1 100644 --- a/mira/Decoder.cpp +++ b/mira/Decoder.cpp @@ -68,7 +68,7 @@ namespace Mira { delete[] mosesargv; } - MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float historySmoothing) + MosesDecoder::MosesDecoder(bool scaleByInputLength, float historySmoothing) : m_manager(NULL) { // force initialisation of the phrase dictionary (TODO: what for?) const StaticData &staticData = StaticData::Instance(); @@ -82,7 +82,7 @@ namespace Mira { m_manager->ProcessSentence(); // Add the bleu feature - m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, historySmoothing); + m_bleuScoreFeature = new BleuScoreFeature(scaleByInputLength, historySmoothing); (const_cast(system)).AddFeatureFunction(m_bleuScoreFeature); } @@ -100,7 +100,8 @@ namespace Mira { vector< float>& bleuScores, bool oracle, bool distinct, - size_t rank) + size_t rank, + size_t epoch) { StaticData &staticData = StaticData::InstanceNonConst(); @@ -137,11 +138,11 @@ namespace Mira { //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl; float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore); - cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl; Phrase bestPhrase = path.GetTargetPhrase(); - cerr << "Rank " << rank << ": "; + cerr << "Rank " << rank << ", epoch " << epoch << ": "; Phrase phrase = path.GetTargetPhrase(); for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { const Word &word = phrase.GetWord(pos); @@ -179,7 +180,9 @@ namespace Mira { size_t sentenceid, float bleuObjectiveWeight, float bleuScoreWeight, - bool distinct) + bool distinct, + size_t rank, + size_t epoch) { StaticData &staticData = StaticData::InstanceNonConst(); @@ -215,15 +218,15 @@ namespace Mira { bleuAndScore.push_back(bleuScore); bleuAndScore.push_back(scoreWithoutBleu); - cerr << "1best translation: "; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", 1best translation: "); Phrase phrase = path.GetTargetPhrase(); for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { const Word &word = phrase.GetWord(pos); Word *newWord = new Word(word); - cerr << *newWord; + VERBOSE(1, *newWord); } - cerr << endl; + VERBOSE(1, endl); return bleuAndScore; } @@ -245,7 +248,6 @@ namespace Mira { } void MosesDecoder::setWeights(const ScoreComponentCollection& weights) { - //cerr << "New weights: " << weights << endl; StaticData::InstanceNonConst().SetAllWeights(weights); } diff --git a/mira/Decoder.h b/mira/Decoder.h index 9ae14b7f3..fdb6bb357 100644 --- a/mira/Decoder.h +++ b/mira/Decoder.h @@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc, std::vecto **/ class MosesDecoder { public: - MosesDecoder(bool useScaledReference, bool scaleByInputLength, float historySmoothing); + MosesDecoder(bool scaleByInputLength, float historySmoothing); //returns the best sentence std::vector getNBest(const std::string& source, @@ -62,12 +62,15 @@ class MosesDecoder { std::vector< float>& scores, bool oracle, bool distinct, - size_t rank); + size_t rank, + size_t epoch); std::vector getBleuAndScore(const std::string& source, size_t sentenceid, float bleuObjectiveWeight, float bleuScoreWeight, - bool distinct); + bool distinct, + size_t rank, + size_t epoch); size_t getCurrentInputLength(); void updateHistory(const std::vector& words); void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); @@ -77,13 +80,13 @@ class MosesDecoder { std::vector calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& ref_ids, size_t epoch, size_t rank); Moses::ScoreComponentCollection getWeights(); void setWeights(const Moses::ScoreComponentCollection& weights); - void cleanup(); + void cleanup(); private: float getBleuScore(const Moses::ScoreComponentCollection& scores); void setBleuScore(Moses::ScoreComponentCollection& scores, float bleu); - Moses::Manager *m_manager; - Moses::Sentence *m_sentence; + Moses::Manager *m_manager; + Moses::Sentence *m_sentence; Moses::BleuScoreFeature *m_bleuScoreFeature; diff --git a/mira/Main.cpp b/mira/Main.cpp index f3f1bb2c5..2f2676b70 100644 --- a/mira/Main.cpp +++ b/mira/Main.cpp @@ -144,7 +144,6 @@ int main(int argc, char** argv) { size_t weightDumpFrequency; string weightDumpStem; float min_learning_rate; - float min_sentence_update; size_t scale_margin; bool scale_update; size_t n; @@ -153,14 +152,12 @@ int main(int argc, char** argv) { bool onlyViolatedConstraints; bool accumulateWeights; float historySmoothing; - bool useScaledReference; bool scaleByInputLength; float slack; float slack_step; float slack_min; bool averageWeights; bool weightConvergence; - bool controlUpdates; float learning_rate; float mira_learning_rate; float perceptron_learning_rate; @@ -168,24 +165,18 @@ int main(int argc, char** argv) { size_t baseOfLog; string decoder_settings; float min_weight_change; - float max_sentence_update; float decrease_learning_rate; - float decrease_sentence_update; bool devBleu; bool normaliseWeights; bool print_feature_values; - bool stop_dev_bleu; - bool stop_approx_dev_bleu; - bool train_linear_classifier; - bool multiplyA; bool historyOf1best; bool burnIn; string burnInInputFile; vector burnInReferenceFiles; bool sentenceLevelBleu; float bleuScoreWeight; - float precision; - float min_bleu_change; + float margin_slack; + float margin_slack_incr; bool analytical_update; bool perceptron_update; bool hope_fear; @@ -204,49 +195,42 @@ int main(int argc, char** argv) { ("burn-in-input-file", po::value(&burnInInputFile), "Input file for burn-in phase of BLEU history") ("burn-in-reference-files", po::value >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history") ("config,f", po::value(&mosesConfigFile), "Moses ini file") - ("control-updates", po::value(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error") ("core-weights", po::value(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)") ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs") ("decr-learning-rate", po::value(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch") - ("decr-sentence-update", po::value(&decrease_sentence_update)->default_value(0), "Decrease maximum weight update by the given value after every epoch") ("dev-bleu", po::value(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set") ("distinct-nbest", po::value(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step") ("weight-dump-frequency", po::value(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi") - ("epochs,e", po::value(&epochs)->default_value(5), "Number of epochs") + ("epochs,e", po::value(&epochs)->default_value(10), "Number of epochs") ("fear-n", po::value(&fear_n)->default_value(-1), "Number of fear translations used") ("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit") - ("history-of-1best", po::value(&historyOf1best)->default_value(0), "Use the 1best translation to update the history") - ("history-smoothing", po::value(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing") + ("history-of-1best", po::value(&historyOf1best)->default_value(false), "Use the 1best translation to update the history") + ("history-smoothing", po::value(&historySmoothing)->default_value(0.7), "Adjust the factor for history smoothing") ("hope-fear", po::value(&hope_fear)->default_value(true), "Use only hope and fear translations for optimization (not model)") ("hope-n", po::value(&hope_n)->default_value(-1), "Number of hope translations used") ("input-file,i", po::value(&inputFile), "Input file containing tokenised source") ("learner,l", po::value(&learner)->default_value("mira"), "Learning algorithm") + ("margin-slack", po::value(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints") + ("margin-incr", po::value(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount") ("mira-learning-rate", po::value(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)") ("log-feature-values", po::value(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.") - ("min-bleu-change", po::value(&min_bleu_change)->default_value(0), "Minimum BLEU change of 1best translations of one epoch") - ("min-sentence-update", po::value(&min_sentence_update)->default_value(0), "Set a minimum weight update per sentence") ("min-learning-rate", po::value(&min_learning_rate)->default_value(0), "Set a minimum learning rate") - ("max-sentence-update", po::value(&max_sentence_update)->default_value(-1), "Set a maximum weight update per sentence") ("min-weight-change", po::value(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion") - ("mixing-frequency", po::value(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi") + ("mixing-frequency", po::value(&mixingFrequency)->default_value(5), "How often per epoch to mix weights, when using mpi") ("model-hope-fear", po::value(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization") - ("nbest,n", po::value(&n)->default_value(10), "Number of translations in nbest list") + ("nbest,n", po::value(&n)->default_value(1), "Number of translations in nbest list") ("normalise", po::value(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder") ("only-violated-constraints", po::value(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem") ("perceptron-learning-rate", po::value(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate") - ("precision", po::value(&precision)->default_value(0), "Precision when comparing left and right hand side of constraints") ("print-feature-values", po::value(&print_feature_values)->default_value(false), "Print out feature values") ("reference-files,r", po::value >(&referenceFiles), "Reference translation files for training") ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths") - ("sentence-level-bleu", po::value(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function") + ("sentence-level-bleu", po::value(&sentenceLevelBleu)->default_value(true), "Use a sentences level bleu scoring function") ("shuffle", po::value(&shuffle)->default_value(false), "Shuffle input sentences before processing") ("slack", po::value(&slack)->default_value(0.01), "Use slack in optimizer") ("slack-min", po::value(&slack_min)->default_value(0.01), "Minimum slack used") ("slack-step", po::value(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided") - ("stop-dev-bleu", po::value(&stop_dev_bleu)->default_value(false), "Stop when average Bleu (dev) decreases (or no more increases)") - ("stop-approx-dev-bleu", po::value(&stop_approx_dev_bleu)->default_value(false), "Stop when average approx. sentence Bleu (dev) decreases (or no more increases)") ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge") - ("use-scaled-reference", po::value(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases") ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") ("scale-margin", po::value(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation") ("scale-update", po::value(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation") @@ -255,8 +239,7 @@ int main(int argc, char** argv) { po::options_description cmdline_options; cmdline_options.add(desc); po::variables_map vm; - po::store( - po::command_line_parser(argc, argv). options(cmdline_options).run(), vm); + po::store(po::command_line_parser(argc, argv). options(cmdline_options).run(), vm); po::notify(vm); if (help) { @@ -329,7 +312,7 @@ int main(int argc, char** argv) { vector decoder_params; boost::split(decoder_params, decoder_settings, boost::is_any_of("\t ")); initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params); - MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, historySmoothing); + MosesDecoder* decoder = new MosesDecoder(scaleByInputLength, historySmoothing); if (normaliseWeights) { ScoreComponentCollection startWeights = decoder->getWeights(); startWeights.L1Normalise(); @@ -353,12 +336,16 @@ int main(int argc, char** argv) { // initialise optimizer Optimiser* optimiser = NULL; if (learner == "mira") { - cerr << "Optimising using Mira" << endl; - optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, precision); + if (rank == 0) { + cerr << "Optimising using Mira" << endl; + } + optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack); learning_rate = mira_learning_rate; perceptron_update = false; } else if (learner == "perceptron") { - cerr << "Optimising using Perceptron" << endl; + if (rank == 0) { + cerr << "Optimising using Perceptron" << endl; + } optimiser = new Perceptron(); learning_rate = perceptron_learning_rate; perceptron_update = true; @@ -373,7 +360,7 @@ int main(int argc, char** argv) { // resolve parameter dependencies if (perceptron_update || analytical_update) { batchSize = 1; - cerr << "Setting batch size to 1 for perceptron/analytical update" << endl; + cerr << "Info: Setting batch size to 1 for perceptron/analytical update" << endl; } if (hope_n == -1 && fear_n == -1) { @@ -385,14 +372,18 @@ int main(int argc, char** argv) { hope_fear = false; // is true by default } + if (!hope_fear && !analytical_update) { + model_hope_fear = true; + } + if (model_hope_fear && analytical_update) { - cerr << "Error: must choose between model-hope-fear and analytical update" << endl; + cerr << "Error: Must choose between model-hope-fear and analytical update" << endl; return 1; } if (burnIn && sentenceLevelBleu) { burnIn = false; - cerr << "Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl; + cerr << "Info: Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl; } if (burnIn) { @@ -436,7 +427,7 @@ int main(int argc, char** argv) { order.push_back(i); } - cerr << "Rank " << rank << ", starting burn-in phase for approx. BLEU history.." << endl; + VERBOSE(1, "Rank " << rank << ", starting burn-in phase for approx. BLEU history.." << endl); if (historyOf1best) { // get 1best translations for the burn-in sentences vector::const_iterator sid = order.begin(); @@ -444,7 +435,7 @@ int main(int argc, char** argv) { string& input = burnInInputSentences[*sid]; vector bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight, featureValues[0], bleuScores[0], true, - distinctNbest, rank); + distinctNbest, rank, -1); inputLengths.push_back(decoder->getCurrentInputLength()); ref_ids.push_back(*sid); decoder->cleanup(); @@ -468,8 +459,7 @@ int main(int argc, char** argv) { while (sid != order.end()) { string& input = burnInInputSentences[*sid]; vector oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight, - featureValues[0], bleuScores[0], true, - distinctNbest, rank); + featureValues[0], bleuScores[0], true, distinctNbest, rank, -1); inputLengths.push_back(decoder->getCurrentInputLength()); ref_ids.push_back(*sid); decoder->cleanup(); @@ -488,7 +478,7 @@ int main(int argc, char** argv) { } } - cerr << "Bleu feature history after burn-in: " << endl; + VERBOSE(1, "Bleu feature history after burn-in: " << endl); decoder->printBleuFeatureHistory(cerr); decoder->loadReferenceSentences(referenceSentences); } @@ -532,44 +522,28 @@ int main(int argc, char** argv) { size_t numberOfUpdates = 0; size_t numberOfUpdatesThisEpoch = 0; - time_t now = time(0); // get current time - struct tm* tm = localtime(&now); // get struct filled out - cerr << "Start date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday << "/" - << tm->tm_year + 1900 << ", " << tm->tm_hour << ":" << tm->tm_min << ":" - << tm->tm_sec << endl; + time_t now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now) << endl; ScoreComponentCollection mixedAverageWeights; ScoreComponentCollection mixedAverageWeightsPrevious; ScoreComponentCollection mixedAverageWeightsBeforePrevious; -/* float averageRatio = 0; - float averageBleu = 0; - float prevAverageBleu = 0; - float beforePrevAverageBleu = 0; - float summedApproxBleu = 0; - float averageApproxBleu = 0; - float prevAverageApproxBleu = 0; - float beforePrevAverageApproxBleu = 0;*/ bool stop = false; int sumStillViolatedConstraints; int sumStillViolatedConstraints_lastEpoch = 0; int sumConstraintChangeAbs; int sumConstraintChangeAbs_lastEpoch = 0; - size_t sumBleuChangeAbs; +// size_t sumBleuChangeAbs; float *sendbuf, *recvbuf; sendbuf = (float *) malloc(sizeof(float)); recvbuf = (float *) malloc(sizeof(float)); - // Note: make sure that the variable mosesWeights always holds the current decoder weights for (size_t epoch = 0; epoch < epochs && !stop; ++epoch) { - cerr << "\nRank " << rank << ", epoch " << epoch << endl; - // sum of violated constraints sumStillViolatedConstraints = 0; sumConstraintChangeAbs = 0; - sumBleuChangeAbs = 0; - - // sum of approx. sentence bleu scores per epoch -// summedApproxBleu = 0; +// sumBleuChangeAbs = 0; numberOfUpdatesThisEpoch = 0; // Sum up weights over one epoch, final average uses weights from last epoch @@ -601,8 +575,7 @@ int main(int argc, char** argv) { // get moses weights ScoreComponentCollection mosesWeights = decoder->getWeights(); - cerr << "\nRank " << rank << ", next batch" << endl; - cerr << "Rank " << rank << ", weights: " << mosesWeights << endl; + VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl); // BATCHING: produce nbest lists for all input sentences in batch vector oracleBleuScores; @@ -618,8 +591,7 @@ int main(int argc, char** argv) { != shard.end(); ++batchPosition) { string& input = inputSentences[*sid]; const vector& refs = referenceSentences[*sid]; - cerr << "Rank " << rank << ", batch position " << batchPosition << endl; - cerr << "Rank " << rank << ", input sentence " << *sid << ": \"" << input << "\"" << endl; + cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl; vector newFeatureValues; vector newBleuScores; @@ -640,13 +612,13 @@ int main(int argc, char** argv) { if (perceptron_update || analytical_update) { if (historyOf1best) { // MODEL (for updating the history) - cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl; + cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl; vector bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight, dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); decoder->cleanup(); oneBests.push_back(bestModel); - cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl; + VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl); } // clear dummies @@ -658,22 +630,22 @@ int main(int argc, char** argv) { size_t oraclePos = 0; vector oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight, featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); // needed for history inputLengths.push_back(decoder->getCurrentInputLength()); ref_ids.push_back(*sid); decoder->cleanup(); oracles.push_back(oracle); - cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][oraclePos] << endl; + VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][oraclePos] << endl); // FEAR cerr << "Rank " << rank << ", run decoder to get 1best fear translations" << endl; size_t fearPos = 0; vector fear = decoder->getNBest(input, *sid, 1, -1.0, bleuScoreWeight, featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); decoder->cleanup(); - cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][fearPos] << endl; + VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][fearPos] << endl); for (size_t i = 0; i < fear.size(); ++i) { delete fear[i]; } @@ -682,37 +654,34 @@ int main(int argc, char** argv) { if (hope_fear) { if (historyOf1best) { // MODEL (for updating the history only, using dummy vectors) - cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl; - cerr << "dummyFeatureValues.size: " << dummyFeatureValues.size() << endl; - cerr << "batch position: " << batchPosition << endl; + cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl; vector bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight, dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true, - distinctNbest, rank); - cerr << "finished decoding." << endl; + distinctNbest, rank, epoch); decoder->cleanup(); oneBests.push_back(bestModel); - cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl; + VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl); } // HOPE cerr << "Rank " << rank << ", run decoder to get " << hope_n << "best hope translations" << endl; vector oracle = decoder->getNBest(input, *sid, hope_n, 1.0, bleuScoreWeight, featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); // needed for history inputLengths.push_back(decoder->getCurrentInputLength()); ref_ids.push_back(*sid); decoder->cleanup(); oracles.push_back(oracle); - cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl; + VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl); // FEAR cerr << "Rank " << rank << ", run decoder to get " << fear_n << "best fear translations" << endl; vector fear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuScoreWeight, featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); decoder->cleanup(); - cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl; + VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl); for (size_t i = 0; i < fear.size(); ++i) { delete fear[i]; } @@ -722,26 +691,26 @@ int main(int argc, char** argv) { cerr << "Rank " << rank << ", run decoder to get " << n << "best wrt model score" << endl; vector bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight, featureValues[batchPosition], bleuScores[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); decoder->cleanup(); oneBests.push_back(bestModel); // needed for calculating bleu of dev (1best translations) // todo: all_ref_ids.push_back(*sid); allBestModelScore.push_back(bestModel); - cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl; + VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl); // HOPE cerr << "Rank " << rank << ", run decoder to get " << n << "best hope translations" << endl; size_t oraclePos = featureValues[batchPosition].size(); vector oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight, featureValues[batchPosition], bleuScores[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); // needed for history inputLengths.push_back(decoder->getCurrentInputLength()); ref_ids.push_back(*sid); decoder->cleanup(); oracles.push_back(oracle); - cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl; + VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl); oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]); oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]); @@ -751,18 +720,15 @@ int main(int argc, char** argv) { size_t fearPos = featureValues[batchPosition].size(); vector fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight, featureValues[batchPosition], bleuScores[batchPosition], true, - distinctNbest, rank); + distinctNbest, rank, epoch); decoder->cleanup(); - cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl; + VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl); for (size_t i = 0; i < fear.size(); ++i) { delete fear[i]; } } } -// cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl; -// summedApproxBleu += bleuScores[batchPosition][0]; - // next input sentence ++sid; ++actualBatchSize; @@ -802,14 +768,14 @@ int main(int argc, char** argv) { } } - // get 1best model results with old weights +/* // get 1best model results with old weights vector< vector > bestModelOld_batch; for (size_t i = 0; i < actualBatchSize; ++i) { string& input = inputSentences[*current_sid_start + i]; - vector bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest); + vector bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch); bestModelOld_batch.push_back(bestModelOld); decoder->cleanup(); - } + }*/ // optionally print out the feature values if (print_feature_values) { @@ -840,7 +806,7 @@ int main(int argc, char** argv) { } // Run optimiser on batch: - cerr << "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl; + VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl); ScoreComponentCollection oldWeights(mosesWeights); vector update_status; if (perceptron_update) { @@ -848,12 +814,12 @@ int main(int argc, char** argv) { vector dummy2; update_status = optimiser->updateWeightsHopeFear(mosesWeights, featureValuesHope, featureValuesFear, dummy1, dummy1, dummy2, - learning_rate, 0, rank, epoch, 0); + learning_rate, rank, epoch); } else if (analytical_update) { update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(mosesWeights, featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], bleuScoresFear[0][0], - ref_ids[0], learning_rate, max_sentence_update, rank, epoch, controlUpdates); + ref_ids[0], learning_rate, rank, epoch); } else { if (hope_fear) { @@ -884,74 +850,64 @@ int main(int argc, char** argv) { update_status = optimiser->updateWeightsHopeFear(mosesWeights, featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids, - learning_rate, max_sentence_update, rank, epoch, controlUpdates); + learning_rate, rank, epoch); } else { // model_hope_fear update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids, - learning_rate, max_sentence_update, rank, epoch, controlUpdates); + learning_rate, rank, epoch); } } - if (update_status[0] == 1) { - cerr << "Rank " << rank << ", epoch " << epoch << ", no update for batch" << endl; + sumConstraintChangeAbs += abs(update_status[0] - update_status[1]); + sumStillViolatedConstraints += update_status[1]; + + // pass new weights to decoder + if (normaliseWeights) { + mosesWeights.L1Normalise(); } - else if (update_status[0] == -1) { - cerr << "Rank " << rank << ", epoch " << epoch << ", update ignored" << endl; + + cumulativeWeights.PlusEquals(mosesWeights); + ++numberOfUpdates; + ++numberOfUpdatesThisEpoch; + if (averageWeights) { + ScoreComponentCollection averageWeights(cumulativeWeights); + if (accumulateWeights) { + averageWeights.DivideEquals(numberOfUpdates); + } else { + averageWeights.DivideEquals(numberOfUpdatesThisEpoch); + } + + mosesWeights = averageWeights; } - else { - sumConstraintChangeAbs += abs(update_status[1] - update_status[2]); - sumStillViolatedConstraints += update_status[2]; - // pass new weights to decoder - if (normaliseWeights) { - mosesWeights.L1Normalise(); - } + // set new Moses weights (averaged or not) + decoder->setWeights(mosesWeights); - cumulativeWeights.PlusEquals(mosesWeights); - ++numberOfUpdates; - ++numberOfUpdatesThisEpoch; - if (averageWeights) { - ScoreComponentCollection averageWeights(cumulativeWeights); - if (accumulateWeights) { - averageWeights.DivideEquals(numberOfUpdates); - } else { - averageWeights.DivideEquals(numberOfUpdatesThisEpoch); - } + // compute difference to old weights + ScoreComponentCollection weightDifference(mosesWeights); + weightDifference.MinusEquals(oldWeights); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl); - mosesWeights = averageWeights; - cerr << "Rank " << rank << ", epoch " << epoch << ", set new average weights: " << mosesWeights << endl; - } - else { - cerr << "Rank " << rank << ", epoch " << epoch << ", set new weights: " << mosesWeights << endl; - } - - // set new Moses weights (averaged or not) - decoder->setWeights(mosesWeights); - - // compute difference to old weights - ScoreComponentCollection weightDifference(mosesWeights); - weightDifference.MinusEquals(oldWeights); - cerr << "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl; - - // get 1best model results with new weights (for each sentence in batch) - vector bestModelNew; - for (size_t i = 0; i < actualBatchSize; ++i) { - string& input = inputSentences[*current_sid_start + i]; - bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest); - decoder->cleanup(); - sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]); - cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl; - cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl; - } - } +/* // get 1best model results with new weights (for each sentence in batch) + vector bestModelNew; + for (size_t i = 0; i < actualBatchSize; ++i) { + string& input = inputSentences[*current_sid_start + i]; + bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch); + decoder->cleanup(); + sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]); + VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl); + VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl); + }*/ // update history (for approximate document Bleu) if (sentenceLevelBleu) { for (size_t i = 0; i < oracles.size(); ++i) { - cerr << "Rank " << rank << ", epoch " << epoch << ", oracle length: " << oracles[i].size() << " "; - decoder->printReferenceLength(ref_ids); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", oracle length: " << oracles[i].size() << " "); + if (verbosity > 0) { + decoder->printReferenceLength(ref_ids); + } } } else { @@ -1058,16 +1014,17 @@ int main(int argc, char** argv) { cerr << "\nMixed average weights during epoch " << epoch << ": " << mixedAverageWeights << endl; } - cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl; + cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; mixedAverageWeights.Save(filename.str()); ++weightEpochDump; } }// end dumping } // end of shard loop, end of this epoch - - cerr << "Bleu feature history after epoch " << epoch << endl; - decoder->printBleuFeatureHistory(cerr); + if (verbosity > 0) { + cerr << "Bleu feature history after epoch " << epoch << endl; + decoder->printBleuFeatureHistory(cerr); + } // Check whether there were any weight updates during this epoch size_t sumUpdates; @@ -1094,131 +1051,30 @@ int main(int argc, char** argv) { if (epoch > 0) { if ((sumConstraintChangeAbs_lastEpoch == sumConstraintChangeAbs) && (sumStillViolatedConstraints_lastEpoch == sumStillViolatedConstraints)) { - cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints and constraint changes has stayed the same: " << sumStillViolatedConstraints << ", " << sumConstraintChangeAbs << endl; + VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints and constraint changes has stayed the same: " << sumStillViolatedConstraints << ", " << sumConstraintChangeAbs << endl); } else { - cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << ", sum of constraint changes " << sumConstraintChangeAbs << endl; + VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << ", sum of constraint changes " << sumConstraintChangeAbs << endl); } } else { - cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl; + VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl); } sumConstraintChangeAbs_lastEpoch = sumConstraintChangeAbs; sumStillViolatedConstraints_lastEpoch = sumStillViolatedConstraints; - - if (min_bleu_change > 0) { - if (sumBleuChangeAbs < min_bleu_change) { - cerr << "Rank " << rank << ", epoch " << epoch << ", sum of BLEU score changes was smaller than " << min_bleu_change << " (" << sumBleuChangeAbs << ")." << endl; - stop = true; - } - else { - cerr << "Rank " << rank << ", epoch " << epoch << ", sum of BLEU score changes: " << sumBleuChangeAbs << "." << endl; - } - } if (!stop) { -/* if (devBleu) { - // calculate bleu score of dev set - vector bleuAndRatio = decoder->calculateBleuOfCorpus(allBestModelScore, all_ref_ids, epoch, rank); - float bleu = bleuAndRatio[0]; - float ratio = bleuAndRatio[1]; - - for (size_t i = 0; i < allBestModelScore.size(); ++i) { - for (size_t j = 0; j < allBestModelScore[i].size(); ++j) { - delete allBestModelScore[i][j]; - } - } - - if (rank == 0) { - beforePrevAverageBleu = prevAverageBleu; - beforePrevAverageApproxBleu = prevAverageApproxBleu; - prevAverageBleu = averageBleu; - prevAverageApproxBleu = averageApproxBleu; - } - -#ifdef MPI_ENABLE - // average bleu across processes - sendbuf[0] = bleu; - recvbuf[0] = 0; - MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world); - if (rank == 0) { - averageBleu = recvbuf[0]; - - // divide by number of processes - averageBleu /= size; - cerr << "Average Bleu (dev) after epoch " << epoch << ": " << averageBleu << endl; - } - - // average ratio across processes - sendbuf[0] = ratio; - recvbuf[0] = 0; - MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world); - if (rank == 0) { - averageRatio = recvbuf[0]; - - // divide by number of processes - averageRatio /= size; - cerr << "Average ratio (dev) after epoch " << epoch << ": " << averageRatio << endl; - } - - // average approximate sentence bleu across processes - sendbuf[0] = summedApproxBleu/numberOfUpdatesThisEpoch; - recvbuf[0] = 0; - MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world); - if (rank == 0) { - averageApproxBleu = recvbuf[0]; - - // divide by number of processes - averageApproxBleu /= size; - cerr << "Average approx. sentence Bleu (dev) after epoch " << epoch << ": " << averageApproxBleu << endl; - } -#endif -#ifndef MPI_ENABLE - averageBleu = bleu; - cerr << "Average Bleu (dev) after epoch " << epoch << ": " << averageBleu << endl; - averageApproxBleu = summedApproxBleu / numberOfUpdatesThisEpoch; - cerr << "Average approx. sentence Bleu (dev) after epoch " << epoch << ": " << averageApproxBleu << endl; -#endif - if (rank == 0) { - if (stop_dev_bleu) { - if (averageBleu <= prevAverageBleu && prevAverageBleu <= beforePrevAverageBleu) { - stop = true; - cerr << "Average Bleu (dev) is decreasing or no more increasing.. stop tuning." << endl; - ScoreComponentCollection dummy; - ostringstream endfilename; - endfilename << "stopping"; - dummy.Save(endfilename.str()); - } - } - - if (stop_approx_dev_bleu) { - if (averageApproxBleu <= prevAverageApproxBleu && prevAverageApproxBleu <= beforePrevAverageApproxBleu) { - stop = true; - cerr << "Average approx. sentence Bleu (dev) is decreasing or no more increasing.. stop tuning." << endl; - ScoreComponentCollection dummy; - ostringstream endfilename; - endfilename << "stopping"; - dummy.Save(endfilename.str()); - } - } - } - -#ifdef MPI_ENABLE - mpi::broadcast(world, stop, 0); -#endif - } // end if (dev_bleu) */ - // Test if weights have converged if (weightConvergence) { bool reached = true; if (rank == 0 && (epoch >= 2)) { ScoreComponentCollection firstDiff(mixedAverageWeights); firstDiff.MinusEquals(mixedAverageWeightsPrevious); - cerr << "Average weight changes since previous epoch: " << firstDiff << endl; + VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl); ScoreComponentCollection secondDiff(mixedAverageWeights); secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious); - cerr << "Average weight changes since before previous epoch: " << secondDiff << endl << endl; + VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl); // check whether stopping criterion has been reached // (both difference vectors must have all weight changes smaller than min_weight_change) @@ -1240,7 +1096,7 @@ int main(int argc, char** argv) { if (reached) { // stop MIRA stop = true; - cerr << "Stopping criterion has been reached after epoch " << epoch << ".. stopping MIRA." << endl; + cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." << endl; ScoreComponentCollection dummy; ostringstream endfilename; endfilename << "stopping"; @@ -1255,17 +1111,26 @@ int main(int argc, char** argv) { #endif } //end if (weightConvergence) - // if using flexible regularization, decrease regularization parameter for next epoch + // if using flexible slack, decrease slack parameter for next epoch if (slack_step > 0) { if (slack - slack_step >= slack_min) { if (typeid(*optimiser) == typeid(MiraOptimiser)) { slack -= slack_step; - cerr << "Change slack to: " << slack << endl; + VERBOSE(1, "Change slack to: " << slack << endl); ((MiraOptimiser*) optimiser)->setSlack(slack); } } } + // if using flexible margin slack, decrease margin slack parameter for next epoch + if (margin_slack_incr > 0.0001) { + if (typeid(*optimiser) == typeid(MiraOptimiser)) { + margin_slack += margin_slack_incr; + VERBOSE(1, "Change margin slack to: " << margin_slack << endl); + ((MiraOptimiser*) optimiser)->setMarginSlack(margin_slack); + } + } + // change learning rate if ((decrease_learning_rate > 0) && (learning_rate - decrease_learning_rate >= min_learning_rate)) { learning_rate -= decrease_learning_rate; @@ -1276,20 +1141,7 @@ int main(int argc, char** argv) { mpi::broadcast(world, stop, 0); #endif } - cerr << "Change learning rate to " << learning_rate << endl; - } - - // change maximum sentence update - if ((decrease_sentence_update > 0) && (max_sentence_update - decrease_sentence_update >= min_sentence_update)) { - max_sentence_update -= decrease_sentence_update; - if (max_sentence_update <= 0.0001) { - max_sentence_update = 0; - stop = true; -#ifdef MPI_ENABLE - mpi::broadcast(world, stop, 0); -#endif - } - cerr << "Change maximum sentence update to " << max_sentence_update << endl; + VERBOSE(1, "Change learning rate to " << learning_rate << endl); } } } // end of epoch loop @@ -1298,11 +1150,8 @@ int main(int argc, char** argv) { MPI_Finalize(); #endif - now = time(0); // get current time - tm = localtime(&now); // get struct filled out - cerr << "\nEnd date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday - << "/" << tm->tm_year + 1900 << ", " << tm->tm_hour << ":" - << tm->tm_min << ":" << tm->tm_sec << endl; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); delete decoder; exit(0); diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp index 5231d380d..2447b6787 100644 --- a/mira/MiraOptimiser.cpp +++ b/mira/MiraOptimiser.cpp @@ -1,5 +1,6 @@ #include "Optimiser.h" #include "Hildreth.h" +#include "StaticData.h" using namespace Moses; using namespace std; @@ -14,10 +15,8 @@ vector MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, const vector oracleBleuScores, const vector sentenceIds, float learning_rate, - float max_sentence_update, size_t rank, - size_t epoch, - bool controlUpdates) { + size_t epoch) { // vector of feature values differences for all created constraints vector featureValueDiffs; @@ -40,41 +39,44 @@ vector MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, ScoreComponentCollection featureValueDiff = oracleFeatureValues[i]; featureValueDiff.MinusEquals(featureValues[i][j]); - cerr << "feature value diff: " << featureValueDiff << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl; if (featureValueDiff.GetL1Norm() == 0) { - cerr << "Equal feature values, constraint skipped.." << endl; + // skip constraint continue; } float loss = losses[i][j]; if (m_scale_margin == 1) { loss *= oracleBleuScores[i]; - cerr << "Scaling margin with oracle bleu score " << oracleBleuScores[i] << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << oracleBleuScores[i] << endl); } else if (m_scale_margin == 2) { loss *= log2(oracleBleuScores[i]); - cerr << "Scaling margin with log2 oracle bleu score " << log2(oracleBleuScores[i]) << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log2 oracle bleu score " << log2(oracleBleuScores[i]) << endl); } else if (m_scale_margin == 10) { loss *= log10(oracleBleuScores[i]); - cerr << "Scaling margin with log10 oracle bleu score " << log10(oracleBleuScores[i]) << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log10 oracle bleu score " << log10(oracleBleuScores[i]) << endl) } // check if constraint is violated bool violated = false; bool addConstraint = true; float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); - float diff = loss - (modelScoreDiff + m_precision); - cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl; + float diff = 0; + if (loss > (modelScoreDiff + m_margin_slack)) { + diff = loss - (modelScoreDiff + m_margin_slack); + } + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl; + if (diff > epsilon) { violated = true; - cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl; } else if (m_onlyViolatedConstraints) { addConstraint = false; } - float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision); + float lossMinusModelScoreDiff = loss - modelScoreDiff; if (addConstraint) { featureValueDiffs.push_back(featureValueDiff); lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff); @@ -92,8 +94,8 @@ vector MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, vector alphas; ScoreComponentCollection summedUpdate; if (violatedConstraintsBefore > 0) { - cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl; - cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << + featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl; if (m_slack != 0) { alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack); } else { @@ -104,7 +106,7 @@ vector MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis)) for (size_t k = 0; k < featureValueDiffs.size(); ++k) { float alpha = alphas[k]; - cerr << "alpha: " << alpha << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl); ScoreComponentCollection update(featureValueDiffs[k]); update.MultiplyEquals(alpha); @@ -113,11 +115,10 @@ vector MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, } } else { - cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl; - vector status(3); - status[0] = 1; + cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl; + vector status(2); + status[0] = 0; status[1] = 0; - status[2] = 0; return status; } @@ -130,56 +131,37 @@ vector MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, for (size_t i = 0; i < featureValueDiffs.size(); ++i) { float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights); float loss = all_losses[i]; - float diff = loss - (modelScoreDiff + m_precision); + float diff = loss - (modelScoreDiff + m_margin_slack); if (diff > epsilon) { ++violatedConstraintsAfter; newDistanceFromOptimum += diff; } } - cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl; - cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl; - - if (controlUpdates && violatedConstraintsAfter > 0) { - float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum; - if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) { - vector statusPlus(3); - statusPlus[0] = -1; - statusPlus[1] = -1; - statusPlus[2] = -1; - return statusPlus; - } - } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl); // apply learning rate if (learning_rate != 1) { - cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl); summedUpdate.MultiplyEquals(learning_rate); - cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl; - } - - // apply threshold scaling - if (max_sentence_update != -1) { - cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl; - summedUpdate.ThresholdScaling(max_sentence_update); - cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl); } // scale update by BLEU of oracle if (oracleBleuScores.size() == 1 && m_scale_update) { - cerr << "Scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl); summedUpdate.MultiplyEquals(log10(oracleBleuScores[0])); } // apply update to weight vector - cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl); currWeights.PlusEquals(summedUpdate); - cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl); - vector statusPlus(3); - statusPlus[0] = 0; - statusPlus[1] = violatedConstraintsBefore; - statusPlus[2] = violatedConstraintsAfter; - return statusPlus; + vector status(2); + status[0] = violatedConstraintsBefore; + status[1] = violatedConstraintsAfter; + return status; } vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights, @@ -189,10 +171,8 @@ vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection const std::vector >& bleuScoresFear, const std::vector< size_t> sentenceIds, float learning_rate, - float max_sentence_update, size_t rank, - size_t epoch, - bool controlUpdates) { + size_t epoch) { // vector of feature values differences for all created constraints vector featureValueDiffs; @@ -216,41 +196,44 @@ vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection for (size_t k = 0; k < featureValuesFear[i].size(); ++k) { ScoreComponentCollection featureValueDiff = featureValuesHope[i][j]; featureValueDiff.MinusEquals(featureValuesFear[i][k]); - cerr << "feature value diff: " << featureValueDiff << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl; if (featureValueDiff.GetL1Norm() == 0) { - cerr << "Equal feature values, constraint skipped.." << endl; + // skip constraint continue; } float loss = bleuScoresHope[i][j] - bleuScoresFear[i][k]; if (m_scale_margin == 1) { loss *= bleuScoresHope[i][j]; - cerr << "Scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl); } else if (m_scale_margin == 2) { loss *= log2(bleuScoresHope[i][j]); - cerr << "Scaling margin with log2 oracle bleu score " << log2(bleuScoresHope[i][j]) << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log2 oracle bleu score " << log2(bleuScoresHope[i][j]) << endl); } else if (m_scale_margin == 10) { loss *= log10(bleuScoresHope[i][j]); - cerr << "Scaling margin with log10 oracle bleu score " << log10(bleuScoresHope[i][j]) << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log10 oracle bleu score " << log10(bleuScoresHope[i][j]) << endl); } // check if constraint is violated bool violated = false; bool addConstraint = true; float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); - float diff = loss - (modelScoreDiff + m_precision); - cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl; + float diff = 0; + if (loss > (modelScoreDiff + m_margin_slack)) { + diff = loss - (modelScoreDiff + m_margin_slack); + } + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl; + if (diff > epsilon) { violated = true; - cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl; } else if (m_onlyViolatedConstraints) { addConstraint = false; } - float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision); + float lossMinusModelScoreDiff = loss - modelScoreDiff; if (addConstraint) { featureValueDiffs.push_back(featureValueDiff); lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff); @@ -269,8 +252,8 @@ vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection vector alphas; ScoreComponentCollection summedUpdate; if (violatedConstraintsBefore > 0) { - cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl; - cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << + featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl; if (m_slack != 0) { alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack); } else { @@ -281,17 +264,17 @@ vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis)) for (size_t k = 0; k < featureValueDiffs.size(); ++k) { float alpha = alphas[k]; - cerr << "alpha: " << alpha << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl); ScoreComponentCollection update(featureValueDiffs[k]); update.MultiplyEquals(alpha); // scale update by BLEU of hope translation (only two cases defined at the moment) if (featureValuesHope.size() == 1 && m_scale_update) { // only defined for batch size 1) if (featureValuesHope[0].size() == 1) { - cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][0]) << endl; // only 1 oracle + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][0]) << endl); // only 1 oracle update.MultiplyEquals(log10(bleuScoresHope[0][0])); } else if (featureValuesFear[0].size() == 1) { - cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][k]) << endl; // k oracles + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][k]) << endl); // k oracles update.MultiplyEquals(log10(bleuScoresHope[0][k])); } } @@ -301,11 +284,10 @@ vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection } } else { - cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl; - vector status(3); - status[0] = 1; + cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl; + vector status(2); + status[0] = 0; status[1] = 0; - status[2] = 0; return status; } @@ -318,49 +300,30 @@ vector MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection for (size_t i = 0; i < featureValueDiffs.size(); ++i) { float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights); float loss = all_losses[i]; - float diff = loss - (modelScoreDiff + m_precision); + float diff = loss - (modelScoreDiff + m_margin_slack); if (diff > epsilon) { ++violatedConstraintsAfter; newDistanceFromOptimum += diff; } } - cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl; - cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl; - - if (controlUpdates && violatedConstraintsAfter > 0) { - float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum; - if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) { - vector statusPlus(3); - statusPlus[0] = -1; - statusPlus[1] = -1; - statusPlus[2] = -1; - return statusPlus; - } - } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl); // Apply learning rate (fixed or flexible) if (learning_rate != 1) { - cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl); summedUpdate.MultiplyEquals(learning_rate); - cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl; - } - - // Apply threshold scaling - if (max_sentence_update != -1) { - cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl; - summedUpdate.ThresholdScaling(max_sentence_update); - cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl); } // apply update to weight vector - cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl); currWeights.PlusEquals(summedUpdate); - cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl); - vector statusPlus(3); - statusPlus[0] = 0; - statusPlus[1] = violatedConstraintsBefore; - statusPlus[2] = violatedConstraintsAfter; + vector statusPlus(2); + statusPlus[0] = violatedConstraintsBefore; + statusPlus[1] = violatedConstraintsAfter; return statusPlus; } @@ -371,26 +334,27 @@ vector MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c float bleuScoreFear, size_t sentenceId, float learning_rate, - float max_sentence_update, size_t rank, - size_t epoch, - bool controlUpdates) { + size_t epoch) { float epsilon = 0.0001; float oldDistanceFromOptimum = 0; bool constraintViolatedBefore = false; ScoreComponentCollection weightUpdate; - cerr << "hope: " << featureValuesHope << endl; - cerr << "fear: " << featureValuesFear << endl; + // cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl; + // cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl; ScoreComponentCollection featureValueDiff = featureValuesHope; featureValueDiff.MinusEquals(featureValuesFear); - cerr << "hope - fear: " << featureValueDiff << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl; float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); float loss = bleuScoreHope - bleuScoreFear; - float diff = loss - (modelScoreDiff + m_precision); - // approximate comparison between floats - cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl; + float diff = 0; + if (loss > (modelScoreDiff + m_margin_slack)) { + diff = loss - (modelScoreDiff + m_margin_slack); + } + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl; + if (diff > epsilon) { // constraint violated oldDistanceFromOptimum += diff; @@ -417,17 +381,16 @@ vector MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c weightUpdate.PlusEquals(featureValueDiff); } else { - cerr << "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0, can only happen if oracle == hypothesis, are bleu scores equal as well?" << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0" << endl); } } if (!constraintViolatedBefore) { // constraint satisfied, nothing to do - cerr << "Rank " << rank << ", epoch " << epoch << ", check, constraint already satisfied" << endl; - vector status(3); - status[0] = 1; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl; + vector status(2); + status[0] = 0; status[1] = 0; - status[2] = 0; return status; } @@ -439,35 +402,25 @@ vector MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c featureValueDiff = featureValuesHope; featureValueDiff.MinusEquals(featureValuesFear); modelScoreDiff = featureValueDiff.InnerProduct(newWeights); - diff = loss - (modelScoreDiff + m_precision); + diff = loss - (modelScoreDiff + m_margin_slack); // approximate comparison between floats! if (diff > epsilon) { constraintViolatedAfter = true; - newDistanceFromOptimum += (loss - (modelScoreDiff + m_precision)); + newDistanceFromOptimum += (loss - modelScoreDiff); } - cerr << "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl; - cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl; - - float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum; - if (controlUpdates && constraintViolatedAfter && distanceChange < 0) { - vector statusPlus(3); - statusPlus[0] = -1; - statusPlus[1] = 1; - statusPlus[2] = 1; - return statusPlus; - } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl); // apply update to weight vector - cerr << "Rank " << rank << ", weights before update: " << currWeights << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl); currWeights.PlusEquals(weightUpdate); - cerr << "Rank " << rank << ", weights after update: " << currWeights << endl; + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl); - vector statusPlus(3); - statusPlus[0] = 0; - statusPlus[1] = 1; - statusPlus[2] = constraintViolatedAfter ? 1 : 0; - return statusPlus; + vector status(2); + status[0] = 1; + status[1] = constraintViolatedAfter ? 1 : 0; + return status; } } diff --git a/mira/Optimiser.h b/mira/Optimiser.h index 025faca1a..18b75fb65 100644 --- a/mira/Optimiser.h +++ b/mira/Optimiser.h @@ -36,11 +36,9 @@ namespace Mira { const std::vector >& bleuScoresHope, const std::vector >& bleuScoresFear, const std::vector< size_t> sentenceIds, - float learning_rate, - float max_sentence_update, - size_t rank, - size_t epoch, - bool controlUpdates) = 0; + float learning_rate, + size_t rank, + size_t epoch) = 0; }; class Perceptron : public Optimiser { @@ -52,10 +50,8 @@ namespace Mira { const std::vector >& bleuScoresFear, const std::vector< size_t> sentenceIds, float learning_rate, - float max_sentence_update, - size_t rank, - size_t epoch, - bool controlUpdates); + size_t rank, + size_t epoch); }; class MiraOptimiser : public Optimiser { @@ -63,13 +59,13 @@ namespace Mira { MiraOptimiser() : Optimiser() { } - MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float precision) : + MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float margin_slack) : Optimiser(), m_onlyViolatedConstraints(onlyViolatedConstraints), m_slack(slack), m_scale_margin(scale_margin), m_scale_update(scale_update), - m_precision(precision) { } + m_margin_slack(margin_slack) { } std::vector updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights, Moses::ScoreComponentCollection& featureValuesHope, @@ -78,10 +74,8 @@ namespace Mira { float bleuScoresFear, size_t sentenceId, float learning_rate, - float max_sentence_update, size_t rank, - size_t epoch, - bool controlUpdates); + size_t epoch); std::vector updateWeights(Moses::ScoreComponentCollection& currWeights, const std::vector >& featureValues, const std::vector >& losses, @@ -89,27 +83,27 @@ namespace Mira { const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues, const std::vector< float> oracleBleuScores, const std::vector< size_t> sentenceIds, - float learning_rate, - float max_sentence_update, - size_t rank, - size_t epoch, - bool controlUpdates); + float learning_rate, + size_t rank, + size_t epoch); virtual std::vector updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights, const std::vector >& featureValuesHope, const std::vector >& featureValuesFear, const std::vector >& bleuScoresHope, const std::vector >& bleuScoresFear, const std::vector< size_t> sentenceIds, - float learning_rate, - float max_sentence_update, - size_t rank, - size_t epoch, - bool controlUpdates); + float learning_rate, + size_t rank, + size_t epoch); void setSlack(float slack) { m_slack = slack; } + void setMarginSlack(float margin_slack) { + m_margin_slack = margin_slack; + } + private: // add only violated constraints to the optimisation problem @@ -123,7 +117,7 @@ namespace Mira { // scale update with log 10 of oracle BLEU score bool m_scale_update; - float m_precision; + float m_margin_slack; }; } diff --git a/mira/Perceptron.cpp b/mira/Perceptron.cpp index f8ea0ce45..315281410 100644 --- a/mira/Perceptron.cpp +++ b/mira/Perceptron.cpp @@ -31,10 +31,8 @@ vector Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeig const vector< vector >& dummy2, const vector< size_t> dummy3, float perceptron_learning_rate, - float dummy4, size_t rank, - size_t epoch, - bool dummy5) + size_t epoch) { cerr << "hope: " << featureValuesHope[0][0] << endl; cerr << "fear: " << featureValuesFear[0][0] << endl;