diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index f3cbaf62b..8f6dbeaa6 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -68,7 +68,7 @@ namespace Mira {
     delete[] mosesargv;
   }
 
-  MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
+  MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
     : m_manager(NULL) {
     // force initialisation of the phrase dictionary (TODO: what for?)
     const StaticData &staticData = StaticData::Instance();
@@ -84,7 +84,6 @@ namespace Mira {
     // Add the bleu feature
     m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
     (const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
-    m_bleuScoreFeature->LoadReferences(refs);
   }
 
   void MosesDecoder::cleanup() {
@@ -113,7 +112,7 @@ namespace Mira {
 
     // set the weight for the bleu feature
     ostringstream bleuWeightStr;
-    bleuWeightStr << bleuObjectiveWeight;
+    bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
     PARAM_VEC bleuWeight(1,bleuWeightStr.str());
     staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
 
@@ -137,7 +136,7 @@ namespace Mira {
       bleuScores.push_back(bleuScore);
 
       //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
-      float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
+      float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
       cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
 
       Phrase bestPhrase = path.GetTargetPhrase();
@@ -179,6 +178,7 @@ namespace Mira {
   vector<float> MosesDecoder::getBleuAndScore(const std::string& source,
                               size_t sentenceid,
                               float bleuObjectiveWeight,
+                              float bleuScoreWeight,
                               bool distinct)
   {
     StaticData &staticData = StaticData::InstanceNonConst();
@@ -191,7 +191,7 @@ namespace Mira {
 
     // set the weight for the bleu feature
     ostringstream bleuWeightStr;
-    bleuWeightStr << bleuObjectiveWeight;
+    bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
     PARAM_VEC bleuWeight(1,bleuWeightStr.str());
     staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
 
@@ -211,7 +211,7 @@ namespace Mira {
     vector<float> bleuAndScore;
     const Moses::TrellisPath &path = **iter;
     float bleuScore = getBleuScore(path.GetScoreBreakdown());
-    float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
+    float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
    bleuAndScore.push_back(bleuScore);
     bleuAndScore.push_back(scoreWithoutBleu);
     return bleuAndScore;
@@ -246,6 +246,14 @@ namespace Mira {
     m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
   }
 
+  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
+    m_bleuScoreFeature->LoadReferences(refs);
+  }
+
+  void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
+    m_bleuScoreFeature->PrintHistory(out);
+  }
+
   vector<float> MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch, size_t rank) {
     vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
     if (bleu.size() > 0) {
diff --git a/mira/Decoder.h b/mira/Decoder.h
index 07d37cd7e..08ee07d69 100644
--- a/mira/Decoder.h
+++ b/mira/Decoder.h
@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc, std::vecto
 **/
 class MosesDecoder {
   public:
-    MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
+    MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
 
     //returns the best sentence
     std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -66,10 +66,13 @@ class MosesDecoder {
     std::vector<float> getBleuAndScore(const std::string& source,
                           size_t sentenceid,
                           float bleuObjectiveWeight,
+                          float bleuScoreWeight,
                           bool distinct);
     size_t getCurrentInputLength();
     void updateHistory(const std::vector<const Moses::Word*>& words);
     void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
+    void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
+    void printBleuFeatureHistory(std::ostream& out);
     std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
     void setBPfactor(float factor);
     Moses::ScoreComponentCollection getWeights();
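
Reviewer note: the constructor no longer takes the reference translations; callers must load them explicitly through the new loadReferenceSentences() hook, and the weight handed to the decoder's bleu feature is now the product of the two weights. A minimal sketch of the intended call order (hypothetical driver code, not part of this patch; the setup variables are assumed to be defined as in mira/Main.cpp):

    // Sketch only -- refs, useScaledReference, scaleByInputLength, BPfactor and
    // historySmoothing are assumed to be set up as in mira/Main.cpp.
    MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength,
                                             BPfactor, historySmoothing);
    decoder->loadReferenceSentences(refs);  // previously done inside the constructor

    // "weight-bl" is now set to bleuObjectiveWeight * bleuScoreWeight, so the
    // pure model score of a hypothesis is recovered as
    //   path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore)
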
diff --git a/mira/Main.cpp b/mira/Main.cpp
index c00cbfbf4..fffdfffa1 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -142,7 +142,7 @@ int main(int argc, char** argv) {
   float marginScaleFactorMin;
   float min_learning_rate;
   float min_sentence_update;
-  bool weightedLossFunction;
+  size_t weightedLossFunction;
   size_t n;
   size_t batchSize;
   bool distinctNbest;
@@ -172,8 +172,6 @@ int main(int argc, char** argv) {
   float decrease_sentence_update;
   bool devBleu;
   bool normaliseWeights;
-  bool one_constraint;
-  bool one_per_batch;
   bool print_feature_values;
   bool stop_dev_bleu;
   bool stop_approx_dev_bleu;
@@ -181,14 +179,25 @@ int main(int argc, char** argv) {
   bool train_linear_classifier;
   int updates_per_epoch;
   bool multiplyA;
+  bool historyOf1best;
+  bool burnIn;
+  string burnInInputFile;
+  vector<string> burnInReferenceFiles;
+  bool sentenceLevelBleu;
+  float bleuScoreWeight;
   po::options_description desc("Allowed options");
-  desc.add_options()("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false), "Accumulate most violated constraint per example")
+  desc.add_options()
+    ("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false), "Accumulate most violated constraint per example")
     ("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
     ("adapt-BP-factor", po::value<bool>(&adapt_BPfactor)->default_value(0), "Set factor to 1 when optimal translation length is reached")
     ("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
     ("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for log-ing feature values")
     ("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is sent to optimiser for weight adjustments")
+    ("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the bleu objective weight)")
     ("BP-factor", po::value<float>(&BPfactor)->default_value(1.0), "Increase penalty for short translations")
+    ("burn-in", po::value<bool>(&burnIn)->default_value(false), "Do a burn-in of the BLEU history before training")
+    ("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
+    ("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference files for burn-in phase of BLEU history")
     ("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
     ("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
     ("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
@@ -200,6 +209,7 @@ int main(int argc, char** argv) {
     ("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
     ("help", po::value<bool>(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
     ("hildreth", po::value<bool>(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm")
+    ("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
     ("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
     ("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
     ("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
@@ -214,16 +224,15 @@ int main(int argc, char** argv) {
     ("msf", po::value<float>(&marginScaleFactor)->default_value(1.0), "Margin scale factor, regularises the update by scaling the enforced margin")
     ("msf-min", po::value<float>(&marginScaleFactorMin)->default_value(1.0), "Minimum value that margin is scaled by")
     ("msf-step", po::value<float>(&marginScaleFactorStep)->default_value(0), "Decrease margin scale factor iteratively by the value provided")
-    ("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
-    ("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
+    ("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
+    ("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
     ("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
-    ("one-constraint", po::value<bool>(&one_constraint)->default_value(false), "Forget about hope and fear and consider only the 1best model translation to formulate a constraint")
-    ("one-per-batch", po::value<bool>(&one_per_batch)->default_value(false), "Only 1 constraint per batch for params --accumulate-most-violated.. and --past-and-current..")
     ("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
     ("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
     ("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
     ("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
     ("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
+    ("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentence-level BLEU scoring function")
     ("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
     ("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
     ("slack-max", po::value<float>(&slack_max)->default_value(0), "Maximum slack used")
@@ -236,7 +245,7 @@ int main(int argc, char** argv) {
     ("updates-per-epoch", po::value<int>(&updates_per_epoch)->default_value(-1), "Accumulate updates and apply them to the weight vector the specified number of times per epoch")
     ("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
     ("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
-    ("weighted-loss-function", po::value<bool>(&weightedLossFunction)->default_value(false), "Weight the loss of a hypothesis by its Bleu score")
+    ("weighted-loss-function", po::value<size_t>(&weightedLossFunction)->default_value(0), "Weight the loss of a hypothesis by its Bleu score")
     ("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
 
   po::options_description cmdline_options;
phase of BLEU history") ("config,f", po::value(&mosesConfigFile), "Moses ini file") ("control-updates", po::value(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error") ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs") @@ -200,6 +209,7 @@ int main(int argc, char** argv) { ("epochs,e", po::value(&epochs)->default_value(5), "Number of epochs") ("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit") ("hildreth", po::value(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm") + ("history-of-1best", po::value(&historyOf1best)->default_value(0), "Use the 1best translation to update the history") ("history-smoothing", po::value(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing") ("input-file,i", po::value(&inputFile), "Input file containing tokenised source") ("learner,l", po::value(&learner)->default_value("mira"), "Learning algorithm") @@ -214,16 +224,15 @@ int main(int argc, char** argv) { ("msf", po::value(&marginScaleFactor)->default_value(1.0), "Margin scale factor, regularises the update by scaling the enforced margin") ("msf-min", po::value(&marginScaleFactorMin)->default_value(1.0), "Minimum value that margin is scaled by") ("msf-step", po::value(&marginScaleFactorStep)->default_value(0), "Decrease margin scale factor iteratively by the value provided") - ("multiplyA", po::value(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth") - ("nbest,n", po::value(&n)->default_value(10), "Number of translations in nbest list") + ("multiplyA", po::value(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth") + ("nbest,n", po::value(&n)->default_value(10), "Number of translations in nbest list") ("normalise", po::value(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder") - ("one-constraint", po::value(&one_constraint)->default_value(false), "Forget about hope and fear and consider only the 1best model translation to formulate a constraint") - ("one-per-batch", po::value(&one_per_batch)->default_value(false), "Only 1 constraint per batch for params --accumulate-most-violated.. 
and --past-and-current..") ("only-violated-constraints", po::value(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem") ("past-and-current-constraints", po::value(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints") ("print-feature-values", po::value(&print_feature_values)->default_value(false), "Print out feature values") ("reference-files,r", po::value >(&referenceFiles), "Reference translation files for training") ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths") + ("sentence-level-bleu", po::value(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function") ("shuffle", po::value(&shuffle)->default_value(false), "Shuffle input sentences before processing") ("slack", po::value(&slack)->default_value(0.01), "Use slack in optimizer") ("slack-max", po::value(&slack_max)->default_value(0), "Maximum slack used") @@ -236,7 +245,7 @@ int main(int argc, char** argv) { ("updates-per-epoch", po::value(&updates_per_epoch)->default_value(-1), "Accumulate updates and apply them to the weight vector the specified number of times per epoch") ("use-scaled-reference", po::value(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases") ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") - ("weighted-loss-function", po::value(&weightedLossFunction)->default_value(false), "Weight the loss of a hypothesis by its Bleu score") + ("weighted-loss-function", po::value(&weightedLossFunction)->default_value(0), "Weight the loss of a hypothesis by its Bleu score") ("weight-dump-stem", po::value(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights"); po::options_description cmdline_options; @@ -605,14 +614,118 @@ int main(int argc, char** argv) { vector decoder_params; boost::split(decoder_params, decoder_settings, boost::is_any_of("\t ")); initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params); - MosesDecoder* decoder = new MosesDecoder(referenceSentences, - useScaledReference, scaleByInputLength, BPfactor, historySmoothing); + MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, BPfactor, historySmoothing); if (normaliseWeights) { ScoreComponentCollection startWeights = decoder->getWeights(); startWeights.L1Normalise(); decoder->setWeights(startWeights); } + if (sentenceLevelBleu) { + burnIn = false; + } + + if (burnIn) { + // load burn-in input and references + vector burnInInputSentences; + if (!loadSentences(burnInInputFile, burnInInputSentences)) { + cerr << "Error: Failed to load burn-in input sentences from " << burnInInputFile << endl; + return 1; + } + + vector > burnInReferenceSentences(burnInReferenceFiles.size()); + for (size_t i = 0; i < burnInReferenceFiles.size(); ++i) { + if (!loadSentences(burnInReferenceFiles[i], burnInReferenceSentences[i])) { + cerr << "Error: Failed to load burn-in reference sentences from " + << burnInReferenceFiles[i] << endl; + return 1; + } + if (burnInReferenceSentences[i].size() != burnInInputSentences.size()) { + cerr << "Error: Burn-in input file length (" << burnInInputSentences.size() << ") != (" + << burnInReferenceSentences[i].size() << ") length of burn-in reference file " << i + << endl; + return 1; + } + } + 
@@ -795,52 +908,25 @@ int main(int argc, char** argv) {
         featureValues.push_back(newFeatureValues);
         bleuScores.push_back(newBleuScores);
 
-        if (one_constraint) {
-          cerr << "Rank " << rank << ", run decoder to get 1best wrt model score" << endl;
-          vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0,
-              1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
-              distinctNbest, rank);
-          inputLengths.push_back(decoder->getCurrentInputLength());
-          ref_ids.push_back(*sid);
-          all_ref_ids.push_back(*sid);
-          allBestModelScore.push_back(bestModel);
-          decoder->cleanup();
-          cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
-
-          // HOPE
-          cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
-          size_t oraclePos = featureValues[batchPosition].size();
-          oraclePositions.push_back(oraclePos);
-          vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0,
-              1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
-              distinctNbest, rank);
-          decoder->cleanup();
-          oracles.push_back(oracle);
-          cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
-
-          oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
-          float oracleBleuScore = bleuScores[batchPosition][oraclePos];
-          oracleBleuScores.push_back(oracleBleuScore);
-        }
-        else {
         // MODEL
         cerr << "Rank " << rank << ", run decoder to get nbest wrt model score" << endl;
-        vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0,
-            1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
+        vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
+            featureValues[batchPosition], bleuScores[batchPosition], true,
             distinctNbest, rank);
         inputLengths.push_back(decoder->getCurrentInputLength());
         ref_ids.push_back(*sid);
         all_ref_ids.push_back(*sid);
         allBestModelScore.push_back(bestModel);
         decoder->cleanup();
+        oneBests.push_back(bestModel);
         cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
 
         // HOPE
         cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
         size_t oraclePos = featureValues[batchPosition].size();
         oraclePositions.push_back(oraclePos);
-        vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0,
-            1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
+        vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
+            featureValues[batchPosition], bleuScores[batchPosition], true,
             distinctNbest, rank);
         decoder->cleanup();
         oracles.push_back(oracle);
@@ -853,7 +939,7 @@ int main(int argc, char** argv) {
         // FEAR
         cerr << "Rank " << rank << ", run decoder to get nbest fear translations" << endl;
         size_t fearPos = featureValues[batchPosition].size();
-        vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, 1.0,
+        vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
            featureValues[batchPosition], bleuScores[batchPosition], true,
             distinctNbest, rank);
         decoder->cleanup();
@@ -865,7 +951,6 @@ int main(int argc, char** argv) {
         for (size_t i = 0; i < fear.size(); ++i) {
           delete fear[i];
         }
-        }
 
         cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
         summedApproxBleu += bleuScores[batchPosition][0];
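
Reviewer note: with the one-constraint branch removed, the model, hope and fear passes above differ only in the BLEU objective weight passed to getNBest (0.0, 1.0 and -1.0), each scaled by the new bleuScoreWeight. Conceptually (sketch, not patch code):

    // per-hypothesis search objective, with w = bleuObjectiveWeight * bleuScoreWeight:
    //   model: score(h) = model(h)                 (bleuObjectiveWeight =  0.0)
    //   hope:  score(h) = model(h) + w * bleu(h)   (bleuObjectiveWeight =  1.0)
    //   fear:  score(h) = model(h) - w * bleu(h)   (bleuObjectiveWeight = -1.0)
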
@@ -908,7 +993,7 @@ int main(int argc, char** argv) {
           vector< vector<float> > bestModelOld_batch;
           for (size_t i = 0; i < actualBatchSize; ++i) {
             string& input = inputSentences[*current_sid_start + i];
-            vector<float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
+            vector<float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
             bestModelOld_batch.push_back(bestModelOld);
             decoder->cleanup();
           }
@@ -928,16 +1013,9 @@ int main(int argc, char** argv) {
         cerr << "\nRank " << rank << ", run optimiser:" << endl;
         ScoreComponentCollection oldWeights(mosesWeights);
         vector<int> update_status;
-        if (one_constraint) {
-          update_status = optimiser->updateWeightsAnalytically(mosesWeights, featureValues[0][0],
-              losses[0][0], oracleFeatureValues[0], oracleBleuScores[0], ref_ids[0],
-              learning_rate, max_sentence_update, rank, epoch, controlUpdates);
-        }
-        else {
-          update_status = optimiser->updateWeights(mosesWeights, featureValues,
+        update_status = optimiser->updateWeights(mosesWeights, featureValues,
             losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
             learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
-        }
 
         if (update_status[0] == 1) {
           cerr << "Rank " << rank << ", no update for batch" << endl;
@@ -985,7 +1063,7 @@ int main(int argc, char** argv) {
           vector<float> bestModelNew;
           for (size_t i = 0; i < actualBatchSize; ++i) {
             string& input = inputSentences[*current_sid_start + i];
-            bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
+            bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
             decoder->cleanup();
             cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl;
             cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl;
           }
         }
@@ -993,11 +1071,21 @@ int main(int argc, char** argv) {
 
-        // update history (for approximate document Bleu)
-        for (size_t i = 0; i < oracles.size(); ++i) {
-          cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
+        if (!sentenceLevelBleu) {
+          // update history (for approximate document Bleu)
+          if (historyOf1best) {
+            for (size_t i = 0; i < oneBests.size(); ++i) {
+              cerr << "Rank " << rank << ", 1best length: " << oneBests[i].size() << " ";
+            }
+            decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch);
+          }
+          else {
+            for (size_t i = 0; i < oracles.size(); ++i) {
+              cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
+            }
+            decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
+          }
         }
-        decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
 
         // clean up oracle translations after updating history
         for (size_t i = 0; i < oracles.size(); ++i) {
@@ -1136,6 +1224,10 @@ int main(int argc, char** argv) {
       }// end dumping
 
     } // end of shard loop, end of this epoch
+
+    cerr << "Bleu feature history after epoch " << epoch << endl;
+    decoder->printBleuFeatureHistory(cerr);
+
     size_t sumUpdates;
     size_t *sendbuf_uint, *recvbuf_uint;
     sendbuf_uint = (size_t *) malloc(sizeof(size_t));
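
Reviewer note: the burn-in phase exists to seed the smoothed per-n-gram statistics behind the approximate document BLEU before any weight update happens. This patch does not change UpdateHistory itself; assuming it keeps the usual exponentially decayed pseudo-document update, each burn-in sentence would contribute along these lines (sketch under that assumption, not patch code):

    // Assumed form of the existing BleuScoreFeature::UpdateHistory smoothing,
    // so a sentence seen k updates ago contributes with weight historySmoothing^k.
    for (size_t i = 0; i < BleuScoreState::bleu_order; ++i) {
      m_count_history[i] = m_historySmoothing * (m_count_history[i] + ngram_counts[i]);
      m_match_history[i] = m_historySmoothing * (m_match_history[i] + ngram_matches[i]);
    }
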
diff --git a/mira/Optimiser.h b/mira/Optimiser.h
index bacdd027d..879c92d1f 100644
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@@ -88,7 +88,7 @@ namespace Mira {
      MiraOptimiser() :
        Optimiser() { }
 
-     MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, bool weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, bool one_per_batch, size_t exampleSize) :
+     MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, size_t weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
        Optimiser(),
        m_n(n),
        m_hildreth(hildreth),
@@ -99,7 +99,6 @@ namespace Mira {
        m_max_number_oracles(maxNumberOracles),
        m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
        m_pastAndCurrentConstraints(pastAndCurrentConstraints),
-       m_one_per_batch(one_per_batch),
        m_oracles(exampleSize),
        m_bleu_of_oracles(exampleSize) { }
 
@@ -166,7 +165,7 @@ namespace Mira {
      // regularise Hildreth updates
      float m_slack;
 
-     bool m_weightedLossFunction;
+     size_t m_weightedLossFunction;
 
      // index of oracle translation in hypothesis matrix
      std::vector<size_t> m_oracleIndices;
@@ -180,14 +179,13 @@ namespace Mira {
 
      // accumulate most violated constraints for every example
      std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
-     std::vector< float> m_lossMarginDistances;
+     std::vector< float> m_losses;
+
      bool m_accumulateMostViolatedConstraints;
      bool m_pastAndCurrentConstraints;
-     bool m_one_per_batch;
-
      Moses::ScoreComponentCollection m_accumulatedUpdates;
   };
 }
diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp
index 324eb1c3d..8a4a8f902 100644
--- a/moses/src/BleuScoreFeature.cpp
+++ b/moses/src/BleuScoreFeature.cpp
@@ -95,8 +95,20 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
         m_BP_factor(BPfactor),
         m_historySmoothing(historySmoothing) {}
 
+
+void BleuScoreFeature::PrintHistory(std::ostream& out) const {
+    out << "source length history=" << m_source_length_history << endl;
+    out << "target length history=" << m_target_length_history << endl;
+    out << "ref length history=" << m_ref_length_history << endl;
+
+    for (size_t i = 0; i < BleuScoreState::bleu_order; ++i) {
+        out << "match history/count history (" << i << "):" << m_match_history[i] << "/" << m_count_history[i] << endl;
+    }
+}
+
 void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
 {
+    m_refs.clear();
     FactorCollection& fc = FactorCollection::Instance();
     for (size_t file_id = 0; file_id < refs.size(); file_id++) {
       for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h
index 7efaaa4c8..52a4506de 100644
--- a/moses/src/BleuScoreFeature.h
+++ b/moses/src/BleuScoreFeature.h
@@ -62,6 +62,7 @@ public:
         return 1;
     }
 
+    void PrintHistory(std::ostream& out) const;
     void LoadReferences(const std::vector< std::vector< std::string > > &);
     void SetCurrentSourceLength(size_t);
     void SetCurrentReference(size_t);
diff --git a/moses/src/FeatureVector.cpp b/moses/src/FeatureVector.cpp
index fe1272983..e628cc810 100644
--- a/moses/src/FeatureVector.cpp
+++ b/moses/src/FeatureVector.cpp
@@ -178,8 +178,11 @@ namespace Moses {
 /*      if (i->first != DEFAULT_NAME && i->second != 0.0) {
         out << i->first << "=" << value << ", ";
       }*/
-      if (i->first != DEFAULT_NAME) {
+/*      if (i->first != DEFAULT_NAME) {
         out << i->first << "=" << value << ", ";
+      }*/
+      if (i->first != DEFAULT_NAME) {
+        out << value << ", ";
       }
     }
     out << "}";
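
Reviewer note: given the PrintHistory implementation above, the new introspection hook emits one line per length statistic plus one per n-gram order. A usage sketch with illustrative (made-up) values:

    decoder->printBleuFeatureHistory(cerr);
    // source length history=2345.6
    // target length history=2398.1
    // ref length history=2360.9
    // match history/count history (0):1801.2/2398.1
    // match history/count history (1):1320.4/2301.7
    // ...

Note also that the FeatureVector change at the end of the patch switches the score-breakdown printer from "name=value, " pairs to bare "value, " entries, which affects anything parsing that output.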