diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp index 61d971171..f1d6fc0e7 100644 --- a/mira/Decoder.cpp +++ b/mira/Decoder.cpp @@ -104,7 +104,8 @@ namespace Mira { vector< float>& bleuScores, bool oracle, bool distinct, - bool ignoreUWeight) + bool ignoreUWeight, + size_t rank) { StaticData &staticData = StaticData::InstanceNonConst(); @@ -143,17 +144,15 @@ namespace Mira { //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl; float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore; cerr << "Total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl; - //if (distinct) { - Phrase bestPhrase = path.GetTargetPhrase(); - for (size_t pos = 0; pos < bestPhrase.GetSize(); ++pos) { - const Word &word = bestPhrase.GetWord(pos); - Word *newWord = new Word(word); - cerr << *newWord << " "; - } - - cerr << endl; - //} + cerr << "Rank " << rank << ": " << endl; + Phrase phrase = path.GetTargetPhrase(); + for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { + const Word &word = phrase.GetWord(pos); + Word *newWord = new Word(word); + cerr << *newWord << " "; + } + cerr << endl; // set bleu score to zero in the feature vector since we do not want to optimise its weight setBleuScore(featureValues.back(), 0); diff --git a/mira/Decoder.h b/mira/Decoder.h index f82a36894..9bf76ee27 100644 --- a/mira/Decoder.h +++ b/mira/Decoder.h @@ -62,7 +62,8 @@ class MosesDecoder { std::vector< float>& scores, bool oracle, bool distinct, - bool ignoreUWeight); + bool ignoreUWeight, + size_t rank); size_t getCurrentInputLength(); void updateHistory(const std::vector& words); void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& sourceLengths, std::vector& ref_ids); diff --git a/mira/Main.cpp b/mira/Main.cpp index 5f32cbab9..6bba2782e 100644 --- a/mira/Main.cpp +++ b/mira/Main.cpp @@ -100,6 +100,9 @@ int main(int argc, char** argv) { bool suppressConvergence; bool ignoreUWeight; bool ignoreWeirdUpdates; + bool ignoreUpdatesAll; + bool ignoreUpdatesError; + bool ignoreUpdatesConstraints; float clipping; bool fixedClipping; po::options_description desc("Allowed options"); @@ -134,6 +137,9 @@ int main(int argc, char** argv) { ("suppress-convergence", po::value(&suppressConvergence)->default_value(false), "Suppress convergence, fixed number of epochs") ("ignore-u-weight", po::value(&ignoreUWeight)->default_value(false), "Don't tune unknown word penalty weight") ("ignore-weird-updates", po::value(&ignoreWeirdUpdates)->default_value(false), "Ignore updates that increase number of violated constraints AND increase the error") + ("ignore-updates-all", po::value(&ignoreUpdatesAll)->default_value(false), "Ignore updates that increase number of violated constraints OR increase the error") + ("ignore-updates-error", po::value(&ignoreUpdatesError)->default_value(false), "Ignore updates that increase the error") + ("ignore-updates-constraints", po::value(&ignoreUpdatesConstraints)->default_value(false), "Ignore updates that increase the number of violated constraints") ("clipping", po::value(&clipping)->default_value(0.01f), "Set a threshold to regularise updates") ("fixed-clipping", po::value(&fixedClipping)->default_value(false), "Use a fixed clipping threshold"); @@ -233,8 +239,6 @@ int main(int argc, char** argv) { cerr << "Using slack? " << slack << endl; cerr << "BP factor: " << BPfactor << endl; cerr << "Ignore unknown word penalty? " << ignoreUWeight << endl; - cerr << "Fixed clipping? " << fixedClipping << endl; - cerr << "clipping: " << clipping << endl; if (learner == "mira") { cerr << "Optimising using Mira" << endl; optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, slack, weightedLossFunction, maxNumberOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size()); @@ -320,7 +324,8 @@ int main(int argc, char** argv) { bleuScores[batchPosition], true, distinctNbest, - ignoreUWeight); + ignoreUWeight, + rank); inputLengths.push_back(decoder->getCurrentInputLength()); ref_ids.push_back(*sid); decoder->cleanup(); @@ -344,7 +349,8 @@ int main(int argc, char** argv) { bleuScores[batchPosition], true, distinctNbest, - ignoreUWeight); + ignoreUWeight, + rank); decoder->cleanup(); oracles.push_back(oracle); cerr << "Rank " << rank << ": "; @@ -371,7 +377,8 @@ int main(int argc, char** argv) { bleuScores[batchPosition], true, distinctNbest, - ignoreUWeight); + ignoreUWeight, + rank); decoder->cleanup(); cerr << "Rank " << rank << ": "; for (size_t i = 0; i < fear.size(); ++i) { @@ -406,7 +413,7 @@ int main(int argc, char** argv) { const vector featureFunctions = StaticData::Instance().GetTranslationSystem (TranslationSystem::DEFAULT).GetFeatureFunctions(); mosesWeights.Assign(featureFunctions.back(), 0); - if (ignoreUWeight) { + /*if (ignoreUWeight) { // set weight for unknown word penalty to 0 for (size_t i = 0; i < featureFunctions.size(); ++i) { FName name = (featureFunctions[i]->GetFeatureNames())[0]; @@ -415,7 +422,7 @@ int main(int argc, char** argv) { mosesWeights.Assign(featureFunctions[i], 0); } } - } + }*/ if (!hildreth && typeid(*optimiser) == typeid(MiraOptimiser)) { ((MiraOptimiser*)optimiser)->setOracleIndices(oraclePositions); @@ -476,6 +483,9 @@ int main(int argc, char** argv) { bool useNewWeights = true; if (lossMinusMargin_new > lossMinusMargin_old) { cerr << "Rank " << rank << ", worsening: " << lossMinusMargin_new - lossMinusMargin_old << endl; + if (ignoreUpdatesError || ignoreUpdatesAll) { + useNewWeights = false; + } if (constraintChange < 0) { cerr << "Rank " << rank << ", something is going wrong here.." << endl; @@ -485,6 +495,13 @@ int main(int argc, char** argv) { } } + if (ignoreUpdatesConstraints || ignoreUpdatesAll) { + if (constraintChange < 0) { + useNewWeights = false; + } + } + + if (useNewWeights) { decoder->setWeights(mosesWeights); cumulativeWeights.PlusEquals(mosesWeights); @@ -559,7 +576,12 @@ int main(int argc, char** argv) { cerr << "Rank 0, average total weights: " << averageTotalWeights << endl; ostringstream filename; - filename << weightDumpStem << "_" << epoch; + if (epoch < 10) { + filename << weightDumpStem << "_0" << epoch; + } + else { + filename << weightDumpStem << "_" << epoch; + } if (weightDumpFrequency > 1) { filename << "_" << weightEpochDump; } @@ -612,10 +634,8 @@ int main(int argc, char** argv) { << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl; #ifdef MPI_ENABLE - MPI_Finalize(); MPI_Abort(MPI_COMM_WORLD, 0); #endif - exit(0); } } } diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp index a06ac6785..f1cc1b18c 100644 --- a/mira/MiraOptimiser.cpp +++ b/mira/MiraOptimiser.cpp @@ -147,15 +147,6 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, for (size_t k = 0; k < m_featureValueDiffs.size(); ++k) { // compute update float update = alphas[k]; - if (m_fixedClipping) { - if (update > m_c) { - update = m_c; - } - else if (update < -1 * m_c) { - update = -1 * m_c; - } - } - m_featureValueDiffs[k].MultiplyEquals(update); cerr << "alpha: " << update << endl; @@ -190,15 +181,6 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights, for (size_t k = 0; k < featureValueDiffs.size(); ++k) { // compute update float update = alphas[k]; - if (m_fixedClipping) { - if (update > m_c) { - update = m_c; - } - else if (update < -1 * m_c) { - update = -1 * m_c; - } - } - featureValueDiffs[k].MultiplyEquals(update); cerr << "alpha: " << update << endl;