revert clipping

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3778 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
evahasler 2010-12-15 12:53:49 +00:00
parent 4024b5ef79
commit aa2e988132
4 changed files with 42 additions and 40 deletions

View File

@ -104,7 +104,8 @@ namespace Mira {
vector< float>& bleuScores,
bool oracle,
bool distinct,
bool ignoreUWeight)
bool ignoreUWeight,
size_t rank)
{
StaticData &staticData = StaticData::InstanceNonConst();
@ -143,17 +144,15 @@ namespace Mira {
//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
cerr << "Total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
//if (distinct) {
Phrase bestPhrase = path.GetTargetPhrase();
for (size_t pos = 0; pos < bestPhrase.GetSize(); ++pos) {
const Word &word = bestPhrase.GetWord(pos);
Word *newWord = new Word(word);
cerr << *newWord << " ";
}
cerr << endl;
//}
cerr << "Rank " << rank << ": " << endl;
Phrase phrase = path.GetTargetPhrase();
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
const Word &word = phrase.GetWord(pos);
Word *newWord = new Word(word);
cerr << *newWord << " ";
}
cerr << endl;
// set bleu score to zero in the feature vector since we do not want to optimise its weight
setBleuScore(featureValues.back(), 0);

View File

@ -62,7 +62,8 @@ class MosesDecoder {
std::vector< float>& scores,
bool oracle,
bool distinct,
bool ignoreUWeight);
bool ignoreUWeight,
size_t rank);
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids);

View File

@ -100,6 +100,9 @@ int main(int argc, char** argv) {
bool suppressConvergence;
bool ignoreUWeight;
bool ignoreWeirdUpdates;
bool ignoreUpdatesAll;
bool ignoreUpdatesError;
bool ignoreUpdatesConstraints;
float clipping;
bool fixedClipping;
po::options_description desc("Allowed options");
@ -134,6 +137,9 @@ int main(int argc, char** argv) {
("suppress-convergence", po::value<bool>(&suppressConvergence)->default_value(false), "Suppress convergence, fixed number of epochs")
("ignore-u-weight", po::value<bool>(&ignoreUWeight)->default_value(false), "Don't tune unknown word penalty weight")
("ignore-weird-updates", po::value<bool>(&ignoreWeirdUpdates)->default_value(false), "Ignore updates that increase number of violated constraints AND increase the error")
("ignore-updates-all", po::value<bool>(&ignoreUpdatesAll)->default_value(false), "Ignore updates that increase number of violated constraints OR increase the error")
("ignore-updates-error", po::value<bool>(&ignoreUpdatesError)->default_value(false), "Ignore updates that increase the error")
("ignore-updates-constraints", po::value<bool>(&ignoreUpdatesConstraints)->default_value(false), "Ignore updates that increase the number of violated constraints")
("clipping", po::value<float>(&clipping)->default_value(0.01f), "Set a threshold to regularise updates")
("fixed-clipping", po::value<bool>(&fixedClipping)->default_value(false), "Use a fixed clipping threshold");
@ -233,8 +239,6 @@ int main(int argc, char** argv) {
cerr << "Using slack? " << slack << endl;
cerr << "BP factor: " << BPfactor << endl;
cerr << "Ignore unknown word penalty? " << ignoreUWeight << endl;
cerr << "Fixed clipping? " << fixedClipping << endl;
cerr << "clipping: " << clipping << endl;
if (learner == "mira") {
cerr << "Optimising using Mira" << endl;
optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, slack, weightedLossFunction, maxNumberOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
@ -320,7 +324,8 @@ int main(int argc, char** argv) {
bleuScores[batchPosition],
true,
distinctNbest,
ignoreUWeight);
ignoreUWeight,
rank);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
@ -344,7 +349,8 @@ int main(int argc, char** argv) {
bleuScores[batchPosition],
true,
distinctNbest,
ignoreUWeight);
ignoreUWeight,
rank);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ": ";
@ -371,7 +377,8 @@ int main(int argc, char** argv) {
bleuScores[batchPosition],
true,
distinctNbest,
ignoreUWeight);
ignoreUWeight,
rank);
decoder->cleanup();
cerr << "Rank " << rank << ": ";
for (size_t i = 0; i < fear.size(); ++i) {
@ -406,7 +413,7 @@ int main(int argc, char** argv) {
const vector<const ScoreProducer*> featureFunctions = StaticData::Instance().GetTranslationSystem (TranslationSystem::DEFAULT).GetFeatureFunctions();
mosesWeights.Assign(featureFunctions.back(), 0);
if (ignoreUWeight) {
/*if (ignoreUWeight) {
// set weight for unknown word penalty to 0
for (size_t i = 0; i < featureFunctions.size(); ++i) {
FName name = (featureFunctions[i]->GetFeatureNames())[0];
@ -415,7 +422,7 @@ int main(int argc, char** argv) {
mosesWeights.Assign(featureFunctions[i], 0);
}
}
}
}*/
if (!hildreth && typeid(*optimiser) == typeid(MiraOptimiser)) {
((MiraOptimiser*)optimiser)->setOracleIndices(oraclePositions);
@ -476,6 +483,9 @@ int main(int argc, char** argv) {
bool useNewWeights = true;
if (lossMinusMargin_new > lossMinusMargin_old) {
cerr << "Rank " << rank << ", worsening: " << lossMinusMargin_new - lossMinusMargin_old << endl;
if (ignoreUpdatesError || ignoreUpdatesAll) {
useNewWeights = false;
}
if (constraintChange < 0) {
cerr << "Rank " << rank << ", something is going wrong here.." << endl;
@ -485,6 +495,13 @@ int main(int argc, char** argv) {
}
}
if (ignoreUpdatesConstraints || ignoreUpdatesAll) {
if (constraintChange < 0) {
useNewWeights = false;
}
}
if (useNewWeights) {
decoder->setWeights(mosesWeights);
cumulativeWeights.PlusEquals(mosesWeights);
@ -559,7 +576,12 @@ int main(int argc, char** argv) {
cerr << "Rank 0, average total weights: " << averageTotalWeights << endl;
ostringstream filename;
filename << weightDumpStem << "_" << epoch;
if (epoch < 10) {
filename << weightDumpStem << "_0" << epoch;
}
else {
filename << weightDumpStem << "_" << epoch;
}
if (weightDumpFrequency > 1) {
filename << "_" << weightEpochDump;
}
@ -612,10 +634,8 @@ int main(int argc, char** argv) {
<< ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl;
#ifdef MPI_ENABLE
MPI_Finalize();
MPI_Abort(MPI_COMM_WORLD, 0);
#endif
exit(0);
}
}
}

View File

@ -147,15 +147,6 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
for (size_t k = 0; k < m_featureValueDiffs.size(); ++k) {
// compute update
float update = alphas[k];
if (m_fixedClipping) {
if (update > m_c) {
update = m_c;
}
else if (update < -1 * m_c) {
update = -1 * m_c;
}
}
m_featureValueDiffs[k].MultiplyEquals(update);
cerr << "alpha: " << update << endl;
@ -190,15 +181,6 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
// compute update
float update = alphas[k];
if (m_fixedClipping) {
if (update > m_c) {
update = m_c;
}
else if (update < -1 * m_c) {
update = -1 * m_c;
}
}
featureValueDiffs[k].MultiplyEquals(update);
cerr << "alpha: " << update << endl;