From e1f6db3438a10e0968edaf9a570d477928a6e743 Mon Sep 17 00:00:00 2001 From: Eva Hasler Date: Thu, 12 Jan 2012 16:26:16 +0000 Subject: [PATCH] add parameter --stabilise-length --- mira/Decoder.cpp | 4 ++-- mira/Decoder.h | 2 +- mira/Main.cpp | 57 +++++++++++++++++++++++++++++++++++++++++------- mira/Main.h | 2 ++ 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp index 375eec8b5..3c7e381c4 100644 --- a/mira/Decoder.cpp +++ b/mira/Decoder.cpp @@ -184,9 +184,9 @@ namespace Mira { m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch); } - void MosesDecoder::loadReferenceSentences(const vector >& refs) { +/* void MosesDecoder::loadReferenceSentences(const vector >& refs) { m_bleuScoreFeature->LoadReferences(refs); - } + }*/ void MosesDecoder::printBleuFeatureHistory(std::ostream& out) { m_bleuScoreFeature->PrintHistory(out); diff --git a/mira/Decoder.h b/mira/Decoder.h index 7b42458f3..067f1cdcb 100644 --- a/mira/Decoder.h +++ b/mira/Decoder.h @@ -64,7 +64,7 @@ class MosesDecoder { size_t getCurrentInputLength(); void updateHistory(const std::vector& words); void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); - void loadReferenceSentences(const std::vector >& refs); +// void loadReferenceSentences(const std::vector >& refs); void printBleuFeatureHistory(std::ostream& out); void printReferenceLength(const std::vector& ref_ids); size_t getReferenceLength(size_t ref_id); diff --git a/mira/Main.cpp b/mira/Main.cpp index ad7fabd97..bb8a5f677 100644 --- a/mira/Main.cpp +++ b/mira/Main.cpp @@ -120,6 +120,7 @@ int main(int argc, char** argv) { float max_length_dev_hypos; float max_length_dev_reference; float relax_BP; + bool stabiliseLength; po::options_description desc("Allowed options"); desc.add_options() ("accumulate-weights", po::value(&accumulateWeights)->default_value(false), "Accumulate and 
average weights over all epochs") @@ -176,6 +177,7 @@ int main(int argc, char** argv) { ("slack", po::value(&slack)->default_value(0.01), "Use slack in optimiser") ("slack-min", po::value(&slack_min)->default_value(0.01), "Minimum slack used") ("slack-step", po::value(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided") + ("stabilise-length", po::value(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1") ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge") ("threads", po::value(&threadcount)->default_value(1), "Number of threads used") ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") @@ -408,6 +410,9 @@ int main(int argc, char** argv) { ScoreComponentCollection mixedAverageWeightsPrevious; ScoreComponentCollection mixedAverageWeightsBeforePrevious; + // when length ratio >= 1, set this to true + bool fixLength = false; + bool stop = false; // int sumStillViolatedConstraints; float *sendbuf, *recvbuf; @@ -426,6 +431,10 @@ int main(int argc, char** argv) { // number of weight dumps this epoch size_t weightEpochDump = 0; + // sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch (both start at 0 because they are only ever updated with +=) + size_t dev_hypothesis_length = 0; + size_t dev_reference_length = 0; + size_t shardPosition = 0; vector::const_iterator sid = shard.begin(); while (sid != shard.end()) { @@ -459,7 +468,7 @@ int main(int argc, char** argv) { for (size_t batchPosition = 0; batchPosition < batchSize && sid != shard.end(); ++batchPosition) { string& input = inputSentences[*sid]; - const vector& refs = referenceSentences[*sid]; +// const vector& refs = referenceSentences[*sid]; cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl; vector newFeatureValues; @@ -473,7 +482,7 @@ int main(int argc, char** argv) { 
featureValuesFear.push_back(newFeatureValues); bleuScoresHope.push_back(newBleuScores); bleuScoresFear.push_back(newBleuScores); - if (historyOf1best) { + if (historyOf1best || stabiliseLength) { dummyFeatureValues.push_back(newFeatureValues); dummyBleuScores.push_back(newBleuScores); } @@ -492,13 +501,16 @@ int main(int argc, char** argv) { cerr << ", l-ratio hope: " << hope_length_ratio << endl; vector bestModel; - if (historyOf1best) { + if (historyOf1best || stabiliseLength) { // MODEL (for updating the history only, using dummy vectors) - cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl; bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight, dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true, distinctNbest, rank, epoch); decoder->cleanup(); + cerr << endl; + dev_hypothesis_length += bestModel.size(); + dev_reference_length += reference_length; } // FEAR @@ -575,6 +587,10 @@ int main(int argc, char** argv) { oneBests.push_back(bestModel); float model_length_ratio = (float)bestModel.size()/reference_length; cerr << ", l-ratio model: " << model_length_ratio << endl; + if (stabiliseLength) { + dev_hypothesis_length += bestModel.size(); + dev_reference_length += reference_length; + } // FEAR cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl; @@ -621,6 +637,19 @@ int main(int argc, char** argv) { break; } + // set word penalty to 0 before optimising (if 'stabilise-length' is active) + if (fixLength) { + iter = featureFunctions.begin(); + for (; iter != featureFunctions.end(); ++iter) { + if ((*iter)->GetScoreProducerWeightShortName() == "w") { + ignoreWPFeature(featureValues, (*iter)); + ignoreWPFeature(featureValuesHope, (*iter)); + ignoreWPFeature(featureValuesFear, (*iter)); + break; + } + } + } + // take logs of 
feature values if (logFeatureValues) { takeLogs(featureValuesHope, baseOfLog); @@ -803,6 +832,14 @@ int main(int argc, char** argv) { }// end dumping } // end of shard loop, end of this epoch + if (stabiliseLength && !fixLength) { + float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length; + if (dev_reference_length > 0 && lengthRatio >= 1) { /* an epoch with no counted reference words gives x/0 = inf; do not fix the word penalty on that */ + cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl; + fixLength = true; + } + } + if (verbosity > 0) { cerr << "Bleu feature history after epoch " << epoch << endl; decoder->printBleuFeatureHistory(cerr); @@ -981,16 +1018,20 @@ void printFeatureValues(vector > &featureValues } void ignoreCoreFeatures(vector > &featureValues, StrFloatMap &coreWeightMap) { - for (size_t i = 0; i < featureValues.size(); ++i) { + for (size_t i = 0; i < featureValues.size(); ++i) for (size_t j = 0; j < featureValues[i].size(); ++j) { // set all core features to 0 StrFloatMap::iterator p; for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p) - { featureValues[i][j].Assign(p->first, 0); - } } - } +} + +void ignoreWPFeature(vector > &featureValues, const ScoreProducer* sp) { + for (size_t i = 0; i < featureValues.size(); ++i) + for (size_t j = 0; j < featureValues[i].size(); ++j) + // set WP feature to 0 + featureValues[i][j].Assign(sp, 0); } void takeLogs(vector > &featureValues, size_t base) { diff --git a/mira/Main.h b/mira/Main.h index 68de9b9c9..4fd859b95 100644 --- a/mira/Main.h +++ b/mira/Main.h @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "ScoreComponentCollection.h" #include "Word.h" +#include "ScoreProducer.h" typedef std::map StrFloatMap; typedef std::pair StrFloatPair; @@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap); bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size); void printFeatureValues(std::vector > &featureValues); void 
ignoreCoreFeatures(std::vector > &featureValues, StrFloatMap &coreWeightMap); +void ignoreWPFeature(std::vector > &featureValues, const Moses::ScoreProducer* sp); void takeLogs(std::vector > &featureValues, size_t base); void deleteTranslations(std::vector > &translations);