add parameter --stabilise-length

This commit is contained in:
Eva Hasler 2012-01-12 16:26:16 +00:00
parent eaf940d5c1
commit e1f6db3438
4 changed files with 54 additions and 11 deletions

View File

@ -184,9 +184,9 @@ namespace Mira {
m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
}
void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
/* void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
m_bleuScoreFeature->LoadReferences(refs);
}
}*/
void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
m_bleuScoreFeature->PrintHistory(out);

View File

@ -64,7 +64,7 @@ class MosesDecoder {
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
// void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
void printBleuFeatureHistory(std::ostream& out);
void printReferenceLength(const std::vector<size_t>& ref_ids);
size_t getReferenceLength(size_t ref_id);

View File

@ -120,6 +120,7 @@ int main(int argc, char** argv) {
float max_length_dev_hypos;
float max_length_dev_reference;
float relax_BP;
bool stabiliseLength;
po::options_description desc("Allowed options");
desc.add_options()
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
@ -176,6 +177,7 @@ int main(int argc, char** argv) {
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stabilise-length", po::value<bool>(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
@ -408,6 +410,9 @@ int main(int argc, char** argv) {
ScoreComponentCollection mixedAverageWeightsPrevious;
ScoreComponentCollection mixedAverageWeightsBeforePrevious;
// set fixLength to true once the dev-set length ratio (hypothesis/reference) reaches 1 (only with --stabilise-length; see end-of-epoch check)
bool fixLength = false;
bool stop = false;
// int sumStillViolatedConstraints;
float *sendbuf, *recvbuf;
@ -426,6 +431,10 @@ int main(int argc, char** argv) {
// number of weight dumps this epoch
size_t weightEpochDump = 0;
// accumulated lengths of dev hypotheses/references, used to compute the translation length ratio for this epoch
// NOTE(review): these accumulators appear to be used with '+=' without being zero-initialised first — confirm/initialise to 0
size_t dev_hypothesis_length;
size_t dev_reference_length;
size_t shardPosition = 0;
vector<size_t>::const_iterator sid = shard.begin();
while (sid != shard.end()) {
@ -459,7 +468,7 @@ int main(int argc, char** argv) {
for (size_t batchPosition = 0; batchPosition < batchSize && sid
!= shard.end(); ++batchPosition) {
string& input = inputSentences[*sid];
const vector<string>& refs = referenceSentences[*sid];
// const vector<string>& refs = referenceSentences[*sid];
cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
vector<ScoreComponentCollection> newFeatureValues;
@ -473,7 +482,7 @@ int main(int argc, char** argv) {
featureValuesFear.push_back(newFeatureValues);
bleuScoresHope.push_back(newBleuScores);
bleuScoresFear.push_back(newBleuScores);
if (historyOf1best) {
if (historyOf1best || stabiliseLength) {
dummyFeatureValues.push_back(newFeatureValues);
dummyBleuScores.push_back(newBleuScores);
}
@ -492,13 +501,16 @@ int main(int argc, char** argv) {
cerr << ", l-ratio hope: " << hope_length_ratio << endl;
vector<const Word*> bestModel;
if (historyOf1best) {
if (historyOf1best || stabiliseLength) {
// MODEL: 1-best wrt model score (using dummy vectors), for updating the history and — with --stabilise-length — for tracking the length ratio
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank, epoch);
decoder->cleanup();
cerr << endl;
dev_hypothesis_length += bestModel.size();
dev_reference_length += reference_length;
}
// FEAR
@ -575,6 +587,10 @@ int main(int argc, char** argv) {
oneBests.push_back(bestModel);
float model_length_ratio = (float)bestModel.size()/reference_length;
cerr << ", l-ratio model: " << model_length_ratio << endl;
if (stabiliseLength) {
dev_hypothesis_length += bestModel.size();
dev_reference_length += reference_length;
}
// FEAR
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
@ -621,6 +637,19 @@ int main(int argc, char** argv) {
break;
}
// zero out the word-penalty feature before optimising, once --stabilise-length has triggered (i.e. fixLength is true after the length ratio reached 1)
if (fixLength) {
iter = featureFunctions.begin();
for (; iter != featureFunctions.end(); ++iter) {
if ((*iter)->GetScoreProducerWeightShortName() == "w") {
ignoreWPFeature(featureValues, (*iter));
ignoreWPFeature(featureValuesHope, (*iter));
ignoreWPFeature(featureValuesFear, (*iter));
break;
}
}
}
// take logs of feature values
if (logFeatureValues) {
takeLogs(featureValuesHope, baseOfLog);
@ -803,6 +832,14 @@ int main(int argc, char** argv) {
}// end dumping
} // end of shard loop, end of this epoch
if (stabiliseLength && !fixLength) {
float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
if (lengthRatio >= 1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl;
fixLength = 1;
}
}
if (verbosity > 0) {
cerr << "Bleu feature history after epoch " << epoch << endl;
decoder->printBleuFeatureHistory(cerr);
@ -981,16 +1018,20 @@ void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues
}
void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap) {
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t i = 0; i < featureValues.size(); ++i)
for (size_t j = 0; j < featureValues[i].size(); ++j) {
// set all core features to 0
StrFloatMap::iterator p;
for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
{
featureValues[i][j].Assign(p->first, 0);
}
}
}
}
void ignoreWPFeature(vector<vector<ScoreComponentCollection> > &featureValues, const ScoreProducer* sp) {
for (size_t i = 0; i < featureValues.size(); ++i)
for (size_t j = 0; j < featureValues[i].size(); ++j)
// set WP feature to 0
featureValues[i][j].Assign(sp, 0);
}
void takeLogs(vector<vector<ScoreComponentCollection> > &featureValues, size_t base) {

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "ScoreComponentCollection.h"
#include "Word.h"
#include "ScoreProducer.h"
typedef std::map<const std::string, float> StrFloatMap;
typedef std::pair<const std::string, float> StrFloatPair;
@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap);
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap);
void ignoreWPFeature(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, const Moses::ScoreProducer* sp);
void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);