mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-26 13:23:25 +03:00)

commit 1964eaf98a, parent eaada140a4

refactor handling of accumulated constraints, constraint checking, introduce burn-in for bleu history, sentence-level bleu, bleu score weight

git-svn-id: http://svn.statmt.org/repository/mira@3882 cc96ff50-19ce-11e0-b349-13d7f0bd23df
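Note: the recurring arithmetic in this commit is a two-level BLEU weighting. The decoder is run with weight-bl = bleuObjectiveWeight * bleuScoreWeight, and the pure model score of a hypothesis is recovered by subtracting that same product times its BLEU. A minimal stand-alone sketch of the arithmetic (illustrative values only; totalScore and bleuScore stand in for values read off a decoded TrellisPath):

    #include <iostream>

    int main() {
        // Illustrative values only; in the diff these come off a decoded TrellisPath.
        float totalScore = -104.2f;        // decoder score, BLEU feature included
        float bleuScore = 0.31f;           // (approximate) sentence BLEU of the hypothesis
        float bleuObjectiveWeight = 1.0f;  // weight of BLEU in the MIRA objective
        float bleuScoreWeight = 2.0f;      // new multiplier on top of the objective weight

        // The decoder runs with weight-bl = bleuObjectiveWeight * bleuScoreWeight,
        // so the pure model score is recovered by subtracting the same product:
        float scoreWithoutBleu = totalScore - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);

        std::cout << "score w/o bleu: " << scoreWithoutBleu << std::endl;
        return 0;
    }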
@@ -68,7 +68,7 @@ namespace Mira {
 		delete[] mosesargv;
 	}
 
-	MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
+	MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
 		: m_manager(NULL) {
 		// force initialisation of the phrase dictionary (TODO: what for?)
 		const StaticData &staticData = StaticData::Instance();
@@ -84,7 +84,6 @@ namespace Mira {
 		// Add the bleu feature
 		m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
 		(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
-		m_bleuScoreFeature->LoadReferences(refs);
 	}
 
 	void MosesDecoder::cleanup() {
@@ -113,7 +112,7 @@ namespace Mira {
 
 		// set the weight for the bleu feature
 		ostringstream bleuWeightStr;
-		bleuWeightStr << bleuObjectiveWeight;
+		bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
 		PARAM_VEC bleuWeight(1,bleuWeightStr.str());
 
 		staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
@@ -137,7 +136,7 @@ namespace Mira {
 			bleuScores.push_back(bleuScore);
 
 			//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
-			float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
+			float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
 			cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
 
 			Phrase bestPhrase = path.GetTargetPhrase();
@@ -179,6 +178,7 @@ namespace Mira {
 	vector<float> MosesDecoder::getBleuAndScore(const std::string& source,
 			size_t sentenceid,
 			float bleuObjectiveWeight,
+			float bleuScoreWeight,
 			bool distinct)
 	{
 		StaticData &staticData = StaticData::InstanceNonConst();
@@ -191,7 +191,7 @@ namespace Mira {
 
 		// set the weight for the bleu feature
 		ostringstream bleuWeightStr;
-		bleuWeightStr << bleuObjectiveWeight;
+		bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
 		PARAM_VEC bleuWeight(1,bleuWeightStr.str());
 
 		staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
@@ -211,7 +211,7 @@ namespace Mira {
 		vector<float> bleuAndScore;
 		const Moses::TrellisPath &path = **iter;
 		float bleuScore = getBleuScore(path.GetScoreBreakdown());
-		float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
+		float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
 		bleuAndScore.push_back(bleuScore);
 		bleuAndScore.push_back(scoreWithoutBleu);
 		return bleuAndScore;
@@ -246,6 +246,14 @@ namespace Mira {
 		m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
 	}
 
+	void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
+		m_bleuScoreFeature->LoadReferences(refs);
+	}
+
+	void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
+		m_bleuScoreFeature->PrintHistory(out);
+	}
+
 	vector<float> MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch, size_t rank) {
 		vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
 		if (bleu.size() > 0) {
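Note: both decoding entry points push the combined weight into the decoder through Moses's string-typed parameter table under "weight-bl". A stand-alone sketch of just that serialisation step, using std::vector<std::string> as a stand-in for PARAM_VEC (which, as far as I can tell, is a typedef for exactly that):

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
        float bleuObjectiveWeight = 1.0f;
        float bleuScoreWeight = 2.0f;

        // Serialise the combined weight to a string, as the diff does before
        // calling StaticData's OverwriteParam("weight-bl", ...).
        std::ostringstream bleuWeightStr;
        bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
        std::vector<std::string> bleuWeight(1, bleuWeightStr.str());  // stand-in for PARAM_VEC

        std::cout << "weight-bl = " << bleuWeight[0] << std::endl;  // prints "weight-bl = 2"
        return 0;
    }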
@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc, std::vecto
  **/
 class MosesDecoder {
 	public:
-		MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
+		MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
 
 		//returns the best sentence
 		std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -66,10 +66,13 @@ class MosesDecoder {
 		std::vector<float> getBleuAndScore(const std::string& source,
 				size_t sentenceid,
 				float bleuObjectiveWeight,
+				float bleuScoreWeight,
 				bool distinct);
 		size_t getCurrentInputLength();
 		void updateHistory(const std::vector<const Moses::Word*>& words);
 		void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
+		void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
+		void printBleuFeatureHistory(std::ostream& out);
 		std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
 		void setBPfactor(float factor);
 		Moses::ScoreComponentCollection getWeights();
mira/Main.cpp (214 changed lines)
@@ -142,7 +142,7 @@ int main(int argc, char** argv) {
 	float marginScaleFactorMin;
 	float min_learning_rate;
 	float min_sentence_update;
-	bool weightedLossFunction;
+	size_t weightedLossFunction;
 	size_t n;
 	size_t batchSize;
 	bool distinctNbest;
@@ -172,8 +172,6 @@ int main(int argc, char** argv) {
 	float decrease_sentence_update;
 	bool devBleu;
 	bool normaliseWeights;
-	bool one_constraint;
-	bool one_per_batch;
 	bool print_feature_values;
 	bool stop_dev_bleu;
 	bool stop_approx_dev_bleu;
@@ -181,14 +179,25 @@ int main(int argc, char** argv) {
 	bool train_linear_classifier;
 	int updates_per_epoch;
 	bool multiplyA;
+	bool historyOf1best;
+	bool burnIn;
+	string burnInInputFile;
+	vector<string> burnInReferenceFiles;
+	bool sentenceLevelBleu;
+	float bleuScoreWeight;
 	po::options_description desc("Allowed options");
-	desc.add_options()("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false),"Accumulate most violated constraint per example")
+	desc.add_options()
+		("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false), "Accumulate most violated constraint per example")
 		("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
 		("adapt-BP-factor", po::value<bool>(&adapt_BPfactor)->default_value(0), "Set factor to 1 when optimal translation length is reached")
 		("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
 		("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for taking the log of feature values")
 		("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is sent to optimiser for weight adjustments")
+		("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the bleu objective weight)")
 		("BP-factor", po::value<float>(&BPfactor)->default_value(1.0), "Increase penalty for short translations")
+		("burn-in", po::value<bool>(&burnIn)->default_value(false), "Do a burn-in of the BLEU history before training")
+		("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
+		("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference files for burn-in phase of BLEU history")
 		("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
 		("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
 		("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
@@ -200,6 +209,7 @@ int main(int argc, char** argv) {
 		("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
 		("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
 		("hildreth", po::value<bool>(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm")
+		("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
 		("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
 		("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
 		("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
@@ -214,16 +224,15 @@ int main(int argc, char** argv) {
 		("msf", po::value<float>(&marginScaleFactor)->default_value(1.0), "Margin scale factor, regularises the update by scaling the enforced margin")
 		("msf-min", po::value<float>(&marginScaleFactorMin)->default_value(1.0), "Minimum value that margin is scaled by")
 		("msf-step", po::value<float>(&marginScaleFactorStep)->default_value(0), "Decrease margin scale factor iteratively by the value provided")
-	("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
-	("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
+		("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
+		("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
 		("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
-		("one-constraint", po::value<bool>(&one_constraint)->default_value(false), "Forget about hope and fear and consider only the 1best model translation to formulate a constraint")
-		("one-per-batch", po::value<bool>(&one_per_batch)->default_value(false), "Only 1 constraint per batch for params --accumulate-most-violated.. and --past-and-current..")
 		("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
 		("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
 		("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
 		("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
 		("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
+		("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentence-level bleu scoring function")
 		("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
 		("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
 		("slack-max", po::value<float>(&slack_max)->default_value(0), "Maximum slack used")
@@ -236,7 +245,7 @@ int main(int argc, char** argv) {
 		("updates-per-epoch", po::value<int>(&updates_per_epoch)->default_value(-1), "Accumulate updates and apply them to the weight vector the specified number of times per epoch")
 		("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
 		("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
-		("weighted-loss-function", po::value<bool>(&weightedLossFunction)->default_value(false), "Weight the loss of a hypothesis by its Bleu score")
+		("weighted-loss-function", po::value<size_t>(&weightedLossFunction)->default_value(0), "Weight the loss of a hypothesis by its Bleu score")
 		("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
 
 	po::options_description cmdline_options;
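Note: the refactor above also moves the option table to the chained add_options() style, one option per line. A minimal, self-contained illustration of that boost::program_options pattern, reproducing just two of the real options:

    #include <boost/program_options.hpp>
    #include <iostream>

    namespace po = boost::program_options;

    int main(int argc, char** argv) {
        bool burnIn;
        float bleuScoreWeight;

        po::options_description desc("Allowed options");
        desc.add_options()
            ("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0),
             "Bleu score weight used in the decoder objective function")
            ("burn-in", po::value<bool>(&burnIn)->default_value(false),
             "Do a burn-in of the BLEU history before training");

        po::variables_map vm;
        po::store(po::parse_command_line(argc, argv, desc), vm);
        po::notify(vm);  // writes parsed values into the bound variables

        std::cout << "burn-in=" << burnIn << " bleu-score-weight=" << bleuScoreWeight << "\n";
        return 0;
    }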
@@ -605,14 +614,118 @@ int main(int argc, char** argv) {
 	vector<string> decoder_params;
 	boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
 	initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
-	MosesDecoder* decoder = new MosesDecoder(referenceSentences,
-			useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
+	MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
 	if (normaliseWeights) {
 		ScoreComponentCollection startWeights = decoder->getWeights();
 		startWeights.L1Normalise();
 		decoder->setWeights(startWeights);
 	}
 
+	if (sentenceLevelBleu) {
+		burnIn = false;
+	}
+
+	if (burnIn) {
+		// load burn-in input and references
+		vector<string> burnInInputSentences;
+		if (!loadSentences(burnInInputFile, burnInInputSentences)) {
+			cerr << "Error: Failed to load burn-in input sentences from " << burnInInputFile << endl;
+			return 1;
+		}
+
+		vector<vector<string> > burnInReferenceSentences(burnInReferenceFiles.size());
+		for (size_t i = 0; i < burnInReferenceFiles.size(); ++i) {
+			if (!loadSentences(burnInReferenceFiles[i], burnInReferenceSentences[i])) {
+				cerr << "Error: Failed to load burn-in reference sentences from "
+						<< burnInReferenceFiles[i] << endl;
+				return 1;
+			}
+			if (burnInReferenceSentences[i].size() != burnInInputSentences.size()) {
+				cerr << "Error: Burn-in input file length (" << burnInInputSentences.size() << ") != ("
+						<< burnInReferenceSentences[i].size() << ") length of burn-in reference file " << i
+						<< endl;
+				return 1;
+			}
+		}
+		decoder->loadReferenceSentences(burnInReferenceSentences);
+
+		vector<size_t> inputLengths;
+		vector<size_t> ref_ids;
+		vector<vector<const Word*> > oracles;
+		vector<vector<const Word*> > oneBests;
+
+		vector<vector<ScoreComponentCollection> > featureValues;
+		vector<vector<float> > bleuScores;
+		vector<ScoreComponentCollection> newFeatureValues;
+		vector<float> newBleuScores;
+		featureValues.push_back(newFeatureValues);
+		bleuScores.push_back(newBleuScores);
+
+		vector<size_t> order;
+		for (size_t i = 0; i < burnInInputSentences.size(); ++i) {
+			order.push_back(i);
+		}
+
+		cerr << "Start burn-in phase for approx. BLEU history.." << endl;
+		if (historyOf1best) {
+			// get 1best translations for the burn-in sentences
+			vector<size_t>::const_iterator sid = order.begin();
+			while (sid != order.end()) {
+				string& input = burnInInputSentences[*sid];
+				vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
+						featureValues[0], bleuScores[0], true,
+						distinctNbest, rank);
+				inputLengths.push_back(decoder->getCurrentInputLength());
+				ref_ids.push_back(*sid);
+				decoder->cleanup();
+				oneBests.push_back(bestModel);
+				++sid;
+			}
+
+			// update history
+			decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, 0);
+
+			// clean up 1best translations after updating history
+			for (size_t i = 0; i < oneBests.size(); ++i) {
+				for (size_t j = 0; j < oneBests[i].size(); ++j) {
+					delete oneBests[i][j];
+				}
+			}
+		}
+		else {
+			// get oracle translations for the burn-in sentences
+			vector<size_t>::const_iterator sid = order.begin();
+			while (sid != order.end()) {
+				string& input = burnInInputSentences[*sid];
+				vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
+						featureValues[0], bleuScores[0], true,
+						distinctNbest, rank);
+				inputLengths.push_back(decoder->getCurrentInputLength());
+				ref_ids.push_back(*sid);
+				decoder->cleanup();
+				oracles.push_back(oracle);
+				++sid;
+			}
+
+			// update history
+			decoder->updateHistory(oracles, inputLengths, ref_ids, rank, 0);
+
+			// clean up oracle translations after updating history
+			for (size_t i = 0; i < oracles.size(); ++i) {
+				for (size_t j = 0; j < oracles[i].size(); ++j) {
+					delete oracles[i][j];
+				}
+			}
+		}
+
+		cerr << "Bleu feature history after burn-in: " << endl;
+		decoder->printBleuFeatureHistory(cerr);
+		decoder->loadReferenceSentences(referenceSentences);
+	}
+	else {
+		decoder->loadReferenceSentences(referenceSentences);
+	}
+
 	// Optionally shuffle the sentences
 	vector<size_t> order;
 	if (rank == 0) {
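Note: burn-in exists to seed the BLEU n-gram history with real translations before any weight updates happen. The actual update rule lives in BleuScoreFeature::UpdateHistory, which this diff does not show; the sketch below is a guess at the usual exponentially smoothed form (cf. m_historySmoothing, default 0.9 above), not a quote of the Moses code:

    #include <cstdio>

    // Hypothetical sketch of an exponentially smoothed BLEU history update.
    // NOT the actual Moses implementation, which is outside this diff.
    struct BleuHistory {
        float matches[4] = {0, 0, 0, 0};  // n-gram match history, n = 1..4
        float counts[4]  = {0, 0, 0, 0};  // n-gram count history

        void update(const float newMatches[4], const float newCounts[4], float smoothing) {
            for (int n = 0; n < 4; ++n) {
                // old statistics decay by `smoothing`; the new sentence's statistics are added
                matches[n] = smoothing * matches[n] + newMatches[n];
                counts[n]  = smoothing * counts[n]  + newCounts[n];
            }
        }
    };

    int main() {
        BleuHistory h;
        float m[4] = {20, 15, 11, 8}, c[4] = {25, 24, 23, 22};
        h.update(m, c, 0.9f);
        std::printf("unigram history: %.1f/%.1f\n", h.matches[0], h.counts[0]);
        return 0;
    }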
@@ -691,8 +804,7 @@ int main(int argc, char** argv) {
 		cerr << "Optimising using Mira" << endl;
 		optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor,
 				onlyViolatedConstraints, slack, weightedLossFunction, maxNumberOracles,
-				accumulateMostViolatedConstraints, pastAndCurrentConstraints, one_per_batch,
-				order.size());
+				accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
 		if (hildreth) {
 			cerr << "Using Hildreth's optimisation algorithm.." << endl;
 		}
@@ -777,6 +889,7 @@ int main(int argc, char** argv) {
 			vector<size_t> oraclePositions;
 			vector<float> oracleBleuScores;
 			vector<vector<const Word*> > oracles;
+			vector<vector<const Word*> > oneBests;
 			vector<ScoreComponentCollection> oracleFeatureValues;
 			vector<size_t> inputLengths;
 			vector<size_t> ref_ids;
@@ -795,52 +908,25 @@ int main(int argc, char** argv) {
 			featureValues.push_back(newFeatureValues);
 			bleuScores.push_back(newBleuScores);
 
-			if (one_constraint) {
-				cerr << "Rank " << rank << ", run decoder to get 1best wrt model score" << endl;
-				vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0,
-						1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
-						distinctNbest, rank);
-				inputLengths.push_back(decoder->getCurrentInputLength());
-				ref_ids.push_back(*sid);
-				all_ref_ids.push_back(*sid);
-				allBestModelScore.push_back(bestModel);
-				decoder->cleanup();
-				cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
-
-				// HOPE
-				cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
-				size_t oraclePos = featureValues[batchPosition].size();
-				oraclePositions.push_back(oraclePos);
-				vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0,
-						1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
-						distinctNbest, rank);
-				decoder->cleanup();
-				oracles.push_back(oracle);
-				cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
-
-				oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
-				float oracleBleuScore = bleuScores[batchPosition][oraclePos];
-				oracleBleuScores.push_back(oracleBleuScore);
-			}
-			else {
 			// MODEL
 			cerr << "Rank " << rank << ", run decoder to get nbest wrt model score" << endl;
-			vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0,
-					1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
+			vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
+					featureValues[batchPosition], bleuScores[batchPosition], true,
 					distinctNbest, rank);
 			inputLengths.push_back(decoder->getCurrentInputLength());
 			ref_ids.push_back(*sid);
 			all_ref_ids.push_back(*sid);
 			allBestModelScore.push_back(bestModel);
 			decoder->cleanup();
+			oneBests.push_back(bestModel);
 			cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
 
 			// HOPE
 			cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
 			size_t oraclePos = featureValues[batchPosition].size();
 			oraclePositions.push_back(oraclePos);
-			vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0,
-					1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
+			vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
+					featureValues[batchPosition], bleuScores[batchPosition], true,
 					distinctNbest, rank);
 			decoder->cleanup();
 			oracles.push_back(oracle);
@@ -853,7 +939,7 @@ int main(int argc, char** argv) {
 			// FEAR
 			cerr << "Rank " << rank << ", run decoder to get nbest fear translations" << endl;
 			size_t fearPos = featureValues[batchPosition].size();
-			vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, 1.0,
+			vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
 					featureValues[batchPosition], bleuScores[batchPosition], true,
 					distinctNbest, rank);
 			decoder->cleanup();
@@ -865,7 +951,6 @@ int main(int argc, char** argv) {
 			for (size_t i = 0; i < fear.size(); ++i) {
 				delete fear[i];
 			}
-			}
 
 			cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
 			summedApproxBleu += bleuScores[batchPosition][0];
@@ -908,7 +993,7 @@ int main(int argc, char** argv) {
 			vector< vector <float > > bestModelOld_batch;
 			for (size_t i = 0; i < actualBatchSize; ++i) {
 				string& input = inputSentences[*current_sid_start + i];
-				vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
+				vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
 				bestModelOld_batch.push_back(bestModelOld);
 				decoder->cleanup();
 			}
@@ -928,16 +1013,9 @@ int main(int argc, char** argv) {
 			cerr << "\nRank " << rank << ", run optimiser:" << endl;
 			ScoreComponentCollection oldWeights(mosesWeights);
 			vector<int> update_status;
-			if (one_constraint) {
-				update_status = optimiser->updateWeightsAnalytically(mosesWeights, featureValues[0][0],
-						losses[0][0], oracleFeatureValues[0], oracleBleuScores[0], ref_ids[0],
-						learning_rate, max_sentence_update, rank, epoch, controlUpdates);
-			}
-			else {
-				update_status = optimiser->updateWeights(mosesWeights, featureValues,
+			update_status = optimiser->updateWeights(mosesWeights, featureValues,
 					losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
 					learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
-			}
 
 			if (update_status[0] == 1) {
 				cerr << "Rank " << rank << ", no update for batch" << endl;
@@ -985,7 +1063,7 @@ int main(int argc, char** argv) {
 				vector<float> bestModelNew;
 				for (size_t i = 0; i < actualBatchSize; ++i) {
 					string& input = inputSentences[*current_sid_start + i];
-					bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
+					bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
 					decoder->cleanup();
 					cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl;
 					cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl;
@@ -993,11 +1071,21 @@ int main(int argc, char** argv) {
 				}
 			}
 
-			// update history (for approximate document Bleu)
-			for (size_t i = 0; i < oracles.size(); ++i) {
-				cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
-			}
-			decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
+			if (!sentenceLevelBleu) {
+				// update history (for approximate document Bleu)
+				if (historyOf1best) {
+					for (size_t i = 0; i < oneBests.size(); ++i) {
+						cerr << "Rank " << rank << ", 1best length: " << oneBests[i].size() << " ";
+					}
+					decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch);
+				}
+				else {
+					for (size_t i = 0; i < oracles.size(); ++i) {
+						cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
+					}
+					decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
+				}
+			}
 
 			// clean up oracle translations after updating history
 			for (size_t i = 0; i < oracles.size(); ++i) {
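Note: this hunk is why burn-in is disabled under --sentence-level-bleu earlier in the file: with a per-sentence BLEU objective each hypothesis is scored against its own reference, so there is no running document history to maintain or seed. In the approximate document BLEU setting, the history is now fed either by the 1best model translations (--history-of-1best) or, as before, by the oracles.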
@@ -1136,6 +1224,10 @@ int main(int argc, char** argv) {
 			}// end dumping
 		} // end of shard loop, end of this epoch
 
+
+		cerr << "Bleu feature history after epoch " << epoch << endl;
+		decoder->printBleuFeatureHistory(cerr);
+
 		size_t sumUpdates;
 		size_t *sendbuf_uint, *recvbuf_uint;
 		sendbuf_uint = (size_t *) malloc(sizeof(size_t));
@@ -88,7 +88,7 @@ namespace Mira {
 		MiraOptimiser() :
 			Optimiser() { }
 
-		MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, bool weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, bool one_per_batch, size_t exampleSize) :
+		MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, size_t weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
 			Optimiser(),
 			m_n(n),
 			m_hildreth(hildreth),
@@ -99,7 +99,6 @@ namespace Mira {
 			m_max_number_oracles(maxNumberOracles),
 			m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
 			m_pastAndCurrentConstraints(pastAndCurrentConstraints),
-			m_one_per_batch(one_per_batch),
 			m_oracles(exampleSize),
 			m_bleu_of_oracles(exampleSize) { }
 
@@ -166,7 +165,7 @@ namespace Mira {
 		// regularise Hildreth updates
 		float m_slack;
 
-		bool m_weightedLossFunction;
+		size_t m_weightedLossFunction;
 
 		// index of oracle translation in hypothesis matrix
 		std::vector<size_t> m_oracleIndices;
@@ -180,14 +179,13 @@ namespace Mira {
 
 		// accumulate most violated constraints for every example
 		std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
 		std::vector< float> m_lossMarginDistances;
 		std::vector< float> m_losses;
 
 
 		bool m_accumulateMostViolatedConstraints;
 
 		bool m_pastAndCurrentConstraints;
 
-		bool m_one_per_batch;
-
 		Moses::ScoreComponentCollection m_accumulatedUpdates;
 };
 }
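Note: weightedLossFunction widens from bool to size_t here, turning an on/off switch into a mode selector. How the optimiser interprets the modes is outside this diff; the sketch below is a purely hypothetical illustration of what such a selector could look like, not the MiraOptimiser code:

    #include <cmath>
    #include <cstdio>

    // Hypothetical weighted-loss modes: 0 = plain loss, other values pick
    // different BLEU-based weightings of the hypothesis loss.
    float weightLoss(float loss, float hypothesisBleu, size_t mode) {
        switch (mode) {
        case 1:  return loss * hypothesisBleu;                // scale loss linearly by BLEU
        case 2:  return loss * std::log(1 + hypothesisBleu);  // damped scaling
        default: return loss;                                 // 0: unweighted
        }
    }

    int main() {
        std::printf("%.3f\n", weightLoss(0.5f, 0.3f, 1));
        return 0;
    }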
@@ -95,8 +95,20 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
 	m_BP_factor(BPfactor),
 	m_historySmoothing(historySmoothing) {}
 
+void BleuScoreFeature::PrintHistory(std::ostream& out) const {
+	out << "source length history=" << m_source_length_history << endl;
+	out << "target length history=" << m_target_length_history << endl;
+	out << "ref length history=" << m_ref_length_history << endl;
+
+	for (size_t i = 0; i < BleuScoreState::bleu_order; ++i) {
+		out << "match history/count history (" << i << "):" << m_match_history[i] << "/" << m_count_history[i] << endl;
+	}
+}
+
 void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
 {
 	m_refs.clear();
 	FactorCollection& fc = FactorCollection::Instance();
 	for (size_t file_id = 0; file_id < refs.size(); file_id++) {
 		for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
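Note: PrintHistory dumps exactly the sufficient statistics BLEU needs — smoothed lengths plus a match/count pair per n-gram order (bleu_order is 4). For orientation, here is the textbook way such statistics combine into a BLEU score (standard formula, not code from this commit; the BPfactor scaling above is ignored):

    #include <cmath>
    #include <cstdio>

    // Standard BLEU from accumulated statistics: geometric mean of the four
    // n-gram precisions times a brevity penalty.
    float bleuFromHistory(const float matches[4], const float counts[4],
                          float targetLength, float refLength) {
        float logPrecision = 0;
        for (int n = 0; n < 4; ++n)
            logPrecision += std::log(matches[n] / counts[n]) / 4.0f;
        float bp = (targetLength >= refLength)
                       ? 1.0f
                       : std::exp(1.0f - refLength / targetLength);  // brevity penalty
        return bp * std::exp(logPrecision);
    }

    int main() {
        float m[4] = {18, 12, 8, 5}, c[4] = {25, 24, 23, 22};
        std::printf("BLEU = %.4f\n", bleuFromHistory(m, c, 25, 27));
        return 0;
    }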
@@ -62,6 +62,7 @@ public:
 		return 1;
 	}
 
+	void PrintHistory(std::ostream& out) const;
 	void LoadReferences(const std::vector< std::vector< std::string > > &);
 	void SetCurrentSourceLength(size_t);
 	void SetCurrentReference(size_t);
@@ -178,8 +178,11 @@ namespace Moses {
 		/* if (i->first != DEFAULT_NAME && i->second != 0.0) {
 			out << i->first << "=" << value << ", ";
 		}*/
-		if (i->first != DEFAULT_NAME) {
+		/* if (i->first != DEFAULT_NAME) {
+			out << i->first << "=" << value << ", ";
+		}*/
+		if (i->first != DEFAULT_NAME) {
 			out << value << ", ";
 		}
 	}
 	out << "}";
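Note: with this change the feature-vector printer emits only the comma-separated values inside braces, e.g. {0.217, -1.352, 0.874} (values invented for illustration), dropping the name= prefixes; the earlier name-printing variants are retained as comments above it.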