refactor handling of accumulated constraints and constraint checking; introduce burn-in for the bleu history, sentence-level bleu, and a bleu score weight

git-svn-id: http://svn.statmt.org/repository/mira@3882 cc96ff50-19ce-11e0-b349-13d7f0bd23df
ehasler 2011-04-26 19:35:06 +00:00 committed by Ondrej Bojar
parent eaada140a4
commit 1964eaf98a
7 changed files with 192 additions and 75 deletions
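
At a glance: the weight handed to the decoder's Bleu feature is now the product of the hope/fear objective weight and the new --bleu-score-weight option, and the pure model score is recovered by subtracting that product times the hypothesis Bleu. A minimal sketch of the arithmetic (illustration only, not part of the commit; the values are hypothetical):

// Illustration: how the effective Bleu weight is composed in this commit.
float bleuObjectiveWeight = 1.0; // +1.0 for hope, -1.0 for fear, 0.0 for plain model decoding
float bleuScoreWeight = 0.5;     // new --bleu-score-weight option (default 1.0)
float effectiveBleuWeight = bleuObjectiveWeight * bleuScoreWeight; // overwrites "weight-bl"
// model score without the Bleu contribution, as in Decoder.cpp below:
// scoreWithoutBleu = path.GetTotalScore() - effectiveBleuWeight * bleuScore;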

View File

@@ -68,7 +68,7 @@ namespace Mira {
delete[] mosesargv;
}
MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary (TODO: what for?)
const StaticData &staticData = StaticData::Instance();
@@ -84,7 +84,6 @@ namespace Mira {
// Add the bleu feature
m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
m_bleuScoreFeature->LoadReferences(refs);
}
void MosesDecoder::cleanup() {
@@ -113,7 +112,7 @@ namespace Mira {
// set the weight for the bleu feature
ostringstream bleuWeightStr;
bleuWeightStr << bleuObjectiveWeight;
bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
PARAM_VEC bleuWeight(1,bleuWeightStr.str());
staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
@@ -137,7 +136,7 @@ namespace Mira {
bleuScores.push_back(bleuScore);
//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
Phrase bestPhrase = path.GetTargetPhrase();
@@ -179,6 +178,7 @@ namespace Mira {
vector<float> MosesDecoder::getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct)
{
StaticData &staticData = StaticData::InstanceNonConst();
@@ -191,7 +191,7 @@ namespace Mira {
// set the weight for the bleu feature
ostringstream bleuWeightStr;
bleuWeightStr << bleuObjectiveWeight;
bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
PARAM_VEC bleuWeight(1,bleuWeightStr.str());
staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
@@ -211,7 +211,7 @@ namespace Mira {
vector<float> bleuAndScore;
const Moses::TrellisPath &path = **iter;
float bleuScore = getBleuScore(path.GetScoreBreakdown());
float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
bleuAndScore.push_back(bleuScore);
bleuAndScore.push_back(scoreWithoutBleu);
return bleuAndScore;
@@ -246,6 +246,14 @@ namespace Mira {
m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
}
void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
m_bleuScoreFeature->LoadReferences(refs);
}
void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
m_bleuScoreFeature->PrintHistory(out);
}
vector<float> MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch, size_t rank) {
vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
if (bleu.size() > 0) {

View File

@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc, std::vecto
**/
class MosesDecoder {
public:
MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
//returns the best sentence
std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -66,10 +66,13 @@ class MosesDecoder {
std::vector<float> getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct);
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
void printBleuFeatureHistory(std::ostream& out);
std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
void setBPfactor(float factor);
Moses::ScoreComponentCollection getWeights();
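
Reference loading has moved out of the constructor, which is what enables the burn-in phase in Main.cpp below: the same decoder can be pointed first at the burn-in references and later at the training references. A hedged usage sketch of the refactored API (variable names follow Main.cpp):

// Sketch only: construct once, then load whichever references are currently needed.
MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength,
    BPfactor, historySmoothing);
decoder->loadReferenceSentences(burnInReferenceSentences); // burn-in refs
// ... decode burn-in sentences, then decoder->updateHistory(...) ...
decoder->printBleuFeatureHistory(std::cerr);
decoder->loadReferenceSentences(referenceSentences);       // switch to training refs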

View File

@@ -142,7 +142,7 @@ int main(int argc, char** argv) {
float marginScaleFactorMin;
float min_learning_rate;
float min_sentence_update;
bool weightedLossFunction;
size_t weightedLossFunction;
size_t n;
size_t batchSize;
bool distinctNbest;
@@ -172,8 +172,6 @@ int main(int argc, char** argv) {
float decrease_sentence_update;
bool devBleu;
bool normaliseWeights;
bool one_constraint;
bool one_per_batch;
bool print_feature_values;
bool stop_dev_bleu;
bool stop_approx_dev_bleu;
@@ -181,14 +179,25 @@ int main(int argc, char** argv) {
bool train_linear_classifier;
int updates_per_epoch;
bool multiplyA;
bool historyOf1best;
bool burnIn;
string burnInInputFile;
vector<string> burnInReferenceFiles;
bool sentenceLevelBleu;
float bleuScoreWeight;
po::options_description desc("Allowed options");
desc.add_options()("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false),"Accumulate most violated constraint per example")
desc.add_options()
("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false),"Accumulate most violated constraint per example")
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
("adapt-BP-factor", po::value<bool>(&adapt_BPfactor)->default_value(0), "Set factor to 1 when optimal translation length in reached")
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for log-ing feature values")
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments")
("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the bleu objective weight)")
("BP-factor", po::value<float>(&BPfactor)->default_value(1.0), "Increase penalty for short translations")
("burn-in", po::value<bool>(&burnIn)->default_value(false), "Do a burn-in of the BLEU history before training")
("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
@@ -200,6 +209,7 @@ int main(int argc, char** argv) {
("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("hildreth", po::value<bool>(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
@@ -214,16 +224,15 @@ int main(int argc, char** argv) {
("msf", po::value<float>(&marginScaleFactor)->default_value(1.0), "Margin scale factor, regularises the update by scaling the enforced margin")
("msf-min", po::value<float>(&marginScaleFactorMin)->default_value(1.0), "Minimum value that margin is scaled by")
("msf-step", po::value<float>(&marginScaleFactorStep)->default_value(0), "Decrease margin scale factor iteratively by the value provided")
("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("one-constraint", po::value<bool>(&one_constraint)->default_value(false), "Forget about hope and fear and consider only the 1best model translation to formulate a constraint")
("one-per-batch", po::value<bool>(&one_per_batch)->default_value(false), "Only 1 constraint per batch for params --accumulate-most-violated.. and --past-and-current..")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
("slack-max", po::value<float>(&slack_max)->default_value(0), "Maximum slack used")
@@ -236,7 +245,7 @@ int main(int argc, char** argv) {
("updates-per-epoch", po::value<int>(&updates_per_epoch)->default_value(-1), "Accumulate updates and apply them to the weight vector the specified number of times per epoch")
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("weighted-loss-function", po::value<bool>(&weightedLossFunction)->default_value(false), "Weight the loss of a hypothesis by its Bleu score")
("weighted-loss-function", po::value<size_t>(&weightedLossFunction)->default_value(0), "Weight the loss of a hypothesis by its Bleu score")
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
po::options_description cmdline_options;
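Taken together, a training run exercising the new options might be launched as, for instance, mira -f moses.ini -i train.input -r train.ref --burn-in true --burn-in-input-file burnin.input --burn-in-reference-files burnin.ref --bleu-score-weight 0.5 --history-of-1best true (a hypothetical invocation; the binary and file names are placeholders). Note that --sentence-level-bleu true disables burn-in below, since the history only feeds the approximate document Bleu.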
@@ -605,14 +614,118 @@ int main(int argc, char** argv) {
vector<string> decoder_params;
boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
MosesDecoder* decoder = new MosesDecoder(referenceSentences,
useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
if (normaliseWeights) {
ScoreComponentCollection startWeights = decoder->getWeights();
startWeights.L1Normalise();
decoder->setWeights(startWeights);
}
if (sentenceLevelBleu) {
burnIn = false;
}
if (burnIn) {
// load burn-in input and references
vector<string> burnInInputSentences;
if (!loadSentences(burnInInputFile, burnInInputSentences)) {
cerr << "Error: Failed to load burn-in input sentences from " << burnInInputFile << endl;
return 1;
}
vector<vector<string> > burnInReferenceSentences(burnInReferenceFiles.size());
for (size_t i = 0; i < burnInReferenceFiles.size(); ++i) {
if (!loadSentences(burnInReferenceFiles[i], burnInReferenceSentences[i])) {
cerr << "Error: Failed to load burn-in reference sentences from "
<< burnInReferenceFiles[i] << endl;
return 1;
}
if (burnInReferenceSentences[i].size() != burnInInputSentences.size()) {
cerr << "Error: Burn-in input file length (" << burnInInputSentences.size() << ") != ("
<< burnInReferenceSentences[i].size() << ") length of burn-in reference file " << i
<< endl;
return 1;
}
}
decoder->loadReferenceSentences(burnInReferenceSentences);
vector<size_t> inputLengths;
vector<size_t> ref_ids;
vector<vector<const Word*> > oracles;
vector<vector<const Word*> > oneBests;
vector<vector<ScoreComponentCollection> > featureValues;
vector<vector<float> > bleuScores;
vector<ScoreComponentCollection> newFeatureValues;
vector<float> newBleuScores;
featureValues.push_back(newFeatureValues);
bleuScores.push_back(newBleuScores);
vector<size_t> order;
for (size_t i = 0; i < burnInInputSentences.size(); ++i) {
order.push_back(i);
}
cerr << "Start burn-in phase for approx. BLEU history.." << endl;
if (historyOf1best) {
// get 1best translations for the burn-in sentences
vector<size_t>::const_iterator sid = order.begin();
while (sid != order.end()) {
string& input = burnInInputSentences[*sid];
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
featureValues[0], bleuScores[0], true,
distinctNbest, rank);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oneBests.push_back(bestModel);
++sid;
}
// update history
decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, 0);
// clean up 1best translations after updating history
for (size_t i = 0; i < oracles.size(); ++i) {
for (size_t j = 0; j < oracles[i].size(); ++j) {
delete oracles[i][j];
}
}
}
else {
// get oracle translations for the burn-in sentences
vector<size_t>::const_iterator sid = order.begin();
while (sid != order.end()) {
string& input = burnInInputSentences[*sid];
vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
featureValues[0], bleuScores[0], true,
distinctNbest, rank);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
++sid;
}
// update history
decoder->updateHistory(oracles, inputLengths, ref_ids, rank, 0);
// clean up oracle translations after updating history
for (size_t i = 0; i < oracles.size(); ++i) {
for (size_t j = 0; j < oracles[i].size(); ++j) {
delete oracles[i][j];
}
}
}
cerr << "Bleu feature history after burn-in: " << endl;
decoder->printBleuFeatureHistory(cerr);
decoder->loadReferenceSentences(referenceSentences);
}
else {
decoder->loadReferenceSentences(referenceSentences);
}
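
For orientation, the burn-in block above reduces to the following control flow (a condensed paraphrase under the signatures shown in this diff, not the literal code):

// Condensed sketch of the burn-in phase.
if (sentenceLevelBleu) burnIn = false; // sentence-level Bleu needs no history
if (burnIn) {
    decoder->loadReferenceSentences(burnInReferenceSentences);
    vector<vector<const Word*> > translations;
    // objective weight 1.0 decodes oracles; 0.0 decodes plain 1bests (--history-of-1best)
    float objectiveWeight = historyOf1best ? 0.0 : 1.0;
    for (size_t sid = 0; sid < burnInInputSentences.size(); ++sid) {
        translations.push_back(decoder->getNBest(burnInInputSentences[sid], sid, 1,
            objectiveWeight, bleuScoreWeight, featureValues[0], bleuScores[0],
            true, distinctNbest, rank));
        inputLengths.push_back(decoder->getCurrentInputLength());
        ref_ids.push_back(sid);
        decoder->cleanup();
    }
    decoder->updateHistory(translations, inputLengths, ref_ids, rank, /*epoch=*/0);
    decoder->loadReferenceSentences(referenceSentences); // back to the training refs
}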
// Optionally shuffle the sentences
vector<size_t> order;
if (rank == 0) {
@@ -691,8 +804,7 @@ int main(int argc, char** argv) {
cerr << "Optimising using Mira" << endl;
optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor,
onlyViolatedConstraints, slack, weightedLossFunction, maxNumberOracles,
accumulateMostViolatedConstraints, pastAndCurrentConstraints, one_per_batch,
order.size());
accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
if (hildreth) {
cerr << "Using Hildreth's optimisation algorithm.." << endl;
}
@@ -777,6 +889,7 @@ int main(int argc, char** argv) {
vector<size_t> oraclePositions;
vector<float> oracleBleuScores;
vector<vector<const Word*> > oracles;
vector<vector<const Word*> > oneBests;
vector<ScoreComponentCollection> oracleFeatureValues;
vector<size_t> inputLengths;
vector<size_t> ref_ids;
@@ -795,52 +908,25 @@ int main(int argc, char** argv) {
featureValues.push_back(newFeatureValues);
bleuScores.push_back(newBleuScores);
if (one_constraint) {
cerr << "Rank " << rank << ", run decoder to get 1best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0,
1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
all_ref_ids.push_back(*sid);
allBestModelScore.push_back(bestModel);
decoder->cleanup();
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
// HOPE
cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
size_t oraclePos = featureValues[batchPosition].size();
oraclePositions.push_back(oraclePos);
vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0,
1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
float oracleBleuScore = bleuScores[batchPosition][oraclePos];
oracleBleuScores.push_back(oracleBleuScore);
}
else {
// MODEL
cerr << "Rank " << rank << ", run decoder to get nbest wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0,
1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
all_ref_ids.push_back(*sid);
allBestModelScore.push_back(bestModel);
decoder->cleanup();
oneBests.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
// HOPE
cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
size_t oraclePos = featureValues[batchPosition].size();
oraclePositions.push_back(oraclePos);
vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0,
1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
oracles.push_back(oracle);
@@ -853,7 +939,7 @@ int main(int argc, char** argv) {
// FEAR
cerr << "Rank " << rank << ", run decoder to get nbest fear translations" << endl;
size_t fearPos = featureValues[batchPosition].size();
vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, 1.0,
vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
@@ -865,7 +951,6 @@ int main(int argc, char** argv) {
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
}
cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
summedApproxBleu += bleuScores[batchPosition][0];
@@ -908,7 +993,7 @@ int main(int argc, char** argv) {
vector< vector <float > > bestModelOld_batch;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
bestModelOld_batch.push_back(bestModelOld);
decoder->cleanup();
}
@@ -928,16 +1013,9 @@ int main(int argc, char** argv) {
cerr << "\nRank " << rank << ", run optimiser:" << endl;
ScoreComponentCollection oldWeights(mosesWeights);
vector<int> update_status;
if (one_constraint) {
update_status = optimiser->updateWeightsAnalytically(mosesWeights, featureValues[0][0],
losses[0][0], oracleFeatureValues[0], oracleBleuScores[0], ref_ids[0],
learning_rate, max_sentence_update, rank, epoch, controlUpdates);
}
else {
update_status = optimiser->updateWeights(mosesWeights, featureValues,
update_status = optimiser->updateWeights(mosesWeights, featureValues,
losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
}
if (update_status[0] == 1) {
cerr << "Rank " << rank << ", no update for batch" << endl;
@@ -985,7 +1063,7 @@ int main(int argc, char** argv) {
vector<float> bestModelNew;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
decoder->cleanup();
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl;
@@ -993,11 +1071,21 @@ int main(int argc, char** argv) {
}
}
// update history (for approximate document Bleu)
for (size_t i = 0; i < oracles.size(); ++i) {
cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
if (!sentenceLevelBleu) {
// update history (for approximate document Bleu)
if (historyOf1best) {
for (size_t i = 0; i < oneBests.size(); ++i) {
cerr << "Rank " << rank << ", 1best length: " << oneBests[i].size() << " ";
}
decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch);
}
else {
for (size_t i = 0; i < oracles.size(); ++i) {
cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
}
decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
}
}
decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
// clean up oracle translations after updating history
for (size_t i = 0; i < oracles.size(); ++i) {
@@ -1136,6 +1224,10 @@ int main(int argc, char** argv) {
}// end dumping
} // end of shard loop, end of this epoch
cerr << "Bleu feature history after epoch " << epoch << endl;
decoder->printBleuFeatureHistory(cerr);
size_t sumUpdates;
size_t *sendbuf_uint, *recvbuf_uint;
sendbuf_uint = (size_t *) malloc(sizeof(size_t));

View File

@@ -88,7 +88,7 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, bool weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, bool one_per_batch, size_t exampleSize) :
MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, size_t weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
Optimiser(),
m_n(n),
m_hildreth(hildreth),
@@ -99,7 +99,6 @@ namespace Mira {
m_max_number_oracles(maxNumberOracles),
m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
m_pastAndCurrentConstraints(pastAndCurrentConstraints),
m_one_per_batch(one_per_batch),
m_oracles(exampleSize),
m_bleu_of_oracles(exampleSize) { }
@@ -166,7 +165,7 @@ namespace Mira {
// regularise Hildreth updates
float m_slack;
bool m_weightedLossFunction;
size_t m_weightedLossFunction;
// index of oracle translation in hypothesis matrix
std::vector<size_t> m_oracleIndices;
@@ -180,14 +179,13 @@ namespace Mira {
// accumulate most violated constraints for every example
std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
std::vector< float> m_lossMarginDistances;
std::vector< float> m_losses;
bool m_accumulateMostViolatedConstraints;
bool m_pastAndCurrentConstraints;
bool m_one_per_batch;
Moses::ScoreComponentCollection m_accumulatedUpdates;
};
}

View File

@@ -95,8 +95,20 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
m_BP_factor(BPfactor),
m_historySmoothing(historySmoothing) {}
void BleuScoreFeature::PrintHistory(std::ostream& out) const {
out << "source length history=" << m_source_length_history << endl;
out << "target length history=" << m_target_length_history << endl;
out << "ref length history=" << m_ref_length_history << endl;
for (size_t i = 0; i < BleuScoreState::bleu_order; ++i) {
out << "match history/count history (" << i << "):" << m_match_history[i] << "/" << m_count_history[i] << endl;
}
}
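
PrintHistory exposes the smoothed pseudo-document statistics behind the approximate document Bleu. For bleu_order = 4, its output has the shape below (numbers invented purely for illustration):

source length history=25.3
target length history=24.1
ref length history=24.8
match history/count history (0):18.2/24.1
match history/count history (1):12.7/23.4
match history/count history (2):9.4/22.6
match history/count history (3):7.1/21.9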
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
m_refs.clear();
FactorCollection& fc = FactorCollection::Instance();
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {

View File

@@ -62,6 +62,7 @@ public:
return 1;
}
void PrintHistory(std::ostream& out) const;
void LoadReferences(const std::vector< std::vector< std::string > > &);
void SetCurrentSourceLength(size_t);
void SetCurrentReference(size_t);

View File

@@ -178,8 +178,11 @@ namespace Moses {
/* if (i->first != DEFAULT_NAME && i->second != 0.0) {
out << i->first << "=" << value << ", ";
}*/
if (i->first != DEFAULT_NAME) {
/* if (i->first != DEFAULT_NAME) {
out << i->first << "=" << value << ", ";
}*/
if (i->first != DEFAULT_NAME) {
out << value << ", ";
}
}
out << "}";