change verbosity of cerr messages, remove some unwanted options, introduce --margin-slack and --margin-incr

git-svn-id: http://svn.statmt.org/repository/mira@3913 cc96ff50-19ce-11e0-b349-13d7f0bd23df
ehasler 2011-06-26 19:12:46 +00:00 committed by Ondrej Bojar
parent 0585646b2d
commit 42333388b4
6 changed files with 257 additions and 458 deletions
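The core of this change is the new margin slack term in the MIRA constraint check (see MiraOptimiser.cpp below): a constraint now counts as satisfied whenever loss <= modelScoreDiff + margin_slack, and --margin-incr grows that slack after every epoch. A minimal self-contained sketch of the check, with plain floats standing in for Moses' ScoreComponentCollection and invented values:

#include <iostream>

// Sketch of the relaxed constraint check introduced by this commit.
// loss = BLEU(hope) - BLEU(fear); modelScoreDiff = w . (h(hope) - h(fear)).
// The names mirror the diff; the numbers are illustrative only.
int main() {
  float loss = 0.35f;           // hypothetical BLEU difference
  float modelScoreDiff = 0.30f; // hypothetical model score difference
  float margin_slack = 0.01f;   // set via --margin-slack
  float epsilon = 0.0001f;

  float diff = 0;
  if (loss > modelScoreDiff + margin_slack)
    diff = loss - (modelScoreDiff + margin_slack); // size of the violation

  if (diff > epsilon)
    std::cerr << "constraint violated by " << diff << std::endl;
  else
    std::cerr << "constraint satisfied" << std::endl;
  return 0;
}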

Decoder.cpp

@@ -68,7 +68,7 @@ namespace Mira {
delete[] mosesargv;
}
MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float historySmoothing)
MosesDecoder::MosesDecoder(bool scaleByInputLength, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary (TODO: what for?)
const StaticData &staticData = StaticData::Instance();
@@ -82,7 +82,7 @@ namespace Mira {
m_manager->ProcessSentence();
// Add the bleu feature
m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, historySmoothing);
m_bleuScoreFeature = new BleuScoreFeature(scaleByInputLength, historySmoothing);
(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
}
@@ -100,7 +100,8 @@ namespace Mira {
vector< float>& bleuScores,
bool oracle,
bool distinct,
size_t rank)
size_t rank,
size_t epoch)
{
StaticData &staticData = StaticData::InstanceNonConst();
@@ -137,11 +138,11 @@ namespace Mira {
//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
Phrase bestPhrase = path.GetTargetPhrase();
cerr << "Rank " << rank << ": ";
cerr << "Rank " << rank << ", epoch " << epoch << ": ";
Phrase phrase = path.GetTargetPhrase();
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
const Word &word = phrase.GetWord(pos);
@@ -179,7 +180,9 @@ namespace Mira {
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct)
bool distinct,
size_t rank,
size_t epoch)
{
StaticData &staticData = StaticData::InstanceNonConst();
@@ -215,15 +218,15 @@ namespace Mira {
bleuAndScore.push_back(bleuScore);
bleuAndScore.push_back(scoreWithoutBleu);
cerr << "1best translation: ";
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", 1best translation: ");
Phrase phrase = path.GetTargetPhrase();
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
const Word &word = phrase.GetWord(pos);
Word *newWord = new Word(word);
cerr << *newWord;
VERBOSE(1, *newWord);
}
cerr << endl;
VERBOSE(1, endl);
return bleuAndScore;
}
@@ -245,7 +248,6 @@ namespace Mira {
}
void MosesDecoder::setWeights(const ScoreComponentCollection& weights) {
//cerr << "New weights: " << weights << endl;
StaticData::InstanceNonConst().SetAllWeights(weights);
}
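Most of the unconditional cerr output above moves behind Moses' VERBOSE macro, which prints only when the configured verbosity is at least the given level. A self-contained stand-in for that gating behaviour (the real macro consults the decoder's verbose level; a global variable is assumed here):

#include <iostream>

// Stand-in for the Moses VERBOSE(level, msg) macro used throughout this commit.
static int g_verbosity = 1; // e.g. what --verbosity 1 would set
#define VERBOSE(level, msg) do { \
    if (g_verbosity >= (level)) { std::cerr << msg; } \
  } while (0)

int main() {
  VERBOSE(1, "shown at verbosity >= 1" << std::endl);
  VERBOSE(2, "suppressed at verbosity 1" << std::endl);
  return 0;
}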

Decoder.h

@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc, std::vecto
**/
class MosesDecoder {
public:
MosesDecoder(bool useScaledReference, bool scaleByInputLength, float historySmoothing);
MosesDecoder(bool scaleByInputLength, float historySmoothing);
//returns the best sentence
std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -62,12 +62,15 @@ class MosesDecoder {
std::vector< float>& scores,
bool oracle,
bool distinct,
size_t rank);
size_t rank,
size_t epoch);
std::vector<float> getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct);
bool distinct,
size_t rank,
size_t epoch);
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
@@ -77,13 +80,13 @@ class MosesDecoder {
std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
Moses::ScoreComponentCollection getWeights();
void setWeights(const Moses::ScoreComponentCollection& weights);
void cleanup();
void cleanup();
private:
float getBleuScore(const Moses::ScoreComponentCollection& scores);
void setBleuScore(Moses::ScoreComponentCollection& scores, float bleu);
Moses::Manager *m_manager;
Moses::Sentence *m_sentence;
Moses::Manager *m_manager;
Moses::Sentence *m_sentence;
Moses::BleuScoreFeature *m_bleuScoreFeature;
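The rank and epoch parameters added above are threaded through purely for logging; it is the bleuObjectiveWeight argument that distinguishes the decoder calls made in Main.cpp. A hypothesis's combined score is modelScore + bleuObjectiveWeight * bleuScoreWeight * bleu (cf. scoreWithoutBleu in Decoder.cpp), so 1.0 selects hope, -1.0 fear, and 0.0 plain model-best translations. A toy ranking with invented scores:

#include <iostream>
#include <vector>

// Toy illustration of how bleuObjectiveWeight selects hope/fear/model
// translations; the hypothesis scores are invented for the example.
struct Hyp { float modelScore; float bleu; };

size_t best(const std::vector<Hyp>& hyps, float w, float bleuScoreWeight) {
  size_t argmax = 0;
  for (size_t i = 1; i < hyps.size(); ++i) {
    float a = hyps[i].modelScore + w * bleuScoreWeight * hyps[i].bleu;
    float b = hyps[argmax].modelScore + w * bleuScoreWeight * hyps[argmax].bleu;
    if (a > b) argmax = i;
  }
  return argmax;
}

int main() {
  std::vector<Hyp> hyps = { {-9.8f, 0.10f},    // 0: best model score
                            {-10.5f, 0.55f},   // 1: high BLEU
                            {-10.0f, 0.00f} }; // 2: low BLEU
  float bleuScoreWeight = 5.0f;
  std::cout << "model: " << best(hyps,  0.0f, bleuScoreWeight) << std::endl; // 0
  std::cout << "hope:  " << best(hyps,  1.0f, bleuScoreWeight) << std::endl; // 1
  std::cout << "fear:  " << best(hyps, -1.0f, bleuScoreWeight) << std::endl; // 2
  return 0;
}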

Main.cpp

@@ -144,7 +144,6 @@ int main(int argc, char** argv) {
size_t weightDumpFrequency;
string weightDumpStem;
float min_learning_rate;
float min_sentence_update;
size_t scale_margin;
bool scale_update;
size_t n;
@@ -153,14 +152,12 @@
bool onlyViolatedConstraints;
bool accumulateWeights;
float historySmoothing;
bool useScaledReference;
bool scaleByInputLength;
float slack;
float slack_step;
float slack_min;
bool averageWeights;
bool weightConvergence;
bool controlUpdates;
float learning_rate;
float mira_learning_rate;
float perceptron_learning_rate;
@@ -168,24 +165,18 @@
size_t baseOfLog;
string decoder_settings;
float min_weight_change;
float max_sentence_update;
float decrease_learning_rate;
float decrease_sentence_update;
bool devBleu;
bool normaliseWeights;
bool print_feature_values;
bool stop_dev_bleu;
bool stop_approx_dev_bleu;
bool train_linear_classifier;
bool multiplyA;
bool historyOf1best;
bool burnIn;
string burnInInputFile;
vector<string> burnInReferenceFiles;
bool sentenceLevelBleu;
float bleuScoreWeight;
float precision;
float min_bleu_change;
float margin_slack;
float margin_slack_incr;
bool analytical_update;
bool perceptron_update;
bool hope_fear;
@@ -204,49 +195,42 @@ int main(int argc, char** argv) {
("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
("decr-sentence-update", po::value<float>(&decrease_sentence_update)->default_value(0), "Decrease maximum weight update by the given value after every epoch")
("dev-bleu", po::value<bool>(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(false), "Use the 1best translation to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.7), "Adjust the factor for history smoothing")
("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations for optimization (not model)")
("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
("min-bleu-change", po::value<float>(&min_bleu_change)->default_value(0), "Minimum BLEU change of 1best translations of one epoch")
("min-sentence-update", po::value<float>(&min_sentence_update)->default_value(0), "Set a minimum weight update per sentence")
("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
("max-sentence-update", po::value<float>(&max_sentence_update)->default_value(-1), "Set a maximum weight update per sentence")
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(5), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization")
("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in nbest list")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
("precision", po::value<float>(&precision)->default_value(0), "Precision when comparing left and right hand side of constraints")
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(true), "Use a sentences level bleu scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stop-dev-bleu", po::value<bool>(&stop_dev_bleu)->default_value(false), "Stop when average Bleu (dev) decreases (or no more increases)")
("stop-approx-dev-bleu", po::value<bool>(&stop_approx_dev_bleu)->default_value(false), "Stop when average approx. sentence Bleu (dev) decreases (or no more increases)")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-update", po::value<bool>(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation")
@@ -255,8 +239,7 @@ int main(int argc, char** argv) {
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(
po::command_line_parser(argc, argv). options(cmdline_options).run(), vm);
po::store(po::command_line_parser(argc, argv). options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
@@ -329,7 +312,7 @@ int main(int argc, char** argv) {
vector<string> decoder_params;
boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, historySmoothing);
MosesDecoder* decoder = new MosesDecoder(scaleByInputLength, historySmoothing);
if (normaliseWeights) {
ScoreComponentCollection startWeights = decoder->getWeights();
startWeights.L1Normalise();
@@ -353,12 +336,16 @@ int main(int argc, char** argv) {
// initialise optimizer
Optimiser* optimiser = NULL;
if (learner == "mira") {
cerr << "Optimising using Mira" << endl;
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, precision);
if (rank == 0) {
cerr << "Optimising using Mira" << endl;
}
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack);
learning_rate = mira_learning_rate;
perceptron_update = false;
} else if (learner == "perceptron") {
cerr << "Optimising using Perceptron" << endl;
if (rank == 0) {
cerr << "Optimising using Perceptron" << endl;
}
optimiser = new Perceptron();
learning_rate = perceptron_learning_rate;
perceptron_update = true;
@@ -373,7 +360,7 @@ int main(int argc, char** argv) {
// resolve parameter dependencies
if (perceptron_update || analytical_update) {
batchSize = 1;
cerr << "Setting batch size to 1 for perceptron/analytical update" << endl;
cerr << "Info: Setting batch size to 1 for perceptron/analytical update" << endl;
}
if (hope_n == -1 && fear_n == -1) {
@@ -385,14 +372,18 @@ int main(int argc, char** argv) {
hope_fear = false; // is true by default
}
if (!hope_fear && !analytical_update) {
model_hope_fear = true;
}
if (model_hope_fear && analytical_update) {
cerr << "Error: must choose between model-hope-fear and analytical update" << endl;
cerr << "Error: Must choose between model-hope-fear and analytical update" << endl;
return 1;
}
if (burnIn && sentenceLevelBleu) {
burnIn = false;
cerr << "Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl;
cerr << "Info: Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl;
}
if (burnIn) {
@@ -436,7 +427,7 @@ int main(int argc, char** argv) {
order.push_back(i);
}
cerr << "Rank " << rank << ", starting burn-in phase for approx. BLEU history.." << endl;
VERBOSE(1, "Rank " << rank << ", starting burn-in phase for approx. BLEU history.." << endl);
if (historyOf1best) {
// get 1best translations for the burn-in sentences
vector<size_t>::const_iterator sid = order.begin();
@@ -444,7 +435,7 @@ int main(int argc, char** argv) {
string& input = burnInInputSentences[*sid];
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
featureValues[0], bleuScores[0], true,
distinctNbest, rank);
distinctNbest, rank, -1);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
@@ -468,8 +459,7 @@ int main(int argc, char** argv) {
while (sid != order.end()) {
string& input = burnInInputSentences[*sid];
vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
featureValues[0], bleuScores[0], true,
distinctNbest, rank);
featureValues[0], bleuScores[0], true, distinctNbest, rank, -1);
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
@@ -488,7 +478,7 @@ int main(int argc, char** argv) {
}
}
cerr << "Bleu feature history after burn-in: " << endl;
VERBOSE(1, "Bleu feature history after burn-in: " << endl);
decoder->printBleuFeatureHistory(cerr);
decoder->loadReferenceSentences(referenceSentences);
}
@@ -532,44 +522,28 @@ int main(int argc, char** argv) {
size_t numberOfUpdates = 0;
size_t numberOfUpdatesThisEpoch = 0;
time_t now = time(0); // get current time
struct tm* tm = localtime(&now); // get struct filled out
cerr << "Start date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday << "/"
<< tm->tm_year + 1900 << ", " << tm->tm_hour << ":" << tm->tm_min << ":"
<< tm->tm_sec << endl;
time_t now;
time(&now);
cerr << "Rank " << rank << ", " << ctime(&now) << endl;
ScoreComponentCollection mixedAverageWeights;
ScoreComponentCollection mixedAverageWeightsPrevious;
ScoreComponentCollection mixedAverageWeightsBeforePrevious;
/* float averageRatio = 0;
float averageBleu = 0;
float prevAverageBleu = 0;
float beforePrevAverageBleu = 0;
float summedApproxBleu = 0;
float averageApproxBleu = 0;
float prevAverageApproxBleu = 0;
float beforePrevAverageApproxBleu = 0;*/
bool stop = false;
int sumStillViolatedConstraints;
int sumStillViolatedConstraints_lastEpoch = 0;
int sumConstraintChangeAbs;
int sumConstraintChangeAbs_lastEpoch = 0;
size_t sumBleuChangeAbs;
// size_t sumBleuChangeAbs;
float *sendbuf, *recvbuf;
sendbuf = (float *) malloc(sizeof(float));
recvbuf = (float *) malloc(sizeof(float));
// Note: make sure that the variable mosesWeights always holds the current decoder weights
for (size_t epoch = 0; epoch < epochs && !stop; ++epoch) {
cerr << "\nRank " << rank << ", epoch " << epoch << endl;
// sum of violated constraints
sumStillViolatedConstraints = 0;
sumConstraintChangeAbs = 0;
sumBleuChangeAbs = 0;
// sum of approx. sentence bleu scores per epoch
// summedApproxBleu = 0;
// sumBleuChangeAbs = 0;
numberOfUpdatesThisEpoch = 0;
// Sum up weights over one epoch, final average uses weights from last epoch
@@ -601,8 +575,7 @@ int main(int argc, char** argv) {
// get moses weights
ScoreComponentCollection mosesWeights = decoder->getWeights();
cerr << "\nRank " << rank << ", next batch" << endl;
cerr << "Rank " << rank << ", weights: " << mosesWeights << endl;
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl);
// BATCHING: produce nbest lists for all input sentences in batch
vector<float> oracleBleuScores;
@@ -618,8 +591,7 @@ int main(int argc, char** argv) {
!= shard.end(); ++batchPosition) {
string& input = inputSentences[*sid];
const vector<string>& refs = referenceSentences[*sid];
cerr << "Rank " << rank << ", batch position " << batchPosition << endl;
cerr << "Rank " << rank << ", input sentence " << *sid << ": \"" << input << "\"" << endl;
cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
vector<ScoreComponentCollection> newFeatureValues;
vector<float> newBleuScores;
@@ -640,13 +612,13 @@ int main(int argc, char** argv) {
if (perceptron_update || analytical_update) {
if (historyOf1best) {
// MODEL (for updating the history)
cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
decoder->cleanup();
oneBests.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl;
VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl);
}
// clear dummies
@@ -658,22 +630,22 @@ int main(int argc, char** argv) {
size_t oraclePos = 0;
vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][oraclePos] << endl;
VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][oraclePos] << endl);
// FEAR
cerr << "Rank " << rank << ", run decoder to get 1best fear translations" << endl;
size_t fearPos = 0;
vector<const Word*> fear = decoder->getNBest(input, *sid, 1, -1.0, bleuScoreWeight,
featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][fearPos] << endl;
VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][fearPos] << endl);
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
@@ -682,37 +654,34 @@ int main(int argc, char** argv) {
if (hope_fear) {
if (historyOf1best) {
// MODEL (for updating the history only, using dummy vectors)
cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
cerr << "dummyFeatureValues.size: " << dummyFeatureValues.size() << endl;
cerr << "batch position: " << batchPosition << endl;
cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank);
cerr << "finished decoding." << endl;
distinctNbest, rank, epoch);
decoder->cleanup();
oneBests.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl;
VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl);
}
// HOPE
cerr << "Rank " << rank << ", run decoder to get " << hope_n << "best hope translations" << endl;
vector<const Word*> oracle = decoder->getNBest(input, *sid, hope_n, 1.0, bleuScoreWeight,
featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl;
VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl);
// FEAR
cerr << "Rank " << rank << ", run decoder to get " << fear_n << "best fear translations" << endl;
vector<const Word*> fear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuScoreWeight,
featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl;
VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl);
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
@@ -722,26 +691,26 @@ int main(int argc, char** argv) {
cerr << "Rank " << rank << ", run decoder to get " << n << "best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
decoder->cleanup();
oneBests.push_back(bestModel);
// needed for calculating bleu of dev (1best translations) // todo:
all_ref_ids.push_back(*sid);
allBestModelScore.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl);
// HOPE
cerr << "Rank " << rank << ", run decoder to get " << n << "best hope translations" << endl;
size_t oraclePos = featureValues[batchPosition].size();
vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl);
oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]);
@@ -751,18 +720,15 @@ int main(int argc, char** argv) {
size_t fearPos = featureValues[batchPosition].size();
vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
distinctNbest, rank, epoch);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl;
VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl);
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
}
}
// cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
// summedApproxBleu += bleuScores[batchPosition][0];
// next input sentence
++sid;
++actualBatchSize;
@@ -802,14 +768,14 @@ int main(int argc, char** argv) {
}
}
// get 1best model results with old weights
/* // get 1best model results with old weights
vector< vector <float > > bestModelOld_batch;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
bestModelOld_batch.push_back(bestModelOld);
decoder->cleanup();
}
}*/
// optionally print out the feature values
if (print_feature_values) {
@@ -840,7 +806,7 @@ int main(int argc, char** argv) {
}
// Run optimiser on batch:
cerr << "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl;
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
ScoreComponentCollection oldWeights(mosesWeights);
vector<int> update_status;
if (perceptron_update) {
@@ -848,12 +814,12 @@ int main(int argc, char** argv) {
vector<size_t> dummy2;
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, dummy1, dummy1, dummy2,
learning_rate, 0, rank, epoch, 0);
learning_rate, rank, epoch);
}
else if (analytical_update) {
update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(mosesWeights,
featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], bleuScoresFear[0][0],
ref_ids[0], learning_rate, max_sentence_update, rank, epoch, controlUpdates);
ref_ids[0], learning_rate, rank, epoch);
}
else {
if (hope_fear) {
@@ -884,74 +850,64 @@ int main(int argc, char** argv) {
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids,
learning_rate, max_sentence_update, rank, epoch, controlUpdates);
learning_rate, rank, epoch);
}
else {
// model_hope_fear
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, featureValues,
losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
learning_rate, max_sentence_update, rank, epoch, controlUpdates);
learning_rate, rank, epoch);
}
}
if (update_status[0] == 1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", no update for batch" << endl;
sumConstraintChangeAbs += abs(update_status[0] - update_status[1]);
sumStillViolatedConstraints += update_status[1];
// pass new weights to decoder
if (normaliseWeights) {
mosesWeights.L1Normalise();
}
else if (update_status[0] == -1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update ignored" << endl;
cumulativeWeights.PlusEquals(mosesWeights);
++numberOfUpdates;
++numberOfUpdatesThisEpoch;
if (averageWeights) {
ScoreComponentCollection averageWeights(cumulativeWeights);
if (accumulateWeights) {
averageWeights.DivideEquals(numberOfUpdates);
} else {
averageWeights.DivideEquals(numberOfUpdatesThisEpoch);
}
mosesWeights = averageWeights;
}
else {
sumConstraintChangeAbs += abs(update_status[1] - update_status[2]);
sumStillViolatedConstraints += update_status[2];
// pass new weights to decoder
if (normaliseWeights) {
mosesWeights.L1Normalise();
}
// set new Moses weights (averaged or not)
decoder->setWeights(mosesWeights);
cumulativeWeights.PlusEquals(mosesWeights);
++numberOfUpdates;
++numberOfUpdatesThisEpoch;
if (averageWeights) {
ScoreComponentCollection averageWeights(cumulativeWeights);
if (accumulateWeights) {
averageWeights.DivideEquals(numberOfUpdates);
} else {
averageWeights.DivideEquals(numberOfUpdatesThisEpoch);
}
// compute difference to old weights
ScoreComponentCollection weightDifference(mosesWeights);
weightDifference.MinusEquals(oldWeights);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl);
mosesWeights = averageWeights;
cerr << "Rank " << rank << ", epoch " << epoch << ", set new average weights: " << mosesWeights << endl;
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", set new weights: " << mosesWeights << endl;
}
// set new Moses weights (averaged or not)
decoder->setWeights(mosesWeights);
// compute difference to old weights
ScoreComponentCollection weightDifference(mosesWeights);
weightDifference.MinusEquals(oldWeights);
cerr << "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl;
// get 1best model results with new weights (for each sentence in batch)
vector<float> bestModelNew;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
decoder->cleanup();
sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]);
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl;
}
}
/* // get 1best model results with new weights (for each sentence in batch)
vector<float> bestModelNew;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
decoder->cleanup();
sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]);
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl);
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl);
}*/
// update history (for approximate document Bleu)
if (sentenceLevelBleu) {
for (size_t i = 0; i < oracles.size(); ++i) {
cerr << "Rank " << rank << ", epoch " << epoch << ", oracle length: " << oracles[i].size() << " ";
decoder->printReferenceLength(ref_ids);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", oracle length: " << oracles[i].size() << " ");
if (verbosity > 0) {
decoder->printReferenceLength(ref_ids);
}
}
}
else {
@@ -1058,16 +1014,17 @@ int main(int argc, char** argv) {
cerr << "\nMixed average weights during epoch " << epoch << ": " << mixedAverageWeights << endl;
}
cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl;
cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl;
mixedAverageWeights.Save(filename.str());
++weightEpochDump;
}
}// end dumping
} // end of shard loop, end of this epoch
cerr << "Bleu feature history after epoch " << epoch << endl;
decoder->printBleuFeatureHistory(cerr);
if (verbosity > 0) {
cerr << "Bleu feature history after epoch " << epoch << endl;
decoder->printBleuFeatureHistory(cerr);
}
// Check whether there were any weight updates during this epoch
size_t sumUpdates;
@@ -1094,131 +1051,30 @@ int main(int argc, char** argv) {
if (epoch > 0) {
if ((sumConstraintChangeAbs_lastEpoch == sumConstraintChangeAbs) && (sumStillViolatedConstraints_lastEpoch == sumStillViolatedConstraints)) {
cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints and constraint changes has stayed the same: " << sumStillViolatedConstraints << ", " << sumConstraintChangeAbs << endl;
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints and constraint changes has stayed the same: " << sumStillViolatedConstraints << ", " << sumConstraintChangeAbs << endl);
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << ", sum of constraint changes " << sumConstraintChangeAbs << endl;
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << ", sum of constraint changes " << sumConstraintChangeAbs << endl);
}
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl;
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl);
}
sumConstraintChangeAbs_lastEpoch = sumConstraintChangeAbs;
sumStillViolatedConstraints_lastEpoch = sumStillViolatedConstraints;
if (min_bleu_change > 0) {
if (sumBleuChangeAbs < min_bleu_change) {
cerr << "Rank " << rank << ", epoch " << epoch << ", sum of BLEU score changes was smaller than " << min_bleu_change << " (" << sumBleuChangeAbs << ")." << endl;
stop = true;
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", sum of BLEU score changes: " << sumBleuChangeAbs << "." << endl;
}
}
if (!stop) {
/* if (devBleu) {
// calculate bleu score of dev set
vector<float> bleuAndRatio = decoder->calculateBleuOfCorpus(allBestModelScore, all_ref_ids, epoch, rank);
float bleu = bleuAndRatio[0];
float ratio = bleuAndRatio[1];
for (size_t i = 0; i < allBestModelScore.size(); ++i) {
for (size_t j = 0; j < allBestModelScore[i].size(); ++j) {
delete allBestModelScore[i][j];
}
}
if (rank == 0) {
beforePrevAverageBleu = prevAverageBleu;
beforePrevAverageApproxBleu = prevAverageApproxBleu;
prevAverageBleu = averageBleu;
prevAverageApproxBleu = averageApproxBleu;
}
#ifdef MPI_ENABLE
// average bleu across processes
sendbuf[0] = bleu;
recvbuf[0] = 0;
MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world);
if (rank == 0) {
averageBleu = recvbuf[0];
// divide by number of processes
averageBleu /= size;
cerr << "Average Bleu (dev) after epoch " << epoch << ": " << averageBleu << endl;
}
// average ratio across processes
sendbuf[0] = ratio;
recvbuf[0] = 0;
MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world);
if (rank == 0) {
averageRatio = recvbuf[0];
// divide by number of processes
averageRatio /= size;
cerr << "Average ratio (dev) after epoch " << epoch << ": " << averageRatio << endl;
}
// average approximate sentence bleu across processes
sendbuf[0] = summedApproxBleu/numberOfUpdatesThisEpoch;
recvbuf[0] = 0;
MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world);
if (rank == 0) {
averageApproxBleu = recvbuf[0];
// divide by number of processes
averageApproxBleu /= size;
cerr << "Average approx. sentence Bleu (dev) after epoch " << epoch << ": " << averageApproxBleu << endl;
}
#endif
#ifndef MPI_ENABLE
averageBleu = bleu;
cerr << "Average Bleu (dev) after epoch " << epoch << ": " << averageBleu << endl;
averageApproxBleu = summedApproxBleu / numberOfUpdatesThisEpoch;
cerr << "Average approx. sentence Bleu (dev) after epoch " << epoch << ": " << averageApproxBleu << endl;
#endif
if (rank == 0) {
if (stop_dev_bleu) {
if (averageBleu <= prevAverageBleu && prevAverageBleu <= beforePrevAverageBleu) {
stop = true;
cerr << "Average Bleu (dev) is decreasing or no more increasing.. stop tuning." << endl;
ScoreComponentCollection dummy;
ostringstream endfilename;
endfilename << "stopping";
dummy.Save(endfilename.str());
}
}
if (stop_approx_dev_bleu) {
if (averageApproxBleu <= prevAverageApproxBleu && prevAverageApproxBleu <= beforePrevAverageApproxBleu) {
stop = true;
cerr << "Average approx. sentence Bleu (dev) is decreasing or no more increasing.. stop tuning." << endl;
ScoreComponentCollection dummy;
ostringstream endfilename;
endfilename << "stopping";
dummy.Save(endfilename.str());
}
}
}
#ifdef MPI_ENABLE
mpi::broadcast(world, stop, 0);
#endif
} // end if (dev_bleu) */
// Test if weights have converged
if (weightConvergence) {
bool reached = true;
if (rank == 0 && (epoch >= 2)) {
ScoreComponentCollection firstDiff(mixedAverageWeights);
firstDiff.MinusEquals(mixedAverageWeightsPrevious);
cerr << "Average weight changes since previous epoch: " << firstDiff << endl;
VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
ScoreComponentCollection secondDiff(mixedAverageWeights);
secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
cerr << "Average weight changes since before previous epoch: " << secondDiff << endl << endl;
VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
// check whether stopping criterion has been reached
// (both difference vectors must have all weight changes smaller than min_weight_change)
@@ -1240,7 +1096,7 @@ int main(int argc, char** argv) {
if (reached) {
// stop MIRA
stop = true;
cerr << "Stopping criterion has been reached after epoch " << epoch << ".. stopping MIRA." << endl;
cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." << endl;
ScoreComponentCollection dummy;
ostringstream endfilename;
endfilename << "stopping";
@@ -1255,17 +1111,26 @@ int main(int argc, char** argv) {
#endif
} //end if (weightConvergence)
// if using flexible regularization, decrease regularization parameter for next epoch
// if using flexible slack, decrease slack parameter for next epoch
if (slack_step > 0) {
if (slack - slack_step >= slack_min) {
if (typeid(*optimiser) == typeid(MiraOptimiser)) {
slack -= slack_step;
cerr << "Change slack to: " << slack << endl;
VERBOSE(1, "Change slack to: " << slack << endl);
((MiraOptimiser*) optimiser)->setSlack(slack);
}
}
}
// if using flexible margin slack, decrease margin slack parameter for next epoch
if (margin_slack_incr > 0.0001) {
if (typeid(*optimiser) == typeid(MiraOptimiser)) {
margin_slack += margin_slack_incr;
VERBOSE(1, "Change margin slack to: " << margin_slack << endl);
((MiraOptimiser*) optimiser)->setMarginSlack(margin_slack);
}
}
// change learning rate
if ((decrease_learning_rate > 0) && (learning_rate - decrease_learning_rate >= min_learning_rate)) {
learning_rate -= decrease_learning_rate;
@@ -1276,20 +1141,7 @@ int main(int argc, char** argv) {
mpi::broadcast(world, stop, 0);
#endif
}
cerr << "Change learning rate to " << learning_rate << endl;
}
// change maximum sentence update
if ((decrease_sentence_update > 0) && (max_sentence_update - decrease_sentence_update >= min_sentence_update)) {
max_sentence_update -= decrease_sentence_update;
if (max_sentence_update <= 0.0001) {
max_sentence_update = 0;
stop = true;
#ifdef MPI_ENABLE
mpi::broadcast(world, stop, 0);
#endif
}
cerr << "Change maximum sentence update to " << max_sentence_update << endl;
VERBOSE(1, "Change learning rate to " << learning_rate << endl);
}
}
} // end of epoch loop
@@ -1298,11 +1150,8 @@ int main(int argc, char** argv) {
MPI_Finalize();
#endif
now = time(0); // get current time
tm = localtime(&now); // get struct filled out
cerr << "\nEnd date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday
<< "/" << tm->tm_year + 1900 << ", " << tm->tm_hour << ":"
<< tm->tm_min << ":" << tm->tm_sec << endl;
time(&now);
cerr << "Rank " << rank << ", " << ctime(&now);
delete decoder;
exit(0);
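The per-epoch parameter schedules at the end of the epoch loop above, condensed into a standalone sketch (starting values are placeholders for the corresponding command-line options; the real code additionally checks that the optimiser is a MiraOptimiser before touching the slack values):

#include <iostream>

// Condensed sketch of Main.cpp's per-epoch schedules: slack shrinks by
// slack_step down to slack_min, margin slack grows by margin_slack_incr,
// and the learning rate decays down to min_learning_rate.
int main() {
  float slack = 0.05f, slack_step = 0.01f, slack_min = 0.01f;
  float margin_slack = 0.0f, margin_slack_incr = 0.001f;
  float learning_rate = 1.0f, decrease_learning_rate = 0.1f;
  float min_learning_rate = 0.1f;

  for (int epoch = 0; epoch < 10; ++epoch) {
    if (slack_step > 0 && slack - slack_step >= slack_min)
      slack -= slack_step;
    if (margin_slack_incr > 0.0001f)
      margin_slack += margin_slack_incr;
    if (decrease_learning_rate > 0 &&
        learning_rate - decrease_learning_rate >= min_learning_rate)
      learning_rate -= decrease_learning_rate;
    std::cerr << "epoch " << epoch << ": slack " << slack
              << ", margin slack " << margin_slack
              << ", learning rate " << learning_rate << std::endl;
  }
  return 0;
}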

MiraOptimiser.cpp

@@ -1,5 +1,6 @@
#include "Optimiser.h"
#include "Hildreth.h"
#include "StaticData.h"
using namespace Moses;
using namespace std;
@@ -14,10 +15,8 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
const vector<float> oracleBleuScores,
const vector<size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates) {
size_t epoch) {
// vector of feature values differences for all created constraints
vector<ScoreComponentCollection> featureValueDiffs;
@@ -40,41 +39,44 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
featureValueDiff.MinusEquals(featureValues[i][j]);
cerr << "feature value diff: " << featureValueDiff << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl;
if (featureValueDiff.GetL1Norm() == 0) {
cerr << "Equal feature values, constraint skipped.." << endl;
// skip constraint
continue;
}
float loss = losses[i][j];
if (m_scale_margin == 1) {
loss *= oracleBleuScores[i];
cerr << "Scaling margin with oracle bleu score " << oracleBleuScores[i] << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << oracleBleuScores[i] << endl);
}
else if (m_scale_margin == 2) {
loss *= log2(oracleBleuScores[i]);
cerr << "Scaling margin with log2 oracle bleu score " << log2(oracleBleuScores[i]) << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log2 oracle bleu score " << log2(oracleBleuScores[i]) << endl);
}
else if (m_scale_margin == 10) {
loss *= log10(oracleBleuScores[i]);
cerr << "Scaling margin with log10 oracle bleu score " << log10(oracleBleuScores[i]) << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log10 oracle bleu score " << log10(oracleBleuScores[i]) << endl)
}
// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float diff = loss - (modelScoreDiff + m_precision);
cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
@@ -92,8 +94,8 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
vector<float> alphas;
ScoreComponentCollection summedUpdate;
if (violatedConstraintsBefore > 0) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " <<
featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl;
if (m_slack != 0) {
alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
} else {
@@ -104,7 +106,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis))
for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
float alpha = alphas[k];
cerr << "alpha: " << alpha << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl);
ScoreComponentCollection update(featureValueDiffs[k]);
update.MultiplyEquals(alpha);
@@ -113,11 +115,10 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
vector<int> status(3);
status[0] = 1;
cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
vector<int> status(2);
status[0] = 0;
status[1] = 0;
status[2] = 0;
return status;
}
@@ -130,56 +131,37 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_precision);
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;
if (controlUpdates && violatedConstraintsAfter > 0) {
float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) {
vector<int> statusPlus(3);
statusPlus[0] = -1;
statusPlus[1] = -1;
statusPlus[2] = -1;
return statusPlus;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);
// apply learning rate
if (learning_rate != 1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
summedUpdate.MultiplyEquals(learning_rate);
cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl;
}
// apply threshold scaling
if (max_sentence_update != -1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl;
summedUpdate.ThresholdScaling(max_sentence_update);
cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl);
}
// scale update by BLEU of oracle
if (oracleBleuScores.size() == 1 && m_scale_update) {
cerr << "Scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl);
summedUpdate.MultiplyEquals(log10(oracleBleuScores[0]));
}
// apply update to weight vector
cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
currWeights.PlusEquals(summedUpdate);
cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);
vector<int> statusPlus(3);
statusPlus[0] = 0;
statusPlus[1] = violatedConstraintsBefore;
statusPlus[2] = violatedConstraintsAfter;
return statusPlus;
vector<int> status(2);
status[0] = violatedConstraintsBefore;
status[1] = violatedConstraintsAfter;
return status;
}
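For reference, the weight update assembled above, reduced to raw float vectors: summedUpdate = sum_k alpha_k * featureValueDiff_k, then w += learning_rate * summedUpdate. The alphas and feature differences below are invented; in the real code they come from Hildreth::optimise and the hope/fear feature values.

#include <iostream>
#include <vector>

// Toy version of the MIRA weight update applied after the optimiser runs.
int main() {
  std::vector<std::vector<float> > featureValueDiffs = {
      {0.5f, -1.0f, 0.2f}, {0.1f, 0.3f, -0.4f} };
  std::vector<float> alphas = {0.7f, 0.2f}; // hypothetical optimiser output
  float learning_rate = 0.5f;

  std::vector<float> summedUpdate(3, 0.0f);
  for (size_t k = 0; k < featureValueDiffs.size(); ++k)
    for (size_t d = 0; d < summedUpdate.size(); ++d)
      summedUpdate[d] += alphas[k] * featureValueDiffs[k][d];

  std::vector<float> weights = {0.2f, 0.4f, -0.1f}; // current decoder weights
  for (size_t d = 0; d < weights.size(); ++d)
    weights[d] += learning_rate * summedUpdate[d];

  for (size_t d = 0; d < weights.size(); ++d)
    std::cerr << weights[d] << " ";
  std::cerr << std::endl;
  return 0;
}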
vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
@@ -189,10 +171,8 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates) {
size_t epoch) {
// vector of feature values differences for all created constraints
vector<ScoreComponentCollection> featureValueDiffs;
@@ -216,41 +196,44 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
for (size_t k = 0; k < featureValuesFear[i].size(); ++k) {
ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
featureValueDiff.MinusEquals(featureValuesFear[i][k]);
cerr << "feature value diff: " << featureValueDiff << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl;
if (featureValueDiff.GetL1Norm() == 0) {
cerr << "Equal feature values, constraint skipped.." << endl;
// skip constraint
continue;
}
float loss = bleuScoresHope[i][j] - bleuScoresFear[i][k];
if (m_scale_margin == 1) {
loss *= bleuScoresHope[i][j];
cerr << "Scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl);
}
else if (m_scale_margin == 2) {
loss *= log2(bleuScoresHope[i][j]);
cerr << "Scaling margin with log2 oracle bleu score " << log2(bleuScoresHope[i][j]) << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log2 oracle bleu score " << log2(bleuScoresHope[i][j]) << endl);
}
else if (m_scale_margin == 10) {
loss *= log10(bleuScoresHope[i][j]);
cerr << "Scaling margin with log10 oracle bleu score " << log10(bleuScoresHope[i][j]) << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log10 oracle bleu score " << log10(bleuScoresHope[i][j]) << endl);
}
// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float diff = loss - (modelScoreDiff + m_precision);
cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
@@ -269,8 +252,8 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
vector<float> alphas;
ScoreComponentCollection summedUpdate;
if (violatedConstraintsBefore > 0) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " <<
featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl;
if (m_slack != 0) {
alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
} else {
@@ -281,17 +264,17 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
// * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis))
for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
float alpha = alphas[k];
cerr << "alpha: " << alpha << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl);
ScoreComponentCollection update(featureValueDiffs[k]);
update.MultiplyEquals(alpha);
// scale update by BLEU of hope translation (only two cases defined at the moment)
if (featureValuesHope.size() == 1 && m_scale_update) { // only defined for batch size 1)
if (featureValuesHope[0].size() == 1) {
cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][0]) << endl; // only 1 oracle
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][0]) << endl); // only 1 oracle
update.MultiplyEquals(log10(bleuScoresHope[0][0]));
} else if (featureValuesFear[0].size() == 1) {
cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][k]) << endl; // k oracles
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][k]) << endl); // k oracles
update.MultiplyEquals(log10(bleuScoresHope[0][k]));
}
}
@@ -301,11 +284,10 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
}
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
vector<int> status(3);
status[0] = 1;
cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
vector<int> status(2);
status[0] = 0;
status[1] = 0;
status[2] = 0;
return status;
}
@@ -318,49 +300,30 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_precision);
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;
if (controlUpdates && violatedConstraintsAfter > 0) {
float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) {
vector<int> statusPlus(3);
statusPlus[0] = -1;
statusPlus[1] = -1;
statusPlus[2] = -1;
return statusPlus;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);
// Apply learning rate (fixed or flexible)
if (learning_rate != 1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
summedUpdate.MultiplyEquals(learning_rate);
cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl;
}
// Apply threshold scaling
if (max_sentence_update != -1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl;
summedUpdate.ThresholdScaling(max_sentence_update);
cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl);
}
// apply update to weight vector
cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
currWeights.PlusEquals(summedUpdate);
cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);
vector<int> statusPlus(3);
statusPlus[0] = 0;
statusPlus[1] = violatedConstraintsBefore;
statusPlus[2] = violatedConstraintsAfter;
vector<int> statusPlus(2);
statusPlus[0] = violatedConstraintsBefore;
statusPlus[1] = violatedConstraintsAfter;
return statusPlus;
}
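
The re-check above is the hinge introduced with --margin-slack: a constraint modelScoreDiff + margin_slack >= loss counts as violated only when the shortfall exceeds the float-comparison epsilon. A minimal helper capturing that test (epsilon as in the source):

  // true iff the constraint is still violated under the given margin slack
  bool isViolated(float loss, float modelScoreDiff, float marginSlack,
                  float epsilon = 0.0001) {
    return loss - (modelScoreDiff + marginSlack) > epsilon;
  }
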
@ -371,26 +334,27 @@ vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c
float bleuScoreFear,
size_t sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates) {
size_t epoch) {
float epsilon = 0.0001;
float oldDistanceFromOptimum = 0;
bool constraintViolatedBefore = false;
ScoreComponentCollection weightUpdate;
cerr << "hope: " << featureValuesHope << endl;
cerr << "fear: " << featureValuesFear << endl;
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
ScoreComponentCollection featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
cerr << "hope - fear: " << featureValueDiff << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float loss = bleuScoreHope - bleuScoreFear;
float diff = loss - (modelScoreDiff + m_precision);
// approximate comparison between floats
cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
// constraint violated
oldDistanceFromOptimum += diff;
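
The clipped violation above fires only on the part of the loss not already covered by the model margin plus the slack. A self-contained restatement with a worked example (illustrative numbers only):

  // loss = 0.4, modelScoreDiff = 0.1, m_margin_slack = 0.2
  // required: 0.1 + 0.2 >= 0.4  -> fails
  // diff = 0.4 - (0.1 + 0.2) = 0.1 > epsilon, so an update is triggered
  float violation(float loss, float modelScoreDiff, float marginSlack) {
    float diff = loss - (modelScoreDiff + marginSlack);
    return diff > 0 ? diff : 0;   // clipped exactly as in the code above
  }
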
@ -417,17 +381,16 @@ vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c
weightUpdate.PlusEquals(featureValueDiff);
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0, can only happen if oracle == hypothesis, are bleu scores equal as well?" << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0" << endl);
}
}
if (!constraintViolatedBefore) {
// constraint satisfied, nothing to do
cerr << "Rank " << rank << ", epoch " << epoch << ", check, constraint already satisfied" << endl;
vector<int> status(3);
status[0] = 1;
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl;
vector<int> status(2);
status[0] = 0;
status[1] = 0;
status[2] = 0;
return status;
}
@ -439,35 +402,25 @@ vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
modelScoreDiff = featureValueDiff.InnerProduct(newWeights);
diff = loss - (modelScoreDiff + m_precision);
diff = loss - (modelScoreDiff + m_margin_slack);
// approximate comparison between floats!
if (diff > epsilon) {
constraintViolatedAfter = true;
newDistanceFromOptimum += (loss - (modelScoreDiff + m_precision));
newDistanceFromOptimum += (loss - modelScoreDiff);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;
float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
if (controlUpdates && constraintViolatedAfter && distanceChange < 0) {
vector<int> statusPlus(3);
statusPlus[0] = -1;
statusPlus[1] = 1;
statusPlus[2] = 1;
return statusPlus;
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);
// apply update to weight vector
cerr << "Rank " << rank << ", weights before update: " << currWeights << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
currWeights.PlusEquals(weightUpdate);
cerr << "Rank " << rank << ", weights after update: " << currWeights << endl;
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);
vector<int> statusPlus(3);
statusPlus[0] = 0;
statusPlus[1] = 1;
statusPlus[2] = constraintViolatedAfter ? 1 : 0;
return statusPlus;
vector<int> status(2);
status[0] = 1;
status[1] = constraintViolatedAfter ? 1 : 0;
return status;
}
}
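
The closed-form step elided between the hunks above is the standard single-constraint MIRA update; the "squared norm is 0" branch implies the violation is divided by the squared norm of the hope/fear difference. A hedged reconstruction (the self inner product and the slack cap are assumptions, not shown in this diff):

  float squaredNorm = featureValueDiff.InnerProduct(featureValueDiff); // assumed API
  if (squaredNorm > 0) {
    float alpha = diff / squaredNorm;          // closed-form dual solution
    if (m_slack > 0 && alpha > m_slack)
      alpha = m_slack;                         // cap the step as on the Hildreth path
    featureValueDiff.MultiplyEquals(alpha);
    weightUpdate.PlusEquals(featureValueDiff); // as in the hunk above
  }
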

View File

@ -36,11 +36,9 @@ namespace Mira {
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates) = 0;
float learning_rate,
size_t rank,
size_t epoch) = 0;
};
class Perceptron : public Optimiser {
@ -52,10 +50,8 @@ namespace Mira {
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates);
size_t rank,
size_t epoch);
};
class MiraOptimiser : public Optimiser {
@ -63,13 +59,13 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float precision) :
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float margin_slack) :
Optimiser(),
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_slack(slack),
m_scale_margin(scale_margin),
m_scale_update(scale_update),
m_precision(precision) { }
m_margin_slack(margin_slack) { }
std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& featureValuesHope,
@ -78,10 +74,8 @@ namespace Mira {
float bleuScoresFear,
size_t sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates);
size_t epoch);
std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector<std::vector<float> >& losses,
@ -89,27 +83,27 @@ namespace Mira {
const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates);
float learning_rate,
size_t rank,
size_t epoch);
virtual std::vector<int> updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates);
float learning_rate,
size_t rank,
size_t epoch);
void setSlack(float slack) {
m_slack = slack;
}
void setMarginSlack(float margin_slack) {
m_margin_slack = margin_slack;
}
private:
// add only violated constraints to the optimisation problem
@ -123,7 +117,7 @@ namespace Mira {
// scale update with log 10 of oracle BLEU score
bool m_scale_update;
float m_precision;
float m_margin_slack;
};
}
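
The header now pairs setSlack with the new setMarginSlack, which is what the --margin-slack and --margin-incr options presumably drive: a fixed slack plus a per-epoch increment. A usage sketch (the schedule and variable names are assumptions; only the constructor and setters are shown in this diff):

  MiraOptimiser* optimiser = new MiraOptimiser(onlyViolatedConstraints, slack,
                                               scale_margin, scale_update, margin_slack);
  for (size_t epoch = 0; epoch < numEpochs; ++epoch) {
    // widen the required margin as training progresses (--margin-incr, assumed semantics)
    optimiser->setMarginSlack(margin_slack + epoch * margin_incr);
    // ... decode, collect hope/fear features, call updateWeightsHopeFear(...) ...
  }
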

View File

@ -31,10 +31,8 @@ vector<int> Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeig
const vector< vector<float> >& dummy2,
const vector< size_t> dummy3,
float perceptron_learning_rate,
float dummy4,
size_t rank,
size_t epoch,
bool dummy5)
size_t epoch)
{
cerr << "hope: " << featureValuesHope[0][0] << endl;
cerr << "fear: " << featureValuesFear[0][0] << endl;