diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index e0505dcf4..756df14ba 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -127,7 +127,7 @@ namespace Mira {
     //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
     float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
-    std::cout << "Score w/o bleu: " << scoreWithoutBleu << ", bleu: " << bleuScore << endl;
+    cerr << "Score w/o bleu: " << scoreWithoutBleu << ", bleu: " << bleuScore << endl;
 
     // set bleu score to zero in the feature vector since we do not want to optimise its weight
     setBleuScore(featureValues.back(), 0);
diff --git a/mira/Main.cpp b/mira/Main.cpp
index d1edb8066..b5eba4e7a 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -156,13 +156,13 @@ int main(int argc, char** argv) {
   // TODO: initialise weights equally
   const vector featureFunctions = StaticData::Instance().GetTranslationSystem (TranslationSystem::DEFAULT).GetFeatureFunctions();
   for (size_t i = 0; i < featureFunctions.size(); ++i) {
-    cout << "Feature functions: " << featureFunctions[i]->GetScoreProducerDescription() << ": " << featureFunctions[i]->GetNumScoreComponents() << endl;
+    cerr << "Feature functions: " << featureFunctions[i]->GetScoreProducerDescription() << ": " << featureFunctions[i]->GetNumScoreComponents() << endl;
     vector< float> weights = startWeights.GetScoresForProducer(featureFunctions[i]);
-    cout << "weights: ";
+    cerr << "weights: ";
     for (size_t j = 0; j < weights.size(); ++j) {
-      cout << weights[j];
+      cerr << weights[j];
     }
-    cout << endl;
+    cerr << endl;
   }
 
   //Optionally shuffle the sentences
@@ -173,7 +173,7 @@ int main(int argc, char** argv) {
   }
   if (shuffle) {
-    cout << "Shuffling input sentences.." << endl;
+    cerr << "Shuffling input sentences.." << endl;
     RandomIndex rindex;
     random_shuffle(order.begin(), order.end(), rindex);
   }
@@ -218,12 +218,12 @@ int main(int argc, char** argv) {
   time_t now = time(0); // get current time
   struct tm* tm = localtime(&now); // get struct filled out
-  cout << "Start date/time: " << tm->tm_mon+1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
+  cerr << "Start date/time: " << tm->tm_mon+1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
     << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl;
 
   // TODO: stop MIRA when score on dev or tuning set does not improve further?
 
   for (size_t epoch = 1; epoch <= epochs; ++epoch) {
-    cout << "\nEpoch " << epoch << std::endl;
+    cerr << "\nEpoch " << epoch << std::endl;
 
     size_t weightEpochDump = 0; //number of weight dumps this epoch
@@ -237,14 +237,14 @@ int main(int argc, char** argv) {
     for (vector<size_t>::const_iterator sid = shard.begin(); sid != shard.end(); ++sid) {
       const string& input = inputSentences[*sid];
       const vector<string>& refs = referenceSentences[*sid];
-      cout << "Input sentence " << *sid << ": \"" << input << "\"" << std::endl;
+      cerr << "Input sentence " << *sid << ": \"" << input << "\"" << std::endl;
 
       // feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues)
       vector<vector<ScoreComponentCollection> > featureValues(batchSize);
       vector<vector<float> > bleuScores(batchSize);
 
       // MODEL
-      cout << "Run decoder to get nbest wrt model score" << std::endl;
+      cerr << "Run decoder to get nbest wrt model score" << std::endl;
       decoder->getNBest(input,
           *sid,
           n,
@@ -256,7 +256,7 @@ int main(int argc, char** argv) {
       decoder->cleanup();
 
       // HOPE
-      cout << "Run decoder to get nbest hope translations" << std::endl;
+      cerr << "Run decoder to get nbest hope translations" << std::endl;
       size_t oraclePos = featureValues[batch].size();
       vector oracle = decoder->getNBest(input,
           *sid,
@@ -272,7 +272,7 @@ int main(int argc, char** argv) {
       float oracleBleuScore = bleuScores[batch][oraclePos];
 
       // FEAR
-      cout << "Run decoder to get nbest fear translations" << std::endl;
+      cerr << "Run decoder to get nbest fear translations" << std::endl;
       decoder->getNBest(input,
           *sid,
           n,
@@ -299,7 +299,7 @@ int main(int argc, char** argv) {
       ScoreComponentCollection oldWeights(mosesWeights);
 
       //run optimiser
-      cout << "Run optimiser.." << endl;
+      cerr << "Run optimiser.." << endl;
       optimiser->updateWeights(mosesWeights, featureValues, losses, oracleFeatureValues);
 
       //update moses weights
@@ -385,7 +385,7 @@ int main(int argc, char** argv) {
   now = time(0); // get current time
   tm = localtime(&now); // get struct filled out
-  cout << "End date/time: " << tm->tm_mon+1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
+  cerr << "End date/time: " << tm->tm_mon+1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
     << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl;
 
   delete decoder;
diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp
index 2e602456f..f7c3597b5 100644
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@@ -24,11 +24,11 @@ void MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
         featureValueDiff.MinusEquals(featureValues[i][j]);
         float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
         if (modelScoreDiff < losses[i][j]) {
-          cerr << "Constraint violated: " << modelScoreDiff << " (modelScoreDiff) < " << losses[i][j] << " (loss)" << endl;
+          //cerr << "Constraint violated: " << modelScoreDiff << " (modelScoreDiff) < " << losses[i][j] << " (loss)" << endl;
           ++numberOfViolatedConstraints;
         }
         else {
-          cerr << "Constraint satisfied: " << modelScoreDiff << " (modelScoreDiff) >= " << losses[i][j] << " (loss)" << endl;
+          //cerr << "Constraint satisfied: " << modelScoreDiff << " (modelScoreDiff) >= " << losses[i][j] << " (loss)" << endl;
         }
 
         // Objective: 1/2 * ||w' - w||^2 + C * SUM_1_m[ max_1_n (l_ij - Delta_h_ij.w')]
@@ -59,7 +59,7 @@ void MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
       }
     }
     else {
-      cout << "No constraint violated for this batch" << endl;
+      cerr << "No constraint violated for this batch" << endl;
     }
   }
   else {
@@ -71,11 +71,11 @@ void MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
       if (j == m_n) { // TODO: use oracle index
         // oracle
         alphas[j] = m_c;
-        //std::cout << "alpha " << j << ": " << alphas[j] << endl;
+        //std::cerr << "alpha " << j << ": " << alphas[j] << endl;
       }
       else {
         alphas[j] = 0;
-        //std::cout << "alpha " << j << ": " << alphas[j] << endl;
+        //std::cerr << "alpha " << j << ": " << alphas[j] << endl;
       }
     }
 
@@ -87,7 +87,7 @@ void MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
         ++pairs;
 
         // Compute delta:
-        cout << "\nComparing pair" << j << "," << k << endl;
+        cerr << "\nComparing pair" << j << "," << k << endl;
         ScoreComponentCollection featureValueDiffs;
         float delta = computeDelta(currWeights, featureValues[i], j, k, losses[i], alphas, featureValueDiffs);
 
@@ -99,7 +99,7 @@ void MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
       }
     }
 
-    cout << "number of pairs: " << pairs << endl;
+    cerr << "number of pairs: " << pairs << endl;
     }
   }
 }
@@ -128,7 +128,7 @@ float MiraOptimiser::computeDelta(ScoreComponentCollection& currWeights,
   featureValueDiffs = featureValuesHope;
   featureValueDiffs.MinusEquals(featureValuesFear);
-  cout << "feature value diffs: " << featureValueDiffs << endl;
+  cerr << "feature value diffs: " << featureValueDiffs << endl;
 
   squaredNorm = featureValueDiffs.InnerProduct(featureValueDiffs);
   diffOfModelScores = featureValueDiffs.InnerProduct(currWeights);
@@ -140,15 +140,15 @@ float MiraOptimiser::computeDelta(ScoreComponentCollection& currWeights,
   // TODO: simplify and use BLEU scores of hypotheses directly?
   float lossDiff = losses[indexFear] - losses[indexHope];
   delta = (lossDiff - diffOfModelScores) / squaredNorm;
-  cout << "delta: " << delta << endl;
-  cout << "loss diff - model diff: " << lossDiff << " - " << diffOfModelScores << endl;
+  cerr << "delta: " << delta << endl;
+  cerr << "loss diff - model diff: " << lossDiff << " - " << diffOfModelScores << endl;
 
   // clipping
   // fear translation: e_ij --> alpha_ij = alpha_ij + delta
   // hope translation: e_ij' --> alpha_ij' = alpha_ij' - delta
   // clipping interval: [-alpha_ij, alpha_ij']
   // clip delta
-  cout << "Interval [" << (-1 * alphas[indexFear]) << "," << alphas[indexHope] << "]" << endl;
+  cerr << "Interval [" << (-1 * alphas[indexFear]) << "," << alphas[indexHope] << "]" << endl;
   if (delta > alphas[indexHope]) {
     //cout << "clipping " << delta << " to " << alphas[indexHope] << endl;
     delta = alphas[indexHope];
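
Note on the update rule: the pairwise step in MiraOptimiser::computeDelta above computes delta = (losses[indexFear] - losses[indexHope] - w . (h_hope - h_fear)) / ||h_hope - h_fear||^2 and clips it to the interval [-alphas[indexFear], alphas[indexHope]], so that the dual variables remain feasible after alpha_fear += delta and alpha_hope -= delta. The standalone sketch below illustrates that step under stated assumptions: plain std::vector<float> stands in for ScoreComponentCollection, the helper names dot and computePairwiseDelta are hypothetical, and the final weight update w += delta * (h_hope - h_fear) is the standard MIRA step rather than code shown in this patch.

// Minimal sketch of the pairwise MIRA step, assuming plain float feature vectors.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Dot product of two equally sized feature vectors.
static float dot(const std::vector<float>& a, const std::vector<float>& b) {
  assert(a.size() == b.size());
  float sum = 0.0f;
  for (std::size_t i = 0; i < a.size(); ++i) sum += a[i] * b[i];
  return sum;
}

// Returns the clipped step size and fills featureValueDiffs with h_hope - h_fear.
static float computePairwiseDelta(const std::vector<float>& weights,
                                  const std::vector<float>& hopeFeatures,
                                  const std::vector<float>& fearFeatures,
                                  float lossHope, float lossFear,
                                  float alphaHope, float alphaFear,
                                  std::vector<float>& featureValueDiffs) {
  featureValueDiffs = hopeFeatures;
  for (std::size_t i = 0; i < featureValueDiffs.size(); ++i)
    featureValueDiffs[i] -= fearFeatures[i];

  const float squaredNorm = dot(featureValueDiffs, featureValueDiffs);
  if (squaredNorm == 0.0f) return 0.0f;  // identical feature vectors: no update possible

  const float modelScoreDiff = dot(featureValueDiffs, weights);  // w . (h_hope - h_fear)
  const float lossDiff = lossFear - lossHope;

  float delta = (lossDiff - modelScoreDiff) / squaredNorm;
  // Clip to [-alpha_fear, alpha_hope]: the fear alpha gains delta, the hope alpha loses delta.
  if (delta > alphaHope) delta = alphaHope;
  else if (delta < -alphaFear) delta = -alphaFear;
  return delta;
}

int main() {
  std::vector<float> weights = {0.5f, 0.2f, -0.1f};
  std::vector<float> hope = {1.0f, 0.0f, 2.0f};  // feature values of the hope translation
  std::vector<float> fear = {0.0f, 1.0f, 1.0f};  // feature values of the fear translation
  std::vector<float> diffs;

  // Example losses and alphas; in the patch the oracle's alpha starts at C (m_c), the rest at 0.
  float delta = computePairwiseDelta(weights, hope, fear,
                                     /*lossHope=*/0.0f, /*lossFear=*/0.4f,
                                     /*alphaHope=*/0.01f, /*alphaFear=*/0.0f, diffs);

  // Standard MIRA weight update (assumption, not shown in the patch): w += delta * (h_hope - h_fear).
  for (std::size_t i = 0; i < weights.size(); ++i) weights[i] += delta * diffs[i];

  std::cerr << "delta: " << delta << std::endl;
  return 0;
}

With these numbers the unclipped delta is (0.4 - 0.2) / 3, roughly 0.067, so it is clipped down to alphaHope = 0.01, mirroring the clipping branch at the end of computeDelta.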