Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-26 13:23:25 +03:00)
code clean-up, step 1
git-svn-id: http://svn.statmt.org/repository/mira@3918 cc96ff50-19ce-11e0-b349-13d7f0bd23df
Parent: 8e6c963041
Commit: 120be1df4f
@@ -32,8 +32,6 @@ using namespace Moses;

namespace Mira {

//Decoder::~Decoder() {}

/**
 * Allocates a char* and copies string into it.
**/
@@ -70,8 +68,8 @@ namespace Mira {

MosesDecoder::MosesDecoder(bool scaleByInputLength, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary (TODO: what for?)
const StaticData &staticData = StaticData::Instance();
// force initialisation of the phrase dictionary (TODO: why?)
const StaticData &staticData = StaticData::Instance();
m_sentence = new Sentence(Input);
stringstream in("Initialising decoder..\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
@@ -176,61 +174,6 @@ namespace Mira {
return best;
}

vector<float> MosesDecoder::getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct,
size_t rank,
size_t epoch)
{
StaticData &staticData = StaticData::InstanceNonConst();

m_sentence = new Sentence(Input);
stringstream in(source + "\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
m_sentence->Read(in,inputFactorOrder);
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);

// set the weight for the bleu feature
ostringstream bleuWeightStr;
bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
PARAM_VEC bleuWeight(1,bleuWeightStr.str());

staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
staticData.ReLoadBleuScoreFeatureParameter();

m_bleuScoreFeature->SetCurrentSourceLength((*m_sentence).GetSize());
m_bleuScoreFeature->SetCurrentReference(sentenceid);

// run the decoder
m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
m_manager->ProcessSentence();
TrellisPathList sentences;
m_manager->CalcNBest(1, sentences, distinct);

// read off the feature values and bleu scores for each sentence in the nbest list
Moses::TrellisPathList::const_iterator iter = sentences.begin();
vector<float> bleuAndScore;
const Moses::TrellisPath &path = **iter;
float bleuScore = getBleuScore(path.GetScoreBreakdown());
float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
bleuAndScore.push_back(bleuScore);
bleuAndScore.push_back(scoreWithoutBleu);

VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", 1best translation: ");
Phrase phrase = path.GetTargetPhrase();
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
const Word &word = phrase.GetWord(pos);
Word *newWord = new Word(word);
VERBOSE(1, *newWord);
}

VERBOSE(1, endl);

return bleuAndScore;
}
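Note: getBleuAndScore() returns two numbers for the 1-best hypothesis: its BLEU score and its model score with the weighted BLEU contribution subtracted back out. A minimal sketch of just that bookkeeping, with illustrative names that are not part of the Moses API:

```cpp
#include <vector>

// Sketch: split a total score that already contains a weighted BLEU
// component into (bleuScore, scoreWithoutBleu), mirroring the
// subtraction performed in getBleuAndScore() above.
std::vector<float> splitBleuAndScore(float totalScore, float bleuScore,
                                     float bleuObjectiveWeight,
                                     float bleuScoreWeight) {
  std::vector<float> result;
  result.push_back(bleuScore);
  result.push_back(totalScore - bleuObjectiveWeight * bleuScoreWeight * bleuScore);
  return result;
}
```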

size_t MosesDecoder::getCurrentInputLength() {
return (*m_sentence).GetSize();
}

@@ -270,27 +213,5 @@ namespace Mira {
void MosesDecoder::printReferenceLength(const vector<size_t>& ref_ids) {
m_bleuScoreFeature->PrintReferenceLength(ref_ids);
}

vector<float> MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch, size_t rank) {
vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
if (bleu.size() > 0) {
cerr << "\nRank " << rank << ", BLEU after epoch " << epoch << ": " << bleu[4]*100 << ", "
<< bleu[0]*100 << "/" << bleu[1]*100 << "/" << bleu[2]*100 << "/" << bleu[3]*100 << " "
<< "(BP=" << bleu[5] << ", " << "ratio=" << bleu[6] << ", "
<< "hyp_len=" << bleu[7] << ", ref_len=" << bleu[8] << ")" << endl;
vector<float> bleuAndRatio(2);
bleuAndRatio[0] = bleu[4]*100;
bleuAndRatio[1] = bleu[6];
return bleuAndRatio;
}
else {
cerr << "\nRank " << rank << ", BLEU after epoch " << epoch << ": 0" << endl;
vector<float> bleuAndRatio(2);
bleuAndRatio[0] = 0;
bleuAndRatio[1] = 0;
return bleuAndRatio;
}
}

}
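Note: the index layout of the vector returned by BleuScoreFeature::CalculateBleuOfCorpus() can be read off from the print statement above and from the push_back sequence at the end of that function (later in this commit):

```cpp
// bleu[0..3]  modified 1- to 4-gram precisions
// bleu[4]     corpus BLEU (geometric mean of precisions times BP)
// bleu[5]     brevity penalty (BP)
// bleu[6]     hypothesis/reference length ratio
// bleu[7]     hypothesis length
// bleu[8]     reference length
```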
@@ -64,20 +64,12 @@ class MosesDecoder {
bool distinct,
size_t rank,
size_t epoch);
std::vector<float> getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct,
size_t rank,
size_t epoch);
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
void printBleuFeatureHistory(std::ostream& out);
void printReferenceLength(const std::vector<size_t>& ref_ids);
std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
Moses::ScoreComponentCollection getWeights();
void setWeights(const Moses::ScoreComponentCollection& weights);
void cleanup();
@@ -5,187 +5,6 @@ using namespace std;

namespace Mira {

vector<FValue> Hildreth::optimise (const vector<FVector>& a, const vector<FValue>& b) {

size_t i;
int max_iter = 10000;
float eps = 0.00000001;
float zero = 0.000000000001;

vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );

float max_kkt = -1e100;

size_t K = b.size();

float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product(a[i]);
is_computed[i] = false;
}

int max_kkt_i = -1;

for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}

int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;

while ( max_kkt >= eps && iter < max_iter )
{

diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;

if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else
add_alpha = diff_alpha;

alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;

if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}

for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
kkt[i] = abs ( F[i] );
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}

iter++;
}

return alpha;
}

vector<FValue> Hildreth::optimise (const vector<FVector>& a, const vector<FValue>& b, FValue C) {

size_t i;
int max_iter = 10000;
FValue eps = 0.00000001;
FValue zero = 0.000000000001;

vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );

float max_kkt = -1e100;

size_t K = b.size();

float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product(a[i]);
is_computed[i] = false;
}

int max_kkt_i = -1;

for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}

int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;

while ( max_kkt >= eps && iter < max_iter )
{

diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;

if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else if (try_alpha > C)
add_alpha = C - alpha[max_kkt_i];
else
add_alpha = diff_alpha;

alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;

if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}

for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
kkt[i]=-kkt[i];
else if (alpha[i] > zero)
kkt[i] = abs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}

iter++;
}

return alpha;
}
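Note: reading off the loop structure (an interpretation inferred from the code, not stated in the commit), both overloads run Hildreth's coordinate ascent on the dual of the quadratic program min ½||w||² subject to a_i · w >= b_i:

```latex
\max_{\alpha \ge 0}\ \sum_i \alpha_i b_i - \tfrac{1}{2}\Big\lVert \sum_i \alpha_i a_i \Big\rVert^2,
\qquad
F_i = b_i - \sum_j \alpha_j \,(a_i \cdot a_j)
```

At each iteration the coordinate with the largest KKT violation is moved by F_i / (a_i · a_i) and projected back to alpha_i >= 0 (and alpha_i <= C in the capped overload); the corresponding primal update is w = sum_i alpha_i a_i.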

vector<FValue> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<FValue>& b) {

size_t i;

@@ -5,8 +5,6 @@ namespace Mira {

class Hildreth {
public :
static std::vector<Moses::FValue> optimise (const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b );
static std::vector<Moses::FValue> optimise (const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b, Moses::FValue C);
static std::vector<Moses::FValue> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<Moses::FValue>& b );
static std::vector<Moses::FValue> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<Moses::FValue>& b, Moses::FValue C);
};
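Note: for experimenting outside Moses, here is a self-contained sketch of the same procedure over plain std::vector (dense dot products instead of FVector, no cap C). It is simplified from Hildreth.cpp above, not a drop-in replacement:

```cpp
#include <cmath>
#include <vector>

// Hedged sketch of Hildreth's method: coordinate ascent on the dual of
//   min 1/2 ||w||^2  s.t.  a[i] . w >= b[i],  with alpha[i] >= 0.
static float dot(const std::vector<float>& x, const std::vector<float>& y) {
  float s = 0;
  for (size_t i = 0; i < x.size(); ++i) s += x[i] * y[i];
  return s;
}

std::vector<float> hildreth(const std::vector<std::vector<float> >& a,
                            const std::vector<float>& b) {
  const float eps = 1e-8f;
  const size_t K = b.size();
  std::vector<float> alpha(K, 0.0f), F(b);   // F[i] = b[i] - sum_j alpha[j] (a_i . a_j)
  for (int iter = 0; iter < 10000; ++iter) {
    // KKT value: F[i] while alpha[i] sits at its bound, |F[i]| otherwise.
    size_t best = 0;
    float max_kkt = -1e30f;
    for (size_t i = 0; i < K; ++i) {
      float kkt = alpha[i] > 0 ? std::fabs(F[i]) : F[i];
      if (kkt > max_kkt) { max_kkt = kkt; best = i; }
    }
    if (max_kkt < eps) break;                         // all constraints satisfied
    float aa = dot(a[best], a[best]);
    float step = aa <= 0 ? 0.0f : F[best] / aa;       // unconstrained coordinate step
    if (alpha[best] + step < 0) step = -alpha[best];  // project back to alpha >= 0
    alpha[best] += step;
    for (size_t i = 0; i < K; ++i) F[i] -= step * dot(a[i], a[best]);
  }
  return alpha;
}
```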
@@ -166,7 +166,6 @@ int main(int argc, char** argv) {
string decoder_settings;
float min_weight_change;
float decrease_learning_rate;
bool devBleu;
bool normaliseWeights;
bool print_feature_values;
bool historyOf1best;
@@ -178,7 +177,6 @@ int main(int argc, char** argv) {
float bleuScoreWeight;
float margin_slack;
float margin_slack_incr;
bool analytical_update;
bool perceptron_update;
bool hope_fear;
bool model_hope_fear;
@@ -189,7 +187,6 @@ int main(int argc, char** argv) {
desc.add_options()
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
("adapt-after-epoch", po::value<size_t>(&adapt_after_epoch)->default_value(0), "Index of epoch after which adaptive parameters will be adapted")
("analytical-update", po::value<bool>(&analytical_update)->default_value(0), "Use 1-best lists and compute the update analytically")
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for log-ing feature values")
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is sent to the optimiser for weight adjustments")
@@ -201,9 +198,7 @@
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0), "Decrease learning rate by the given value after every epoch")
("dev-bleu", po::value<bool>(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
@@ -214,12 +209,12 @@
("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(5), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization")
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in nbest list")
@@ -229,6 +224,8 @@
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-update", po::value<bool>(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(true), "Use a sentence-level BLEU scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
@@ -236,8 +233,7 @@
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-update", po::value<bool>(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");

po::options_description cmdline_options;
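Note: these options use boost::program_options. A minimal, self-contained sketch of the declare/parse/notify cycle such an options_description relies on (the surrounding parsing code in mira's main() is not part of this diff):

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char** argv) {
  std::string learner;
  po::options_description desc("Allowed options");
  desc.add_options()
      ("help", "Print this help message and exit")
      ("learner,l", po::value<std::string>(&learner)->default_value("mira"),
       "Learning algorithm");

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, desc), vm);
  po::notify(vm);  // copies parsed values into the bound variables

  if (vm.count("help")) { std::cout << desc << std::endl; return 0; }
  std::cout << "learner = " << learner << std::endl;
  return 0;
}
```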

@@ -355,42 +351,31 @@ int main(int argc, char** argv) {
perceptron_update = true;
model_hope_fear = false; // mira only
hope_fear = false; // mira only
analytical_update = false; // mira only
} else {
cerr << "Error: Unknown optimiser: " << learner << endl;
return 1;
}

// resolve parameter dependencies
if (perceptron_update || analytical_update) {
if (batchSize > 1 && perceptron_update) {
batchSize = 1;
cerr << "Info: Setting batch size to 1 for perceptron/analytical update" << endl;
cerr << "Info: Setting batch size to 1 for perceptron update" << endl;
}

if (hope_n == -1 && fear_n == -1) {
hope_n = n;
fear_n = n;
}

if ((model_hope_fear || analytical_update) && hope_fear) {
if (model_hope_fear && hope_fear) {
hope_fear = false; // is true by default
}

if (!hope_fear && !analytical_update) {
if (!hope_fear) {
model_hope_fear = true;
}

if (model_hope_fear && analytical_update) {
cerr << "Error: Must choose between model-hope-fear and analytical update" << endl;
return 1;
}

if (!sentenceLevelBleu) {
if (!historyOf1best && !historyOfOracles) {
historyOf1best = true;
}
}

if (burnIn && sentenceLevelBleu) {
burnIn = false;
cerr << "Info: Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl;
@@ -545,7 +530,6 @@ int main(int argc, char** argv) {
int sumStillViolatedConstraints_lastEpoch = 0;
int sumConstraintChangeAbs;
int sumConstraintChangeAbs_lastEpoch = 0;
// size_t sumBleuChangeAbs;
float *sendbuf, *recvbuf;
sendbuf = (float *) malloc(sizeof(float));
recvbuf = (float *) malloc(sizeof(float));
@@ -553,7 +537,6 @@ int main(int argc, char** argv) {
// sum of violated constraints
sumStillViolatedConstraints = 0;
sumConstraintChangeAbs = 0;
// sumBleuChangeAbs = 0;

numberOfUpdatesThisEpoch = 0;
// Sum up weights over one epoch, final average uses weights from last epoch
@@ -619,7 +602,7 @@ int main(int argc, char** argv) {
dummyFeatureValues.push_back(newFeatureValues);
dummyBleuScores.push_back(newBleuScores);

if (perceptron_update || analytical_update) {
if (perceptron_update) {
if (historyOf1best) {
// MODEL (for updating the history)
cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl;
@@ -778,15 +761,6 @@ int main(int argc, char** argv) {
}
}

/* // get 1best model results with old weights
vector< vector <float > > bestModelOld_batch;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
bestModelOld_batch.push_back(bestModelOld);
decoder->cleanup();
}*/

// optionally print out the feature values
if (print_feature_values) {
cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl;
@@ -823,14 +797,9 @@ int main(int argc, char** argv) {
vector<vector<float> > dummy1;
vector<size_t> dummy2;
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, dummy1, dummy1, dummy2,
featureValuesHope, featureValuesFear, dummy1, dummy1, dummy2,
learning_rate, rank, epoch);
}
else if (analytical_update) {
update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(mosesWeights,
featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], bleuScoresFear[0][0],
ref_ids[0], learning_rate, rank, epoch);
}
else {
if (hope_fear) {
if (coreWeightMap.size() > 0) {
@@ -859,7 +828,7 @@ int main(int argc, char** argv) {
}

update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids,
learning_rate, rank, epoch);
}
else {
@@ -900,17 +869,6 @@ int main(int argc, char** argv) {
weightDifference.MinusEquals(oldWeights);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl);

/* // get 1best model results with new weights (for each sentence in batch)
vector<float> bestModelNew;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
decoder->cleanup();
sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]);
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl);
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl);
}*/

// update history (for approximate document Bleu)
if (sentenceLevelBleu) {
for (size_t i = 0; i < oracles.size(); ++i) {

@@ -110,7 +110,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
ScoreComponentCollection update(featureValueDiffs[k]);
update.MultiplyEquals(alpha);

// sum up update
// sum updates
summedUpdate.PlusEquals(update);
}
}
@@ -122,24 +122,6 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
return status;
}

ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(summedUpdate);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

// apply learning rate
if (learning_rate != 1) {
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
@@ -158,6 +140,21 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
currWeights.PlusEquals(summedUpdate);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

vector<int> status(2);
status[0] = violatedConstraintsBefore;
status[1] = violatedConstraintsAfter;
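Note: in symbols, the sanity check above counts constraint i as still violated when (notation inferred from the code: l_i the loss, Δf_i the feature-value difference, s = m_margin_slack, ε the tolerance epsilon):

```latex
\text{violated}_i \;\Longleftrightarrow\; \ell_i - \big(\mathbf{w}\cdot\Delta\mathbf{f}_i + s\big) > \varepsilon
```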
@@ -291,25 +288,7 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
return status;
}

ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(summedUpdate);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

// Apply learning rate (fixed or flexible)
// apply learning rate
if (learning_rate != 1) {
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
summedUpdate.MultiplyEquals(learning_rate);
@@ -321,107 +300,27 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
currWeights.PlusEquals(summedUpdate);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

vector<int> statusPlus(2);
statusPlus[0] = violatedConstraintsBefore;
statusPlus[1] = violatedConstraintsAfter;
return statusPlus;
}

vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& currWeights,
ScoreComponentCollection& featureValuesHope,
ScoreComponentCollection& featureValuesFear,
float bleuScoreHope,
float bleuScoreFear,
size_t sentenceId,
float learning_rate,
size_t rank,
size_t epoch) {

float epsilon = 0.0001;
float oldDistanceFromOptimum = 0;
bool constraintViolatedBefore = false;
ScoreComponentCollection weightUpdate;

// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
ScoreComponentCollection featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float loss = bleuScoreHope - bleuScoreFear;
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;

if (diff > epsilon) {
// constraint violated
oldDistanceFromOptimum += diff;
constraintViolatedBefore = true;

// compute alpha for given constraint: (loss - model score diff) / || feature value diff ||^2
// featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm() == featureValueDiff.InnerProduct(featureValueDiff)
// from Crammer&Singer 2006: alpha = min {C , l_t/ ||x||^2}
float squaredNorm = featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm();

if (squaredNorm > 0) {
float alpha = diff / squaredNorm;
if (m_slack > 0 ) {
if (alpha > m_slack) {
alpha = m_slack;
}
else if (alpha < m_slack*(-1)) {
alpha = m_slack*(-1);
}
}

cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl;
featureValueDiff.MultiplyEquals(alpha);
weightUpdate.PlusEquals(featureValueDiff);
}
else {
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0" << endl);
}
}

if (!constraintViolatedBefore) {
// constraint satisfied, nothing to do
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl;
vector<int> status(2);
status[0] = 0;
status[1] = 0;
return status;
}

// sanity check: constraint still violated after optimisation?
ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(weightUpdate);
bool constraintViolatedAfter = false;
float newDistanceFromOptimum = 0;
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
modelScoreDiff = featureValueDiff.InnerProduct(newWeights);
diff = loss - (modelScoreDiff + m_margin_slack);
// approximate comparison between floats!
if (diff > epsilon) {
constraintViolatedAfter = true;
newDistanceFromOptimum += (loss - modelScoreDiff);
}

VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

// apply update to weight vector
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
currWeights.PlusEquals(weightUpdate);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);

vector<int> status(2);
status[0] = 1;
status[1] = constraintViolatedAfter ? 1 : 0;
return status;
}

}
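Note: matching the comments in updateWeightsAnalytically() and the margin slack read off from the code, the single-constraint step is

```latex
\alpha = \operatorname{clip}_{[-C,\,C]}\!\left(\frac{\ell - (\mathbf{w}\cdot\Delta\mathbf{f} + s)}{\lVert\Delta\mathbf{f}\rVert^{2}}\right),
\qquad
\mathbf{w} \leftarrow \mathbf{w} + \alpha\,\Delta\mathbf{f}
```

with Δf = f(hope) - f(fear), l = BLEU(hope) - BLEU(fear), s = m_margin_slack and C = m_slack (no clipping when m_slack <= 0); this is the alpha = min{C, l_t / ||x||^2} rule cited from Crammer & Singer 2006.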

@@ -67,15 +67,6 @@ namespace Mira {
m_scale_update(scale_update),
m_margin_slack(margin_slack) { }

std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& featureValuesHope,
Moses::ScoreComponentCollection& featureValuesFear,
float bleuScoresHope,
float bleuScoresFear,
size_t sentenceId,
float learning_rate,
size_t rank,
size_t epoch);
std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector<std::vector<float> >& losses,
@@ -117,6 +108,7 @@ namespace Mira {
// scale update with log 10 of oracle BLEU score
bool m_scale_update;

// slack when comparing losses to model scores
float m_margin_slack;
};
}

@@ -45,7 +45,6 @@ vector<int> Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeig
vector<int> update_status;
update_status.push_back(0);
update_status.push_back(0);
update_status.push_back(0);
return update_status;
}

@@ -94,14 +94,12 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
Phrase ngram(Output);
//cerr << "start: " << end_idx-order << " end: " << end_idx << endl;
for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) {
const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]);
Word w;
w.SetFactor(0, f);
ngram.AddWord(w);
}
//cerr << "Ref: " << ngram << endl;
ref_pair.second[ngram] += 1;
}
}
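Note: the nested loops above enumerate every reference n-gram up to BleuScoreState::bleu_order and count its occurrences. A standalone sketch of the same counting with plain strings standing in for Moses Phrase/Word objects (illustrative only):

```cpp
#include <map>
#include <string>
#include <vector>

typedef std::map<std::string, size_t> NGramCounts;

// Count all n-grams of order 1..maxOrder in a tokenised reference,
// the way LoadReferences() fills ref_pair.second above.
NGramCounts countRefNgrams(const std::vector<std::string>& refTokens,
                           size_t maxOrder) {
  NGramCounts counts;
  for (size_t order = 1; order <= maxOrder; ++order) {
    for (size_t end = order; end <= refTokens.size(); ++end) {
      std::string ngram;
      for (size_t i = end - order; i < end; ++i) {
        if (i > end - order) ngram += " ";
        ngram += refTokens[i];
      }
      counts[ngram] += 1;  // ref_pair.second[ngram] += 1 in the original
    }
  }
  return counts;
}
```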
@@ -120,10 +118,10 @@ void BleuScoreFeature::SetCurrentReference(size_t ref_id) {
}

/*
 * Update the pseudo-document big_O after each translation of a source sentence.
 * (big_O is an exponentially-weighted moving average of vectors c(e;{r_k}))
 * big_O = 0.9 * (big_O + c(e_oracle))
 * big_O_f = 0.9 * (big_O_f + |f|) input length of document big_O
 * Update the pseudo-document O after each translation of a source sentence.
 * (O is an exponentially-weighted moving average of vectors c(e;{r_k}))
 * O = m_historySmoothing * (O + c(e_oracle))
 * O_f = m_historySmoothing * (O_f + |f|) input length of pseudo-document
 */
void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
Phrase phrase(Output, hypo);
@@ -138,7 +136,6 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
m_count_history[i] = m_historySmoothing * (m_count_history[i] + ngram_counts[i]);
m_match_history[i] = m_historySmoothing * (m_match_history[i] + ngram_matches[i]);
//cerr << "precisionHistory " << i + 1 << ": " << (m_match_history[i]/m_count_history[i]) << " (" << m_match_history[i] << "/" << m_count_history[i] << ")" << endl;
}

// update counts for reference and target length
@@ -148,7 +145,7 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
}
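Note: a tiny standalone illustration of the smoothing recurrence documented above, O = m_historySmoothing * (O + c); the counts are made up:

```cpp
#include <cstdio>

int main() {
  float smoothing = 0.9f;                  // m_historySmoothing
  float history = 0.0f;                    // one slot of m_count_history
  float counts[3] = { 10.f, 12.f, 8.f };   // hypothetical per-sentence counts
  for (int i = 0; i < 3; ++i) {
    history = smoothing * (history + counts[i]);  // EWMA update
    std::printf("after sentence %d: %.3f\n", i + 1, history);
  }
  return 0;
}
```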

/*
 * Update history with a batch of oracle translations
 * Update history with a batch of translations
 */
void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypos, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch) {
for (size_t batchPosition = 0; batchPosition < hypos.size(); ++batchPosition){
@@ -195,7 +192,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
}

/*
 * Update history with a batch of oracle translations
 * Print batch of reference translations
 */
void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
for (size_t batchPosition = 0; batchPosition < ref_ids.size(); ++batchPosition){
@@ -325,7 +322,6 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
}

new_state->m_source_length = cur_hypo.GetWordsBitmap().GetSize();
new_state->m_source_phrase_length = cur_hypo.GetCurrSourceWordsRange().GetNumWordsCovered(); // todo: delete
new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx,
ctx_end_idx));
new_state->m_target_length += cur_hypo.GetTargetPhrase().GetSize();
@@ -337,7 +333,6 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,

// Calculate new bleu.
new_bleu = CalculateBleu(new_state);
//cerr << "NS: " << *new_state << " NB " << new_bleu << endl;

// Set score to new Bleu score
accumulator->PlusEquals(this, new_bleu - old_bleu);
@@ -396,82 +391,6 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
return precision;
}

vector<float> BleuScoreFeature::CalculateBleuOfCorpus(const vector< vector< const Word* > >& oracles, const vector<size_t>& ref_ids) {
// get ngram matches and counts for all oracle sentences and their references
vector<size_t> sumOfClippedNgramMatches(BleuScoreState::bleu_order);
vector<size_t> sumOfNgramCounts(BleuScoreState::bleu_order);
size_t ref_length = 0;
size_t target_length = 0;

for (size_t batchPosition = 0; batchPosition < oracles.size(); ++batchPosition){
Phrase phrase(Output, oracles[batchPosition]);
size_t ref_id = ref_ids[batchPosition];
size_t cur_ref_length = m_refs[ref_id].first;
NGrams cur_ref_ngrams = m_refs[ref_id].second;

ref_length += cur_ref_length;
target_length += oracles[batchPosition].size();

std::vector< size_t > ngram_counts(BleuScoreState::bleu_order);
std::vector< size_t > clipped_ngram_matches(BleuScoreState::bleu_order);
GetClippedNgramMatchesAndCounts(phrase, cur_ref_ngrams, ngram_counts, clipped_ngram_matches, 0);

// add clipped ngram matches and ngram counts to corpus sums
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
sumOfClippedNgramMatches[i] += clipped_ngram_matches[i];
sumOfNgramCounts[i] += ngram_counts[i];
}
}

if (!sumOfNgramCounts[0]) {
vector<float> empty(0);
return empty;
}
if (!sumOfClippedNgramMatches[0]) {
vector<float> empty(0);
return empty; // if we have no unigram matches, score should be 0
}

// calculate bleu score
float precision = 1.0;

vector<float> bleu;
// Calculate geometric mean of modified ngram precisions
// BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
//      = BP * 4th root(PRODUCT_1_4 p_n)
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
if (sumOfNgramCounts[i]) {
precision *= 1.0*sumOfClippedNgramMatches[i] / sumOfNgramCounts[i];
bleu.push_back(1.0*sumOfClippedNgramMatches[i] / sumOfNgramCounts[i]);
}
}

// take geometric mean
precision = pow(precision, (float)1/4);

// Apply brevity penalty if applicable.
// BP = 1 if c > r
// BP = e^(1 - r/c) if c <= r
// where
// c: length of the candidate translation
// r: effective reference length (sum of best match lengths for each candidate sentence)
float BP;
if (target_length < ref_length) {
precision *= exp(1 - (1.0*ref_length/target_length));
BP = exp(1 - (1.0*ref_length/target_length));
}
else {
BP = 1.0;
}

bleu.push_back(precision);
bleu.push_back(BP);
bleu.push_back(1.0*target_length/ref_length);
bleu.push_back(target_length);
bleu.push_back(ref_length);
return bleu;
}
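Note: the computation above is standard corpus BLEU; in the notation of the comments (c = candidate length, r = effective reference length):

```latex
\mathrm{BLEU} = \mathrm{BP}\cdot\exp\Big(\sum_{n=1}^{4}\tfrac{1}{4}\log p_n\Big)
              = \mathrm{BP}\cdot\Big(\prod_{n=1}^{4} p_n\Big)^{1/4},
\qquad
\mathrm{BP} = \begin{cases} 1 & c > r \\ e^{\,1-r/c} & c \le r \end{cases}
```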

const FFState* BleuScoreFeature::EmptyHypothesisState(const InputType& input) const
{
return new BleuScoreState();

@@ -29,8 +29,6 @@ private:
size_t m_source_length;
size_t m_target_length;

size_t m_source_phrase_length; // todo: delete

// scaled reference length is needed for scoring incomplete hypotheses against reference translation
float m_scaled_ref_length;

@@ -52,7 +50,7 @@ public:
m_target_length_history(0),
m_ref_length_history(0),
m_scale_by_input_length(true),
m_historySmoothing(0.9) {}
m_historySmoothing(0.7) {}

BleuScoreFeature(bool scaleByInputLength, float historySmoothing):
StatefulFeatureFunction("BleuScore"),
@@ -101,11 +99,10 @@ public:
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
float CalculateBleu(BleuScoreState*) const;
std::vector<float> CalculateBleuOfCorpus(const std::vector< std::vector< const Word* > >& hypos, const std::vector<size_t>& ref_ids);
const FFState* EmptyHypothesisState(const InputType&) const;

private:
// counts for pseudo-document big_O
// counts for pseudo-document
std::vector< float > m_count_history;
std::vector< float > m_match_history;
float m_source_length_history;
@@ -117,9 +114,10 @@ private:
NGrams m_cur_ref_ngrams;
size_t m_cur_ref_length;

// whether or not to scale the BLEU score by a history of the input size
// scale BLEU score by history of input size
bool m_scale_by_input_length;

// smoothing factor for history counts
float m_historySmoothing;
};