mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-27 03:49:57 +03:00
Merge from miramerge.
Reverted ChartHypothesis as it breaks chart-decoding.
This commit is contained in:
parent
ced24a881d
commit
1e10bb7ef7
@ -184,9 +184,9 @@ namespace Mira {
|
||||
m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
|
||||
}
|
||||
|
||||
void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
|
||||
/* void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
|
||||
m_bleuScoreFeature->LoadReferences(refs);
|
||||
}
|
||||
}*/
|
||||
|
||||
void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
|
||||
m_bleuScoreFeature->PrintHistory(out);
|
||||
@ -200,9 +200,11 @@ namespace Mira {
|
||||
return m_bleuScoreFeature->GetReferenceLength(ref_id);
|
||||
}
|
||||
|
||||
void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
|
||||
void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
|
||||
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
|
||||
float scaleByX, float historySmoothing, size_t scheme, float relax_BP) {
|
||||
m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength, scaleByTargetLength,
|
||||
m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength,
|
||||
scaleByTargetLengthLinear, scaleByTargetLengthTrend,
|
||||
scaleByX, historySmoothing, scheme, relax_BP);
|
||||
}
|
||||
}
|
||||
|
@ -64,11 +64,12 @@ class MosesDecoder {
|
||||
size_t getCurrentInputLength();
|
||||
void updateHistory(const std::vector<const Moses::Word*>& words);
|
||||
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
|
||||
void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
|
||||
// void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
|
||||
void printBleuFeatureHistory(std::ostream& out);
|
||||
void printReferenceLength(const std::vector<size_t>& ref_ids);
|
||||
size_t getReferenceLength(size_t ref_id);
|
||||
void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
|
||||
void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
|
||||
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
|
||||
float scaleByX, float historySmoothing, size_t scheme, float relax_BP);
|
||||
Moses::ScoreComponentCollection getWeights();
|
||||
void setWeights(const Moses::ScoreComponentCollection& weights);
|
||||
|
124
mira/Main.cpp
124
mira/Main.cpp
@ -82,7 +82,8 @@ int main(int argc, char** argv) {
|
||||
float historySmoothing;
|
||||
bool scaleByInputLength;
|
||||
bool scaleByReferenceLength;
|
||||
bool scaleByTargetLength;
|
||||
bool scaleByTargetLengthLinear;
|
||||
bool scaleByTargetLengthTrend;
|
||||
bool scaleByAvgLength;
|
||||
float scaleByX;
|
||||
float slack;
|
||||
@ -119,6 +120,8 @@ int main(int argc, char** argv) {
|
||||
float max_length_dev_hypos;
|
||||
float max_length_dev_reference;
|
||||
float relax_BP;
|
||||
bool stabiliseLength;
|
||||
bool delayUpdates;
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
|
||||
@ -133,6 +136,7 @@ int main(int argc, char** argv) {
|
||||
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
|
||||
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
|
||||
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
|
||||
("delay-updates", po::value<bool>(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
|
||||
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
|
||||
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
|
||||
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
|
||||
@ -164,7 +168,8 @@ int main(int argc, char** argv) {
|
||||
("relax-BP", po::value<float>(&relax_BP)->default_value(1), "Relax the BP by setting this value between 0 and 1")
|
||||
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by (a history of) the input length")
|
||||
("scale-by-reference-length", po::value<bool>(&scaleByReferenceLength)->default_value(false), "Scale BLEU by (a history of) the reference length")
|
||||
("scale-by-target-length", po::value<bool>(&scaleByTargetLength)->default_value(false), "Scale BLEU by (a history of) the target length")
|
||||
("scale-by-target-length-linear", po::value<bool>(&scaleByTargetLengthLinear)->default_value(false), "Scale BLEU by (a history of) the target length (linear future estimate)")
|
||||
("scale-by-target-length-trend", po::value<bool>(&scaleByTargetLengthTrend)->default_value(false), "Scale BLEU by (a history of) the target length (trend-based future estimate)")
|
||||
("scale-by-avg-length", po::value<bool>(&scaleByAvgLength)->default_value(false), "Scale BLEU by (a history of) the average of input and reference length")
|
||||
("scale-by-x", po::value<float>(&scaleByX)->default_value(1), "Scale the BLEU score by value x")
|
||||
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
|
||||
@ -174,6 +179,7 @@ int main(int argc, char** argv) {
|
||||
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
|
||||
("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
|
||||
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
|
||||
("stabilise-length", po::value<bool>(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1")
|
||||
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
|
||||
("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
|
||||
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
|
||||
@ -268,11 +274,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
if (scaleByReferenceLength)
|
||||
scaleByInputLength = false;
|
||||
if (scaleByTargetLength)
|
||||
scaleByInputLength = false;
|
||||
if (scaleByAvgLength)
|
||||
if (scaleByReferenceLength || scaleByTargetLengthLinear || scaleByTargetLengthTrend || scaleByAvgLength)
|
||||
scaleByInputLength = false;
|
||||
|
||||
// initialise Moses
|
||||
@ -285,7 +287,8 @@ int main(int argc, char** argv) {
|
||||
vector<string> decoder_params;
|
||||
boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
|
||||
MosesDecoder* decoder = new MosesDecoder(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
|
||||
decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength, scaleByTargetLength,
|
||||
decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength,
|
||||
scaleByTargetLengthLinear, scaleByTargetLengthTrend,
|
||||
scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP);
|
||||
if (normaliseWeights) {
|
||||
ScoreComponentCollection startWeights = decoder->getWeights();
|
||||
@ -409,6 +412,12 @@ int main(int argc, char** argv) {
|
||||
ScoreComponentCollection mixedAverageWeightsPrevious;
|
||||
ScoreComponentCollection mixedAverageWeightsBeforePrevious;
|
||||
|
||||
// when length ratio >= 1, set this to true
|
||||
bool fixLength = false;
|
||||
|
||||
// for accumulating delayed updates
|
||||
ScoreComponentCollection delayedWeightUpdates;
|
||||
|
||||
bool stop = false;
|
||||
// int sumStillViolatedConstraints;
|
||||
float *sendbuf, *recvbuf;
|
||||
@ -427,6 +436,12 @@ int main(int argc, char** argv) {
|
||||
// number of weight dumps this epoch
|
||||
size_t weightEpochDump = 0;
|
||||
|
||||
// sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
|
||||
size_t dev_hypothesis_length = 0;
|
||||
size_t dev_reference_length = 0;
|
||||
|
||||
delayedWeightUpdates.ZeroAll();
|
||||
|
||||
size_t shardPosition = 0;
|
||||
vector<size_t>::const_iterator sid = shard.begin();
|
||||
while (sid != shard.end()) {
|
||||
@ -460,7 +475,7 @@ int main(int argc, char** argv) {
|
||||
for (size_t batchPosition = 0; batchPosition < batchSize && sid
|
||||
!= shard.end(); ++batchPosition) {
|
||||
string& input = inputSentences[*sid];
|
||||
const vector<string>& refs = referenceSentences[*sid];
|
||||
// const vector<string>& refs = referenceSentences[*sid];
|
||||
cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
|
||||
|
||||
vector<ScoreComponentCollection> newFeatureValues;
|
||||
@ -474,7 +489,7 @@ int main(int argc, char** argv) {
|
||||
featureValuesFear.push_back(newFeatureValues);
|
||||
bleuScoresHope.push_back(newBleuScores);
|
||||
bleuScoresFear.push_back(newBleuScores);
|
||||
if (historyOf1best) {
|
||||
if (historyOf1best || stabiliseLength) {
|
||||
dummyFeatureValues.push_back(newFeatureValues);
|
||||
dummyBleuScores.push_back(newBleuScores);
|
||||
}
|
||||
@ -493,13 +508,16 @@ int main(int argc, char** argv) {
|
||||
cerr << ", l-ratio hope: " << hope_length_ratio << endl;
|
||||
|
||||
vector<const Word*> bestModel;
|
||||
if (historyOf1best) {
|
||||
if (historyOf1best || stabiliseLength) {
|
||||
// MODEL (for updating the history only, using dummy vectors)
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl;
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
|
||||
bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
|
||||
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
|
||||
distinctNbest, rank, epoch);
|
||||
decoder->cleanup();
|
||||
cerr << endl;
|
||||
dev_hypothesis_length += bestModel.size();
|
||||
dev_reference_length += reference_length;
|
||||
}
|
||||
|
||||
// FEAR
|
||||
@ -576,6 +594,10 @@ int main(int argc, char** argv) {
|
||||
oneBests.push_back(bestModel);
|
||||
float model_length_ratio = (float)bestModel.size()/reference_length;
|
||||
cerr << ", l-ratio model: " << model_length_ratio << endl;
|
||||
if (stabiliseLength) {
|
||||
dev_hypothesis_length += bestModel.size();
|
||||
dev_reference_length += reference_length;
|
||||
}
|
||||
|
||||
// FEAR
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
|
||||
@ -622,6 +644,19 @@ int main(int argc, char** argv) {
|
||||
break;
|
||||
}
|
||||
|
||||
// set word penalty to 0 before optimising (if 'stabilise-length' is active)
|
||||
if (fixLength) {
|
||||
iter = featureFunctions.begin();
|
||||
for (; iter != featureFunctions.end(); ++iter) {
|
||||
if ((*iter)->GetScoreProducerWeightShortName() == "w") {
|
||||
ignoreWPFeature(featureValues, (*iter));
|
||||
ignoreWPFeature(featureValuesHope, (*iter));
|
||||
ignoreWPFeature(featureValuesFear, (*iter));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// take logs of feature values
|
||||
if (logFeatureValues) {
|
||||
takeLogs(featureValuesHope, baseOfLog);
|
||||
@ -654,24 +689,28 @@ int main(int argc, char** argv) {
|
||||
// Run optimiser on batch:
|
||||
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
|
||||
size_t update_status;
|
||||
ScoreComponentCollection weightUpdate;
|
||||
if (perceptron_update) {
|
||||
vector<vector<float> > dummy1;
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
|
||||
featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
|
||||
}
|
||||
else if (hope_fear) {
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
|
||||
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
|
||||
}
|
||||
else {
|
||||
// model_hope_fear
|
||||
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
|
||||
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
|
||||
featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
|
||||
}
|
||||
|
||||
// sumStillViolatedConstraints += update_status;
|
||||
|
||||
if (update_status == 0) { // if weights were updated
|
||||
// apply weight update
|
||||
mosesWeights.PlusEquals(weightUpdate);
|
||||
|
||||
if (normaliseWeights) {
|
||||
mosesWeights.L1Normalise();
|
||||
}
|
||||
@ -690,6 +729,9 @@ int main(int argc, char** argv) {
|
||||
mosesWeights = averageWeights;
|
||||
}
|
||||
|
||||
if (delayUpdates)
|
||||
delayedWeightUpdates.PlusEquals(weightUpdate);
|
||||
else
|
||||
// set new Moses weights
|
||||
decoder->setWeights(mosesWeights);
|
||||
}
|
||||
@ -802,8 +844,25 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
}// end dumping
|
||||
|
||||
} // end of shard loop, end of this epoch
|
||||
|
||||
if (delayUpdates) {
|
||||
// apply all updates from this epoch to the weight vector
|
||||
ScoreComponentCollection mosesWeights = decoder->getWeights();
|
||||
mosesWeights.PlusEquals(delayedWeightUpdates);
|
||||
decoder->setWeights(mosesWeights);
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
|
||||
}
|
||||
|
||||
if (stabiliseLength && !fixLength) {
|
||||
float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
|
||||
if (lengthRatio >= 1) {
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl;
|
||||
fixLength = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity > 0) {
|
||||
cerr << "Bleu feature history after epoch " << epoch << endl;
|
||||
decoder->printBleuFeatureHistory(cerr);
|
||||
@ -840,28 +899,19 @@ int main(int argc, char** argv) {
|
||||
if (rank == 0 && (epoch >= 2)) {
|
||||
ScoreComponentCollection firstDiff(mixedAverageWeights);
|
||||
firstDiff.MinusEquals(mixedAverageWeightsPrevious);
|
||||
VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
|
||||
VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff <<
|
||||
" (max: " << firstDiff.GetLInfNorm() << ")" << endl);
|
||||
ScoreComponentCollection secondDiff(mixedAverageWeights);
|
||||
secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
|
||||
VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
|
||||
VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff <<
|
||||
" (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl);
|
||||
|
||||
// check whether stopping criterion has been reached
|
||||
// (both difference vectors must have all weight changes smaller than min_weight_change)
|
||||
FVector changes1 = firstDiff.GetScoresVector();
|
||||
FVector changes2 = secondDiff.GetScoresVector();
|
||||
FVector::const_iterator iterator1 = changes1.cbegin();
|
||||
FVector::const_iterator iterator2 = changes2.cbegin();
|
||||
while (iterator1 != changes1.cend()) {
|
||||
if (abs((*iterator1).second) >= min_weight_change || abs(
|
||||
(*iterator2).second) >= min_weight_change) {
|
||||
if (firstDiff.GetLInfNorm() >= min_weight_change)
|
||||
reached = false;
|
||||
if (secondDiff.GetLInfNorm() >= min_weight_change)
|
||||
reached = false;
|
||||
break;
|
||||
}
|
||||
|
||||
++iterator1;
|
||||
++iterator2;
|
||||
}
|
||||
|
||||
if (reached) {
|
||||
// stop MIRA
|
||||
stop = true;
|
||||
@ -991,16 +1041,20 @@ void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues
|
||||
}
|
||||
|
||||
void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap) {
|
||||
for (size_t i = 0; i < featureValues.size(); ++i) {
|
||||
for (size_t i = 0; i < featureValues.size(); ++i)
|
||||
for (size_t j = 0; j < featureValues[i].size(); ++j) {
|
||||
// set all core features to 0
|
||||
StrFloatMap::iterator p;
|
||||
for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
|
||||
{
|
||||
featureValues[i][j].Assign(p->first, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the score stored for producer `sp` to 0 in every
// ScoreComponentCollection held by the nested `featureValues` vector.
// The caller in main() passes the producer whose weight short name is "w"
// (the word penalty), so that feature is ignored by the optimiser.
void ignoreWPFeature(vector<vector<ScoreComponentCollection> > &featureValues, const ScoreProducer* sp) {
  typedef vector<vector<ScoreComponentCollection> >::iterator OuterIter;
  typedef vector<ScoreComponentCollection>::iterator InnerIter;

  for (OuterIter group = featureValues.begin(); group != featureValues.end(); ++group) {
    for (InnerIter entry = group->begin(); entry != group->end(); ++entry) {
      // set WP feature to 0
      entry->Assign(sp, 0);
    }
  }
}
|
||||
|
||||
void takeLogs(vector<vector<ScoreComponentCollection> > &featureValues, size_t base) {
|
||||
|
@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include "ScoreComponentCollection.h"
|
||||
#include "Word.h"
|
||||
#include "ScoreProducer.h"
|
||||
|
||||
typedef std::map<const std::string, float> StrFloatMap;
|
||||
typedef std::pair<const std::string, float> StrFloatPair;
|
||||
@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap);
|
||||
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
|
||||
void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
|
||||
void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap);
|
||||
void ignoreWPFeature(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, const Moses::ScoreProducer* sp);
|
||||
void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
|
||||
void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);
|
||||
|
||||
|
@ -7,7 +7,9 @@ using namespace std;
|
||||
|
||||
namespace Mira {
|
||||
|
||||
size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
|
||||
size_t MiraOptimiser::updateWeights(
|
||||
ScoreComponentCollection& currWeights,
|
||||
ScoreComponentCollection& weightUpdate,
|
||||
const vector<vector<ScoreComponentCollection> >& featureValues,
|
||||
const vector<vector<float> >& losses,
|
||||
const vector<vector<float> >& bleuScores,
|
||||
@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
|
||||
}
|
||||
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
|
||||
|
||||
// apply update to weight vector
|
||||
currWeights.PlusEquals(summedUpdate);
|
||||
weightUpdate.PlusEquals(summedUpdate);
|
||||
|
||||
// Sanity check: are there still violated constraints after optimisation?
|
||||
/* int violatedConstraintsAfter = 0;
|
||||
@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
size_t MiraOptimiser::updateWeightsHopeFear(
|
||||
Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
|
||||
}
|
||||
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
|
||||
|
||||
// apply update to weight vector
|
||||
currWeights.PlusEquals(summedUpdate);
|
||||
weightUpdate.PlusEquals(summedUpdate);
|
||||
|
||||
// Sanity check: are there still violated constraints after optimisation?
|
||||
/* int violatedConstraintsAfter = 0;
|
||||
|
@ -30,7 +30,9 @@ namespace Mira {
|
||||
public:
|
||||
Optimiser() {}
|
||||
|
||||
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
virtual size_t updateWeightsHopeFear(
|
||||
Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
@ -42,7 +44,9 @@ namespace Mira {
|
||||
|
||||
class Perceptron : public Optimiser {
|
||||
public:
|
||||
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
virtual size_t updateWeightsHopeFear(
|
||||
Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
@ -66,6 +70,7 @@ namespace Mira {
|
||||
m_margin_slack(margin_slack) { }
|
||||
|
||||
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
|
||||
const std::vector<std::vector<float> >& losses,
|
||||
const std::vector<std::vector<float> >& bleuScores,
|
||||
@ -75,6 +80,7 @@ namespace Mira {
|
||||
size_t rank,
|
||||
size_t epoch);
|
||||
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
|
@ -24,7 +24,9 @@ using namespace std;
|
||||
|
||||
namespace Mira {
|
||||
|
||||
size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
|
||||
size_t Perceptron::updateWeightsHopeFear(
|
||||
ScoreComponentCollection& currWeights,
|
||||
ScoreComponentCollection& weightUpdate,
|
||||
const vector< vector<ScoreComponentCollection> >& featureValuesHope,
|
||||
const vector< vector<ScoreComponentCollection> >& featureValuesFear,
|
||||
const vector< vector<float> >& dummy1,
|
||||
@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
|
||||
featureValueDiff.MinusEquals(featureValuesFear[0][0]);
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
|
||||
featureValueDiff.MultiplyEquals(perceptron_learning_rate);
|
||||
currWeights.PlusEquals(featureValueDiff);
|
||||
weightUpdate.PlusEquals(featureValueDiff);
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
|
||||
return 0;
|
||||
}
|
||||
|
@ -72,6 +72,9 @@ my $moses_ini_file = ¶m_required("train.moses-ini-file");
|
||||
my $input_file = ¶m_required("train.input-file");
|
||||
&check_exists ("train input file", $input_file);
|
||||
my $reference_files = ¶m_required("train.reference-files");
|
||||
for my $ref (glob $reference_files . "*") {
|
||||
&check_exists ("ref files", $ref);
|
||||
}
|
||||
my $trainer_exe = ¶m_required("train.trainer");
|
||||
&check_exists("Training executable", $trainer_exe);
|
||||
#my $weights_file = ¶m_required("train.weights-file");
|
||||
@ -94,20 +97,21 @@ my $burn_in_reference_files = ¶m("train.burn-in-reference-files");
|
||||
my $skipTrain = ¶m("train.skip", 0);
|
||||
|
||||
#devtest configuration
|
||||
my ($devtest_input_file, $devtest_reference_file,$devtest_ini_file,$bleu_script,$use_moses);
|
||||
my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses);
|
||||
my $test_exe = ¶m("devtest.moses");
|
||||
&check_exists("test executable", $test_exe);
|
||||
$bleu_script = ¶m_required("devtest.bleu");
|
||||
&check_exists("multi-bleu script", $bleu_script);
|
||||
$devtest_input_file = ¶m_required("devtest.input-file");
|
||||
$devtest_reference_file = ¶m_required("devtest.reference-file");
|
||||
&check_exists ("devtest input file", $devtest_input_file);
|
||||
|
||||
for my $ref (glob $devtest_reference_file . "*") {
|
||||
$devtest_reference_files = ¶m_required("devtest.reference-file");
|
||||
for my $ref (glob $devtest_reference_files . "*") {
|
||||
&check_exists ("devtest ref file", $ref);
|
||||
}
|
||||
$devtest_ini_file = ¶m_required("devtest.moses-ini-file");
|
||||
&check_exists ("devtest ini file", $devtest_ini_file);
|
||||
|
||||
|
||||
my $weight_file_stem = "$name-weights";
|
||||
my $extra_memory_devtest = ¶m("devtest.extra-memory",0);
|
||||
my $skip_devtest = ¶m("devtest.skip-devtest",0);
|
||||
@ -174,8 +178,9 @@ my @refs;
|
||||
if (ref($reference_files) eq 'ARRAY') {
|
||||
@refs = @$reference_files;
|
||||
} else {
|
||||
@refs = glob $reference_files;
|
||||
@refs = glob $reference_files . "*"
|
||||
}
|
||||
my $arr_refs = \@refs;
|
||||
|
||||
if (!$skipTrain) {
|
||||
#write the script
|
||||
@ -198,7 +203,6 @@ print TRAIN "-f $moses_ini_file \\\n";
|
||||
print TRAIN "-i $input_file \\\n";
|
||||
|
||||
for my $ref (@refs) {
|
||||
&check_exists("train ref file", $ref);
|
||||
print TRAIN "-r $ref ";
|
||||
}
|
||||
print TRAIN "\\\n";
|
||||
@ -206,15 +210,15 @@ print TRAIN "\\\n";
|
||||
if ($burn_in) {
|
||||
print TRAIN "--burn-in 1 \\\n";
|
||||
print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
|
||||
my @refs;
|
||||
my @burnin_refs;
|
||||
if (ref($burn_in_reference_files) eq 'ARRAY') {
|
||||
@refs = @$burn_in_reference_files;
|
||||
@burnin_refs = @$burn_in_reference_files;
|
||||
} else {
|
||||
@refs = glob $burn_in_reference_files;
|
||||
@burnin_refs = glob $burn_in_reference_files . "*";
|
||||
}
|
||||
for my $ref (@refs) {
|
||||
&check_exists("burn-in ref file", $ref);
|
||||
print TRAIN "--burn-in-reference-files $ref ";
|
||||
for my $burnin_ref (@burnin_refs) {
|
||||
&check_exists("burn-in ref file", $burnin_ref);
|
||||
print TRAIN "--burn-in-reference-files $burnin_ref ";
|
||||
}
|
||||
print TRAIN "\\\n";
|
||||
}
|
||||
@ -317,10 +321,10 @@ while(1) {
|
||||
my $suffix = "";
|
||||
print "weight file exists? ".(-e $new_weight_file)."\n";
|
||||
if (!$skip_devtest) {
|
||||
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_file, $skip_submit_test);
|
||||
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test);
|
||||
}
|
||||
if (!$skip_dev) {
|
||||
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $refs[0], $skip_submit_test);
|
||||
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "ChartHypothesis.h"
|
||||
#include "DotChart.h"
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include "FeatureVector.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
@ -345,7 +348,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
|
||||
// print the surface factor of the translation
|
||||
out << translationId << " ||| ";
|
||||
OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
|
||||
out << " |||";
|
||||
out << " ||| ";
|
||||
|
||||
// print the scores in a hardwired order
|
||||
// before each model type, the corresponding command-line-like name must be emitted
|
||||
@ -362,18 +365,15 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::string lastName = "";
|
||||
|
||||
// translation components
|
||||
const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
|
||||
for( size_t i=0; i<pds.size(); i++ ) {
|
||||
size_t pd_numinputscore = pds[i]->GetNumInputScores();
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
|
||||
for (size_t j = 0; j<scores.size(); ++j){
|
||||
|
||||
if (labeledOutput && (i == 0) ){
|
||||
if ((j == 0) || (j == pd_numinputscore)){
|
||||
lastName = pds[i]->GetScoreProducerWeightShortName(j);
|
||||
@ -393,12 +393,10 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
|
||||
// generation
|
||||
const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
|
||||
if (gds.size() > 0) {
|
||||
|
||||
for( size_t i=0; i<gds.size(); i++ ) {
|
||||
size_t pd_numinputscore = gds[i]->GetNumInputScores();
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
|
||||
for (size_t j = 0; j<scores.size(); ++j){
|
||||
|
||||
if (labeledOutput && (i == 0) ){
|
||||
if ((j == 0) || (j == pd_numinputscore)){
|
||||
lastName = gds[i]->GetScoreProducerWeightShortName(j);
|
||||
@ -410,9 +408,21 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
|
||||
}
|
||||
}
|
||||
|
||||
// output sparse features
|
||||
lastName = "";
|
||||
const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ )
|
||||
if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
|
||||
OutputSparseFeatureScores( out, path, sff[i], lastName );
|
||||
|
||||
const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
|
||||
for( size_t i=0; i<slf.size(); i++ )
|
||||
if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
|
||||
OutputSparseFeatureScores( out, path, slf[i], lastName );
|
||||
|
||||
|
||||
// total
|
||||
out << " |||" << path.GetTotalScore();
|
||||
out << " ||| " << path.GetTotalScore();
|
||||
|
||||
/*
|
||||
if (includeAlignment) {
|
||||
@ -443,6 +453,32 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
|
||||
m_nBestOutputCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
void IOWrapper::OutputSparseFeatureScores( std::ostream& out, const ChartTrellisPath &path, const FeatureFunction *ff, std::string &lastName )
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
bool labeledOutput = staticData.IsLabeledNBestList();
|
||||
const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
|
||||
|
||||
// report weighted aggregate
|
||||
if (! ff->GetSparseFeatureReporting()) {
|
||||
const FVector &weights = staticData.GetAllWeights().GetScoresVector();
|
||||
if (labeledOutput && !boost::contains(ff->GetScoreProducerDescription(), ":"))
|
||||
out << " " << ff->GetScoreProducerWeightShortName() << ":";
|
||||
out << " " << scores.inner_product(weights);
|
||||
}
|
||||
|
||||
// report each feature
|
||||
else {
|
||||
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
|
||||
if (i->second != 0) { // do not report zero-valued features
|
||||
if (labeledOutput)
|
||||
out << " " << i->first << ":";
|
||||
out << " " << i->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
|
||||
{
|
||||
stream.setf(std::ios::fixed);
|
||||
|
@ -44,6 +44,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "OutputCollector.h"
|
||||
#include "ChartHypothesis.h"
|
||||
|
||||
#include "ChartTrellisPath.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class FactorCollection;
|
||||
@ -82,6 +84,7 @@ public:
|
||||
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
|
||||
void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors);
|
||||
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId);
|
||||
void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
|
||||
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, long translationId);
|
||||
void Backtrack(const Moses::ChartHypothesis *hypo);
|
||||
|
||||
|
@ -165,18 +165,25 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source
|
||||
}
|
||||
static void PrintFeatureWeight(const FeatureFunction* ff)
|
||||
{
|
||||
|
||||
size_t numScoreComps = ff->GetNumScoreComponents();
|
||||
if (numScoreComps != ScoreProducer::unlimited) {
|
||||
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
|
||||
for (size_t i = 0; i < numScoreComps; ++i) {
|
||||
for (size_t i = 0; i < numScoreComps; ++i)
|
||||
cout << ff->GetScoreProducerDescription() << " "
|
||||
<< ff->GetScoreProducerWeightShortName() << " "
|
||||
<< values[i] << endl;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
|
||||
static void PrintSparseFeatureWeight(const FeatureFunction* ff)
|
||||
{
|
||||
if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) {
|
||||
if (ff->GetSparseProducerWeight() == 1)
|
||||
cout << ff->GetScoreProducerDescription() << " " <<
|
||||
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
|
||||
else
|
||||
cout << ff->GetScoreProducerDescription() << " " <<
|
||||
ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,6 +208,9 @@ static void ShowWeights()
|
||||
for (size_t i = 0; i < slf.size(); ++i) {
|
||||
PrintFeatureWeight(slf[i]);
|
||||
}
|
||||
for (size_t i = 0; i < sff.size(); ++i) {
|
||||
PrintSparseFeatureWeight(sff[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -287,21 +287,27 @@ private:
|
||||
|
||||
static void PrintFeatureWeight(const FeatureFunction* ff)
|
||||
{
|
||||
|
||||
size_t numScoreComps = ff->GetNumScoreComponents();
|
||||
if (numScoreComps != ScoreProducer::unlimited) {
|
||||
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
|
||||
for (size_t i = 0; i < numScoreComps; ++i) {
|
||||
for (size_t i = 0; i < numScoreComps; ++i)
|
||||
cout << ff->GetScoreProducerDescription() << " "
|
||||
<< ff->GetScoreProducerWeightShortName() << " "
|
||||
<< values[i] << endl;
|
||||
}
|
||||
} else {
|
||||
cout << ff->GetScoreProducerDescription() << " " <<
|
||||
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
|
||||
}
|
||||
}
|
||||
|
||||
static void PrintSparseFeatureWeight(const FeatureFunction* ff)
|
||||
{
|
||||
if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) {
|
||||
if (ff->GetSparseProducerWeight() == 1)
|
||||
cout << ff->GetScoreProducerDescription() << " " <<
|
||||
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
|
||||
else
|
||||
cout << ff->GetScoreProducerDescription() << " " <<
|
||||
ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl;
|
||||
}
|
||||
}
|
||||
|
||||
static void ShowWeights()
|
||||
{
|
||||
@ -324,6 +330,9 @@ static void ShowWeights()
|
||||
for (size_t i = 0; i < gds.size(); ++i) {
|
||||
PrintFeatureWeight(gds[i]);
|
||||
}
|
||||
for (size_t i = 0; i < sff.size(); ++i) {
|
||||
PrintSparseFeatureWeight(sff[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/** main function of the command line version of the decoder **/
|
||||
|
@ -81,11 +81,13 @@ void BleuScoreFeature::PrintHistory(std::ostream& out) const {
|
||||
}
|
||||
}
|
||||
|
||||
void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
|
||||
void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
|
||||
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
|
||||
float scaleByX, float historySmoothing, size_t scheme, float relaxBP) {
|
||||
m_scale_by_input_length = scaleByInputLength;
|
||||
m_scale_by_ref_length = scaleByRefLength;
|
||||
m_scale_by_target_length = scaleByTargetLength;
|
||||
m_scale_by_target_length_linear = scaleByTargetLengthLinear;
|
||||
m_scale_by_target_length_trend = scaleByTargetLengthTrend;
|
||||
m_scale_by_avg_length = scaleByAvgLength;
|
||||
m_scale_by_x = scaleByX;
|
||||
m_historySmoothing = historySmoothing;
|
||||
@ -97,6 +99,7 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
|
||||
{
|
||||
m_refs.clear();
|
||||
FactorCollection& fc = FactorCollection::Instance();
|
||||
cerr << "Number of reference files: " << refs.size() << endl;
|
||||
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
|
||||
for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
|
||||
const string& ref = refs[file_id][ref_id];
|
||||
@ -430,13 +433,19 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
|
||||
else if (m_scale_by_ref_length) {
|
||||
precision *= m_ref_length_history + m_cur_ref_length;
|
||||
}
|
||||
else if (m_scale_by_target_length) {
|
||||
precision *= m_target_length_history + state->m_target_length;
|
||||
else if (m_scale_by_target_length_linear) {
|
||||
// length of current hypothesis + number of words still to translate from source (rest being translated 1-to-1)
|
||||
float scaled_target_length = state->m_target_length + (m_cur_source_length - state->m_source_length);
|
||||
precision *= m_target_length_history + scaled_target_length;
|
||||
}
|
||||
else if (m_scale_by_target_length_trend) {
|
||||
// length of full target if remaining words were translated with the same fertility as so far
|
||||
float scaled_target_length = ((float)m_cur_source_length/state->m_source_length) * state->m_target_length;
|
||||
precision *= m_target_length_history + scaled_target_length;
|
||||
}
|
||||
else if (m_scale_by_avg_length) {
|
||||
precision *= (m_source_length_history + m_ref_length_history + m_cur_source_length + + m_cur_ref_length) / 2;
|
||||
}
|
||||
|
||||
return precision*m_scale_by_x;
|
||||
}
|
||||
|
||||
|
@ -80,7 +80,8 @@ public:
|
||||
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
|
||||
void PrintReferenceLength(const std::vector<size_t>& ref_ids);
|
||||
size_t GetReferenceLength(size_t ref_id);
|
||||
void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
|
||||
void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
|
||||
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
|
||||
float scaleByX, float historySmoothing, size_t scheme, float relaxBP);
|
||||
void GetNgramMatchCounts(Phrase&,
|
||||
const NGrams&,
|
||||
@ -125,8 +126,11 @@ private:
|
||||
// scale BLEU score by (history of) reference length
|
||||
bool m_scale_by_ref_length;
|
||||
|
||||
// scale BLEU score by (history of) target length
|
||||
bool m_scale_by_target_length;
|
||||
// scale BLEU score by (history of) target length (linear future estimate)
|
||||
bool m_scale_by_target_length_linear;
|
||||
|
||||
// scale BLEU score by (history of) target length (trend-based future estimate)
|
||||
bool m_scale_by_target_length_trend;
|
||||
|
||||
// scale BLEU score by (history of) the average of input and reference length
|
||||
bool m_scale_by_avg_length;
|
||||
|
@ -255,17 +255,10 @@ namespace Moses {
|
||||
}
|
||||
|
||||
FVector& FVector::operator+= (const FVector& rhs) {
|
||||
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
|
||||
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
|
||||
resize(rhs.m_coreFeatures.size());
|
||||
}
|
||||
for (iterator i = begin(); i != end(); ++i) {
|
||||
set(i->first,i->second + rhs.get(i->first));
|
||||
}
|
||||
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
|
||||
if (!hasNonDefaultValue(i->first)) {
|
||||
set(i->first,i->second);
|
||||
}
|
||||
}
|
||||
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
|
||||
set(i->first, get(i->first) + i->second);
|
||||
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
|
||||
if (i < rhs.m_coreFeatures.size()) {
|
||||
m_coreFeatures[i] += rhs.m_coreFeatures[i];
|
||||
@ -275,17 +268,10 @@ namespace Moses {
|
||||
}
|
||||
|
||||
FVector& FVector::operator-= (const FVector& rhs) {
|
||||
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
|
||||
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
|
||||
resize(rhs.m_coreFeatures.size());
|
||||
}
|
||||
for (iterator i = begin(); i != end(); ++i) {
|
||||
set(i->first,i->second - rhs.get(i->first));
|
||||
}
|
||||
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
|
||||
if (!hasNonDefaultValue(i->first)) {
|
||||
set(i->first,-(i->second));
|
||||
}
|
||||
}
|
||||
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
|
||||
set(i->first, get(i->first) -(i->second));
|
||||
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
|
||||
if (i < rhs.m_coreFeatures.size()) {
|
||||
m_coreFeatures[i] -= rhs.m_coreFeatures[i];
|
||||
@ -336,28 +322,6 @@ namespace Moses {
|
||||
return *this;
|
||||
}
|
||||
|
||||
FVector& FVector::max_equals(const FVector& rhs) {
|
||||
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
|
||||
resize(rhs.m_coreFeatures.size());
|
||||
}
|
||||
for (iterator i = begin(); i != end(); ++i) {
|
||||
set(i->first, max(i->second , rhs.get(i->first) ));
|
||||
}
|
||||
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
|
||||
if (!hasNonDefaultValue(i->first)) {
|
||||
set(i->first, i->second);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
|
||||
if (i < rhs.m_coreFeatures.size()) {
|
||||
m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]);
|
||||
} else {
|
||||
m_coreFeatures[i] = max(m_coreFeatures[i],(float)0);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
FVector& FVector::operator*= (const FValue& rhs) {
|
||||
//NB Could do this with boost::bind ?
|
||||
for (iterator i = begin(); i != end(); ++i) {
|
||||
@ -367,7 +331,6 @@ namespace Moses {
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
FVector& FVector::operator/= (const FValue& rhs) {
|
||||
for (iterator i = begin(); i != end(); ++i) {
|
||||
i->second /= rhs;
|
||||
@ -387,6 +350,25 @@ namespace Moses {
|
||||
return norm;
|
||||
}
|
||||
|
||||
/** Euclidean (L2) norm: square root of the inner product of the vector with itself. */
FValue FVector::l2norm() const {
  const FValue sumOfSquares = inner_product(*this);
  return sqrt(sumOfSquares);
}
|
||||
|
||||
/**
 * Maximum (L-infinity) norm: the largest absolute value over all sparse
 * features and all core features. Returns 0 for an empty vector.
 */
FValue FVector::linfnorm() const {
  FValue norm = 0;

  // sparse (named) features
  for (const_iterator i = cbegin(); i != cend(); ++i) {
    float absValue = abs(i->second);
    if (absValue > norm)
      norm = absValue;
  }

  // core (dense) features: take the absolute value here as well, otherwise a
  // large negative core weight would never contribute to the norm
  for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
    float absValue = abs(m_coreFeatures[i]);
    if (absValue > norm)
      norm = absValue;
  }

  return norm;
}
|
||||
|
||||
FValue FVector::sum() const {
|
||||
FValue sum = 0;
|
||||
for (const_iterator i = cbegin(); i != cend(); ++i) {
|
||||
@ -396,10 +378,6 @@ namespace Moses {
|
||||
return sum;
|
||||
}
|
||||
|
||||
FValue FVector::l2norm() const {
|
||||
return sqrt(inner_product(*this));
|
||||
}
|
||||
|
||||
FValue FVector::inner_product(const FVector& rhs) const {
|
||||
CHECK(m_coreFeatures.size() == rhs.m_coreFeatures.size());
|
||||
FValue product = 0.0;
|
||||
@ -437,10 +415,6 @@ namespace Moses {
|
||||
return FVector(lhs) /= rhs;
|
||||
}
|
||||
|
||||
const FVector fvmax(const FVector& lhs, const FVector& rhs) {
|
||||
return FVector(lhs).max_equals(rhs);
|
||||
}
|
||||
|
||||
FValue inner_product(const FVector& lhs, const FVector& rhs) {
|
||||
if (lhs.size() >= rhs.size()) {
|
||||
return rhs.inner_product(lhs);
|
||||
|
@ -177,6 +177,7 @@ namespace Moses {
|
||||
/** norms and sums */
|
||||
FValue l1norm() const;
|
||||
FValue l2norm() const;
|
||||
FValue linfnorm() const;
|
||||
FValue sum() const;
|
||||
|
||||
/** pretty printing */
|
||||
@ -292,6 +293,10 @@ namespace Moses {
|
||||
return (m_fv->m_features[m_name] += lhs);
|
||||
}
|
||||
|
||||
/** Subtract lhs from the feature entry this proxy refers to and return the updated value. */
FValue operator -=(FValue lhs) {
  m_fv->m_features[m_name] -= lhs;
  return m_fv->m_features[m_name];
}
|
||||
|
||||
private:
|
||||
FValue m_tmp;
|
||||
|
||||
|
@ -224,26 +224,6 @@ BOOST_AUTO_TEST_CASE(core_scalar)
|
||||
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(core_max)
|
||||
{
|
||||
FVector f1(2);
|
||||
FVector f2(2);
|
||||
FName n1("a");
|
||||
FName n2("b");
|
||||
FName n3("c");
|
||||
f1[0] = 1.1; f1[1] = -0.1; ; f1[n2] = -1.5; f1[n3] = 2.2;
|
||||
f2[0] = 0.5; f2[1] = 0.25; f2[n1] = 1; f2[n3] = 2.4;
|
||||
|
||||
FVector m = fvmax(f1,f2);
|
||||
|
||||
BOOST_CHECK_CLOSE((FValue)m[0], 1.1 , TOL);
|
||||
BOOST_CHECK_CLOSE((FValue)m[1], 0.25 , TOL);
|
||||
BOOST_CHECK_CLOSE((FValue)m[n1], 1 , TOL);
|
||||
BOOST_CHECK_CLOSE((FValue)m[n2],0 , TOL);
|
||||
BOOST_CHECK_CLOSE((FValue)m[n3],2.4 , TOL);
|
||||
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(l1norm)
|
||||
{
|
||||
FVector f1(3);
|
||||
|
@ -63,8 +63,8 @@ void ScoreComponentCollection::MultiplyEquals(float scalar)
|
||||
|
||||
// Multiply all weights of this sparse producer by a given scalar
|
||||
void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar) {
|
||||
CHECK(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
|
||||
std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
|
||||
assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
|
||||
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
|
||||
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
|
||||
std::stringstream name;
|
||||
name << i->first;
|
||||
@ -100,6 +100,10 @@ float ScoreComponentCollection::GetL2Norm() const {
|
||||
return m_scores.l2norm();
|
||||
}
|
||||
|
||||
float ScoreComponentCollection::GetLInfNorm() const {
|
||||
return m_scores.linfnorm();
|
||||
}
|
||||
|
||||
void ScoreComponentCollection::Save(ostream& out) const {
|
||||
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
|
||||
for (; iter != s_scoreIndexes.end(); ++iter ) {
|
||||
|
@ -150,6 +150,21 @@ public:
|
||||
m_scores -= rhs.m_scores;
|
||||
}
|
||||
|
||||
//For features which have an unbounded number of components
|
||||
void MinusEquals(const ScoreProducer*sp, const std::string& name, float score)
|
||||
{
|
||||
assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
|
||||
FName fname(sp->GetScoreProducerDescription(),name);
|
||||
m_scores[fname] -= score;
|
||||
}
|
||||
|
||||
//For features which have an unbounded number of components
|
||||
void SparseMinusEquals(const std::string& full_name, float score)
|
||||
{
|
||||
FName fname(full_name);
|
||||
m_scores[fname] -= score;
|
||||
}
|
||||
|
||||
|
||||
//! Add scores from a single ScoreProducer only
|
||||
//! The length of scores must be equal to the number of score components
|
||||
@ -192,6 +207,13 @@ public:
|
||||
m_scores[fname] += score;
|
||||
}
|
||||
|
||||
//For features which have an unbounded number of components
|
||||
void SparsePlusEquals(const std::string& full_name, float score)
|
||||
{
|
||||
FName fname(full_name);
|
||||
m_scores[fname] += score;
|
||||
}
|
||||
|
||||
void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
|
||||
{
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
@ -307,6 +329,7 @@ public:
|
||||
void L1Normalise();
|
||||
float GetL1Norm() const;
|
||||
float GetL2Norm() const;
|
||||
float GetLInfNorm() const;
|
||||
void Save(const std::string& filename) const;
|
||||
void Save(std::ostream&) const;
|
||||
|
||||
|
@ -54,6 +54,8 @@ public:
|
||||
|
||||
void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
|
||||
bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; }
|
||||
|
||||
virtual float GetSparseProducerWeight() const { return 1; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -1442,7 +1442,7 @@ bool StaticData::LoadReferences()
|
||||
}
|
||||
string line;
|
||||
while (getline(in,line)) {
|
||||
references.back().push_back(line);
|
||||
references[i].push_back(line);
|
||||
}
|
||||
if (i > 0) {
|
||||
if (references[i].size() != references[i-1].size()) {
|
||||
@ -1459,14 +1459,12 @@ bool StaticData::LoadReferences()
|
||||
|
||||
bool StaticData::LoadDiscrimLMFeature()
|
||||
{
|
||||
cerr << "Loading discriminative language models.. ";
|
||||
|
||||
// only load if specified
|
||||
const vector<string> &wordFile = m_parameter->GetParam("discrim-lmodel-file");
|
||||
if (wordFile.empty()) {
|
||||
return true;
|
||||
}
|
||||
cerr << wordFile.size() << " models" << endl;
|
||||
cerr << "Loading " << wordFile.size() << " discriminative language model(s).." << endl;
|
||||
|
||||
// if this weight is specified, the sparse DLM weights will be scaled with an additional weight
|
||||
vector<string> dlmWeightStr = m_parameter->GetParam("weight-dlm");
|
||||
@ -1495,6 +1493,11 @@ bool StaticData::LoadDiscrimLMFeature()
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (m_searchAlgorithm == ChartDecoding && !include_lower_ngrams) {
|
||||
UserMessage::Add("Excluding lower order DLM ngrams is currently not supported for chart decoding.");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_targetNgramFeatures.push_back(new TargetNgramFeature(factorId, order, include_lower_ngrams));
|
||||
if (i < dlmWeights.size())
|
||||
m_targetNgramFeatures[i]->SetSparseProducerWeight(dlmWeights[i]);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "TargetPhrase.h"
|
||||
#include "Hypothesis.h"
|
||||
#include "ScoreComponentCollection.h"
|
||||
#include "ChartHypothesis.h"
|
||||
|
||||
namespace Moses {
|
||||
|
||||
@ -45,7 +46,7 @@ bool TargetNgramFeature::Load(const std::string &filePath)
|
||||
|
||||
std::string line;
|
||||
m_vocab.insert(BOS_);
|
||||
m_vocab.insert(BOS_);
|
||||
m_vocab.insert(EOS_);
|
||||
while (getline(inFile, line)) {
|
||||
m_vocab.insert(line);
|
||||
}
|
||||
@ -54,10 +55,9 @@ bool TargetNgramFeature::Load(const std::string &filePath)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const
|
||||
{
|
||||
return "dlmn";
|
||||
return "dlm";
|
||||
}
|
||||
|
||||
size_t TargetNgramFeature::GetNumInputScores() const
|
||||
@ -65,7 +65,6 @@ size_t TargetNgramFeature::GetNumInputScores() const
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input*/) const
|
||||
{
|
||||
vector<Word> bos(1,m_bos);
|
||||
@ -76,8 +75,8 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{
|
||||
const TargetNgramState* tnState = dynamic_cast<const TargetNgramState*>(prev_state);
|
||||
CHECK(tnState);
|
||||
const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
|
||||
assert(tnState);
|
||||
|
||||
// current hypothesis target phrase
|
||||
const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
|
||||
@ -85,7 +84,7 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
|
||||
// extract all ngrams from current hypothesis
|
||||
vector<Word> prev_words = tnState->GetWords();
|
||||
string curr_ngram;
|
||||
stringstream curr_ngram;
|
||||
bool skip = false;
|
||||
|
||||
// include lower order ngrams?
|
||||
@ -94,7 +93,9 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
|
||||
for (size_t n = m_n; n >= smallest_n; --n) { // iterate over ngram size
|
||||
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
|
||||
const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
|
||||
// const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
|
||||
const string& curr_w = targetPhrase.GetWord(i).GetString(m_factorType);
|
||||
|
||||
if (m_vocab.size() && (m_vocab.find(curr_w) == m_vocab.end())) continue; // skip ngrams
|
||||
|
||||
if (n > 1) {
|
||||
@ -129,23 +130,23 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
}
|
||||
|
||||
if (!skip) {
|
||||
curr_ngram.append(curr_w);
|
||||
accumulator->PlusEquals(this,curr_ngram,1);
|
||||
curr_ngram << curr_w;
|
||||
accumulator->PlusEquals(this,curr_ngram.str(),1);
|
||||
}
|
||||
curr_ngram.clear();
|
||||
curr_ngram.str("");
|
||||
}
|
||||
}
|
||||
|
||||
if (cur_hypo.GetWordsBitmap().IsComplete()) {
|
||||
for (size_t n = m_n; n >= smallest_n; --n) {
|
||||
string last_ngram;
|
||||
stringstream last_ngram;
|
||||
skip = false;
|
||||
for (size_t i = cur_hypo.GetSize() - n + 1; i < cur_hypo.GetSize() && !skip; ++i)
|
||||
appendNgram(cur_hypo.GetWord(i), skip, last_ngram);
|
||||
|
||||
if (n > 1 && !skip) {
|
||||
last_ngram.append(EOS_);
|
||||
accumulator->PlusEquals(this,last_ngram,1);
|
||||
last_ngram << EOS_;
|
||||
accumulator->PlusEquals(this, last_ngram.str(), 1);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
@ -169,13 +170,267 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
return new TargetNgramState(new_prev_words);
|
||||
}
|
||||
|
||||
void TargetNgramFeature::appendNgram(const Word& word, bool& skip, string& ngram) const {
|
||||
const string& w = word.GetFactor(m_factorType)->GetString();
|
||||
void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const {
|
||||
// const string& w = word.GetFactor(m_factorType)->GetString();
|
||||
const string& w = word.GetString(m_factorType);
|
||||
if (m_vocab.size() && (m_vocab.find(w) == m_vocab.end())) skip = true;
|
||||
else {
|
||||
ngram.append(w);
|
||||
ngram.append(":");
|
||||
ngram << w;
|
||||
ngram << ":";
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Score a chart hypothesis with target n-gram features.
 *
 * Walks over the target side of the applied rule. Terminals are collected
 * directly; for non-terminals the prefix/suffix words stored in the feature
 * state of the underlying hypothesis are collected instead. Unigram features
 * are fired immediately for ordinary terminals; higher-order n-grams are fired
 * through MakePrefixNgrams / MakeSuffixNgrams on the collected context, and
 * n-grams that would be counted twice are subtracted again.
 *
 * \param cur_hypo     hypothesis being scored
 * \param featureId    index used to fetch this feature's state from sub-hypotheses
 * \param accumulator  score collection that receives the sparse feature updates
 * \return newly allocated TargetNgramChartState for cur_hypo (caller owns it)
 */
FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
{
  vector<const Word*> contextFactor;
  contextFactor.reserve(m_n);

  const TargetPhrase &rule = cur_hypo.GetCurrTargetPhrase();
  const size_t ruleSize = rule.GetSize();

  // get index map for underlying hypotheses
  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
    rule.GetAlignmentInfo().GetNonTermIndexMap();

  // loop over rule
  bool makePrefix = false;
  bool makeSuffix = false;
  bool collectForPrefix = true;
  size_t prefixTerminals = 0;
  size_t suffixTerminals = 0;
  bool onlyTerminals = true;
  bool prev_is_NT = false;
  size_t prev_subPhraseLength = 0;
  for (size_t phrasePos = 0; phrasePos < ruleSize; phrasePos++)
  {
    // consult rule for either word or non-terminal
    const Word &word = rule.GetWord(phrasePos);

    // regular word
    if (!word.IsNonTerminal()) {
      contextFactor.push_back(&word);
      prev_is_NT = false;

      if (phrasePos==0)
        makePrefix = true;
      // NOTE(review): prev_is_NT was cleared just above, so the second operand
      // is always false here; confirm whether the reset was meant to come
      // after this test. Statement order is kept unchanged.
      if (phrasePos==ruleSize-1 || prev_is_NT)
        makeSuffix = true;

      // beginning/end of sentence symbol <s>,</s>?
      string factorZero = word.GetString(0);
      if (factorZero.compare("<s>") == 0)
        prefixTerminals++;
      // end of sentence symbol </s>?
      else if (factorZero.compare("</s>") == 0)
        suffixTerminals++;
      // everything else: fire the unigram feature right away
      else {
        stringstream ngram;
        ngram << m_baseName;
        if (m_factorType == 0)
          ngram << factorZero;
        else
          ngram << word.GetString(m_factorType);
        accumulator->SparsePlusEquals(ngram.str(), 1);

        if (collectForPrefix)
          prefixTerminals++;
        else
          suffixTerminals++;
      }
    }

    // non-terminal, add phrase from underlying hypothesis
    else if (m_n > 1)
    {
      // look up underlying hypothesis
      size_t nonTermIndex = nonTermIndexMap[phrasePos];
      const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermIndex);

      const TargetNgramChartState* prevState =
        static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId));
      size_t subPhraseLength = prevState->GetNumTargetTerminals();

      // special case: rule starts with non-terminal
      if (phrasePos == 0) {
        if (subPhraseLength == 1) {
          makePrefix = true;
          ++prefixTerminals;

          const Word &subWord = prevState->GetSuffix().GetWord(0);
          contextFactor.push_back(&subWord);
        }
        else {
          onlyTerminals = false;
          collectForPrefix = false;
          int suffixPos = prevState->GetSuffix().GetSize() - (m_n-1);
          if (suffixPos < 0) suffixPos = 0; // push all words if less than order
          for(;(size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++)
          {
            const Word &subWord = prevState->GetSuffix().GetWord(suffixPos);
            contextFactor.push_back(&subWord);
          }
        }
      }

      // internal non-terminal
      else
      {
        // push its prefix
        for(size_t prefixPos = 0; prefixPos < m_n-1
            && prefixPos < subPhraseLength; prefixPos++)
        {
          const Word &subWord = prevState->GetPrefix().GetWord(prefixPos);
          contextFactor.push_back(&subWord);
        }

        if (subPhraseLength==1) {
          if (collectForPrefix)
            ++prefixTerminals;
          else
            ++suffixTerminals;

          if (phrasePos == ruleSize-1)
            makeSuffix = true;
        }
        else {
          onlyTerminals = false;
          collectForPrefix = true;

          // check if something follows this NT
          const bool wordFollowing = phrasePos < ruleSize - 1;

          // check if we are dealing with a large sub-phrase
          if (wordFollowing && subPhraseLength > m_n - 1)
          {
            // clear up pending ngrams
            MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
            contextFactor.clear();
            makePrefix = false;
            makeSuffix = true;
            collectForPrefix = false;
            prefixTerminals = 0;
            suffixTerminals = 0;

            // push its suffix (all words stored in the sub-hypothesis suffix)
            for(size_t suffixPos = 0; suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) {
              const Word &subWord = prevState->GetSuffix().GetWord(suffixPos);
              contextFactor.push_back(&subWord);
            }
          }
          // subphrase can be used as suffix and as prefix for the next part
          else if (wordFollowing && subPhraseLength == m_n - 1)
          {
            // clear up pending ngrams
            MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
            makePrefix = false;
            makeSuffix = true;
            collectForPrefix = false;
            prefixTerminals = 0;
            suffixTerminals = 0;
          }
          else if (prev_is_NT && prev_subPhraseLength > 1 && subPhraseLength > 1) {
            // two NTs in a row: make transition
            MakePrefixNgrams(contextFactor, accumulator, 1, m_n-2);
            MakeSuffixNgrams(contextFactor, accumulator, 1, m_n-2);
            makePrefix = false;
            makeSuffix = false;
            collectForPrefix = false;
            prefixTerminals = 0;
            suffixTerminals = 0;

            // remove duplicates
            stringstream curr_ngram;
            curr_ngram << m_baseName;
            curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType);
            curr_ngram << ":";
            curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType);
            accumulator->SparseMinusEquals(curr_ngram.str(),1);
          }
        }
      }
      prev_is_NT = true;
      prev_subPhraseLength = subPhraseLength;
    }
  }

  if (m_n > 1) {
    if (onlyTerminals) {
      // prefixTerminals is unsigned: with a count of zero, "prefixTerminals-1"
      // would wrap around to a huge number of start positions
      if (prefixTerminals > 0)
        MakePrefixNgrams(contextFactor, accumulator, prefixTerminals-1);
    }
    else {
      if (makePrefix)
        MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
      if (makeSuffix)
        MakeSuffixNgrams(contextFactor, accumulator, suffixTerminals);

      // remove duplicates: a short context was counted by both calls above
      size_t size = contextFactor.size();
      if (makePrefix && makeSuffix && (size <= m_n)) {
        stringstream curr_ngram;
        curr_ngram << m_baseName;
        for (size_t i = 0; i < size; ++i) {
          curr_ngram << (*contextFactor[i]).GetString(m_factorType);
          if (i < size-1)
            curr_ngram << ":";
        }
        accumulator->SparseMinusEquals(curr_ngram.str(), 1);
      }
    }
  }

  return new TargetNgramChartState(cur_hypo, featureId, m_n);
}
|
||||
|
||||
/**
 * Fire n-gram features that start at the front of the collected context.
 *
 * For each of the first numberOfStartPos start positions (shifted by offset),
 * every n-gram of length 2 up to m_n that fits into contextFactor is added to
 * the accumulator with value 1. Feature names are m_baseName followed by the
 * words joined with ":". Sentence boundary symbols are always taken from
 * factor 0, other words from m_factorType.
 *
 * \param contextFactor    collected context words (not modified)
 * \param accumulator      receives the sparse feature updates
 * \param numberOfStartPos number of consecutive start positions to use
 * \param offset           index of the first start position
 */
void TargetNgramFeature::MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const {
  stringstream ngram;
  const size_t size = contextFactor.size();
  for (size_t k = 0; k < numberOfStartPos; ++k) {
    const size_t start_pos = k + offset;
    // an n-gram needs at least two words; once the start position is too close
    // to the end, no later start position can produce one either
    if (start_pos + 1 >= size)
      break;

    const size_t max_end = (size < m_n + start_pos) ? size : m_n + start_pos;
    for (size_t end_pos = start_pos + 1; end_pos < max_end; ++end_pos) {
      ngram << m_baseName;
      for (size_t i = start_pos; i <= end_pos; ++i) {
        if (i > start_pos)
          ngram << ":";
        const Word &current = *contextFactor[i];
        const string factorZero = current.GetString(0);
        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
          ngram << factorZero;
        else
          ngram << current.GetString(m_factorType);
      }
      accumulator->SparsePlusEquals(ngram.str(), 1);
      ngram.str("");
    }
  }
}
|
||||
|
||||
/** Add sparse n-gram features anchored at the right edge of a word sequence.
 *
 * For each of the last numberOfEndPos end positions (shifted left by offset),
 * every n-gram of 2 up to m_n words that ends there and fits inside
 * contextFactor is rendered as m_baseName followed by the words joined with
 * ":" and its sparse feature is incremented by 1. N-grams are emitted from
 * shortest to longest for each end position.
 *
 * \param contextFactor  words to build n-grams from (not modified here)
 * \param accumulator    score collection receiving the SparsePlusEquals calls
 * \param numberOfEndPos number of consecutive end positions to use
 * \param offset         number of words to skip at the right edge
 */
void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const {
  std::stringstream ngram;
  const size_t size = contextFactor.size();

  for (size_t k = 0; k < numberOfEndPos; ++k) {
    // No word exists at this end position (nor at any later one): stop
    // explicitly instead of letting size-1-k-offset wrap around.
    if (size < 1 + k + offset)
      break;
    const size_t end_pos = size - 1 - k - offset;

    // len is the number of words in the n-gram; bigrams first, then longer,
    // limited by the feature order and by the words available to the left
    for (size_t len = 2; len <= m_n && len <= end_pos + 1; ++len) {
      const size_t start_pos = end_pos + 1 - len;

      ngram << m_baseName;
      for (size_t j = start_pos; j <= end_pos; ++j) {
        const Word &word = *contextFactor[j];
        const std::string factorZero = word.GetString(0);
        // sentence boundary markers are always taken from factor 0
        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
          ngram << factorZero;
        else
          ngram << word.GetString(m_factorType);

        if (j < end_pos)
          ngram << ":";
      }

      // cerr << "s-ngram: " << ngram.str() << endl;
      accumulator->SparsePlusEquals(ngram.str(), 1);
      ngram.str("");
    }
  }
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,10 @@
|
||||
#include "FFState.h"
|
||||
#include "Word.h"
|
||||
|
||||
#include "LM/SingleFactor.h"
|
||||
#include "ChartHypothesis.h"
|
||||
#include "ChartManager.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
@ -22,43 +26,190 @@ class TargetNgramState : public FFState {
|
||||
std::vector<Word> m_words;
|
||||
};
|
||||
|
||||
class TargetNgramChartState : public FFState
|
||||
{
|
||||
private:
|
||||
Phrase m_contextPrefix, m_contextSuffix;
|
||||
|
||||
size_t m_numTargetTerminals; // This isn't really correct except for the surviving hypothesis
|
||||
|
||||
size_t m_startPos, m_endPos, m_inputSize;
|
||||
|
||||
/** Construct the prefix string of up to specified size
|
||||
* \param ret prefix string
|
||||
* \param size maximum size (typically max lm context window)
|
||||
*/
|
||||
size_t CalcPrefix(const ChartHypothesis &hypo, const int featureId, Phrase &ret, size_t size) const
|
||||
{
|
||||
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
target.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
|
||||
// loop over the rule that is being applied
|
||||
for (size_t pos = 0; pos < target.GetSize(); ++pos) {
|
||||
const Word &word = target.GetWord(pos);
|
||||
|
||||
// for non-terminals, retrieve it from underlying hypothesis
|
||||
if (word.IsNonTerminal()) {
|
||||
size_t nonTermInd = nonTermIndexMap[pos];
|
||||
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
|
||||
size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcPrefix(*prevHypo, featureId, ret, size);
|
||||
// Phrase phrase = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->GetPrefix();
|
||||
// size = phrase.GetSize();
|
||||
}
|
||||
// for words, add word
|
||||
else {
|
||||
ret.AddWord(word);
|
||||
size--;
|
||||
}
|
||||
|
||||
// finish when maximum length reached
|
||||
if (size==0)
|
||||
break;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/** Construct the suffix phrase of up to specified size
|
||||
* will always be called after the construction of prefix phrase
|
||||
* \param ret suffix phrase
|
||||
* \param size maximum size of suffix
|
||||
*/
|
||||
size_t CalcSuffix(const ChartHypothesis &hypo, int featureId, Phrase &ret, size_t size) const
|
||||
{
|
||||
size_t prefixSize = m_contextPrefix.GetSize();
|
||||
assert(prefixSize <= m_numTargetTerminals);
|
||||
|
||||
// special handling for small hypotheses
|
||||
// does the prefix match the entire hypothesis string? -> just copy prefix
|
||||
if (prefixSize == m_numTargetTerminals) {
|
||||
size_t maxCount = std::min(prefixSize, size);
|
||||
size_t pos= prefixSize - 1;
|
||||
|
||||
for (size_t ind = 0; ind < maxCount; ++ind) {
|
||||
const Word &word = m_contextPrefix.GetWord(pos);
|
||||
ret.PrependWord(word);
|
||||
--pos;
|
||||
}
|
||||
|
||||
size -= maxCount;
|
||||
return size;
|
||||
}
|
||||
// construct suffix analogous to prefix
|
||||
else {
|
||||
const TargetPhrase targetPhrase = hypo.GetCurrTargetPhrase();
|
||||
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
||||
targetPhrase.GetAlignmentInfo().GetNonTermIndexMap();
|
||||
for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0 ; --pos) {
|
||||
const Word &word = targetPhrase.GetWord(pos);
|
||||
|
||||
if (word.IsNonTerminal()) {
|
||||
size_t nonTermInd = nonTermIndexMap[pos];
|
||||
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
|
||||
size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcSuffix(*prevHypo, featureId, ret, size);
|
||||
}
|
||||
else {
|
||||
ret.PrependWord(word);
|
||||
size--;
|
||||
}
|
||||
|
||||
if (size==0)
|
||||
break;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
TargetNgramChartState(const ChartHypothesis &hypo, int featureId, size_t order)
|
||||
:m_contextPrefix(order - 1),
|
||||
m_contextSuffix(order - 1)
|
||||
{
|
||||
m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();
|
||||
const WordsRange range = hypo.GetCurrSourceRange();
|
||||
m_startPos = range.GetStartPos();
|
||||
m_endPos = range.GetEndPos();
|
||||
m_inputSize = hypo.GetManager().GetSource().GetSize();
|
||||
|
||||
const std::vector<const ChartHypothesis*> prevHypos = hypo.GetPrevHypos();
|
||||
for (std::vector<const ChartHypothesis*>::const_iterator i = prevHypos.begin(); i != prevHypos.end(); ++i) {
|
||||
// keep count of words (= length of generated string)
|
||||
m_numTargetTerminals += static_cast<const TargetNgramChartState*>((*i)->GetFFState(featureId))->GetNumTargetTerminals();
|
||||
}
|
||||
|
||||
CalcPrefix(hypo, featureId, m_contextPrefix, order - 1);
|
||||
CalcSuffix(hypo, featureId, m_contextSuffix, order - 1);
|
||||
}
|
||||
|
||||
size_t GetNumTargetTerminals() const {
|
||||
return m_numTargetTerminals;
|
||||
}
|
||||
|
||||
const Phrase &GetPrefix() const {
|
||||
return m_contextPrefix;
|
||||
}
|
||||
const Phrase &GetSuffix() const {
|
||||
return m_contextSuffix;
|
||||
}
|
||||
|
||||
int Compare(const FFState& o) const {
|
||||
const TargetNgramChartState &other =
|
||||
static_cast<const TargetNgramChartState &>( o );
|
||||
|
||||
// prefix
|
||||
if (m_startPos > 0) // not for "<s> ..."
|
||||
{
|
||||
int ret = GetPrefix().Compare(other.GetPrefix());
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (m_endPos < m_inputSize - 1)// not for "... </s>"
|
||||
{
|
||||
int ret = GetSuffix().Compare(other.GetSuffix());
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
/** Sets the features of observed ngrams.
|
||||
*/
|
||||
class TargetNgramFeature : public StatefulFeatureFunction {
|
||||
public:
|
||||
TargetNgramFeature(FactorType factorType = 0, size_t n = 3, bool lower_ngrams = true):
|
||||
StatefulFeatureFunction("dlmn", ScoreProducer::unlimited),
|
||||
StatefulFeatureFunction("dlm", ScoreProducer::unlimited),
|
||||
m_factorType(factorType),
|
||||
m_n(n),
|
||||
m_lower_ngrams(lower_ngrams),
|
||||
m_sparseProducerWeight(1)
|
||||
{
|
||||
FactorCollection& factorCollection = FactorCollection::Instance();
|
||||
const Factor* bosFactor =
|
||||
factorCollection.AddFactor(Output,m_factorType,BOS_);
|
||||
const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_);
|
||||
m_bos.SetFactor(m_factorType,bosFactor);
|
||||
m_baseName = GetScoreProducerDescription();
|
||||
m_baseName.append("_");
|
||||
}
|
||||
|
||||
|
||||
bool Load(const std::string &filePath);
|
||||
|
||||
std::string GetScoreProducerWeightShortName(unsigned) const;
|
||||
size_t GetNumInputScores() const;
|
||||
|
||||
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
|
||||
float GetSparseProducerWeight() { return m_sparseProducerWeight; }
|
||||
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
|
||||
|
||||
virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
|
||||
int /* featureID */,
|
||||
ScoreComponentCollection* ) const
|
||||
{
|
||||
abort();
|
||||
}
|
||||
virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
private:
|
||||
FactorType m_factorType;
|
||||
Word m_bos;
|
||||
@ -69,7 +220,13 @@ private:
|
||||
// additional weight that all sparse weights are scaled with
|
||||
float m_sparseProducerWeight;
|
||||
|
||||
void appendNgram(const Word& word, bool& skip, std::string& ngram) const;
|
||||
std::string m_baseName;
|
||||
|
||||
void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
|
||||
void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
|
||||
size_t numberOfStartPos = 1, size_t offset = 0) const;
|
||||
void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
|
||||
size_t numberOfEndPos = 1, size_t offset = 0) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -85,6 +85,15 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
/** Return the string of a single factor of this word.
 * \param factorType index of the factor to read from m_factorArray
 * \return the factor's string, or an empty string when the factor is not set
 */
std::string Word::GetString(FactorType factorType) const
{
  const Factor *factor = m_factorArray[factorType];
  // An unset factor must not be turned into std::string(NULL): constructing
  // a std::string from a null pointer is undefined behaviour.
  if (factor == NULL)
    return std::string();
  return factor->GetString();
}
|
||||
|
||||
void Word::CreateFromString(FactorDirection direction
|
||||
, const std::vector<FactorType> &factorOrder
|
||||
, const std::string &str
|
||||
@ -94,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction
|
||||
|
||||
vector<string> wordVec;
|
||||
Tokenize(wordVec, str, "|");
|
||||
CHECK(wordVec.size() == factorOrder.size());
|
||||
if (!isNonTerminal)
|
||||
assert(wordVec.size() == factorOrder.size());
|
||||
|
||||
const Factor *factor;
|
||||
for (size_t ind = 0; ind < wordVec.size(); ++ind) {
|
||||
|
@ -101,6 +101,7 @@ public:
|
||||
* these debugging functions.
|
||||
*/
|
||||
std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
|
||||
std::string GetString(FactorType factorType) const;
|
||||
TO_STRING();
|
||||
|
||||
//! transitive comparison of Word objects
|
||||
|
Loading…
Reference in New Issue
Block a user