Merge from miramerge.

Reverted ChartHypothesis as it breaks chart decoding.
Barry Haddow 2012-01-20 15:35:55 +00:00
parent ced24a881d
commit 1e10bb7ef7
25 changed files with 801 additions and 245 deletions

View File

@ -184,9 +184,9 @@ namespace Mira {
m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
}
void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
/* void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
m_bleuScoreFeature->LoadReferences(refs);
}
}*/
void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
m_bleuScoreFeature->PrintHistory(out);
@ -200,9 +200,11 @@ namespace Mira {
return m_bleuScoreFeature->GetReferenceLength(ref_id);
}
void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
float scaleByX, float historySmoothing, size_t scheme, float relax_BP) {
m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength, scaleByTargetLength,
m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength,
scaleByTargetLengthLinear, scaleByTargetLengthTrend,
scaleByX, historySmoothing, scheme, relax_BP);
}
}

View File

@ -64,11 +64,12 @@ class MosesDecoder {
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
// void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
void printBleuFeatureHistory(std::ostream& out);
void printReferenceLength(const std::vector<size_t>& ref_ids);
size_t getReferenceLength(size_t ref_id);
void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
float scaleByX, float historySmoothing, size_t scheme, float relax_BP);
Moses::ScoreComponentCollection getWeights();
void setWeights(const Moses::ScoreComponentCollection& weights);

View File

@ -82,7 +82,8 @@ int main(int argc, char** argv) {
float historySmoothing;
bool scaleByInputLength;
bool scaleByReferenceLength;
bool scaleByTargetLength;
bool scaleByTargetLengthLinear;
bool scaleByTargetLengthTrend;
bool scaleByAvgLength;
float scaleByX;
float slack;
@ -119,6 +120,8 @@ int main(int argc, char** argv) {
float max_length_dev_hypos;
float max_length_dev_reference;
float relax_BP;
bool stabiliseLength;
bool delayUpdates;
po::options_description desc("Allowed options");
desc.add_options()
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
@ -133,6 +136,7 @@ int main(int argc, char** argv) {
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
("delay-updates", po::value<bool>(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
@ -164,7 +168,8 @@ int main(int argc, char** argv) {
("relax-BP", po::value<float>(&relax_BP)->default_value(1), "Relax the BP by setting this value between 0 and 1")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by (a history of) the input length")
("scale-by-reference-length", po::value<bool>(&scaleByReferenceLength)->default_value(false), "Scale BLEU by (a history of) the reference length")
("scale-by-target-length", po::value<bool>(&scaleByTargetLength)->default_value(false), "Scale BLEU by (a history of) the target length")
("scale-by-target-length-linear", po::value<bool>(&scaleByTargetLengthLinear)->default_value(false), "Scale BLEU by (a history of) the target length (linear future estimate)")
("scale-by-target-length-trend", po::value<bool>(&scaleByTargetLengthTrend)->default_value(false), "Scale BLEU by (a history of) the target length (trend-based future estimate)")
("scale-by-avg-length", po::value<bool>(&scaleByAvgLength)->default_value(false), "Scale BLEU by (a history of) the average of input and reference length")
("scale-by-x", po::value<float>(&scaleByX)->default_value(1), "Scale the BLEU score by value x")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
@ -174,6 +179,7 @@ int main(int argc, char** argv) {
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stabilise-length", po::value<bool>(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
@ -268,11 +274,7 @@ int main(int argc, char** argv) {
}
}
if (scaleByReferenceLength)
scaleByInputLength = false;
if (scaleByTargetLength)
scaleByInputLength = false;
if (scaleByAvgLength)
if (scaleByReferenceLength || scaleByTargetLengthLinear || scaleByTargetLengthTrend || scaleByAvgLength)
scaleByInputLength = false;
// initialise Moses
@ -285,7 +287,8 @@ int main(int argc, char** argv) {
vector<string> decoder_params;
boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
MosesDecoder* decoder = new MosesDecoder(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength, scaleByTargetLength,
decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength,
scaleByTargetLengthLinear, scaleByTargetLengthTrend,
scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP);
if (normaliseWeights) {
ScoreComponentCollection startWeights = decoder->getWeights();
@ -409,6 +412,12 @@ int main(int argc, char** argv) {
ScoreComponentCollection mixedAverageWeightsPrevious;
ScoreComponentCollection mixedAverageWeightsBeforePrevious;
// when length ratio >= 1, set this to true
bool fixLength = false;
// for accumulating delayed updates
ScoreComponentCollection delayedWeightUpdates;
bool stop = false;
// int sumStillViolatedConstraints;
float *sendbuf, *recvbuf;
@ -427,6 +436,12 @@ int main(int argc, char** argv) {
// number of weight dumps this epoch
size_t weightEpochDump = 0;
// sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
size_t dev_hypothesis_length = 0;
size_t dev_reference_length = 0;
delayedWeightUpdates.ZeroAll();
size_t shardPosition = 0;
vector<size_t>::const_iterator sid = shard.begin();
while (sid != shard.end()) {
@ -460,7 +475,7 @@ int main(int argc, char** argv) {
for (size_t batchPosition = 0; batchPosition < batchSize && sid
!= shard.end(); ++batchPosition) {
string& input = inputSentences[*sid];
const vector<string>& refs = referenceSentences[*sid];
// const vector<string>& refs = referenceSentences[*sid];
cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
vector<ScoreComponentCollection> newFeatureValues;
@ -474,7 +489,7 @@ int main(int argc, char** argv) {
featureValuesFear.push_back(newFeatureValues);
bleuScoresHope.push_back(newBleuScores);
bleuScoresFear.push_back(newBleuScores);
if (historyOf1best) {
if (historyOf1best || stabiliseLength) {
dummyFeatureValues.push_back(newFeatureValues);
dummyBleuScores.push_back(newBleuScores);
}
@ -493,13 +508,16 @@ int main(int argc, char** argv) {
cerr << ", l-ratio hope: " << hope_length_ratio << endl;
vector<const Word*> bestModel;
if (historyOf1best) {
if (historyOf1best || stabiliseLength) {
// MODEL (for updating the history only, using dummy vectors)
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank, epoch);
decoder->cleanup();
cerr << endl;
dev_hypothesis_length += bestModel.size();
dev_reference_length += reference_length;
}
// FEAR
@ -576,6 +594,10 @@ int main(int argc, char** argv) {
oneBests.push_back(bestModel);
float model_length_ratio = (float)bestModel.size()/reference_length;
cerr << ", l-ratio model: " << model_length_ratio << endl;
if (stabiliseLength) {
dev_hypothesis_length += bestModel.size();
dev_reference_length += reference_length;
}
// FEAR
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
@ -622,6 +644,19 @@ int main(int argc, char** argv) {
break;
}
// set word penalty to 0 before optimising (if 'stabilise-length' is active)
if (fixLength) {
iter = featureFunctions.begin();
for (; iter != featureFunctions.end(); ++iter) {
if ((*iter)->GetScoreProducerWeightShortName() == "w") {
ignoreWPFeature(featureValues, (*iter));
ignoreWPFeature(featureValuesHope, (*iter));
ignoreWPFeature(featureValuesFear, (*iter));
break;
}
}
}
// take logs of feature values
if (logFeatureValues) {
takeLogs(featureValuesHope, baseOfLog);
@ -654,24 +689,28 @@ int main(int argc, char** argv) {
// Run optimiser on batch:
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
size_t update_status;
ScoreComponentCollection weightUpdate;
if (perceptron_update) {
vector<vector<float> > dummy1;
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
}
else if (hope_fear) {
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
}
else {
// model_hope_fear
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
}
// sumStillViolatedConstraints += update_status;
if (update_status == 0) { // if weights were updated
// apply weight update
mosesWeights.PlusEquals(weightUpdate);
if (normaliseWeights) {
mosesWeights.L1Normalise();
}
@ -690,8 +729,11 @@ int main(int argc, char** argv) {
mosesWeights = averageWeights;
}
// set new Moses weights
decoder->setWeights(mosesWeights);
if (delayUpdates)
delayedWeightUpdates.PlusEquals(weightUpdate);
else
// set new Moses weights
decoder->setWeights(mosesWeights);
}
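The refactor above changes the optimiser contract: updateWeights and updateWeightsHopeFear now write the computed step into a separate weightUpdate vector instead of mutating currWeights directly, so the caller can either apply it immediately or accumulate it for the new --delay-updates mode. A condensed sketch of the caller-side flow, using only calls that appear in this diff:

ScoreComponentCollection weightUpdate;
size_t status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
    featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear,
    learning_rate, rank, epoch);
if (status == 0) {                        // optimiser found a step to take
  mosesWeights.PlusEquals(weightUpdate);  // local weights always move
  if (delayUpdates)
    delayedWeightUpdates.PlusEquals(weightUpdate); // applied after the epoch
  else
    decoder->setWeights(mosesWeights);    // decoder sees the update at once
}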
// update history (for approximate document Bleu)
@ -802,8 +844,25 @@ int main(int argc, char** argv) {
}
}
}// end dumping
} // end of shard loop, end of this epoch
if (delayUpdates) {
// apply all updates from this epoch to the weight vector
ScoreComponentCollection mosesWeights = decoder->getWeights();
mosesWeights.PlusEquals(delayedWeightUpdates);
decoder->setWeights(mosesWeights);
cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
}
if (stabiliseLength && !fixLength) {
float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
if (lengthRatio >= 1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl;
fixLength = 1;
}
}
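A worked example of the stabilise-length check: if an epoch produced dev_hypothesis_length = 10500 and dev_reference_length = 10000, the ratio is (10500 + 1) / 10000 ≈ 1.05 >= 1, so fixLength is set and from then on the word-penalty feature values are zeroed via ignoreWPFeature before each optimiser call, effectively freezing the word-penalty weight.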
if (verbosity > 0) {
cerr << "Bleu feature history after epoch " << epoch << endl;
decoder->printBleuFeatureHistory(cerr);
@ -840,28 +899,19 @@ int main(int argc, char** argv) {
if (rank == 0 && (epoch >= 2)) {
ScoreComponentCollection firstDiff(mixedAverageWeights);
firstDiff.MinusEquals(mixedAverageWeightsPrevious);
VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff <<
" (max: " << firstDiff.GetLInfNorm() << ")" << endl);
ScoreComponentCollection secondDiff(mixedAverageWeights);
secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff <<
" (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl);
// check whether stopping criterion has been reached
// (both difference vectors must have all weight changes smaller than min_weight_change)
FVector changes1 = firstDiff.GetScoresVector();
FVector changes2 = secondDiff.GetScoresVector();
FVector::const_iterator iterator1 = changes1.cbegin();
FVector::const_iterator iterator2 = changes2.cbegin();
while (iterator1 != changes1.cend()) {
if (abs((*iterator1).second) >= min_weight_change || abs(
(*iterator2).second) >= min_weight_change) {
reached = false;
break;
}
++iterator1;
++iterator2;
}
if (firstDiff.GetLInfNorm() >= min_weight_change)
reached = false;
if (secondDiff.GetLInfNorm() >= min_weight_change)
reached = false;
if (reached) {
// stop MIRA
stop = true;
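For instance, with min_weight_change = 0.01, average weight changes of (0.002, -0.015, 0.0003) have an L-infinity norm of 0.015, so convergence is not declared. The two GetLInfNorm checks compute exactly what the removed element-wise loop did, one maximum per difference vector.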
@ -991,16 +1041,20 @@ void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues
}
void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap) {
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t i = 0; i < featureValues.size(); ++i)
for (size_t j = 0; j < featureValues[i].size(); ++j) {
// set all core features to 0
StrFloatMap::iterator p;
for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
{
featureValues[i][j].Assign(p->first, 0);
}
}
}
}
void ignoreWPFeature(vector<vector<ScoreComponentCollection> > &featureValues, const ScoreProducer* sp) {
for (size_t i = 0; i < featureValues.size(); ++i)
for (size_t j = 0; j < featureValues[i].size(); ++j)
// set WP feature to 0
featureValues[i][j].Assign(sp, 0);
}
void takeLogs(vector<vector<ScoreComponentCollection> > &featureValues, size_t base) {

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "ScoreComponentCollection.h"
#include "Word.h"
#include "ScoreProducer.h"
typedef std::map<const std::string, float> StrFloatMap;
typedef std::pair<const std::string, float> StrFloatPair;
@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap);
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap);
void ignoreWPFeature(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, const Moses::ScoreProducer* sp);
void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);

View File

@ -7,7 +7,9 @@ using namespace std;
namespace Mira {
size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
size_t MiraOptimiser::updateWeights(
ScoreComponentCollection& currWeights,
ScoreComponentCollection& weightUpdate,
const vector<vector<ScoreComponentCollection> >& featureValues,
const vector<vector<float> >& losses,
const vector<vector<float> >& bleuScores,
@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
// apply update to weight vector
currWeights.PlusEquals(summedUpdate);
weightUpdate.PlusEquals(summedUpdate);
// Sanity check: are there still violated constraints after optimisation?
/* int violatedConstraintsAfter = 0;
@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
return 0;
}
size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
size_t MiraOptimiser::updateWeightsHopeFear(
Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
}
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
// apply update to weight vector
currWeights.PlusEquals(summedUpdate);
weightUpdate.PlusEquals(summedUpdate);
// Sanity check: are there still violated constraints after optimisation?
/* int violatedConstraintsAfter = 0;

View File

@ -30,7 +30,9 @@ namespace Mira {
public:
Optimiser() {}
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
virtual size_t updateWeightsHopeFear(
Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
@ -42,7 +44,9 @@ namespace Mira {
class Perceptron : public Optimiser {
public:
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
virtual size_t updateWeightsHopeFear(
Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
@ -66,6 +70,7 @@ namespace Mira {
m_margin_slack(margin_slack) { }
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector<std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
@ -75,6 +80,7 @@ namespace Mira {
size_t rank,
size_t epoch);
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,

View File

@ -24,7 +24,9 @@ using namespace std;
namespace Mira {
size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
size_t Perceptron::updateWeightsHopeFear(
ScoreComponentCollection& currWeights,
ScoreComponentCollection& weightUpdate,
const vector< vector<ScoreComponentCollection> >& featureValuesHope,
const vector< vector<ScoreComponentCollection> >& featureValuesFear,
const vector< vector<float> >& dummy1,
@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
featureValueDiff.MinusEquals(featureValuesFear[0][0]);
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
featureValueDiff.MultiplyEquals(perceptron_learning_rate);
currWeights.PlusEquals(featureValueDiff);
weightUpdate.PlusEquals(featureValueDiff);
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
return 0;
}

View File

@ -72,6 +72,9 @@ my $moses_ini_file = &param_required("train.moses-ini-file");
my $input_file = &param_required("train.input-file");
&check_exists ("train input file", $input_file);
my $reference_files = &param_required("train.reference-files");
for my $ref (glob $reference_files . "*") {
&check_exists ("ref files", $ref);
}
my $trainer_exe = &param_required("train.trainer");
&check_exists("Training executable", $trainer_exe);
#my $weights_file = &param_required("train.weights-file");
@ -94,20 +97,21 @@ my $burn_in_reference_files = &param("train.burn-in-reference-files");
my $skipTrain = &param("train.skip", 0);
#devtest configuration
my ($devtest_input_file, $devtest_reference_file,$devtest_ini_file,$bleu_script,$use_moses);
my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses);
my $test_exe = &param("devtest.moses");
&check_exists("test executable", $test_exe);
$bleu_script = &param_required("devtest.bleu");
&check_exists("multi-bleu script", $bleu_script);
$devtest_input_file = &param_required("devtest.input-file");
$devtest_reference_file = &param_required("devtest.reference-file");
&check_exists ("devtest input file", $devtest_input_file);
for my $ref (glob $devtest_reference_file . "*") {
$devtest_reference_files = &param_required("devtest.reference-file");
for my $ref (glob $devtest_reference_files . "*") {
&check_exists ("devtest ref file", $ref);
}
$devtest_ini_file = &param_required("devtest.moses-ini-file");
&check_exists ("devtest ini file", $devtest_ini_file);
my $weight_file_stem = "$name-weights";
my $extra_memory_devtest = &param("devtest.extra-memory",0);
my $skip_devtest = &param("devtest.skip-devtest",0);
@ -174,8 +178,9 @@ my @refs;
if (ref($reference_files) eq 'ARRAY') {
@refs = @$reference_files;
} else {
@refs = glob $reference_files;
@refs = glob $reference_files . "*";
}
my $arr_refs = \@refs;
if (!$skipTrain) {
#write the script
@ -198,7 +203,6 @@ print TRAIN "-f $moses_ini_file \\\n";
print TRAIN "-i $input_file \\\n";
for my $ref (@refs) {
&check_exists("train ref file", $ref);
print TRAIN "-r $ref ";
}
print TRAIN "\\\n";
@ -206,15 +210,15 @@ print TRAIN "\\\n";
if ($burn_in) {
print TRAIN "--burn-in 1 \\\n";
print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
my @refs;
my @burnin_refs;
if (ref($burn_in_reference_files) eq 'ARRAY') {
@refs = @$burn_in_reference_files;
@burnin_refs = @$burn_in_reference_files;
} else {
@refs = glob $burn_in_reference_files;
@burnin_refs = glob $burn_in_reference_files . "*";
}
for my $ref (@refs) {
&check_exists("burn-in ref file", $ref);
print TRAIN "--burn-in-reference-files $ref ";
for my $burnin_ref (@burnin_refs) {
&check_exists("burn-in ref file", $burnin_ref);
print TRAIN "--burn-in-reference-files $burnin_ref ";
}
print TRAIN "\\\n";
}
@ -317,10 +321,10 @@ while(1) {
my $suffix = "";
print "weight file exists? ".(-e $new_weight_file)."\n";
if (!$skip_devtest) {
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_file, $skip_submit_test);
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test);
}
if (!$skip_dev) {
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $refs[0], $skip_submit_test);
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test);
}
}
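The glob fixes above matter when references are given as a file stem: with train.reference-files = data/ref, glob "data/ref*" picks up data/ref.0, data/ref.1, and so on (file names illustrative), and each match is verified with check_exists; the earlier code globbed the bare stem and so found at most one file. The rename to @burnin_refs also stops the burn-in block from shadowing the training @refs list with a second "my @refs".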

View File

@ -47,6 +47,9 @@ POSSIBILITY OF SUCH DAMAGE.
#include "ChartHypothesis.h"
#include "DotChart.h"
#include <boost/algorithm/string.hpp>
#include "FeatureVector.h"
using namespace std;
using namespace Moses;
@ -345,7 +348,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
// print the surface factor of the translation
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
out << " |||";
out << " ||| ";
// print the scores in a hardwired order
// before each model type, the corresponding command-line-like name must be emitted
@ -362,26 +365,23 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
}
}
std::string lastName = "";
// translation components
const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
for( size_t i=0; i<pds.size(); i++ ) {
size_t pd_numinputscore = pds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
for (size_t j = 0; j<scores.size(); ++j){
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = pds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
size_t pd_numinputscore = pds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
for (size_t j = 0; j<scores.size(); ++j){
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = pds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
}
}
@ -393,26 +393,36 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
// generation
const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
if (gds.size() > 0) {
for( size_t i=0; i<gds.size(); i++ ) {
size_t pd_numinputscore = gds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
for (size_t j = 0; j<scores.size(); ++j){
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = gds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
size_t pd_numinputscore = gds[i]->GetNumInputScores();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
for (size_t j = 0; j<scores.size(); ++j){
if (labeledOutput && (i == 0) ){
if ((j == 0) || (j == pd_numinputscore)){
lastName = gds[i]->GetScoreProducerWeightShortName(j);
out << " " << lastName << ":";
}
}
out << " " << scores[j];
}
}
}
// output sparse features
lastName = "";
const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ )
if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
OutputSparseFeatureScores( out, path, sff[i], lastName );
const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ )
if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
OutputSparseFeatureScores( out, path, slf[i], lastName );
// total
out << " |||" << path.GetTotalScore();
out << " ||| " << path.GetTotalScore();
/*
if (includeAlignment) {
@ -443,6 +453,32 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
m_nBestOutputCollector->Write(translationId, out.str());
}
void IOWrapper::OutputSparseFeatureScores( std::ostream& out, const ChartTrellisPath &path, const FeatureFunction *ff, std::string &lastName )
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
// report weighted aggregate
if (! ff->GetSparseFeatureReporting()) {
const FVector &weights = staticData.GetAllWeights().GetScoresVector();
if (labeledOutput && !boost::contains(ff->GetScoreProducerDescription(), ":"))
out << " " << ff->GetScoreProducerWeightShortName() << ":";
out << " " << scores.inner_product(weights);
}
// report each feature
else {
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
if (i->second != 0) { // do not report zero-valued features
if (labeledOutput)
out << " " << i->first << ":";
out << " " << i->second;
}
}
}
}
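To make the two reporting modes concrete (feature names here are hypothetical), suppose a sparse producer with weight short name "dlm" has two active features valued 1 and 2, with weights 0.1 and 0.2. In aggregate mode the n-best line gains " dlm: 0.5" (the inner product 0.1*1 + 0.2*2); with sparse feature reporting enabled it gains " dlm_the:cat: 1 dlm_cat:sat: 2" instead, one entry per non-zero feature.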
void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
{
stream.setf(std::ios::fixed);

View File

@ -44,6 +44,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "OutputCollector.h"
#include "ChartHypothesis.h"
#include "ChartTrellisPath.h"
namespace Moses
{
class FactorCollection;
@ -82,6 +84,7 @@ public:
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId);
void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, long translationId);
void Backtrack(const Moses::ChartHypothesis *hypo);

View File

@ -165,18 +165,25 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source
}
static void PrintFeatureWeight(const FeatureFunction* ff)
{
size_t numScoreComps = ff->GetNumScoreComponents();
if (numScoreComps != ScoreProducer::unlimited) {
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
for (size_t i = 0; i < numScoreComps; ++i) {
for (size_t i = 0; i < numScoreComps; ++i)
cout << ff->GetScoreProducerDescription() << " "
<< ff->GetScoreProducerWeightShortName() << " "
<< values[i] << endl;
}
} else {
cout << ff->GetScoreProducerDescription() << " " <<
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
}
}
static void PrintSparseFeatureWeight(const FeatureFunction* ff)
{
if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) {
if (ff->GetSparseProducerWeight() == 1)
cout << ff->GetScoreProducerDescription() << " " <<
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
else
cout << ff->GetScoreProducerDescription() << " " <<
ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl;
}
}
@ -201,6 +208,9 @@ static void ShowWeights()
for (size_t i = 0; i < slf.size(); ++i) {
PrintFeatureWeight(slf[i]);
}
for (size_t i = 0; i < sff.size(); ++i) {
PrintSparseFeatureWeight(sff[i]);
}
}
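With the new branch, a sparse producer that was given SetSparseProducerWeight(0.5) is now also listed by ShowWeights as, e.g., "TargetNgramFeature dlm 0.5" (description and short name illustrative) rather than only as the generic "... dlm sparse" line, so the extra scaling weight becomes visible.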

View File

@ -287,21 +287,27 @@ private:
static void PrintFeatureWeight(const FeatureFunction* ff)
{
size_t numScoreComps = ff->GetNumScoreComponents();
if (numScoreComps != ScoreProducer::unlimited) {
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
for (size_t i = 0; i < numScoreComps; ++i) {
for (size_t i = 0; i < numScoreComps; ++i)
cout << ff->GetScoreProducerDescription() << " "
<< ff->GetScoreProducerWeightShortName() << " "
<< values[i] << endl;
}
} else {
cout << ff->GetScoreProducerDescription() << " " <<
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
}
}
static void PrintSparseFeatureWeight(const FeatureFunction* ff)
{
if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) {
if (ff->GetSparseProducerWeight() == 1)
cout << ff->GetScoreProducerDescription() << " " <<
ff->GetScoreProducerWeightShortName() << " sparse" << endl;
else
cout << ff->GetScoreProducerDescription() << " " <<
ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl;
}
}
static void ShowWeights()
{
@ -324,6 +330,9 @@ static void ShowWeights()
for (size_t i = 0; i < gds.size(); ++i) {
PrintFeatureWeight(gds[i]);
}
for (size_t i = 0; i < sff.size(); ++i) {
PrintSparseFeatureWeight(sff[i]);
}
}
/** main function of the command line version of the decoder **/

View File

@ -81,11 +81,13 @@ void BleuScoreFeature::PrintHistory(std::ostream& out) const {
}
}
void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP) {
void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP) {
m_scale_by_input_length = scaleByInputLength;
m_scale_by_ref_length = scaleByRefLength;
m_scale_by_target_length = scaleByTargetLength;
m_scale_by_target_length_linear = scaleByTargetLengthLinear;
m_scale_by_target_length_trend = scaleByTargetLengthTrend;
m_scale_by_avg_length = scaleByAvgLength;
m_scale_by_x = scaleByX;
m_historySmoothing = historySmoothing;
@ -97,6 +99,7 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
{
m_refs.clear();
FactorCollection& fc = FactorCollection::Instance();
cerr << "Number of reference files: " << refs.size() << endl;
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
const string& ref = refs[file_id][ref_id];
@ -430,13 +433,19 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
else if (m_scale_by_ref_length) {
precision *= m_ref_length_history + m_cur_ref_length;
}
else if (m_scale_by_target_length) {
precision *= m_target_length_history + state->m_target_length;
else if (m_scale_by_target_length_linear) {
// length of current hypothesis + number of words still to translate from source (rest being translated 1-to-1)
float scaled_target_length = state->m_target_length + (m_cur_source_length - state->m_source_length);
precision *= m_target_length_history + scaled_target_length;
}
else if (m_scale_by_target_length_trend) {
// length of full target if remaining words were translated with the same fertility as so far
float scaled_target_length = ((float)m_cur_source_length/state->m_source_length) * state->m_target_length;
precision *= m_target_length_history + scaled_target_length;
}
else if (m_scale_by_avg_length) {
precision *= (m_source_length_history + m_ref_length_history + m_cur_source_length + m_cur_ref_length) / 2;
}
return precision*m_scale_by_x;
}
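To make the two new future estimates concrete: suppose the source sentence has 20 words and the current hypothesis covers 10 of them with 12 target words. The linear estimate assumes the uncovered source words translate one-to-one: 12 + (20 - 10) = 22. The trend estimate extrapolates the fertility observed so far: (20 / 10) * 12 = 24. Either value is added to the target length history where the removed m_scale_by_target_length branch used the bare hypothesis length.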

View File

@ -80,7 +80,8 @@ public:
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void PrintReferenceLength(const std::vector<size_t>& ref_ids);
size_t GetReferenceLength(size_t ref_id);
void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP);
void GetNgramMatchCounts(Phrase&,
const NGrams&,
@ -125,8 +126,11 @@ private:
// scale BLEU score by (history of) reference length
bool m_scale_by_ref_length;
// scale BLEU score by (history of) target length
bool m_scale_by_target_length;
// scale BLEU score by (history of) target length (linear future estimate)
bool m_scale_by_target_length_linear;
// scale BLEU score by (history of) target length (trend-based future estimate)
bool m_scale_by_target_length_trend;
// scale BLEU score by (history of) the average of input and reference length
bool m_scale_by_avg_length;

View File

@ -255,17 +255,10 @@ namespace Moses {
}
FVector& FVector::operator+= (const FVector& rhs) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
resize(rhs.m_coreFeatures.size());
}
for (iterator i = begin(); i != end(); ++i) {
set(i->first,i->second + rhs.get(i->first));
}
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
set(i->first,i->second);
}
}
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
set(i->first, get(i->first) + i->second);
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
if (i < rhs.m_coreFeatures.size()) {
m_coreFeatures[i] += rhs.m_coreFeatures[i];
@ -275,17 +268,10 @@ namespace Moses {
}
FVector& FVector::operator-= (const FVector& rhs) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
resize(rhs.m_coreFeatures.size());
}
for (iterator i = begin(); i != end(); ++i) {
set(i->first,i->second - rhs.get(i->first));
}
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
set(i->first,-(i->second));
}
}
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
set(i->first, get(i->first) -(i->second));
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
if (i < rhs.m_coreFeatures.size()) {
m_coreFeatures[i] -= rhs.m_coreFeatures[i];
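The simplification of operator+= and operator-= is sound because a single pass over rhs covers every affected entry: keys present only in *this are left untouched, and for keys present only in rhs, get() returns the default value 0. For example, {a: 1} += {a: 0.5, b: 2} yields {a: 1.5, b: 2} in one loop, where the old code needed one loop over each vector plus a hasNonDefaultValue test.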
@ -336,28 +322,6 @@ namespace Moses {
return *this;
}
FVector& FVector::max_equals(const FVector& rhs) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
resize(rhs.m_coreFeatures.size());
}
for (iterator i = begin(); i != end(); ++i) {
set(i->first, max(i->second , rhs.get(i->first) ));
}
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
if (!hasNonDefaultValue(i->first)) {
set(i->first, i->second);
}
}
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
if (i < rhs.m_coreFeatures.size()) {
m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]);
} else {
m_coreFeatures[i] = max(m_coreFeatures[i],(float)0);
}
}
return *this;
}
FVector& FVector::operator*= (const FValue& rhs) {
//NB Could do this with boost::bind ?
for (iterator i = begin(); i != end(); ++i) {
@ -367,7 +331,6 @@ namespace Moses {
return *this;
}
FVector& FVector::operator/= (const FValue& rhs) {
for (iterator i = begin(); i != end(); ++i) {
i->second /= rhs;
@ -387,6 +350,25 @@ namespace Moses {
return norm;
}
FValue FVector::l2norm() const {
return sqrt(inner_product(*this));
}
FValue FVector::linfnorm() const {
FValue norm = 0;
for (const_iterator i = cbegin(); i != cend(); ++i) {
float absValue = abs(i->second);
if (absValue > norm)
norm = absValue;
}
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
float absValue = abs(m_coreFeatures[i]);
if (absValue > norm)
norm = absValue;
}
return norm;
}
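As a quick check of linfnorm(): sparse values {a: 1.5, b: -2.25} together with core features [0.75] give max(1.5, 2.25, 0.75) = 2.25. This is the primitive behind the new ScoreComponentCollection::GetLInfNorm used by the trainer's convergence test.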
FValue FVector::sum() const {
FValue sum = 0;
for (const_iterator i = cbegin(); i != cend(); ++i) {
@ -396,10 +378,6 @@ namespace Moses {
return sum;
}
FValue FVector::l2norm() const {
return sqrt(inner_product(*this));
}
FValue FVector::inner_product(const FVector& rhs) const {
CHECK(m_coreFeatures.size() == rhs.m_coreFeatures.size());
FValue product = 0.0;
@ -437,10 +415,6 @@ namespace Moses {
return FVector(lhs) /= rhs;
}
const FVector fvmax(const FVector& lhs, const FVector& rhs) {
return FVector(lhs).max_equals(rhs);
}
FValue inner_product(const FVector& lhs, const FVector& rhs) {
if (lhs.size() >= rhs.size()) {
return rhs.inner_product(lhs);

View File

@ -177,6 +177,7 @@ namespace Moses {
/** norms and sums */
FValue l1norm() const;
FValue l2norm() const;
FValue linfnorm() const;
FValue sum() const;
/** pretty printing */
@ -292,6 +293,10 @@ namespace Moses {
return (m_fv->m_features[m_name] += lhs);
}
FValue operator -=(FValue lhs) {
return (m_fv->m_features[m_name] -= lhs);
}
private:
FValue m_tmp;

View File

@ -224,26 +224,6 @@ BOOST_AUTO_TEST_CASE(core_scalar)
}
BOOST_AUTO_TEST_CASE(core_max)
{
FVector f1(2);
FVector f2(2);
FName n1("a");
FName n2("b");
FName n3("c");
f1[0] = 1.1; f1[1] = -0.1; f1[n2] = -1.5; f1[n3] = 2.2;
f2[0] = 0.5; f2[1] = 0.25; f2[n1] = 1; f2[n3] = 2.4;
FVector m = fvmax(f1,f2);
BOOST_CHECK_CLOSE((FValue)m[0], 1.1 , TOL);
BOOST_CHECK_CLOSE((FValue)m[1], 0.25 , TOL);
BOOST_CHECK_CLOSE((FValue)m[n1], 1 , TOL);
BOOST_CHECK_CLOSE((FValue)m[n2],0 , TOL);
BOOST_CHECK_CLOSE((FValue)m[n3],2.4 , TOL);
}
BOOST_AUTO_TEST_CASE(l1norm)
{
FVector f1(3);

View File

@ -63,8 +63,8 @@ void ScoreComponentCollection::MultiplyEquals(float scalar)
// Multiply all weights of this sparse producer by a given scalar
void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar) {
CHECK(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
std::stringstream name;
name << i->first;
@ -100,6 +100,10 @@ float ScoreComponentCollection::GetL2Norm() const {
return m_scores.l2norm();
}
float ScoreComponentCollection::GetLInfNorm() const {
return m_scores.linfnorm();
}
void ScoreComponentCollection::Save(ostream& out) const {
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
for (; iter != s_scoreIndexes.end(); ++iter ) {

View File

@ -150,6 +150,21 @@ public:
m_scores -= rhs.m_scores;
}
//For features which have an unbounded number of components
void MinusEquals(const ScoreProducer*sp, const std::string& name, float score)
{
assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
FName fname(sp->GetScoreProducerDescription(),name);
m_scores[fname] -= score;
}
//For features which have an unbounded number of components
void SparseMinusEquals(const std::string& full_name, float score)
{
FName fname(full_name);
m_scores[fname] -= score;
}
//! Add scores from a single ScoreProducer only
//! The length of scores must be equal to the number of score components
@ -192,6 +207,13 @@ public:
m_scores[fname] += score;
}
//For features which have an unbounded number of components
void SparsePlusEquals(const std::string& full_name, float score)
{
FName fname(full_name);
m_scores[fname] += score;
}
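A minimal usage sketch of the two string-keyed sparse mutators (feature names hypothetical); SparsePlusEquals and SparseMinusEquals take the already fully-qualified feature name, whereas the producer-based overloads build the FName from the producer description:

ScoreComponentCollection acc;
acc.SparsePlusEquals("dlm_the:cat", 1);   // accumulate a sparse ngram count
acc.SparsePlusEquals("dlm_cat:sat", 1);
acc.SparseMinusEquals("dlm_the:cat", 1);  // retract a double-counted ngram

This is the pattern TargetNgramFeature::EvaluateChart below relies on when it removes duplicate ngrams.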
void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
{
IndexPair indexes = GetIndexes(sp);
@ -307,6 +329,7 @@ public:
void L1Normalise();
float GetL1Norm() const;
float GetL2Norm() const;
float GetLInfNorm() const;
void Save(const std::string& filename) const;
void Save(std::ostream&) const;

View File

@ -54,6 +54,8 @@ public:
void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; }
virtual float GetSparseProducerWeight() const { return 1; }
};

View File

@ -1442,7 +1442,7 @@ bool StaticData::LoadReferences()
}
string line;
while (getline(in,line)) {
references.back().push_back(line);
references[i].push_back(line);
}
if (i > 0) {
if (references[i].size() != references[i-1].size()) {
@ -1459,14 +1459,12 @@ bool StaticData::LoadReferences()
bool StaticData::LoadDiscrimLMFeature()
{
cerr << "Loading discriminative language models.. ";
// only load if specified
// only load if specified
const vector<string> &wordFile = m_parameter->GetParam("discrim-lmodel-file");
if (wordFile.empty()) {
return true;
}
cerr << wordFile.size() << " models" << endl;
cerr << "Loading " << wordFile.size() << " discriminative language model(s).." << endl;
// if this weight is specified, the sparse DLM weights will be scaled with an additional weight
vector<string> dlmWeightStr = m_parameter->GetParam("weight-dlm");
@ -1495,6 +1493,11 @@ bool StaticData::LoadDiscrimLMFeature()
}
}
else {
if (m_searchAlgorithm == ChartDecoding && !include_lower_ngrams) {
UserMessage::Add("Excluding lower order DLM ngrams is currently not supported for chart decoding.");
return false;
}
m_targetNgramFeatures.push_back(new TargetNgramFeature(factorId, order, include_lower_ngrams));
if (i < dlmWeights.size())
m_targetNgramFeatures[i]->SetSparseProducerWeight(dlmWeights[i]);

View File

@ -3,6 +3,7 @@
#include "TargetPhrase.h"
#include "Hypothesis.h"
#include "ScoreComponentCollection.h"
#include "ChartHypothesis.h"
namespace Moses {
@ -12,25 +13,25 @@ int TargetNgramState::Compare(const FFState& other) const {
const TargetNgramState& rhs = dynamic_cast<const TargetNgramState&>(other);
int result;
if (m_words.size() == rhs.m_words.size()) {
for (size_t i = 0; i < m_words.size(); ++i) {
result = Word::Compare(m_words[i],rhs.m_words[i]);
if (result != 0) return result;
}
for (size_t i = 0; i < m_words.size(); ++i) {
result = Word::Compare(m_words[i],rhs.m_words[i]);
if (result != 0) return result;
}
return 0;
}
else if (m_words.size() < rhs.m_words.size()) {
for (size_t i = 0; i < m_words.size(); ++i) {
result = Word::Compare(m_words[i],rhs.m_words[i]);
if (result != 0) return result;
}
return -1;
for (size_t i = 0; i < m_words.size(); ++i) {
result = Word::Compare(m_words[i],rhs.m_words[i]);
if (result != 0) return result;
}
return -1;
}
else {
for (size_t i = 0; i < rhs.m_words.size(); ++i) {
result = Word::Compare(m_words[i],rhs.m_words[i]);
if (result != 0) return result;
}
return 1;
for (size_t i = 0; i < rhs.m_words.size(); ++i) {
result = Word::Compare(m_words[i],rhs.m_words[i]);
if (result != 0) return result;
}
return 1;
}
}
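The reindented Compare implements a length-aware lexicographic order: the shared prefix is compared word by word and the first difference decides; if all shared positions tie, the shorter state compares as smaller. For example, [a, b] versus [a, b, c] returns -1, while [a, c] versus [a, b, c] returns the result of Word::Compare(c, b) at position 1.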
@ -45,7 +46,7 @@ bool TargetNgramFeature::Load(const std::string &filePath)
std::string line;
m_vocab.insert(BOS_);
m_vocab.insert(BOS_);
m_vocab.insert(EOS_);
while (getline(inFile, line)) {
m_vocab.insert(line);
}
@ -54,10 +55,9 @@ bool TargetNgramFeature::Load(const std::string &filePath)
return true;
}
string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const
{
return "dlmn";
return "dlm";
}
size_t TargetNgramFeature::GetNumInputScores() const
@ -65,7 +65,6 @@ size_t TargetNgramFeature::GetNumInputScores() const
return 0;
}
const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input*/) const
{
vector<Word> bos(1,m_bos);
@ -76,8 +75,8 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
const TargetNgramState* tnState = dynamic_cast<const TargetNgramState*>(prev_state);
CHECK(tnState);
const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
assert(tnState);
// current hypothesis target phrase
const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
@ -85,7 +84,7 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
// extract all ngrams from current hypothesis
vector<Word> prev_words = tnState->GetWords();
string curr_ngram;
stringstream curr_ngram;
bool skip = false;
// include lower order ngrams?
@ -94,7 +93,9 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
for (size_t n = m_n; n >= smallest_n; --n) { // iterate over ngram size
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
// const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
const string& curr_w = targetPhrase.GetWord(i).GetString(m_factorType);
if (m_vocab.size() && (m_vocab.find(curr_w) == m_vocab.end())) continue; // skip ngrams
if (n > 1) {
@ -129,23 +130,23 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
}
if (!skip) {
curr_ngram.append(curr_w);
accumulator->PlusEquals(this,curr_ngram,1);
curr_ngram << curr_w;
accumulator->PlusEquals(this,curr_ngram.str(),1);
}
curr_ngram.clear();
curr_ngram.str("");
}
}
if (cur_hypo.GetWordsBitmap().IsComplete()) {
for (size_t n = m_n; n >= smallest_n; --n) {
string last_ngram;
stringstream last_ngram;
skip = false;
for (size_t i = cur_hypo.GetSize() - n + 1; i < cur_hypo.GetSize() && !skip; ++i)
appendNgram(cur_hypo.GetWord(i), skip, last_ngram);
if (n > 1 && !skip) {
last_ngram.append(EOS_);
accumulator->PlusEquals(this,last_ngram,1);
last_ngram << EOS_;
accumulator->PlusEquals(this, last_ngram.str(), 1);
}
}
return NULL;
@ -169,13 +170,267 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
return new TargetNgramState(new_prev_words);
}
void TargetNgramFeature::appendNgram(const Word& word, bool& skip, string& ngram) const {
const string& w = word.GetFactor(m_factorType)->GetString();
void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const {
// const string& w = word.GetFactor(m_factorType)->GetString();
const string& w = word.GetString(m_factorType);
if (m_vocab.size() && (m_vocab.find(w) == m_vocab.end())) skip = true;
else {
ngram.append(w);
ngram.append(":");
ngram << w;
ngram << ":";
}
}
FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
{
vector<const Word*> contextFactor;
contextFactor.reserve(m_n);
// get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
cur_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
// loop over rule
bool makePrefix = false;
bool makeSuffix = false;
bool collectForPrefix = true;
size_t prefixTerminals = 0;
size_t suffixTerminals = 0;
bool onlyTerminals = true;
bool prev_is_NT = false;
size_t prev_subPhraseLength = 0;
for (size_t phrasePos = 0; phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize(); phrasePos++)
{
// consult rule for either word or non-terminal
const Word &word = cur_hypo.GetCurrTargetPhrase().GetWord(phrasePos);
// cerr << "word: " << word << endl;
// regular word
if (!word.IsNonTerminal()) {
contextFactor.push_back(&word);
prev_is_NT = false;
if (phrasePos==0)
makePrefix = true;
if (phrasePos==cur_hypo.GetCurrTargetPhrase().GetSize()-1 || prev_is_NT)
makeSuffix = true;
// beginning/end of sentence symbol <s>,</s>?
string factorZero = word.GetString(0);
if (factorZero.compare("<s>") == 0)
prefixTerminals++;
// end of sentence symbol </s>?
else if (factorZero.compare("</s>") == 0)
suffixTerminals++;
// everything else
else {
stringstream ngram;
ngram << m_baseName;
if (m_factorType == 0)
ngram << factorZero;
else
ngram << word.GetString(m_factorType);
accumulator->SparsePlusEquals(ngram.str(), 1);
if (collectForPrefix)
prefixTerminals++;
else
suffixTerminals++;
}
}
// non-terminal, add phrase from underlying hypothesis
else if (m_n > 1)
{
// look up underlying hypothesis
size_t nonTermIndex = nonTermIndexMap[phrasePos];
const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermIndex);
const TargetNgramChartState* prevState =
static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId));
size_t subPhraseLength = prevState->GetNumTargetTerminals();
// special case: rule starts with non-terminal
if (phrasePos == 0) {
if (subPhraseLength == 1) {
makePrefix = true;
++prefixTerminals;
const Word &word = prevState->GetSuffix().GetWord(0);
// cerr << "NT0 --> : " << word << endl;
contextFactor.push_back(&word);
}
else {
onlyTerminals = false;
collectForPrefix = false;
int suffixPos = prevState->GetSuffix().GetSize() - (m_n-1);
if (suffixPos < 0) suffixPos = 0; // push all words if less than order
for(;(size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++)
{
const Word &word = prevState->GetSuffix().GetWord(suffixPos);
// cerr << "NT0 --> : " << word << endl;
contextFactor.push_back(&word);
}
}
}
// internal non-terminal
else
{
// push its prefix
for(size_t prefixPos = 0; prefixPos < m_n-1
&& prefixPos < subPhraseLength; prefixPos++)
{
const Word &word = prevState->GetPrefix().GetWord(prefixPos);
// cerr << "NT --> " << word << endl;
contextFactor.push_back(&word);
}
if (subPhraseLength==1) {
if (collectForPrefix)
++prefixTerminals;
else
++suffixTerminals;
if (phrasePos == cur_hypo.GetCurrTargetPhrase().GetSize()-1)
makeSuffix = true;
}
else {
onlyTerminals = false;
collectForPrefix = true;
// check if something follows this NT
bool wordFollowing = (phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize() - 1)? true : false;
// check if we are dealing with a large sub-phrase
if (wordFollowing && subPhraseLength > m_n - 1)
{
// clear up pending ngrams
MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
contextFactor.clear();
makePrefix = false;
makeSuffix = true;
collectForPrefix = false;
prefixTerminals = 0;
suffixTerminals = 0;
// push its suffix
size_t remainingWords = (subPhraseLength > m_n-1) ? m_n-1 : subPhraseLength - (m_n-1);
for(size_t suffixPos = 0; suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) {
const Word &word = prevState->GetSuffix().GetWord(suffixPos);
// cerr << "NT --> : " << word << endl;
contextFactor.push_back(&word);
}
}
// subphrase can be used as suffix and as prefix for the next part
else if (wordFollowing && subPhraseLength == m_n - 1)
{
// clear up pending ngrams
MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
makePrefix = false;
makeSuffix = true;
collectForPrefix = false;
prefixTerminals = 0;
suffixTerminals = 0;
}
else if (prev_is_NT && prev_subPhraseLength > 1 && subPhraseLength > 1) {
// two NTs in a row: make transition
MakePrefixNgrams(contextFactor, accumulator, 1, m_n-2);
MakeSuffixNgrams(contextFactor, accumulator, 1, m_n-2);
makePrefix = false;
makeSuffix = false;
collectForPrefix = false;
prefixTerminals = 0;
suffixTerminals = 0;
// remove duplicates
stringstream curr_ngram;
curr_ngram << m_baseName;
curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType);
curr_ngram << ":";
curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType);
accumulator->SparseMinusEquals(curr_ngram.str(),1);
}
}
}
prev_is_NT = true;
prev_subPhraseLength = subPhraseLength;
}
}
if (m_n > 1) {
if (onlyTerminals) {
MakePrefixNgrams(contextFactor, accumulator, prefixTerminals-1);
}
else {
if (makePrefix)
MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
if (makeSuffix)
MakeSuffixNgrams(contextFactor, accumulator, suffixTerminals);
// remove duplicates
size_t size = contextFactor.size();
if (makePrefix && makeSuffix && (size <= m_n)) {
stringstream curr_ngram;
curr_ngram << m_baseName;
for (size_t i = 0; i < size; ++i) {
curr_ngram << (*contextFactor[i]).GetString(m_factorType);
if (i < size-1)
curr_ngram << ":";
}
accumulator->SparseMinusEquals(curr_ngram.str(), 1);
}
}
}
// cerr << endl;
return new TargetNgramChartState(cur_hypo, featureId, m_n);
}
void TargetNgramFeature::MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const {
stringstream ngram;
size_t size = contextFactor.size();
for (size_t k = 0; k < numberOfStartPos; ++k) {
size_t max_end = (size < m_n+k+offset)? size: m_n+k+offset;
for (size_t end_pos = 1+k+offset; end_pos < max_end; ++end_pos) {
ngram << m_baseName;
for (size_t i=k+offset; i <= end_pos; ++i) {
if (i > k+offset)
ngram << ":";
string factorZero = (*contextFactor[i]).GetString(0);
if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
ngram << factorZero;
else
ngram << (*contextFactor[i]).GetString(m_factorType);
const Word w = *contextFactor[i];
}
// cerr << "p-ngram: " << ngram.str() << endl;
accumulator->SparsePlusEquals(ngram.str(), 1);
ngram.str("");
}
}
}
void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const {
stringstream ngram;
for (size_t k = 0; k < numberOfEndPos; ++k) {
size_t end_pos = contextFactor.size()-1-k-offset;
for (int start_pos=end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) {
ngram << m_baseName;
for (size_t j=start_pos; j <= end_pos; ++j){
string factorZero = (*contextFactor[j]).GetString(0);
if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
ngram << factorZero;
else
ngram << (*contextFactor[j]).GetString(m_factorType);
if (j < end_pos)
ngram << ":";
}
// cerr << "s-ngram: " << ngram.str() << endl;
accumulator->SparsePlusEquals(ngram.str(), 1);
ngram.str("");
}
}
}
}
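To see what the ngram builders emit, take m_n = 3 and contextFactor = [the, cat, sat] (words hypothetical). MakePrefixNgrams with numberOfStartPos = 1 and offset = 0 produces "the:cat" and "the:cat:sat", each prefixed with m_baseName and added via SparsePlusEquals; MakeSuffixNgrams mirrors this from the right edge, producing "cat:sat" and "the:cat:sat". The full-length ngram is generated by both, which is why EvaluateChart calls SparseMinusEquals on it when makePrefix, makeSuffix and size <= m_n all hold.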

View File

@ -9,6 +9,10 @@
#include "FFState.h"
#include "Word.h"
#include "LM/SingleFactor.h"
#include "ChartHypothesis.h"
#include "ChartManager.h"
namespace Moses
{
@ -22,43 +26,190 @@ class TargetNgramState : public FFState {
std::vector<Word> m_words;
};
class TargetNgramChartState : public FFState
{
private:
Phrase m_contextPrefix, m_contextSuffix;
size_t m_numTargetTerminals; // This isn't really correct except for the surviving hypothesis
size_t m_startPos, m_endPos, m_inputSize;
/** Construct the prefix string of up to the specified size
* \param ret prefix string (output)
* \param size maximum size (typically the maximum LM context window)
*/
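// Note: recurses depth-first into the hypothesis substituted for each
// non-terminal; the return value is the number of still unfilled context
// slots, so a result of 0 means the prefix is already complete.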
size_t CalcPrefix(const ChartHypothesis &hypo, const int featureId, Phrase &ret, size_t size) const
{
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
target.GetAlignmentInfo().GetNonTermIndexMap();
// loop over the rule that is being applied
for (size_t pos = 0; pos < target.GetSize(); ++pos) {
const Word &word = target.GetWord(pos);
// for non-terminals, retrieve it from underlying hypothesis
if (word.IsNonTerminal()) {
size_t nonTermInd = nonTermIndexMap[pos];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcPrefix(*prevHypo, featureId, ret, size);
// Phrase phrase = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->GetPrefix();
// size = phrase.GetSize();
}
// for words, add word
else {
ret.AddWord(word);
size--;
}
// finish when maximum length reached
if (size==0)
break;
}
return size;
}
/** Construct the suffix phrase of up to the specified size;
* always called after the prefix phrase has been constructed
* \param ret suffix phrase (output)
* \param size maximum size of the suffix
*/
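// Walks the rule right-to-left, mirroring CalcPrefix; the shortcut below
// handles hypotheses so short that prefix and suffix are the same string.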
size_t CalcSuffix(const ChartHypothesis &hypo, int featureId, Phrase &ret, size_t size) const
{
size_t prefixSize = m_contextPrefix.GetSize();
assert(prefixSize <= m_numTargetTerminals);
// special handling for small hypotheses
// does the prefix match the entire hypothesis string? -> just copy prefix
if (prefixSize == m_numTargetTerminals) {
size_t maxCount = std::min(prefixSize, size);
size_t pos= prefixSize - 1;
for (size_t ind = 0; ind < maxCount; ++ind) {
const Word &word = m_contextPrefix.GetWord(pos);
ret.PrependWord(word);
--pos;
}
size -= maxCount;
return size;
}
// construct suffix analogous to prefix
else {
const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase(); // take a reference, avoid copying the phrase
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
targetPhrase.GetAlignmentInfo().GetNonTermIndexMap();
for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0 ; --pos) {
const Word &word = targetPhrase.GetWord(pos);
if (word.IsNonTerminal()) {
size_t nonTermInd = nonTermIndexMap[pos];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcSuffix(*prevHypo, featureId, ret, size);
}
else {
ret.PrependWord(word);
size--;
}
if (size==0)
break;
}
return size;
}
}
public:
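// The state records the (order-1)-word prefix and suffix of the partial
// translation, which is exactly the context that an enclosing rule
// application can still combine into new target n-grams.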
TargetNgramChartState(const ChartHypothesis &hypo, int featureId, size_t order)
:m_contextPrefix(order - 1),
m_contextSuffix(order - 1)
{
m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();
const WordsRange range = hypo.GetCurrSourceRange();
m_startPos = range.GetStartPos();
m_endPos = range.GetEndPos();
m_inputSize = hypo.GetManager().GetSource().GetSize();
const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos(); // take a reference, avoid copying the vector
for (std::vector<const ChartHypothesis*>::const_iterator i = prevHypos.begin(); i != prevHypos.end(); ++i) {
// keep count of words (= length of generated string)
m_numTargetTerminals += static_cast<const TargetNgramChartState*>((*i)->GetFFState(featureId))->GetNumTargetTerminals();
}
CalcPrefix(hypo, featureId, m_contextPrefix, order - 1);
CalcSuffix(hypo, featureId, m_contextSuffix, order - 1);
}
size_t GetNumTargetTerminals() const {
return m_numTargetTerminals;
}
const Phrase &GetPrefix() const {
return m_contextPrefix;
}
const Phrase &GetSuffix() const {
return m_contextSuffix;
}
int Compare(const FFState& o) const {
const TargetNgramChartState &other =
static_cast<const TargetNgramChartState &>( o );
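// Recombination check: two hypotheses may only be merged if the context
// visible to later rule applications is identical; at the sentence
// boundaries that context is fixed by <s> and </s> and can be skipped.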
// prefix
if (m_startPos > 0) // not for "<s> ..."
{
int ret = GetPrefix().Compare(other.GetPrefix());
if (ret != 0)
return ret;
}
if (m_endPos < m_inputSize - 1)// not for "... </s>"
{
int ret = GetSuffix().Compare(other.GetSuffix());
if (ret != 0)
return ret;
}
return 0;
}
};
/** Sparse feature function that fires on n-grams observed in the target side of a hypothesis.
*/
class TargetNgramFeature : public StatefulFeatureFunction {
public:
TargetNgramFeature(FactorType factorType = 0, size_t n = 3, bool lower_ngrams = true):
StatefulFeatureFunction("dlm", ScoreProducer::unlimited),
m_factorType(factorType),
m_n(n),
m_lower_ngrams(lower_ngrams),
m_sparseProducerWeight(1)
{
FactorCollection& factorCollection = FactorCollection::Instance();
const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_);
m_bos.SetFactor(m_factorType,bosFactor);
m_baseName = GetScoreProducerDescription();
m_baseName.append("_");
}
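// Usage sketch (illustrative only, values assumed): a trigram model over
// the surface factor could be instantiated as
//   TargetNgramFeature dlm(0 /*factor*/, 3 /*order*/, true /*lower n-grams*/);
//   dlm.SetSparseProducerWeight(0.5f); // assumed global scaling weight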
bool Load(const std::string &filePath);
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
ScoreComponentCollection* accumulator) const;
private:
FactorType m_factorType;
Word m_bos;
@ -69,7 +220,13 @@ private:
// additional weight that all sparse weights are scaled with
float m_sparseProducerWeight;
std::string m_baseName;
void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
size_t numberOfStartPos = 1, size_t offset = 0) const;
void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
size_t numberOfEndPos = 1, size_t offset = 0) const;
};
}

View File

@ -85,6 +85,15 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
return strme.str();
}
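// single-factor convenience accessor; callers are expected to ask only for
// factors that are actually set on this word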
std::string Word::GetString(FactorType factorType) const
{
const Factor *factor = m_factorArray[factorType];
if (factor != NULL)
return factor->GetString();
else
return ""; // returning NULL here would be undefined behaviour for std::string
}
void Word::CreateFromString(FactorDirection direction
, const std::vector<FactorType> &factorOrder
, const std::string &str
@ -94,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction
vector<string> wordVec;
Tokenize(wordVec, str, "|");
if (!isNonTerminal)
assert(wordVec.size() == factorOrder.size());
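// non-terminal symbols consist of a single token, so the factor-count
// check only makes sense for terminals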
const Factor *factor;
for (size_t ind = 0; ind < wordVec.size(); ++ind) {

View File

@ -101,6 +101,7 @@ public:
* these debugging functions.
*/
std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
std::string GetString(FactorType factorType) const;
TO_STRING();
//! transitive comparison of Word objects