diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index 57acdff89..0e21cbed1 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -184,9 +184,9 @@ namespace Mira {
     m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
   }
 
-  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
+/*  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
     m_bleuScoreFeature->LoadReferences(refs);
-  }
+  }*/
 
   void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
     m_bleuScoreFeature->PrintHistory(out);
@@ -200,9 +200,11 @@ namespace Mira {
     return m_bleuScoreFeature->GetReferenceLength(ref_id);
   }
 
-  void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+  void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+      bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
       float scaleByX, float historySmoothing, size_t scheme, float relax_BP) {
-    m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength, scaleByTargetLength,
+    m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength,
+        scaleByTargetLengthLinear, scaleByTargetLengthTrend,
         scaleByX, historySmoothing, scheme, relax_BP);
   }
 }
diff --git a/mira/Decoder.h b/mira/Decoder.h
index fc1e82b8c..067f1cdcb 100644
--- a/mira/Decoder.h
+++ b/mira/Decoder.h
@@ -64,11 +64,12 @@ class MosesDecoder {
   size_t getCurrentInputLength();
   void updateHistory(const std::vector<std::string>& words);
   void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
-  void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
+//  void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
   void printBleuFeatureHistory(std::ostream& out);
   void printReferenceLength(const std::vector<size_t>& ref_ids);
   size_t getReferenceLength(size_t ref_id);
-  void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+  void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+      bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
       float scaleByX, float historySmoothing, size_t scheme, float relax_BP);
   Moses::ScoreComponentCollection getWeights();
   void setWeights(const Moses::ScoreComponentCollection& weights);
diff --git a/mira/Main.cpp b/mira/Main.cpp
index bc38466d1..a96e02ac4 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -82,7 +82,8 @@ int main(int argc, char** argv) {
   float historySmoothing;
   bool scaleByInputLength;
   bool scaleByReferenceLength;
-  bool scaleByTargetLength;
+  bool scaleByTargetLengthLinear;
+  bool scaleByTargetLengthTrend;
   bool scaleByAvgLength;
   float scaleByX;
   float slack;
@@ -119,6 +120,8 @@ int main(int argc, char** argv) {
   float max_length_dev_hypos;
   float max_length_dev_reference;
   float relax_BP;
+  bool stabiliseLength;
+  bool delayUpdates;
   po::options_description desc("Allowed options");
   desc.add_options()
     ("accumulate-weights", po::value(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
@@ -133,6 +136,7 @@ int main(int argc, char** argv) {
     ("core-weights", po::value(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
     ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
     ("decr-learning-rate", po::value(&decrease_learning_rate)->default_value(0), "Decrease learning rate by the given value after every epoch")
+    ("delay-updates", po::value(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
     ("distinct-nbest", po::value(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
     ("epochs,e", po::value(&epochs)->default_value(10), "Number of epochs")
    ("fear-n", po::value(&fear_n)->default_value(-1), "Number of fear translations used")
@@ -164,7 +168,8 @@ int main(int argc, char** argv) {
     ("relax-BP", po::value(&relax_BP)->default_value(1), "Relax the BP by setting this value between 0 and 1")
     ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale the BLEU score by (a history of) the input length")
     ("scale-by-reference-length", po::value(&scaleByReferenceLength)->default_value(false), "Scale BLEU by (a history of) the reference length")
-    ("scale-by-target-length", po::value(&scaleByTargetLength)->default_value(false), "Scale BLEU by (a history of) the target length")
+    ("scale-by-target-length-linear", po::value(&scaleByTargetLengthLinear)->default_value(false), "Scale BLEU by (a history of) the target length (linear future estimate)")
+    ("scale-by-target-length-trend", po::value(&scaleByTargetLengthTrend)->default_value(false), "Scale BLEU by (a history of) the target length (trend-based future estimate)")
     ("scale-by-avg-length", po::value(&scaleByAvgLength)->default_value(false), "Scale BLEU by (a history of) the average of input and reference length")
     ("scale-by-x", po::value(&scaleByX)->default_value(1), "Scale the BLEU score by value x")
     ("scale-margin", po::value(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
@@ -174,6 +179,7 @@ int main(int argc, char** argv) {
     ("slack", po::value(&slack)->default_value(0.01), "Use slack in optimiser")
     ("slack-min", po::value(&slack_min)->default_value(0.01), "Minimum slack used")
     ("slack-step", po::value(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
+    ("stabilise-length", po::value(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1")
     ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge")
     ("threads", po::value(&threadcount)->default_value(1), "Number of threads used")
     ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level")
@@ -268,11 +274,7 @@ int main(int argc, char** argv) {
     }
   }
 
-  if (scaleByReferenceLength)
-    scaleByInputLength = false;
-  if (scaleByTargetLength)
-    scaleByInputLength = false;
-  if (scaleByAvgLength)
+  if (scaleByReferenceLength || scaleByTargetLengthLinear || scaleByTargetLengthTrend || scaleByAvgLength)
     scaleByInputLength = false;
 
   // initialise Moses
@@ -285,7 +287,8 @@ int main(int argc, char** argv) {
   vector<string> decoder_params;
   boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
   MosesDecoder* decoder = new MosesDecoder(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
-  decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength, scaleByTargetLength,
+  decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength,
+      scaleByTargetLengthLinear, scaleByTargetLengthTrend,
       scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP);
   if (normaliseWeights) {
     ScoreComponentCollection startWeights = decoder->getWeights();
@@ -409,6 +412,12 @@ int main(int argc, char** argv) {
   ScoreComponentCollection mixedAverageWeightsPrevious;
   ScoreComponentCollection mixedAverageWeightsBeforePrevious;
 
+  // when length ratio >= 1, set this to true
+  bool fixLength = false;
+
+  // for accumulating delayed updates
+  ScoreComponentCollection delayedWeightUpdates;
+
   bool stop = false;
//  int sumStillViolatedConstraints;
   float *sendbuf, *recvbuf;
@@ -427,6 +436,12 @@ int main(int argc, char** argv) {
     // number of weight dumps this epoch
     size_t weightEpochDump = 0;
 
+    // sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
+    size_t dev_hypothesis_length = 0;
+    size_t dev_reference_length = 0;
+
+    delayedWeightUpdates.ZeroAll();
+
     size_t shardPosition = 0;
     vector<size_t>::const_iterator sid = shard.begin();
     while (sid != shard.end()) {
@@ -460,7 +475,7 @@ int main(int argc, char** argv) {
       for (size_t batchPosition = 0; batchPosition < batchSize && sid != shard.end(); ++batchPosition) {
         string& input = inputSentences[*sid];
-        const vector<string>& refs = referenceSentences[*sid];
+//        const vector<string>& refs = referenceSentences[*sid];
         cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
 
         vector<ScoreComponentCollection> newFeatureValues;
@@ -474,7 +489,7 @@ int main(int argc, char** argv) {
         featureValuesFear.push_back(newFeatureValues);
         bleuScoresHope.push_back(newBleuScores);
         bleuScoresFear.push_back(newBleuScores);
-        if (historyOf1best) {
+        if (historyOf1best || stabiliseLength) {
           dummyFeatureValues.push_back(newFeatureValues);
           dummyBleuScores.push_back(newBleuScores);
         }
@@ -493,13 +508,16 @@ int main(int argc, char** argv) {
         cerr << ", l-ratio hope: " << hope_length_ratio << endl;
 
         vector<const Word*> bestModel;
-        if (historyOf1best) {
+        if (historyOf1best || stabiliseLength) {
           // MODEL (for updating the history only, using dummy vectors)
-          cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl;
+          cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
           bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
               dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition],
               true, distinctNbest, rank, epoch);
           decoder->cleanup();
+          cerr << endl;
+          dev_hypothesis_length += bestModel.size();
+          dev_reference_length += reference_length;
         }
 
         // FEAR
@@ -576,6 +594,10 @@ int main(int argc, char** argv) {
           oneBests.push_back(bestModel);
           float model_length_ratio = (float)bestModel.size()/reference_length;
           cerr << ", l-ratio model: " << model_length_ratio << endl;
+          if (stabiliseLength) {
+            dev_hypothesis_length += bestModel.size();
+            dev_reference_length += reference_length;
+          }
 
           // FEAR
           cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
@@ -622,6 +644,19 @@ int main(int argc, char** argv) {
           break;
         }
 
+        // set word penalty to 0 before optimising (if 'stabilise-length' is active)
+        if (fixLength) {
+          iter = featureFunctions.begin();
+          for (; iter != featureFunctions.end(); ++iter) {
+            if ((*iter)->GetScoreProducerWeightShortName() == "w") {
+              ignoreWPFeature(featureValues, (*iter));
+              ignoreWPFeature(featureValuesHope, (*iter));
+              ignoreWPFeature(featureValuesFear, (*iter));
+              break;
+            }
+          }
+        }
+
         // take logs of feature values
         if (logFeatureValues) {
           takeLogs(featureValuesHope, baseOfLog);
@@ -654,24 +689,28 @@ int main(int argc, char** argv) {
         // Run optimiser on batch:
         VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
         size_t update_status;
+        ScoreComponentCollection weightUpdate;
         if (perceptron_update) {
           vector<vector<float> > dummy1;
-          update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+          update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
               featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
         }
         else if (hope_fear) {
-          update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+          update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
               featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
         }
         else {
           // model_hope_fear
-          update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
+          update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
              featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
         }
 
//        sumStillViolatedConstraints += update_status;
 
         if (update_status == 0) { // if weights were updated
+          // apply weight update
+          mosesWeights.PlusEquals(weightUpdate);
+
           if (normaliseWeights) {
             mosesWeights.L1Normalise();
           }
@@ -690,8 +729,11 @@ int main(int argc, char** argv) {
             mosesWeights = averageWeights;
           }
 
-          // set new Moses weights
-          decoder->setWeights(mosesWeights);
+          if (delayUpdates)
+            delayedWeightUpdates.PlusEquals(weightUpdate);
+          else
+            // set new Moses weights
+            decoder->setWeights(mosesWeights);
         }
 
         // update history (for approximate document Bleu)
@@ -802,8 +844,25 @@ int main(int argc, char** argv) {
           }
         }
       }// end dumping
+
     } // end of shard loop, end of this epoch
 
+    if (delayUpdates) {
+      // apply all updates from this epoch to the weight vector
+      ScoreComponentCollection mosesWeights = decoder->getWeights();
+      mosesWeights.PlusEquals(delayedWeightUpdates);
+      decoder->setWeights(mosesWeights);
+      cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
+    }
+
+    if (stabiliseLength && !fixLength) {
+      float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
+      if (lengthRatio >= 1) {
+        cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl;
+        fixLength = 1;
+      }
+    }
+
     if (verbosity > 0) {
       cerr << "Bleu feature history after epoch " << epoch << endl;
       decoder->printBleuFeatureHistory(cerr);
@@ -840,28 +899,19 @@ int main(int argc, char** argv) {
     if (rank == 0 && (epoch >= 2)) {
       ScoreComponentCollection firstDiff(mixedAverageWeights);
       firstDiff.MinusEquals(mixedAverageWeightsPrevious);
-      VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
+      VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff <<
+          " (max: " << firstDiff.GetLInfNorm() << ")" << endl);
       ScoreComponentCollection secondDiff(mixedAverageWeights);
       secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
-      VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
+      VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff <<
+          " (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl);
 
       // check whether stopping criterion has been reached
       // (both difference vectors must have all weight changes smaller than min_weight_change)
-      FVector changes1 = firstDiff.GetScoresVector();
-      FVector changes2 = secondDiff.GetScoresVector();
-      FVector::const_iterator iterator1 = changes1.cbegin();
-      FVector::const_iterator iterator2 = changes2.cbegin();
-      while (iterator1 != changes1.cend()) {
-        if (abs((*iterator1).second) >= min_weight_change || abs(
-            (*iterator2).second) >= min_weight_change) {
-          reached = false;
-          break;
-        }
-
-        ++iterator1;
-        ++iterator2;
-      }
-
+      if (firstDiff.GetLInfNorm() >= min_weight_change)
+        reached = false;
+      if (secondDiff.GetLInfNorm() >= min_weight_change)
+        reached = false;
       if (reached) {
         // stop MIRA
         stop = true;
@@ -991,16 +1041,20 @@ void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues
 }
 
 void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap) {
-  for (size_t i = 0; i < featureValues.size(); ++i) {
+  for (size_t i = 0; i < featureValues.size(); ++i)
     for (size_t j = 0; j < featureValues[i].size(); ++j) {
      // set all core features to 0
       StrFloatMap::iterator p;
       for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
-      {
         featureValues[i][j].Assign(p->first, 0);
-      }
     }
-  }
+}
+
+void ignoreWPFeature(vector<vector<ScoreComponentCollection> > &featureValues, const ScoreProducer* sp) {
+  for (size_t i = 0; i < featureValues.size(); ++i)
+    for (size_t j = 0; j < featureValues[i].size(); ++j)
+      // set WP feature to 0
+      featureValues[i][j].Assign(sp, 0);
 }
 
 void takeLogs(vector<vector<ScoreComponentCollection> > &featureValues, size_t base) {
diff --git a/mira/Main.h b/mira/Main.h
index 68de9b9c9..4fd859b95 100644
--- a/mira/Main.h
+++ b/mira/Main.h
@@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 #include "ScoreComponentCollection.h"
 #include "Word.h"
+#include "ScoreProducer.h"
 
 typedef std::map<std::string, float> StrFloatMap;
 typedef std::pair<std::string, float> StrFloatPair;
@@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap);
 bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
 void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
 void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap);
+void ignoreWPFeature(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, const Moses::ScoreProducer* sp);
 void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
 void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);
diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp
index 17f2b6dad..6dd88ba5d 100644
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@@ -7,7 +7,9 @@ using namespace std;
 namespace Mira {
 
-size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeights(
+    ScoreComponentCollection& currWeights,
+    ScoreComponentCollection& weightUpdate,
     const vector<vector<ScoreComponentCollection> >& featureValues,
     const vector<vector<float> >& losses,
     const vector<vector<float> >& bleuScores,
@@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
   }
 
   cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
-
-  // apply update to weight vector
-  currWeights.PlusEquals(summedUpdate);
+  weightUpdate.PlusEquals(summedUpdate);
 
   // Sanity check: are there still violated constraints after optimisation?
/*  int violatedConstraintsAfter = 0;
@@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
   return 0;
 }
 
-size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeightsHopeFear(
+    Moses::ScoreComponentCollection& currWeights,
+    Moses::ScoreComponentCollection& weightUpdate,
     const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
     const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
     const std::vector<std::vector<float> >& bleuScoresHope,
@@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
   }
 
   cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
-
-  // apply update to weight vector
-  currWeights.PlusEquals(summedUpdate);
+  weightUpdate.PlusEquals(summedUpdate);
 
   // Sanity check: are there still violated constraints after optimisation?
/*  int violatedConstraintsAfter = 0;
diff --git a/mira/Optimiser.h b/mira/Optimiser.h
index 5827f1f5e..709c876e3 100644
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@@ -30,7 +30,9 @@ namespace Mira {
   public:
     Optimiser() {}
 
-    virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+    virtual size_t updateWeightsHopeFear(
+        Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
         const std::vector<std::vector<float> >& bleuScoresHope,
@@ -42,7 +44,9 @@ namespace Mira {
 
   class Perceptron : public Optimiser {
   public:
-    virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+    virtual size_t updateWeightsHopeFear(
+        Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
         const std::vector<std::vector<float> >& bleuScoresHope,
@@ -66,6 +70,7 @@ namespace Mira {
       m_margin_slack(margin_slack) { }
 
     size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
        const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
        const std::vector<std::vector<float> >& losses,
        const std::vector<std::vector<float> >& bleuScores,
@@ -75,6 +80,7 @@ namespace Mira {
        size_t rank, size_t epoch);
 
     virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
        const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
        const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
        const std::vector<std::vector<float> >& bleuScoresHope,
diff --git a/mira/Perceptron.cpp b/mira/Perceptron.cpp
index 322a0984e..a2bd7cde3 100644
--- a/mira/Perceptron.cpp
+++ b/mira/Perceptron.cpp
@@ -24,7 +24,9 @@ using namespace std;
 
 namespace Mira {
 
-size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
+size_t Perceptron::updateWeightsHopeFear(
+    ScoreComponentCollection& currWeights,
+    ScoreComponentCollection& weightUpdate,
     const vector< vector<ScoreComponentCollection> >& featureValuesHope,
     const vector< vector<ScoreComponentCollection> >& featureValuesFear,
     const vector< vector<float> >& dummy1,
@@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
   featureValueDiff.MinusEquals(featureValuesFear[0][0]);
   cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
   featureValueDiff.MultiplyEquals(perceptron_learning_rate);
-  currWeights.PlusEquals(featureValueDiff);
+  weightUpdate.PlusEquals(featureValueDiff);
   cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
   return 0;
 }
diff --git a/mira/training-expt.perl b/mira/training-expt.perl
index afe8b09e7..df7df44a9 100755
--- a/mira/training-expt.perl
+++ b/mira/training-expt.perl
@@ -72,6 +72,9 @@ my $moses_ini_file = &param_required("train.moses-ini-file");
 my $input_file = &param_required("train.input-file");
 &check_exists ("train input file", $input_file);
 my $reference_files = &param_required("train.reference-files");
+for my $ref (glob $reference_files . "*") {
+    &check_exists ("ref files", $ref);
+}
 my $trainer_exe = &param_required("train.trainer");
 &check_exists("Training executable", $trainer_exe);
#my $weights_file = &param_required("train.weights-file");
@@ -94,20 +97,21 @@ my $burn_in_reference_files = &param("train.burn-in-reference-files");
 my $skipTrain = &param("train.skip", 0);
 
#devtest configuration
-my ($devtest_input_file, $devtest_reference_file,$devtest_ini_file,$bleu_script,$use_moses);
+my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses);
 my $test_exe = &param("devtest.moses");
 &check_exists("test executable", $test_exe);
 $bleu_script = &param_required("devtest.bleu");
 &check_exists("multi-bleu script", $bleu_script);
 $devtest_input_file = &param_required("devtest.input-file");
-$devtest_reference_file = &param_required("devtest.reference-file");
 &check_exists ("devtest input file", $devtest_input_file);
-
-for my $ref (glob $devtest_reference_file . "*") {
+$devtest_reference_files = &param_required("devtest.reference-file");
+for my $ref (glob $devtest_reference_files . "*") {
     &check_exists ("devtest ref file", $ref);
 }
 $devtest_ini_file = &param_required("devtest.moses-ini-file");
 &check_exists ("devtest ini file", $devtest_ini_file);
+
+
 my $weight_file_stem = "$name-weights";
 my $extra_memory_devtest = &param("devtest.extra-memory",0);
 my $skip_devtest = &param("devtest.skip-devtest",0);
@@ -174,8 +178,9 @@ my @refs;
 if (ref($reference_files) eq 'ARRAY') {
     @refs = @$reference_files;
 } else {
-    @refs = glob $reference_files;
+    @refs = glob $reference_files . "*"
 }
+my $arr_refs = \@refs;
 
 if (!$skipTrain) {
     #write the script
@@ -198,7 +203,6 @@ print TRAIN "-f $moses_ini_file \\\n";
 print TRAIN "-i $input_file \\\n";
 for my $ref (@refs) {
-    &check_exists("train ref file", $ref);
     print TRAIN "-r $ref ";
 }
 print TRAIN "\\\n";
@@ -206,15 +210,15 @@ print TRAIN "\\\n";
 if ($burn_in) {
     print TRAIN "--burn-in 1 \\\n";
     print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
-    my @refs;
+    my @burnin_refs;
     if (ref($burn_in_reference_files) eq 'ARRAY') {
-        @refs = @$burn_in_reference_files;
+        @burnin_refs = @$burn_in_reference_files;
     } else {
-        @refs = glob $burn_in_reference_files;
+        @burnin_refs = glob $burn_in_reference_files . "*";
     }
-    for my $ref (@refs) {
-        &check_exists("burn-in ref file", $ref);
-        print TRAIN "--burn-in-reference-files $ref ";
+    for my $burnin_ref (@burnin_refs) {
+        &check_exists("burn-in ref file", $burnin_ref);
+        print TRAIN "--burn-in-reference-files $burnin_ref ";
     }
     print TRAIN "\\\n";
 }
@@ -317,10 +321,10 @@ while(1) {
     my $suffix = "";
     print "weight file exists? ".(-e $new_weight_file)."\n";
     if (!$skip_devtest) {
-        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_file, $skip_submit_test);
+        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test);
     }
     if (!$skip_dev) {
-        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $refs[0], $skip_submit_test);
+        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test);
     }
 }
diff --git a/moses-chart-cmd/src/IOWrapper.cpp b/moses-chart-cmd/src/IOWrapper.cpp
index cf90b877b..ce31bc192 100644
--- a/moses-chart-cmd/src/IOWrapper.cpp
+++ b/moses-chart-cmd/src/IOWrapper.cpp
@@ -47,6 +47,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "ChartHypothesis.h"
 #include "DotChart.h"
 
+#include <boost/algorithm/string.hpp>
+#include "FeatureVector.h"
+
 using namespace std;
 using namespace Moses;
@@ -345,7 +348,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
     // print the surface factor of the translation
     out << translationId << " ||| ";
     OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
-    out << " |||";
+    out << " ||| ";
 
     // print the scores in a hardwired order
     // before each model type, the corresponding command-line-like name must be emitted
@@ -362,26 +365,23 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
       }
     }
 
-
    std::string lastName = "";

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
-	for( size_t i=0; i<pds.size(); i++ ) {
-	  size_t pd_numinputscore = pds[i]->GetNumInputScores();
-	  vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
-	  for (size_t j = 0; j<scores.size(); ++j){
-	    if (labeledOutput && (i == 0) ){
-	      if ((j == 0) || (j == pd_numinputscore)){
-	        lastName = pds[i]->GetScoreProducerWeightShortName(j);
-	        out << " " << lastName << ":";
-	      }
-	    }
-	    out << " " << scores[j];
-	  }
+      for( size_t i=0; i<pds.size(); i++ ) {
+        size_t pd_numinputscore = pds[i]->GetNumInputScores();
+        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
+        for (size_t j = 0; j<scores.size(); ++j){
+          if (labeledOutput && (i == 0) ){
+            if ((j == 0) || (j == pd_numinputscore)){
+              lastName = pds[i]->GetScoreProducerWeightShortName(j);
+              out << " " << lastName << ":";
+            }
+          }
+          out << " " << scores[j];
+        }
      }
    }
@@ -393,26 +393,36 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
 
    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
-	for( size_t i=0; i<gds.size(); i++ ) {
-	  size_t pd_numinputscore = gds[i]->GetNumInputScores();
-	  vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
-	  for (size_t j = 0; j<scores.size(); ++j){
-	    if (labeledOutput && (i == 0) ){
-	      if ((j == 0) || (j == pd_numinputscore)){
-	        lastName = gds[i]->GetScoreProducerWeightShortName(j);
-	        out << " " << lastName << ":";
-	      }
-	    }
-	    out << " " << scores[j];
-	  }
+      for( size_t i=0; i<gds.size(); i++ ) {
+        size_t pd_numinputscore = gds[i]->GetNumInputScores();
+        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
+        for (size_t j = 0; j<scores.size(); ++j){
+          if (labeledOutput && (i == 0) ){
+            if ((j == 0) || (j == pd_numinputscore)){
+              lastName = gds[i]->GetScoreProducerWeightShortName(j);
+              out << " " << lastName << ":";
+            }
+          }
+          out << " " << scores[j];
+        }
      }
    }
 
+    // output sparse features
+    lastName = "";
+    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
+    for( size_t i=0; i<sff.size(); i++ )
+      if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
+        OutputSparseFeatureScores( out, path, sff[i], lastName );
+
+    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
+    for( size_t i=0; i<slf.size(); i++ )
+      if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
+        OutputSparseFeatureScores( out, path, slf[i], lastName );
+
    // total
-    out << " |||" << path.GetTotalScore();
+    out << " ||| " << path.GetTotalScore();
 
/*
    if (includeAlignment) {
@@ -443,6 +453,32 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
  m_nBestOutputCollector->Write(translationId, out.str());
 }
 
+void IOWrapper::OutputSparseFeatureScores( std::ostream& out, const ChartTrellisPath &path, const FeatureFunction *ff, std::string &lastName )
+{
+  const StaticData &staticData = StaticData::Instance();
+  bool labeledOutput = staticData.IsLabeledNBestList();
+  const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
+
+  // report weighted aggregate
+  if (! ff->GetSparseFeatureReporting()) {
+    const FVector &weights = staticData.GetAllWeights().GetScoresVector();
+    if (labeledOutput && !boost::contains(ff->GetScoreProducerDescription(), ":"))
+      out << " " << ff->GetScoreProducerWeightShortName() << ":";
+    out << " " << scores.inner_product(weights);
+  }
+
+  // report each feature
+  else {
+    for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
+      if (i->second != 0) { // do not report zero-valued features
+        if (labeledOutput)
+          out << " " << i->first << ":";
+        out << " " << i->second;
+      }
+    }
+  }
+}
+
 void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
 {
  stream.setf(std::ios::fixed);
diff --git a/moses-chart-cmd/src/IOWrapper.h b/moses-chart-cmd/src/IOWrapper.h
index 5936e7405..058ee0712 100644
--- a/moses-chart-cmd/src/IOWrapper.h
+++ b/moses-chart-cmd/src/IOWrapper.h
@@ -44,6 +44,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "OutputCollector.h" #include "ChartHypothesis.h" +#include "ChartTrellisPath.h" + namespace Moses { class FactorCollection; @@ -82,6 +84,7 @@ public: void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors); void OutputBestHypo(const std::vector& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors); void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId); + void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName); void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, long translationId); void Backtrack(const Moses::ChartHypothesis *hypo); diff --git a/moses-chart-cmd/src/Main.cpp b/moses-chart-cmd/src/Main.cpp index 2c9002720..bec974ee8 100644 --- a/moses-chart-cmd/src/Main.cpp +++ b/moses-chart-cmd/src/Main.cpp @@ -165,18 +165,25 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source } static void PrintFeatureWeight(const FeatureFunction* ff) { - size_t numScoreComps = ff->GetNumScoreComponents(); if (numScoreComps != ScoreProducer::unlimited) { vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); - for (size_t i = 0; i < numScoreComps; ++i) { + for (size_t i = 0; i < numScoreComps; ++i) cout << ff->GetScoreProducerDescription() << " " << ff->GetScoreProducerWeightShortName() << " " << values[i] << endl; - } - } else { - cout << ff->GetScoreProducerDescription() << " " << - ff->GetScoreProducerWeightShortName() << " sparse" << endl; + } +} + +static void PrintSparseFeatureWeight(const FeatureFunction* ff) +{ + if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) { + if (ff->GetSparseProducerWeight() == 1) + cout << ff->GetScoreProducerDescription() << " " << + ff->GetScoreProducerWeightShortName() << " sparse" << endl; + else + cout << ff->GetScoreProducerDescription() << " " << + ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl; } } @@ -201,6 +208,9 @@ static void ShowWeights() for (size_t i = 0; i < slf.size(); ++i) { PrintFeatureWeight(slf[i]); } + for (size_t i = 0; i < sff.size(); ++i) { + PrintSparseFeatureWeight(sff[i]); + } } diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp index 0eccac246..feb092dab 100644 --- a/moses-cmd/src/Main.cpp +++ b/moses-cmd/src/Main.cpp @@ -287,21 +287,27 @@ private: static void PrintFeatureWeight(const FeatureFunction* ff) { - size_t numScoreComps = ff->GetNumScoreComponents(); if (numScoreComps != ScoreProducer::unlimited) { vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); - for (size_t i = 0; i < numScoreComps; ++i) { + for (size_t i = 0; i < numScoreComps; ++i) cout << ff->GetScoreProducerDescription() << " " << ff->GetScoreProducerWeightShortName() << " " << values[i] << endl; - } - } else { - cout << ff->GetScoreProducerDescription() << " " << - ff->GetScoreProducerWeightShortName() << " sparse" << endl; } } +static void PrintSparseFeatureWeight(const FeatureFunction* ff) +{ + if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) { + if (ff->GetSparseProducerWeight() == 1) + cout << ff->GetScoreProducerDescription() << " " << + ff->GetScoreProducerWeightShortName() << " sparse" << endl; + else + cout << ff->GetScoreProducerDescription() << " " << + 
ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl; + } +} static void ShowWeights() { @@ -324,6 +330,9 @@ static void ShowWeights() for (size_t i = 0; i < gds.size(); ++i) { PrintFeatureWeight(gds[i]); } + for (size_t i = 0; i < sff.size(); ++i) { + PrintSparseFeatureWeight(sff[i]); + } } /** main function of the command line version of the decoder **/ diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp index 142fa27c4..4ab9f92a9 100644 --- a/moses/src/BleuScoreFeature.cpp +++ b/moses/src/BleuScoreFeature.cpp @@ -81,11 +81,13 @@ void BleuScoreFeature::PrintHistory(std::ostream& out) const { } } -void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength, - float scaleByX, float historySmoothing, size_t scheme, float relaxBP) { +void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, + bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend, + float scaleByX, float historySmoothing, size_t scheme, float relaxBP) { m_scale_by_input_length = scaleByInputLength; m_scale_by_ref_length = scaleByRefLength; - m_scale_by_target_length = scaleByTargetLength; + m_scale_by_target_length_linear = scaleByTargetLengthLinear; + m_scale_by_target_length_trend = scaleByTargetLengthTrend; m_scale_by_avg_length = scaleByAvgLength; m_scale_by_x = scaleByX; m_historySmoothing = historySmoothing; @@ -97,6 +99,7 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin { m_refs.clear(); FactorCollection& fc = FactorCollection::Instance(); + cerr << "Number of reference files: " << refs.size() << endl; for (size_t file_id = 0; file_id < refs.size(); file_id++) { for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) { const string& ref = refs[file_id][ref_id]; @@ -430,13 +433,19 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const { else if (m_scale_by_ref_length) { precision *= m_ref_length_history + m_cur_ref_length; } - else if (m_scale_by_target_length) { - precision *= m_target_length_history + state->m_target_length; + else if (m_scale_by_target_length_linear) { + // length of current hypothesis + number of words still to translate from source (rest being translated 1-to-1) + float scaled_target_length = state->m_target_length + (m_cur_source_length - state->m_source_length); + precision *= m_target_length_history + scaled_target_length; + } + else if (m_scale_by_target_length_trend) { + // length of full target if remaining words were translated with the same fertility as so far + float scaled_target_length = ((float)m_cur_source_length/state->m_source_length) * state->m_target_length; + precision *= m_target_length_history + scaled_target_length; } else if (m_scale_by_avg_length) { precision *= (m_source_length_history + m_ref_length_history + m_cur_source_length + + m_cur_ref_length) / 2; } - return precision*m_scale_by_x; } diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h index ac5d113fd..3ff76c465 100644 --- a/moses/src/BleuScoreFeature.h +++ b/moses/src/BleuScoreFeature.h @@ -80,7 +80,8 @@ public: void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); void PrintReferenceLength(const std::vector& ref_ids); size_t GetReferenceLength(size_t ref_id); - void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool 
scaleByAvgLength, bool scaleByTargetLength, + void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, + bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend, float scaleByX, float historySmoothing, size_t scheme, float relaxBP); void GetNgramMatchCounts(Phrase&, const NGrams&, @@ -125,8 +126,11 @@ private: // scale BLEU score by (history of) reference length bool m_scale_by_ref_length; - // scale BLEU score by (history of) target length - bool m_scale_by_target_length; + // scale BLEU score by (history of) target length (linear future estimate) + bool m_scale_by_target_length_linear; + + // scale BLEU score by (history of) target length (trend-based future estimate) + bool m_scale_by_target_length_trend; // scale BLEU score by (history of) the average of input and reference length bool m_scale_by_avg_length; diff --git a/moses/src/FeatureVector.cpp b/moses/src/FeatureVector.cpp index 05da851b0..ce4754bb5 100644 --- a/moses/src/FeatureVector.cpp +++ b/moses/src/FeatureVector.cpp @@ -255,17 +255,10 @@ namespace Moses { } FVector& FVector::operator+= (const FVector& rhs) { - if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) { + if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) resize(rhs.m_coreFeatures.size()); - } - for (iterator i = begin(); i != end(); ++i) { - set(i->first,i->second + rhs.get(i->first)); - } - for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) { - if (!hasNonDefaultValue(i->first)) { - set(i->first,i->second); - } - } + for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) + set(i->first, get(i->first) + i->second); for (size_t i = 0; i < m_coreFeatures.size(); ++i) { if (i < rhs.m_coreFeatures.size()) { m_coreFeatures[i] += rhs.m_coreFeatures[i]; @@ -275,17 +268,10 @@ namespace Moses { } FVector& FVector::operator-= (const FVector& rhs) { - if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) { + if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) resize(rhs.m_coreFeatures.size()); - } - for (iterator i = begin(); i != end(); ++i) { - set(i->first,i->second - rhs.get(i->first)); - } - for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) { - if (!hasNonDefaultValue(i->first)) { - set(i->first,-(i->second)); - } - } + for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) + set(i->first, get(i->first) -(i->second)); for (size_t i = 0; i < m_coreFeatures.size(); ++i) { if (i < rhs.m_coreFeatures.size()) { m_coreFeatures[i] -= rhs.m_coreFeatures[i]; @@ -336,28 +322,6 @@ namespace Moses { return *this; } - FVector& FVector::max_equals(const FVector& rhs) { - if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) { - resize(rhs.m_coreFeatures.size()); - } - for (iterator i = begin(); i != end(); ++i) { - set(i->first, max(i->second , rhs.get(i->first) )); - } - for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) { - if (!hasNonDefaultValue(i->first)) { - set(i->first, i->second); - } - } - for (size_t i = 0; i < m_coreFeatures.size(); ++i) { - if (i < rhs.m_coreFeatures.size()) { - m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]); - } else { - m_coreFeatures[i] = max(m_coreFeatures[i],(float)0); - } - } - return *this; - } - FVector& FVector::operator*= (const FValue& rhs) { //NB Could do this with boost::bind ? 
     for (iterator i = begin(); i != end(); ++i) {
@@ -367,7 +331,6 @@ namespace Moses {
     return *this;
   }
 
-
   FVector& FVector::operator/= (const FValue& rhs) {
     for (iterator i = begin(); i != end(); ++i) {
       i->second /= rhs;
@@ -387,6 +350,25 @@ namespace Moses {
     return norm;
   }
 
+  FValue FVector::l2norm() const {
+    return sqrt(inner_product(*this));
+  }
+
+  FValue FVector::linfnorm() const {
+    FValue norm = 0;
+    for (const_iterator i = cbegin(); i != cend(); ++i) {
+      float absValue = abs(i->second);
+      if (absValue > norm)
+        norm = absValue;
+    }
+    for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
+      float absValue = m_coreFeatures[i];
+      if (absValue > norm)
+        norm = absValue;
+    }
+    return norm;
+  }
+
   FValue FVector::sum() const {
     FValue sum = 0;
     for (const_iterator i = cbegin(); i != cend(); ++i) {
@@ -395,11 +377,7 @@ namespace Moses {
     sum += m_coreFeatures.sum();
     return sum;
   }
-
-  FValue FVector::l2norm() const {
-    return sqrt(inner_product(*this));
-  }
-
+  
   FValue FVector::inner_product(const FVector& rhs) const {
     CHECK(m_coreFeatures.size() == rhs.m_coreFeatures.size());
     FValue product = 0.0;
@@ -436,11 +414,7 @@ namespace Moses {
   const FVector operator/(const FVector& lhs, const FValue& rhs) {
     return FVector(lhs) /= rhs;
   }
-
-  const FVector fvmax(const FVector& lhs, const FVector& rhs) {
-    return FVector(lhs).max_equals(rhs);
-  }
-
+  
   FValue inner_product(const FVector& lhs, const FVector& rhs) {
     if (lhs.size() >= rhs.size()) {
       return rhs.inner_product(lhs);
diff --git a/moses/src/FeatureVector.h b/moses/src/FeatureVector.h
index db2e9202f..f2d187ba6 100644
--- a/moses/src/FeatureVector.h
+++ b/moses/src/FeatureVector.h
@@ -177,6 +177,7 @@ namespace Moses {
     /** norms and sums */
     FValue l1norm() const;
     FValue l2norm() const;
+    FValue linfnorm() const;
     FValue sum() const;
 
     /** pretty printing */
@@ -292,6 +293,10 @@ namespace Moses {
       return (m_fv->m_features[m_name] += lhs);
     }
 
+    FValue operator -=(FValue lhs) {
+      return (m_fv->m_features[m_name] -= lhs);
+    }
+
   private:
     FValue m_tmp;
diff --git a/moses/src/FeatureVectorTest.cpp b/moses/src/FeatureVectorTest.cpp
index f6520ead8..af1829e62 100644
--- a/moses/src/FeatureVectorTest.cpp
+++ b/moses/src/FeatureVectorTest.cpp
@@ -224,26 +224,6 @@ BOOST_AUTO_TEST_CASE(core_scalar)
 
 }
 
-BOOST_AUTO_TEST_CASE(core_max)
-{
-  FVector f1(2);
-  FVector f2(2);
-  FName n1("a");
-  FName n2("b");
-  FName n3("c");
-  f1[0] = 1.1; f1[1] = -0.1; ; f1[n2] = -1.5; f1[n3] = 2.2;
-  f2[0] = 0.5; f2[1] = 0.25; f2[n1] = 1; f2[n3] = 2.4;
-
-  FVector m = fvmax(f1,f2);
-
-  BOOST_CHECK_CLOSE((FValue)m[0], 1.1 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[1], 0.25 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n1], 1 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n2],0 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n3],2.4 , TOL);
-
-}
-
 BOOST_AUTO_TEST_CASE(l1norm)
 {
   FVector f1(3);
diff --git a/moses/src/ScoreComponentCollection.cpp b/moses/src/ScoreComponentCollection.cpp
index b7e54c554..d9cb40e37 100644
--- a/moses/src/ScoreComponentCollection.cpp
+++ b/moses/src/ScoreComponentCollection.cpp
@@ -63,8 +63,8 @@ void ScoreComponentCollection::MultiplyEquals(float scalar)
// Multiply all weights of this sparse producer by a given scalar
 void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar)
 {
-  CHECK(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
-  std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
+  assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
+  std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
   for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
     std::stringstream name;
     name << i->first;
@@ -100,6 +100,10 @@ float ScoreComponentCollection::GetL2Norm() const {
   return m_scores.l2norm();
 }
 
+float ScoreComponentCollection::GetLInfNorm() const {
+  return m_scores.linfnorm();
+}
+
 void ScoreComponentCollection::Save(ostream& out) const {
   ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
   for (; iter != s_scoreIndexes.end(); ++iter ) {
diff --git a/moses/src/ScoreComponentCollection.h b/moses/src/ScoreComponentCollection.h
index dc0913eb6..133e2840d 100644
--- a/moses/src/ScoreComponentCollection.h
+++ b/moses/src/ScoreComponentCollection.h
@@ -150,6 +150,21 @@ public:
     m_scores -= rhs.m_scores;
   }
 
+  //For features which have an unbounded number of components
+  void MinusEquals(const ScoreProducer*sp, const std::string& name, float score)
+  {
+    assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
+    FName fname(sp->GetScoreProducerDescription(),name);
+    m_scores[fname] -= score;
+  }
+
+  //For features which have an unbounded number of components
+  void SparseMinusEquals(const std::string& full_name, float score)
+  {
+    FName fname(full_name);
+    m_scores[fname] -= score;
+  }
+
   //! Add scores from a single ScoreProducer only
   //! The length of scores must be equal to the number of score components
@@ -192,6 +207,13 @@ public:
     m_scores[fname] += score;
   }
 
+  //For features which have an unbounded number of components
+  void SparsePlusEquals(const std::string& full_name, float score)
+  {
+    FName fname(full_name);
+    m_scores[fname] += score;
+  }
+
   void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
   {
     IndexPair indexes = GetIndexes(sp);
@@ -307,6 +329,7 @@ public:
   void L1Normalise();
   float GetL1Norm() const;
   float GetL2Norm() const;
+  float GetLInfNorm() const;
   void Save(const std::string& filename) const;
   void Save(std::ostream&) const;
diff --git a/moses/src/ScoreProducer.h b/moses/src/ScoreProducer.h
index 113a37770..65b655972 100644
--- a/moses/src/ScoreProducer.h
+++ b/moses/src/ScoreProducer.h
@@ -54,6 +54,8 @@ public:
   void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
   bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; }
+
+  virtual float GetSparseProducerWeight() const { return 1; }
 };
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index d24555f77..406ba6c43 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -1442,7 +1442,7 @@ bool StaticData::LoadReferences()
     }
     string line;
     while (getline(in,line)) {
-      references.back().push_back(line);
+      references[i].push_back(line);
     }
     if (i > 0) {
       if (references[i].size() != references[i-1].size()) {
@@ -1459,14 +1459,12 @@ bool StaticData::LoadReferences()
 
 bool StaticData::LoadDiscrimLMFeature()
 {
-  cerr << "Loading discriminative language models.. ";
-
-  // only load if specified
+  // only load if specified
   const vector<string> &wordFile = m_parameter->GetParam("discrim-lmodel-file");
   if (wordFile.empty()) {
     return true;
   }
-  cerr << wordFile.size() << " models" << endl;
+  cerr << "Loading " << wordFile.size() << " discriminative language model(s).." << endl;
 
   // if this weight is specified, the sparse DLM weights will be scaled with an additional weight
   vector<string> dlmWeightStr = m_parameter->GetParam("weight-dlm");
@@ -1495,6 +1493,11 @@ bool StaticData::LoadDiscrimLMFeature()
       }
     }
     else {
+      if (m_searchAlgorithm == ChartDecoding && !include_lower_ngrams) {
+        UserMessage::Add("Excluding lower order DLM ngrams is currently not supported for chart decoding.");
+        return false;
+      }
+
       m_targetNgramFeatures.push_back(new TargetNgramFeature(factorId, order, include_lower_ngrams));
       if (i < dlmWeights.size())
         m_targetNgramFeatures[i]->SetSparseProducerWeight(dlmWeights[i]);
diff --git a/moses/src/TargetNgramFeature.cpp b/moses/src/TargetNgramFeature.cpp
index 9da9ba670..3fefdfba2 100644
--- a/moses/src/TargetNgramFeature.cpp
+++ b/moses/src/TargetNgramFeature.cpp
@@ -3,6 +3,7 @@
 #include "TargetPhrase.h"
 #include "Hypothesis.h"
 #include "ScoreComponentCollection.h"
+#include "ChartHypothesis.h"
 
 namespace Moses {
 
@@ -12,25 +13,25 @@ int TargetNgramState::Compare(const FFState& other) const {
   const TargetNgramState& rhs = dynamic_cast<const TargetNgramState&>(other);
   int result;
   if (m_words.size() == rhs.m_words.size()) {
-	for (size_t i = 0; i < m_words.size(); ++i) {
-	  result = Word::Compare(m_words[i],rhs.m_words[i]);
-	  if (result != 0) return result;
-	}
+    for (size_t i = 0; i < m_words.size(); ++i) {
+      result = Word::Compare(m_words[i],rhs.m_words[i]);
+      if (result != 0) return result;
+    }
     return 0;
   }
   else if (m_words.size() < rhs.m_words.size()) {
-	for (size_t i = 0; i < m_words.size(); ++i) {
-	  result = Word::Compare(m_words[i],rhs.m_words[i]);
-	  if (result != 0) return result;
-	}
-	return -1;
+    for (size_t i = 0; i < m_words.size(); ++i) {
+      result = Word::Compare(m_words[i],rhs.m_words[i]);
+      if (result != 0) return result;
+    }
+    return -1;
   }
   else {
-	for (size_t i = 0; i < rhs.m_words.size(); ++i) {
-	  result = Word::Compare(m_words[i],rhs.m_words[i]);
-	  if (result != 0) return result;
-	}
-	return 1;
+    for (size_t i = 0; i < rhs.m_words.size(); ++i) {
+      result = Word::Compare(m_words[i],rhs.m_words[i]);
+      if (result != 0) return result;
+    }
+    return 1;
   }
 }
@@ -45,7 +46,7 @@ bool TargetNgramFeature::Load(const std::string &filePath)
   std::string line;
   m_vocab.insert(BOS_);
-  m_vocab.insert(BOS_);
+  m_vocab.insert(EOS_);
   while (getline(inFile, line)) {
     m_vocab.insert(line);
   }
@@ -54,10 +55,9 @@ bool TargetNgramFeature::Load(const std::string &filePath)
   return true;
 }
 
-
 string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const
 {
-  return "dlmn";
+  return "dlm";
 }
 
 size_t TargetNgramFeature::GetNumInputScores() const
@@ -65,7 +65,6 @@ size_t TargetNgramFeature::GetNumInputScores() const
   return 0;
 }
 
-
 const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input*/) const
 {
   vector<Word> bos(1,m_bos);
@@ -76,8 +75,8 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
 {
-  const TargetNgramState* tnState = dynamic_cast<const TargetNgramState*>(prev_state);
-  CHECK(tnState);
+  const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
+  assert(tnState);
 
   // current hypothesis target phrase
   const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
@@ -85,7 +84,7 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
 
   // extract all ngrams from current hypothesis
   vector<Word> prev_words = tnState->GetWords();
-  string curr_ngram;
+  stringstream curr_ngram;
   bool skip = false;
 
   // include lower order ngrams?
@@ -94,7 +93,9 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
   for (size_t n = m_n; n >= smallest_n; --n) { // iterate over ngram size
     for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
-      const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
+//      const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
+      const string& curr_w = targetPhrase.GetWord(i).GetString(m_factorType);
+
       if (m_vocab.size() && (m_vocab.find(curr_w) == m_vocab.end())) continue; // skip ngrams
 
       if (n > 1) {
@@ -129,23 +130,23 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
       }
 
       if (!skip) {
-        curr_ngram.append(curr_w);
-        accumulator->PlusEquals(this,curr_ngram,1);
+        curr_ngram << curr_w;
+        accumulator->PlusEquals(this,curr_ngram.str(),1);
       }
-      curr_ngram.clear();
+      curr_ngram.str("");
     }
   }
 
   if (cur_hypo.GetWordsBitmap().IsComplete()) {
     for (size_t n = m_n; n >= smallest_n; --n) {
-      string last_ngram;
+      stringstream last_ngram;
       skip = false;
       for (size_t i = cur_hypo.GetSize() - n + 1; i < cur_hypo.GetSize() && !skip; ++i)
         appendNgram(cur_hypo.GetWord(i), skip, last_ngram);
 
       if (n > 1 && !skip) {
-        last_ngram.append(EOS_);
-        accumulator->PlusEquals(this,last_ngram,1);
+        last_ngram << EOS_;
+        accumulator->PlusEquals(this, last_ngram.str(), 1);
       }
     }
     return NULL;
@@ -169,13 +170,267 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
   return new TargetNgramState(new_prev_words);
 }
 
-void TargetNgramFeature::appendNgram(const Word& word, bool& skip, string& ngram) const {
-  const string& w = word.GetFactor(m_factorType)->GetString();
+void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const {
+//  const string& w = word.GetFactor(m_factorType)->GetString();
+  const string& w = word.GetString(m_factorType);
   if (m_vocab.size() && (m_vocab.find(w) == m_vocab.end())) skip = true;
   else {
-    ngram.append(w);
-    ngram.append(":");
+    ngram << w;
+    ngram << ":";
   }
 }
+
+FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
+{
+  vector<const Word*> contextFactor;
+  contextFactor.reserve(m_n);
+
+  // get index map for underlying hypotheses
+  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+    cur_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+
+  // loop over rule
+  bool makePrefix = false;
+  bool makeSuffix = false;
+  bool collectForPrefix = true;
+  size_t prefixTerminals = 0;
+  size_t suffixTerminals = 0;
+  bool onlyTerminals = true;
+  bool prev_is_NT = false;
+  size_t prev_subPhraseLength = 0;
+  for (size_t phrasePos = 0; phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize(); phrasePos++)
+  {
+    // consult rule for either word or non-terminal
+    const Word &word = cur_hypo.GetCurrTargetPhrase().GetWord(phrasePos);
+//    cerr << "word: " << word << endl;
+
+    // regular word
+    if (!word.IsNonTerminal()) {
+      contextFactor.push_back(&word);
+      prev_is_NT = false;
+
+      if (phrasePos==0)
+        makePrefix = true;
+      if (phrasePos==cur_hypo.GetCurrTargetPhrase().GetSize()-1 || prev_is_NT)
+        makeSuffix = true;
+
+      // beginning/end of sentence symbol <s>, </s>?
+      string factorZero = word.GetString(0);
+      if (factorZero.compare("<s>") == 0)
+        prefixTerminals++;
+      // end of sentence symbol </s>?
+ else if (factorZero.compare("") == 0) + suffixTerminals++; + // everything else + else { + stringstream ngram; + ngram << m_baseName; + if (m_factorType == 0) + ngram << factorZero; + else + ngram << word.GetString(m_factorType); + accumulator->SparsePlusEquals(ngram.str(), 1); + + if (collectForPrefix) + prefixTerminals++; + else + suffixTerminals++; + } + } + + // non-terminal, add phrase from underlying hypothesis + else if (m_n > 1) + { + // look up underlying hypothesis + size_t nonTermIndex = nonTermIndexMap[phrasePos]; + const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermIndex); + + const TargetNgramChartState* prevState = + static_cast(prevHypo->GetFFState(featureId)); + size_t subPhraseLength = prevState->GetNumTargetTerminals(); + + // special case: rule starts with non-terminal + if (phrasePos == 0) { + if (subPhraseLength == 1) { + makePrefix = true; + ++prefixTerminals; + + const Word &word = prevState->GetSuffix().GetWord(0); +// cerr << "NT0 --> : " << word << endl; + contextFactor.push_back(&word); + } + else { + onlyTerminals = false; + collectForPrefix = false; + int suffixPos = prevState->GetSuffix().GetSize() - (m_n-1); + if (suffixPos < 0) suffixPos = 0; // push all words if less than order + for(;(size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) + { + const Word &word = prevState->GetSuffix().GetWord(suffixPos); +// cerr << "NT0 --> : " << word << endl; + contextFactor.push_back(&word); + } + } + } + + // internal non-terminal + else + { + // push its prefix + for(size_t prefixPos = 0; prefixPos < m_n-1 + && prefixPos < subPhraseLength; prefixPos++) + { + const Word &word = prevState->GetPrefix().GetWord(prefixPos); +// cerr << "NT --> " << word << endl; + contextFactor.push_back(&word); + } + + if (subPhraseLength==1) { + if (collectForPrefix) + ++prefixTerminals; + else + ++suffixTerminals; + + if (phrasePos == cur_hypo.GetCurrTargetPhrase().GetSize()-1) + makeSuffix = true; + } + else { + onlyTerminals = false; + collectForPrefix = true; + + // check if something follows this NT + bool wordFollowing = (phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize() - 1)? true : false; + + // check if we are dealing with a large sub-phrase + if (wordFollowing && subPhraseLength > m_n - 1) + { + // clear up pending ngrams + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals); + contextFactor.clear(); + makePrefix = false; + makeSuffix = true; + collectForPrefix = false; + prefixTerminals = 0; + suffixTerminals = 0; + + // push its suffix + size_t remainingWords = (remainingWords > m_n-1) ? 
m_n-1 : subPhraseLength - (m_n-1); + for(size_t suffixPos = 0; suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) { + const Word &word = prevState->GetSuffix().GetWord(suffixPos); +// cerr << "NT --> : " << word << endl; + contextFactor.push_back(&word); + } + } + // subphrase can be used as suffix and as prefix for the next part + else if (wordFollowing && subPhraseLength == m_n - 1) + { + // clear up pending ngrams + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals); + makePrefix = false; + makeSuffix = true; + collectForPrefix = false; + prefixTerminals = 0; + suffixTerminals = 0; + } + else if (prev_is_NT && prev_subPhraseLength > 1 && subPhraseLength > 1) { + // two NTs in a row: make transition + MakePrefixNgrams(contextFactor, accumulator, 1, m_n-2); + MakeSuffixNgrams(contextFactor, accumulator, 1, m_n-2); + makePrefix = false; + makeSuffix = false; + collectForPrefix = false; + prefixTerminals = 0; + suffixTerminals = 0; + + // remove duplicates + stringstream curr_ngram; + curr_ngram << m_baseName; + curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType); + curr_ngram << ":"; + curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType); + accumulator->SparseMinusEquals(curr_ngram.str(),1); + } + } + } + prev_is_NT = true; + prev_subPhraseLength = subPhraseLength; + } + } + + if (m_n > 1) { + if (onlyTerminals) { + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals-1); + } + else { + if (makePrefix) + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals); + if (makeSuffix) + MakeSuffixNgrams(contextFactor, accumulator, suffixTerminals); + + // remove duplicates + size_t size = contextFactor.size(); + if (makePrefix && makeSuffix && (size <= m_n)) { + stringstream curr_ngram; + curr_ngram << m_baseName; + for (size_t i = 0; i < size; ++i) { + curr_ngram << (*contextFactor[i]).GetString(m_factorType); + if (i < size-1) + curr_ngram << ":"; + } + accumulator->SparseMinusEquals(curr_ngram.str(), 1); + } + } + } + +// cerr << endl; + return new TargetNgramChartState(cur_hypo, featureId, m_n); +} + +void TargetNgramFeature::MakePrefixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const { + stringstream ngram; + size_t size = contextFactor.size(); + for (size_t k = 0; k < numberOfStartPos; ++k) { + size_t max_end = (size < m_n+k+offset)? 
+    for (size_t end_pos = 1+k+offset; end_pos < max_end; ++end_pos) {
+      ngram << m_baseName;
+      for (size_t i = k+offset; i <= end_pos; ++i) {
+        if (i > k+offset)
+          ngram << ":";
+        string factorZero = (*contextFactor[i]).GetString(0);
+        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
+          ngram << factorZero;
+        else
+          ngram << (*contextFactor[i]).GetString(m_factorType);
+      }
+//      cerr << "p-ngram: " << ngram.str() << endl;
+      accumulator->SparsePlusEquals(ngram.str(), 1);
+      ngram.str("");
+    }
+  }
+}
+
+void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const {
+  stringstream ngram;
+  for (size_t k = 0; k < numberOfEndPos; ++k) {
+    size_t end_pos = contextFactor.size()-1-k-offset;
+    for (int start_pos = (int)end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) {
+      ngram << m_baseName;
+      for (size_t j = start_pos; j <= end_pos; ++j) {
+        string factorZero = (*contextFactor[j]).GetString(0);
+        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
+          ngram << factorZero;
+        else
+          ngram << (*contextFactor[j]).GetString(m_factorType);
+        if (j < end_pos)
+          ngram << ":";
+      }
+//      cerr << "s-ngram: " << ngram.str() << endl;
+      accumulator->SparsePlusEquals(ngram.str(), 1);
+      ngram.str("");
+    }
+  }
+}
 }
diff --git a/moses/src/TargetNgramFeature.h b/moses/src/TargetNgramFeature.h
index ca87f5dd6..681e7d6aa 100644
--- a/moses/src/TargetNgramFeature.h
+++ b/moses/src/TargetNgramFeature.h
@@ -9,6 +9,10 @@
 #include "FFState.h"
 #include "Word.h"
 
+#include "LM/SingleFactor.h"
+#include "ChartHypothesis.h"
+#include "ChartManager.h"
+
 namespace Moses
 {
 
@@ -22,43 +26,190 @@ class TargetNgramState : public FFState {
   std::vector<Word> m_words;
 };
 
+class TargetNgramChartState : public FFState
+{
+private:
+  Phrase m_contextPrefix, m_contextSuffix;
+
+  size_t m_numTargetTerminals; // This isn't really correct except for the surviving hypothesis
+
+  size_t m_startPos, m_endPos, m_inputSize;
+
+  /** Construct the prefix phrase of up to the specified size
+   * \param ret prefix phrase
+   * \param size maximum size (typically max lm context window)
+   */
+  size_t CalcPrefix(const ChartHypothesis &hypo, const int featureId, Phrase &ret, size_t size) const
+  {
+    const TargetPhrase &target = hypo.GetCurrTargetPhrase();
+    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+        target.GetAlignmentInfo().GetNonTermIndexMap();
+
+    // loop over the rule that is being applied
+    for (size_t pos = 0; pos < target.GetSize(); ++pos) {
+      const Word &word = target.GetWord(pos);
+
+      // for non-terminals, retrieve the prefix from the underlying hypothesis
+      if (word.IsNonTerminal()) {
+        size_t nonTermInd = nonTermIndexMap[pos];
+        const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
+        size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcPrefix(*prevHypo, featureId, ret, size);
+//        Phrase phrase = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->GetPrefix();
+//        size = phrase.GetSize();
+      }
+      // for terminal words, add the word itself
+      else {
+        ret.AddWord(word);
+        size--;
+      }
+
+      // finish when maximum length reached
+      if (size == 0)
+        break;
+    }
+
+    return size;
+  }
+
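+  // illustrative example (assuming order 3): for a hypothesis covering
+  // "the quick brown fox", CalcPrefix yields "the quick" and CalcSuffix
+  // "brown fox"; only these boundary words can take part in new n-grams
+  // when this constituent is embedded in a larger rule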
+  /** Construct the suffix phrase of up to the specified size;
+   * will always be called after the construction of the prefix phrase
+   * \param ret suffix phrase
+   * \param size maximum size of suffix
+   */
+  size_t CalcSuffix(const ChartHypothesis &hypo, int featureId, Phrase &ret, size_t size) const
+  {
+    size_t prefixSize = m_contextPrefix.GetSize();
+    assert(prefixSize <= m_numTargetTerminals);
+
+    // special handling for small hypotheses:
+    // does the prefix match the entire hypothesis string? -> just copy the prefix
+    if (prefixSize == m_numTargetTerminals) {
+      size_t maxCount = std::min(prefixSize, size);
+      size_t pos = prefixSize - 1;
+
+      for (size_t ind = 0; ind < maxCount; ++ind) {
+        const Word &word = m_contextPrefix.GetWord(pos);
+        ret.PrependWord(word);
+        --pos;
+      }
+
+      size -= maxCount;
+      return size;
+    }
+    // construct suffix analogously to the prefix
+    else {
+      const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
+      const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+          targetPhrase.GetAlignmentInfo().GetNonTermIndexMap();
+      for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0; --pos) {
+        const Word &word = targetPhrase.GetWord(pos);
+
+        if (word.IsNonTerminal()) {
+          size_t nonTermInd = nonTermIndexMap[pos];
+          const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
+          size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcSuffix(*prevHypo, featureId, ret, size);
+        }
+        else {
+          ret.PrependWord(word);
+          size--;
+        }
+
+        if (size == 0)
+          break;
+      }
+
+      return size;
+    }
+  }
+
+public:
+  TargetNgramChartState(const ChartHypothesis &hypo, int featureId, size_t order)
+      :m_contextPrefix(order - 1),
+       m_contextSuffix(order - 1)
+  {
+    m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();
+    const WordsRange range = hypo.GetCurrSourceRange();
+    m_startPos = range.GetStartPos();
+    m_endPos = range.GetEndPos();
+    m_inputSize = hypo.GetManager().GetSource().GetSize();
+
+    const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
+    for (std::vector<const ChartHypothesis*>::const_iterator i = prevHypos.begin(); i != prevHypos.end(); ++i) {
+      // keep count of words (= length of generated string)
+      m_numTargetTerminals += static_cast<const TargetNgramChartState*>((*i)->GetFFState(featureId))->GetNumTargetTerminals();
+    }
+
+    CalcPrefix(hypo, featureId, m_contextPrefix, order - 1);
+    CalcSuffix(hypo, featureId, m_contextSuffix, order - 1);
+  }
+
+  size_t GetNumTargetTerminals() const {
+    return m_numTargetTerminals;
+  }
+
+  const Phrase &GetPrefix() const {
+    return m_contextPrefix;
+  }
+  const Phrase &GetSuffix() const {
+    return m_contextSuffix;
+  }
+
+  int Compare(const FFState& o) const {
+    const TargetNgramChartState &other =
+        static_cast<const TargetNgramChartState&>(o);
+
+    // prefix
+    if (m_startPos > 0) // not for "<s> ..."
+    {
+      int ret = GetPrefix().Compare(other.GetPrefix());
+      if (ret != 0)
+        return ret;
+    }
+
+    if (m_endPos < m_inputSize - 1) // not for "... </s>"
+    {
+      int ret = GetSuffix().Compare(other.GetSuffix());
+      if (ret != 0)
+        return ret;
+    }
+    return 0;
+  }
+};
+
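+// Feature keys are the producer name plus "_", with the n-gram's words
+// joined by ":"; e.g. (illustrative) a scored trigram "the quick fox"
+// becomes "dlm_the:quick:fox" for the default "dlm" producer name and
+// surface factor 0.
+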
 /** Sets the features of observed ngrams.
  */
 class TargetNgramFeature : public StatefulFeatureFunction {
 public:
   TargetNgramFeature(FactorType factorType = 0, size_t n = 3, bool lower_ngrams = true):
-    StatefulFeatureFunction("dlmn", ScoreProducer::unlimited),
+    StatefulFeatureFunction("dlm", ScoreProducer::unlimited),
     m_factorType(factorType),
     m_n(n),
     m_lower_ngrams(lower_ngrams),
     m_sparseProducerWeight(1) {
     FactorCollection& factorCollection = FactorCollection::Instance();
-    const Factor* bosFactor =
-       factorCollection.AddFactor(Output,m_factorType,BOS_);
+    const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_);
     m_bos.SetFactor(m_factorType,bosFactor);
+    m_baseName = GetScoreProducerDescription();
+    m_baseName.append("_");
   }
-
   bool Load(const std::string &filePath);
 
   std::string GetScoreProducerWeightShortName(unsigned) const;
   size_t GetNumInputScores() const;
 
   void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
-  float GetSparseProducerWeight() { return m_sparseProducerWeight; }
+  float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
 
   virtual const FFState* EmptyHypothesisState(const InputType &input) const;
 
   virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
-                                  int /* featureID */,
-                                  ScoreComponentCollection* ) const
-  {
-    abort();
-  }
+  virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
+                                 ScoreComponentCollection* accumulator) const;
+
 private:
   FactorType m_factorType;
   Word m_bos;
@@ -69,7 +220,13 @@ private:
   // additional weight that all sparse weights are scaled with
   float m_sparseProducerWeight;
 
-  void appendNgram(const Word& word, bool& skip, std::string& ngram) const;
+  std::string m_baseName;
+
+  void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
+  void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
+                        size_t numberOfStartPos = 1, size_t offset = 0) const;
+  void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
+                        size_t numberOfEndPos = 1, size_t offset = 0) const;
 };
 
 }
diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp
index bc0a8e120..1ff5df496 100644
--- a/moses/src/Word.cpp
+++ b/moses/src/Word.cpp
@@ -85,6 +85,15 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
   return strme.str();
 }
 
+std::string Word::GetString(FactorType factorType) const
+{
+  const Factor *factor = m_factorArray[factorType];
+  if (factor != NULL)
+    return factor->GetString();
+  else
+    return "";  // empty string for an unset factor; constructing std::string from NULL is undefined
+}
+
 void Word::CreateFromString(FactorDirection direction
                             , const std::vector<FactorType> &factorOrder
                             , const std::string &str
@@ -94,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction
   vector<string> wordVec;
   Tokenize(wordVec, str, "|");
-  CHECK(wordVec.size() == factorOrder.size());
+  if (!isNonTerminal)
+    assert(wordVec.size() == factorOrder.size());
 
   const Factor *factor;
   for (size_t ind = 0; ind < wordVec.size(); ++ind) {
diff --git a/moses/src/Word.h b/moses/src/Word.h
index 4818abb60..7dd395030 100644
--- a/moses/src/Word.h
+++ b/moses/src/Word.h
@@ -101,6 +101,7 @@ public:
    * these debugging functions.
    */
   std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
+  std::string GetString(FactorType factorType) const;
   TO_STRING();
 
   //! transitive comparison of Word objects