mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-05 02:22:21 +03:00
introduce parameter --scale-by-input-length
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3731 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
8de8773b39
commit
4a6027f7c7
@@ -66,14 +66,14 @@ namespace Mira {
|
||||
delete[] mosesargv;
|
||||
}
|
||||
|
||||
MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference)
|
||||
MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength)
|
||||
: m_manager(NULL) {
|
||||
// force initialisation of the phrase dictionary
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
|
||||
|
||||
// Add the bleu feature
|
||||
m_bleuScoreFeature = new BleuScoreFeature(useScaledReference);
|
||||
m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength);
|
||||
(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
|
||||
m_bleuScoreFeature->LoadReferences(refs);
|
||||
}
|
||||
|
@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc=0, char** a
|
||||
**/
|
||||
class MosesDecoder {
|
||||
public:
|
||||
MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference);
|
||||
MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength);
|
||||
|
||||
//returns the best sentence
|
||||
std::vector<const Moses::Word*> getNBest(const std::string& source,
|
||||
|
@@ -87,6 +87,7 @@ int main(int argc, char** argv) {
|
||||
bool onlyViolatedConstraints;
|
||||
bool accumulateWeights;
|
||||
bool useScaledReference;
|
||||
bool scaleByInputLength;
|
||||
float clipping;
|
||||
bool fixedClipping;
|
||||
po::options_description desc("Allowed options");
|
||||
@@ -108,6 +109,7 @@ int main(int argc, char** argv) {
|
||||
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
|
||||
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
|
||||
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
|
||||
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
|
||||
("clipping", po::value<float>(&clipping)->default_value(0.01f), "Set a clipping threshold for SMO to regularise updates")
|
||||
("fixed-clipping", po::value<bool>(&fixedClipping)->default_value(false), "Use a fixed clipping threshold with SMO (instead of adaptive)");
|
||||
|
||||
@@ -163,7 +165,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
// initialise moses
|
||||
initMoses(mosesConfigFile, verbosity);//, argc, argv);
|
||||
MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference) ;
|
||||
MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength);
|
||||
ScoreComponentCollection startWeights = decoder->getWeights();
|
||||
startWeights.L1Normalise();
|
||||
decoder->setWeights(startWeights);
|
||||
|
@@ -78,16 +78,18 @@ BleuScoreFeature::BleuScoreFeature():
|
||||
m_source_length_history(0),
|
||||
m_target_length_history(0),
|
||||
m_ref_length_history(0),
|
||||
m_use_scaled_reference(true) {}
|
||||
m_use_scaled_reference(true),
|
||||
m_scale_by_input_length(true) {}
|
||||
|
||||
BleuScoreFeature::BleuScoreFeature(bool useScaledReference):
|
||||
BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength):
|
||||
StatefulFeatureFunction("BleuScore"),
|
||||
m_count_history(BleuScoreState::bleu_order),
|
||||
m_match_history(BleuScoreState::bleu_order),
|
||||
m_source_length_history(0),
|
||||
m_target_length_history(0),
|
||||
m_ref_length_history(0),
|
||||
m_use_scaled_reference(useScaledReference) {}
|
||||
m_use_scaled_reference(useScaledReference),
|
||||
m_scale_by_input_length(scaleByInputLength) {}
|
||||
|
||||
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
|
||||
{
|
||||
@@ -152,15 +154,6 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
|
||||
m_source_length_history = 0.9 * (m_source_length_history + m_cur_source_length);
|
||||
m_target_length_history = 0.9 * (m_target_length_history + hypo.size());
|
||||
m_ref_length_history = 0.9 * (m_ref_length_history + m_cur_ref_length);
|
||||
//std::cout << "reference_length/target_length history: " << (m_ref_length_history/m_target_length_history) << endl;
|
||||
//std::cout << "source length history: " << m_source_length_history << endl;
|
||||
/*cerr << "oracle length: " << hypo.size() << endl;
|
||||
cerr << "refer. length: " << m_cur_ref_length << endl;
|
||||
cerr << "ratio length: " << (float)hypo.size()/m_cur_ref_length << endl;
|
||||
cerr << "target history: " << m_target_length_history << endl;
|
||||
cerr << "refer. history: " << m_ref_length_history << endl;
|
||||
cerr << "ratio history: " << (float)m_target_length_history/m_ref_length_history << endl << endl;*/
|
||||
//cerr << "source/reference ratio: " << (float)m_cur_source_length/m_cur_ref_length << endl << endl;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -320,7 +313,10 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
|
||||
// Approximate bleu score as of Chiang/Resnik is scaled by the size of the input:
|
||||
// B(e;f,{r_k}) = (O_f + |f|) * BLEU(O + c(e;{r_k}))
|
||||
// where c(e;) is a vector of reference length, ngram counts and ngram matches
|
||||
precision *= m_source_length_history + state->m_source_length;
|
||||
if (m_scale_by_input_length) {
|
||||
precision *= m_source_length_history + state->m_source_length;
|
||||
}
|
||||
|
||||
return precision;
|
||||
}
|
||||
|
||||
|
@@ -45,7 +45,7 @@ typedef std::map< Phrase, size_t > NGrams;
|
||||
class BleuScoreFeature : public StatefulFeatureFunction {
|
||||
public:
|
||||
BleuScoreFeature();
|
||||
BleuScoreFeature(bool useScaledReference);
|
||||
BleuScoreFeature(bool useScaledReference, bool scaleByInputLength);
|
||||
|
||||
std::string GetScoreProducerDescription() const
|
||||
{
|
||||
@@ -87,6 +87,9 @@ private:
|
||||
// whether or not to use the scaled reference
|
||||
bool m_use_scaled_reference;
|
||||
|
||||
// whether or not to scale the BLEU score by a history of the input size
|
||||
bool m_scale_by_input_length;
|
||||
|
||||
// counts for pseudo-document big_O
|
||||
std::vector< float > m_count_history;
|
||||
std::vector< float > m_match_history;
|
||||
|
Loading…
Reference in New Issue
Block a user