mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
phrase boundary feature
git-svn-id: http://svn.statmt.org/repository/mira@3893 cc96ff50-19ce-11e0-b349-13d7f0bd23df
This commit is contained in:
parent
86dba9f743
commit
08c3efdf88
@ -69,6 +69,7 @@ libmoses_la_HEADERS = \
|
||||
Parameter.h \
|
||||
PartialTranslOptColl.h \
|
||||
Phrase.h \
|
||||
PhraseBoundaryFeature.h \
|
||||
PhraseDictionary.h \
|
||||
PhraseDictionaryDynSuffixArray.h \
|
||||
PhraseDictionaryMemory.h \
|
||||
@ -200,6 +201,7 @@ libmoses_la_SOURCES = \
|
||||
Parameter.cpp \
|
||||
PartialTranslOptColl.cpp \
|
||||
Phrase.cpp \
|
||||
PhraseBoundaryFeature.cpp \
|
||||
PhraseDictionary.cpp \
|
||||
PhraseDictionaryDynSuffixArray.cpp \
|
||||
PhraseDictionaryMemory.cpp \
|
||||
|
@ -140,6 +140,8 @@ Parameter::Parameter()
|
||||
AddParam("enable-online-command", "enable online commands to change some decoder parameters (default false); if enabled, use-persistent-cache is disabled");
|
||||
AddParam("discrim-lmodel-file", "Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
|
||||
AddParam("phrase-pair-feature", "Source and target factors for phrase pair feature");
|
||||
AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
|
||||
AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
|
||||
|
||||
AddParam("show-weights", "print feature weights and exit");
|
||||
}
|
||||
|
94
moses/src/PhraseBoundaryFeature.cpp
Normal file
94
moses/src/PhraseBoundaryFeature.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
#include "PhraseBoundaryFeature.h"
|
||||
|
||||
#include "Hypothesis.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses {
|
||||
|
||||
int PhraseBoundaryState::Compare(const FFState& other) const
|
||||
{
|
||||
const PhraseBoundaryState& rhs = dynamic_cast<const PhraseBoundaryState&>(other);
|
||||
return Word::Compare(*m_word,*(rhs.m_word));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds the feature, registering it with the stateful-feature base under the
 * description "pb" and keeping copies of both factor lists.
 *
 * @param sourceFactors factors used for the features labelled "src"
 * @param targetFactors factors used for the features labelled "tgt"
 */
PhraseBoundaryFeature::PhraseBoundaryFeature(const FactorList& sourceFactors,
                                             const FactorList& targetFactors)
  : StatefulFeatureFunction("pb")
  , m_sourceFactors(sourceFactors)
  , m_targetFactors(targetFactors)
{}
|
||||
|
||||
size_t PhraseBoundaryFeature::GetNumScoreComponents() const
|
||||
{
|
||||
return ScoreProducer::unlimited;
|
||||
}
|
||||
|
||||
/**
 * Short name of this feature's weights.
 *
 * @return the string "pb", the same text passed to the base class in the
 *         constructor.
 */
string PhraseBoundaryFeature::GetScoreProducerWeightShortName() const
{
  const string shortName("pb");
  return shortName;
}
|
||||
|
||||
size_t PhraseBoundaryFeature::GetNumInputScores() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Creates the state attached to the empty hypothesis.
 *
 * The state holds no word (NULL). AddFeatures writes the BOS_ marker for a
 * missing left word, so the first phrase boundary is scored against it.
 *
 * @param input the input being decoded; not used here.
 * @return a newly allocated PhraseBoundaryState holding NULL.
 */
const FFState* PhraseBoundaryFeature::EmptyHypothesisState(const InputType &input) const
{
  const Word* const noPreviousWord = NULL;
  return new PhraseBoundaryState(noPreviousWord);
}
|
||||
|
||||
|
||||
void PhraseBoundaryFeature::AddFeatures(
|
||||
const Word* leftWord, const Word* rightWord, const FactorList& factors, const string& side,
|
||||
ScoreComponentCollection* scores) const {
|
||||
for (size_t i = 0; i < factors.size(); ++i) {
|
||||
ostringstream name;
|
||||
name << side << ":";
|
||||
name << factors[i];
|
||||
name << ":";
|
||||
if (leftWord) {
|
||||
name << leftWord->GetFactor(factors[i])->GetString();
|
||||
} else {
|
||||
name << BOS_;
|
||||
}
|
||||
name << ":";
|
||||
if (rightWord) {
|
||||
name << rightWord->GetFactor(factors[i])->GetString();
|
||||
} else {
|
||||
name << EOS_;
|
||||
}
|
||||
scores->PlusEquals(this,name.str(),1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Scores the boundary between the previous hypothesis and the phrase applied
 * by cur_hypo, and produces the state for the next hypothesis.
 *
 * Features are added for the pair (last word of the previous state, first
 * word of the current target phrase). When cur_hypo covers the whole source,
 * sentence-end features are added for the final word as well.
 *
 * An empty target phrase introduces no new boundary, so the previous word is
 * carried forward unchanged. The sentence-end features are still emitted in
 * that case, so a hypothesis that finishes on an empty phrase does not lose
 * them.
 *
 * @param cur_hypo   hypothesis being scored
 * @param prev_state state produced for the previous hypothesis; NULL is
 *                   treated like the empty-hypothesis state (no left word).
 *                   A non-NULL state of another type makes the reference
 *                   dynamic_cast throw std::bad_cast.
 * @param scores     collection that receives the features
 * @return a newly allocated PhraseBoundaryState holding the last word seen.
 */
FFState* PhraseBoundaryFeature::Evaluate
(const Hypothesis& cur_hypo, const FFState* prev_state,
 ScoreComponentCollection* scores) const
{
  // Reference cast: a wrong state type fails loudly instead of yielding a
  // null pointer that would be dereferenced below.
  const Word* leftWord = NULL;
  if (prev_state) {
    leftWord = dynamic_cast<const PhraseBoundaryState&>(*prev_state).GetWord();
  }

  const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
  const size_t phraseSize = targetPhrase.GetSize();

  // With an empty phrase the word at the end of the hypothesis is still the
  // previous one.
  const Word* endWord = leftWord;

  if (phraseSize > 0) {
    const Word* rightWord = &(targetPhrase.GetWord(0));
    // NOTE(review): both the "src" and the "tgt" features are built from
    // target-phrase words; presumably the "src" ones were meant to use
    // source-side words -- confirm.
    AddFeatures(leftWord,rightWord,m_sourceFactors,"src",scores);
    AddFeatures(leftWord,rightWord,m_targetFactors,"tgt",scores);

    endWord = &(targetPhrase.GetWord(phraseSize-1));
  }

  //if end of sentence add EOS
  if (cur_hypo.IsSourceCompleted()) {
    AddFeatures(endWord,NULL,m_sourceFactors,"src",scores);
    AddFeatures(endWord,NULL,m_targetFactors,"tgt",scores);
  }

  return new PhraseBoundaryState(endWord);
}
|
||||
|
||||
|
||||
}
|
54
moses/src/PhraseBoundaryFeature.h
Normal file
54
moses/src/PhraseBoundaryFeature.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef moses_PhraseBoundaryFeature_h
|
||||
#define moses_PhraseBoundaryFeature_h
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "FeatureFunction.h"
|
||||
#include "FFState.h"
|
||||
#include "Word.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class PhraseBoundaryState : public FFState {
|
||||
public:
|
||||
PhraseBoundaryState(const Word* word) : m_word(word) {}
|
||||
const Word* GetWord() const {return m_word;}
|
||||
virtual int Compare(const FFState& other) const;
|
||||
|
||||
|
||||
private:
|
||||
const Word* m_word;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Concatenations of factors on boundaries of phrases.
|
||||
**/
|
||||
class PhraseBoundaryFeature : public StatefulFeatureFunction {
|
||||
public:
|
||||
PhraseBoundaryFeature(const FactorList& sourceFactors, const FactorList& targetFactors);
|
||||
|
||||
size_t GetNumScoreComponents() const;
|
||||
std::string GetScoreProducerWeightShortName() const;
|
||||
size_t GetNumInputScores() const;
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
|
||||
|
||||
virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
private:
|
||||
void AddFeatures(
|
||||
const Word* leftWord, const Word* rightWord, const FactorList& factors,
|
||||
const std::string& side, ScoreComponentCollection* scores) const ;
|
||||
FactorList m_sourceFactors;
|
||||
FactorList m_targetFactors;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "LexicalReordering.h"
|
||||
#include "GlobalLexicalModel.h"
|
||||
#include "SentenceStats.h"
|
||||
#include "PhraseBoundaryFeature.h"
|
||||
#include "PhraseDictionary.h"
|
||||
#include "PhrasePairFeature.h"
|
||||
#include "UserMessage.h"
|
||||
@ -69,6 +70,7 @@ StaticData StaticData::s_instance;
|
||||
|
||||
StaticData::StaticData()
|
||||
:m_targetBigramFeature(NULL)
|
||||
,m_phraseBoundaryFeature(NULL)
|
||||
,m_phrasePairFeature(NULL)
|
||||
,m_numLinkParams(1)
|
||||
,m_fLMsLoaded(false)
|
||||
@ -459,6 +461,7 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
if (!LoadReferences()) return false;
|
||||
if (!LoadDiscrimLMFeature()) return false;
|
||||
if (!LoadPhrasePairFeature()) return false;
|
||||
if (!LoadPhraseBoundaryFeature()) return false;
|
||||
|
||||
//configure the translation systems with these tables
|
||||
vector<string> tsConfig = m_parameter->GetParam("translation-systems");
|
||||
@ -549,6 +552,9 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
if (m_phrasePairFeature) {
|
||||
m_translationSystems.find(config[0])->second.AddFeatureFunction(m_phrasePairFeature);
|
||||
}
|
||||
if (m_phraseBoundaryFeature) {
|
||||
m_translationSystems.find(config[0])->second.AddFeatureFunction(m_phraseBoundaryFeature);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -630,6 +636,7 @@ StaticData::~StaticData()
|
||||
delete m_unknownWordPenaltyProducer;
|
||||
delete m_targetBigramFeature;
|
||||
delete m_phrasePairFeature;
|
||||
delete m_phraseBoundaryFeature;
|
||||
|
||||
//delete m_parameter;
|
||||
|
||||
@ -1306,6 +1313,37 @@ bool StaticData::LoadDiscrimLMFeature()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StaticData::LoadPhraseBoundaryFeature()
|
||||
{
|
||||
const vector<string> &phraseBoundarySourceFactors =
|
||||
m_parameter->GetParam("phrase-boundary-source-feature");
|
||||
const vector<string> &phraseBoundaryTargetFactors =
|
||||
m_parameter->GetParam("phrase-boundary-target-feature");
|
||||
if (phraseBoundarySourceFactors.size() == 0 && phraseBoundaryTargetFactors.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
if (phraseBoundarySourceFactors.size() > 1) {
|
||||
UserMessage::Add("Need to specify comma separated list of source factors for phrase boundary");
|
||||
return false;
|
||||
}
|
||||
if (phraseBoundaryTargetFactors.size() > 1) {
|
||||
UserMessage::Add("Need to specify comma separated list of target factors for phrase boundary");
|
||||
return false;
|
||||
}
|
||||
FactorList sourceFactors;
|
||||
FactorList targetFactors;
|
||||
if (phraseBoundarySourceFactors.size()) {
|
||||
sourceFactors = Tokenize<FactorType>(phraseBoundarySourceFactors[0],",");
|
||||
}
|
||||
if (phraseBoundaryTargetFactors.size()) {
|
||||
targetFactors = Tokenize<FactorType>(phraseBoundaryTargetFactors[0],",");
|
||||
}
|
||||
//cerr << "source "; for (size_t i = 0; i < sourceFactors.size(); ++i) cerr << sourceFactors[i] << " "; cerr << endl;
|
||||
//cerr << "target "; for (size_t i = 0; i < targetFactors.size(); ++i) cerr << targetFactors[i] << " "; cerr << endl;
|
||||
m_phraseBoundaryFeature = new PhraseBoundaryFeature(sourceFactors,targetFactors);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StaticData::LoadPhrasePairFeature()
|
||||
{
|
||||
const vector<string> &phrasePairFactors =
|
||||
|
@ -56,6 +56,7 @@ namespace Moses
|
||||
class InputType;
|
||||
class LexicalReordering;
|
||||
class GlobalLexicalModel;
|
||||
class PhraseBoundaryFeature;
|
||||
class PhraseDictionaryFeature;
|
||||
class PhrasePairFeature;
|
||||
class BleuScoreFeature;
|
||||
@ -90,6 +91,7 @@ protected:
|
||||
// Other = 1 = used to calculate LM score once all steps have been processed
|
||||
std::map<std::string, TranslationSystem> m_translationSystems;
|
||||
TargetBigramFeature *m_targetBigramFeature;
|
||||
PhraseBoundaryFeature *m_phraseBoundaryFeature;
|
||||
PhrasePairFeature *m_phrasePairFeature;
|
||||
float
|
||||
m_beamWidth,
|
||||
@ -233,6 +235,7 @@ protected:
|
||||
//References used for scoring feature (eg BleuScoreFeature) for online training
|
||||
bool LoadReferences();
|
||||
bool LoadDiscrimLMFeature();
|
||||
bool LoadPhraseBoundaryFeature();
|
||||
bool LoadPhrasePairFeature();
|
||||
|
||||
void ReduceTransOptCache() const;
|
||||
|
Loading…
Reference in New Issue
Block a user