phrase boundary feature

git-svn-id: http://svn.statmt.org/repository/mira@3893 cc96ff50-19ce-11e0-b349-13d7f0bd23df
This commit is contained in:
bhaddow 2011-05-10 22:02:25 +00:00 committed by Ondrej Bojar
parent 86dba9f743
commit 08c3efdf88
6 changed files with 193 additions and 0 deletions

View File

@ -69,6 +69,7 @@ libmoses_la_HEADERS = \
Parameter.h \
PartialTranslOptColl.h \
Phrase.h \
PhraseBoundaryFeature.h \
PhraseDictionary.h \
PhraseDictionaryDynSuffixArray.h \
PhraseDictionaryMemory.h \
@ -200,6 +201,7 @@ libmoses_la_SOURCES = \
Parameter.cpp \
PartialTranslOptColl.cpp \
Phrase.cpp \
PhraseBoundaryFeature.cpp \
PhraseDictionary.cpp \
PhraseDictionaryDynSuffixArray.cpp \
PhraseDictionaryMemory.cpp \

View File

@ -140,6 +140,8 @@ Parameter::Parameter()
AddParam("enable-online-command", "enable online commands to change some decoder parameters (default false); if enabled, use-persistent-cache is disabled");
AddParam("discrim-lmodel-file", "Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
AddParam("phrase-pair-feature", "Source and target factors for phrase pair feature");
AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
AddParam("show-weights", "print feature weights and exit");
}

View File

@ -0,0 +1,94 @@
#include "PhraseBoundaryFeature.h"
#include "Hypothesis.h"
using namespace std;
namespace Moses {
int PhraseBoundaryState::Compare(const FFState& other) const
{
const PhraseBoundaryState& rhs = dynamic_cast<const PhraseBoundaryState&>(other);
return Word::Compare(*m_word,*(rhs.m_word));
}
/**
 * Builds the feature, registering it with the base class under the
 * description "pb" and keeping private copies of both factor lists.
 *
 * \param sourceFactors factor indices used for the "src" features
 * \param targetFactors factor indices used for the "tgt" features
 */
PhraseBoundaryFeature::PhraseBoundaryFeature(const FactorList& sourceFactors,
                                             const FactorList& targetFactors)
  : StatefulFeatureFunction("pb")
  , m_sourceFactors(sourceFactors)
  , m_targetFactors(targetFactors)
{
  // Nothing else to set up: all members are initialised above.
}
/**
 * Reports the number of score components as ScoreProducer::unlimited.
 * (AddFeatures() builds one feature name per observed boundary, so the
 * set of names is not fixed in advance.)
 */
size_t PhraseBoundaryFeature::GetNumScoreComponents() const
{
  const size_t componentCount = ScoreProducer::unlimited;
  return componentCount;
}
string PhraseBoundaryFeature::GetScoreProducerWeightShortName() const
{
return "pb";
}
/**
 * Number of input scores reported by this feature: always zero.
 */
size_t PhraseBoundaryFeature::GetNumInputScores() const
{
  const size_t inputScoreCount = 0;
  return inputScoreCount;
}
/**
 * Creates the state for the empty hypothesis: a PhraseBoundaryState whose
 * word pointer is NULL (AddFeatures() renders a NULL left word as BOS_).
 *
 * \param input unused
 * \return newly allocated state
 */
const FFState* PhraseBoundaryFeature::EmptyHypothesisState(const InputType &input) const
{
  const Word* const noPrecedingWord = NULL;
  return new PhraseBoundaryState(noPrecedingWord);
}
void PhraseBoundaryFeature::AddFeatures(
const Word* leftWord, const Word* rightWord, const FactorList& factors, const string& side,
ScoreComponentCollection* scores) const {
for (size_t i = 0; i < factors.size(); ++i) {
ostringstream name;
name << side << ":";
name << factors[i];
name << ":";
if (leftWord) {
name << leftWord->GetFactor(factors[i])->GetString();
} else {
name << BOS_;
}
name << ":";
if (rightWord) {
name << rightWord->GetFactor(factors[i])->GetString();
} else {
name << EOS_;
}
scores->PlusEquals(this,name.str(),1);
}
}
/**
 * Scores the boundary between the previous hypothesis and the phrase that
 * the current hypothesis appends, and returns the state for the extended
 * hypothesis.
 *
 * - An empty target phrase adds no features; the previous state is copied.
 * - Otherwise features are added for (word stored in the previous state,
 *   first word of the current target phrase).
 * - If the hypothesis covers the whole source, features are also added for
 *   (last word of the current target phrase, end of sentence).
 *
 * NOTE(review): the "src" and "tgt" calls receive the same words, both
 * taken from the target phrase; only the factor list differs. Feeding real
 * source-side words would need the state to carry a source word as well.
 * NOTE(review): the dynamic_cast result is not checked before use.
 *
 * \param cur_hypo   hypothesis being scored
 * \param prev_state state of the preceding hypothesis
 * \param scores     collection receiving the feature increments
 * \return newly allocated state pointing at the last target word
 */
FFState* PhraseBoundaryFeature::Evaluate
(const Hypothesis& cur_hypo, const FFState* prev_state,
 ScoreComponentCollection* scores) const
{
  const PhraseBoundaryState* previous = dynamic_cast<const PhraseBoundaryState*>(prev_state);
  const Phrase& phrase = cur_hypo.GetCurrTargetPhrase();

  // Nothing was produced on the target side: carry the old boundary forward.
  if (phrase.GetSize() == 0) {
    return new PhraseBoundaryState(*previous);
  }

  const Word* const wordBefore = previous->GetWord();
  const Word* const firstWord = &(phrase.GetWord(0));
  const Word* const lastWord = &(phrase.GetWord(phrase.GetSize()-1));

  // Boundary between the previous phrase and this one.
  AddFeatures(wordBefore, firstWord, m_sourceFactors, "src", scores);
  AddFeatures(wordBefore, firstWord, m_targetFactors, "tgt", scores);

  // Final phrase of the sentence: also score the boundary with EOS.
  if (cur_hypo.IsSourceCompleted()) {
    AddFeatures(lastWord, NULL, m_sourceFactors, "src", scores);
    AddFeatures(lastWord, NULL, m_targetFactors, "tgt", scores);
  }

  return new PhraseBoundaryState(lastWord);
}
}

View File

@ -0,0 +1,54 @@
#ifndef moses_PhraseBoundaryFeature_h
#define moses_PhraseBoundaryFeature_h
#include <sstream>
#include <string>
#include "FeatureFunction.h"
#include "FFState.h"
#include "Word.h"
namespace Moses
{
/**
 * Feature-function state for PhraseBoundaryFeature: remembers a single
 * word by pointer. The pointer may be NULL and is never deleted by this
 * class.
 **/
class PhraseBoundaryState : public FFState
{
public:
  /** Wraps the given word pointer (NULL allowed). */
  PhraseBoundaryState(const Word* word)
    : m_word(word)
  {
  }

  /** Returns the stored word pointer, possibly NULL. */
  const Word* GetWord() const
  {
    return m_word;
  }

  /** Compares this state with another state via their stored words. */
  virtual int Compare(const FFState& other) const;

private:
  const Word* m_word; // not owned by the state
};
/**
 * Sparse feature built from the factors of the words on either side of a
 * phrase boundary. Two factor lists are configured, one used for features
 * prefixed "src" and one for features prefixed "tgt".
 **/
class PhraseBoundaryFeature : public StatefulFeatureFunction
{
public:
  /** Stores copies of the two factor lists. */
  PhraseBoundaryFeature(const FactorList& sourceFactors,
                        const FactorList& targetFactors);

  /** Number of score components produced by this feature. */
  size_t GetNumScoreComponents() const;

  /** Short name used for this feature's weights. */
  std::string GetScoreProducerWeightShortName() const;

  /** Number of input scores. */
  size_t GetNumInputScores() const;

  /** State attached to the empty (initial) hypothesis. */
  virtual const FFState* EmptyHypothesisState(const InputType &input) const;

  /**
   * Adds the boundary features for cur_hypo to accumulator and returns the
   * state of the extended hypothesis.
   */
  virtual FFState* Evaluate(const Hypothesis& cur_hypo,
                            const FFState* prev_state,
                            ScoreComponentCollection* accumulator) const;

private:
  /**
   * Adds one feature per entry of factors for the boundary between
   * leftWord and rightWord, with feature names prefixed by side.
   */
  void AddFeatures(const Word* leftWord,
                   const Word* rightWord,
                   const FactorList& factors,
                   const std::string& side,
                   ScoreComponentCollection* scores) const;

  FactorList m_sourceFactors; // factors for the "src" features
  FactorList m_targetFactors; // factors for the "tgt" features
};
}
#endif

View File

@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LexicalReordering.h"
#include "GlobalLexicalModel.h"
#include "SentenceStats.h"
#include "PhraseBoundaryFeature.h"
#include "PhraseDictionary.h"
#include "PhrasePairFeature.h"
#include "UserMessage.h"
@ -69,6 +70,7 @@ StaticData StaticData::s_instance;
StaticData::StaticData()
:m_targetBigramFeature(NULL)
,m_phraseBoundaryFeature(NULL)
,m_phrasePairFeature(NULL)
,m_numLinkParams(1)
,m_fLMsLoaded(false)
@ -459,6 +461,7 @@ bool StaticData::LoadData(Parameter *parameter)
if (!LoadReferences()) return false;
if (!LoadDiscrimLMFeature()) return false;
if (!LoadPhrasePairFeature()) return false;
if (!LoadPhraseBoundaryFeature()) return false;
//configure the translation systems with these tables
vector<string> tsConfig = m_parameter->GetParam("translation-systems");
@ -549,6 +552,9 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_phrasePairFeature) {
m_translationSystems.find(config[0])->second.AddFeatureFunction(m_phrasePairFeature);
}
if (m_phraseBoundaryFeature) {
m_translationSystems.find(config[0])->second.AddFeatureFunction(m_phraseBoundaryFeature);
}
}
@ -630,6 +636,7 @@ StaticData::~StaticData()
delete m_unknownWordPenaltyProducer;
delete m_targetBigramFeature;
delete m_phrasePairFeature;
delete m_phraseBoundaryFeature;
//delete m_parameter;
@ -1306,6 +1313,37 @@ bool StaticData::LoadDiscrimLMFeature()
return true;
}
bool StaticData::LoadPhraseBoundaryFeature()
{
const vector<string> &phraseBoundarySourceFactors =
m_parameter->GetParam("phrase-boundary-source-feature");
const vector<string> &phraseBoundaryTargetFactors =
m_parameter->GetParam("phrase-boundary-target-feature");
if (phraseBoundarySourceFactors.size() == 0 && phraseBoundaryTargetFactors.size() == 0) {
return true;
}
if (phraseBoundarySourceFactors.size() > 1) {
UserMessage::Add("Need to specify comma separated list of source factors for phrase boundary");
return false;
}
if (phraseBoundaryTargetFactors.size() > 1) {
UserMessage::Add("Need to specify comma separated list of target factors for phrase boundary");
return false;
}
FactorList sourceFactors;
FactorList targetFactors;
if (phraseBoundarySourceFactors.size()) {
sourceFactors = Tokenize<FactorType>(phraseBoundarySourceFactors[0],",");
}
if (phraseBoundaryTargetFactors.size()) {
targetFactors = Tokenize<FactorType>(phraseBoundaryTargetFactors[0],",");
}
//cerr << "source "; for (size_t i = 0; i < sourceFactors.size(); ++i) cerr << sourceFactors[i] << " "; cerr << endl;
//cerr << "target "; for (size_t i = 0; i < targetFactors.size(); ++i) cerr << targetFactors[i] << " "; cerr << endl;
m_phraseBoundaryFeature = new PhraseBoundaryFeature(sourceFactors,targetFactors);
return true;
}
bool StaticData::LoadPhrasePairFeature()
{
const vector<string> &phrasePairFactors =

View File

@ -56,6 +56,7 @@ namespace Moses
class InputType;
class LexicalReordering;
class GlobalLexicalModel;
class PhraseBoundaryFeature;
class PhraseDictionaryFeature;
class PhrasePairFeature;
class BleuScoreFeature;
@ -90,6 +91,7 @@ protected:
// Other = 1 = used to calculate LM score once all steps have been processed
std::map<std::string, TranslationSystem> m_translationSystems;
TargetBigramFeature *m_targetBigramFeature;
PhraseBoundaryFeature *m_phraseBoundaryFeature;
PhrasePairFeature *m_phrasePairFeature;
float
m_beamWidth,
@ -233,6 +235,7 @@ protected:
//References used for scoring feature (eg BleuScoreFeature) for online training
bool LoadReferences();
bool LoadDiscrimLMFeature();
bool LoadPhraseBoundaryFeature();
bool LoadPhrasePairFeature();
void ReduceTransOptCache() const;