Implementation of discriminative reordering feature

Testing git

Add 'both' option for d-reo features, in addition to prev and curr

Start to implement the gibbler discriminative reordering feature in terms of
the Moses one. Pause while I try for a generic single-phrase stateful feature.

Creating new feature to wrap some Moses features

Minor progress in impl of single state feature.
Comment change.

Create a new subclass of StatefulFeatureFunction, which does things
the right way, i.e. by taking a TranslationOption instead of a Hypothesis.

Implementation of SingleStateFeature

This can be used to wrap any Moses feature function whose state just depends
on the previous hypothesis, in particular it is used for the reordering
features.

Ignore ltmain.sh

Don't track this file

touch ltmain.sh

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/samplerank@4120 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2011-08-05 13:42:22 +00:00
parent 465442340a
commit 680f852499
18 changed files with 414 additions and 6729 deletions

7
.gitignore vendored
View File

@ -1,5 +1,11 @@
*.lo
*.o *.o
*~ *~
.libs
.deps
*.la
*.a
*.swp
Makefile Makefile
Makefile.in Makefile.in
aclocal.m4 aclocal.m4
@ -8,6 +14,7 @@ config.h
config.log config.log
config.status config.status
configure configure
ltmain.sh
misc/.deps misc/.deps
misc/Makefile misc/Makefile
misc/Makefile.in misc/Makefile.in

View File

@ -83,7 +83,7 @@ void LexicalReorderingFeatureFunction::updateTarget() {
const Hypothesis * currHypo = getSample().GetTargetTail(); const Hypothesis * currHypo = getSample().GetTargetTail();
LRStateHandle prevState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput()))); LRStateHandle prevState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput())));
while ((currHypo = (currHypo->GetNextHypo()))) { while ((currHypo = (currHypo->GetNextHypo()))) {
LRStateHandle currState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->Evaluate(*currHypo,prevState.get(),&m_accumulator))); LRStateHandle currState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->Evaluate(currHypo->GetTranslationOption(),prevState.get(),&m_accumulator)));
for (size_t i = 0; i < currHypo->GetCurrTargetWordsRange().GetNumWordsCovered(); ++i) { for (size_t i = 0; i < currHypo->GetCurrTargetWordsRange().GetNumWordsCovered(); ++i) {
m_prevStates.push_back(prevState); m_prevStates.push_back(prevState);
} }

View File

@ -31,6 +31,7 @@ libjosiah_a_SOURCES = \
SampleCollector.cpp \ SampleCollector.cpp \
Sampler.cpp \ Sampler.cpp \
Selector.cpp \ Selector.cpp \
SingleStateFeature.cpp \
SourceToTargetRatio.cpp \ SourceToTargetRatio.cpp \
StatelessFeature.cpp \ StatelessFeature.cpp \
TrainingSource.cpp \ TrainingSource.cpp \

View File

@ -33,260 +33,28 @@ using namespace std;
using boost::lexical_cast; using boost::lexical_cast;
namespace Josiah { namespace Josiah {
string ReorderingFeatureTemplate::BOS = "<s>";
FeatureFunctionHandle ReorderingFeature::getFunction(const Sample& sample) const
ReorderingFeature::ReorderingFeature(const vector<string>& msd,
const std::vector<std::string>& msdVocab)
{ {
return FeatureFunctionHandle(
const static string SOURCE = "source"; new ReorderingFeatureFunction(sample,m_mosesFeature));
const static string TARGET = "target";
const static string PREV = "prev";
const static string CURR = "curr";
for (vector<string>::const_iterator i = msdVocab.begin(); i != msdVocab.end();
++i) {
vector<string> msdVocabConfig = Tokenize(*i,":");
if (msdVocabConfig.size() != 3) {
ostringstream errmsg;
errmsg << "msdvocab configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdVocabConfig[0]);
bool source = true;
if (msdVocabConfig[1] == TARGET) {
source = false;
} else if (msdVocabConfig[1] != SOURCE) {
throw runtime_error("msd vocab config has invalid source/target identifier");
}
string filename = msdVocabConfig[2];
vocab_t* vocab = NULL;
if (source) {
vocab = &(m_sourceVocabs[factorId]);
} else {
vocab = &(m_targetVocabs[factorId]);
}
loadVocab(filename,vocab);
} }
for (vector<string>::const_iterator i = msd.begin(); i != msd.end(); ++i) { ReorderingFeatureFunction::ReorderingFeatureFunction(
vector<string> msdConfig = Tokenize(*i,":"); const Sample& sample,
if (msdConfig.size() != 4) { const DiscriminativeReorderingFeature& mosesFeature) :
ostringstream errmsg; FeatureFunction(sample),
errmsg << "msd configuration '" << *i << "' has incorrect format"; m_mosesFeature(mosesFeature) {}
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdConfig[1]);
bool source = true;
if (msdConfig[2] == TARGET) {
source = false;
} else if (msdConfig[2] != SOURCE) {
throw runtime_error("msd config has invalid source/target identifier");
}
bool curr = true;
if (msdConfig[3] == PREV) {
curr = false;
} else if (msdConfig[3] != CURR) {
throw runtime_error("msd config has invalid curr/prev identifier");
}
if (msdConfig[0] == "edge") {
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,curr));
} else {
ostringstream errmsg;
errmsg << "Unknown msd feature type '" << msdConfig[0] << "'" << endl;
throw runtime_error(errmsg.str());
}
//set vocabulary, if necessary void ReorderingFeatureFunction::assignScore(FVector& scores) {
vocab_t* vocab = NULL; //Use the score cached by updateTarget()
if (source) { scores += m_accumulator.GetScoresVector();
if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) { }
vocab = &(m_sourceVocabs[factorId]);
} void ReorderingFeatureFunction::updateTarget() {
} else { //update all the previous states, and the accumulator
if (m_targetVocabs.find(factorId) != m_targetVocabs.end()) { m_prevStates.clear();
vocab = &(m_targetVocabs[factorId]); m_accumulator.ZeroAll();
} const Hypothesis* currHypo = getSample().GetTargetTail();
}
m_templates.back()->setVocab(vocab);
} }
} }
FeatureFunctionHandle ReorderingFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new ReorderingFeatureFunction(sample, *this));
}
const std::vector<ReorderingFeatureTemplate*>& ReorderingFeature::getTemplates() const {
return m_templates;
}
void ReorderingFeature::loadVocab(string filename, vocab_t* vocab) {
VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl);
vocab->clear();
ifstream in(filename.c_str());
if (!in) {
ostringstream errmsg;
errmsg << "Unable to load vocabulary from " << filename;
throw runtime_error(errmsg.str());
}
string line;
while (getline(in,line)) {
vocab->insert(line);
}
}
bool ReorderingFeatureTemplate::checkVocab(const std::string& word) const {
if (!m_vocab) return true;
return m_vocab->find(word) != m_vocab->end();
}
ReorderingFeatureFunction::ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent)
: FeatureFunction(sample), m_parent(parent)
{}
/** Assign features for the following options, assuming they are contiguous on the target side */
void ReorderingFeatureFunction::assign(const TranslationOption* prevOption, const TranslationOption* currOption, FVector& scores) {
for (vector<ReorderingFeatureTemplate*>::const_iterator i = m_parent.getTemplates().begin();
i != m_parent.getTemplates().end(); ++i) {
(*i)->assign(prevOption,currOption,getMsd(prevOption, currOption), scores);
}
}
const string& ReorderingFeatureFunction::getMsd(const TranslationOption* prevOption, const TranslationOption* currOption) {
int prevStart = -1;
int prevEnd = -1;
if (prevOption) {
prevStart = prevOption->GetSourceWordsRange().GetStartPos();
prevEnd = prevOption->GetSourceWordsRange().GetEndPos();
}
int currStart = currOption->GetSourceWordsRange().GetStartPos();
int currEnd = currOption->GetSourceWordsRange().GetEndPos();
static string monotone = "msd:m";
static string swap = "msd:s";
static string discontinuous = "msd:d";
if (prevEnd + 1 == currStart) {
return monotone;
} else if (currEnd + 1 == prevStart) {
return swap;
} else {
return discontinuous;
}
}
void EdgeReorderingFeatureTemplate::assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores)
{
static const string sourcePrev = "s:p:";
static const string sourceCurr = "s:c:";
static const string targetPrev = "t:p:";
static const string targetCurr = "t:c:";
const Word* edge = NULL;
const string* position = NULL;
if (m_source && m_curr) {
edge = &(currOption->GetSourcePhrase()->GetWord(0));
position = &sourceCurr;
} else if (m_source && !m_curr) {
if (prevOption) {
const Phrase* sourcePhrase = prevOption->GetSourcePhrase();
edge = &(sourcePhrase->GetWord(sourcePhrase->GetSize()-1));
}
position = &sourcePrev;
} else if (!m_source && m_curr) {
edge = &(currOption->GetTargetPhrase().GetWord(0));
position = &targetCurr;
} else {
if (prevOption) {
const Phrase& targetPhrase = prevOption->GetTargetPhrase();
edge = &(targetPhrase.GetWord(targetPhrase.GetSize()-1));
}
position = &targetPrev;
}
ostringstream namestr;
namestr << *position;
namestr << m_factor;
namestr << ":";
if (edge) {
const string& word = edge->GetFactor(m_factor)->GetString();
if (!checkVocab(word)) return;
namestr << word;
} else {
namestr << BOS;
}
FName name(prefix,namestr.str());
++scores[name];
}
/** Assign the total score of this feature on the current hypo */
void ReorderingFeatureFunction::assignScore(FVector& scores)
{
const Hypothesis* currHypo = getSample().GetTargetTail();
const TranslationOption* prevOption = NULL;
while ((currHypo = (currHypo->GetNextHypo()))) {
const TranslationOption* currOption = &(currHypo->GetTranslationOption());
assign(prevOption,currOption,scores);
prevOption = currOption;
}
}
/** Score due to one segment */
void ReorderingFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), option, scores);
}
if (gap.rightHypo) {
assign(option,&(gap.rightHypo->GetTranslationOption()), scores);
}
}
/** Score due to two segments. The left and right refer to the target positions.**/
void ReorderingFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), leftOption,scores);
}
assign(leftOption,rightOption,scores);
if (gap.rightHypo) {
assign(rightOption, &(gap.rightHypo->GetTranslationOption()), scores);
}
}
void ReorderingFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.leftHypo) {
assign(&(leftGap.leftHypo->GetTranslationOption()),leftOption,scores);
}
assign(leftOption, &(leftGap.rightHypo->GetTranslationOption()), scores);
assign(&(rightGap.leftHypo->GetTranslationOption()),rightOption,scores);
if (rightGap.rightHypo) {
assign(rightOption, &(rightGap.rightHypo->GetTranslationOption()),scores);
}
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void ReorderingFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo, WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

View File

@ -21,78 +21,30 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/unordered_set.hpp> #include <boost/unordered_set.hpp>
#include "DiscriminativeReorderingFeature.h"
#include "FeatureFunction.h" #include "FeatureFunction.h"
namespace Josiah { namespace Josiah {
typedef boost::unordered_set<std::string> vocab_t; typedef boost::shared_ptr<const Moses::DiscriminativeReorderingState> DRStateHandle;
/**
* Used to define different types of reordering features.
**/
class ReorderingFeatureTemplate {
public:
ReorderingFeatureTemplate(): m_vocab(NULL) {}
static std::string BOS;
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores) = 0;
void setVocab(vocab_t* vocab) {m_vocab = vocab;}
bool checkVocab(const std::string& word) const ;
virtual ~ReorderingFeatureTemplate() {}
private:
vocab_t* m_vocab;
};
class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
public:
EdgeReorderingFeatureTemplate(size_t factor, bool source, bool curr) : m_factor(factor), m_source(source), m_curr(curr) {}
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores);
private:
size_t m_factor;
bool m_source; //source or target?
bool m_curr; //curr of prev?
};
/** /**
* Features related to the ordering between segments. * Wraps the Moses DiscriminativeReorderingFeature
**/ **/
class ReorderingFeature : public Feature { class ReorderingFeature : public Feature {
public: public:
/** ReorderingFeature(const std::vector<std::string>& featureConfig,
* The msd vector will indicate which types of msd features are to be included. Each element is made const std::vector<std::string>& vocabConfig) :
* up of four parts, separated by colons. The fields are: m_mosesFeature(featureConfig,vocabConfig) {}
* type: The type of feature (currently only edge is supported)
* factor_id: An integer representing the factor
* source/target: One of two possible values indicating whether the
* source or target words are used.
* prev/curr: Indicates whether the feature uses the previous or
current segment
*
* The msdVocab configuration items specify a vocabulary file for
* the source or target of a given factor. The format of these config
* strings is factor_id:source/target:filename
*
*/
ReorderingFeature(const std::vector<std::string>& msd,
const std::vector<std::string>& msdVocab);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const; virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const std::vector<ReorderingFeatureTemplate*>& getTemplates() const;
private: private:
std::vector<ReorderingFeatureTemplate*> m_templates; Moses::DiscriminativeReorderingFeature m_mosesFeature;
std::map<size_t,vocab_t> m_sourceVocabs;
std::map<size_t,vocab_t > m_targetVocabs;
void loadVocab(std::string filename, vocab_t* vocab);
}; };
@ -101,11 +53,14 @@ class ReorderingFeatureFunction : public FeatureFunction {
public: public:
ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent); ReorderingFeatureFunction(const Sample& sample,
const Moses::DiscriminativeReorderingFeature& mosesFeature);
/** Assign the total score of this feature on the current hypo */ /** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores); virtual void assignScore(FVector& scores);
virtual void updateTarget();
/** Score due to one segment */ /** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores); virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
@ -121,16 +76,9 @@ class ReorderingFeatureFunction : public FeatureFunction {
private: private:
const ReorderingFeature& m_parent; const Moses::DiscriminativeReorderingFeature& m_mosesFeature;
Moses::ScoreComponentCollection m_accumulator;
std::vector<DRStateHandle> m_prevStates;
/** Assign features for the following tow options, assuming they are contiguous on the target side */
void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption, FVector& scores);
/** Monotone, swapped or discontinuous? The segments are assumed to have contiguous translations on the target side. */
const std::string& getMsd(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption);
}; };
} }

View File

@ -69,6 +69,7 @@ static void MixWeights(size_t size, size_t rank) {
#endif #endif
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
int rank = 0, size = 1; int rank = 0, size = 1;
#ifdef MPI_ENABLED #ifdef MPI_ENABLED

View File

@ -0,0 +1,133 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SingleStateFeature.h"
#include "Gibbler.h"
using namespace Moses;
using namespace std;
namespace Josiah {
/**
 * Builds the per-sample function object for this feature.
 *
 * The returned SingleStateFeatureFunction is given the sample and the same
 * wrapped Moses feature pointer that this object holds.
 **/
FeatureFunctionHandle SingleStateFeature::getFunction(const Sample& sample) const
{
  FeatureFunctionHandle handle(
    new SingleStateFeatureFunction(sample, m_mosesFeature));
  return handle;
}
/**
 * Adds the cached total score of this feature to scores.
 *
 * Nothing is re-evaluated here: the value comes from m_accumulator, which
 * updateTarget() fills in.
 **/
void SingleStateFeatureFunction::assignScore(FVector& scores)
{
  const FVector& cachedTotal = m_accumulator.GetScoresVector();
  scores += cachedTotal;
}
/**
 * Rebuilds the cached previous-state vector and the cached total score by
 * walking the sample's hypotheses, starting from the target tail.
 *
 * The tail hypothesis itself is not evaluated; it only supplies the input
 * used to create the initial (empty) state. For every following hypothesis,
 * m_prevStates receives one entry per target word the hypothesis covers, and
 * each of those entries is the state that was in force *before* that
 * hypothesis was evaluated. The update methods look this vector up with
 * gap.segment.GetStartPos().
 **/
void SingleStateFeatureFunction::updateTarget()
{
  m_prevStates.clear();
  m_accumulator.ZeroAll();

  const Moses::Hypothesis* currHypo = getSample().GetTargetTail();
  StateHandle prevState(
    m_mosesFeature->EmptyHypothesisState(currHypo->GetInput()));

  while ((currHypo = currHypo->GetNextHypo())) {
    // Evaluate on the translation option, exactly as the update methods in
    // this file do, so cached and incremental scores use the same entry point.
    StateHandle currState(m_mosesFeature->Evaluate(
      currHypo->GetTranslationOption(), prevState.get(), &m_accumulator));

    // One copy of the incoming state per target word covered by this hypo.
    const size_t numWords =
      currHypo->GetCurrTargetWordsRange().GetNumWordsCovered();
    m_prevStates.insert(m_prevStates.end(), numWords, prevState);

    prevState = currState;
  }
}
/** Score due to one segment */
void SingleStateFeatureFunction::doSingleUpdate(
const TranslationOption* option,
const TargetGap& gap, FVector& scores) {
ScoreComponentCollection accumulator;
//the previous state of the (new) hypo
StateHandle prevState = m_prevStates[gap.segment.GetStartPos()];
//Evaluate the score of inserting this hypo, and get the prev state
//for the next hypo.
prevState.reset(m_mosesFeature->Evaluate(
*option,prevState.get(),&accumulator));
//if there's a hypo on the right, then evaluate it
if (gap.rightHypo) {
prevState.reset(m_mosesFeature->Evaluate(
gap.rightHypo->GetTranslationOption(),prevState.get(),&accumulator));
}
scores += accumulator.GetScoresVector();
}
/** Score due to two segments.
The left and right refer to the target positions.**/
void SingleStateFeatureFunction::doContiguousPairedUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
ScoreComponentCollection accumulator;
//The previous state of the (new) current hypo.
StateHandle prevState(m_prevStates[gap.segment.GetStartPos()]);
//Evaluate the hypos in the gap
prevState.reset(m_mosesFeature->Evaluate(
*leftOption,prevState.get(),&accumulator));
prevState.reset(m_mosesFeature->Evaluate(
*rightOption,prevState.get(),&accumulator));
//if there's a hypo on the right, evaluate it
if (gap.rightHypo) {
prevState.reset(m_mosesFeature->Evaluate(
gap.rightHypo->GetTranslationOption(),prevState.get(),&accumulator));
}
scores += accumulator.GetScoresVector();
}
/**
 * Score due to two segments that are not adjacent on the target side.
 *
 * Each segment is scored independently as a single update against its own
 * gap, and both contributions are added to scores.
 * NOTE(review): the second call reads the state cached by updateTarget() for
 * rightGap; this presumably relies on the wrapped feature's state depending
 * only on the previous phrase - confirm.
 **/
void SingleStateFeatureFunction::doDiscontiguousPairedUpdate(
  const TranslationOption* leftOption,
  const TranslationOption* rightOption,
  const TargetGap& leftGap,
  const TargetGap& rightGap,
  FVector& scores)
{
  // Left segment first, then right; both accumulate into the same vector.
  doSingleUpdate(leftOption, leftGap, scores);
  doSingleUpdate(rightOption, rightGap, scores);
}
/** Score due to flip.
Again, left and right refer to order on the <emph>target</emph> side. */
void SingleStateFeatureFunction::doFlipUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& leftGap,
const TargetGap& rightGap,
FVector& scores) {
if (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo,rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

View File

@ -0,0 +1,99 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include "../moses/src/FFState.h"
#include "../moses/src/FeatureFunction.h"
#include "FeatureFunction.h"
namespace Josiah {
class Sample;
/** Shared-ownership handle to an immutable Moses feature-function state. */
typedef boost::shared_ptr<const Moses::FFState> StateHandle;
/**
 * Josiah feature wrapping a Moses feature whose state depends only on the
 * previous phrase.
 *
 * Only the pointer to the Moses feature is stored; nothing in this class
 * deletes it.
 **/
class SingleStateFeature : public Feature {
  public:
    /** @param mosesFeature the Moses feature to wrap. */
    SingleStateFeature(const Moses::OptionStatefulFeatureFunction* mosesFeature)
      : m_mosesFeature(mosesFeature)
    {}

    /** Creates the function object that scores the given sample. */
    virtual FeatureFunctionHandle getFunction(const Sample& sample) const;

  private:
    const Moses::OptionStatefulFeatureFunction* m_mosesFeature;
};
/**
 * Per-sample scorer for a SingleStateFeature.
 *
 * Holds a pointer to the wrapped Moses feature (not deleted by this class),
 * an accumulator with the feature's total score on the sample, and a vector
 * of previous states that the update methods index by the start position of
 * a target gap. The two caches are declared here; they are presumably
 * refreshed by updateTarget() - see the implementation file.
 **/
class SingleStateFeatureFunction : public FeatureFunction {
  public:
    /**
     * @param sample       the sample this function scores.
     * @param mosesFeature the wrapped Moses feature.
     **/
    SingleStateFeatureFunction(
      const Sample& sample,
      const Moses::OptionStatefulFeatureFunction* mosesFeature):
      FeatureFunction(sample),
      m_mosesFeature(mosesFeature) {}

    /** Assign the total score of this feature on the current hypo */
    virtual void assignScore(FVector& scores);

    /** Refresh the cached state after the sample's target side has changed. */
    virtual void updateTarget();

    /** Score due to one segment */
    virtual void doSingleUpdate(const Moses::TranslationOption* option,
                                const TargetGap& gap, FVector& scores);

    /** Score due to two segments.
       The left and right refer to the target positions.**/
    virtual void doContiguousPairedUpdate(
      const Moses::TranslationOption* leftOption,
      const Moses::TranslationOption* rightOption,
      const TargetGap& gap, FVector& scores);

    /** Score due to two segments that are not adjacent on the target side. */
    virtual void doDiscontiguousPairedUpdate(
      const Moses::TranslationOption* leftOption,
      const Moses::TranslationOption* rightOption,
      const TargetGap& leftGap,
      const TargetGap& rightGap,
      FVector& scores);

    /** Score due to flip.
      Again, left and right refer to order on the <emph>target</emph> side. */
    virtual void doFlipUpdate(
      const Moses::TranslationOption* leftOption,
      const Moses::TranslationOption* rightOption,
      const TargetGap& leftGap,
      const TargetGap& rightGap,
      FVector& scores);

  private:
    // Qualified like the constructor parameter that initialises it, so the
    // header does not depend on a using-directive from another include.
    const Moses::OptionStatefulFeatureFunction* m_mosesFeature;
    // Total score of the wrapped feature on the sample.
    Moses::ScoreComponentCollection m_accumulator;
    // Previous states, looked up by target position in the update methods.
    std::vector<StateHandle> m_prevStates;
};
}

View File

@ -1,9 +1,11 @@
#include <boost/program_options.hpp> #include <boost/program_options.hpp>
#include "Utils.h" #include "Utils.h"
#include "Pos.h" #include "Pos.h"
#include "Dependency.h" #include "Dependency.h"
#include "DiscriminativeLMFeature.h" #include "DiscriminativeLMFeature.h"
#include "DiscriminativeReorderingFeature.h"
#include "DistortionPenaltyFeature.h" #include "DistortionPenaltyFeature.h"
#include "LanguageModelFeature.h" #include "LanguageModelFeature.h"
#include "LexicalReorderingFeature.h" #include "LexicalReorderingFeature.h"
@ -17,6 +19,7 @@
#include "ReorderingFeature.h" #include "ReorderingFeature.h"
#include "SourceToTargetRatio.h" #include "SourceToTargetRatio.h"
#include "StatelessFeature.h" #include "StatelessFeature.h"
#include "SingleStateFeature.h"
#include "WordPenaltyFeature.h" #include "WordPenaltyFeature.h"
using namespace std; using namespace std;
@ -25,6 +28,8 @@ namespace po = boost::program_options;
namespace Josiah { namespace Josiah {
// template class SingleStateFeature
// <Moses::DiscriminativeReorderingFeature,DiscriminativeReorderingState>;
@ -47,7 +52,8 @@ namespace Josiah {
fv.push_back(FeatureHandle(new DistortionPenaltyFeature())); fv.push_back(FeatureHandle(new DistortionPenaltyFeature()));
const std::vector<LexicalReordering*>& reorderModels = system.GetReorderModels(); const std::vector<LexicalReordering*>& reorderModels = system.GetReorderModels();
for (size_t i = 0; i < reorderModels.size(); ++i) { for (size_t i = 0; i < reorderModels.size(); ++i) {
fv.push_back(FeatureHandle(new LexicalReorderingFeature(reorderModels[i],i))); fv.push_back(FeatureHandle(new SingleStateFeature(reorderModels[i])));
// fv.push_back(FeatureHandle(new LexicalReorderingFeature(reorderModels[i],i)));
} }
if (filename.empty()) return; if (filename.empty()) return;
@ -158,7 +164,9 @@ namespace Josiah {
fv.push_back(FeatureHandle(new DiscriminativeLMBigramFeature(discrimlmFactor,discrimlmVocab))); fv.push_back(FeatureHandle(new DiscriminativeLMBigramFeature(discrimlmFactor,discrimlmVocab)));
} }
if (msdConfig.size()) { if (msdConfig.size()) {
fv.push_back(FeatureHandle(new ReorderingFeature(msdConfig,msdVocab))); //TODO
fv.push_back(FeatureHandle(new SingleStateFeature
(new Moses::DiscriminativeReorderingFeature(msdConfig,msdVocab))));
} }
if (phrasePairSourceTarget) { if (phrasePairSourceTarget) {
fv.push_back(FeatureHandle( fv.push_back(FeatureHandle(

6343
ltmain.sh

File diff suppressed because it is too large Load Diff

View File

@ -65,51 +65,54 @@ TemplateStateHandle EdgeReorderingFeatureTemplate::EmptyState(const InputType&)
} }
TemplateStateHandle EdgeReorderingFeatureTemplate::Evaluate( TemplateStateHandle EdgeReorderingFeatureTemplate::Evaluate(
const Hypothesis& cur_hypo, const TranslationOption& currOption,
const TemplateStateHandle state, const TemplateStateHandle state,
const string& prefix, const string& prefix,
FVector& accumulator) const FVector& accumulator) const
{ {
static const string sourcePrev = "s:p:";
static const string sourceCurr = "s:c:";
static const string targetPrev = "t:p:";
static const string targetCurr = "t:c:";
const EdgeReorderingFeatureTemplateState* edgeState = const EdgeReorderingFeatureTemplateState* edgeState =
dynamic_cast<const EdgeReorderingFeatureTemplateState*>(state.get()); dynamic_cast<const EdgeReorderingFeatureTemplateState*>(state.get());
assert(edgeState); assert(edgeState);
const TranslationOption& currOption = cur_hypo.GetTranslationOption(); ostringstream namestr;
namestr << m_posString;
const Factor* edge = NULL; namestr << m_factor;
const string* position = NULL; namestr << ":";
if (m_source && m_curr) { vector<string> edges;
edge = currOption.GetSourcePhrase()->GetWord(0).GetFactor(m_factor); //TODO: do these three on ctor
position = &sourceCurr; if (m_pos == dreo_prev || m_pos == dreo_both) {
} else if (m_source && !m_curr) { const Factor* edge = edgeState->GetLastFactor();
edge = edgeState->GetLastFactor();
position = &sourcePrev;
} else if (!m_source && m_curr) {
edge = currOption.GetTargetPhrase().GetWord(0).GetFactor(m_factor);
position = &targetCurr;
} else {
edge = edgeState->GetLastFactor();
position = &targetPrev;
}
if (!edge || CheckVocab(edge->GetString())) {
ostringstream namestr;
namestr << *position;
namestr << m_factor;
namestr << ":";
if (edge) { if (edge) {
const string& word = edge->GetString(); const string& word = edge->GetString();
namestr << word; edges.push_back(word);
} else { } else {
namestr << BOS_; edges.push_back(BOS_);
}
}
if (m_pos == dreo_curr || m_pos == dreo_both) {
const Factor* edge = NULL;
if (m_source) {
edge = currOption.GetSourcePhrase()->GetWord(0).GetFactor(m_factor);
} else {
edge = currOption.GetTargetPhrase().GetWord(0).GetFactor(m_factor);
}
if (edge) {
const string& word = edge->GetString();
edges.push_back(word);
} else {
edges.push_back(BOS_);
}
}
if (CheckVocab(edges[0])) {
if (edges.size() == 1 || CheckVocab(edges[1])) {
namestr << edges[0];
if (edges.size() == 2) {
namestr << ":" << edges[1];
}
FName name(prefix,namestr.str());
++accumulator[name];
} }
FName name(prefix,namestr.str());
++accumulator[name];
} }
return TemplateStateHandle return TemplateStateHandle
(new EdgeReorderingFeatureTemplateState(&currOption,m_source,m_factor)); (new EdgeReorderingFeatureTemplateState(&currOption,m_source,m_factor));
@ -190,12 +193,13 @@ const string& DiscriminativeReorderingState::GetMsd(const WordsRange& currWordsR
DiscriminativeReorderingFeature::DiscriminativeReorderingFeature DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
(const vector<string>& featureConfig, (const vector<string>& featureConfig,
const vector<string>& vocabConfig) : const vector<string>& vocabConfig) :
StatefulFeatureFunction("dreo") OptionStatefulFeatureFunction("dreo")
{ {
const static string SOURCE = "source"; const static string SOURCE = "source";
const static string TARGET = "target"; const static string TARGET = "target";
const static string PREV = "prev"; const static string PREV = "prev";
const static string CURR = "curr"; const static string CURR = "curr";
const static string BOTH = "both";
//load vocabularies //load vocabularies
for (vector<string>::const_iterator i = vocabConfig.begin(); i != vocabConfig.end(); for (vector<string>::const_iterator i = vocabConfig.begin(); i != vocabConfig.end();
@ -214,7 +218,7 @@ DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
throw runtime_error("Discrim reordering vocab config has invalid source/target identifier"); throw runtime_error("Discrim reordering vocab config has invalid source/target identifier");
} }
string filename = vc[2]; string filename = vc[2];
vocab_t* vocab = NULL; Vocab_t* vocab = NULL;
if (source) { if (source) {
vocab = &(m_sourceVocabs[factorId]); vocab = &(m_sourceVocabs[factorId]);
} else { } else {
@ -238,14 +242,16 @@ DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
} else if (fc[2] != SOURCE) { } else if (fc[2] != SOURCE) {
throw runtime_error("Discrim reordering feature config has invalid source/target identifier"); throw runtime_error("Discrim reordering feature config has invalid source/target identifier");
} }
bool curr = true; DreoPosition_t pos = dreo_curr;
if (fc[3] == PREV) { if (fc[3] == PREV) {
curr = false; pos = dreo_prev;
} else if (fc[3] == BOTH) {
pos = dreo_both;
} else if (fc[3] != CURR) { } else if (fc[3] != CURR) {
throw runtime_error("Discrim reordering config has invalid curr/prev identifier"); throw runtime_error("Discrim reordering config has invalid curr/prev identifier");
} }
if (fc[0] == "edge") { if (fc[0] == "edge") {
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,curr)); m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,pos));
} else { } else {
ostringstream errmsg; ostringstream errmsg;
errmsg << "Unknown msd feature type '" << fc[0] << "'" << endl; errmsg << "Unknown msd feature type '" << fc[0] << "'" << endl;
@ -253,7 +259,7 @@ DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
} }
//set vocabulary, if necessary //set vocabulary, if necessary
vocab_t* vocab = NULL; Vocab_t* vocab = NULL;
if (source) { if (source) {
if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) { if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) {
vocab = &(m_sourceVocabs[factorId]); vocab = &(m_sourceVocabs[factorId]);
@ -295,7 +301,8 @@ const FFState* DiscriminativeReorderingFeature::EmptyHypothesisState
return state; return state;
} }
FFState* DiscriminativeReorderingFeature::Evaluate(const Hypothesis& cur_hypo, FFState* DiscriminativeReorderingFeature::Evaluate(
const TranslationOption& option,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const
{ {
@ -304,17 +311,17 @@ FFState* DiscriminativeReorderingFeature::Evaluate(const Hypothesis& cur_hypo,
assert(state); assert(state);
FVector scores; FVector scores;
DiscriminativeReorderingState* newState = new DiscriminativeReorderingState DiscriminativeReorderingState* newState = new DiscriminativeReorderingState
(&(cur_hypo.GetCurrSourceWordsRange())); (&(option.GetSourceWordsRange()));
string stem = state->GetMsd(cur_hypo.GetCurrSourceWordsRange()); string stem = state->GetMsd(option.GetSourceWordsRange());
for (size_t i = 0; i < m_templates.size(); ++i) { for (size_t i = 0; i < m_templates.size(); ++i) {
newState->AddTemplateState(m_templates[i]->Evaluate( newState->AddTemplateState(m_templates[i]->Evaluate(
cur_hypo, state->GetTemplateState(i), stem, scores)); option, state->GetTemplateState(i), stem, scores));
} }
accumulator->PlusEquals(scores); accumulator->PlusEquals(scores);
return newState; return newState;
} }
void DiscriminativeReorderingFeature::LoadVocab(string filename, vocab_t* vocab) void DiscriminativeReorderingFeature::LoadVocab(string filename, Vocab_t* vocab)
{ {
VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl); VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl);
vocab->clear(); vocab->clear();

View File

@ -39,7 +39,8 @@ namespace Moses {
class TranslationOption; class TranslationOption;
typedef boost::unordered_set<std::string> vocab_t; typedef boost::unordered_set<std::string> Vocab_t;
// Position selector for a reordering feature template. The value is chosen
// from the fourth field of a feature config entry: "curr" -> dreo_curr,
// "prev" -> dreo_prev, "both" -> dreo_both.
enum DreoPosition_t {
  dreo_curr,
  dreo_prev,
  dreo_both
};
/* /*
* The reordering feature is made up of several subfeatures (Templates) * The reordering feature is made up of several subfeatures (Templates)
@ -58,19 +59,19 @@ class ReorderingFeatureTemplate {
public: public:
ReorderingFeatureTemplate(): m_vocab(NULL) {} ReorderingFeatureTemplate(): m_vocab(NULL) {}
virtual TemplateStateHandle Evaluate( virtual TemplateStateHandle Evaluate(
const Hypothesis& cur_hypo, const TranslationOption& cur_option,
const TemplateStateHandle state, const TemplateStateHandle state,
const std::string& stem, const std::string& stem,
FVector& accumulator) const = 0; FVector& accumulator) const = 0;
virtual TemplateStateHandle EmptyState(const InputType &input) const = 0; virtual TemplateStateHandle EmptyState(const InputType &input) const = 0;
void SetVocab(vocab_t* vocab) {m_vocab = vocab;} void SetVocab(Vocab_t* vocab) {m_vocab = vocab;}
protected: protected:
bool CheckVocab(const std::string& token) const; bool CheckVocab(const std::string& token) const;
virtual ~ReorderingFeatureTemplate() {} virtual ~ReorderingFeatureTemplate() {}
private: private:
vocab_t* m_vocab; Vocab_t* m_vocab;
}; };
class EdgeReorderingFeatureTemplateState: public ReorderingFeatureTemplateState { class EdgeReorderingFeatureTemplateState: public ReorderingFeatureTemplateState {
@ -92,11 +93,31 @@ class EdgeReorderingFeatureTemplateState: public ReorderingFeatureTemplateState
class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate { class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
public: public:
EdgeReorderingFeatureTemplate(size_t factor, bool source, bool curr) EdgeReorderingFeatureTemplate(size_t factor, bool source, DreoPosition_t pos)
: m_factor(factor), m_source(source), m_curr(curr) {} : m_factor(factor), m_source(source), m_pos(pos)
{
if (source) {
if (m_pos == dreo_curr) {
m_posString = "s:c";
} else if (m_pos == dreo_prev) {
m_posString = "s:p";
} else if (m_pos == dreo_both) {
m_posString = "s:b";
}
} else {
if (m_pos == dreo_curr) {
m_posString = "t:c";
} else if (m_pos == dreo_prev) {
m_posString = "t:p";
} else if (m_pos == dreo_both) {
m_posString = "t:b";
}
}
}
virtual TemplateStateHandle Evaluate( virtual TemplateStateHandle Evaluate(
const Hypothesis& cur_hypo, const TranslationOption& cur_option,
const TemplateStateHandle state, const TemplateStateHandle state,
const std::string& stem, const std::string& stem,
FVector& accumulator) const; FVector& accumulator) const;
@ -105,7 +126,8 @@ class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
private: private:
size_t m_factor; size_t m_factor;
bool m_source; //source or target? bool m_source; //source or target?
bool m_curr; //curr of prev? DreoPosition_t m_pos;
std::string m_posString;
}; };
@ -114,7 +136,6 @@ class DiscriminativeReorderingState: public FFState {
DiscriminativeReorderingState(); DiscriminativeReorderingState();
DiscriminativeReorderingState(const WordsRange* prevWordsRange); DiscriminativeReorderingState(const WordsRange* prevWordsRange);
virtual int Compare(const FFState& other) const; virtual int Compare(const FFState& other) const;
//Implemented to here.
void AddTemplateState(TemplateStateHandle templateState); void AddTemplateState(TemplateStateHandle templateState);
TemplateStateHandle GetTemplateState(size_t index) const; TemplateStateHandle GetTemplateState(size_t index) const;
const std::string& GetMsd(const WordsRange& currWordsRange) const; const std::string& GetMsd(const WordsRange& currWordsRange) const;
@ -124,7 +145,7 @@ class DiscriminativeReorderingState: public FFState {
const WordsRange* m_prevWordsRange; const WordsRange* m_prevWordsRange;
}; };
class DiscriminativeReorderingFeature : public StatefulFeatureFunction { class DiscriminativeReorderingFeature : public OptionStatefulFeatureFunction {
public: public:
DiscriminativeReorderingFeature(const std::vector<std::string>& featureConfig, DiscriminativeReorderingFeature(const std::vector<std::string>& featureConfig,
const std::vector<std::string>& vocabConfig); const std::vector<std::string>& vocabConfig);
@ -135,16 +156,16 @@ class DiscriminativeReorderingFeature : public StatefulFeatureFunction {
virtual const FFState* EmptyHypothesisState(const InputType &input) const; virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* Evaluate(const Hypothesis& cur_hypo, virtual FFState* Evaluate(const TranslationOption& cur_option,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* accumulator) const; ScoreComponentCollection* accumulator) const;
private: private:
std::vector<ReorderingFeatureTemplate*> m_templates; std::vector<ReorderingFeatureTemplate*> m_templates;
std::map<FactorType,vocab_t> m_sourceVocabs; std::map<FactorType,Vocab_t> m_sourceVocabs;
std::map<FactorType,vocab_t> m_targetVocabs; std::map<FactorType,Vocab_t> m_targetVocabs;
void LoadVocab(std::string filename, vocab_t* vocab); void LoadVocab(std::string filename, Vocab_t* vocab);
}; };

View File

@ -1,4 +1,5 @@
#include "FeatureFunction.h" #include "FeatureFunction.h"
#include "Hypothesis.h"
#include <cassert> #include <cassert>
@ -18,6 +19,15 @@ void StatelessFeatureFunction::Evaluate(
bool StatefulFeatureFunction::IsStateless() const { return false; } bool StatefulFeatureFunction::IsStateless() const { return false; }
/**
 * Hypothesis-based entry point: takes the translation option out of the
 * hypothesis and passes it, together with the unchanged state and
 * accumulator arguments, to the TranslationOption overload of Evaluate.
 **/
FFState* OptionStatefulFeatureFunction::Evaluate(const Hypothesis& cur_hypo,
                                                 const FFState* prev_state,
                                                 ScoreComponentCollection* accumulator) const
{
  const TranslationOption& option = cur_hypo.GetTranslationOption();
  return Evaluate(option, prev_state, accumulator);
}
} }

View File

@ -12,6 +12,7 @@ class Hypothesis;
class FFState; class FFState;
class InputType; class InputType;
class ScoreComponentCollection; class ScoreComponentCollection;
class TranslationOption;
class FeatureFunction: public ScoreProducer { class FeatureFunction: public ScoreProducer {
@ -89,6 +90,29 @@ struct FeatureNameCounter {
template <typename T> size_t FeatureNameCounter<T>::s_created(0); template <typename T> size_t FeatureNameCounter<T>::s_created(0);
/**
 * Base class for stateful feature functions that are scored from a
 * TranslationOption alone, rather than from a whole Hypothesis. This is the
 * way all feature functions should work; the Hypothesis-based entry point is
 * kept because, for historical reasons, the LM uses the Hypothesis.
 **/
class OptionStatefulFeatureFunction : public StatefulFeatureFunction {
  public:
    /** Passes the description straight through to StatefulFeatureFunction. */
    OptionStatefulFeatureFunction(const std::string& description)
      : StatefulFeatureFunction(description) {}

    /** Hypothesis-based overload; declared here and defined out of line. */
    virtual FFState* Evaluate(const Hypothesis& cur_hypo,
                              const FFState* prev_state,
                              ScoreComponentCollection* accumulator) const;

    /** Option-based overload; pure virtual, so every subclass supplies it. */
    virtual FFState* Evaluate(const TranslationOption& cur_option,
                              const FFState* prev_state,
                              ScoreComponentCollection* accumulator) const = 0;
};
} }
#endif #endif

View File

@ -13,7 +13,7 @@ LexicalReordering::LexicalReordering(std::vector<FactorType>& f_factors,
const std::string &modelType, const std::string &modelType,
const std::string &filePath, const std::string &filePath,
const std::vector<float>& weights) const std::vector<float>& weights)
: StatefulFeatureFunction("LexicalReordering_" + modelType), : OptionStatefulFeatureFunction("LexicalReordering_" + modelType),
m_configuration(this, modelType) { m_configuration(this, modelType) {
std::cerr << "Creating lexical reordering...\n"; std::cerr << "Creating lexical reordering...\n";
std::cerr << "weights: "; std::cerr << "weights: ";
@ -71,12 +71,12 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const {
return m_table->GetScore(f, e, Phrase(Output)); return m_table->GetScore(f, e, Phrase(Output));
} }
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo, FFState* LexicalReordering::Evaluate(const TranslationOption& option,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* out) const { ScoreComponentCollection* out) const {
Scores score(GetNumScoreComponents(), 0); Scores score(GetNumScoreComponents(), 0);
const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state); const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), score); LexicalReorderingState *next_state = prev->Expand(option, score);
out->PlusEquals(this, score); out->PlusEquals(this, score);

View File

@ -19,10 +19,10 @@ namespace Moses
class Factor; class Factor;
class Phrase; class Phrase;
class Hypothesis; class TranslationOption; // forward declaration for the Evaluate() parameter type
class InputType; class InputType;
class LexicalReordering : public StatefulFeatureFunction { class LexicalReordering : public OptionStatefulFeatureFunction {
public: public:
LexicalReordering(std::vector<FactorType>& f_factors, LexicalReordering(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors, std::vector<FactorType>& e_factors,
@ -35,7 +35,7 @@ public:
return m_configuration.GetNumScoreComponents(); return m_configuration.GetNumScoreComponents();
} }
virtual FFState* Evaluate(const Hypothesis& cur_hypo, virtual FFState* Evaluate(const TranslationOption& cur_option,
const FFState* prev_state, const FFState* prev_state,
ScoreComponentCollection* accumulator) const; ScoreComponentCollection* accumulator) const;

View File

@ -85,7 +85,7 @@ public:
} }
} }
FVector GetScoresVector() const FVector& GetScoresVector()
{ {
return m_scores; return m_scores;
} }

View File

@ -40,6 +40,7 @@ then
fi fi
fi fi
touch ltmain.sh
echo "Calling $ACLOCAL..." echo "Calling $ACLOCAL..."
$ACLOCAL -I m4 || die "aclocal failed" $ACLOCAL -I m4 || die "aclocal failed"