Implementation of discriminative reordering feature

Testing git

Add 'both' option for d-reo features, in addition to prev and curr

Start to implement gibbler discrim reordering feature in terms of Moses
one. Pause while I try for a generic single phrase stateful.

Creating new feature to wrap some Moses features

Minor progress in impl of single state feature.
Comment change.

Create a new subclass of StatefulFeatureFunction, which does things
the right way, ie by taking a TranslationOption instead of a Hypo.

Implementation of SingleStateFeature

This can be used to wrap any Moses feature function whose state just depends
on the previous hypothesis, in particular it is used for the reordering
features.

Ignore ltmain.sh

Don't track this file

touch ltmain.sh

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/samplerank@4120 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2011-08-05 13:42:22 +00:00
parent 465442340a
commit 680f852499
18 changed files with 414 additions and 6729 deletions

7
.gitignore vendored
View File

@ -1,5 +1,11 @@
*.lo
*.o
*~
.libs
.deps
*.la
*.a
*.swp
Makefile
Makefile.in
aclocal.m4
@ -8,6 +14,7 @@ config.h
config.log
config.status
configure
ltmain.sh
misc/.deps
misc/Makefile
misc/Makefile.in

View File

@ -83,7 +83,7 @@ void LexicalReorderingFeatureFunction::updateTarget() {
const Hypothesis * currHypo = getSample().GetTargetTail();
LRStateHandle prevState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput())));
while ((currHypo = (currHypo->GetNextHypo()))) {
LRStateHandle currState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->Evaluate(*currHypo,prevState.get(),&m_accumulator)));
LRStateHandle currState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->Evaluate(currHypo->GetTranslationOption(),prevState.get(),&m_accumulator)));
for (size_t i = 0; i < currHypo->GetCurrTargetWordsRange().GetNumWordsCovered(); ++i) {
m_prevStates.push_back(prevState);
}

View File

@ -31,6 +31,7 @@ libjosiah_a_SOURCES = \
SampleCollector.cpp \
Sampler.cpp \
Selector.cpp \
SingleStateFeature.cpp \
SourceToTargetRatio.cpp \
StatelessFeature.cpp \
TrainingSource.cpp \

View File

@ -33,260 +33,28 @@ using namespace std;
using boost::lexical_cast;
namespace Josiah {
string ReorderingFeatureTemplate::BOS = "<s>";
ReorderingFeature::ReorderingFeature(const vector<string>& msd,
const std::vector<std::string>& msdVocab)
FeatureFunctionHandle ReorderingFeature::getFunction(const Sample& sample) const
{
const static string SOURCE = "source";
const static string TARGET = "target";
const static string PREV = "prev";
const static string CURR = "curr";
for (vector<string>::const_iterator i = msdVocab.begin(); i != msdVocab.end();
++i) {
vector<string> msdVocabConfig = Tokenize(*i,":");
if (msdVocabConfig.size() != 3) {
ostringstream errmsg;
errmsg << "msdvocab configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdVocabConfig[0]);
bool source = true;
if (msdVocabConfig[1] == TARGET) {
source = false;
} else if (msdVocabConfig[1] != SOURCE) {
throw runtime_error("msd vocab config has invalid source/target identifier");
}
string filename = msdVocabConfig[2];
vocab_t* vocab = NULL;
if (source) {
vocab = &(m_sourceVocabs[factorId]);
} else {
vocab = &(m_targetVocabs[factorId]);
}
loadVocab(filename,vocab);
return FeatureFunctionHandle(
new ReorderingFeatureFunction(sample,m_mosesFeature));
}
for (vector<string>::const_iterator i = msd.begin(); i != msd.end(); ++i) {
vector<string> msdConfig = Tokenize(*i,":");
if (msdConfig.size() != 4) {
ostringstream errmsg;
errmsg << "msd configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdConfig[1]);
bool source = true;
if (msdConfig[2] == TARGET) {
source = false;
} else if (msdConfig[2] != SOURCE) {
throw runtime_error("msd config has invalid source/target identifier");
}
bool curr = true;
if (msdConfig[3] == PREV) {
curr = false;
} else if (msdConfig[3] != CURR) {
throw runtime_error("msd config has invalid curr/prev identifier");
}
if (msdConfig[0] == "edge") {
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,curr));
} else {
ostringstream errmsg;
errmsg << "Unknown msd feature type '" << msdConfig[0] << "'" << endl;
throw runtime_error(errmsg.str());
}
ReorderingFeatureFunction::ReorderingFeatureFunction(
const Sample& sample,
const DiscriminativeReorderingFeature& mosesFeature) :
FeatureFunction(sample),
m_mosesFeature(mosesFeature) {}
//set vocabulary, if necessary
vocab_t* vocab = NULL;
if (source) {
if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) {
vocab = &(m_sourceVocabs[factorId]);
}
} else {
if (m_targetVocabs.find(factorId) != m_targetVocabs.end()) {
vocab = &(m_targetVocabs[factorId]);
}
}
m_templates.back()->setVocab(vocab);
void ReorderingFeatureFunction::assignScore(FVector& scores) {
//Use the score cached by updateTarget()
scores += m_accumulator.GetScoresVector();
}
void ReorderingFeatureFunction::updateTarget() {
//update all the previous states, and the accumulator
m_prevStates.clear();
m_accumulator.ZeroAll();
const Hypothesis* currHypo = getSample().GetTargetTail();
}
}
FeatureFunctionHandle ReorderingFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new ReorderingFeatureFunction(sample, *this));
}
const std::vector<ReorderingFeatureTemplate*>& ReorderingFeature::getTemplates() const {
return m_templates;
}
void ReorderingFeature::loadVocab(string filename, vocab_t* vocab) {
VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl);
vocab->clear();
ifstream in(filename.c_str());
if (!in) {
ostringstream errmsg;
errmsg << "Unable to load vocabulary from " << filename;
throw runtime_error(errmsg.str());
}
string line;
while (getline(in,line)) {
vocab->insert(line);
}
}
bool ReorderingFeatureTemplate::checkVocab(const std::string& word) const {
if (!m_vocab) return true;
return m_vocab->find(word) != m_vocab->end();
}
ReorderingFeatureFunction::ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent)
: FeatureFunction(sample), m_parent(parent)
{}
/** Assign features for the following options, assuming they are contiguous on the target side */
void ReorderingFeatureFunction::assign(const TranslationOption* prevOption, const TranslationOption* currOption, FVector& scores) {
for (vector<ReorderingFeatureTemplate*>::const_iterator i = m_parent.getTemplates().begin();
i != m_parent.getTemplates().end(); ++i) {
(*i)->assign(prevOption,currOption,getMsd(prevOption, currOption), scores);
}
}
const string& ReorderingFeatureFunction::getMsd(const TranslationOption* prevOption, const TranslationOption* currOption) {
int prevStart = -1;
int prevEnd = -1;
if (prevOption) {
prevStart = prevOption->GetSourceWordsRange().GetStartPos();
prevEnd = prevOption->GetSourceWordsRange().GetEndPos();
}
int currStart = currOption->GetSourceWordsRange().GetStartPos();
int currEnd = currOption->GetSourceWordsRange().GetEndPos();
static string monotone = "msd:m";
static string swap = "msd:s";
static string discontinuous = "msd:d";
if (prevEnd + 1 == currStart) {
return monotone;
} else if (currEnd + 1 == prevStart) {
return swap;
} else {
return discontinuous;
}
}
void EdgeReorderingFeatureTemplate::assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores)
{
static const string sourcePrev = "s:p:";
static const string sourceCurr = "s:c:";
static const string targetPrev = "t:p:";
static const string targetCurr = "t:c:";
const Word* edge = NULL;
const string* position = NULL;
if (m_source && m_curr) {
edge = &(currOption->GetSourcePhrase()->GetWord(0));
position = &sourceCurr;
} else if (m_source && !m_curr) {
if (prevOption) {
const Phrase* sourcePhrase = prevOption->GetSourcePhrase();
edge = &(sourcePhrase->GetWord(sourcePhrase->GetSize()-1));
}
position = &sourcePrev;
} else if (!m_source && m_curr) {
edge = &(currOption->GetTargetPhrase().GetWord(0));
position = &targetCurr;
} else {
if (prevOption) {
const Phrase& targetPhrase = prevOption->GetTargetPhrase();
edge = &(targetPhrase.GetWord(targetPhrase.GetSize()-1));
}
position = &targetPrev;
}
ostringstream namestr;
namestr << *position;
namestr << m_factor;
namestr << ":";
if (edge) {
const string& word = edge->GetFactor(m_factor)->GetString();
if (!checkVocab(word)) return;
namestr << word;
} else {
namestr << BOS;
}
FName name(prefix,namestr.str());
++scores[name];
}
/** Assign the total score of this feature on the current hypo */
void ReorderingFeatureFunction::assignScore(FVector& scores)
{
const Hypothesis* currHypo = getSample().GetTargetTail();
const TranslationOption* prevOption = NULL;
while ((currHypo = (currHypo->GetNextHypo()))) {
const TranslationOption* currOption = &(currHypo->GetTranslationOption());
assign(prevOption,currOption,scores);
prevOption = currOption;
}
}
/** Score due to one segment */
void ReorderingFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), option, scores);
}
if (gap.rightHypo) {
assign(option,&(gap.rightHypo->GetTranslationOption()), scores);
}
}
/** Score due to two segments. The left and right refer to the target positions.**/
void ReorderingFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), leftOption,scores);
}
assign(leftOption,rightOption,scores);
if (gap.rightHypo) {
assign(rightOption, &(gap.rightHypo->GetTranslationOption()), scores);
}
}
void ReorderingFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.leftHypo) {
assign(&(leftGap.leftHypo->GetTranslationOption()),leftOption,scores);
}
assign(leftOption, &(leftGap.rightHypo->GetTranslationOption()), scores);
assign(&(rightGap.leftHypo->GetTranslationOption()),rightOption,scores);
if (rightGap.rightHypo) {
assign(rightOption, &(rightGap.rightHypo->GetTranslationOption()),scores);
}
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void ReorderingFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo, WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

View File

@ -21,78 +21,30 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/unordered_set.hpp>
#include "DiscriminativeReorderingFeature.h"
#include "FeatureFunction.h"
namespace Josiah {
typedef boost::unordered_set<std::string> vocab_t;
typedef boost::shared_ptr<const Moses::DiscriminativeReorderingState> DRStateHandle;
/**
* Used to define different types of reordering features.
**/
class ReorderingFeatureTemplate {
public:
ReorderingFeatureTemplate(): m_vocab(NULL) {}
static std::string BOS;
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores) = 0;
void setVocab(vocab_t* vocab) {m_vocab = vocab;}
bool checkVocab(const std::string& word) const ;
virtual ~ReorderingFeatureTemplate() {}
private:
vocab_t* m_vocab;
};
class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
public:
EdgeReorderingFeatureTemplate(size_t factor, bool source, bool curr) : m_factor(factor), m_source(source), m_curr(curr) {}
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores);
private:
size_t m_factor;
bool m_source; //source or target?
bool m_curr; //curr of prev?
};
/**
* Features related to the ordering between segments.
* Wraps the Moses DiscriminativeReorderingFeature
**/
class ReorderingFeature : public Feature {
public:
/**
* The msd vector will indicate which types of msd features are to be included. Each element is made
* up of four parts, separated by colons. The fields are:
* type: The type of feature (currently only edge is supported)
* factor_id: An integer representing the factor
* source/target: One of two possible values indicating whether the
* source or target words are used.
* prev/curr: Indicates whether the feature uses the previous or
current segment
*
* The msdVocab configuration items specify a vocabulary file for
* the source or target of a given factor. The format of these config
* strings is factor_id:source/target:filename
*
*/
ReorderingFeature(const std::vector<std::string>& msd,
const std::vector<std::string>& msdVocab);
ReorderingFeature(const std::vector<std::string>& featureConfig,
const std::vector<std::string>& vocabConfig) :
m_mosesFeature(featureConfig,vocabConfig) {}
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const std::vector<ReorderingFeatureTemplate*>& getTemplates() const;
private:
std::vector<ReorderingFeatureTemplate*> m_templates;
std::map<size_t,vocab_t> m_sourceVocabs;
std::map<size_t,vocab_t > m_targetVocabs;
void loadVocab(std::string filename, vocab_t* vocab);
Moses::DiscriminativeReorderingFeature m_mosesFeature;
};
@ -101,11 +53,14 @@ class ReorderingFeatureFunction : public FeatureFunction {
public:
ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent);
ReorderingFeatureFunction(const Sample& sample,
const Moses::DiscriminativeReorderingFeature& mosesFeature);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
virtual void updateTarget();
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
@ -121,16 +76,9 @@ class ReorderingFeatureFunction : public FeatureFunction {
private:
const ReorderingFeature& m_parent;
/** Assign features for the following two options, assuming they are contiguous on the target side */
void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption, FVector& scores);
/** Monotone, swapped or discontinuous? The segments are assumed to have contiguous translations on the target side. */
const std::string& getMsd(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption);
const Moses::DiscriminativeReorderingFeature& m_mosesFeature;
Moses::ScoreComponentCollection m_accumulator;
std::vector<DRStateHandle> m_prevStates;
};
}

View File

@ -69,6 +69,7 @@ static void MixWeights(size_t size, size_t rank) {
#endif
}
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED

View File

@ -0,0 +1,133 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SingleStateFeature.h"
#include "Gibbler.h"
using namespace Moses;
using namespace std;
namespace Josiah {
/**
 * Creates the feature function that scores the given sample with the
 * wrapped Moses feature. A new SingleStateFeatureFunction is allocated on
 * every call and handed to the returned handle.
 **/
FeatureFunctionHandle SingleStateFeature::getFunction(const Sample& sample) const {
  FeatureFunctionHandle handle(
    new SingleStateFeatureFunction(sample, m_mosesFeature));
  return handle;
}
/**
 * Adds the total score of the current sample to scores.
 * Nothing is recomputed here: the value added is whatever updateTarget()
 * last left in m_accumulator.
 **/
void SingleStateFeatureFunction::assignScore(FVector& scores) {
  const FVector& cachedScores = m_accumulator.GetScoresVector();
  scores += cachedScores;
}
void SingleStateFeatureFunction::updateTarget() {
//Update the prevStates map, and the cached scores
m_prevStates.clear();
m_accumulator.ZeroAll();
const Moses::Hypothesis* currHypo = getSample().GetTargetTail();
StateHandle prevState(
m_mosesFeature->EmptyHypothesisState(currHypo->GetInput()));
while ((currHypo = (currHypo->GetNextHypo()))) {
StateHandle currState(m_mosesFeature->Evaluate(
*currHypo, prevState.get(), &m_accumulator));
for (size_t i = 0; i < currHypo->GetCurrTargetWordsRange().
GetNumWordsCovered(); ++i) {
m_prevStates.push_back(prevState);
}
prevState = currState;
}
}
/** Score due to one segment */
void SingleStateFeatureFunction::doSingleUpdate(
const TranslationOption* option,
const TargetGap& gap, FVector& scores) {
ScoreComponentCollection accumulator;
//the previous state of the (new) hypo
StateHandle prevState = m_prevStates[gap.segment.GetStartPos()];
//Evaluate the score of inserting this hypo, and get the prev state
//for the next hypo.
prevState.reset(m_mosesFeature->Evaluate(
*option,prevState.get(),&accumulator));
//if there's a hypo on the right, then evaluate it
if (gap.rightHypo) {
prevState.reset(m_mosesFeature->Evaluate(
gap.rightHypo->GetTranslationOption(),prevState.get(),&accumulator));
}
scores += accumulator.GetScoresVector();
}
/** Score due to two segments.
The left and right refer to the target positions.**/
void SingleStateFeatureFunction::doContiguousPairedUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
ScoreComponentCollection accumulator;
//The previous state of the (new) current hypo.
StateHandle prevState(m_prevStates[gap.segment.GetStartPos()]);
//Evaluate the hypos in the gap
prevState.reset(m_mosesFeature->Evaluate(
*leftOption,prevState.get(),&accumulator));
prevState.reset(m_mosesFeature->Evaluate(
*rightOption,prevState.get(),&accumulator));
//if there's a hypo on the right, evaluate it
if (gap.rightHypo) {
prevState.reset(m_mosesFeature->Evaluate(
gap.rightHypo->GetTranslationOption(),prevState.get(),&accumulator));
}
scores += accumulator.GetScoresVector();
}
void SingleStateFeatureFunction::doDiscontiguousPairedUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& leftGap,
const TargetGap& rightGap,
FVector& scores) {
doSingleUpdate(leftOption,leftGap,scores);
doSingleUpdate(rightOption,rightGap,scores);
}
/** Score due to flip.
Again, left and right refer to order on the <emph>target</emph> side. */
void SingleStateFeatureFunction::doFlipUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& leftGap,
const TargetGap& rightGap,
FVector& scores) {
if (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo,rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

View File

@ -0,0 +1,99 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include "../moses/src/FFState.h"
#include "../moses/src/FeatureFunction.h"
#include "FeatureFunction.h"
namespace Josiah {
class Sample;
/**
 * Shared handle to an immutable Moses feature state.
 * FFState is spelled with its Moses:: qualifier so that this header does
 * not rely on a using-directive leaking in from another include.
 **/
typedef boost::shared_ptr<const Moses::FFState> StateHandle;
/**
 * Josiah feature that wraps a Moses feature whose state depends only on
 * the previous phrase.
 *
 * The wrapped feature is held as a plain pointer. This class declares no
 * destructor, so the pointer is not freed here and the Moses feature has
 * to stay alive for as long as this object (and the functions it creates)
 * are in use.
 **/
class SingleStateFeature : public Feature {
  private:
    /** The wrapped Moses feature, passed on to every created function. */
    const Moses::OptionStatefulFeatureFunction* m_mosesFeature;

  public:
    SingleStateFeature(const Moses::OptionStatefulFeatureFunction* mosesFeature)
      : m_mosesFeature(mosesFeature) {}

    /** Creates the feature function bound to the given sample. */
    virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
};
/**
 * Per-sample scorer for a wrapped Moses feature.
 *
 * updateTarget() caches the total score of the sample and the state that
 * precedes each target position; the do*Update() methods use those cached
 * states to score proposed changes.
 **/
class SingleStateFeatureFunction : public FeatureFunction {
  public:
    /**
     * @param sample        the sample this function scores
     * @param mosesFeature  the wrapped Moses feature; only the pointer is
     *                      stored, it is not freed by this class
     **/
    SingleStateFeatureFunction(
      const Sample& sample,
      const Moses::OptionStatefulFeatureFunction* mosesFeature):
      FeatureFunction(sample),
      m_mosesFeature(mosesFeature) {}

    /** Assign the total score of this feature on the current hypo */
    virtual void assignScore(FVector& scores);

    /** Recompute the cached score and previous states for the sample */
    virtual void updateTarget();

    /** Score due to one segment */
    virtual void doSingleUpdate(const Moses::TranslationOption* option,
      const TargetGap& gap, FVector& scores);

    /** Score due to two segments.
       The left and right refer to the target positions.**/
    virtual void doContiguousPairedUpdate(
      const Moses::TranslationOption* leftOption,
      const Moses::TranslationOption* rightOption,
      const TargetGap& gap, FVector& scores);

    /** Score due to two segments placed in two separate gaps */
    virtual void doDiscontiguousPairedUpdate(
      const Moses::TranslationOption* leftOption,
      const Moses::TranslationOption* rightOption,
      const TargetGap& leftGap,
      const TargetGap& rightGap,
      FVector& scores);

    /** Score due to flip.
      Again, left and right refer to order on the <emph>target</emph> side. */
    virtual void doFlipUpdate(
      const Moses::TranslationOption* leftOption,
      const Moses::TranslationOption* rightOption,
      const TargetGap& leftGap,
      const TargetGap& rightGap,
      FVector& scores);

  private:
    /** Wrapped Moses feature. Qualified with Moses:: like the constructor
        parameter, so the header does not depend on a using-directive. */
    const Moses::OptionStatefulFeatureFunction* m_mosesFeature;
    /** Scores cached by updateTarget() and read by assignScore() */
    Moses::ScoreComponentCollection m_accumulator;
    /** Previous states cached by updateTarget(), one entry per covered
        target word */
    std::vector<StateHandle> m_prevStates;
};
}

View File

@ -1,9 +1,11 @@
#include <boost/program_options.hpp>
#include "Utils.h"
#include "Pos.h"
#include "Dependency.h"
#include "DiscriminativeLMFeature.h"
#include "DiscriminativeReorderingFeature.h"
#include "DistortionPenaltyFeature.h"
#include "LanguageModelFeature.h"
#include "LexicalReorderingFeature.h"
@ -17,6 +19,7 @@
#include "ReorderingFeature.h"
#include "SourceToTargetRatio.h"
#include "StatelessFeature.h"
#include "SingleStateFeature.h"
#include "WordPenaltyFeature.h"
using namespace std;
@ -25,6 +28,8 @@ namespace po = boost::program_options;
namespace Josiah {
// template class SingleStateFeature
// <Moses::DiscriminativeReorderingFeature,DiscriminativeReorderingState>;
@ -47,7 +52,8 @@ namespace Josiah {
fv.push_back(FeatureHandle(new DistortionPenaltyFeature()));
const std::vector<LexicalReordering*>& reorderModels = system.GetReorderModels();
for (size_t i = 0; i < reorderModels.size(); ++i) {
fv.push_back(FeatureHandle(new LexicalReorderingFeature(reorderModels[i],i)));
fv.push_back(FeatureHandle(new SingleStateFeature(reorderModels[i])));
// fv.push_back(FeatureHandle(new LexicalReorderingFeature(reorderModels[i],i)));
}
if (filename.empty()) return;
@ -158,7 +164,9 @@ namespace Josiah {
fv.push_back(FeatureHandle(new DiscriminativeLMBigramFeature(discrimlmFactor,discrimlmVocab)));
}
if (msdConfig.size()) {
fv.push_back(FeatureHandle(new ReorderingFeature(msdConfig,msdVocab)));
//TODO
fv.push_back(FeatureHandle(new SingleStateFeature
(new Moses::DiscriminativeReorderingFeature(msdConfig,msdVocab))));
}
if (phrasePairSourceTarget) {
fv.push_back(FeatureHandle(

6343
ltmain.sh

File diff suppressed because it is too large Load Diff

View File

@ -65,51 +65,54 @@ TemplateStateHandle EdgeReorderingFeatureTemplate::EmptyState(const InputType&)
}
TemplateStateHandle EdgeReorderingFeatureTemplate::Evaluate(
const Hypothesis& cur_hypo,
const TranslationOption& currOption,
const TemplateStateHandle state,
const string& prefix,
FVector& accumulator) const
{
static const string sourcePrev = "s:p:";
static const string sourceCurr = "s:c:";
static const string targetPrev = "t:p:";
static const string targetCurr = "t:c:";
const EdgeReorderingFeatureTemplateState* edgeState =
dynamic_cast<const EdgeReorderingFeatureTemplateState*>(state.get());
assert(edgeState);
const TranslationOption& currOption = cur_hypo.GetTranslationOption();
const Factor* edge = NULL;
const string* position = NULL;
if (m_source && m_curr) {
edge = currOption.GetSourcePhrase()->GetWord(0).GetFactor(m_factor);
position = &sourceCurr;
} else if (m_source && !m_curr) {
edge = edgeState->GetLastFactor();
position = &sourcePrev;
} else if (!m_source && m_curr) {
edge = currOption.GetTargetPhrase().GetWord(0).GetFactor(m_factor);
position = &targetCurr;
} else {
edge = edgeState->GetLastFactor();
position = &targetPrev;
}
if (!edge || CheckVocab(edge->GetString())) {
ostringstream namestr;
namestr << *position;
namestr << m_factor;
namestr << ":";
ostringstream namestr;
namestr << m_posString;
namestr << m_factor;
namestr << ":";
vector<string> edges;
//TODO: do these three on ctor
if (m_pos == dreo_prev || m_pos == dreo_both) {
const Factor* edge = edgeState->GetLastFactor();
if (edge) {
const string& word = edge->GetString();
namestr << word;
edges.push_back(word);
} else {
namestr << BOS_;
edges.push_back(BOS_);
}
}
if (m_pos == dreo_curr || m_pos == dreo_both) {
const Factor* edge = NULL;
if (m_source) {
edge = currOption.GetSourcePhrase()->GetWord(0).GetFactor(m_factor);
} else {
edge = currOption.GetTargetPhrase().GetWord(0).GetFactor(m_factor);
}
if (edge) {
const string& word = edge->GetString();
edges.push_back(word);
} else {
edges.push_back(BOS_);
}
}
if (CheckVocab(edges[0])) {
if (edges.size() == 1 || CheckVocab(edges[1])) {
namestr << edges[0];
if (edges.size() == 2) {
namestr << ":" << edges[1];
}
FName name(prefix,namestr.str());
++accumulator[name];
}
FName name(prefix,namestr.str());
++accumulator[name];
}
return TemplateStateHandle
(new EdgeReorderingFeatureTemplateState(&currOption,m_source,m_factor));
@ -190,12 +193,13 @@ const string& DiscriminativeReorderingState::GetMsd(const WordsRange& currWordsR
DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
(const vector<string>& featureConfig,
const vector<string>& vocabConfig) :
StatefulFeatureFunction("dreo")
OptionStatefulFeatureFunction("dreo")
{
const static string SOURCE = "source";
const static string TARGET = "target";
const static string PREV = "prev";
const static string CURR = "curr";
const static string BOTH = "both";
//load vocabularies
for (vector<string>::const_iterator i = vocabConfig.begin(); i != vocabConfig.end();
@ -214,7 +218,7 @@ DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
throw runtime_error("Discrim reordering vocab config has invalid source/target identifier");
}
string filename = vc[2];
vocab_t* vocab = NULL;
Vocab_t* vocab = NULL;
if (source) {
vocab = &(m_sourceVocabs[factorId]);
} else {
@ -238,14 +242,16 @@ DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
} else if (fc[2] != SOURCE) {
throw runtime_error("Discrim reordering feature config has invalid source/target identifier");
}
bool curr = true;
DreoPosition_t pos = dreo_curr;
if (fc[3] == PREV) {
curr = false;
pos = dreo_prev;
} else if (fc[3] == BOTH) {
pos = dreo_both;
} else if (fc[3] != CURR) {
throw runtime_error("Discrim reordering config has invalid curr/prev identifier");
}
if (fc[0] == "edge") {
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,curr));
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,pos));
} else {
ostringstream errmsg;
errmsg << "Unknown msd feature type '" << fc[0] << "'" << endl;
@ -253,7 +259,7 @@ DiscriminativeReorderingFeature::DiscriminativeReorderingFeature
}
//set vocabulary, if necessary
vocab_t* vocab = NULL;
Vocab_t* vocab = NULL;
if (source) {
if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) {
vocab = &(m_sourceVocabs[factorId]);
@ -295,7 +301,8 @@ const FFState* DiscriminativeReorderingFeature::EmptyHypothesisState
return state;
}
FFState* DiscriminativeReorderingFeature::Evaluate(const Hypothesis& cur_hypo,
FFState* DiscriminativeReorderingFeature::Evaluate(
const TranslationOption& option,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
@ -304,17 +311,17 @@ FFState* DiscriminativeReorderingFeature::Evaluate(const Hypothesis& cur_hypo,
assert(state);
FVector scores;
DiscriminativeReorderingState* newState = new DiscriminativeReorderingState
(&(cur_hypo.GetCurrSourceWordsRange()));
string stem = state->GetMsd(cur_hypo.GetCurrSourceWordsRange());
(&(option.GetSourceWordsRange()));
string stem = state->GetMsd(option.GetSourceWordsRange());
for (size_t i = 0; i < m_templates.size(); ++i) {
newState->AddTemplateState(m_templates[i]->Evaluate(
cur_hypo, state->GetTemplateState(i), stem, scores));
option, state->GetTemplateState(i), stem, scores));
}
accumulator->PlusEquals(scores);
return newState;
}
void DiscriminativeReorderingFeature::LoadVocab(string filename, vocab_t* vocab)
void DiscriminativeReorderingFeature::LoadVocab(string filename, Vocab_t* vocab)
{
VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl);
vocab->clear();

View File

@ -39,7 +39,8 @@ namespace Moses {
class TranslationOption;
typedef boost::unordered_set<std::string> vocab_t;
typedef boost::unordered_set<std::string> Vocab_t;
enum DreoPosition_t {dreo_curr, dreo_prev, dreo_both};
/*
* The reordering feature is made up of several subfeatures (Templates)
@ -58,19 +59,19 @@ class ReorderingFeatureTemplate {
public:
ReorderingFeatureTemplate(): m_vocab(NULL) {}
virtual TemplateStateHandle Evaluate(
const Hypothesis& cur_hypo,
const TranslationOption& cur_option,
const TemplateStateHandle state,
const std::string& stem,
FVector& accumulator) const = 0;
virtual TemplateStateHandle EmptyState(const InputType &input) const = 0;
void SetVocab(vocab_t* vocab) {m_vocab = vocab;}
void SetVocab(Vocab_t* vocab) {m_vocab = vocab;}
protected:
bool CheckVocab(const std::string& token) const;
virtual ~ReorderingFeatureTemplate() {}
private:
vocab_t* m_vocab;
Vocab_t* m_vocab;
};
class EdgeReorderingFeatureTemplateState: public ReorderingFeatureTemplateState {
@ -92,11 +93,31 @@ class EdgeReorderingFeatureTemplateState: public ReorderingFeatureTemplateState
class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
public:
EdgeReorderingFeatureTemplate(size_t factor, bool source, bool curr)
: m_factor(factor), m_source(source), m_curr(curr) {}
EdgeReorderingFeatureTemplate(size_t factor, bool source, DreoPosition_t pos)
: m_factor(factor), m_source(source), m_pos(pos)
{
if (source) {
if (m_pos == dreo_curr) {
m_posString = "s:c";
} else if (m_pos == dreo_prev) {
m_posString = "s:p";
} else if (m_pos == dreo_both) {
m_posString = "s:b";
}
} else {
if (m_pos == dreo_curr) {
m_posString = "t:c";
} else if (m_pos == dreo_prev) {
m_posString = "t:p";
} else if (m_pos == dreo_both) {
m_posString = "t:b";
}
}
}
virtual TemplateStateHandle Evaluate(
const Hypothesis& cur_hypo,
const TranslationOption& cur_option,
const TemplateStateHandle state,
const std::string& stem,
FVector& accumulator) const;
@ -105,7 +126,8 @@ class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
private:
size_t m_factor;
bool m_source; //source or target?
bool m_curr; //curr of prev?
DreoPosition_t m_pos;
std::string m_posString;
};
@ -114,7 +136,6 @@ class DiscriminativeReorderingState: public FFState {
DiscriminativeReorderingState();
DiscriminativeReorderingState(const WordsRange* prevWordsRange);
virtual int Compare(const FFState& other) const;
//Implemented to here.
void AddTemplateState(TemplateStateHandle templateState);
TemplateStateHandle GetTemplateState(size_t index) const;
const std::string& GetMsd(const WordsRange& currWordsRange) const;
@ -124,7 +145,7 @@ class DiscriminativeReorderingState: public FFState {
const WordsRange* m_prevWordsRange;
};
class DiscriminativeReorderingFeature : public StatefulFeatureFunction {
class DiscriminativeReorderingFeature : public OptionStatefulFeatureFunction {
public:
DiscriminativeReorderingFeature(const std::vector<std::string>& featureConfig,
const std::vector<std::string>& vocabConfig);
@ -135,16 +156,16 @@ class DiscriminativeReorderingFeature : public StatefulFeatureFunction {
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
virtual FFState* Evaluate(const TranslationOption& cur_option,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
private:
std::vector<ReorderingFeatureTemplate*> m_templates;
std::map<FactorType,vocab_t> m_sourceVocabs;
std::map<FactorType,vocab_t> m_targetVocabs;
std::map<FactorType,Vocab_t> m_sourceVocabs;
std::map<FactorType,Vocab_t> m_targetVocabs;
void LoadVocab(std::string filename, vocab_t* vocab);
void LoadVocab(std::string filename, Vocab_t* vocab);
};

View File

@ -1,4 +1,5 @@
#include "FeatureFunction.h"
#include "Hypothesis.h"
#include <cassert>
@ -18,6 +19,15 @@ void StatelessFeatureFunction::Evaluate(
bool StatefulFeatureFunction::IsStateless() const { return false; }
// Hypothesis-based entry point: extracts the TranslationOption from the
// hypothesis and forwards to the TranslationOption overload of Evaluate,
// returning whatever that overload returns.
FFState* OptionStatefulFeatureFunction::Evaluate(
  const Hypothesis& cur_hypo,
  const FFState* prev_state,
  ScoreComponentCollection* accumulator) const
{
  const TranslationOption& option = cur_hypo.GetTranslationOption();
  return Evaluate(option, prev_state, accumulator);
}
}

View File

@ -12,6 +12,7 @@ class Hypothesis;
class FFState;
class InputType;
class ScoreComponentCollection;
class TranslationOption;
class FeatureFunction: public ScoreProducer {
@ -89,6 +90,29 @@ struct FeatureNameCounter {
template <typename T> size_t FeatureNameCounter<T>::s_created(0);
/**
* Stateful feature function whose scoring needs only a TranslationOption,
* rather than a whole Hypothesis. This is the way all feature functions
* should be, but for historical reasons the LM uses the Hypothesis.
*
* Subclasses implement the TranslationOption overload of Evaluate; the
* Hypothesis overload is declared here so the class still offers the
* Hypothesis-based Evaluate signature.
*
* Note: a subclass that declares only the TranslationOption overload hides
* the Hypothesis overload for calls made through the subclass type (normal
* C++ name hiding); calls through a base-class pointer or reference are
* unaffected.
**/
class OptionStatefulFeatureFunction : public StatefulFeatureFunction {
public:
// Passes the description string straight through to StatefulFeatureFunction.
OptionStatefulFeatureFunction(const std::string& description) :
StatefulFeatureFunction(description) {}
// Hypothesis-based overload; its definition is not part of this declaration.
virtual FFState* Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
// TranslationOption-based overload: pure virtual, must be provided by
// every concrete subclass.
virtual FFState* Evaluate(
const TranslationOption& cur_option,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const = 0;
};
}
#endif

View File

@ -13,7 +13,7 @@ LexicalReordering::LexicalReordering(std::vector<FactorType>& f_factors,
const std::string &modelType,
const std::string &filePath,
const std::vector<float>& weights)
: StatefulFeatureFunction("LexicalReordering_" + modelType),
: OptionStatefulFeatureFunction("LexicalReordering_" + modelType),
m_configuration(this, modelType) {
std::cerr << "Creating lexical reordering...\n";
std::cerr << "weights: ";
@ -71,12 +71,12 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const {
return m_table->GetScore(f, e, Phrase(Output));
}
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
FFState* LexicalReordering::Evaluate(const TranslationOption& option,
const FFState* prev_state,
ScoreComponentCollection* out) const {
Scores score(GetNumScoreComponents(), 0);
const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), score);
LexicalReorderingState *next_state = prev->Expand(option, score);
out->PlusEquals(this, score);

View File

@ -19,10 +19,10 @@ namespace Moses
class Factor;
class Phrase;
class Hypothesis;
class TranslationOption; // forward declaration for the Evaluate(const TranslationOption&, ...) signature
class InputType;
class LexicalReordering : public StatefulFeatureFunction {
class LexicalReordering : public OptionStatefulFeatureFunction {
public:
LexicalReordering(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors,
@ -35,7 +35,7 @@ public:
return m_configuration.GetNumScoreComponents();
}
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
virtual FFState* Evaluate(const TranslationOption& cur_option,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;

View File

@ -85,7 +85,7 @@ public:
}
}
FVector GetScoresVector()
// Read-only access to the stored score vector (m_scores). Returns a reference
// to the member rather than a copy; const-qualified so it can also be called
// on const objects.
const FVector& GetScoresVector() const
{
return m_scores;
}

View File

@ -40,6 +40,7 @@ then
fi
fi
touch ltmain.sh
echo "Calling $ACLOCAL..."
$ACLOCAL -I m4 || die "aclocal failed"