LexicalReordering.cpp compiles now. Interface to StaticData still to be done.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/hierarchical-reo@2697 1f5c12ca-751b-0410-a591-d2e778427230
chardmeier 2010-01-28 10:39:43 +00:00
parent 621acd4dbf
commit 38688cc30c
5 changed files with 276 additions and 276 deletions

FFState.h

@@ -1,6 +1,9 @@
#pragma once
#include <cassert>
#include <vector>
namespace Moses {
class FFState {
@@ -9,4 +12,54 @@ class FFState {
virtual int Compare(const FFState& other) const = 0;
};
class FFStateArray : public FFState {
private:
std::vector<const FFState *> m_states;
public:
FFStateArray() {}
explicit FFStateArray(size_t nElements) : m_states(nElements) {}
void push_back(const FFState *s) {
m_states.push_back(s);
}
const FFState *operator[](size_t n) const {
assert(n < m_states.size());
return m_states[n];
}
const FFState *&operator[](size_t n) {
assert(n < m_states.size());
return m_states[n];
}
size_t size() const {
return m_states.size();
}
virtual int Compare(const FFState& other) const {
const FFStateArray *a = dynamic_cast<const FFStateArray *> (&other);
// if the types are different, fall back on pointer comparison to get a well-defined ordering
if(a == NULL)
return &other < this ? -1 : 1;
size_t i;
for(i = 0; i < m_states.size(); i++) {
if(i >= a->m_states.size())
return -1;
if(m_states[i] < a->m_states[i])
return -1;
if(m_states[i] > a->m_states[i])
return 1;
}
if(i == a->m_states.size())
return 0;
else
return 1;
}
};
}
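A minimal usage sketch (hypothetical, not taken from this commit): a concrete FFState subclass plus two FFStateArray instances exercising Compare(). Note that for same-typed arrays FFStateArray::Compare orders by the element pointers, so arrays holding distinct but logically equal states still compare unequal. The include path and the presence of a usable FFState base class are assumptions.

#include <cassert>
#include "FFState.h" // assumed header name for the file above

// hypothetical concrete state carrying a single source position
class DummyState : public Moses::FFState {
public:
explicit DummyState(int pos) : m_pos(pos) {}
virtual int Compare(const Moses::FFState& other) const {
const DummyState *o = dynamic_cast<const DummyState *>(&other);
if(o == NULL)
return &other < this ? -1 : 1;
if(m_pos < o->m_pos) return -1;
return m_pos > o->m_pos ? 1 : 0;
}
private:
int m_pos;
};

int main() {
DummyState s1(3), s2(3);
Moses::FFStateArray a, b;
a.push_back(&s1);
b.push_back(&s2);
assert(a.Compare(a) == 0); // same element pointers: equal
assert(a.Compare(b) != 0); // equal values, different pointers: not equal
return 0;
}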

LexicalReordering.cpp

@@ -1,269 +1,217 @@
#include <sstream>
#include "FFState.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
#include "StaticData.h"
namespace Moses
{
LexicalReordering::LexicalReordering(const std::string &filePath,
const std::vector<float>& weights,
Direction direction,
Condition condition,
std::vector< FactorType >& f_factors,
std::vector< FactorType >& e_factors)
: m_NumScoreComponents(weights.size()), m_MaxContextLength(0)
{
std::cerr << "Creating lexical reordering...\n";
//add ScoreProducer
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
std::cerr << "weights: ";
for(size_t w = 0; w < weights.size(); ++w){
std::cerr << weights[w] << " ";
}
std::cerr << "\n";
m_Direction = DecodeDirection(direction);
m_Condition = DecodeCondition(condition);
//m_FactorsE = e_factors;
//m_FactorsF = f_factors;
//TODO: should check that
//- if condition contains e or c, then e_factors is non-empty
//- if condition contains f, then f_factors is non-empty
for(size_t i = 0; i < m_Condition.size(); ++i){
switch(m_Condition[i]){
case E:
m_FactorsE = e_factors;
if(m_FactorsE.empty()){
//problem
std::cerr << "Problem e factor mask is unexpectedly empty\n";
}
break;
case F:
m_FactorsF = f_factors;
if(m_FactorsF.empty()){
//problem
std::cerr << "Problem f factor mask is unexpectedly empty\n";
}
break;
case C:
m_FactorsC = e_factors;
m_MaxContextLength = 1;
if(m_FactorsC.empty()){
//problem
std::cerr << "Problem c factor mask is unexpectedly empty\n";
}
break;
default:
//problem
std::cerr << "Unknown conditioning option!\n";
break;
LexicalReordering::LexicalReordering(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors,
const std::string &modelType,
const std::string &filePath,
const std::vector<float>& weights) {
std::cerr << "Creating lexical reordering...\n";
//add ScoreProducer
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights);
std::cerr << "weights: ";
for(size_t w = 0; w < weights.size(); ++w){
std::cerr << weights[w] << " ";
}
}
if(weights.size() == m_Direction.size()){
m_OneScorePerDirection = true;
std::cerr << "Reordering types NOT individualy weighted!\n";
} else {
m_OneScorePerDirection = false;
}
m_Table = LexicalReorderingTable::LoadAvailable(filePath, m_FactorsF, m_FactorsE, m_FactorsC);
}
std::cerr << "\n";
LexicalReordering::~LexicalReordering(){
if(m_Table){
delete m_Table;
}
}
std::vector<float> LexicalReordering::CalcScore(Hypothesis* hypothesis) const {
std::vector<float> score(GetNumScoreComponents(), 0);
std::vector<float> values;
//for every direction
for(size_t i = 0; i < m_Direction.size(); ++i){
//grab data
if(Forward == m_Direction[i]){
//relates to the prev hypothesis, as we don't know the next phrase for the current one yet
//sanity check: is there a previous hypothesis?
if(0 == hypothesis->GetPrevHypo()->GetId()){
continue; //no score continue with next direction
}
//grab probs for prev hypothesis
const ScoreComponentCollection &reorderingScoreColl =
hypothesis->GetPrevHypo()->GetCachedReorderingScore();
values = reorderingScoreColl.GetScoresForProducer(this);
/*
values = m_Table->GetScore((hypothesis->GetPrevHypo()->GetSourcePhrase()).GetSubString(hypothesis->GetPrevHypo()->GetCurrSourceWordsRange()),
hypothesis->GetPrevHypo()->GetCurrTargetPhrase(),
auxGetContext(hypothesis->GetPrevHypo()));
*/
}
if(Backward == m_Direction[i])
{
const ScoreComponentCollection &reorderingScoreColl =
hypothesis->GetCachedReorderingScore();
values = reorderingScoreColl.GetScoresForProducer(this);
/*
values = m_Table->GetScore(hypothesis->GetSourcePhrase().GetSubString(hypothesis->GetCurrSourceWordsRange()),
hypothesis->GetCurrTargetPhrase(),
auxGetContext(hypothesis));
*/
m_oneScorePerDirection = false; // default setting
m_modelTypeString = modelType;
m_modelType = Tokenize<std::string>(modelType,"-");
std::vector<LexicalReordering::Condition> conditions;
for(std::vector<std::string>::iterator it = m_modelType.begin(); it != m_modelType.end(); ) {
// erase() already advances the iterator; only increment when no decoder consumed the token
if(DecodeDirection(*it) ||
DecodeCondition(*it) ||
DecodeNumFeatureFunctions(*it))
it = m_modelType.erase(it);
else
++it;
}
if(m_direction.empty())
m_direction.push_back(Backward); // default setting
//m_FactorsE = e_factors;
//m_FactorsF = f_factors;
//TODO: should check that
//- if condition contains e or c, then e_factors is non-empty
//- if condition contains f, then f_factors is non-empty
for(size_t i = 0; i < m_condition.size(); ++i){
switch(m_condition[i]){
case E:
m_factorsE = e_factors;
if(m_factorsE.empty()){
//problem
UserMessage::Add("TL factor mask for lexical reordering is unexpectedly empty");
exit(1);
}
break;
case F:
m_factorsF = f_factors;
if(m_factorsF.empty()){
UserMessage::Add("SL factor mask for lexical reordering is unexpectedly empty");
exit(1);
}
break;
default:
UserMessage::Add("Unknown conditioning option!");
exit(1);
}
}
//add score
//sanity check: do we have any probs?
assert(values.size() == (GetNumOrientationTypes() * m_Direction.size()));
OrientationType orientation = GetOrientationType(hypothesis);
float value = values[orientation + i * GetNumOrientationTypes()];
if(m_OneScorePerDirection){
//one score per direction
score[i] = value;
} else {
//one score per direction and orientation
score[orientation + i * GetNumOrientationTypes()] = value;
size_t total_scores = 0;
for(size_t i = 0; i < m_direction.size(); i++) {
LexicalReorderingState *s = LexicalReorderingState::CreateLexicalReorderingState(m_modelType, m_direction[i]);
m_scoreOffset.push_back(total_scores);
total_scores += s->GetNumberOfScores();
delete s;
}
}
return score;
}
if(m_oneScorePerDirection)
m_numScoreComponents = m_direction.size();
else
m_numScoreComponents = total_scores;
Phrase LexicalReordering::auxGetContext(const Hypothesis* hypothesis) const {
const Hypothesis* h = hypothesis;
Phrase c(Output);
if(0 == hypothesis->GetId()){
return c;
}
while(0 != hypothesis->GetPrevHypo()->GetId() && c.GetSize() < m_MaxContextLength){
hypothesis = hypothesis->GetPrevHypo();
int needed = m_MaxContextLength - c.GetSize();
const Phrase& p = hypothesis->GetCurrTargetPhrase();
Phrase tmp(Output);
if(needed > p.GetSize()){
//needed -= p.GetSize();
tmp = p;
} else {
WordsRange range(p.GetSize() - needed, p.GetSize()-1);
tmp = p.GetSubString(range);
}
//new code: new append returns void not this...
tmp.Append(c); c = tmp;
}
return c;
}
std::vector<LexicalReordering::Condition> LexicalReordering::DecodeCondition(LexicalReordering::Condition c){
std::vector<LexicalReordering::Condition> result;
switch(c){
case F:
case E:
case C:
result.push_back(c);
break;
case FE:
result.push_back(F);
result.push_back(E);
break;
case FEC:
result.push_back(F);
result.push_back(E);
result.push_back(C);
break;
}
return result;
}
std::vector<LexicalReordering::Direction> LexicalReordering::DecodeDirection(LexicalReordering::Direction d){
std::vector<Direction> result;
if(Bidirectional == d){
result.push_back(Backward);
result.push_back(Forward);
} else {
result.push_back(d);
}
return result;
}
LexicalReordering::OrientationType LexicalMonotonicReordering::GetOrientationType(Hypothesis* currHypothesis) const
{
const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
//check if there is a previous hypo
if(0 == prevHypothesis->GetId()){
if(0 == currWordsRange.GetStartPos()){
return Monotone;
} else {
return NonMonotone;
if(weights.size() != m_numScoreComponents) {
std::ostringstream os;
os << "Lexical reordering model (type " << modelType << "): expected " << m_numScoreComponents << " weights, got " << weights.size() << std::endl;
UserMessage::Add(os.str());
exit(1);
}
} else {
const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
if(prevWordsRange.GetEndPos() == currWordsRange.GetStartPos()-1){
return Monotone;
} else {
return NonMonotone;
}
}
}
LexicalReordering::OrientationType LexicalOrientationReordering::GetOrientationType(Hypothesis* currHypothesis) const
{
const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
//check if there is a previous hypo
if(0 == prevHypothesis->GetId()){
if(0 == currWordsRange.GetStartPos()){
return Monotone;
} else {
return Discontinuous;
}
} else {
const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
if(prevWordsRange.GetEndPos() == currWordsRange.GetStartPos()-1){
return Monotone;
} else if(prevWordsRange.GetStartPos() == currWordsRange.GetEndPos()+1) {
return Swap;
} else {
return Discontinuous;
}
}
m_table = LexicalReorderingTable::LoadAvailable(filePath, m_factorsF, m_factorsE, std::vector<FactorType>());
}
LexicalReordering::OrientationType LexicalDirectionalReordering::GetOrientationType(Hypothesis* currHypothesis) const{
const Hypothesis* prevHypothesis = currHypothesis->GetPrevHypo();
const WordsRange currWordsRange = currHypothesis->GetCurrSourceWordsRange();
//check if there is a previous hypo
if(0 == prevHypothesis->GetId()){
return Right;
} else {
const WordsRange prevWordsRange = prevHypothesis->GetCurrSourceWordsRange();
if(prevWordsRange.GetEndPos() <= currWordsRange.GetStartPos()){
return Right;
} else {
return Left;
}
}
}
Score LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
{
return m_Table->GetScore(f, e, Phrase(Output));
LexicalReordering::~LexicalReordering() {
if(m_table)
delete m_table;
}
FFState* LexicalReordering::Evaluate(
const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* out) const {
out->PlusEquals(this, CalcScore(const_cast<Hypothesis*>(&hypo)));
bool LexicalReordering::DecodeCondition(std::string configElement) {
if(configElement == "f") {
if(!m_condition.empty())
goto double_spec;
m_condition.push_back(F);
return true;
} else if(configElement == "fe") {
if(!m_condition.empty())
goto double_spec;
m_condition.push_back(F);
m_condition.push_back(E);
return true;
}
return false;
double_spec:
UserMessage::Add("Lexical reordering conditioning (f/fe) specified twice.");
exit(1);
}
//TODO need to return proper state, calc score should not use previous
//hypothesis, it should use the state.
return NULL;
bool LexicalReordering::DecodeDirection(std::string configElement) {
if(configElement == "backward" || configElement == "unidirectional") {
if(!m_direction.empty())
goto double_spec;
m_direction.push_back(Backward);
return true;
} else if(configElement == "forward") {
if(!m_direction.empty())
goto double_spec;
m_direction.push_back(Forward);
return true;
} else if(configElement == "bidirectional") {
if(!m_direction.empty())
goto double_spec;
m_direction.push_back(Backward);
m_direction.push_back(Forward);
return true;
}
return false;
double_spec:
UserMessage::Add("Lexical reordering direction (forward/backward/bidirectional) specified twice.");
exit(1);
}
bool LexicalReordering::DecodeNumFeatureFunctions(std::string configElement) {
// not checking for double specification here for convenience
if(configElement == "collapseff") {
m_oneScorePerDirection = true;
VERBOSE(1, "Collapsing reordering distributions into one feature function." << std::endl);
return true;
} else if(configElement == "allff") {
m_oneScorePerDirection = false;
VERBOSE(1, "Using one feature function for each orientation type." << std::endl);
return true;
}
return false;
}
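A self-contained sketch of the parsing scheme used above (stand-in code, not the Moses API): the model type string is split on '-', each token is offered to the direction/condition/feature-count decoders, and whatever they do not recognise is left over for the reordering state factory. The example string and token set are assumptions in the usual Moses configuration style.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// stand-in for Moses' Tokenize<std::string>(s, "-")
static std::vector<std::string> Split(const std::string &s, char sep) {
std::vector<std::string> out;
std::stringstream ss(s);
std::string tok;
while(std::getline(ss, tok, sep)) out.push_back(tok);
return out;
}

int main() {
// hypothetical model type string
std::vector<std::string> tokens = Split("msd-bidirectional-fe-collapseff", '-');
std::vector<std::string> leftover;
for(size_t i = 0; i < tokens.size(); ++i) {
const std::string &t = tokens[i];
if(t == "forward" || t == "backward" || t == "unidirectional" || t == "bidirectional")
std::cout << "direction token: " << t << "\n";
else if(t == "f" || t == "fe")
std::cout << "condition token: " << t << "\n";
else if(t == "collapseff" || t == "allff")
std::cout << "feature-count token: " << t << "\n";
else
leftover.push_back(t); // e.g. "msd": would be forwarded to CreateLexicalReorderingState()
}
std::cout << "leftover tokens for the state factory: " << leftover.size() << "\n";
return 0;
}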
Score LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const {
return m_table->GetScore(f, e, Phrase(Output));
}
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
const FFState* prev_state,
ScoreComponentCollection* out) const {
const FFStateArray *prev_states = dynamic_cast<const FFStateArray *> (prev_state);
FFStateArray *next_states = new FFStateArray(prev_states->size());
std::vector<float> score(GetNumScoreComponents(), 0);
std::vector<float> values;
//for every direction
for(size_t i = 0; i < m_direction.size(); ++i) {
ReorderingType reo;
const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *> ((*prev_states)[i]);
(*next_states)[i] = prev->Expand(hypo, reo);
const Hypothesis *cache_hypo;
switch (m_direction[i]) {
case Forward:
//TODO: still using GetPrevHypo here
cache_hypo = hypo.GetPrevHypo();
if(cache_hypo->GetId() == 0)
continue;
break;
case Backward:
cache_hypo = &hypo;
break;
}
const ScoreComponentCollection &reorderingScoreColl = cache_hypo->GetCachedReorderingScore();
values = reorderingScoreColl.GetScoresForProducer(this);
assert(values.size() == (m_numScoreComponents));
float value = values[reo + m_scoreOffset[i]];
if(m_oneScorePerDirection) {
//one score per direction
score[i] = value;
} else {
//one score per direction and orientation
score[reo + m_scoreOffset[i]] = value;
}
}
out->PlusEquals(this, score);
return next_states;
}
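The score vector layout implied by m_scoreOffset, as a hedged standalone illustration (the orientation count is assumed, not read from this diff): each direction owns a contiguous block of the feature's score vector, and the reordering type returned by Expand() indexes into that block.

#include <iostream>
#include <vector>

int main() {
// assumed layout: 2 directions (backward, forward) x 3 orientations (an msd-style model)
const size_t numDirections = 2, numOrientations = 3;
std::vector<size_t> scoreOffset;
size_t total = 0;
for(size_t i = 0; i < numDirections; ++i) {
scoreOffset.push_back(total);
total += numOrientations;
}
std::vector<float> score(total, 0.0f);
// e.g. a "swap" orientation (reo == 1) observed for the forward direction (dir == 1)
size_t dir = 1, reo = 1;
score[reo + scoreOffset[dir]] = -0.7f; // hypothetical log probability
for(size_t k = 0; k < score.size(); ++k)
std::cout << "score[" << k << "] = " << score[k] << "\n";
return 0;
}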
const FFState* LexicalReordering::EmptyHypothesisState() const {
// construct empty and push_back: the sized constructor would pre-fill NULL entries
FFStateArray *states = new FFStateArray();
for(size_t i = 0; i < m_direction.size(); i++)
states->push_back(LexicalReorderingState::CreateLexicalReorderingState(m_modelType, m_direction[i]));
return states;
}
}

LexicalReordering.h

@@ -26,23 +26,21 @@ class InputType;
class LexicalReordering : public StatefulFeatureFunction {
public:
typedef int ReorderingType;
typedef int OrientationType;
enum Direction {Forward, Backward, Bidirectional, Unidirectional = Backward};
enum Condition {F,E,C,FE,FEC};
enum Direction {Forward, Backward};
enum Condition {F,E,FE};
LexicalReordering *CreateLexicalReorderingModel(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors,
const std::string &modelType,
const std::string &filePath,
const std::vector<float>& weights);
LexicalReordering(std::vector<FactorType>& f_factors,
std::vector<FactorType>& e_factors,
const std::string &modelType,
const std::string &filePath,
const std::vector<float>& weights);
virtual ~LexicalReordering();
virtual size_t GetNumScoreComponents() const {
return m_NumScoreComponents;
return m_numScoreComponents;
};
virtual FFState* Evaluate(
const Hypothesis& cur_hypo,
virtual FFState* Evaluate(const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
@@ -54,28 +52,27 @@ public:
return "d";
};
virtual int GetNumOrientationTypes() const = 0;
virtual OrientationType GetOrientationType(Hypothesis*) const = 0;
std::vector<float> CalcScore(Hypothesis* hypothesis) const;
void InitializeForInput(const InputType& i){
m_Table->InitializeForInput(i);
m_table->InitializeForInput(i);
}
Score GetProb(const Phrase& f, const Phrase& e) const;
//helpers
static std::vector<Condition> DecodeCondition(Condition c);
static std::vector<Direction> DecodeDirection(Direction d);
private:
Phrase auxGetContext(const Hypothesis* hypothesis) const;
private:
LexicalReorderingTable* m_Table;
size_t m_NumScoreComponents;
std::vector< Direction > m_Direction;
std::vector< Condition > m_Condition;
bool m_OneScorePerDirection;
std::vector< FactorType > m_FactorsE, m_FactorsF, m_FactorsC;
int m_MaxContextLength;
bool DecodeCondition(std::string s);
bool DecodeDirection(std::string s);
bool DecodeNumFeatureFunctions(std::string s);
std::string m_modelTypeString;
std::vector<std::string> m_modelType;
LexicalReorderingTable* m_table;
size_t m_numScoreComponents;
std::vector<Direction> m_direction;
std::vector<Condition> m_condition;
std::vector<size_t> m_scoreOffset;
bool m_oneScorePerDirection;
std::vector<FactorType> m_factorsE, m_factorsF;
};
}
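For context, a minimal sketch of the decoder-side contract that a stateful feature like this follows (toy stand-in code under assumed semantics, not the Moses classes): the search is seeded with the equivalent of EmptyHypothesisState(), each phrase extension calls the equivalent of Evaluate() to obtain the next state plus a score contribution, and the states are what make hypothesis recombination safe.

#include <iostream>

// stand-in state: the last source position covered
struct ToyState { int lastPos; };

// stand-in for Evaluate(): returns the next state and reports a score contribution
static ToyState Extend(const ToyState &prev, int phraseStart, float &scoreOut) {
scoreOut = (phraseStart == prev.lastPos + 1) ? 0.0f : -1.0f; // hypothetical monotone-vs-jump score
ToyState next = { phraseStart };
return next;
}

int main() {
ToyState s = { -1 }; // corresponds to the empty hypothesis state
float total = 0.0f, delta;
int starts[] = { 0, 1, 3 }; // source positions of successive phrase extensions
for(int k = 0; k < 3; ++k) {
s = Extend(s, starts[k], delta);
total += delta;
}
std::cout << "accumulated reordering score: " << total << "\n"; // -1 for the single jump
return 0;
}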

LexicalReorderingState.h

@@ -26,7 +26,7 @@ class LexicalReorderingState : public FFState {
virtual LexicalReorderingState* Expand(const Hypothesis& hypo,
LexicalReordering::ReorderingType& reoType) const;
static LexicalReorderingState* CreateLexicalReorderingState(std::vector<std::string>& config,
static LexicalReorderingState* CreateLexicalReorderingState(const std::vector<std::string>& config,
LexicalReordering::Direction dir);
int GetNumberOfScores() const;
protected:

StaticData.h

@@ -21,10 +21,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#pragma once
#include <limits>
#include <list>
#include <vector>
#include <map>
#include <memory>
#include <utility>
#ifdef WITH_THREADS
#include <boost/thread/mutex.hpp>
@@ -148,7 +150,7 @@ protected:
size_t m_timeout_threshold; //! seconds after which time out is activated
bool m_useTransOptCache; //! flag indicating, if the persistent translation option cache should be used
mutable std::map<std::pair<size_t, Phrase>, pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
mutable std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
size_t m_transOptCacheMaxSize; //! maximum size for persistent translation option cache
//FIXME: Single lock for cache not most efficient. However using a
//reader-writer for LRU cache is tricky - how to record last used time?
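A hedged, self-contained sketch of the cache shape touched above (stand-in types, not the Moses ones): entries keyed by a (size_t, phrase) pair each carry a clock_t timestamp; a lookup refreshes the timestamp so the oldest entry can be evicted once the cache is full, which is the LRU bookkeeping the FIXME above refers to.

#include <ctime>
#include <map>
#include <string>
#include <utility>

typedef std::pair<size_t, std::string> Key;        // stand-in for std::pair<size_t, Phrase>
typedef std::pair<int, clock_t> Entry;             // stand-in for pair<TranslationOptionList*, clock_t>

static std::map<Key, Entry> cache;
static const size_t kMaxSize = 2;                  // hypothetical cap

static void Insert(const Key &k, int options) {
if(cache.size() >= kMaxSize) {
// evict the least recently used entry (linear scan; a real LRU would index by time)
std::map<Key, Entry>::iterator oldest = cache.begin(), it = cache.begin();
for(; it != cache.end(); ++it)
if(it->second.second < oldest->second.second) oldest = it;
cache.erase(oldest);
}
cache[k] = Entry(options, clock());
}

static bool Lookup(const Key &k, int &optionsOut) {
std::map<Key, Entry>::iterator it = cache.find(k);
if(it == cache.end()) return false;
it->second.second = clock();   // refresh the last-used time
optionsOut = it->second.first;
return true;
}

int main() {
Insert(Key(0, "das Haus"), 7); // hypothetical phrase and option count
Insert(Key(0, "Haus"), 3);
int n;
return Lookup(Key(0, "Haus"), n) ? 0 : 1;
}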
@@ -169,7 +171,7 @@ protected:
StaticData();
//! helper fn to set bool param from ini file/command line
void SetBooleanParameter(bool *paramter, string parameterName, bool defaultValue);
void SetBooleanParameter(bool *paramter, std::string parameterName, bool defaultValue);
/***
* load all language models as specified in ini file
@@ -328,7 +330,7 @@
}
bool UseEarlyDiscarding() const
{
return m_earlyDiscardingThreshold != -numeric_limits<float>::infinity();
return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
}
float GetTranslationOptionThreshold() const
{