Code cleanup and reorganization. A few classes have been renamed to shorter names.

This commit is contained in:
Ulrich Germann 2015-02-15 01:45:22 +00:00
parent e520a3d39e
commit ccf44f39fb
6 changed files with 748 additions and 703 deletions

View File

@ -11,110 +11,125 @@ using namespace boost::algorithm;
namespace Moses
{
LexicalReordering::LexicalReordering(const std::string &line)
: StatefulFeatureFunction(line)
{
std::cerr << "Initializing LexicalReordering.." << std::endl;
map<string,string> sparseArgs;
m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
LexicalReordering::
LexicalReordering(const std::string &line)
: StatefulFeatureFunction(line)
{
VERBOSE(1, "Initializing LexicalReordering.." << std::endl);
if (args[0] == "type") {
m_configuration.reset(new LexicalReorderingConfiguration(args[1]));
m_configuration->SetScoreProducer(this);
m_modelTypeString = m_configuration->GetModelString();
} else if (args[0] == "input-factor") {
m_factorsF =Tokenize<FactorType>(args[1]);
} else if (args[0] == "output-factor") {
m_factorsE =Tokenize<FactorType>(args[1]);
} else if (args[0] == "path") {
m_filePath = args[1];
} else if (starts_with(args[0], "sparse-")) {
sparseArgs[args[0].substr(7)] = args[1];
} else if (args[0] == "default-scores") {
vector<string> tokens = Tokenize(args[1],",");
for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
map<string,string> sparseArgs;
m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
if (args[0] == "type") {
m_configuration.reset(new LexicalReorderingConfiguration(args[1]));
m_configuration->SetScoreProducer(this);
m_modelTypeString = m_configuration->GetModelString();
} else if (args[0] == "input-factor") {
m_factorsF =Tokenize<FactorType>(args[1]);
} else if (args[0] == "output-factor") {
m_factorsE =Tokenize<FactorType>(args[1]);
} else if (args[0] == "path") {
m_filePath = args[1];
} else if (starts_with(args[0], "sparse-")) {
sparseArgs[args[0].substr(7)] = args[1];
} else if (args[0] == "default-scores") {
vector<string> tokens = Tokenize(args[1],",");
for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
}
m_haveDefaultScores = true;
} else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
m_haveDefaultScores = true;
} else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
switch(m_configuration->GetCondition()) {
case LexicalReorderingConfiguration::FE:
case LexicalReorderingConfiguration::E:
if(m_factorsE.empty()) {
UTIL_THROW(util::Exception,
"TL factor mask for lexical reordering is unexpectedly empty");
}
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
break; // else fall through
case LexicalReorderingConfiguration::F:
if(m_factorsF.empty()) {
UTIL_THROW(util::Exception,
"SL factor mask for lexical reordering is unexpectedly empty");
}
break;
default:
UTIL_THROW(util::Exception,"Unknown conditioning option!");
}
// sanity check: number of default scores
if (m_haveDefaultScores)
{
if(m_defaultScores.size() != m_configuration->GetNumScoreComponents())
{
UTIL_THROW(util::Exception,"wrong number of default scores ("
<< m_defaultScores.size()
<< ") for lexicalized reordering model (expected "
<< m_configuration->GetNumScoreComponents() << ")");
}
}
m_configuration->ConfigureSparse(sparseArgs, this);
}
switch(m_configuration->GetCondition()) {
case LexicalReorderingConfiguration::FE:
case LexicalReorderingConfiguration::E:
if(m_factorsE.empty()) {
UTIL_THROW(util::Exception,"TL factor mask for lexical reordering is unexpectedly empty");
}
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
break; // else fall through
case LexicalReorderingConfiguration::F:
if(m_factorsF.empty()) {
UTIL_THROW(util::Exception,"SL factor mask for lexical reordering is unexpectedly empty");
}
break;
default:
UTIL_THROW(util::Exception,"Unknown conditioning option!");
// Destructor. The body is intentionally empty: nothing is released by hand.
LexicalReordering::~LexicalReordering() {}
// sanity check: number of default scores
if (m_haveDefaultScores) {
if(m_defaultScores.size() != m_configuration->GetNumScoreComponents()) {
UTIL_THROW(util::Exception,"wrong number of default scores (" << m_defaultScores.size() << ") for lexicalized reordering model (expected " << m_configuration->GetNumScoreComponents() << ")");
}
void LexicalReordering::Load()
{
typedef LexicalReorderingTable LRT;
m_table.reset(LRT::LoadAvailable(m_filePath, m_factorsF, m_factorsE,
std::vector<FactorType>()));
}
m_configuration->ConfigureSparse(sparseArgs, this);
}
// Empty by design; no manual cleanup is performed in this destructor.
LexicalReordering::
~LexicalReordering()
{
}
void LexicalReordering::Load()
{
m_table.reset(LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector<FactorType>()));
}
// Returns the scores that m_table holds for the phrase pair (f, e).
// The third lookup argument is a freshly built Phrase(ARRAY_SIZE_INCR).
Scores
LexicalReordering::
GetProb(const Phrase& f, const Phrase& e) const
{
  const Phrase thirdKey(ARRAY_SIZE_INCR);
  return m_table->GetScore(f, e, thirdKey);
}
// Advances the lexical reordering state for one hypothesis.
//   hypo       - supplies the translation option and input passed to Expand()
//   prev_state - state to expand; must be a LexicalReorderingState
//   out        - score collection handed to Expand(); afterwards an all-zero
//                score vector for this feature is added to it
// Returns the state produced by Expand().
// Throws util::Exception when prev_state is not a LexicalReorderingState
// (previously this case dereferenced a NULL pointer).
FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
    const FFState* prev_state,
    ScoreComponentCollection* out) const
{
  VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
  Scores score(GetNumScoreComponents(), 0);
  const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
  // dynamic_cast returns NULL on a type mismatch; fail loudly instead of
  // dereferencing it.
  if (prev == NULL) {
    UTIL_THROW(util::Exception,
               "LexicalReordering: previous state is not a LexicalReorderingState");
  }
  LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
  out->PlusEquals(this, score);
  VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl);
  return next_state;
}
// Asks the configuration to create the initial reordering state for `input`
// and returns it.
const FFState*
LexicalReordering::
EmptyHypothesisState(const InputType &input) const
{
  const FFState* initialState = m_configuration->CreateLexicalReorderingState(input);
  return initialState;
}
bool LexicalReordering::IsUseable(const FactorMask &mask) const
{
for (size_t i = 0; i < m_factorsE.size(); ++i) {
const FactorType &factor = m_factorsE[i];
if (!mask[factor]) {
return false;
}
// Looks up the score vector for phrase pair (f, e) in m_table, using a newly
// constructed Phrase(ARRAY_SIZE_INCR) as the third key.
Scores
LexicalReordering::
GetProb(const Phrase& f, const Phrase& e) const
{
  const Phrase extraKey(ARRAY_SIZE_INCR);
  return m_table->GetScore(f, e, extraKey);
}
return true;
}
// Advances the lexical reordering state for one hypothesis.
//   hypo       - supplies the translation option and input passed to Expand()
//   prev_state - state to expand; must be a LexicalReorderingState
//   out        - score collection handed to Expand(); afterwards an all-zero
//                score vector for this feature is added to it
// Returns the state produced by Expand().
// Throws util::Exception when prev_state is not a LexicalReorderingState
// (previously this case dereferenced a NULL pointer).
FFState*
LexicalReordering::
EvaluateWhenApplied(const Hypothesis& hypo,
                    const FFState* prev_state,
                    ScoreComponentCollection* out) const
{
  VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START"
          << std::endl);
  Scores score(GetNumScoreComponents(), 0);
  const LexicalReorderingState *prev
    = dynamic_cast<const LexicalReorderingState *>(prev_state);
  // dynamic_cast returns NULL on a type mismatch; report it rather than
  // dereferencing the NULL pointer.
  if (prev == NULL) {
    UTIL_THROW(util::Exception,
               "LexicalReordering: previous state is not a LexicalReorderingState");
  }
  LexicalReorderingState *next_state
    = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
  out->PlusEquals(this, score);
  VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl);
  return next_state;
}
// Returns the start state that m_configuration builds for the given input.
const FFState*
LexicalReordering::
EmptyHypothesisState(const InputType &input) const
{
  const FFState* startState = m_configuration->CreateLexicalReorderingState(input);
  return startState;
}
// Returns true when every factor listed in m_factorsE is set in `mask`
// (trivially true when m_factorsE is empty), false as soon as one is missing.
bool
LexicalReordering::
IsUseable(const FactorMask &mask) const
{
  size_t idx = 0;
  // advance past every factor that the mask provides
  while (idx < m_factorsE.size() && mask[m_factorsE[idx]]) {
    ++idx;
  }
  return idx == m_factorsE.size();
}
}

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
#include <vector>
#include <string>
@ -14,506 +14,521 @@
namespace Moses
{
// Number of orientation classes of the configured model type:
// 3 for MSD, 4 for MSLR, 2 for every other model type.
size_t
LexicalReorderingConfiguration::
GetNumberOfTypes() const
{
  if (m_modelType == LexicalReorderingConfiguration::MSD) {
    return 3;
  }
  if (m_modelType == LexicalReorderingConfiguration::MSLR) {
    return 4;
  }
  return 2;
}
typedef LexicalReorderingConfiguration LexReoConf;
size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
{
size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Bidirectional) {
return 2 * score_per_dir + m_additionalScoreComponents;
} else {
return score_per_dir + m_additionalScoreComponents;
}
}
// Creates a SparseReordering from `sparseArgs` and `producer` and stores it
// in m_sparse. Does nothing when `sparseArgs` is empty.
void
LexicalReorderingConfiguration::
ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
                const LexicalReordering* producer)
{
  if (sparseArgs.empty()) {
    return;
  }
  m_sparse.reset(new SparseReordering(sparseArgs, producer));
}
// Sets the number of extra score components that GetNumScoreComponents()
// adds on top of the per-direction scores.
void
LexicalReorderingConfiguration::
SetAdditionalScoreComponents(size_t number)
{
  m_additionalScoreComponents = number;
}
LexicalReorderingConfiguration::LexicalReorderingConfiguration(const std::string &modelType)
: m_modelString(modelType), m_scoreProducer(NULL), m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(Backward), m_additionalScoreComponents(0)
{
std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
for (size_t i=0; i<config.size(); ++i) {
if (config[i] == "hier") {
m_phraseBased = false;
} else if (config[i] == "phrase") {
m_phraseBased = true;
} else if (config[i] == "wbe") {
m_phraseBased = true;
// no word-based decoding available, fall-back to phrase-based
// This is the old lexical reordering model combination of moses
} else if (config[i] == "msd") {
m_modelType = MSD;
} else if (config[i] == "mslr") {
m_modelType = MSLR;
} else if (config[i] == "monotonicity") {
m_modelType = Monotonic;
} else if (config[i] == "leftright") {
m_modelType = LeftRight;
} else if (config[i] == "backward" || config[i] == "unidirectional") {
// note: unidirectional is deprecated, use backward instead
m_direction = Backward;
} else if (config[i] == "forward") {
m_direction = Forward;
} else if (config[i] == "bidirectional") {
m_direction = Bidirectional;
} else if (config[i] == "f") {
m_condition = F;
} else if (config[i] == "fe") {
m_condition = FE;
} else if (config[i] == "collapseff") {
m_collapseScores = true;
} else if (config[i] == "allff") {
m_collapseScores = false;
} else {
std::cerr << "Illegal part in the lexical reordering configuration string: " << config[i] << std::endl;
exit(1);
}
// Returns true when `cur` starts exactly one position after the end of
// `prev`, or when it starts further right and that position (end of prev + 1)
// is not yet marked in the coverage bitmap.
//   prev - words range of last source phrase
//   cur  - words range of current source phrase
//   cov  - coverage bitmap
bool
IsMonotonicStep(WordsRange const& prev,
                WordsRange const& cur,
                WordsBitmap const& cov)
{
  const size_t afterPrev = prev.GetEndPos() + 1;
  const size_t curStart = cur.GetStartPos();
  if (curStart == afterPrev) {
    return true;
  }
  if (curStart < afterPrev) {
    return false;
  }
  return !cov.GetValue(afterPrev);
}
if (m_modelType == None) {
std::cerr << "You need to specify the type of the reordering model (msd, monotonicity,...)" << std::endl;
exit(1);
}
}
LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingState(const InputType &input) const
{
LexicalReorderingState *bwd = NULL, *fwd = NULL;
size_t offset = 0;
switch(m_direction) {
case Backward:
case Bidirectional:
if (m_phraseBased) { //Same for forward and backward
bwd = new PhraseBasedReorderingState(*this, LexicalReorderingConfiguration::Backward, offset);
} else {
bwd = new HierarchicalReorderingBackwardState(*this, offset);
}
offset += m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Backward)
return bwd; // else fall through
case Forward:
if (m_phraseBased) { //Same for forward and backward
fwd = new PhraseBasedReorderingState(*this, LexicalReorderingConfiguration::Forward, offset);
} else {
fwd = new HierarchicalReorderingForwardState(*this, input.GetSize(), offset);
}
offset += m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Forward)
return fwd;
// Returns true when `cur` ends exactly one position before the start of
// `prev`, or when it ends further left and the position just before the start
// of `prev` is not yet marked in the coverage bitmap.
bool
IsSwap(WordsRange const& prev, WordsRange const& cur, WordsBitmap const& cov)
{
  const size_t prevStart = prev.GetStartPos();
  const size_t curEnd = cur.GetEndPos();
  if (curEnd + 1 == prevStart) {
    return true;
  }
  if (curEnd >= prevStart) {
    return false;
  }
  return !cov.GetValue(prevStart - 1);
}
return new BidirectionalReorderingState(*this, bwd, fwd, 0);
}
void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const TranslationOption &topt, const InputType& input, ReorderingType reoType) const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
"Unknown direction: " << m_direction);
const TranslationOption* relevantOpt = &topt;
if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
// look up applicable score from vector of scores
if(cachedScores) {
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
const Scores &scoreSet = *cachedScores;
if(m_configuration.CollapseScores()) {
scores[m_offset] = scoreSet[m_offset + reoType];
} else {
std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
scores[m_offset + reoType] = scoreSet[m_offset + reoType];
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
// Number of orientation classes of the configured model type:
// 3 for MSD, 4 for MSLR, 2 otherwise.
size_t
LexicalReorderingConfiguration::
GetNumberOfTypes() const
{
  switch (m_modelType) {
  case LexReoConf::MSD:
    return 3;
  case LexReoConf::MSLR:
    return 4;
  default:
    return 2;
  }
}
// else: use default scores (if specified)
else if (m_configuration.GetScoreProducer()->GetHaveDefaultScores()) {
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
if(m_configuration.CollapseScores()) {
scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
} else {
scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
size_t
LexicalReorderingConfiguration::
GetNumScoreComponents() const
{
size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
return ((m_direction == Bidirectional)
? 2 * score_per_dir + m_additionalScoreComponents
: score_per_dir + m_additionalScoreComponents);
}
// note: if no default score, no cost
const SparseReordering* sparse = m_configuration.GetSparseReordering();
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, m_direction, accum);
// Installs a SparseReordering built from `sparseArgs` and `producer` in
// m_sparse; an empty argument map leaves m_sparse untouched.
void
LexicalReorderingConfiguration::
ConfigureSparse(std::map<std::string,std::string> const& sparseArgs,
                const LexicalReordering* producer)
{
  if (!sparseArgs.empty()) {
    m_sparse.reset(new SparseReordering(sparseArgs, producer));
  }
}
}
// Records how many additional score components GetNumScoreComponents()
// should count beyond the per-direction scores.
void
LexicalReorderingConfiguration::
SetAdditionalScoreComponents(size_t number)
{
  m_additionalScoreComponents = number;
}
// Builds a configuration from a '-'-separated model string. Each token sets
// one option:
//   hier | phrase | wbe                                  -> m_phraseBased
//   msd | mslr | monotonicity | leftright                -> m_modelType
//   backward | unidirectional | forward | bidirectional  -> m_direction
//   f | fe                                               -> m_condition
//   collapseff | allff                                   -> m_collapseScores
// An unknown token, or a string that never names a model type, prints a
// message to std::cerr and terminates the process via exit(1).
// NOTE(review): m_condition is not in the initializer list, so it is only
// assigned when an "f"/"fe" token is present -- confirm that is intended.
LexicalReorderingConfiguration::
LexicalReorderingConfiguration(const std::string &modelType)
  : m_modelString(modelType)
  , m_scoreProducer(NULL)
  , m_modelType(None)
  , m_phraseBased(true)
  , m_collapseScores(false)
  , m_direction(Backward)
  , m_additionalScoreComponents(0)
{
  const std::vector<std::string> tokens = Tokenize<std::string>(modelType, "-");
  for (size_t k = 0; k < tokens.size(); ++k) {
    const std::string &tok = tokens[k];
    if (tok == "hier") {
      m_phraseBased = false;
    } else if (tok == "phrase" || tok == "wbe") {
      // "wbe": no word-based decoding available, fall back to phrase-based.
      // This is the old lexical reordering model combination of moses.
      m_phraseBased = true;
    } else if (tok == "msd") {
      m_modelType = MSD;
    } else if (tok == "mslr") {
      m_modelType = MSLR;
    } else if (tok == "monotonicity") {
      m_modelType = Monotonic;
    } else if (tok == "leftright") {
      m_modelType = LeftRight;
    } else if (tok == "backward" || tok == "unidirectional") {
      // note: unidirectional is deprecated, use backward instead
      m_direction = Backward;
    } else if (tok == "forward") {
      m_direction = Forward;
    } else if (tok == "bidirectional") {
      m_direction = Bidirectional;
    } else if (tok == "f") {
      m_condition = F;
    } else if (tok == "fe") {
      m_condition = FE;
    } else if (tok == "collapseff") {
      m_collapseScores = true;
    } else if (tok == "allff") {
      m_collapseScores = false;
    } else {
      std::cerr
          << "Illegal part in the lexical reordering configuration string: "
          << tok << std::endl;
      exit(1);
    }
  }

  if (m_modelType == None) {
    std::cerr
        << "You need to specify the type of the reordering model "
        << "(msd, monotonicity,...)" << std::endl;
    exit(1);
  }
}
// Creates the initial reordering state for `input` according to the
// configured direction:
//   Backward      -> a backward state at score offset 0
//   Forward       -> a forward state at score offset 0
//   Bidirectional -> a BidirectionalReorderingState wrapping a backward state
//                    at offset 0 and a forward state placed after the
//                    backward score block
// Phrase-based models use PhraseBasedReorderingState for both directions;
// otherwise the hierarchical backward/forward state classes are used.
// The caller receives a newly allocated object.
LexicalReorderingState *
LexicalReorderingConfiguration::
CreateLexicalReorderingState(const InputType &input) const
{
  LexicalReorderingState *bwd = NULL, *fwd = NULL;
  size_t offset = 0;

  switch(m_direction) {
  case Backward:
  case Bidirectional:
    // Plain if/else on purpose: the two branches yield unrelated derived
    // pointer types, which a conditional expression cannot unify.
    if (m_phraseBased) {
      bwd = new PhraseBasedReorderingState(*this, Backward, offset);
    } else {
      bwd = new HierarchicalReorderingBackwardState(*this, offset);
    }
    // the forward scores (if any) follow the backward block
    offset += m_collapseScores ? 1 : GetNumberOfTypes();
    if (m_direction == Backward) return bwd; // else fall through
  case Forward:
    if (m_phraseBased) {
      fwd = new PhraseBasedReorderingState(*this, Forward, offset);
    } else {
      fwd = new HierarchicalReorderingForwardState(*this, input.GetSize(),
                                                   offset);
    }
    if (m_direction == Forward) return fwd;
  }
  return new BidirectionalReorderingState(*this, bwd, fwd, 0);
}
// Adds the score for orientation `reoType` to `accum`.
//   accum   - score collection that receives the result
//   topt    - current translation option; used as the score source for a
//             backward state, while a forward state uses m_prevOption
//   input   - forwarded to the sparse reordering feature, if one is set
//   reoType - orientation whose score is copied
// The score is taken from the scores cached on the relevant option; if there
// are none, the producer's default score is used when defaults were given,
// otherwise no dense score is added. With collapsed scores the value is
// written to slot m_offset, otherwise to slot m_offset + reoType.
// Throws (via UTIL_THROW_IF2) when called on a state whose direction is
// neither Backward nor Forward.
void
LexicalReorderingState::
CopyScores(ScoreComponentCollection* accum,
           const TranslationOption &topt,
           const InputType& input,
           ReorderingType reoType) const
{
  // don't call this on a bidirectional object
  UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward &&
                 m_direction != LexicalReorderingConfiguration::Forward,
                 "Unknown direction: " << m_direction);

  TranslationOption const* relevantOpt
    = (m_direction == LexicalReorderingConfiguration::Backward)
      ? &topt : m_prevOption;

  LexicalReordering* reotable = m_configuration.GetScoreProducer();
  Scores const* cachedScores = relevantOpt->GetLexReorderingScores(reotable);

  // off_remote: index into the stored/default score vector
  // off_local : index into the vector that is added to accum
  size_t off_remote = m_offset + reoType;
  size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;

  // look up applicable score from vector of scores
  if (cachedScores) {
    Scores scores(reotable->GetNumScoreComponents(), 0);
    scores[off_local] = (*cachedScores)[off_remote];
    accum->PlusEquals(reotable, scores);
  }
  // else: use default scores (if specified)
  else if (reotable->GetHaveDefaultScores()) {
    Scores scores(reotable->GetNumScoreComponents(), 0);
    scores[off_local] = reotable->GetDefaultScore(off_remote);
    accum->PlusEquals(reotable, scores);
  }
  // note: if no default score, no cost

  const SparseReordering* sparse = m_configuration.GetSparseReordering();
  if (sparse) {
    sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
                       m_direction, accum);
  }
}
int LexicalReorderingState::ComparePrevScores(const TranslationOption *other) const
{
const Scores* myPrevScores = m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer());
const Scores* otherPrevScores = other->GetLexReorderingScores(m_configuration.GetScoreProducer());
int
LexicalReorderingState::
ComparePrevScores(const TranslationOption *other) const
{
LexicalReordering* reotable = m_configuration.GetScoreProducer();
const Scores* myPrevScores = m_prevOption->GetLexReorderingScores(reotable);
const Scores* otherPrevScores = other->GetLexReorderingScores(reotable);
if(myPrevScores == otherPrevScores)
return 0;
if(myPrevScores == otherPrevScores)
return 0;
// The pointers are NULL if a phrase pair isn't found in the reordering table.
if(otherPrevScores == NULL)
return -1;
if(myPrevScores == NULL)
return 1;
for(size_t i = m_offset; i < m_offset + m_configuration.GetNumberOfTypes(); i++)
if((*myPrevScores)[i] < (*otherPrevScores)[i])
// The pointers are NULL if a phrase pair isn't found in the reordering table.
if(otherPrevScores == NULL)
return -1;
else if((*myPrevScores)[i] > (*otherPrevScores)[i])
if(myPrevScores == NULL)
return 1;
return 0;
}
for(size_t i = m_offset; i < m_offset + m_configuration.GetNumberOfTypes(); i++)
if((*myPrevScores)[i] < (*otherPrevScores)[i])
return -1;
else if((*myPrevScores)[i] > (*otherPrevScores)[i])
return 1;
bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
// Successor-state constructor: derives the base state from `prev` and `topt`,
// remembers the source range of `topt` and clears the "first" flag.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
                           const TranslationOption &topt)
  : LexicalReorderingState(prev, topt)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_first(false)
{
}
// Initial-state constructor: no previous range yet (NOT_FOUND, NOT_FOUND) and
// the "first" flag set.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const LexicalReorderingConfiguration &config,
                           LexicalReorderingConfiguration::Direction dir,
                           size_t offset)
  : LexicalReorderingState(config, dir, offset)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_first(true)
{
}
int PhraseBasedReorderingState::Compare(const FFState& o) const
{
if (&o == this)
return 0;
}
const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o);
if (m_prevRange == other->m_prevRange) {
if (m_direction == LexicalReorderingConfiguration::Forward) {
return ComparePrevScores(other->m_prevOption);
} else {
bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
// Builds the state that follows `prev` after applying `topt`: stores the
// source range of `topt` as previous range and marks the state as not first.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
                           const TranslationOption &topt)
  : LexicalReorderingState(prev, topt)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_first(false)
{
}
// Builds an initial state for the given configuration, direction and score
// offset: previous range is (NOT_FOUND, NOT_FOUND) and m_first is true.
PhraseBasedReorderingState::
PhraseBasedReorderingState(const LexReoConf &config,
                           LexReoConf::Direction dir,
                           size_t offset)
  : LexicalReorderingState(config, dir, offset)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_first(true)
{
}
int PhraseBasedReorderingState::Compare(const FFState& o) const
{
if (&o == this)
return 0;
}
} else if (m_prevRange < other->m_prevRange) {
return -1;
}
return 1;
}
LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
ReorderingType reoType;
const WordsRange currWordsRange = topt.GetSourceWordsRange();
const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
if ((m_direction != LexicalReorderingConfiguration::Forward && m_useFirstBackwardScore) || !m_first) {
if (modelType == LexicalReorderingConfiguration::MSD) {
reoType = GetOrientationTypeMSD(currWordsRange);
} else if (modelType == LexicalReorderingConfiguration::MSLR) {
reoType = GetOrientationTypeMSLR(currWordsRange);
} else if (modelType == LexicalReorderingConfiguration::Monotonic) {
reoType = GetOrientationTypeMonotonic(currWordsRange);
} else {
reoType = GetOrientationTypeLeftRight(currWordsRange);
const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o);
if (m_prevRange == other->m_prevRange) {
if (m_direction == LexReoConf::Forward) {
return ComparePrevScores(other->m_prevOption);
} else {
return 0;
}
} else if (m_prevRange < other->m_prevRange) {
return -1;
}
CopyScores(scores, topt, input, reoType);
return 1;
}
return new PhraseBasedReorderingState(this, topt);
}
LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
ReorderingType reoType;
const WordsRange currWordsRange = topt.GetSourceWordsRange();
const LexReoConf::ModelType modelType = m_configuration.GetModelType();
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSD(WordsRange currRange) const
{
if (m_first) {
if (currRange.GetStartPos() == 0) {
if ((m_direction != LexReoConf::Forward && m_useFirstBackwardScore) || !m_first) {
if (modelType == LexReoConf::MSD) {
reoType = GetOrientationTypeMSD(currWordsRange);
} else if (modelType == LexReoConf::MSLR) {
reoType = GetOrientationTypeMSLR(currWordsRange);
} else if (modelType == LexReoConf::Monotonic) {
reoType = GetOrientationTypeMonotonic(currWordsRange);
} else {
reoType = GetOrientationTypeLeftRight(currWordsRange);
}
CopyScores(scores, topt, input, reoType);
}
return new PhraseBasedReorderingState(this, topt);
}
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSD(WordsRange currRange) const
{
if (m_first) {
if (currRange.GetStartPos() == 0) {
return M;
} else {
return D;
}
}
if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
return M;
} else {
return D;
} else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
return S;
}
return D;
}
if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
return M;
} else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
return S;
}
return D;
}
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSLR(WordsRange currRange) const
{
if (m_first) {
if (currRange.GetStartPos() == 0) {
LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSLR(WordsRange currRange) const
{
if (m_first) {
if (currRange.GetStartPos() == 0) {
return M;
} else {
return DR;
}
}
if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
return M;
} else {
} else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
return S;
} else if (m_prevRange.GetEndPos() < currRange.GetStartPos()) {
return DR;
}
}
if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
return M;
} else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
return S;
} else if (m_prevRange.GetEndPos() < currRange.GetStartPos()) {
return DR;
}
return DL;
}
// Two-class orientation: M when this is the first state and `currRange`
// starts at position 0, or when `currRange` starts right after the end of the
// previous range; NM otherwise.
LexicalReorderingState::ReorderingType
PhraseBasedReorderingState::
GetOrientationTypeMonotonic(WordsRange currRange) const
{
  if (m_first && currRange.GetStartPos() == 0) {
    return M;
  }
  if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
    return M;
  }
  return NM;
}
// Two-class orientation: R for the first state or when the previous range
// ends at or before the start of `currRange`; L otherwise.
LexicalReorderingState::ReorderingType
PhraseBasedReorderingState::
GetOrientationTypeLeftRight(WordsRange currRange) const
{
  const bool isRight =
    m_first || (m_prevRange.GetEndPos() <= currRange.GetStartPos());
  return isRight ? R : L;
}
///////////////////////////
//BidirectionalReorderingState
// Orders two bidirectional states: the backward parts decide first (result
// normalised to -1 / 1); only if they compare equal is the comparison of the
// forward parts returned. Comparing a state with itself yields 0.
// `o` is assumed to be a BidirectionalReorderingState (static_cast).
int BidirectionalReorderingState::Compare(const FFState& o) const
{
  if (&o == this)
    return 0;

  const BidirectionalReorderingState &other = static_cast<const BidirectionalReorderingState &>(o);
  // evaluate the backward comparison once instead of once per branch
  const int backwardOrder = m_backward->Compare(*other.m_backward);
  if (backwardOrder < 0)
    return -1;
  if (backwardOrder > 0)
    return 1;
  return m_forward->Compare(*other.m_forward);
}
// Expands both wrapped states with the same translation option -- backward
// first, then forward -- and returns a new bidirectional state holding the
// two results, with this state's configuration and offset.
LexicalReorderingState*
BidirectionalReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  LexicalReorderingState *expandedBwd = m_backward->Expand(topt, input, scores);
  LexicalReorderingState *expandedFwd = m_forward->Expand(topt, input, scores);
  return new BidirectionalReorderingState(m_configuration, expandedBwd,
                                          expandedFwd, m_offset);
}
///////////////////////////
//HierarchicalReorderingBackwardState
// Successor-state constructor: derives the base state from `prev` and `topt`
// and stores a copy of the given reordering stack.
HierarchicalReorderingBackwardState::
HierarchicalReorderingBackwardState(const HierarchicalReorderingBackwardState *prev,
                                    const TranslationOption &topt,
                                    ReorderingStack reoStack)
  : LexicalReorderingState(prev, topt)
  , m_reoStack(reoStack)
{
}
// Initial-state constructor: a backward-direction base state at `offset`;
// m_reoStack is default-constructed.
HierarchicalReorderingBackwardState::
HierarchicalReorderingBackwardState(const LexicalReorderingConfiguration &config,
                                    size_t offset)
  : LexicalReorderingState(config, LexicalReorderingConfiguration::Backward, offset)
{
}
int HierarchicalReorderingBackwardState::Compare(const FFState& o) const
{
const HierarchicalReorderingBackwardState& other = static_cast<const HierarchicalReorderingBackwardState&>(o);
return m_reoStack.Compare(other.m_reoStack);
}
LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack);
ReorderingType reoType;
const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
int reoDistance = nextState->m_reoStack.ShiftReduce(topt.GetSourceWordsRange());
if (modelType == LexicalReorderingConfiguration::MSD) {
reoType = GetOrientationTypeMSD(reoDistance);
} else if (modelType == LexicalReorderingConfiguration::MSLR) {
reoType = GetOrientationTypeMSLR(reoDistance);
} else if (modelType == LexicalReorderingConfiguration::LeftRight) {
reoType = GetOrientationTypeLeftRight(reoDistance);
} else {
reoType = GetOrientationTypeMonotonic(reoDistance);
return DL;
}
CopyScores(scores, topt, input, reoType);
return nextState;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMSD(int reoDistance) const
{
if (reoDistance == 1) {
return M;
} else if (reoDistance == -1) {
return S;
// Returns M when this is the first state and `currRange` starts at position
// 0, or when `currRange` begins immediately after the previous range ends;
// returns NM in every other case.
LexicalReorderingState::ReorderingType
PhraseBasedReorderingState::
GetOrientationTypeMonotonic(WordsRange currRange) const
{
  if (m_first && currRange.GetStartPos() == 0) {
    return M;
  }
  if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
    return M;
  }
  return NM;
}
return D;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMSLR(int reoDistance) const
{
if (reoDistance == 1) {
return M;
} else if (reoDistance == -1) {
return S;
} else if (reoDistance > 1) {
return DR;
// Returns R for the first state or when the previous range ends no later
// than `currRange` starts; returns L otherwise.
LexicalReorderingState::ReorderingType
PhraseBasedReorderingState::
GetOrientationTypeLeftRight(WordsRange currRange) const
{
  const bool goesRight =
    m_first || (m_prevRange.GetEndPos() <= currRange.GetStartPos());
  return goesRight ? R : L;
}
return DL;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMonotonic(int reoDistance) const
{
if (reoDistance == 1) {
return M;
///////////////////////////
//BidirectionalReorderingState
// Orders two bidirectional states: the backward parts decide first (result
// normalised to -1 / 1); only if they compare equal is the comparison of the
// forward parts returned. Comparing a state with itself yields 0.
// `o` is assumed to be a BidirectionalReorderingState (static_cast).
int BidirectionalReorderingState::Compare(const FFState& o) const
{
  if (&o == this)
    return 0;

  const BidirectionalReorderingState &other = static_cast<const BidirectionalReorderingState &>(o);
  // call the backward comparison a single time and reuse its result
  const int backwardOrder = m_backward->Compare(*other.m_backward);
  if (backwardOrder < 0)
    return -1;
  if (backwardOrder > 0)
    return 1;
  return m_forward->Compare(*other.m_forward);
}
return NM;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeLeftRight(int reoDistance) const
{
if (reoDistance >= 1) {
return R;
// Expands the backward state and then the forward state with `topt`, and
// returns a new bidirectional state that wraps both results and keeps this
// state's configuration and offset.
LexicalReorderingState*
BidirectionalReorderingState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  LexicalReorderingState *nextBackward = m_backward->Expand(topt, input, scores);
  LexicalReorderingState *nextForward = m_forward->Expand(topt, input, scores);
  return new BidirectionalReorderingState(m_configuration, nextBackward,
                                          nextForward, m_offset);
}
return L;
}
///////////////////////////
//HierarchicalReorderingBackwardState
// Builds the state that follows `prev` after applying `topt`, keeping a copy
// of the supplied reordering stack.
HierarchicalReorderingBackwardState::
HierarchicalReorderingBackwardState(const HierarchicalReorderingBackwardState *prev,
                                    const TranslationOption &topt,
                                    ReorderingStack reoStack)
  : LexicalReorderingState(prev, topt)
  , m_reoStack(reoStack)
{
}
// Builds an initial backward state for `config` at score offset `offset`;
// m_reoStack is default-constructed.
HierarchicalReorderingBackwardState::
HierarchicalReorderingBackwardState(const LexReoConf &config, size_t offset)
  : LexicalReorderingState(config, LexReoConf::Backward, offset)
{
}
///////////////////////////
//HierarchicalReorderingForwardState
// Initial-state constructor: forward-direction base state at `offset`, first
// flag set, no previous range (NOT_FOUND, NOT_FOUND) and a coverage bitmap
// constructed from `size`.
HierarchicalReorderingForwardState::
HierarchicalReorderingForwardState(const LexicalReorderingConfiguration &config,
                                   size_t size, size_t offset)
  : LexicalReorderingState(config, LexicalReorderingConfiguration::Forward, offset)
  , m_first(true)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_coverage(size)
{
}
// Successor-state constructor: copies the coverage of `prev`, remembers the
// source range of `topt` as previous range, and marks that range as covered.
HierarchicalReorderingForwardState::
HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev,
                                   const TranslationOption &topt)
  : LexicalReorderingState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage)
{
  const WordsRange appliedRange = topt.GetSourceWordsRange();
  m_coverage.SetValue(appliedRange.GetStartPos(), appliedRange.GetEndPos(), true);
}
int HierarchicalReorderingForwardState::Compare(const FFState& o) const
{
if (&o == this)
return 0;
const HierarchicalReorderingForwardState* other = static_cast<const HierarchicalReorderingForwardState*>(&o);
if (m_prevRange == other->m_prevRange) {
return ComparePrevScores(other->m_prevOption);
} else if (m_prevRange < other->m_prevRange) {
return -1;
int HierarchicalReorderingBackwardState::Compare(const FFState& o) const
{
const HierarchicalReorderingBackwardState& other = static_cast<const HierarchicalReorderingBackwardState&>(o);
return m_reoStack.Compare(other.m_reoStack);
}
return 1;
}
// For compatibility with the phrase-based reordering model, scoring is one step delayed.
// The forward model determines orientations heuristically as follows:
// mono: if the next phrase comes after the conditioning phrase and
// - there is a gap to the right of the conditioning phrase, or
// - the next phrase immediately follows it
// swap: if the next phrase goes before the conditioning phrase and
// - there is a gap to the left of the conditioning phrase, or
// - the next phrase immediately precedes it
// dright: if the next phrase follows the conditioning phrase and other stuff comes in between
// dleft: if the next phrase precedes the conditioning phrase and other stuff comes in between
LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
{
const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
const WordsRange currWordsRange = topt.GetSourceWordsRange();
// keep track of the current coverage ourselves so we don't need the hypothesis
WordsBitmap coverage = m_coverage;
coverage.SetValue(currWordsRange.GetStartPos(), currWordsRange.GetEndPos(), true);
HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack);
ReorderingType reoType;
const LexReoConf::ModelType modelType = m_configuration.GetModelType();
ReorderingType reoType;
int reoDistance = nextState->m_reoStack.ShiftReduce(topt.GetSourceWordsRange());
if (m_first) {
} else {
if (modelType == LexicalReorderingConfiguration::MSD) {
reoType = GetOrientationTypeMSD(currWordsRange, coverage);
} else if (modelType == LexicalReorderingConfiguration::MSLR) {
reoType = GetOrientationTypeMSLR(currWordsRange, coverage);
} else if (modelType == LexicalReorderingConfiguration::Monotonic) {
reoType = GetOrientationTypeMonotonic(currWordsRange, coverage);
if (modelType == LexReoConf::MSD) {
reoType = GetOrientationTypeMSD(reoDistance);
} else if (modelType == LexReoConf::MSLR) {
reoType = GetOrientationTypeMSLR(reoDistance);
} else if (modelType == LexReoConf::LeftRight) {
reoType = GetOrientationTypeLeftRight(reoDistance);
} else {
reoType = GetOrientationTypeLeftRight(currWordsRange, coverage);
reoType = GetOrientationTypeMonotonic(reoDistance);
}
CopyScores(scores, topt, input, reoType);
return nextState;
}
return new HierarchicalReorderingForwardState(this, topt);
}
LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const
{
if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
(!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
return M;
} else if (currRange.GetEndPos() < m_prevRange.GetStartPos() &&
(!coverage.GetValue(m_prevRange.GetStartPos()-1) || currRange.GetEndPos() == m_prevRange.GetStartPos()-1)) {
return S;
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMSD(int reoDistance) const
{
if (reoDistance == 1) {
return M;
} else if (reoDistance == -1) {
return S;
}
return D;
}
return D;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMSLR(WordsRange currRange, WordsBitmap coverage) const
{
if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
(!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
return M;
} else if (currRange.GetEndPos() < m_prevRange.GetStartPos() &&
(!coverage.GetValue(m_prevRange.GetStartPos()-1) || currRange.GetEndPos() == m_prevRange.GetStartPos()-1)) {
return S;
} else if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
return DR;
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMSLR(int reoDistance) const
{
if (reoDistance == 1) {
return M;
} else if (reoDistance == -1) {
return S;
} else if (reoDistance > 1) {
return DR;
}
return DL;
}
return DL;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMonotonic(WordsRange currRange, WordsBitmap coverage) const
{
if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
(!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
return M;
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMonotonic(int reoDistance) const
{
if (reoDistance == 1) {
return M;
}
return NM;
}
return NM;
}
LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeLeftRight(WordsRange currRange, WordsBitmap /* coverage */) const
{
if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
return R;
LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeLeftRight(int reoDistance) const
{
if (reoDistance >= 1) {
return R;
}
return L;
}
///////////////////////////
//HierarchicalReorderingForwardState
// Initial forward state: flagged as the first step (m_first == true), with
// the previous range set to (NOT_FOUND, NOT_FOUND) and the coverage bitmap
// constructed from `size`. `config` and `offset` are handed to the base
// class together with the Forward direction.
HierarchicalReorderingForwardState::
HierarchicalReorderingForwardState(const LexReoConf &config,
                                   size_t size,
                                   size_t offset)
  : LexicalReorderingState(config, LexReoConf::Forward, offset)
  , m_first(true)
  , m_prevRange(NOT_FOUND, NOT_FOUND)
  , m_coverage(size)
{
}
// Successor state reached from `prev` by applying `topt`: stores the source
// range of `topt` as the previous range and adds that range to the coverage
// copied from `prev`.
HierarchicalReorderingForwardState::
HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev,
                                   const TranslationOption &topt)
  : LexicalReorderingState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage)
{
  // The copied bitmap does not yet include the option being applied.
  const WordsRange &applied = topt.GetSourceWordsRange();
  m_coverage.SetValue(applied.GetStartPos(), applied.GetEndPos(), true);
}
int HierarchicalReorderingForwardState::Compare(const FFState& o) const
{
if (&o == this)
return 0;
const HierarchicalReorderingForwardState* other = static_cast<const HierarchicalReorderingForwardState*>(&o);
if (m_prevRange == other->m_prevRange) {
return ComparePrevScores(other->m_prevOption);
} else if (m_prevRange < other->m_prevRange) {
return -1;
}
return 1;
}
// For compatibility with the phrase-based reordering model, scoring is one step delayed.
// The forward model determines orientations heuristically as follows:
// mono: if the next phrase comes after the conditioning phrase and
// - there is a gap to the right of the conditioning phrase, or
// - the next phrase immediately follows it
// swap: if the next phrase goes before the conditioning phrase and
// - there is a gap to the left of the conditioning phrase, or
// - the next phrase immediately precedes it
// dright: if the next phrase follows the cond. phr.
// and other stuff comes in between
// dleft: if the next phrase precedes the conditioning phrase
// and other stuff comes in between
// Expands this forward state with the translation option `topt`.
//
// Unless this is the initial state (m_first), the orientation of `topt`
// relative to the previously applied range (m_prevRange) is determined
// according to the configured model type and passed to CopyScores()
// together with `scores`, `topt` and `input`. Model types other than MSD,
// MSLR and Monotonic fall through to the left/right classification.
//
// Returns a heap-allocated successor state built from this state and `topt`.
LexicalReorderingState*
HierarchicalReorderingForwardState::
Expand(const TranslationOption& topt, const InputType& input,
       ScoreComponentCollection* scores) const
{
  LexReoConf::ModelType const modelType = m_configuration.GetModelType();
  WordsRange const& currRange = topt.GetSourceWordsRange();

  if (!m_first) {
    // Keep track of the current coverage ourselves so we don't need the
    // hypothesis. The copy is only needed for the orientation check; the
    // successor state rebuilds its own coverage in its constructor.
    WordsBitmap cov = m_coverage;
    cov.SetValue(currRange.GetStartPos(), currRange.GetEndPos(), true);

    ReorderingType reoType;
    if (modelType == LexReoConf::MSD) {
      reoType = GetOrientationTypeMSD(currRange, cov);
    } else if (modelType == LexReoConf::MSLR) {
      reoType = GetOrientationTypeMSLR(currRange, cov);
    } else if (modelType == LexReoConf::Monotonic) {
      reoType = GetOrientationTypeMonotonic(currRange, cov);
    } else {
      reoType = GetOrientationTypeLeftRight(currRange, cov);
    }
    CopyScores(scores, topt, input, reoType);
  }

  return new HierarchicalReorderingForwardState(this, topt);
}
// Three-way orientation (M / S / D) of `currRange` relative to m_prevRange.
// M if IsMonotonicStep() accepts the step, otherwise S if IsSwap() accepts
// it, otherwise D.
LexicalReorderingState::ReorderingType
HierarchicalReorderingForwardState::
GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const
{
  if (IsMonotonicStep(m_prevRange, currRange, coverage)) {
    return M;
  }
  if (IsSwap(m_prevRange, currRange, coverage)) {
    return S;
  }
  return D;
}
// Four-way orientation (M / S / DR / DL) of `currRange` relative to
// m_prevRange. M and S are decided by IsMonotonicStep() and IsSwap(); any
// other step is DR when `currRange` starts after the end of m_prevRange and
// DL otherwise.
LexicalReorderingState::ReorderingType
HierarchicalReorderingForwardState::
GetOrientationTypeMSLR(WordsRange currRange, WordsBitmap coverage) const
{
  if (IsMonotonicStep(m_prevRange, currRange, coverage)) {
    return M;
  }
  if (IsSwap(m_prevRange, currRange, coverage)) {
    return S;
  }
  if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
    return DR;
  }
  return DL;
}
// Two-way orientation: M when IsMonotonicStep() accepts the step from
// m_prevRange to `currRange`, NM for everything else.
LexicalReorderingState::ReorderingType
HierarchicalReorderingForwardState::
GetOrientationTypeMonotonic(WordsRange currRange, WordsBitmap coverage) const
{
  if (IsMonotonicStep(m_prevRange, currRange, coverage)) {
    return M;
  }
  return NM;
}
// Two-way orientation: R when `currRange` starts after the end of
// m_prevRange, L otherwise. `coverage` is not consulted here; the parameter
// only keeps the signature parallel to the other GetOrientationType*()
// members.
LexicalReorderingState::ReorderingType
HierarchicalReorderingForwardState::
GetOrientationTypeLeftRight(WordsRange currRange, WordsBitmap coverage) const
{
  if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
    return R;
  }
  return L;
}
return L;
}
}

View File

@ -94,6 +94,9 @@ private:
//! Abstract class for lexical reordering model states
class LexicalReorderingState : public FFState
{
typedef LexicalReorderingConfiguration::Forward Forward;
typedef LexicalReorderingConfiguration::Backward Backward;
public:
virtual int Compare(const FFState& o) const = 0;
virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const = 0;

View File

@ -418,7 +418,8 @@ namespace Moses
if(d == InvalidOffT) d = fTell(ot);
else
{
TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
TRACE_ERR("ERROR: source phrase already inserted (A)!\nline("
<< lnc << "): '" << line << "\n");
return false;
}
}
@ -448,7 +449,8 @@ namespace Moses
if(d == InvalidOffT) d = fTell(ot);
else
{
TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" << lnc << "): '" << line << "\n");
TRACE_ERR("ERROR: source phrase already inserted (A)!\nline("
<< lnc << "): '" << line << "\n");
return false;
}
}
@ -602,7 +604,8 @@ namespace Moses
auxCacheForSrcPhrase(f);
}
}
std::cerr << "Cached " << m_Cache.size() - prev_cache_size << " new primary reordering table keys\n";
std::cerr << "Cached " << m_Cache.size() - prev_cache_size
<< " new primary reordering table keys\n";
}
}

View File

@ -1,5 +1,6 @@
// $Id$
// -*- c++ -*-
// vim:tabstop=2
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -24,151 +25,155 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
// Constructs the table with its built-in defaults and immediately loads
// its contents from `filePath`. Load() re-reads m_numScoreComponent and
// m_multipleScoreTrees from the file, so the values set here are only the
// pre-load defaults.
LexicalReorderingTableCompact::
LexicalReorderingTableCompact(const std::string& filePath,
                              const std::vector<FactorType>& f_factors,
                              const std::vector<FactorType>& e_factors,
                              const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors),
    m_inMemory(StaticData::Instance().UseMinlexrInMemory()),
    m_numScoreComponent(6),
    m_multipleScoreTrees(true),
    m_hash(10, 16),
    m_scoreTrees(1)
{
  Load(filePath);
}
// Constructs the table with its built-in defaults and then loads its
// contents from `filePath`. Load() overwrites m_numScoreComponent and
// m_multipleScoreTrees with the values stored in the file.
LexicalReorderingTableCompact::LexicalReorderingTableCompact(
  const std::string& filePath,
  const std::vector<FactorType>& f_factors,
  const std::vector<FactorType>& e_factors,
  const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors),
    m_inMemory(StaticData::Instance().UseMinlexrInMemory()),
    m_numScoreComponent(6),
    m_multipleScoreTrees(true),
    m_hash(10, 16),
    m_scoreTrees(1)
{
  Load(filePath);
}
// Constructs an empty table with the built-in defaults; nothing is loaded.
// m_scoreTrees starts with a single null entry.
LexicalReorderingTableCompact::
LexicalReorderingTableCompact(const std::vector<FactorType>& f_factors,
                              const std::vector<FactorType>& e_factors,
                              const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors),
    m_inMemory(StaticData::Instance().UseMinlexrInMemory()),
    m_numScoreComponent(6),
    m_multipleScoreTrees(true),
    m_hash(10, 16),
    m_scoreTrees(1)
{
}
// Constructs an empty table with the built-in defaults; no file is read.
// m_scoreTrees starts with a single null entry.
LexicalReorderingTableCompact::LexicalReorderingTableCompact(
  const std::vector<FactorType>& f_factors,
  const std::vector<FactorType>& e_factors,
  const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors),
    m_inMemory(StaticData::Instance().UseMinlexrInMemory()),
    m_numScoreComponent(6),
    m_multipleScoreTrees(true),
    m_hash(10, 16),
    m_scoreTrees(1)
{
}
// Deletes every score tree held in m_scoreTrees, front to back.
// Entries that are still null are harmless to delete.
LexicalReorderingTableCompact::
~LexicalReorderingTableCompact()
{
  typedef std::vector<ScoreTree*>::iterator TreeIter;
  for (TreeIter it = m_scoreTrees.begin(); it != m_scoreTrees.end(); ++it) {
    delete *it;
  }
}
// Deletes every score tree held in m_scoreTrees, in index order.
// Deleting an entry that is still null is a no-op.
LexicalReorderingTableCompact::~LexicalReorderingTableCompact()
{
  const size_t numTrees = m_scoreTrees.size();
  for (size_t t = 0; t != numTrees; ++t) {
    ScoreTree* tree = m_scoreTrees[t];
    delete tree;
  }
}
std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase& f,
const Phrase& e,
const Phrase& c)
{
std::string key;
Scores scores;
std::vector<float>
LexicalReorderingTableCompact::
GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
{
std::string key;
Scores scores;
if(0 == c.GetSize())
key = MakeKey(f, e, c);
else
for(size_t i = 0; i <= c.GetSize(); ++i) {
Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
key = MakeKey(f,e,sub_c);
}
size_t index = m_hash[key];
if(m_hash.GetSize() != index) {
std::string scoresString;
if(m_inMemory)
scoresString = m_scoresMemory[index];
if(0 == c.GetSize())
key = MakeKey(f, e, c);
else
scoresString = m_scoresMapped[index];
for(size_t i = 0; i <= c.GetSize(); ++i)
{
Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
key = MakeKey(f,e,sub_c);
}
BitWrapper<> bitStream(scoresString);
for(size_t i = 0; i < m_numScoreComponent; i++)
scores.push_back(m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
size_t index = m_hash[key];
if(m_hash.GetSize() != index)
{
std::string scoresString;
if(m_inMemory)
scoresString = m_scoresMemory[index];
else
scoresString = m_scoresMapped[index];
return scores;
BitWrapper<> bitStream(scoresString);
for(size_t i = 0; i < m_numScoreComponent; i++)
scores.push_back(m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
return scores;
}
return Scores();
}
return Scores();
}
std::string LexicalReorderingTableCompact::MakeKey(const Phrase& f,
const Phrase& e,
const Phrase& c) const
{
return MakeKey(Trim(f.GetStringRep(m_FactorsF)),
Trim(e.GetStringRep(m_FactorsE)),
Trim(c.GetStringRep(m_FactorsC)));
}
std::string LexicalReorderingTableCompact::MakeKey(const std::string& f,
const std::string& e,
const std::string& c) const
{
std::string key;
if(!f.empty()) {
key += f;
std::string
LexicalReorderingTableCompact::
MakeKey(const Phrase& f,
const Phrase& e,
const Phrase& c) const
{
return MakeKey(Trim(f.GetStringRep(m_FactorsF)),
Trim(e.GetStringRep(m_FactorsE)),
Trim(c.GetStringRep(m_FactorsC)));
}
if(!m_FactorsE.empty()) {
if(!key.empty()) {
key += " ||| ";
}
key += e;
}
if(!m_FactorsC.empty()) {
if(!key.empty()) {
key += " ||| ";
}
key += c;
}
key += " ||| ";
return key;
}
LexicalReorderingTable* LexicalReorderingTableCompact::CheckAndLoad(
const std::string& filePath,
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors)
{
// Assembles the lookup key from already-rendered phrase strings.
// `f` is included whenever it is non-empty; `e` and `c` are included
// whenever the corresponding factor list (m_FactorsE / m_FactorsC) is
// non-empty, even if the string itself is empty. Components are joined
// with " ||| " and the key always ends with a trailing " ||| ".
std::string
LexicalReorderingTableCompact::
MakeKey(const std::string& f,
        const std::string& e,
        const std::string& c) const
{
  const std::string separator(" ||| ");

  // Starting from a copy of f matches appending it only when non-empty.
  std::string result(f);

  if (!m_FactorsE.empty()) {
    if (!result.empty()) {
      result += separator;
    }
    result += e;
  }

  if (!m_FactorsC.empty()) {
    if (!result.empty()) {
      result += separator;
    }
    result += c;
  }

  result += separator;
  return result;
}
LexicalReorderingTable*
LexicalReorderingTableCompact::
CheckAndLoad
(const std::string& filePath,
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors)
{
#ifdef HAVE_CMPH
std::string minlexr = ".minlexr";
// file name is specified without suffix
if(FileExists(filePath + minlexr)) {
//there exists a compact binary version use that
VERBOSE(2,"Using compact lexical reordering table" << std::endl);
return new LexicalReorderingTableCompact(filePath + minlexr, f_factors, e_factors, c_factors);
}
// file name is specified with suffix
if(filePath.substr(filePath.length() - minlexr.length(), minlexr.length()) == minlexr
&& FileExists(filePath)) {
//there exists a compact binary version use that
VERBOSE(2,"Using compact lexical reordering table" << std::endl);
return new LexicalReorderingTableCompact(filePath, f_factors, e_factors, c_factors);
}
std::string minlexr = ".minlexr";
// file name is specified without suffix
if(FileExists(filePath + minlexr)) {
//there exists a compact binary version use that
VERBOSE(2,"Using compact lexical reordering table" << std::endl);
return new LexicalReorderingTableCompact(filePath + minlexr, f_factors, e_factors, c_factors);
}
// file name is specified with suffix
if(filePath.substr(filePath.length() - minlexr.length(), minlexr.length()) == minlexr
&& FileExists(filePath)) {
//there exists a compact binary version use that
VERBOSE(2,"Using compact lexical reordering table" << std::endl);
return new LexicalReorderingTableCompact(filePath, f_factors, e_factors, c_factors);
}
#endif
return 0;
}
void LexicalReorderingTableCompact::Load(std::string filePath)
{
std::FILE* pFile = std::fopen(filePath.c_str(), "r");
if(m_inMemory)
m_hash.Load(pFile);
else
m_hash.LoadIndex(pFile);
size_t read = 0;
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, pFile);
if(m_multipleScoreTrees) {
m_scoreTrees.resize(m_numScoreComponent);
for(size_t i = 0; i < m_numScoreComponent; i++)
m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
} else {
m_scoreTrees.resize(1);
m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
return 0;
}
if(m_inMemory)
m_scoresMemory.load(pFile, false);
else
m_scoresMapped.load(pFile, true);
}
// Reads the compact table from `filePath`, in this order:
//   1. the hash (fully via Load() when m_inMemory, otherwise index only),
//   2. m_numScoreComponent and m_multipleScoreTrees as raw values,
//   3. the Huffman score trees (one per score component, or a single one),
//   4. the score strings, into m_scoresMemory or m_scoresMapped.
//
// NOTE(review): the result of std::fopen() is not checked and the number of
// items returned by std::fread() is accumulated but never inspected, so a
// missing or truncated file is not detected here. The FILE handle is also
// never closed in this function -- confirm whether m_scoresMapped needs it
// to stay open before adding an fclose().
void
LexicalReorderingTableCompact::Load(std::string filePath)
{
  std::FILE* pFile = std::fopen(filePath.c_str(), "r");

  if (m_inMemory) {
    m_hash.Load(pFile);
  } else {
    m_hash.LoadIndex(pFile);
  }

  size_t read = 0;
  read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
  read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, pFile);

  // One tree per score component, or one shared tree for all of them.
  const size_t numTrees = m_multipleScoreTrees ? m_numScoreComponent : 1;
  m_scoreTrees.resize(numTrees);
  for (size_t t = 0; t < numTrees; ++t) {
    m_scoreTrees[t] = new CanonicalHuffman<float>(pFile);
  }

  if (m_inMemory) {
    m_scoresMemory.load(pFile, false);
  } else {
    m_scoresMapped.load(pFile, true);
  }
}
}

View File

@ -36,49 +36,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
class LexicalReorderingTableCompact: public LexicalReorderingTable
{
private:
bool m_inMemory;
class LexicalReorderingTableCompact:
public LexicalReorderingTable
{
private:
bool m_inMemory;
size_t m_numScoreComponent;
bool m_multipleScoreTrees;
size_t m_numScoreComponent;
bool m_multipleScoreTrees;
BlockHashIndex m_hash;
BlockHashIndex m_hash;
typedef CanonicalHuffman<float> ScoreTree;
std::vector<ScoreTree*> m_scoreTrees;
typedef CanonicalHuffman<float> ScoreTree;
std::vector<ScoreTree*> m_scoreTrees;
StringVector<unsigned char, unsigned long, MmapAllocator> m_scoresMapped;
StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
StringVector<unsigned char, unsigned long, MmapAllocator> m_scoresMapped;
StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
public:
LexicalReorderingTableCompact(
const std::string& filePath,
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors);
public:
LexicalReorderingTableCompact(const std::string& filePath,
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors);
LexicalReorderingTableCompact(
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors);
LexicalReorderingTableCompact(const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors);
virtual ~LexicalReorderingTableCompact();
virtual
~LexicalReorderingTableCompact();
virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
virtual
std::vector<float>
GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
static LexicalReorderingTable* CheckAndLoad(
const std::string& filePath,
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors);
static
LexicalReorderingTable*
CheckAndLoad(const std::string& filePath,
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors);
void Load(std::string filePath);
};
void
Load(std::string filePath);
};
}