mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-01 08:21:47 +03:00
Cleaned up lexical reordering scoring, and passed vectors by reference instead of copying them. Fixed bugs in extract: it used to choose the wrong orientation at the end of sentences, and the hierarchical model type is no longer dependent on the phrase-based model type.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/hierarchical-reo@2892 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
1b1f786373
commit
c65945b531
@ -102,8 +102,8 @@ class LexicalReorderingState : public FFState {
|
||||
static const ReorderingType D = 2; // discontinuous
|
||||
static const ReorderingType DL = 2; // discontinuous, left
|
||||
static const ReorderingType DR = 3; // discontinuous, right
|
||||
static const ReorderingType R = 0; // left
|
||||
static const ReorderingType L = 1; // right
|
||||
static const ReorderingType R = 0; // right
|
||||
static const ReorderingType L = 1; // left
|
||||
};
|
||||
|
||||
class BidirectionalReorderingState : public LexicalReorderingState {
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <cstdlib>
|
||||
#include <numeric>
|
||||
#include <cstdio>
|
||||
//#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "zlib.h"
|
||||
@ -51,31 +50,31 @@ void ModelScore::reset_f() {
|
||||
}
|
||||
}
|
||||
|
||||
void ModelScore::add_example(const std::string& previous, std::string& next) {
|
||||
void ModelScore::add_example(const string& previous, string& next) {
|
||||
count_fe_prev[getType(previous)]++;
|
||||
count_f_prev[getType(previous)]++;
|
||||
count_fe_next[getType(next)]++;
|
||||
count_f_next[getType(next)]++;
|
||||
}
|
||||
|
||||
const std::vector<double>& ModelScore::get_scores_fe_prev() const {
|
||||
const vector<double>& ModelScore::get_scores_fe_prev() const {
|
||||
return count_fe_prev;
|
||||
}
|
||||
|
||||
const std::vector<double>& ModelScore::get_scores_fe_next() const {
|
||||
const vector<double>& ModelScore::get_scores_fe_next() const {
|
||||
return count_fe_next;
|
||||
}
|
||||
|
||||
const std::vector<double>& ModelScore::get_scores_f_prev() const {
|
||||
const vector<double>& ModelScore::get_scores_f_prev() const {
|
||||
return count_f_prev;
|
||||
}
|
||||
|
||||
const std::vector<double>& ModelScore::get_scores_f_next() const {
|
||||
const vector<double>& ModelScore::get_scores_f_next() const {
|
||||
return count_f_next;
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScore::getType(const std::string& s) {
|
||||
ORIENTATION ModelScore::getType(const string& s) {
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
} else if (s.compare("swap") == 0) {
|
||||
@ -95,7 +94,7 @@ ORIENTATION ModelScore::getType(const std::string& s) {
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScoreMSLR::getType(const std::string& s) {
|
||||
ORIENTATION ModelScoreMSLR::getType(const string& s) {
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
} else if (s.compare("swap") == 0) {
|
||||
@ -114,7 +113,7 @@ ORIENTATION ModelScoreMSLR::getType(const std::string& s) {
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScoreLR::getType(const std::string& s) {
|
||||
ORIENTATION ModelScoreLR::getType(const string& s) {
|
||||
if (s.compare("mono") == 0 || s.compare("dright") == 0) {
|
||||
return DRIGHT;
|
||||
} else if (s.compare("swap") == 0 || s.compare("dleft") == 0) {
|
||||
@ -129,7 +128,7 @@ ORIENTATION ModelScoreLR::getType(const std::string& s) {
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScoreMSD::getType(const std::string& s) {
|
||||
ORIENTATION ModelScoreMSD::getType(const string& s) {
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
} else if (s.compare("swap") == 0) {
|
||||
@ -147,7 +146,7 @@ ORIENTATION ModelScoreMSD::getType(const std::string& s) {
|
||||
}
|
||||
}
|
||||
|
||||
ORIENTATION ModelScoreMonotonicity::getType(const std::string& s) {
|
||||
ORIENTATION ModelScoreMonotonicity::getType(const string& s) {
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
} else if (s.compare("swap") == 0 ||
|
||||
@ -163,152 +162,127 @@ ORIENTATION ModelScoreMonotonicity::getType(const std::string& s) {
|
||||
}
|
||||
|
||||
|
||||
std::vector<double> ScorerMSLR::createSmoothing(std::vector<double> scores, double weight) const {
|
||||
double total = accumulate(scores.begin(), scores.end(), 0);
|
||||
vector<double> res;
|
||||
res.push_back(weight*(scores[MONO]+0.1)/total);
|
||||
res.push_back(weight*(scores[SWAP]+0.1)/total);
|
||||
res.push_back(weight*(scores[DRIGHT]+0.1)/total);
|
||||
res.push_back(weight*(scores[DLEFT]+0.1)/total);
|
||||
return res;
|
||||
|
||||
void ScorerMSLR::score(const vector<double>& all_scores, vector<double>& scores) const {
|
||||
scores.push_back(all_scores[MONO]);
|
||||
scores.push_back(all_scores[SWAP]);
|
||||
scores.push_back(all_scores[DLEFT]);
|
||||
scores.push_back(all_scores[DRIGHT]);
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMSLR::createConstSmoothing(double weight) const {
|
||||
vector<double> smoothing;
|
||||
void ScorerMSD::score(const vector<double>& all_scores, vector<double>& scores) const {
|
||||
scores.push_back(all_scores[MONO]);
|
||||
scores.push_back(all_scores[SWAP]);
|
||||
scores.push_back(all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]);
|
||||
}
|
||||
|
||||
void ScorerMonotonicity::score(const vector<double>& all_scores, vector<double>& scores) const {
|
||||
scores.push_back(all_scores[MONO]);
|
||||
scores.push_back(all_scores[SWAP]+all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]+all_scores[NOMONO]);
|
||||
}
|
||||
|
||||
|
||||
void ScorerLR::score(const vector<double>& all_scores, vector<double>& scores) const {
|
||||
scores.push_back(all_scores[MONO]+all_scores[DRIGHT]);
|
||||
scores.push_back(all_scores[SWAP]+all_scores[DLEFT]);
|
||||
}
|
||||
|
||||
|
||||
void ScorerMSLR::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const {
|
||||
double total = accumulate(scores.begin(), scores.end(), 0);
|
||||
smoothing.push_back(weight*(scores[MONO]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[SWAP]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[DLEFT]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[DRIGHT]+0.1)/total);
|
||||
}
|
||||
|
||||
void ScorerMSLR::createConstSmoothing(double weight, vector<double>& smoothing) const {
|
||||
for (int i=1; i<=4; ++i) {
|
||||
smoothing.push_back(weight);
|
||||
}
|
||||
return smoothing;
|
||||
}
|
||||
|
||||
|
||||
std::vector<double> ScorerMSD::createSmoothing(std::vector<double> scores, double weight) const {
|
||||
void ScorerMSD::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const {
|
||||
double total = accumulate(scores.begin(), scores.end(), 0);
|
||||
vector<double> res;
|
||||
res.push_back(weight*(scores[MONO]+0.1)/total);
|
||||
res.push_back(weight*(scores[SWAP]+0.1)/total);
|
||||
res.push_back(weight*(scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+0.1)/total);
|
||||
return res;
|
||||
smoothing.push_back(weight*(scores[MONO]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[SWAP]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+0.1)/total);
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMSD::createConstSmoothing(double weight) const {
|
||||
vector<double> smoothing;
|
||||
void ScorerMSD::createConstSmoothing(double weight, vector<double>& smoothing) const {
|
||||
for (int i=1; i<=3; ++i) {
|
||||
smoothing.push_back(weight);
|
||||
}
|
||||
return smoothing;
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMonotonicity::createSmoothing(std::vector<double> scores, double weight) const {
|
||||
void ScorerMonotonicity::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const {
|
||||
double total = accumulate(scores.begin(), scores.end(), 0);
|
||||
vector<double> res;
|
||||
res.push_back(weight*(scores[MONO]+0.1)/total);
|
||||
res.push_back(weight*(scores[SWAP]+scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+scores[NOMONO]+0.1)/total);
|
||||
return res;
|
||||
smoothing.push_back(weight*(scores[MONO]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+scores[NOMONO]+0.1)/total);
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMonotonicity::createConstSmoothing(double weight) const {
|
||||
vector<double> smoothing;
|
||||
void ScorerMonotonicity::createConstSmoothing(double weight, vector<double>& smoothing) const {
|
||||
for (double i=1; i<=2; ++i) {
|
||||
smoothing.push_back(weight);
|
||||
}
|
||||
return smoothing;
|
||||
}
|
||||
|
||||
|
||||
std::vector<double> ScorerLR::createSmoothing(std::vector<double> scores, double weight) const {
|
||||
void ScorerLR::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const {
|
||||
double total = accumulate(scores.begin(), scores.end(), 0);
|
||||
vector<double> res;
|
||||
res.push_back(weight*(scores[MONO]+scores[DRIGHT]+0.1)/total);
|
||||
res.push_back(weight*(scores[SWAP]+scores[DLEFT])/total);
|
||||
return res;
|
||||
smoothing.push_back(weight*(scores[MONO]+scores[DRIGHT]+0.1)/total);
|
||||
smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT])/total);
|
||||
}
|
||||
|
||||
std::vector<double> ScorerLR::createConstSmoothing(double weight) const {
|
||||
vector<double> smoothing;
|
||||
void ScorerLR::createConstSmoothing(double weight, vector<double>& smoothing) const {
|
||||
for (int i=1; i<=2; ++i) {
|
||||
smoothing.push_back(weight);
|
||||
}
|
||||
return smoothing;
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMSLR::score(vector<double> all_scores) const {
|
||||
vector<double> s;
|
||||
s.push_back(all_scores[MONO]);
|
||||
s.push_back(all_scores[SWAP]);
|
||||
s.push_back(all_scores[DRIGHT]);
|
||||
s.push_back(all_scores[DLEFT]);
|
||||
return s;
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMSD::score(vector<double> all_scores) const {
|
||||
vector<double> s;
|
||||
s.push_back(all_scores[MONO]);
|
||||
s.push_back(all_scores[SWAP]);
|
||||
s.push_back(all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]);
|
||||
return s;
|
||||
}
|
||||
|
||||
std::vector<double> ScorerMonotonicity::score(vector<double> all_scores) const {
|
||||
vector<double> s;
|
||||
s.push_back(all_scores[MONO]);
|
||||
s.push_back(all_scores[SWAP]+all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]+all_scores[NOMONO]);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
std::vector<double> ScorerLR::score(vector<double> all_scores) const {
|
||||
vector<double> s;
|
||||
s.push_back(all_scores[MONO]+all_scores[DRIGHT]);
|
||||
s.push_back(all_scores[SWAP]+all_scores[DLEFT]);
|
||||
return s;
|
||||
}
|
||||
|
||||
void Model::score_fe(const string& f, const string& e) {
|
||||
if (!fe) //Make sure we do not do anything if it is not a fe model
|
||||
return;
|
||||
//file >> f >> " " >> e >> " ||| ";
|
||||
fprintf(file,"%s ||| %s ||| ",f.c_str(),e.c_str());
|
||||
//condition on the previous phrase
|
||||
if (previous) {
|
||||
vector<double> scores = scorer->score(modelscore->get_scores_fe_prev());
|
||||
vector<double> scores;
|
||||
scorer->score(modelscore->get_scores_fe_prev(), scores);
|
||||
double sum = 0;
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
scores[i] += smoothing_prev[i];
|
||||
sum += scores[i];
|
||||
}
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
//file >> scores[i]/sum >> " ";
|
||||
fprintf(file,"%f ",scores[i]/sum);
|
||||
}
|
||||
fprintf(file, "||| ");
|
||||
}
|
||||
//condition on the next phrase
|
||||
if (next) {
|
||||
//file >> "||| ";
|
||||
fprintf(file, "||| ");
|
||||
vector<double> scores = scorer->score(modelscore->get_scores_fe_next());
|
||||
vector<double> scores;
|
||||
scorer->score(modelscore->get_scores_fe_next(), scores);
|
||||
double sum = 0;
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
scores[i] += smoothing_next[i];
|
||||
sum += scores[i];
|
||||
}
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
//file >> scores[i]/sum >> " ";
|
||||
fprintf(file, "%f ", scores[i]/sum);
|
||||
}
|
||||
}
|
||||
//file >> "\n";
|
||||
fprintf(file,"\n");
|
||||
}
|
||||
|
||||
void Model::score_f(const string& f) {
|
||||
if (fe) //Make sure we do not do anything if it is not a f model
|
||||
return;
|
||||
//file >> f >> " ||| ";
|
||||
fprintf(file, "%s ||| ", f.c_str());
|
||||
//condition on the previous phrase
|
||||
if (previous) {
|
||||
vector<double> scores = scorer->score(modelscore->get_scores_f_prev());
|
||||
vector<double> scores;
|
||||
scorer->score(modelscore->get_scores_f_prev(), scores);
|
||||
double sum = 0;
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
scores[i] += smoothing_prev[i];
|
||||
@ -317,23 +291,21 @@ void Model::score_f(const string& f) {
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
fprintf(file, "%f ", scores[i]/sum);
|
||||
}
|
||||
fprintf(file, "||| ");
|
||||
}
|
||||
//condition on the next phrase
|
||||
if (next) {
|
||||
//file >> "||| ";
|
||||
fprintf(file, "||| ");
|
||||
vector<double> scores = scorer->score(modelscore->get_scores_f_next());
|
||||
vector<double> scores;
|
||||
scorer->score(modelscore->get_scores_f_next(), scores);
|
||||
double sum = 0;
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
scores[i] += smoothing_next[i];
|
||||
sum += scores[i];
|
||||
}
|
||||
for(int i=0; i<scores.size(); ++i) {
|
||||
//file >> scores[i]/sum >> " ";
|
||||
fprintf(file, "%f ", scores[i]/sum);
|
||||
}
|
||||
}
|
||||
//file >> "\n";
|
||||
fprintf(file, "\n");
|
||||
}
|
||||
|
||||
@ -395,7 +367,7 @@ void Model::split_config(const string& config, string& dir, string& lang, string
|
||||
getline(is, lang, '-');
|
||||
}
|
||||
|
||||
Model* Model::createModel(ModelScore* modelscore, const std::string& config, const std::string& filepath) {
|
||||
Model* Model::createModel(ModelScore* modelscore, const string& config, const string& filepath) {
|
||||
string dir, lang, orient, filename;
|
||||
split_config(config,dir,lang,orient);
|
||||
|
||||
@ -415,13 +387,14 @@ Model* Model::createModel(ModelScore* modelscore, const std::string& config, con
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Model::createSmoothing(double w) {
|
||||
smoothing_prev = scorer->createSmoothing(modelscore->get_scores_fe_prev(),w);
|
||||
smoothing_next = scorer->createSmoothing(modelscore->get_scores_fe_prev(),w);
|
||||
scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev);
|
||||
scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_next);
|
||||
}
|
||||
|
||||
void Model::createConstSmoothing(double w) {
|
||||
vector<double> i;
|
||||
smoothing_prev = scorer->createConstSmoothing(w);
|
||||
smoothing_next = scorer->createConstSmoothing(w);
|
||||
scorer->createConstSmoothing(w, smoothing_prev);
|
||||
scorer->createConstSmoothing(w, smoothing_next);
|
||||
}
|
||||
|
@ -65,37 +65,37 @@ class ModelScoreMonotonicity : public ModelScore {
|
||||
class Scorer {
|
||||
public:
|
||||
~Scorer() {}
|
||||
virtual std::vector<double> score(std::vector<double>) const = 0;
|
||||
virtual std::vector<double> createSmoothing(std::vector<double>, double) const = 0;
|
||||
virtual std::vector<double> createConstSmoothing(double) const = 0;
|
||||
virtual void score(const std::vector<double>&, std::vector<double>&) const = 0;
|
||||
virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const = 0;
|
||||
virtual void createConstSmoothing(double, std::vector<double>&) const = 0;
|
||||
};
|
||||
|
||||
class ScorerMSLR : public Scorer {
|
||||
public:
|
||||
virtual std::vector<double> score(std::vector<double>) const;
|
||||
virtual std::vector<double> createSmoothing(std::vector<double>, double) const;
|
||||
virtual std::vector<double> createConstSmoothing(double) const;
|
||||
virtual void score(const std::vector<double>&, std::vector<double>&) const;
|
||||
virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
|
||||
virtual void createConstSmoothing(double, std::vector<double>&) const;
|
||||
};
|
||||
|
||||
class ScorerMSD : public Scorer {
|
||||
public:
|
||||
virtual std::vector<double> score(std::vector<double>) const;
|
||||
virtual std::vector<double> createSmoothing(std::vector<double>, double) const;
|
||||
virtual std::vector<double> createConstSmoothing(double) const;
|
||||
virtual void score(const std::vector<double>&, std::vector<double>&) const;
|
||||
virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
|
||||
virtual void createConstSmoothing(double, std::vector<double>&) const;
|
||||
};
|
||||
|
||||
class ScorerMonotonicity : public Scorer {
|
||||
public:
|
||||
virtual std::vector<double> score(std::vector<double>) const;
|
||||
virtual std::vector<double> createSmoothing(std::vector<double>, double) const;
|
||||
virtual std::vector<double> createConstSmoothing(double) const;
|
||||
virtual void score(const std::vector<double>&, std::vector<double>&) const;
|
||||
virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
|
||||
virtual void createConstSmoothing(double, std::vector<double>&) const;
|
||||
};
|
||||
|
||||
class ScorerLR : public Scorer {
|
||||
public:
|
||||
virtual std::vector<double> score(std::vector<double>) const;
|
||||
virtual std::vector<double> createSmoothing(std::vector<double>, double) const;
|
||||
virtual std::vector<double> createConstSmoothing(double) const;
|
||||
virtual void score(const std::vector<double>&, std::vector<double>&) const;
|
||||
virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
|
||||
virtual void createConstSmoothing(double, std::vector<double>&) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -45,7 +45,7 @@ typedef vector < HPhrase > HPhraseVector;
|
||||
|
||||
// SentenceVertices represents, from all extracted phrases, all vertices that have the same positioning
|
||||
// The key of the map is the English index and the value is a set of the foreign ones
|
||||
typedef map <int, set<int> > HSenteceVertices;
|
||||
typedef map <int, set<int> > HSentenceVertices;
|
||||
|
||||
enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
|
||||
enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
|
||||
@ -64,18 +64,19 @@ public:
|
||||
REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE,
|
||||
int, int, int, int, int, int, int,
|
||||
bool (*)(int, int), bool (*)(int, int));
|
||||
REO_POS getOrientPhraseModel(REO_MODEL_TYPE,
|
||||
REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE,
|
||||
int, int, int, int, int, int, int,
|
||||
bool (*)(int, int), bool (*)(int, int),
|
||||
const HSenteceVertices &, const HSenteceVertices &);
|
||||
REO_POS getOrientHierModel(REO_MODEL_TYPE,
|
||||
const HSentenceVertices &, const HSentenceVertices &);
|
||||
REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE,
|
||||
int, int, int, int, int, int, int,
|
||||
bool (*)(int, int), bool (*)(int, int),
|
||||
const HSenteceVertices &, const HSenteceVertices &,
|
||||
const HSentenceVertices &, const HSentenceVertices &,
|
||||
const HSentenceVertices &, const HSentenceVertices &,
|
||||
REO_POS);
|
||||
|
||||
void insertVertex(HSenteceVertices &, int, int);
|
||||
void insertPhraseVertices(HSenteceVertices &, HSenteceVertices &, HSenteceVertices &, HSenteceVertices &,
|
||||
void insertVertex(HSentenceVertices &, int, int);
|
||||
void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
|
||||
int, int, int, int);
|
||||
string getOrientString(REO_POS, REO_MODEL_TYPE);
|
||||
|
||||
@ -266,17 +267,17 @@ void extract(SentenceAlignment &sentence) {
|
||||
|
||||
HPhraseVector inboundPhrases;
|
||||
|
||||
HSenteceVertices inTopLeft;
|
||||
HSenteceVertices inTopRight;
|
||||
HSenteceVertices inBottomLeft;
|
||||
HSenteceVertices inBottomRight;
|
||||
HSentenceVertices inTopLeft;
|
||||
HSentenceVertices inTopRight;
|
||||
HSentenceVertices inBottomLeft;
|
||||
HSentenceVertices inBottomRight;
|
||||
|
||||
HSenteceVertices outTopLeft;
|
||||
HSenteceVertices outTopRight;
|
||||
HSenteceVertices outBottomLeft;
|
||||
HSenteceVertices outBottomRight;
|
||||
HSentenceVertices outTopLeft;
|
||||
HSentenceVertices outTopRight;
|
||||
HSentenceVertices outBottomLeft;
|
||||
HSentenceVertices outBottomRight;
|
||||
|
||||
HSenteceVertices::const_iterator it;
|
||||
HSentenceVertices::const_iterator it;
|
||||
|
||||
bool relaxLimit = hierModel;
|
||||
bool buildExtraStructure = phraseModel || hierModel;
|
||||
@ -327,12 +328,8 @@ void extract(SentenceAlignment &sentence) {
|
||||
endF++){ // at this point we have extracted a phrase
|
||||
if(buildExtraStructure){ // phrase || hier
|
||||
if(endE-startE < maxPhraseLength && endF-startF < maxPhraseLength){ // within limit
|
||||
inboundPhrases.push_back(
|
||||
HPhrase(
|
||||
HPhraseVertex(startF,startE),
|
||||
HPhraseVertex(endF,endE)
|
||||
)
|
||||
);
|
||||
inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
|
||||
HPhraseVertex(endF,endE)));
|
||||
insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
|
||||
startF, startE, endF, endE);
|
||||
}
|
||||
@ -377,16 +374,21 @@ void extract(SentenceAlignment &sentence) {
|
||||
endF, startF, endE, startE, 0, countF, -1,
|
||||
<, &ge);
|
||||
}
|
||||
phrasePrevOrient = getOrientPhraseModel(phraseType, startF, endF, startE, endE, countF-1, 0, 1, &ge, <, inBottomRight, inBottomLeft);
|
||||
phraseNextOrient = getOrientPhraseModel(phraseType, endF, startF, endE, startE, 0, countF-1, -1, <, &ge, inBottomLeft, inBottomRight);
|
||||
if (phraseModel) {
|
||||
phrasePrevOrient = getOrientPhraseModel(sentence, phraseType, startF, endF, startE, endE, countF-1, 0, 1, &ge, <, inBottomRight, inBottomLeft);
|
||||
phraseNextOrient = getOrientPhraseModel(sentence, phraseType, endF, startF, endE, startE, 0, countF-1, -1, <, &ge, inBottomLeft, inBottomRight);
|
||||
}
|
||||
else {
|
||||
phrasePrevOrient = phraseNextOrient = UNKNOWN;
|
||||
}
|
||||
if(hierModel){
|
||||
hierPrevOrient = getOrientHierModel(phraseType, startF, endF, startE, endE, countF-1, 0, 1, &ge, <, outBottomRight, outBottomLeft, phrasePrevOrient);
|
||||
hierNextOrient = getOrientHierModel(phraseType, endF, startF, endE, startE, 0, countF-1, -1, <, &ge, outBottomLeft, outBottomRight, phraseNextOrient);
|
||||
hierPrevOrient = getOrientHierModel(sentence, hierType, startF, endF, startE, endE, countF-1, 0, 1, &ge, <, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
|
||||
hierNextOrient = getOrientHierModel(sentence, hierType, endF, startF, endE, startE, 0, countF-1, -1, <, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
|
||||
}
|
||||
|
||||
orientationInfo = ((wordModel)? getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType) : " ") + " | " +
|
||||
((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : " ") + " | " +
|
||||
((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : " ");
|
||||
orientationInfo = ((wordModel)? getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType) : "") + " | " +
|
||||
((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : "") + " | " +
|
||||
((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : "");
|
||||
|
||||
addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
|
||||
}
|
||||
@ -419,14 +421,16 @@ REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelTyp
|
||||
}
|
||||
|
||||
// to be called with countF-1 instead of countF
|
||||
REO_POS getOrientPhraseModel (REO_MODEL_TYPE modelType,
|
||||
REO_POS getOrientPhraseModel (SentenceAlignment & sentence,
|
||||
REO_MODEL_TYPE modelType,
|
||||
int startF, int endF, int startE, int endE, int countF, int zero, int unit,
|
||||
bool (*ge)(int, int), bool (*le)(int, int),
|
||||
const HSenteceVertices & inBottomRight, const HSenteceVertices & inBottomLeft){
|
||||
bool (*ge)(int, int), bool (*lt)(int, int),
|
||||
const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft){
|
||||
|
||||
HSenteceVertices::const_iterator it;
|
||||
HSentenceVertices::const_iterator it;
|
||||
|
||||
if((startE == zero && startF == zero) ||
|
||||
if((startE == 0 && startF == 0) ||
|
||||
(startE == sentence.english.size()-1 && startF == sentence.foreign.size()-1) ||
|
||||
(it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
|
||||
it->second.find(startF-unit) != it->second.end())
|
||||
return LEFT;
|
||||
@ -442,45 +446,60 @@ REO_POS getOrientPhraseModel (REO_MODEL_TYPE modelType,
|
||||
it->second.find(indexF) != it->second.end())
|
||||
return DRIGHT;
|
||||
bool connectedRightTop = false;
|
||||
for(int indexF=endF+2*unit; (*le)(indexF, countF) && !connectedRightTop; indexF=indexF+unit)
|
||||
for(int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit)
|
||||
if(connectedRightTop = (it = inBottomLeft.find(startE - unit)) != inBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end())
|
||||
return DLEFT;
|
||||
return DRIGHT;
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
// to be called with countF-1 instead of countF
|
||||
REO_POS getOrientHierModel (REO_MODEL_TYPE modelType,
|
||||
REO_POS getOrientHierModel (SentenceAlignment & sentence,
|
||||
REO_MODEL_TYPE modelType,
|
||||
int startF, int endF, int startE, int endE, int countF, int zero, int unit,
|
||||
bool (*ge)(int, int), bool (*le)(int, int),
|
||||
const HSenteceVertices & outBottomRight, const HSenteceVertices & outBottomLeft,
|
||||
bool (*ge)(int, int), bool (*lt)(int, int),
|
||||
const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft,
|
||||
const HSentenceVertices & outBottomRight, const HSentenceVertices & outBottomLeft,
|
||||
REO_POS phraseOrient){
|
||||
|
||||
HSenteceVertices::const_iterator it;
|
||||
HSentenceVertices::const_iterator it;
|
||||
|
||||
if(phraseOrient == LEFT || ((it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
|
||||
if(phraseOrient == LEFT ||
|
||||
(startE == 0 && startF == 0) ||
|
||||
(startE == sentence.english.size()-1 && startF == sentence.foreign.size()-1) ||
|
||||
((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
|
||||
it->second.find(startF-unit) != it->second.end()) ||
|
||||
((it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
|
||||
it->second.find(startF-unit) != it->second.end()))
|
||||
return LEFT;
|
||||
if(modelType == REO_MONO)
|
||||
return UNKNOWN;
|
||||
if(phraseOrient == RIGHT || ((it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() && it->second.find(endF + unit) != it->second.end()))
|
||||
if(phraseOrient == RIGHT ||
|
||||
((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
|
||||
it->second.find(endF + unit) != it->second.end()) ||
|
||||
((it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
|
||||
it->second.find(endF + unit) != it->second.end()))
|
||||
return RIGHT;
|
||||
if(modelType == REO_MSD)
|
||||
return UNKNOWN;
|
||||
if(phraseOrient == DRIGHT)
|
||||
return DRIGHT;
|
||||
if(phraseOrient == DLEFT)
|
||||
return DLEFT;
|
||||
if(phraseOrient != UNKNOWN)
|
||||
return phraseOrient;
|
||||
bool connectedLeftTop = false;
|
||||
for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit)
|
||||
if(connectedLeftTop = (it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end())
|
||||
for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit) {
|
||||
if((connectedLeftTop = (it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end()) ||
|
||||
(connectedLeftTop = (it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end()))
|
||||
return DRIGHT;
|
||||
}
|
||||
bool connectedRightTop = false;
|
||||
for(int indexF=endF+2*unit; (*le)(indexF, countF) && !connectedRightTop; indexF=indexF+unit)
|
||||
if(connectedRightTop = (it = outBottomLeft.find(startE - unit)) != outBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end())
|
||||
for(int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit) {
|
||||
if((connectedRightTop = (it = inBottomLeft.find(startE - unit)) != inBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end()) ||
|
||||
(connectedRightTop = (it = outBottomLeft.find(startE - unit)) != outBottomRight.end() &&
|
||||
it->second.find(indexF) != it->second.end()))
|
||||
return DLEFT;
|
||||
}
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
@ -511,20 +530,20 @@ bool lt(int first, int second){
|
||||
return first < second;
|
||||
}
|
||||
|
||||
void insertVertex( HSenteceVertices & corners, int x, int y ){
|
||||
void insertVertex( HSentenceVertices & corners, int x, int y ){
|
||||
set<int> tmp;
|
||||
tmp.insert(x);
|
||||
pair< HSenteceVertices::iterator, bool > ret = corners.insert( pair<int, set<int> > (y, tmp) );
|
||||
pair< HSentenceVertices::iterator, bool > ret = corners.insert( pair<int, set<int> > (y, tmp) );
|
||||
if(ret.second == false){
|
||||
ret.first->second.insert(x);
|
||||
}
|
||||
}
|
||||
|
||||
void insertPhraseVertices(
|
||||
HSenteceVertices & topLeft,
|
||||
HSenteceVertices & topRight,
|
||||
HSenteceVertices & bottomLeft,
|
||||
HSenteceVertices & bottomRight,
|
||||
HSentenceVertices & topLeft,
|
||||
HSentenceVertices & topRight,
|
||||
HSentenceVertices & bottomLeft,
|
||||
HSentenceVertices & bottomRight,
|
||||
int startF, int startE, int endF, int endE) {
|
||||
|
||||
insertVertex(topLeft, startF, startE);
|
||||
|
Loading…
Reference in New Issue
Block a user