mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
lex reordering scoring uses FilePiece/StringPiece
This commit is contained in:
parent
1530ae4f5f
commit
2e8bad22e4
@ -1,2 +1,2 @@
|
||||
exe lexical-reordering-score : InputFileStream.cpp reordering_classes.cpp score.cpp ../..//z ;
|
||||
exe lexical-reordering-score : InputFileStream.cpp reordering_classes.cpp score.cpp ../../util//kenutil ../..//z ;
|
||||
|
||||
|
@ -56,7 +56,7 @@ void ModelScore::reset_f()
|
||||
}
|
||||
}
|
||||
|
||||
void ModelScore::add_example(const string& previous, string& next)
|
||||
void ModelScore::add_example(const StringPiece& previous, const StringPiece& next)
|
||||
{
|
||||
count_fe_prev[getType(previous)]++;
|
||||
count_f_prev[getType(previous)]++;
|
||||
@ -85,7 +85,7 @@ const vector<double>& ModelScore::get_scores_f_next() const
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScore::getType(const string& s)
|
||||
ORIENTATION ModelScore::getType(const StringPiece& s)
|
||||
{
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
@ -106,7 +106,7 @@ ORIENTATION ModelScore::getType(const string& s)
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScoreMSLR::getType(const string& s)
|
||||
ORIENTATION ModelScoreMSLR::getType(const StringPiece& s)
|
||||
{
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
@ -126,7 +126,7 @@ ORIENTATION ModelScoreMSLR::getType(const string& s)
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScoreLR::getType(const string& s)
|
||||
ORIENTATION ModelScoreLR::getType(const StringPiece& s)
|
||||
{
|
||||
if (s.compare("mono") == 0 || s.compare("dright") == 0) {
|
||||
return DRIGHT;
|
||||
@ -142,7 +142,7 @@ ORIENTATION ModelScoreLR::getType(const string& s)
|
||||
}
|
||||
|
||||
|
||||
ORIENTATION ModelScoreMSD::getType(const string& s)
|
||||
ORIENTATION ModelScoreMSD::getType(const StringPiece& s)
|
||||
{
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
@ -161,7 +161,7 @@ ORIENTATION ModelScoreMSD::getType(const string& s)
|
||||
}
|
||||
}
|
||||
|
||||
ORIENTATION ModelScoreMonotonicity::getType(const string& s)
|
||||
ORIENTATION ModelScoreMonotonicity::getType(const StringPiece& s)
|
||||
{
|
||||
if (s.compare("mono") == 0) {
|
||||
return MONO;
|
||||
|
@ -12,6 +12,8 @@
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
|
||||
|
||||
enum ORIENTATION {MONO, SWAP, DRIGHT, DLEFT, OTHER, NOMONO};
|
||||
|
||||
@ -27,12 +29,12 @@ private:
|
||||
std::vector<double> count_f_next;
|
||||
|
||||
protected:
|
||||
virtual ORIENTATION getType(const std::string& s);
|
||||
virtual ORIENTATION getType(const StringPiece& s);
|
||||
|
||||
public:
|
||||
ModelScore();
|
||||
virtual ~ModelScore();
|
||||
void add_example(const std::string& previous, std::string& next);
|
||||
void add_example(const StringPiece& previous, const StringPiece& next);
|
||||
void reset_fe();
|
||||
void reset_f();
|
||||
const std::vector<double>& get_scores_fe_prev() const;
|
||||
@ -46,25 +48,25 @@ public:
|
||||
class ModelScoreMSLR : public ModelScore
|
||||
{
|
||||
protected:
|
||||
virtual ORIENTATION getType(const std::string& s);
|
||||
virtual ORIENTATION getType(const StringPiece& s);
|
||||
};
|
||||
|
||||
class ModelScoreLR : public ModelScore
|
||||
{
|
||||
protected:
|
||||
virtual ORIENTATION getType(const std::string& s);
|
||||
virtual ORIENTATION getType(const StringPiece& s);
|
||||
};
|
||||
|
||||
class ModelScoreMSD : public ModelScore
|
||||
{
|
||||
protected:
|
||||
virtual ORIENTATION getType(const std::string& s);
|
||||
virtual ORIENTATION getType(const StringPiece& s);
|
||||
};
|
||||
|
||||
class ModelScoreMonotonicity : public ModelScore
|
||||
{
|
||||
protected:
|
||||
virtual ORIENTATION getType(const std::string& s);
|
||||
virtual ORIENTATION getType(const StringPiece& s);
|
||||
};
|
||||
|
||||
//Class for calculating total counts, and to calculate smoothing
|
||||
|
@ -13,15 +13,28 @@
|
||||
#include <sstream>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "InputFileStream.h"
|
||||
|
||||
#include "util/exception.hh"
|
||||
#include "util/file_piece.hh"
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/tokenize_piece.hh"
|
||||
|
||||
#include "InputFileStream.h"
|
||||
#include "reordering_classes.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier);
|
||||
void get_orientations(const string& pair, string& previous, string& next);
|
||||
void split_line(const StringPiece& line, StringPiece& foreign, StringPiece& english, StringPiece& wbe, StringPiece& phrase, StringPiece& hier);
|
||||
void get_orientations(const StringPiece& pair, StringPiece& previous, StringPiece& next);
|
||||
|
||||
class FileFormatException : public util::Exception
|
||||
{
|
||||
public:
|
||||
FileFormatException() throw() {
|
||||
*this << "Invalid extract file format: ";
|
||||
}
|
||||
~FileFormatException() throw() {}
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
@ -38,11 +51,7 @@ int main(int argc, char* argv[])
|
||||
double smoothingValue = atof(argv[2]);
|
||||
string filepath = argv[3];
|
||||
|
||||
Moses::InputFileStream eFile(extractFileName);
|
||||
if (!eFile) {
|
||||
cerr << "Could not open the extract file " << extractFileName <<"for scoring of lexical reordering models\n";
|
||||
exit(1);
|
||||
}
|
||||
util::FilePiece eFile(extractFileName);
|
||||
|
||||
bool smoothWithCounts = false;
|
||||
map<string,ModelScore*> modelScores;
|
||||
@ -51,8 +60,8 @@ int main(int argc, char* argv[])
|
||||
bool phrase = false;
|
||||
bool wbe = false;
|
||||
|
||||
string e,f,w,p,h;
|
||||
string prev, next;
|
||||
StringPiece e,f,w,p,h;
|
||||
StringPiece prev, next;
|
||||
|
||||
int i = 4;
|
||||
while (i<argc) {
|
||||
@ -96,8 +105,14 @@ int main(int argc, char* argv[])
|
||||
////////////////////////////////////
|
||||
//calculate smoothing
|
||||
if (smoothWithCounts) {
|
||||
string line;
|
||||
while (getline(eFile,line)) {
|
||||
util::FilePiece eFileForCounts(extractFileName);
|
||||
while (true) {
|
||||
StringPiece line;
|
||||
try {
|
||||
line = eFileForCounts.ReadLine();
|
||||
} catch (util::EndOfFileException &e) {
|
||||
break;
|
||||
}
|
||||
split_line(line,e,f,w,p,h);
|
||||
if (hier) {
|
||||
get_orientations(h, prev, next);
|
||||
@ -118,9 +133,6 @@ int main(int argc, char* argv[])
|
||||
models[i]->createSmoothing(smoothingValue);
|
||||
}
|
||||
|
||||
//reopen eFile
|
||||
eFile.Close();
|
||||
eFile.Open(extractFileName);
|
||||
} else {
|
||||
//constant smoothing
|
||||
for (size_t i=0; i<models.size(); ++i) {
|
||||
@ -130,14 +142,20 @@ int main(int argc, char* argv[])
|
||||
|
||||
////////////////////////////////////
|
||||
//calculate scores for reordering table
|
||||
string line,f_current,e_current;
|
||||
string f_current,e_current;
|
||||
bool first = true;
|
||||
while (getline(eFile, line)) {
|
||||
while (true) {
|
||||
StringPiece line;
|
||||
try {
|
||||
line = eFile.ReadLine();
|
||||
} catch (util::EndOfFileException &e) {
|
||||
break;
|
||||
}
|
||||
split_line(line,f,e,w,p,h);
|
||||
|
||||
if (first) {
|
||||
f_current = f;
|
||||
e_current = e;
|
||||
f_current = f.as_string(); //FIXME: Avoid the copy.
|
||||
e_current = e.as_string();
|
||||
first = false;
|
||||
} else if (f.compare(f_current) != 0 || e.compare(e_current) != 0) {
|
||||
//fe - score
|
||||
@ -159,8 +177,8 @@ int main(int argc, char* argv[])
|
||||
it->second->reset_f();
|
||||
}
|
||||
}
|
||||
f_current = f;
|
||||
e_current = e;
|
||||
f_current = f.as_string();
|
||||
e_current = e.as_string();
|
||||
}
|
||||
|
||||
// uppdate counts
|
||||
@ -179,10 +197,10 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
//Score the last phrases
|
||||
for (size_t i=0; i<models.size(); ++i) {
|
||||
models[i]->score_fe(f,e);
|
||||
models[i]->score_fe(f_current,e_current);
|
||||
}
|
||||
for (size_t i=0; i<models.size(); ++i) {
|
||||
models[i]->score_f(f);
|
||||
models[i]->score_f(f_current);
|
||||
}
|
||||
|
||||
//Zip all files
|
||||
@ -193,22 +211,61 @@ int main(int argc, char* argv[])
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class It> StringPiece
|
||||
GrabOrDie(It &it, const StringPiece& line) {
|
||||
UTIL_THROW_IF(!it, FileFormatException, line.as_string());
|
||||
return *it++;
|
||||
}
|
||||
|
||||
|
||||
void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier)
|
||||
void split_line(
|
||||
const StringPiece& line,
|
||||
StringPiece& foreign,
|
||||
StringPiece& english,
|
||||
StringPiece& wbe,
|
||||
StringPiece& phrase,
|
||||
StringPiece& hier)
|
||||
{
|
||||
/*Format is source ||| target ||| orientations
|
||||
followed by one of the following 4 possibilities
|
||||
eps
|
||||
||| weights
|
||||
| phrase | hier
|
||||
| phrase | hier ||| weight
|
||||
*/
|
||||
|
||||
util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
|
||||
foreign = GrabOrDie(pipes,line);
|
||||
english = GrabOrDie(pipes,line);
|
||||
StringPiece rest = GrabOrDie(pipes,line);
|
||||
|
||||
util::TokenIter<util::MultiCharacter> singlePipe(rest, util::MultiCharacter(" | "));
|
||||
wbe = GrabOrDie(singlePipe,line);
|
||||
if (singlePipe) {
|
||||
phrase = GrabOrDie(singlePipe, line);
|
||||
hier = GrabOrDie(singlePipe, line);
|
||||
} else {
|
||||
phrase.clear();
|
||||
hier.clear();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
int begin = 0;
|
||||
int end = line.find(" ||| ");
|
||||
foreign = line.substr(begin, end - begin);
|
||||
cerr << begin << " " << end << " " << foreign << endl;
|
||||
|
||||
begin = end+5;
|
||||
end = line.find(" ||| ", begin);
|
||||
english = line.substr(begin, end - begin);
|
||||
cerr << begin << " " << end << " " << english << endl;
|
||||
|
||||
begin = end+5;
|
||||
end = line.find(" | ", begin);
|
||||
wbe = line.substr(begin, end - begin);
|
||||
cerr << begin << " " << end << " " << wbe << endl;
|
||||
exit(0);
|
||||
|
||||
begin = end+3;
|
||||
end = line.find(" | ", begin);
|
||||
@ -216,10 +273,12 @@ void split_line(const string& line, string& foreign, string& english, string& wb
|
||||
|
||||
begin = end+3;
|
||||
hier = line.substr(begin, line.size() - begin);
|
||||
*/
|
||||
}
|
||||
|
||||
void get_orientations(const string& pair, string& previous, string& next)
|
||||
void get_orientations(const StringPiece& pair, StringPiece& previous, StringPiece& next)
|
||||
{
|
||||
istringstream is(pair);
|
||||
is >> previous >> next;
|
||||
util::TokenIter<util::SingleCharacter> tok(pair, util::SingleCharacter(' '));
|
||||
previous = GrabOrDie(tok,pair);
|
||||
next = GrabOrDie(tok,pair);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user