mirror of https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00

Merge branch 'master' of ssh://github.com/moses-smt/mosesdecoder

commit e31bc247ea
@@ -3,6 +3,6 @@
 # you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
 
 set -e -o pipefail
-OPT=${OPT:-$(pwd)/OPT}
+OPT=${OPT:-$(pwd)/opt}
 ./bjam --with-irstlm=$OPT/irstlm-5.80.08 --with-boost=$OPT --with-cmph=$OPT --with-xmlrpc-c=$OPT --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
 
@@ -1105,6 +1105,16 @@
 		<type>1</type>
 		<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.h</locationURI>
 	</link>
+	<link>
+		<name>FF/CorrectionPattern.cpp</name>
+		<type>1</type>
+		<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CorrectionPattern.cpp</locationURI>
+	</link>
+	<link>
+		<name>FF/CorrectionPattern.h</name>
+		<type>1</type>
+		<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CorrectionPattern.h</locationURI>
+	</link>
 	<link>
 		<name>FF/CountNonTerms.cpp</name>
 		<type>1</type>
@@ -1170,6 +1180,16 @@
 		<type>1</type>
 		<locationURI>PARENT-3-PROJECT_LOC/moses/FF/DynamicCacheBasedLanguageModel.h</locationURI>
 	</link>
+	<link>
+		<name>FF/EditOps.cpp</name>
+		<type>1</type>
+		<locationURI>PARENT-3-PROJECT_LOC/moses/FF/EditOps.cpp</locationURI>
+	</link>
+	<link>
+		<name>FF/EditOps.h</name>
+		<type>1</type>
+		<locationURI>PARENT-3-PROJECT_LOC/moses/FF/EditOps.h</locationURI>
+	</link>
 	<link>
 		<name>FF/FFState.cpp</name>
 		<type>1</type>
@@ -31,6 +31,8 @@ Point.cpp
 PerScorer.cpp
 HwcmScorer.cpp
 InternalTree.cpp
+M2.cpp
+M2Scorer.cpp
 Scorer.cpp
 ScorerFactory.cpp
 Optimizer.cpp
61  mert/M2.cpp  Normal file
@@ -0,0 +1,61 @@
#include <boost/algorithm/string.hpp>

#include "M2.h"

namespace MosesTuning
{

namespace M2
{

bool Annot::lowercase = true;

std::string Annot::transform(const std::string& e)
{
  std::string temp = e;
  if(lowercase) {
    boost::erase_all(temp, " ");
    return ToLower(temp);
  } else
    return e;
}

const std::string ToLower(const std::string& str)
{
  std::string lc(str);
  std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower);
  return lc;
}

Edit operator+(Edit& e1, Edit& e2)
{
  std::string edit;
  if(e1.edit.size() > 0 && e2.edit.size() > 0)
    edit = e1.edit + " " + e2.edit;
  else if(e1.edit.size() > 0)
    edit = e1.edit;
  else if(e2.edit.size() > 0)
    edit = e2.edit;

  return Edit(e1.cost + e2.cost, e1.changed + e2.changed, e1.unchanged + e2.unchanged, edit);
}

Edge operator+(Edge e1, Edge e2)
{
  return Edge(e1.v, e2.u, e1.edit + e2.edit);
}

std::ostream& operator<<(std::ostream& o, Sentence s)
{
  for(Sentence::iterator it = s.begin(); it != s.end(); it++)
    o << *it << " ";
  return o;
}

}

}
480  mert/M2.h  Normal file
@@ -0,0 +1,480 @@
#pragma once

#include <cmath>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <queue>
#include <iostream>
#include <fstream>
#include <iterator>
#include <algorithm>
#include <limits>
#include <sstream>
#include <boost/algorithm/string.hpp>

namespace MosesTuning
{

namespace M2
{

typedef std::vector<float> Stats;

typedef std::vector<std::string> Sentence;

std::ostream& operator<<(std::ostream& o, Sentence s);

const std::string ToLower(const std::string& str);

struct Annot {
  size_t i;
  size_t j;

  std::string type;
  std::string edit;

  size_t annotator;

  bool operator<(Annot a) const {
    return i < a.i || (i == a.i && j < a.j)
           || (i == a.i && j == a.j && annotator < a.annotator)
           || (i == a.i && j == a.j && annotator == a.annotator && transform(edit) < transform(a.edit));
  }

  bool operator==(Annot a) const {
    return (!(*this < a) && !(a < *this));
  }

  static std::string transform(const std::string& e);

  static bool lowercase;
};

typedef std::set<Annot> Annots;
typedef std::set<size_t> Users;

struct Unit {
  Sentence first;
  Annots second;
  Users third;
};

typedef std::vector<Unit> M2File;

struct Edit {
  Edit(float c = 1.0, size_t ch = 0, size_t unch = 1, std::string e = "")
    : cost(c), changed(ch), unchanged(unch), edit(e) {}

  float cost;
  size_t changed;
  size_t unchanged;
  std::string edit;
};

Edit operator+(Edit& e1, Edit& e2);

struct Vertex {
  Vertex(size_t a = 0, size_t b = 0) : i(a), j(b) {}

  bool operator<(const Vertex &v) const {
    return i < v.i || (i == v.i && j < v.j);
  }

  bool operator==(const Vertex &v) const {
    return i == v.i && j == v.j;
  }

  size_t i;
  size_t j;
};

struct Edge {
  Edge(Vertex vv = Vertex(), Vertex uu = Vertex(), Edit editt = Edit())
    : v(vv), u(uu), edit(editt) {}

  bool operator<(const Edge &e) const {
    return v < e.v || (v == e.v && u < e.u);
  }

  Vertex v;
  Vertex u;
  Edit edit;
};

Edge operator+(Edge e1, Edge e2);

typedef std::vector<size_t> Row;
typedef std::vector<Row> Matrix;

struct Info {
  Info(Vertex vv = Vertex(), Edit editt = Edit())
    : v(vv), edit(editt) {}

  bool operator<(const Info &i) const {
    return v < i.v;
  }

  Vertex v;
  Edit edit;
};

typedef std::set<Info> Track;
typedef std::vector<Track> TrackRow;
typedef std::vector<TrackRow> TrackMatrix;

typedef std::set<Vertex> Vertices;
typedef std::set<Edge> Edges;

class M2
{
private:
  M2File m_m2;

  size_t m_max_unchanged;
  float m_beta;
  bool m_lowercase;
  bool m_verbose;

public:
  M2() : m_max_unchanged(2), m_beta(0.5), m_lowercase(true), m_verbose(false) { }
  M2(size_t max_unchanged, float beta, bool truecase, bool verbose = false)
    : m_max_unchanged(max_unchanged), m_beta(beta), m_lowercase(!truecase), m_verbose(verbose) {
    if(!m_lowercase) {
      Annot::lowercase = false;
    }
  }

  float Beta() {
    return m_beta;
  }

  void ReadM2(const std::string& filename) {
    std::ifstream m2file(filename.c_str());
    std::string line;

    Unit unit;
    bool first = true;

    while(std::getline(m2file, line)) {
      if(line.size() > 2) {
        if(line.substr(0, 2) == "S ") {
          if(!first) {
            if(unit.third.empty())
              unit.third.insert(0);
            m_m2.push_back(unit);
          }
          first = false;

          unit.first = Sentence();
          unit.second = Annots();

          std::string sentenceLine = line.substr(2);
          boost::split(unit.first, sentenceLine, boost::is_any_of(" "), boost::token_compress_on);
        }
        if(line.substr(0, 2) == "A ") {
          std::string annotLine = line.substr(2);

          std::vector<std::string> annot;
          boost::iter_split(annot, annotLine, boost::algorithm::first_finder("|||"));

          if(annot[1] != "noop") {
            Annot a;
            std::stringstream rangeStr(annot[0]);
            rangeStr >> a.i >> a.j;
            a.type = annot[1];
            a.edit = annot[2];

            std::stringstream annotStr(annot[5]);
            annotStr >> a.annotator;

            unit.third.insert(a.annotator);
            unit.second.insert(a);
          } else {
            std::stringstream annotStr(annot[5]);
            size_t annotator;
            annotStr >> annotator;
            unit.third.insert(annotator);
          }
        }
      }
    }
    if(unit.third.empty())
      unit.third.insert(0);
    m_m2.push_back(unit);
  }

  size_t LevenshteinMatrix(const Sentence &s1, const Sentence &s2, Matrix &d, TrackMatrix &bt) {
    size_t n = s1.size();
    size_t m = s2.size();

    if (n == 0)
      return m;
    if (m == 0)
      return n;

    d.resize(n + 1, Row(m + 1, 0));
    bt.resize(n + 1, TrackRow(m + 1));

    for(size_t i = 0; i <= n; ++i) {
      d[i][0] = i;
      if(i > 0)
        bt[i][0].insert(Info(Vertex(i - 1, 0), Edit(1, 1, 0, "")));
    }
    for(size_t j = 0; j <= m; ++j) {
      d[0][j] = j;
      if(j > 0)
        bt[0][j].insert(Info(Vertex(0, j - 1), Edit(1, 1, 0, s2[j - 1])));
    }

    int cost;
    for(size_t i = 1; i <= n; ++i) {
      for(size_t j = 1; j <= m; ++j) {
        if(Annot::transform(s1[i-1]) == Annot::transform(s2[j-1]))
          cost = 0;
        else
          cost = 2;

        size_t left = d[i][j - 1] + 1;
        size_t down = d[i - 1][j] + 1;
        size_t diag = d[i - 1][j - 1] + cost;

        d[i][j] = std::min(left, std::min(down, diag));

        if(d[i][j] == left)
          bt[i][j].insert(Info(Vertex(i, j - 1), Edit(1, 1, 0, s2[j - 1])));
        if(d[i][j] == down)
          bt[i][j].insert(Info(Vertex(i - 1, j), Edit(1, 1, 0, "")));
        if(d[i][j] == diag)
          bt[i][j].insert(Info(Vertex(i - 1, j - 1), cost ? Edit(1, 1, 0, s2[j - 1]) : Edit(1, 0, 1, s2[j - 1]) ));
      }
    }
    return d[n][m];
  }

  void BuildGraph(const TrackMatrix &bt, Vertices &V, Edges &E) {
    Vertex start(bt.size() - 1, bt[0].size() - 1);

    std::queue<Vertex> Q;
    Q.push(start);
    while(!Q.empty()) {
      Vertex v = Q.front();
      Q.pop();
      if(V.count(v) > 0)
        continue;
      V.insert(v);
      for(Track::iterator it = bt[v.i][v.j].begin();
          it != bt[v.i][v.j].end(); ++it) {
        Edge e(it->v, v, it->edit);
        E.insert(e);
        if(V.count(e.v) == 0)
          Q.push(e.v);
      }
    }

    Edges newE;
    do {
      newE.clear();
      for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
        for(Edges::iterator it2 = E.begin(); it2 != E.end(); ++it2) {
          if(it1->u == it2->v) {
            Edge e = *it1 + *it2;
            if(e.edit.changed > 0 &&
                e.edit.unchanged <= m_max_unchanged &&
                E.count(e) == 0)
              newE.insert(e);
          }
        }
      }
      E.insert(newE.begin(), newE.end());
    } while(newE.size() > 0);
  }

  void AddWeights(Edges &E, const Unit &u, size_t aid) {
    for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
      if(it1->edit.changed > 0) {
        const_cast<float&>(it1->edit.cost) += 0.001;
        for(Annots::iterator it2 = u.second.begin(); it2 != u.second.end(); ++it2) {
          // if matches an annotator
          if(it1->v.i == it2->i && it1->u.i == it2->j
              && Annot::transform(it1->edit.edit) == Annot::transform(it2->edit)
              && it2->annotator == aid) {
            int newWeight = -(m_max_unchanged + 1) * E.size();
            const_cast<float&>(it1->edit.cost) = newWeight;
          }
        }
      }
    }
  }

  void BellmanFord(Vertices &V, Edges &E) {
    Vertex source(0, 0);
    std::map<Vertex, float> distance;
    std::map<Vertex, Vertex> predecessor;

    for(Vertices::iterator it = V.begin(); it != V.end(); ++it) {
      if(*it == source)
        distance[*it] = 0;
      else {
        distance[*it] = std::numeric_limits<float>::infinity();
      }
    }

    for(size_t i = 1; i < V.size(); ++i) {
      for(Edges::iterator it = E.begin(); it != E.end(); ++it) {
        if(distance[it->v] + it->edit.cost < distance[it->u]) {
          distance[it->u] = distance[it->v] + it->edit.cost;
          predecessor[it->u] = it->v;
        }
      }
    }

    Edges newE;

    Vertex v = *V.rbegin();
    while(true) {
      //std::cout << predecessor[v] << " -> " << v << std::endl;
      Edges::iterator it = E.find(Edge(predecessor[v], v));
      if(it != E.end()) {
        Edge f = *it;
        //std::cout << f << std::endl;
        newE.insert(f);

        v = predecessor[v];
        if(v == source)
          break;
      } else {
        std::cout << "Error" << std::endl;
        break;
      }
    }
    E.clear();
    E.insert(newE.begin(), newE.end());
  }

  void AddStats(const std::vector<Edges> &Es, const Unit &u, Stats &stats, size_t line) {

    std::map<size_t, Stats> statsPerAnnotator;
    for(std::set<size_t>::iterator it = u.third.begin();
        it != u.third.end(); ++it) {
      statsPerAnnotator[*it] = Stats(4, 0);
    }

    for(Annots::iterator it = u.second.begin(); it != u.second.end(); it++)
      statsPerAnnotator[it->annotator][2]++;

    for(std::set<size_t>::iterator ait = u.third.begin();
        ait != u.third.end(); ++ait) {
      for(Edges::iterator eit = Es[*ait].begin(); eit != Es[*ait].end(); ++eit) {
        if(eit->edit.changed > 0) {
          statsPerAnnotator[*ait][1]++;
          Annot f;
          f.i = eit->v.i;
          f.j = eit->u.i;
          f.annotator = *ait;
          f.edit = eit->edit.edit;
          for(Annots::iterator fit = u.second.begin(); fit != u.second.end(); fit++) {
            if(f == *fit)
              statsPerAnnotator[*ait][0]++;
          }
        }
      }
    }
    size_t bestAnnot = 0;
    float bestF = -1;
    for(std::set<size_t>::iterator it = u.third.begin();
        it != u.third.end(); ++it) {
      Stats localStats = stats;
      localStats[0] += statsPerAnnotator[*it][0];
      localStats[1] += statsPerAnnotator[*it][1];
      localStats[2] += statsPerAnnotator[*it][2];
      if(m_verbose)
        std::cerr << *it << " : " << localStats[0] << " " << localStats[1] << " " << localStats[2] << std::endl;
      float f = FScore(localStats);
      if(m_verbose)
        std::cerr << f << std::endl;
      if(f > bestF) {
        bestF = f;
        bestAnnot = *it;
      }
    }
    if(m_verbose)
      std::cerr << ">> Chosen Annotator for line " << line + 1 << " : " << bestAnnot << std::endl;
    stats[0] += statsPerAnnotator[bestAnnot][0];
    stats[1] += statsPerAnnotator[bestAnnot][1];
    stats[2] += statsPerAnnotator[bestAnnot][2];
  }

  void SufStats(const std::string &sStr, size_t i, Stats &stats) {
    std::string temp = sStr;

    Sentence s;
    boost::split(s, temp, boost::is_any_of(" "), boost::token_compress_on);

    Unit &unit = m_m2[i];

    Matrix d;
    TrackMatrix bt;
    size_t distance = LevenshteinMatrix(unit.first, s, d, bt);

    std::vector<Vertices> Vs(unit.third.size());
    std::vector<Edges> Es(unit.third.size());

    if(distance > unit.first.size()) {
      std::cerr << "Levenshtein distance is greater than source size." << std::endl;
      stats[0] = 0;
      stats[1] = distance;
      stats[2] = 0;
      stats[3] = unit.first.size();
      return;
    } else if(distance > 0) {
      for(size_t j = 0; j < unit.third.size(); j++) {
        BuildGraph(bt, Vs[j], Es[j]);
        AddWeights(Es[j], unit, j);
        BellmanFord(Vs[j], Es[j]);
      }
    }
    AddStats(Es, unit, stats, i);
    stats[3] = unit.first.size();
  }

  float FScore(const Stats& stats) {
    float p = 1.0;
    if(stats[1] != 0)
      p = (float)stats[0] / (float)stats[1];

    float r = 1.0;
    if(stats[2] != 0)
      r = (float)stats[0] / (float)stats[2];

    float denom = (m_beta * m_beta * p + r);
    float f = 0.0;
    if(denom != 0)
      f = ((1 + m_beta * m_beta) * p * r) / denom;
    return f;
  }

  void FScore(const Stats& stats, float &p, float &r, float &f) {
    p = 1.0;
    if(stats[1] != 0)
      p = (float)stats[0] / (float)stats[1];

    r = 1.0;
    if(stats[2] != 0)
      r = (float)stats[0] / (float)stats[2];

    float denom = (m_beta * m_beta * p + r);
    f = 0.0;
    if(denom != 0)
      f = ((1 + m_beta * m_beta) * p * r) / denom;
  }
};

}

}
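The M2 class above forms a small end-to-end API: ReadM2() loads the gold annotations, SufStats() accumulates sufficient statistics for one hypothesis, and FScore() turns them into the weighted F-measure. A minimal usage sketch follows; it is not part of the commit, and the file name "gold.m2" and the example sentence are placeholders.

// Minimal usage sketch (hypothetical), based on the API declared in M2.h.
#include <iostream>
#include "M2.h"

int main()
{
  namespace m2 = MosesTuning::M2;

  // max_unchanged = 2, beta = 0.5, truecase = false (lowercasing enabled)
  m2::M2 scorer(2, 0.5, false);
  scorer.ReadM2("gold.m2");                // placeholder path

  // stats = {correct, proposed, gold, source length}
  m2::Stats stats(4, 0);
  scorer.SufStats("there are a few mistake in this sentences .", 0, stats);

  std::cout << "F0.5 = " << scorer.FScore(stats) << std::endl;
  return 0;
}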
137  mert/M2Scorer.cpp  Normal file
@@ -0,0 +1,137 @@
#include "M2Scorer.h"

#include <algorithm>
#include <fstream>
#include <stdexcept>
#include <sstream>
#include <cstdlib>

#include <boost/lexical_cast.hpp>

using namespace std;

namespace MosesTuning
{

M2Scorer::M2Scorer(const string& config)
  : StatisticsBasedScorer("M2Scorer", config),
    beta_(Scan<float>(getConfig("beta", "0.5"))),
    max_unchanged_words_(Scan<int>(getConfig("max_unchanged_words", "2"))),
    truecase_(Scan<bool>(getConfig("truecase", "false"))),
    verbose_(Scan<bool>(getConfig("verbose", "false"))),
    m2_(max_unchanged_words_, beta_, truecase_)
{}

void M2Scorer::setReferenceFiles(const vector<string>& referenceFiles)
{
  for(size_t i = 0; i < referenceFiles.size(); ++i) {
    m2_.ReadM2(referenceFiles[i]);
    break;
  }
}

void M2Scorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
  string sentence = trimStr(this->preprocessSentence(text));
  std::vector<ScoreStatsType> stats(4, 0);
  m2_.SufStats(sentence, sid, stats);
  entry.set(stats);
}

float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
  if (comps.size() != NumberOfScores()) {
    // the count must be converted explicitly: "string literal" + size_t
    // is pointer arithmetic, not concatenation
    throw runtime_error("Size of stat vector for M2Scorer is not "
                        + boost::lexical_cast<string>(NumberOfScores()));
  }

  float beta = beta_;

  float p = 0.0;
  float r = 0.0;
  float f = 0.0;

  if(comps[1] != 0)
    p = comps[0] / (double)comps[1];
  else
    p = 1.0;

  if(comps[2] != 0)
    r = comps[0] / (double)comps[2];
  else
    r = 1.0;

  float denom = beta * beta * p + r;
  if(denom != 0)
    f = (1.0 + beta * beta) * p * r / denom;
  else
    f = 0.0;

  if(verbose_)
    std::cerr << comps[0] << " " << comps[1] << " " << comps[2] << std::endl;

  if(verbose_)
    std::cerr << p << " " << r << " " << f << std::endl;

  return f;
}

float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
{
  return comps[3];
}

std::vector<ScoreStatsType> randomStats(float decay, int max)
{
  int gold = rand() % max;
  int prop = rand() % max;
  int corr = 0;

  if(std::min(prop, gold) > 0)
    corr = rand() % std::min(prop, gold);

  //std::cerr << corr << " " << prop << " " << gold << std::endl;

  std::vector<ScoreStatsType> stats(3, 0.0);
  stats[0] = corr * decay;
  stats[1] = prop * decay;
  stats[2] = gold * decay;

  return stats;
}

float sentenceM2(const std::vector<ScoreStatsType>& stats)
{
  float beta = 0.5;

  std::vector<ScoreStatsType> smoothStats(3, 0.0); // = randomStats(0.001, 5);
  smoothStats[0] += stats[0];
  smoothStats[1] += stats[1];
  smoothStats[2] += stats[2];

  float p = 0.0;
  float r = 0.0;
  float f = 0.0;

  if(smoothStats[1] != 0)
    p = smoothStats[0] / smoothStats[1];
  else
    p = 1.0;

  if(smoothStats[2] != 0)
    r = smoothStats[0] / smoothStats[2];
  else
    r = 1.0;

  float denom = beta * beta * p + r;
  if(denom != 0)
    f = (1.0 + beta * beta) * p * r / denom;
  else
    f = 0.0;

  return f;
}

}
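Both M2::FScore() in M2.h and calculateScore() above compute the same weighted F-measure from the sufficient statistics (correct, proposed, gold edits). Written out, with precision and recall defaulting to 1 when their denominators are zero:

p = \frac{\mathrm{correct}}{\mathrm{proposed}}, \qquad
r = \frac{\mathrm{correct}}{\mathrm{gold}}, \qquad
F_\beta = \frac{(1+\beta^2)\,p\,r}{\beta^2\,p + r}, \qquad \beta = 0.5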
52  mert/M2Scorer.h  Normal file
@@ -0,0 +1,52 @@
#ifndef MERT_M2_SCORER_H_
#define MERT_M2_SCORER_H_

#include <string>
#include <vector>
#include <map>
#include <functional>

#include "Types.h"
#include "Util.h"
#include "StatisticsBasedScorer.h"
#include "M2.h"

namespace MosesTuning
{

/**
 * The M2Scorer class computes the CoNLL M2 F-score.
 */
class M2Scorer: public StatisticsBasedScorer
{
public:
  explicit M2Scorer(const std::string& config);

  virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
  virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);

  virtual std::size_t NumberOfScores() const {
    return 4;
  }

  virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
  virtual float getReferenceLength(const std::vector<ScoreStatsType>& comps) const;

private:
  float beta_;
  int max_unchanged_words_;
  bool truecase_;
  bool verbose_;
  M2::M2 m2_;

  std::map<std::pair<size_t, std::string>, std::vector<ScoreStatsType> > seen_;

  // no copying allowed
  M2Scorer(const M2Scorer&);
  M2Scorer& operator=(const M2Scorer&);
};

float sentenceM2(const std::vector<ScoreStatsType>& stats);

}

#endif // MERT_M2_SCORER_H_
@@ -11,6 +11,7 @@
 #include "SemposScorer.h"
 #include "PermutationScorer.h"
 #include "MeteorScorer.h"
+#include "M2Scorer.h"
 #include "HwcmScorer.h"
 #include "Reference.h"
 
@@ -34,6 +35,7 @@ vector<string> ScorerFactory::getTypes()
   types.push_back(string("LRSCORE"));
   types.push_back(string("METEOR"));
   types.push_back(string("HWCM"));
+  types.push_back(string("M2SCORER"));
   return types;
 }
 
@@ -54,6 +56,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config)
     return new CderScorer(config, false);
   } else if (type == "SEMPOS") {
     return new SemposScorer(config);
+  } else if (type == "M2SCORER") {
+    return new M2Scorer(config);
   } else if ((type == "HAMMING") || (type == "KENDALL")) {
     return (PermutationScorer*) new PermutationScorer(type, config);
   } else if (type == "METEOR") {
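With these registrations in place, the factory should be able to build the new scorer by name. A hypothetical sketch, not part of the commit; the config-string format (comma-separated key:value pairs) is an assumption inferred from the getConfig() calls in M2Scorer.cpp.

// Hypothetical sketch of constructing the new scorer via the factory.
#include "ScorerFactory.h"

using namespace MosesTuning;

Scorer* makeM2Scorer()
{
  // keys match those read in the M2Scorer constructor
  return ScorerFactory::getScorer("M2SCORER", "beta:0.5,max_unchanged_words:2");
}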
354  moses/FF/CorrectionPattern.cpp  Normal file
@@ -0,0 +1,354 @@
#include <sstream>
#include "CorrectionPattern.h"
#include "moses/Phrase.h"
#include "moses/TargetPhrase.h"
#include "moses/InputPath.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"

#include <functional>
#include <algorithm>

#include <boost/foreach.hpp>
#include <boost/algorithm/string.hpp>

#include "Diffs.h"

namespace Moses
{

using namespace std;

// Format an integer as a string. This replaces the original
// sprintf((char*)temp.c_str(), "%d", count) calls, which wrote through a
// const buffer and were undefined behavior.
static std::string IntToString(int value)
{
  std::ostringstream out;
  out << value;
  return out.str();
}

std::string MakePair(const std::string &s1, const std::string &s2, bool general)
{
  std::vector<std::string> sourceList;
  std::vector<std::string> targetList;

  if(general) {
    Diffs diffs = CreateDiff(s1, s2);

    size_t i = 0, j = 0;
    char lastType = 'm';

    std::string source, target;
    std::string match;

    int count = 1;

    BOOST_FOREACH(Diff type, diffs) {
      if(type == 'm') {
        if(lastType != 'm') {
          sourceList.push_back(source);
          targetList.push_back(target);
        }
        source.clear();
        target.clear();

        if(s1[i] == '+') {
          if(match.size() >= 3) {
            sourceList.push_back("(\\w{3,})·");
            targetList.push_back("\\" + IntToString(count) + "·");
            count++;
          } else {
            sourceList.push_back(match + "·");
            targetList.push_back(match + "·");
          }
          match.clear();
        } else
          match.push_back(s1[i]);

        i++;
        j++;
      } else if(type == 'd') {
        if(s1[i] == '+')
          source += "·";
        else
          source.push_back(s1[i]);
        i++;
      } else if(type == 'i') {
        if(s2[j] == '+')
          target += "·";
        else
          target.push_back(s2[j]);
        j++;
      }
      if(type != 'm' && !match.empty()) {
        if(match.size() >= 3) {
          sourceList.push_back("(\\w{3,})");
          targetList.push_back("\\" + IntToString(count));
          count++;
        } else {
          sourceList.push_back(match);
          targetList.push_back(match);
        }

        match.clear();
      }

      lastType = type;
    }
    if(lastType != 'm') {
      sourceList.push_back(source);
      targetList.push_back(target);
    }

    if(!match.empty()) {
      if(match.size() >= 3) {
        sourceList.push_back("(\\w{3,})");
        targetList.push_back("\\" + IntToString(count));
        count++;
      } else {
        sourceList.push_back(match);
        targetList.push_back(match);
      }
    }
    match.clear();
  } else {
    std::string cs1 = s1;
    std::string cs2 = s2;
    boost::replace_all(cs1, "+", "·");
    boost::replace_all(cs2, "+", "·");

    sourceList.push_back(cs1);
    targetList.push_back(cs2);
  }

  std::stringstream out;
  out << "sub(«";
  out << boost::join(sourceList, "");
  out << "»,«";
  out << boost::join(targetList, "");
  out << "»)";

  return out.str();
}

std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const
{
  std::stringstream out;
  if(s1.empty()) {
    out << "ins(«" << boost::join(s2, "·") << "»)";
    return out.str();
  } else if(s2.empty()) {
    out << "del(«" << boost::join(s1, "·") << "»)";
    return out.str();
  } else {
    // "typename" dropped from the original: this is not a template context
    Tokens::value_type v1 = boost::join(s1, "+");
    Tokens::value_type v2 = boost::join(s2, "+");
    out << MakePair(v1, v2, m_general);
    return out.str();
  }
}

std::vector<std::string> GetContext(size_t pos,
                                    size_t len,
                                    size_t window,
                                    const InputType &input,
                                    const InputPath &inputPath,
                                    const std::vector<FactorType>& factorTypes,
                                    bool isRight)
{
  const Sentence& sentence = static_cast<const Sentence&>(input);
  const Range& range = inputPath.GetWordsRange();

  int leftPos = range.GetStartPos() + pos - len - 1;
  int rightPos = range.GetStartPos() + pos;

  std::vector<std::string> contexts;

  for(int length = 1; length <= (int)window; ++length) {
    std::vector<std::string> current;
    if(!isRight) {
      for(int i = 0; i < length; i++) {
        if(leftPos - i >= 0) {
          current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false));
        } else {
          current.push_back("<s>");
        }
      }

      if(current.back() == "<s>" && current.size() >= 2 && current[current.size()-2] == "<s>")
        continue;

      std::reverse(current.begin(), current.end());
      contexts.push_back("left(«" + boost::join(current, "·") + "»)_");
    }
    if(isRight) {
      for(int i = 0; i < length; i++) {
        if(rightPos + i < (int)sentence.GetSize()) {
          current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false));
        } else {
          current.push_back("</s>");
        }
      }

      if(current.back() == "</s>" && current.size() >= 2 && current[current.size()-2] == "</s>")
        continue;

      contexts.push_back("_right(«" + boost::join(current, "·") + "»)");
    }
  }
  return contexts;
}

std::vector<std::string>
CorrectionPattern::CreatePattern(const Tokens &s1,
                                 const Tokens &s2,
                                 const InputType &input,
                                 const InputPath &inputPath) const
{
  Diffs diffs = CreateDiff(s1, s2);
  size_t i = 0, j = 0;
  char lastType = 'm';
  std::vector<std::string> patternList;
  Tokens source, target;
  BOOST_FOREACH(Diff type, diffs) {
    if(type == 'm') {
      if(lastType != 'm') {
        std::string pattern = CreateSinglePattern(source, target);
        patternList.push_back(pattern);

        if(m_context > 0) {
          std::vector<std::string> leftContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, false);
          std::vector<std::string> rightContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, true);

          BOOST_FOREACH(std::string left, leftContexts)
            patternList.push_back(left + pattern);

          BOOST_FOREACH(std::string right, rightContexts)
            patternList.push_back(pattern + right);

          BOOST_FOREACH(std::string left, leftContexts)
            BOOST_FOREACH(std::string right, rightContexts)
              patternList.push_back(left + pattern + right);
        }
      }
      source.clear();
      target.clear();
      if(s1[i] != s2[j]) {
        source.push_back(s1[i]);
        target.push_back(s2[j]);
      }
      i++;
      j++;
    } else if(type == 'd') {
      source.push_back(s1[i]);
      i++;
    } else if(type == 'i') {
      target.push_back(s2[j]);
      j++;
    }
    lastType = type;
  }
  if(lastType != 'm') {
    std::string pattern = CreateSinglePattern(source, target);
    patternList.push_back(pattern);

    if(m_context > 0) {
      std::vector<std::string> leftContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, false);
      std::vector<std::string> rightContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, true);

      BOOST_FOREACH(std::string left, leftContexts)
        patternList.push_back(left + pattern);

      BOOST_FOREACH(std::string right, rightContexts)
        patternList.push_back(pattern + right);

      BOOST_FOREACH(std::string left, leftContexts)
        BOOST_FOREACH(std::string right, rightContexts)
          patternList.push_back(left + pattern + right);
    }
  }

  return patternList;
}

CorrectionPattern::CorrectionPattern(const std::string &line)
  : StatelessFeatureFunction(0, line), m_factors(1, 0), m_general(false),
    m_context(0), m_contextFactors(1, 0)
{
  std::cerr << "Initializing correction pattern feature.." << std::endl;
  ReadParameters();
}

void CorrectionPattern::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "factor") {
    m_factors = std::vector<FactorType>(1, Scan<FactorType>(value));
  } else if (key == "context-factor") {
    m_contextFactors = std::vector<FactorType>(1, Scan<FactorType>(value));
  } else if (key == "general") {
    m_general = Scan<bool>(value);
  } else if (key == "context") {
    m_context = Scan<size_t>(value);
  } else {
    StatelessFeatureFunction::SetParameter(key, value);
  }
}

void CorrectionPattern::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  ComputeFeatures(input, inputPath, targetPhrase, &scoreBreakdown);
}

void CorrectionPattern::ComputeFeatures(
  const InputType &input,
  const InputPath &inputPath,
  const TargetPhrase& target,
  ScoreComponentCollection* accumulator) const
{
  const Phrase &source = inputPath.GetPhrase();

  std::vector<std::string> sourceTokens;
  for(size_t i = 0; i < source.GetSize(); ++i)
    sourceTokens.push_back(source.GetWord(i).GetString(m_factors, false));

  std::vector<std::string> targetTokens;
  for(size_t i = 0; i < target.GetSize(); ++i)
    targetTokens.push_back(target.GetWord(i).GetString(m_factors, false));

  std::vector<std::string> patternList = CreatePattern(sourceTokens, targetTokens, input, inputPath);
  for(size_t i = 0; i < patternList.size(); ++i)
    accumulator->PlusEquals(this, patternList[i], 1);

  /*
  BOOST_FOREACH(std::string w, sourceTokens)
    std::cerr << w << " ";
  std::cerr << std::endl;
  BOOST_FOREACH(std::string w, targetTokens)
    std::cerr << w << " ";
  std::cerr << std::endl;
  BOOST_FOREACH(std::string w, patternList)
    std::cerr << w << " ";
  std::cerr << std::endl << std::endl;
  */
}

bool CorrectionPattern::IsUseable(const FactorMask &mask) const
{
  bool ret = true;
  for(size_t i = 0; i < m_factors.size(); ++i)
    ret = ret && mask[m_factors[i]];
  for(size_t i = 0; i < m_contextFactors.size(); ++i)
    ret = ret && mask[m_contextFactors[i]];
  return ret;
}

}
73  moses/FF/CorrectionPattern.h  Normal file
@@ -0,0 +1,73 @@
#ifndef moses_CorrectionPattern_h
#define moses_CorrectionPattern_h

#include <string>
#include <boost/unordered_set.hpp>

#include "StatelessFeatureFunction.h"
#include "moses/FactorCollection.h"
#include "moses/AlignmentInfo.h"

namespace Moses
{

typedef std::vector<std::string> Tokens;

/** Generates sparse features describing the correction patterns (insertions,
 * deletions, substitutions) that transform the source phrase into the target
 * phrase, optionally together with surrounding source context.
 */
class CorrectionPattern : public StatelessFeatureFunction
{
private:
  std::vector<FactorType> m_factors;
  bool m_general;
  size_t m_context;
  std::vector<FactorType> m_contextFactors;

public:
  CorrectionPattern(const std::string &line);

  bool IsUseable(const FactorMask &mask) const;

  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedFutureScore) const
  {}

  virtual void EvaluateWithSourceContext(const InputType &input
                                         , const InputPath &inputPath
                                         , const TargetPhrase &targetPhrase
                                         , const StackVec *stackVec
                                         , ScoreComponentCollection &scoreBreakdown
                                         , ScoreComponentCollection *estimatedFutureScore = NULL) const;

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const
  {}

  void EvaluateWhenApplied(const Hypothesis& hypo,
                           ScoreComponentCollection* accumulator) const
  {}
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                           ScoreComponentCollection* accumulator) const
  {}

  void ComputeFeatures(const InputType &input,
                       const InputPath &inputPath,
                       const TargetPhrase& targetPhrase,
                       ScoreComponentCollection* accumulator) const;

  void SetParameter(const std::string& key, const std::string& value);

  std::vector<std::string> CreatePattern(const Tokens &s1,
                                         const Tokens &s2,
                                         const InputType &input,
                                         const InputPath &inputPath) const;

  std::string CreateSinglePattern(const Tokens &s1, const Tokens &s2) const;

};

}

#endif // moses_CorrectionPattern_h
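For intuition, the sparse feature names this class produces follow the ins/del/sub templates from CreateSinglePattern() and MakePair() above. The concrete strings below are illustrative examples constructed from the code, not output from the commit's test data.

// Illustrative examples of generated pattern features (hypothetical inputs):
//   source {},                target {"the"}          ->  ins(«the»)
//   source {"of"},            target {}               ->  del(«of»)
//   source {"informations"},  target {"information"}  ->  sub(«informations»,«information»)
// With general=1, MakePair() additionally back-references matched substrings
// of three or more characters, e.g. a trailing-s deletion generalizes to
//   sub(«(\w{3,})s»,«\1»)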
150  moses/FF/Diffs.h  Normal file
@@ -0,0 +1,150 @@
#ifndef moses_Diffs_h
#define moses_Diffs_h

#include <cmath>
// added: used below but missing from the original header
#include <vector>
#include <functional>

namespace Moses
{

typedef char Diff;
typedef std::vector<Diff> Diffs;

template <class Sequence, class Pred>
void CreateDiffRec(size_t** c,
                   const Sequence &s1,
                   const Sequence &s2,
                   size_t start,
                   size_t i,
                   size_t j,
                   Diffs& diffs,
                   Pred pred)
{
  if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
    CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
    diffs.push_back(Diff('m'));
  } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
    CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
    diffs.push_back(Diff('i'));
  } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
    CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
    diffs.push_back(Diff('d'));
  }
}

template <class Sequence, class Pred>
Diffs CreateDiff(const Sequence& s1,
                 const Sequence& s2,
                 Pred pred)
{
  Diffs diffs;

  size_t n = s2.size();

  int start = 0;
  int m_end = s1.size() - 1;
  int n_end = s2.size() - 1;

  while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) {
    diffs.push_back(Diff('m'));
    start++;
  }
  while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) {
    m_end--;
    n_end--;
  }

  size_t m_new = m_end - start + 1;
  size_t n_new = n_end - start + 1;

  size_t** c = new size_t*[m_new + 1];
  for(size_t i = 0; i <= m_new; ++i) {
    c[i] = new size_t[n_new + 1];
    c[i][0] = 0;
  }
  for(size_t j = 0; j <= n_new; ++j)
    c[0][j] = 0;
  for(size_t i = 1; i <= m_new; ++i)
    for(size_t j = 1; j <= n_new; ++j)
      if(pred(s1[i - 1 + start], s2[j - 1 + start]))
        c[i][j] = c[i-1][j-1] + 1;
      else
        c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j];

  CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred);

  for(size_t i = 0; i <= m_new; ++i)
    delete[] c[i];
  delete[] c;

  for (size_t i = n_end + 1; i < n; ++i)
    diffs.push_back(Diff('m'));

  return diffs;
}

template <class Sequence>
Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
{
  return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
}

template <class Sequence, class Sig, class Stats>
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
{
  if(sig.size() != stats.size())
    throw "Signature size differs from score array size.";

  size_t m = 0, d = 0, i = 0, s = 0;
  Diffs diff = CreateDiff(s1, s2);

  for(int j = 0; j < (int)diff.size(); ++j) {
    if(diff[j] == 'm')
      m++;
    else if(diff[j] == 'd') {
      d++;
      int k = 0;
      while(j - k >= 0 && j + 1 + k < (int)diff.size() &&
            diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
        d--;
        s++;
        k++;
      }
      j += k;
    } else if(diff[j] == 'i')
      i++;
  }

  for(size_t j = 0; j < sig.size(); ++j) {
    switch (sig[j]) {
    case 'l':
      stats[j] += d + i + s;
      break;
    case 'm':
      stats[j] += m;
      break;
    case 'd':
      stats[j] += d;
      break;
    case 'i':
      stats[j] += i;
      break;
    case 's':
      stats[j] += s;
      break;
    case 'r':
      float macc = 1;
      if (d + i + s + m)
        macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
      if(macc > 0)
        stats[j] += log(macc);
      else
        stats[j] += log(1.0/(float)(d + i + s + m + 1));
      break;
    }
  }
}

}

#endif
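A small standalone sketch of CreateDiff() in isolation (hypothetical input, not part of the commit). By the backtracking order above, a changed character comes out as a deletion followed by an insertion, and common prefixes and suffixes are emitted as matches.

// Standalone sketch of the LCS diff opcodes.
#include <iostream>
#include <string>
#include <vector>
#include <functional>

#include "moses/FF/Diffs.h"

int main()
{
  std::string s1 = "abcd";
  std::string s2 = "abxd";

  // one opcode per step: 'm' = match, 'd' = delete from s1, 'i' = insert from s2
  Moses::Diffs diffs = Moses::CreateDiff(s1, s2);
  for (size_t k = 0; k < diffs.size(); ++k)
    std::cout << diffs[k];
  std::cout << std::endl;   // prints "mmdim": 'c' deleted, 'x' inserted
  return 0;
}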
119  moses/FF/EditOps.cpp  Normal file
@@ -0,0 +1,119 @@
#include <sstream>
#include "EditOps.h"
#include "moses/Phrase.h"
#include "moses/TargetPhrase.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"

#include <functional>

#include <boost/foreach.hpp>
#include <boost/algorithm/string.hpp>

#include "Diffs.h"

namespace Moses
{

using namespace std;

std::string ParseScores(const std::string &line, const std::string& defaultScores)
{
  std::vector<std::string> toks = Tokenize(line);
  UTIL_THROW_IF2(toks.empty(), "Empty line");

  for (size_t i = 1; i < toks.size(); ++i) {
    std::vector<std::string> args = TokenizeFirstOnly(toks[i], "=");
    UTIL_THROW_IF2(args.size() != 2,
                   "Incorrect format for feature function arg: " << toks[i]);

    if (args[0] == "scores") {
      return args[1];
    }
  }
  return defaultScores;
}

EditOps::EditOps(const std::string &line)
  : StatelessFeatureFunction(ParseScores(line, "dis").size(), line)
  , m_factorType(0), m_chars(false), m_scores(ParseScores(line, "dis"))
{
  std::cerr << "Initializing EditOps feature.." << std::endl;
  ReadParameters();
}

void EditOps::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "factor") {
    m_factorType = Scan<FactorType>(value);
  } else if (key == "chars") {
    m_chars = Scan<bool>(value);
  } else if (key == "scores") {
    m_scores = value;
  } else {
    StatelessFeatureFunction::SetParameter(key, value);
  }
}

void EditOps::Load()
{ }

void EditOps::EvaluateInIsolation(const Phrase &source
                                  , const TargetPhrase &target
                                  , ScoreComponentCollection &scoreBreakdown
                                  , ScoreComponentCollection &estimatedFutureScore) const
{
  ComputeFeatures(source, target, &scoreBreakdown);
}

void EditOps::ComputeFeatures(
  const Phrase &source,
  const TargetPhrase& target,
  ScoreComponentCollection* accumulator) const
{
  std::vector<float> ops(GetNumScoreComponents(), 0);

  if(m_chars) {
    std::vector<FactorType> factors;
    factors.push_back(m_factorType);

    std::string sourceStr = source.GetStringRep(factors);
    std::string targetStr = target.GetStringRep(factors);

    AddStats(sourceStr, targetStr, m_scores, ops);
  } else {
    std::vector<std::string> sourceTokens;
    //std::cerr << "Ed src: ";
    for(size_t i = 0; i < source.GetSize(); ++i) {
      if(!source.GetWord(i).IsNonTerminal())
        sourceTokens.push_back(source.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
      //std::cerr << sourceTokens.back() << " ";
    }
    //std::cerr << std::endl;

    std::vector<std::string> targetTokens;
    //std::cerr << "Ed trg: ";
    for(size_t i = 0; i < target.GetSize(); ++i) {
      if(!target.GetWord(i).IsNonTerminal())
        targetTokens.push_back(target.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
      //std::cerr << targetTokens.back() << " ";
    }
    //std::cerr << std::endl;

    AddStats(sourceTokens, targetTokens, m_scores, ops);
  }

  accumulator->PlusEquals(this, ops);
}

bool EditOps::IsUseable(const FactorMask &mask) const
{
  bool ret = mask[m_factorType];
  return ret;
}

}
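EditOps delegates the actual counting to the templated AddStats() in Diffs.h; the signature string selects which edit counts land in which score component. A standalone sketch follows; it is not part of the commit and the token inputs are made up.

// Standalone sketch of the AddStats() signature-string mechanism.
#include <iostream>
#include <string>
#include <vector>
#include <functional>

#include "moses/FF/Diffs.h"

int main()
{
  std::vector<std::string> src, trg;
  src.push_back("the"); src.push_back("cat"); src.push_back("sat");
  trg.push_back("the"); trg.push_back("dog"); trg.push_back("sat");

  // "dis" is the feature's default signature: one component each for
  // deletions, insertions and substitutions
  std::string sig = "dis";
  std::vector<float> stats(sig.size(), 0);
  Moses::AddStats(src, trg, sig, stats);

  // "cat" -> "dog" collapses a d+i pair into a single substitution: 0 0 1
  std::cout << stats[0] << " " << stats[1] << " " << stats[2] << std::endl;
  return 0;
}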
64  moses/FF/EditOps.h  Normal file
@@ -0,0 +1,64 @@
#ifndef moses_EditOps_h
#define moses_EditOps_h

#include <string>
#include <boost/unordered_set.hpp>

#include "StatelessFeatureFunction.h"
#include "moses/FactorCollection.h"
#include "moses/AlignmentInfo.h"

namespace Moses
{

typedef std::vector<std::string> Tokens;

/** Calculates the string edit operations that transform the source phrase
 * into the target phrase using the LCS algorithm. Potentially useful for
 * monolingual tasks like paraphrasing, summarization, and correction.
 */
class EditOps : public StatelessFeatureFunction
{
private:
  FactorType m_factorType;
  bool m_chars;
  std::string m_scores;

public:
  EditOps(const std::string &line);

  bool IsUseable(const FactorMask &mask) const;

  void Load();

  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedFutureScore) const;

  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
  {}
  void EvaluateWhenApplied(const Hypothesis& hypo,
                           ScoreComponentCollection* accumulator) const
  {}
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                           ScoreComponentCollection* accumulator) const
  {}
  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const
  {}

  void ComputeFeatures(const Phrase &source,
                       const TargetPhrase& targetPhrase,
                       ScoreComponentCollection* accumulator) const;
  void SetParameter(const std::string& key, const std::string& value);
};

}

#endif // moses_EditOps_h
@@ -73,8 +73,14 @@
 #include "moses/Syntax/InputWeightFF.h"
 #include "moses/Syntax/RuleTableFF.h"
 
+#include "moses/FF/EditOps.h"
+#include "moses/FF/CorrectionPattern.h"
+
 #ifdef HAVE_VW
 #include "moses/FF/VW/VW.h"
+#include "moses/FF/VW/VWFeatureContextBigrams.h"
+#include "moses/FF/VW/VWFeatureContextBilingual.h"
+#include "moses/FF/VW/VWFeatureContextWindow.h"
 #include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
 #include "moses/FF/VW/VWFeatureSourceBigrams.h"
 #include "moses/FF/VW/VWFeatureSourceIndicator.h"
@@ -294,8 +300,14 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(SkeletonTranslationOptionListFeature);
   MOSES_FNAME(SkeletonPT);
 
+  MOSES_FNAME(EditOps);
+  MOSES_FNAME(CorrectionPattern);
+
 #ifdef HAVE_VW
   MOSES_FNAME(VW);
+  MOSES_FNAME(VWFeatureContextBigrams);
+  MOSES_FNAME(VWFeatureContextBilingual);
+  MOSES_FNAME(VWFeatureContextWindow);
   MOSES_FNAME(VWFeatureSourceBagOfWords);
   MOSES_FNAME(VWFeatureSourceBigrams);
   MOSES_FNAME(VWFeatureSourceIndicator);
@@ -76,7 +76,7 @@ public:
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const {
}
}

void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const {
@@ -3,10 +3,11 @@
 namespace Moses
 {
 
-OSMLM* ConstructOSMLM(const char *file)
+OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
 {
   lm::ngram::ModelType model_type;
   lm::ngram::Config config;
+  config.load_method = load_method;
   if (lm::ngram::RecognizeBinary(file, model_type)) {
     switch(model_type) {
     case lm::ngram::PROBING:
@@ -47,7 +47,7 @@ private:
 
 typedef KenOSMBase OSMLM;
 
-OSMLM* ConstructOSMLM(const char *file);
+OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
 
 
 } // namespace
@@ -17,6 +17,7 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
   tFactor = 0;
   numFeatures = 5;
   ReadParameters();
+  load_method = util::READ;
 }
 
 OpSequenceModel::~OpSequenceModel()
@@ -27,7 +28,7 @@ OpSequenceModel::~OpSequenceModel()
 void OpSequenceModel :: readLanguageModel(const char *lmFile)
 {
   string unkOp = "_TRANS_SLF_";
-  OSM = ConstructOSMLM(m_lmPath.c_str());
+  OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);
 
   State startState = OSM->NullContextState();
   State endState;
@@ -248,6 +249,20 @@ void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
     sFactor = Scan<int>(value);
   } else if (key == "output-factor") {
     tFactor = Scan<int>(value);
+  } else if (key == "load") {
+    if (value == "lazy") {
+      load_method = util::LAZY;
+    } else if (value == "populate_or_lazy") {
+      load_method = util::POPULATE_OR_LAZY;
+    } else if (value == "populate_or_read" || value == "populate") {
+      load_method = util::POPULATE_OR_READ;
+    } else if (value == "read") {
+      load_method = util::READ;
+    } else if (value == "parallel_read") {
+      load_method = util::PARALLEL_READ;
+    } else {
+      UTIL_THROW2("Unknown KenLM load method " << value);
+    }
   } else {
     StatefulFeatureFunction::SetParameter(key, value);
   }
@@ -20,6 +20,7 @@ public:
   int sFactor; // Source Factor ...
   int tFactor; // Target Factor ...
   int numFeatures; // Number of features used ...
+  util::LoadMethod load_method; // method to load model
 
   OpSequenceModel(const std::string &line);
   ~OpSequenceModel();
40  moses/FF/VW/AlignmentConstraint.h  Normal file
@@ -0,0 +1,40 @@
#pragma once

// added: std::numeric_limits is used below but <limits> was not included
#include <limits>

namespace Moses
{

/**
 * Helper class for storing alignment constraints.
 */
class AlignmentConstraint
{
public:
  AlignmentConstraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}

  AlignmentConstraint(int min, int max) : m_min(min), m_max(max) {}

  /**
   * We are aligned to point => our min cannot be larger, our max cannot be smaller.
   */
  void Update(int point) {
    if (m_min > point) m_min = point;
    if (m_max < point) m_max = point;
  }

  bool IsSet() const {
    return m_max != -1;
  }

  int GetMin() const {
    return m_min;
  }

  int GetMax() const {
    return m_max;
  }

private:
  int m_min, m_max;
};

}
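A quick sketch of the intended semantics (hypothetical, not from the commit): a default-constructed constraint is "unset" until Update() has seen at least one aligned position, after which it tracks the min and max of all observed points.

// Standalone sketch of AlignmentConstraint usage.
#include <iostream>

#include "moses/FF/VW/AlignmentConstraint.h"

int main()
{
  Moses::AlignmentConstraint c;
  std::cout << c.IsSet() << std::endl;   // 0: no alignment point seen yet

  c.Update(4);
  c.Update(2);
  // min/max over all observed alignment points
  std::cout << c.GetMin() << " " << c.GetMax() << std::endl;   // 2 4
  return 0;
}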
637  moses/FF/VW/VW.cpp  Normal file
@@ -0,0 +1,637 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "moses/FF/StatefulFeatureFunction.h"
|
||||
#include "moses/PP/CountsPhraseProperty.h"
|
||||
#include "moses/TranslationOptionList.h"
|
||||
#include "moses/TranslationOption.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/Phrase.h"
|
||||
#include "moses/AlignmentInfo.h"
|
||||
#include "moses/AlignmentInfoCollection.h"
|
||||
#include "moses/Word.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
|
||||
#include "Normalizer.h"
|
||||
#include "Classifier.h"
|
||||
#include "VWFeatureBase.h"
|
||||
#include "TabbedSentence.h"
|
||||
#include "ThreadLocalByFeatureStorage.h"
|
||||
#include "TrainingLoss.h"
|
||||
#include "VWTargetSentence.h"
|
||||
#include "VWState.h"
|
||||
#include "VW.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
VW::VW(const std::string &line)
|
||||
: StatefulFeatureFunction(1, line)
|
||||
, TLSTargetSentence(this)
|
||||
, m_train(false)
|
||||
, m_sentenceStartWord(Word())
|
||||
{
|
||||
ReadParameters();
|
||||
Discriminative::ClassifierFactory *classifierFactory = m_train
|
||||
? new Discriminative::ClassifierFactory(m_modelPath)
|
||||
: new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
|
||||
|
||||
m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
|
||||
|
||||
m_tlsFutureScores = new TLSFloatHashMap(this);
|
||||
m_tlsComputedStateExtensions = new TLSStateExtensions(this);
|
||||
m_tlsTranslationOptionFeatures = new TLSFeatureVectorMap(this);
|
||||
m_tlsTargetContextFeatures = new TLSFeatureVectorMap(this);
|
||||
|
||||
if (! m_normalizer) {
|
||||
VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
|
||||
m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
|
||||
}
|
||||
|
||||
if (! m_trainingLoss) {
|
||||
VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
|
||||
m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
|
||||
}
|
||||
|
||||
// create a virtual beginning-of-sentence word with all factors replaced by <S>
|
||||
const Factor *bosFactor = FactorCollection::Instance().AddFactor(BOS_);
|
||||
for (size_t i = 0; i < MAX_NUM_FACTORS; i++)
|
||||
m_sentenceStartWord.SetFactor(i, bosFactor);
|
||||
}
|
||||
|
||||
VW::~VW()
|
||||
{
|
||||
delete m_tlsClassifier;
|
||||
delete m_normalizer;
|
||||
// TODO delete more stuff
|
||||
}

FFState* VW::EvaluateWhenApplied(
  const Hypothesis& curHypo,
  const FFState* prevState,
  ScoreComponentCollection* accumulator) const
{
  VERBOSE(3, "VW :: Evaluating translation options\n");

  const VWState& prevVWState = *static_cast<const VWState *>(prevState);

  const std::vector<VWFeatureBase*>& contextFeatures =
    VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());

  if (contextFeatures.empty()) {
    // no target context features => we already evaluated everything in
    // EvaluateTranslationOptionListWithSourceContext(). Nothing to do now,
    // no state information to track.
    return new VWState();
  }

  size_t spanStart = curHypo.GetTranslationOption().GetStartPos();
  size_t spanEnd = curHypo.GetTranslationOption().GetEndPos();

  // compute our current key
  size_t cacheKey = MakeCacheKey(prevState, spanStart, spanEnd);

  boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
    = *m_tlsComputedStateExtensions->GetStored();

  if (computedStateExtensions.find(cacheKey) == computedStateExtensions.end()) {
    // we have not computed this set of translation options yet
    const TranslationOptionList *topts =
      curHypo.GetManager().getSntTranslationOptions()->GetTranslationOptionList(spanStart, spanEnd);

    const InputType& input = curHypo.GetManager().GetSource();

    Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();

    // extract target context features
    size_t contextHash = prevVWState.hash();

    FeatureVectorMap &contextFeaturesCache = *m_tlsTargetContextFeatures->GetStored();

    FeatureVectorMap::const_iterator contextIt = contextFeaturesCache.find(contextHash);
    if (contextIt == contextFeaturesCache.end()) {
      // we have not extracted features for this context yet

      const Phrase &targetContext = prevVWState.GetPhrase();
      Discriminative::FeatureVector contextVector;
      const AlignmentInfo *alignInfo = TransformAlignmentInfo(curHypo, targetContext.GetSize());
      for(size_t i = 0; i < contextFeatures.size(); ++i)
        (*contextFeatures[i])(input, targetContext, *alignInfo, classifier, contextVector);

      contextFeaturesCache[contextHash] = contextVector;
      VERBOSE(3, "VW :: context cache miss\n");
    } else {
      // context already in cache, simply put feature IDs in the classifier object
      classifier.AddLabelIndependentFeatureVector(contextIt->second);
      VERBOSE(3, "VW :: context cache hit\n");
    }

    std::vector<float> losses(topts->size());

    for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
      const TranslationOption *topt = topts->Get(toptIdx);
      const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
      size_t toptHash = hash_value(*topt);

      // start with pre-computed source-context-only VW scores
      losses[toptIdx] = m_tlsFutureScores->GetStored()->find(toptHash)->second;

      // add all features associated with this translation option
      // (pre-computed when evaluated with source context)
      const Discriminative::FeatureVector &targetFeatureVector =
        m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;

      classifier.AddLabelDependentFeatureVector(targetFeatureVector);

      // add classifier score with context+target features only to the total loss
      losses[toptIdx] += classifier.Predict(MakeTargetLabel(targetPhrase));
    }

    // normalize classifier scores to get a probability distribution
    (*m_normalizer)(losses);

    // fill our cache with the results
    FloatHashMap &toptScores = computedStateExtensions[cacheKey];
    for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
      const TranslationOption *topt = topts->Get(toptIdx);
      size_t toptHash = hash_value(*topt);
      toptScores[toptHash] = FloorScore(TransformScore(losses[toptIdx]));
    }

    VERBOSE(3, "VW :: cache miss\n");
  } else {
    VERBOSE(3, "VW :: cache hit\n");
  }

  // now our cache is guaranteed to contain the required score, simply look it up
  std::vector<float> newScores(m_numScoreComponents);
  size_t toptHash = hash_value(curHypo.GetTranslationOption());
  newScores[0] = computedStateExtensions[cacheKey][toptHash];
  VERBOSE(3, "VW :: adding score: " << newScores[0] << "\n");
  accumulator->PlusEquals(this, newScores);

  return new VWState(prevVWState, curHypo);
}
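
// A note on the caching scheme used above (derived from the code itself): the outer cache
// (computedStateExtensions) is keyed by (prevState pointer, source span) via MakeCacheKey(),
// so the normalized scores of a whole TranslationOptionList are computed only once per
// expanded state and span; the inner cache (contextFeaturesCache) is keyed by the hash of the
// target-side context phrase, so identical contexts reached through different hypotheses share
// their extracted feature vectors. Both caches live in thread-local storage and are cleared
// once per sentence in InitializeForInput().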

const FFState* VW::EmptyHypothesisState(const InputType &input) const
{
  size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
  Phrase initialPhrase;
  for (size_t i = 0; i < maxContextSize; i++)
    initialPhrase.AddWord(m_sentenceStartWord);

  return new VWState(initialPhrase);
}

void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
    , const TranslationOptionList &translationOptionList) const
{
  Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();

  if (translationOptionList.size() == 0)
    return; // nothing to do

  VERBOSE(3, "VW :: Evaluating translation options\n");

  // which feature functions do we use (on the source and target side)
  const std::vector<VWFeatureBase*>& sourceFeatures =
    VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());

  const std::vector<VWFeatureBase*>& contextFeatures =
    VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());

  const std::vector<VWFeatureBase*>& targetFeatures =
    VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());

  size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());

  // only use stateful score computation when needed
  bool haveTargetContextFeatures = ! contextFeatures.empty();

  const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();

  if (m_train) {
    //
    // extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
    //

    // find which topts are correct
    std::vector<bool> correct(translationOptionList.size());
    std::vector<int> startsAt(translationOptionList.size());
    std::set<int> uncoveredStartingPositions;

    for (size_t i = 0; i < translationOptionList.size(); i++) {
      std::pair<bool, int> isCorrect = IsCorrectTranslationOption(* translationOptionList.Get(i));
      correct[i] = isCorrect.first;
      startsAt[i] = isCorrect.second;
      if (isCorrect.first) {
        uncoveredStartingPositions.insert(isCorrect.second);
      }
    }

    // optionally update translation options using leave-one-out
    std::vector<bool> keep = (m_leaveOneOut.size() > 0)
                             ? LeaveOneOut(translationOptionList, correct)
                             : std::vector<bool>(translationOptionList.size(), true);

    while (! uncoveredStartingPositions.empty()) {
      int currentStart = *uncoveredStartingPositions.begin();
      uncoveredStartingPositions.erase(uncoveredStartingPositions.begin());

      // check whether we (still) have some correct translation
      int firstCorrect = -1;
      for (size_t i = 0; i < translationOptionList.size(); i++) {
        if (keep[i] && correct[i] && startsAt[i] == currentStart) {
          firstCorrect = i;
          break;
        }
      }

      // do not train if there are no positive examples
      if (firstCorrect == -1) {
        VERBOSE(3, "VW :: skipping topt collection, no correct translation for span at current tgt start position\n");
        continue;
      }

      // the first correct topt can be used by some loss functions
      const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();

      // feature extraction *at prediction time* outputs feature hashes which can be cached;
      // this is training time, simply store everything in this dummyVector
      Discriminative::FeatureVector dummyVector;

      // extract source side features
      for(size_t i = 0; i < sourceFeatures.size(); ++i)
        (*sourceFeatures[i])(input, sourceRange, classifier, dummyVector);

      // build target-side context
      Phrase targetContext;
      for (size_t i = 0; i < maxContextSize; i++)
        targetContext.AddWord(m_sentenceStartWord);

      const Phrase *targetSent = GetStored()->m_sentence;

      // word alignment info shifted by context size
      AlignmentInfo contextAlignment = TransformAlignmentInfo(*GetStored()->m_alignment, maxContextSize, currentStart);

      if (currentStart > 0)
        targetContext.Append(targetSent->GetSubString(Range(0, currentStart - 1)));

      // extract target-context features
      for(size_t i = 0; i < contextFeatures.size(); ++i)
        (*contextFeatures[i])(input, targetContext, contextAlignment, classifier, dummyVector);

      // go over topts, extract target side features and train the classifier
      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {

        // this topt was discarded by leaving one out
        if (! keep[toptIdx])
          continue;

        // extract target-side features for each topt
        const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
        for(size_t i = 0; i < targetFeatures.size(); ++i)
          (*targetFeatures[i])(input, targetPhrase, classifier, dummyVector);

        bool isCorrect = correct[toptIdx] && startsAt[toptIdx] == currentStart;
        float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, isCorrect);

        // train classifier on current example
        classifier.Train(MakeTargetLabel(targetPhrase), loss);
      }
    }
  } else {
    //
    // predict using a trained classifier, use this in decoding (=at test time)
    //

    std::vector<float> losses(translationOptionList.size());

    Discriminative::FeatureVector outFeaturesSourceNamespace;

    // extract source side features
    for(size_t i = 0; i < sourceFeatures.size(); ++i)
      (*sourceFeatures[i])(input, sourceRange, classifier, outFeaturesSourceNamespace);

    for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
      const TranslationOption *topt = translationOptionList.Get(toptIdx);
      const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
      Discriminative::FeatureVector outFeaturesTargetNamespace;

      // extract target-side features for each topt
      for(size_t i = 0; i < targetFeatures.size(); ++i)
        (*targetFeatures[i])(input, targetPhrase, classifier, outFeaturesTargetNamespace);

      // cache the extracted target features (i.e. features associated with given topt)
      // for future use at decoding time
      size_t toptHash = hash_value(*topt);
      m_tlsTranslationOptionFeatures->GetStored()->insert(
        std::make_pair(toptHash, outFeaturesTargetNamespace));

      // get classifier score
      losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
    }

    // normalize classifier scores to get a probability distribution
    std::vector<float> rawLosses = losses;
    (*m_normalizer)(losses);

    // update scores of topts
    for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
      TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
      if (! haveTargetContextFeatures) {
        // no target context features; evaluate the FF now
        std::vector<float> newScores(m_numScoreComponents);
        newScores[0] = FloorScore(TransformScore(losses[toptIdx]));

        ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
        scoreBreakDown.PlusEquals(this, newScores);

        topt->UpdateScore();
      } else {
        // We have target context features => this is just a partial score,
        // do not add it to the score component collection.
        size_t toptHash = hash_value(*topt);

        // Subtract the score contribution of target-only features, otherwise it would
        // be included twice.
        Discriminative::FeatureVector emptySource;
        const Discriminative::FeatureVector &targetFeatureVector =
          m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
        classifier.AddLabelIndependentFeatureVector(emptySource);
        classifier.AddLabelDependentFeatureVector(targetFeatureVector);
        float targetOnlyLoss = classifier.Predict(VW_DUMMY_LABEL);

        float futureScore = rawLosses[toptIdx] - targetOnlyLoss;
        m_tlsFutureScores->GetStored()->insert(std::make_pair(toptHash, futureScore));
      }
    }
  }
}

void VW::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "train") {
    m_train = Scan<bool>(value);
  } else if (key == "path") {
    m_modelPath = value;
  } else if (key == "vw-options") {
    m_vwOptions = value;
  } else if (key == "leave-one-out-from") {
    m_leaveOneOut = value;
  } else if (key == "training-loss") {
    // which type of loss to use for training
    if (value == "basic") {
      m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
    } else if (value == "bleu") {
      m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
    } else {
      UTIL_THROW2("Unknown training loss type:" << value);
    }
  } else if (key == "loss") {
    // which normalizer to use (theoretically depends on the loss function used for training the
    // classifier (squared/logistic/hinge/...), hence the name "loss")
    if (value == "logistic") {
      m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
    } else if (value == "squared") {
      m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
    } else {
      UTIL_THROW2("Unknown loss type:" << value);
    }
  } else {
    StatefulFeatureFunction::SetParameter(key, value);
  }
}
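
// An illustrative (hypothetical) moses.ini fragment for prediction time; the keys are the
// ones handled in SetParameter() above, while the model path and the VW option string are
// placeholders, not values taken from the original commit:
//
//   [feature]
//   VW path=/path/to/model.vw vw-options="--quiet" loss=logistic
//
// Concrete feature definitions (subclasses of VWFeatureBase) are declared as separate feature
// lines and attach themselves to this classifier through their used-by parameter, which
// defaults to "VW0". For feature extraction with vwtrainer, train=true plus a training-loss
// setting (basic or bleu) would be used instead.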

void VW::InitializeForInput(ttasksptr const& ttask)
{
  // do not keep future cost estimates across sentences!
  m_tlsFutureScores->GetStored()->clear();

  // invalidate our caches after each sentence
  m_tlsComputedStateExtensions->GetStored()->clear();

  // it's not certain that we should clear these caches; we do it
  // because they shouldn't be allowed to grow indefinitely large but
  // target contexts and translation options will have identical features
  // the next time we extract them...
  m_tlsTargetContextFeatures->GetStored()->clear();
  m_tlsTranslationOptionFeatures->GetStored()->clear();

  InputType const& source = *(ttask->GetSource().get());
  // tabbed sentence is assumed only in training
  if (! m_train)
    return;

  UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
                 "This feature function requires the TabbedSentence input type");

  const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
  UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
                 "TabbedSentence must contain target<tab>alignment");

  // target sentence represented as a phrase
  Phrase *target = new Phrase();
  target->CreateFromString(
    Output
    , StaticData::Instance().options()->output.factor_order
    , tabbedSentence.GetColumns()[0]
    , NULL);

  // word alignment between source and target sentence
  // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
  // sentences, not phrases
  AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);

  VWTargetSentence &targetSent = *GetStored();
  targetSent.Clear();
  targetSent.m_sentence = target;
  targetSent.m_alignment = alignment;

  // pre-compute max- and min- aligned points for faster translation option checking
  targetSent.SetConstraints(source.GetSize());
}
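
// An illustrative training input line, assuming TabbedSentence splits the input on tabs with
// the first field being the regular source sentence (the example words and alignment are made
// up; the alignment column uses the usual "srcIdx-tgtIdx" point pairs accepted by AlignmentInfo):
//
//   das ist ein Haus<tab>this is a house<tab>0-0 1-1 2-2 3-3
//
// Per the assertions above, column 0 of the extra columns is the target sentence and column 1
// the word alignment.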

/*************************************************************************************
 * private methods
 ************************************************************************************/

const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const
{
  std::set<std::pair<size_t, size_t> > alignmentPoints;
  const Hypothesis *contextHypo = curHypo.GetPrevHypo();
  int idxInContext = contextSize - 1;
  int processedWordsInHypo = 0;
  while (idxInContext >= 0 && contextHypo) {
    int idxInHypo = contextHypo->GetCurrTargetLength() - 1 - processedWordsInHypo;
    if (idxInHypo >= 0) {
      const AlignmentInfo &hypoAlign = contextHypo->GetCurrTargetPhrase().GetAlignTerm();
      std::set<size_t> alignedToTgt = hypoAlign.GetAlignmentsForTarget(idxInHypo);
      size_t srcOffset = contextHypo->GetCurrSourceWordsRange().GetStartPos();
      BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
        alignmentPoints.insert(std::make_pair(srcOffset + srcIdx, idxInContext));
      }
      processedWordsInHypo++;
      idxInContext--;
    } else {
      processedWordsInHypo = 0;
      contextHypo = contextHypo->GetPrevHypo();
    }
  }

  return AlignmentInfoCollection::Instance().Add(alignmentPoints);
}

AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const
{
  std::set<std::pair<size_t, size_t> > alignmentPoints;
  for (int i = std::max(0, currentStart - (int)contextSize); i < currentStart; i++) {
    std::set<size_t> alignedToTgt = alignInfo.GetAlignmentsForTarget(i);
    BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
      alignmentPoints.insert(std::make_pair(srcIdx, i + contextSize));
    }
  }
  return AlignmentInfo(alignmentPoints);
}
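
// Worked example for the index shift above: with contextSize = 2 and currentStart = 3, the
// context phrase built by the caller is "<s> <s> t0 t1 t2" and the loop copies the alignment
// points of target words t1 and t2 (sentence positions 1 and 2, the last contextSize words
// of the context), moving them to context positions 3 and 4; source indices are untouched.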

std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const
{

  //std::cerr << topt.GetSourceWordsRange() << std::endl;

  int sourceStart = topt.GetSourceWordsRange().GetStartPos();
  int sourceEnd = topt.GetSourceWordsRange().GetEndPos();

  const VWTargetSentence &targetSentence = *GetStored();

  // [targetStart, targetEnd] spans aligned target words
  int targetStart = targetSentence.m_sentence->GetSize();
  int targetEnd = -1;

  // get the left-most and right-most alignment point within source span
  for(int i = sourceStart; i <= sourceEnd; ++i) {
    if(targetSentence.m_sourceConstraints[i].IsSet()) {
      if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
        targetStart = targetSentence.m_sourceConstraints[i].GetMin();
      if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
        targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
    }
  }
  // there was no alignment
  if(targetEnd == -1)
    return std::make_pair(false, -1);

  //std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;

  // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
  int targetStart2 = targetStart;
  for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
    targetStart2 = i;

  int targetEnd2 = targetEnd;
  for(int i = targetEnd2;
      i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
      ++i)
    targetEnd2 = i;

  //std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;

  const TargetPhrase &tphrase = topt.GetTargetPhrase();
  //std::cerr << tphrase << std::endl;

  // if target phrase is shorter than inner span return false
  if(tphrase.GetSize() < targetEnd - targetStart + 1)
    return std::make_pair(false, -1);

  // if target phrase is longer than outer span return false
  if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
    return std::make_pair(false, -1);

  // for each possible starting point
  for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
    bool found = true;
    // check if the target phrase is within longer span
    for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
      if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
        found = false;
        break;
      }
    }
    // return true if there was a match
    if(found) {
      //std::cerr << "Found" << std::endl;
      return std::make_pair(true, tempStart);
    }
  }

  return std::make_pair(false, -1);
}
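
// Worked example: suppose the alignment maps the source span to target words 3..4 (inner span
// [targetStart, targetEnd] = [3, 4]) and target word 5 is unaligned, so the outer span
// [targetStart2, targetEnd2] grows to [3, 5]. A 2-word target phrase must then match the
// sentence exactly at position 3, while a 3-word phrase may additionally cover the unaligned
// word 5. The returned int is the matched start position, which the caller uses to group
// translation options by where they begin on the target side.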

std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const
{
  UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");

  float sourceRawCount = 0.0;
  const float ONE = 1.0001; // slightly above 1 to be robust to floating-point rounding of the stored counts

  std::vector<bool> keepOpt;

  for (size_t i = 0; i < topts.size(); i++) {
    TranslationOption *topt = *(topts.begin() + i);
    const TargetPhrase &targetPhrase = topt->GetTargetPhrase();

    // extract raw counts from phrase-table property
    const CountsPhraseProperty *property =
      static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));

    if (! property) {
      VERBOSE(2, "VW :: Counts not found for topt! Is this an OOV?\n");
      // keep all translation opts without updating, this is either OOV or bad usage...
      keepOpt.assign(topts.size(), true);
      return keepOpt;
    }

    if (sourceRawCount == 0.0) {
      sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
      if (sourceRawCount <= 0) {
        // no translation options survived, source phrase was a singleton
        keepOpt.assign(topts.size(), false);
        return keepOpt;
      }
    }

    float discount = correct[i] ? ONE : 0.0;
    float target = property->GetTargetMarginal() - discount;
    float joint = property->GetJointCount() - discount;
    if (discount != 0.0) VERBOSE(3, "VW :: leaving one out!\n");

    if (joint > 0) {
      // topt survived leaving one out, update its scores
      const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
      std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
      UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
      scores[0] = TransformScore(joint / target); // P(f|e)
      scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)

      ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
      scoreBreakDown.Assign(feature, scores);
      topt->UpdateScore();
      keepOpt.push_back(true);
    } else {
      // they only occurred together once, discard topt
      VERBOSE(2, "VW :: discarded topt when leaving one out\n");
      keepOpt.push_back(false);
    }
  }

  return keepOpt;
}
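
// The arithmetic above implements leave-one-out estimation: one occurrence of the current
// sentence's phrase pair is subtracted from the counts c(f), c(e) and c(e,f) stored in the
// "Counts" phrase property, giving P(f|e) = (c(e,f) - 1) / (c(e) - 1) and
// P(e|f) = (c(e,f) - 1) / (c(f) - 1) for the correct pair; incorrect pairs are discounted
// only in the source marginal c(f). ONE being slightly above 1 keeps the "joint > 0" and
// "sourceRawCount <= 0" comparisons safe when counts are stored as floats.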

} // namespace Moses
528
moses/FF/VW/VW.h
@@ -3,8 +3,12 @@

#include <string>
#include <map>
#include <limits>
#include <vector>

#include "moses/FF/StatelessFeatureFunction.h"
#include <boost/unordered_map.hpp>
#include <boost/functional/hash.hpp>

#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/PP/CountsPhraseProperty.h"
#include "moses/TranslationOptionList.h"
#include "moses/TranslationOption.h"
@@ -13,6 +17,8 @@
#include "moses/StaticData.h"
#include "moses/Phrase.h"
#include "moses/AlignmentInfo.h"
#include "moses/Word.h"
#include "moses/FactorCollection.h"

#include "Normalizer.h"
#include "Classifier.h"
@@ -20,119 +26,50 @@
#include "TabbedSentence.h"
#include "ThreadLocalByFeatureStorage.h"
#include "TrainingLoss.h"
#include "VWTargetSentence.h"

/*
 * VW classifier feature. See vw/README.md for further information.
 *
 * TODO: say which paper to cite.
 */

namespace Moses
{

const std::string VW_DUMMY_LABEL = "1111"; // VW does not use the actual label, other classifiers might

/**
 * Helper class for storing alignment constraints.
 */
class Constraint
{
public:
  Constraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}

  Constraint(int min, int max) : m_min(min), m_max(max) {}

  /**
   * We are aligned to point => our min cannot be larger, our max cannot be smaller.
   */
  void Update(int point) {
    if (m_min > point) m_min = point;
    if (m_max < point) m_max = point;
  }

  bool IsSet() const {
    return m_max != -1;
  }

  int GetMin() const {
    return m_min;
  }

  int GetMax() const {
    return m_max;
  }

private:
  int m_min, m_max;
};

/**
 * VW thread-specific data about target sentence.
 */
struct VWTargetSentence {
  VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}

  void Clear() {
    if (m_sentence) delete m_sentence;
    if (m_alignment) delete m_alignment;
  }

  ~VWTargetSentence() {
    Clear();
  }

  void SetConstraints(size_t sourceSize) {
    // initialize to unconstrained
    m_sourceConstraints.assign(sourceSize, Constraint());
    m_targetConstraints.assign(m_sentence->GetSize(), Constraint());

    // set constraints according to alignment points
    AlignmentInfo::const_iterator it;
    for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
      int src = it->first;
      int tgt = it->second;

      if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
        UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
      }

      m_sourceConstraints[src].Update(tgt);
      m_targetConstraints[tgt].Update(src);
    }
  }

  Phrase *m_sentence;
  AlignmentInfo *m_alignment;
  std::vector<Constraint> m_sourceConstraints, m_targetConstraints;
};
// dummy class label; VW does not use the actual label, other classifiers might
const std::string VW_DUMMY_LABEL = "1111";

// thread-specific classifier instance
typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;

// current target sentence, used in VW training (vwtrainer), not in decoding (prediction time)
typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;

class VW : public StatelessFeatureFunction, public TLSTargetSentence
// hash table of feature vectors
typedef boost::unordered_map<size_t, Discriminative::FeatureVector> FeatureVectorMap;

// thread-specific feature vector hash
typedef ThreadLocalByFeatureStorage<FeatureVectorMap> TLSFeatureVectorMap;

// hash table of partial scores
typedef boost::unordered_map<size_t, float> FloatHashMap;

// thread-specific score hash table, used for caching
typedef ThreadLocalByFeatureStorage<FloatHashMap> TLSFloatHashMap;

// thread-specific hash table for caching full classifier outputs
typedef ThreadLocalByFeatureStorage<boost::unordered_map<size_t, FloatHashMap> > TLSStateExtensions;

/*
 * VW feature function. A discriminative classifier with source and target context features.
 */
class VW : public StatefulFeatureFunction, public TLSTargetSentence
{
public:
  VW(const std::string &line)
    : StatelessFeatureFunction(1, line)
    , TLSTargetSentence(this)
    , m_train(false) {
    ReadParameters();
    Discriminative::ClassifierFactory *classifierFactory = m_train
        ? new Discriminative::ClassifierFactory(m_modelPath)
        : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
  VW(const std::string &line);

    m_tlsClassifier = new TLSClassifier(this, *classifierFactory);

    if (! m_normalizer) {
      VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
      m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
    }

    if (! m_trainingLoss) {
      VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
      m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
    }
  }

  virtual ~VW() {
    delete m_tlsClassifier;
    delete m_normalizer;
  }
  virtual ~VW();

  bool IsUseable(const FactorMask &mask) const {
    return true;
@@ -152,335 +89,89 @@ public:
    , ScoreComponentCollection *estimatedFutureScore = NULL) const {
  }

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
    Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
  // The behavior of this method depends on whether it's called during VW
  // training (feature extraction) by vwtrainer or during decoding (prediction
  // time) by Moses.
  //
  // When predicting, it evaluates all translation options with the VW model;
  // if no target-context features are defined, this is the final score and it
  // is added directly to the TranslationOption score. If there are target
  // context features, the score is a partial score and it is only stored in
  // cache; the final score is computed based on target context in
  // EvaluateWhenApplied().
  //
  // This method is also used in training by vwtrainer in which case features
  // are written to a file, no classifier predictions take place. Target-side
  // context is constant at training time (we know the true target sentence),
  // so target-context features are extracted here as well.
  virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const;

    if (translationOptionList.size() == 0)
      return; // nothing to do
  // Evaluate VW during decoding. This is only used at prediction time (not in training).
  // When no target-context features are defined, VW predictions were already fully calculated
  // in EvaluateTranslationOptionListWithSourceContext() and the scores were added to the model.
  // If there are target-context features, we compute the context-dependent part of the
  // classifier score and combine it with the source-context only partial score which was computed
  // in EvaluateTranslationOptionListWithSourceContext(). Various caches are used to make this
  // method more efficient.
  virtual FFState* EvaluateWhenApplied(
    const Hypothesis& curHypo,
    const FFState* prevState,
    ScoreComponentCollection* accumulator) const;

    VERBOSE(2, "VW :: Evaluating translation options\n");

    // which feature functions do we use (on the source and target side)
    const std::vector<VWFeatureBase*>& sourceFeatures =
      VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());

    const std::vector<VWFeatureBase*>& targetFeatures =
      VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());

    const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
    const InputPath &inputPath = translationOptionList.Get(0)->GetInputPath();

    if (m_train) {
      //
      // extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
      //

      // find which topts are correct
      std::vector<bool> correct(translationOptionList.size());
      for (size_t i = 0; i < translationOptionList.size(); i++)
        correct[i] = IsCorrectTranslationOption(* translationOptionList.Get(i));

      // optionally update translation options using leave-one-out
      std::vector<bool> keep = (m_leaveOneOut.size() > 0)
                               ? LeaveOneOut(translationOptionList, correct)
                               : std::vector<bool>(translationOptionList.size(), true);

      // check whether we (still) have some correct translation
      int firstCorrect = -1;
      for (size_t i = 0; i < translationOptionList.size(); i++) {
        if (keep[i] && correct[i]) {
          firstCorrect = i;
          break;
        }
      }

      // do not train if there are no positive examples
      if (firstCorrect == -1) {
        VERBOSE(2, "VW :: skipping topt collection, no correct translation for span\n");
        return;
      }

      // the first correct topt can be used by some loss functions
      const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();

      // extract source side features
      for(size_t i = 0; i < sourceFeatures.size(); ++i)
        (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);

      // go over topts, extract target side features and train the classifier
      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {

        // this topt was discarded by leaving one out
        if (! keep[toptIdx])
          continue;

        // extract target-side features for each topt
        const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
        for(size_t i = 0; i < targetFeatures.size(); ++i)
          (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);

        float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, correct[toptIdx]);

        // train classifier on current example
        classifier.Train(MakeTargetLabel(targetPhrase), loss);
      }
    } else {
      //
      // predict using a trained classifier, use this in decoding (=at test time)
      //

      std::vector<float> losses(translationOptionList.size());

      // extract source side features
      for(size_t i = 0; i < sourceFeatures.size(); ++i)
        (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);

      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
        const TranslationOption *topt = translationOptionList.Get(toptIdx);
        const TargetPhrase &targetPhrase = topt->GetTargetPhrase();

        // extract target-side features for each topt
        for(size_t i = 0; i < targetFeatures.size(); ++i)
          (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);

        // get classifier score
        losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
      }

      // normalize classifier scores to get a probability distribution
      (*m_normalizer)(losses);

      // update scores of topts
      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
        TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
        std::vector<float> newScores(m_numScoreComponents);
        newScores[0] = FloorScore(TransformScore(losses[toptIdx]));

        ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
        scoreBreakDown.PlusEquals(this, newScores);

        topt->UpdateScore();
      }
    }
  virtual FFState* EvaluateWhenApplied(
    const ChartHypothesis&,
    int,
    ScoreComponentCollection* accumulator) const {
    throw new std::logic_error("hierarchical/syntax not supported");
  }

  void EvaluateWhenApplied(const Hypothesis& hypo,
                           ScoreComponentCollection* accumulator) const {
  }
  // Initial VW state; contains unaligned BOS symbols.
  const FFState* EmptyHypothesisState(const InputType &input) const;

  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                           ScoreComponentCollection* accumulator) const {
  }

  void SetParameter(const std::string& key, const std::string& value) {
    if (key == "train") {
      m_train = Scan<bool>(value);
    } else if (key == "path") {
      m_modelPath = value;
    } else if (key == "vw-options") {
      m_vwOptions = value;
    } else if (key == "leave-one-out-from") {
      m_leaveOneOut = value;
    } else if (key == "training-loss") {
      // which type of loss to use for training
      if (value == "basic") {
        m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
      } else if (value == "bleu") {
        m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
      } else {
        UTIL_THROW2("Unknown training loss type:" << value);
      }
    } else if (key == "loss") {
      // which normalizer to use (theoretically depends on the loss function used for training the
      // classifier (squared/logistic/hinge/...), hence the name "loss"
      if (value == "logistic") {
        m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
      } else if (value == "squared") {
        m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
      } else {
        UTIL_THROW2("Unknown loss type:" << value);
      }
    } else {
      StatelessFeatureFunction::SetParameter(key, value);
    }
  }

  virtual void InitializeForInput(ttasksptr const& ttask) {
    InputType const& source = *(ttask->GetSource().get());
    // tabbed sentence is assumed only in training
    if (! m_train)
      return;

    UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
                   "This feature function requires the TabbedSentence input type");

    const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
    UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
                   "TabbedSentence must contain target<tab>alignment");

    // target sentence represented as a phrase
    Phrase *target = new Phrase();
    target->CreateFromString(
      Output
      , StaticData::Instance().options()->output.factor_order
      , tabbedSentence.GetColumns()[0]
      , NULL);

    // word alignment between source and target sentence
    // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
    // sentences, not phrases
    AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);

    VWTargetSentence &targetSent = *GetStored();
    targetSent.Clear();
    targetSent.m_sentence = target;
    targetSent.m_alignment = alignment;

    // pre-compute max- and min- aligned points for faster translation option checking
    targetSent.SetConstraints(source.GetSize());
  }
  void SetParameter(const std::string& key, const std::string& value);

  // At prediction time, this clears our caches. At training time, we load the next sentence, its
  // translation and word alignment.
  virtual void InitializeForInput(ttasksptr const& ttask);

private:
  std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
    return VW_DUMMY_LABEL;
  inline std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
    return VW_DUMMY_LABEL; // VW does not care about class labels in our setting (--csoaa_ldf mc).
  }

  bool IsCorrectTranslationOption(const TranslationOption &topt) const {

    //std::cerr << topt.GetSourceWordsRange() << std::endl;

    int sourceStart = topt.GetSourceWordsRange().GetStartPos();
    int sourceEnd = topt.GetSourceWordsRange().GetEndPos();

    const VWTargetSentence &targetSentence = *GetStored();

    // [targetStart, targetEnd] spans aligned target words
    int targetStart = targetSentence.m_sentence->GetSize();
    int targetEnd = -1;

    // get the left-most and right-most alignment point within source span
    for(int i = sourceStart; i <= sourceEnd; ++i) {
      if(targetSentence.m_sourceConstraints[i].IsSet()) {
        if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
          targetStart = targetSentence.m_sourceConstraints[i].GetMin();
        if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
          targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
      }
    }
    // there was no alignment
    if(targetEnd == -1)
      return false;

    //std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;

    // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
    int targetStart2 = targetStart;
    for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
      targetStart2 = i;

    int targetEnd2 = targetEnd;
    for(int i = targetEnd2;
        i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
        ++i)
      targetEnd2 = i;

    //std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;

    const TargetPhrase &tphrase = topt.GetTargetPhrase();
    //std::cerr << tphrase << std::endl;

    // if target phrase is shorter than inner span return false
    if(tphrase.GetSize() < targetEnd - targetStart + 1)
      return false;

    // if target phrase is longer than outer span return false
    if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
      return false;

    // for each possible starting point
    for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
      bool found = true;
      // check if the target phrase is within longer span
      for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
        if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
          found = false;
          break;
        }
      }
      // return true if there was a match
      if(found) {
        //std::cerr << "Found" << std::endl;
        return true;
      }
    }

    return false;
  inline size_t MakeCacheKey(const FFState *prevState, size_t spanStart, size_t spanEnd) const {
    size_t key = 0;
    boost::hash_combine(key, prevState);
    boost::hash_combine(key, spanStart);
    boost::hash_combine(key, spanEnd);
    return key;
  }

  std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
    UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
  // used in decoding to transform the global word alignment information into
  // context-phrase internal alignment information (i.e., with target indices corresponding
  // to positions in contextPhrase)
  const AlignmentInfo *TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const;

    float sourceRawCount = 0.0;
    const float ONE = 1.0001; // I don't understand floating point numbers
  // used during training to extract relevant alignment points from the full sentence alignment
  // and shift them by target context size
  AlignmentInfo TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const;

    std::vector<bool> keepOpt;
  // At training time, determine whether a translation option is correct for the current target sentence
  // based on word alignment. This is a bit complicated because we need to handle various corner-cases
  // where some word(s) on phrase borders are unaligned.
  std::pair<bool, int> IsCorrectTranslationOption(const TranslationOption &topt) const;

    for (size_t i = 0; i < topts.size(); i++) {
      TranslationOption *topt = *(topts.begin() + i);
      const TargetPhrase &targetPhrase = topt->GetTargetPhrase();

      // extract raw counts from phrase-table property
      const CountsPhraseProperty *property =
        static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));

      if (! property) {
        VERBOSE(1, "VW :: Counts not found for topt! Is this an OOV?\n");
        // keep all translation opts without updating, this is either OOV or bad usage...
        keepOpt.assign(topts.size(), true);
        return keepOpt;
      }

      if (sourceRawCount == 0.0) {
        sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
        if (sourceRawCount <= 0) {
          // no translation options survived, source phrase was a singleton
          keepOpt.assign(topts.size(), false);
          return keepOpt;
        }
      }

      float discount = correct[i] ? ONE : 0.0;
      float target = property->GetTargetMarginal() - discount;
      float joint = property->GetJointCount() - discount;
      if (discount != 0.0) VERBOSE(2, "VW :: leaving one out!\n");

      if (joint > 0) {
        // topt survived leaving one out, update its scores
        const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
        std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
        UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
        scores[0] = TransformScore(joint / target); // P(f|e)
        scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)

        ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
        scoreBreakDown.Assign(feature, scores);
        topt->UpdateScore();
        keepOpt.push_back(true);
      } else {
        // they only occurred together once, discard topt
        VERBOSE(2, "VW :: discarded topt when leaving one out\n");
        keepOpt.push_back(false);
      }
    }

    return keepOpt;
  }
  // At training time, optionally discount occurrences of phrase pairs from the current sentence, helps prevent
  // over-fitting.
  std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const;

  bool m_train; // false means predict
  std::string m_modelPath;
  std::string m_vwOptions;
  std::string m_modelPath; // path to the VW model file; at training time, this is where extracted features are stored
  std::string m_vwOptions; // options for Vowpal Wabbit

  // BOS token, all factors
  Word m_sentenceStartWord;

  // calculator of training loss
  TrainingLoss *m_trainingLoss = NULL;
@@ -488,9 +179,16 @@ private:
  // optionally contains feature name of a phrase table where we recompute scores with leaving one out
  std::string m_leaveOneOut;

  // normalizer, typically this means softmax
  Discriminative::Normalizer *m_normalizer = NULL;

  // thread-specific classifier instance
  TLSClassifier *m_tlsClassifier;

  // caches for partial scores and feature vectors
  TLSFloatHashMap *m_tlsFutureScores;
  TLSStateExtensions *m_tlsComputedStateExtensions;
  TLSFeatureVectorMap *m_tlsTranslationOptionFeatures, *m_tlsTargetContextFeatures;
};

}

@@ -2,11 +2,26 @@
#include <string>

#include "VWFeatureBase.h"
#include "VWFeatureContext.h"

namespace Moses
{
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetContextFeatures;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;

std::map<std::string, size_t> VWFeatureBase::s_targetContextLength;


void VWFeatureBase::UpdateContextSize(const std::string &usedBy)
{
  // using the standard map behavior here: if the entry does not
  // exist, it will be added and initialized to zero
  size_t currentSize = s_targetContextLength[usedBy];
  size_t newSize = static_cast<VWFeatureContext *const>(this)->GetContextSize();
  s_targetContextLength[usedBy] = std::max(currentSize, newSize);
}

}

@@ -12,11 +12,17 @@
namespace Moses
{

enum VWFeatureType {
  vwft_source,
  vwft_target,
  vwft_targetContext
};

class VWFeatureBase : public StatelessFeatureFunction
{
public:
  VWFeatureBase(const std::string &line, bool isSource = true)
    : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource) {
  VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
    : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
    // defaults
    m_sourceFactors.push_back(0);
    m_targetFactors.push_back(0);
@@ -71,26 +77,47 @@ public:
    return s_sourceFeatures[name];
  }

  // Return only target-context classifier features
  static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
    // don't throw an exception when there are no target-context features, this feature type is not mandatory
    return s_targetContextFeatures[name];
  }

  // Return only target-dependent classifier features
  static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
    UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
    return s_targetFeatures[name];
  }

  // Required context length (maximum context size of defined target-context features)
  static size_t GetMaximumContextSize(std::string name = "VW0") {
    return s_targetContextLength[name]; // 0 by default
  }

  // Overload to process source-dependent data, create features once for every
  // source sentence word range.
  virtual void operator()(const InputType &input
                          , const InputPath &inputPath
                          , const Range &sourceRange
                          , Discriminative::Classifier &classifier) const = 0;
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const = 0;

  // Overload to process target-dependent features, create features once for
  // every target phrase. One source word range will have at leat one target
  // every target phrase. One source word range will have at least one target
  // phrase, but may have more.
  virtual void operator()(const InputType &input
                          , const InputPath &inputPath
                          , const TargetPhrase &targetPhrase
                          , Discriminative::Classifier &classifier) const = 0;
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const = 0;

  // Overload to process target-context dependent features, these features are
  // evaluated during decoding. For efficiency, features are not fed directly into
  // the classifier object but instead output in the vector "features" and managed
  // separately in VW.h.
  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const = 0;

protected:
  std::vector<FactorType> m_sourceFactors, m_targetFactors;
@@ -99,10 +126,15 @@ protected:
    for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
        it != m_usedBy.end(); it++) {
      s_features[*it].push_back(this);
      if(m_isSource)

      if(m_featureType == vwft_source) {
        s_sourceFeatures[*it].push_back(this);
      else
      } else if (m_featureType == vwft_targetContext) {
        s_targetContextFeatures[*it].push_back(this);
        UpdateContextSize(*it);
      } else {
        s_targetFeatures[*it].push_back(this);
      }
    }
  }

@@ -112,11 +144,16 @@ private:
    Tokenize(m_usedBy, usedBy, ",");
  }

  void UpdateContextSize(const std::string &usedBy);

  std::vector<std::string> m_usedBy;
  bool m_isSource;
  VWFeatureType m_featureType;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;

  static std::map<std::string, size_t> s_targetContextLength;
};

}

116
moses/FF/VW/VWFeatureContext.h
Normal file
@@ -0,0 +1,116 @@
#pragma once

#include <string>
#include <boost/foreach.hpp>
#include "VWFeatureBase.h"
#include "moses/InputType.h"
#include "moses/TypeDef.h"
#include "moses/Word.h"

namespace Moses
{

// Inherit from this for target-context classifier features. They will
// automatically register with the classifier class named VW0 or one or more
// names specified by the used-by=name1,name2,... parameter.
//
// The classifier gets a full list by calling
// VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription())


class VWFeatureContext : public VWFeatureBase
{
public:
  VWFeatureContext(const std::string &line, size_t contextSize)
    : VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) {
  }

  // Gets its pure virtual functions from VWFeatureBase

  virtual void operator()(const InputType &input
                          , const TargetPhrase &targetPhrase
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
  }

  virtual void operator()(const InputType &input
                          , const Range &sourceRange
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
    if (key == "size") {
      m_contextSize = Scan<size_t>(value);
    } else if (key == "factor-positions") {
      // factor positions: assuming a factor such as positional morphological tag, use this
      // option to select only certain positions; this assumes that only a single
      // target-side factor is defined
      Tokenize<size_t>(m_factorPositions, value, ",");
    } else {
      VWFeatureBase::SetParameter(key, value);
    }
  }

  size_t GetContextSize() {
    return m_contextSize;
  }

protected:
  // Get word with the correct subset of factors as string. Because we're target
  // context features, we look at a limited number of words to the left of the
  // current translation. posFromEnd is interpreted like this:
  // 0 = last word of the hypothesis
  // 1 = next to last word
  // ...etc.
  inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
    const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
    if (m_factorPositions.empty()) {
      return word.GetString(m_targetFactors, false);
    } else {
      if (m_targetFactors.size() != 1)
        UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
      const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();

      // corner cases: at sentence beginning/end, we don't have the correct factors set up
      // similarly for UNK
      if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR)
        return fullFactor;

      std::string subFactor(m_factorPositions.size(), 'x'); // initialize string with correct size and placeholder chars
      for (size_t i = 0; i < m_factorPositions.size(); i++)
        subFactor[i] = fullFactor[m_factorPositions[i]];

      return subFactor;
    }
  }
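
  // Illustrative example of the sub-factor selection above: with a hypothetical positional
  // morphological tag "NNMS1" as the single target factor and factor-positions=0,4, GetWord()
  // returns "N1" (characters 0 and 4 of the tag); BOS/EOS/unknown tokens are passed through
  // unchanged because their factor strings do not follow the positional layout.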
|
||||
|
||||
// some target-context feature functions also look at the source
|
||||
inline std::string GetSourceWord(const InputType &input, size_t pos) const {
|
||||
return input.GetWord(pos).GetString(m_sourceFactors, false);
|
||||
}
|
||||
|
||||
// get source words aligned to a particular context word
|
||||
std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
|
||||
, const InputType &input
|
||||
, const AlignmentInfo &alignInfo
|
||||
, size_t posFromEnd) const {
|
||||
size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
|
||||
std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
|
||||
std::vector<std::string> out;
|
||||
out.reserve(alignedToTarget.size());
|
||||
BOOST_FOREACH(size_t srcIdx, alignedToTarget) {
|
||||
out.push_back(GetSourceWord(input, srcIdx));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// required context size
|
||||
size_t m_contextSize;
|
||||
|
||||
// factor positions: assuming a factor such as positional morphological tag, use this
|
||||
// option to select only certain positions
|
||||
std::vector<size_t> m_factorPositions;
|
||||
};
|
||||
|
||||
}
|
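Illustration only (not part of the commit): a minimal standalone sketch of the factor-positions selection implemented in GetWord() above, assuming a hypothetical positional morphological tag "Ncmsn" and the setting factor-positions=0,2.

#include <iostream>
#include <string>
#include <vector>

int main() {
  std::string fullFactor = "Ncmsn";      // hypothetical positional tag
  std::vector<size_t> factorPositions;   // as parsed from "0,2"
  factorPositions.push_back(0);
  factorPositions.push_back(2);

  // same selection logic as VWFeatureContext::GetWord()
  std::string subFactor(factorPositions.size(), 'x');
  for (size_t i = 0; i < factorPositions.size(); i++)
    subFactor[i] = fullFactor[factorPositions[i]];

  std::cout << subFactor << std::endl;   // prints "Nm"
  return 0;
}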
moses/FF/VW/VWFeatureContextBigrams.h (new file, 40 lines)
@@ -0,0 +1,40 @@
#pragma once

#include <string>
#include <algorithm>
#include "VWFeatureContext.h"
#include "moses/Util.h"

namespace Moses
{

class VWFeatureContextBigrams : public VWFeatureContext
{
public:
  VWFeatureContextBigrams(const std::string &line)
    : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
    ReadParameters();

    // Call this last
    VWFeatureBase::UpdateRegister();
  }

  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 1; i < m_contextSize; i++)
      outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
                            + "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
    VWFeatureContext::SetParameter(key, value);
  }

private:
  static const int DEFAULT_WINDOW_SIZE = 1;
};

}
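Illustration only (not part of the commit): a standalone sketch of the feature names the loop above emits, assuming a hypothetical target context "the green house" and size=3 (recall posFromEnd indexing: 0 = last word).

#include <iostream>
#include <string>
#include <vector>

// posFromEnd indexing as in VWFeatureContext::GetWord()
static std::string GetWord(const std::vector<std::string> &context, size_t posFromEnd) {
  return context[context.size() - posFromEnd - 1];
}

int main() {
  std::vector<std::string> context;   // hypothetical target context
  context.push_back("the");
  context.push_back("green");
  context.push_back("house");
  size_t contextSize = 3;             // corresponds to size=3

  for (size_t i = 1; i < contextSize; i++)
    std::cout << "tcbigram^-" << (i + 1) << "^"
              << GetWord(context, i - 1) << "^" << GetWord(context, i) << "\n";
  // prints:
  //   tcbigram^-2^house^green
  //   tcbigram^-3^green^the
  return 0;
}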
moses/FF/VW/VWFeatureContextBilingual.h (new file, 45 lines)
@@ -0,0 +1,45 @@
#pragma once

#include <string>
#include <boost/foreach.hpp>
#include <algorithm>
#include "VWFeatureContext.h"
#include "moses/Util.h"

namespace Moses
{

class VWFeatureContextBilingual : public VWFeatureContext
{
public:
  VWFeatureContextBilingual(const std::string &line)
    : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
    ReadParameters();

    // Call this last
    VWFeatureBase::UpdateRegister();
  }

  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 0; i < m_contextSize; i++) {
      std::string tgtWord = GetWord(contextPhrase, i);
      std::vector<std::string> alignedTo = GetAlignedSourceWords(contextPhrase, input, alignmentInfo, i);
      BOOST_FOREACH(const std::string &srcWord, alignedTo) {
        outFeatures.push_back(classifier.AddLabelIndependentFeature("tcblng^-" + SPrint(i + 1) + "^" + tgtWord + "^" + srcWord));
      }
    }
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
    VWFeatureContext::SetParameter(key, value);
  }

private:
  static const int DEFAULT_WINDOW_SIZE = 1;
};

}
moses/FF/VW/VWFeatureContextWindow.h (new file, 39 lines)
@@ -0,0 +1,39 @@
#pragma once

#include <string>
#include <algorithm>
#include "VWFeatureContext.h"
#include "moses/Util.h"

namespace Moses
{

class VWFeatureContextWindow : public VWFeatureContext
{
public:
  VWFeatureContextWindow(const std::string &line)
    : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
    ReadParameters();

    // Call this last
    VWFeatureBase::UpdateRegister();
  }

  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 0; i < m_contextSize; i++)
      outFeatures.push_back(classifier.AddLabelIndependentFeature("tcwin^-" + SPrint(i + 1) + "^" + GetWord(contextPhrase, i)));
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
    VWFeatureContext::SetParameter(key, value);
  }

private:
  static const int DEFAULT_WINDOW_SIZE = 1;
};

}
@@ -19,15 +19,22 @@ class VWFeatureSource : public VWFeatureBase
{
public:
  VWFeatureSource(const std::string &line)
    : VWFeatureBase(line, true) {
    : VWFeatureBase(line, vwft_source) {
  }

  // Gets its pure virtual functions from VWFeatureBase

  virtual void operator()(const InputType &input
                          , const InputPath &inputPath
                          , const TargetPhrase &targetPhrase
                          , Discriminative::Classifier &classifier) const {
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
  }

  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
@@ -18,11 +18,11 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 0; i < input.GetSize(); i++) {
      classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
      outFeatures.push_back(classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i)));
    }
  }

@@ -18,11 +18,11 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 1; i < input.GetSize(); i++) {
      classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i));
      outFeatures.push_back(classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i)));
    }
  }

@@ -23,12 +23,12 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    const Features& features = *m_tls.GetStored();
    for (size_t i = 0; i < features.size(); i++) {
      classifier.AddLabelIndependentFeature("srcext^" + features[i]);
      outFeatures.push_back(classifier.AddLabelIndependentFeature("srcext^" + features[i]));
    }
  }

@@ -20,9 +20,9 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    size_t begin = sourceRange.GetStartPos();
    size_t end = sourceRange.GetEndPos() + 1;

@@ -31,7 +31,7 @@ public:
    for (size_t i = 0; i < end - begin; i++)
      words[i] = GetWord(input, begin + i);

    classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
    outFeatures.push_back(classifier.AddLabelIndependentFeature("sind^" + Join(" ", words)));
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
@@ -20,14 +20,14 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    size_t begin = sourceRange.GetStartPos();
    size_t end = sourceRange.GetEndPos() + 1;

    while (begin < end) {
      classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
      outFeatures.push_back(classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++)));
    }
  }

@@ -51,9 +51,9 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    int begin = sourceRange.GetStartPos();
    int end = sourceRange.GetEndPos() + 1;
    int inputLen = input.GetSize();
@@ -64,24 +64,24 @@ public:
    // before current phrase
    for (int i = std::max(0, begin - m_size); i < begin; i++) {
      BOOST_FOREACH(const Sense &sense, senses[i]) {
        classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
        classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob);
        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob));
      }
    }

    // within current phrase
    for (int i = begin; i < end; i++) {
      BOOST_FOREACH(const Sense &sense, senses[i]) {
        classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
        classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob);
        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob));
      }
    }

    // after current phrase
    for (int i = end; i < std::min(end + m_size, inputLen); i++) {
      BOOST_FOREACH(const Sense &sense, senses[i]) {
        classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
        classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob);
        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob));
      }
    }
  }

@@ -20,19 +20,19 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    int begin = sourceRange.GetStartPos();
    int end = sourceRange.GetEndPos() + 1;
    int inputLen = input.GetSize();

    for (int i = std::max(0, begin - m_size); i < begin; i++) {
      classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
      outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i)));
    }

    for (int i = end; i < std::min(end + m_size, inputLen); i++) {
      classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
      outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i)));
    }
  }

@@ -17,15 +17,22 @@ class VWFeatureTarget : public VWFeatureBase
{
public:
  VWFeatureTarget(const std::string &line)
    : VWFeatureBase(line, false) {
    : VWFeatureBase(line, vwft_target) {
  }

  // Gets its pure virtual functions from VWFeatureBase

  virtual void operator()(const InputType &input
                          , const InputPath &inputPath
                          , const Range &sourceRange
                          , Discriminative::Classifier &classifier) const {
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
  }

  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const {
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
@@ -17,11 +17,11 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const TargetPhrase &targetPhrase
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 1; i < targetPhrase.GetSize(); i++) {
      classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i));
      outFeatures.push_back(classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i)));
    }
  }

@@ -17,10 +17,10 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const TargetPhrase &targetPhrase
                  , Discriminative::Classifier &classifier) const {
    classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    outFeatures.push_back(classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors)));
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
@@ -17,11 +17,11 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const TargetPhrase &targetPhrase
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
      classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
      outFeatures.push_back(classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i)));
    }
  }

@@ -20,9 +20,9 @@ public:
  }

  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const TargetPhrase &targetPhrase
                  , Discriminative::Classifier &classifier) const {
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    std::vector<FeatureFunction*> features = FeatureFunction::GetFeatureFunctions();
    for (size_t i = 0; i < features.size(); i++) {
      std::string fname = features[i]->GetScoreProducerDescription();
@@ -31,7 +31,7 @@ public:

      std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(features[i]);
      for(size_t j = 0; j < scores.size(); ++j)
        classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]);
        outFeatures.push_back(classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]));
    }
  }
moses/FF/VW/VWState.cpp (new file, 77 lines)
@@ -0,0 +1,77 @@
#include "VWState.h"

#include "moses/FF/FFState.h"
#include "moses/Phrase.h"
#include "moses/Hypothesis.h"
#include "moses/Util.h"
#include "moses/TypeDef.h"
#include "moses/StaticData.h"
#include "moses/TranslationOption.h"
#include <boost/functional/hash.hpp>

namespace Moses
{

VWState::VWState() : m_spanStart(0), m_spanEnd(0)
{
  ComputeHash();
}

VWState::VWState(const Phrase &phrase)
  : m_phrase(phrase), m_spanStart(0), m_spanEnd(0)
{
  ComputeHash();
}

VWState::VWState(const VWState &prevState, const Hypothesis &curHypo)
{
  VERBOSE(3, "VW :: updating state\n>> previous state: " << prevState << "\n");

  // copy phrase from previous state
  Phrase phrase = prevState.GetPhrase();
  size_t contextSize = phrase.GetSize(); // identical to VWFeatureBase::GetMaximumContextSize()

  // add words from current hypothesis
  phrase.Append(curHypo.GetCurrTargetPhrase());

  VERBOSE(3, ">> current hypo: " << curHypo.GetCurrTargetPhrase() << "\n");

  // get a slice of appropriate length
  Range range(phrase.GetSize() - contextSize, phrase.GetSize() - 1);
  m_phrase = phrase.GetSubString(range);

  // set current span start/end
  m_spanStart = curHypo.GetTranslationOption().GetStartPos();
  m_spanEnd = curHypo.GetTranslationOption().GetEndPos();

  // compute our hash
  ComputeHash();

  VERBOSE(3, ">> updated state: " << *this << "\n");
}

bool VWState::operator==(const FFState& o) const
{
  const VWState &other = static_cast<const VWState &>(o);

  return m_phrase == other.GetPhrase()
         && m_spanStart == other.GetSpanStart()
         && m_spanEnd == other.GetSpanEnd();
}

void VWState::ComputeHash()
{
  m_hash = 0;

  boost::hash_combine(m_hash, m_phrase);
  boost::hash_combine(m_hash, m_spanStart);
  boost::hash_combine(m_hash, m_spanEnd);
}

std::ostream &operator<<(std::ostream &out, const VWState &state)
{
  out << state.GetPhrase() << "::" << state.GetSpanStart() << "-" << state.GetSpanEnd();
  return out;
}

}
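Illustration only (not part of the commit): the state-update constructor above maintains a fixed-size window over the growing target string — append the new hypothesis, then keep only the trailing contextSize words. A minimal standalone sketch of the same idea, with std::deque standing in for Phrase:

#include <deque>
#include <iostream>
#include <string>
#include <vector>

// Keep only the last contextSize words after appending new words,
// mirroring VWState(prevState, curHypo).
static void UpdateContext(std::deque<std::string> &context,
                          const std::vector<std::string> &newWords,
                          size_t contextSize) {
  for (size_t i = 0; i < newWords.size(); i++)
    context.push_back(newWords[i]);
  while (context.size() > contextSize)
    context.pop_front();
}

int main() {
  std::deque<std::string> context(3, "<s>");   // window of size 3
  std::vector<std::string> hypo;
  hypo.push_back("the");
  hypo.push_back("green");
  hypo.push_back("house");
  UpdateContext(context, hypo, 3);
  for (size_t i = 0; i < context.size(); i++)
    std::cout << context[i] << " ";            // prints "the green house"
  std::cout << std::endl;
  return 0;
}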
moses/FF/VW/VWState.h (new file, 56 lines)
@@ -0,0 +1,56 @@
#pragma once

#include <ostream>

#include "moses/FF/FFState.h"
#include "moses/Phrase.h"
#include "moses/Hypothesis.h"

namespace Moses
{

/**
 * VW state, used in decoding (when target context is enabled).
 */
class VWState : public FFState
{
public:
  // empty state, used only when VWState is ignored
  VWState();

  // used for construction of the initial VW state
  VWState(const Phrase &phrase);

  // continue from previous VW state with a new hypothesis
  VWState(const VWState &prevState, const Hypothesis &curHypo);

  virtual bool operator==(const FFState& o) const;

  inline virtual size_t hash() const {
    return m_hash;
  }

  inline const Phrase &GetPhrase() const {
    return m_phrase;
  }

  inline size_t GetSpanStart() const {
    return m_spanStart;
  }

  inline size_t GetSpanEnd() const {
    return m_spanEnd;
  }

private:
  void ComputeHash();

  Phrase m_phrase;
  size_t m_spanStart, m_spanEnd;
  size_t m_hash;
};

// how to print a VW state
std::ostream &operator<<(std::ostream &out, const VWState &state);

}
moses/FF/VW/VWTargetSentence.h (new file, 55 lines)
@@ -0,0 +1,55 @@
#pragma once

#include <vector>

#include "moses/AlignmentInfo.h"
#include "moses/Phrase.h"

#include "AlignmentConstraint.h"

namespace Moses
{

/**
 * VW thread-specific data about target sentence.
 */
class VWTargetSentence
{
public:
  VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}

  void Clear() {
    if (m_sentence) delete m_sentence;
    if (m_alignment) delete m_alignment;
  }

  ~VWTargetSentence() {
    Clear();
  }

  void SetConstraints(size_t sourceSize) {
    // initialize to unconstrained
    m_sourceConstraints.assign(sourceSize, AlignmentConstraint());
    m_targetConstraints.assign(m_sentence->GetSize(), AlignmentConstraint());

    // set constraints according to alignment points
    AlignmentInfo::const_iterator it;
    for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
      int src = it->first;
      int tgt = it->second;

      if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
        UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
      }

      m_sourceConstraints[src].Update(tgt);
      m_targetConstraints[tgt].Update(src);
    }
  }

  Phrase *m_sentence;
  AlignmentInfo *m_alignment;
  std::vector<AlignmentConstraint> m_sourceConstraints, m_targetConstraints;
};

}
@@ -59,6 +59,7 @@ Parameter::Parameter()
  AddParam(main_opts,"version", "show version of Moses and libraries used");
  AddParam(main_opts,"show-weights", "print feature weights and exit");
  AddParam(main_opts,"time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
  AddParam(main_opts,"segment-time-out", "seconds for single segment after which is interrupted (-1=no time-out, default is -1)");

  ///////////////////////////////////////////////////////////////////////////////////////
  // factorization options

@@ -54,8 +54,8 @@ void ReorderingConstraint::SetWall( size_t pos, bool value )
void ReorderingConstraint::FinalizeWalls()
{
  for(size_t z = 0; z < m_zone.size(); z++ ) {
    const size_t startZone = m_zone[z][0];
    const size_t endZone = m_zone[z][1];// note: wall after endZone is not local
    const size_t startZone = m_zone[z].first;
    const size_t endZone = m_zone[z].second;// note: wall after endZone is not local
    for( size_t pos = startZone; pos < endZone; pos++ ) {
      if (m_wall[ pos ]) {
        m_localWall[ pos ] = z;
@@ -65,8 +65,8 @@ void ReorderingConstraint::FinalizeWalls()
      // enforce that local walls only apply to innermost zone
      else if (m_localWall[ pos ] != NOT_A_ZONE) {
        size_t assigned_z = m_localWall[ pos ];
        if ((m_zone[assigned_z][0] < startZone) ||
            (m_zone[assigned_z][1] > endZone)) {
        if ((m_zone[assigned_z].first < startZone) ||
            (m_zone[assigned_z].second > endZone)) {
          m_localWall[ pos ] = z;
        }
      }
@@ -97,9 +97,9 @@ void ReorderingConstraint::SetMonotoneAtPunctuation( const Phrase &sentence )
void ReorderingConstraint::SetZone( size_t startPos, size_t endPos )
{
  VERBOSE(3,"SETTING zone " << startPos << "-" << endPos << std::endl);
  std::vector< size_t > newZone;
  newZone.push_back( startPos );
  newZone.push_back( endPos );
  std::pair<size_t,size_t> newZone;
  newZone.first = startPos;
  newZone.second = endPos;
  m_zone.push_back( newZone );
  m_active = true;
}
@@ -138,8 +138,8 @@ bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t

  // check zones
  for(size_t z = 0; z < m_zone.size(); z++ ) {
    const size_t startZone = m_zone[z][0];
    const size_t endZone = m_zone[z][1];
    const size_t startZone = m_zone[z].first;
    const size_t endZone = m_zone[z].second;

    // fine, if translation has not reached zone yet and phrase outside zone
    if (lastPos < startZone && ( endPos < startZone || startPos > endZone ) ) {
@@ -236,4 +236,25 @@ bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t
  return true;
}

std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj)
{
  out << "Zones:";
  for (size_t i = 0; i < obj.m_zone.size(); ++i) {
    const std::pair<size_t,size_t> &zone1 = obj.m_zone[i];
    out << zone1.first << "-" << zone1.second << " ";
  }

  out << "Walls:";
  for (size_t i = 0; i < obj.m_size; ++i) {
    out << obj.m_wall[i];
  }

  out << " Local walls:";
  for (size_t i = 0; i < obj.m_size; ++i) {
    out << obj.m_localWall[i] << " ";
  }

  return out;
}

}

@@ -45,13 +45,13 @@ class Bitmap;
 */
class ReorderingConstraint
{
  friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint& reorderingConstraint);
  friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj);
protected:
  // const size_t m_size; /**< number of words in sentence */
  size_t m_size; /**< number of words in sentence */
  bool *m_wall; /**< flag for each word if it is a wall */
  size_t *m_localWall; /**< flag for each word if it is a local wall */
  std::vector< std::vector< size_t > > m_zone; /** zones that limit reordering */
  std::vector< std::pair<size_t,size_t> > m_zone; /** zones that limit reordering */
  bool m_active; /**< flag indicating, if there are any active constraints */
  int m_max_distortion;
public:
@@ -93,7 +93,7 @@ public:
  void SetZone( size_t startPos, size_t endPos );

  //! returns the vector of zones
  std::vector< std::vector< size_t > > & GetZones() {
  std::vector< std::pair<size_t,size_t> > & GetZones() {
    return m_zone;
  }

@@ -17,21 +17,34 @@ Search::Search(Manager& manager)
  , interrupted_flag(0)
{
  m_initialTransOpt.SetInputPath(m_inputPath);
  m_timer.start();
}

bool
Search::
out_of_time()
{
  int const& timelimit = m_options.search.timeout;
  if (!timelimit) return false;
  double elapsed_time = GetUserTime();
  if (elapsed_time <= timelimit) return false;
  VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
          << timelimit << ")" << std::endl);
  interrupted_flag = 1;
  return true;
  if (timelimit > 0) {
    double elapsed_time = GetUserTime();
    if (elapsed_time > timelimit) {
      VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
              << timelimit << ")" << std::endl);
      interrupted_flag = 1;
      return true;
    }
  }
  int const& segment_timelimit = m_options.search.segment_timeout;
  if (segment_timelimit > 0) {
    double elapsed_time = m_timer.get_elapsed_time();
    if (elapsed_time > segment_timelimit) {
      VERBOSE(1,"Decoding for segment is out of time (" << elapsed_time << ","
              << segment_timelimit << ")" << std::endl);
      interrupted_flag = 1;
      return true;
    }
  }
  return false;
}

}
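Usage note (hypothetical invocation, flags taken from the parameters registered above): `moses -f moses.ini -time-out 600 -segment-time-out 60` caps the whole decoding run at 600 seconds while additionally aborting any single segment that runs longer than 60 seconds; the per-segment clock is the m_timer started in the Search constructor.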
@@ -7,6 +7,7 @@
#include "Phrase.h"
#include "InputPath.h"
#include "Bitmaps.h"
#include "Timer.h"

namespace Moses
{
@@ -48,6 +49,7 @@ protected:
  /** flag indicating that decoder ran out of time (see switch -time-out) */
  size_t interrupted_flag;

  Timer m_timer;
  bool out_of_time();
};

@@ -97,7 +97,6 @@ void SearchCubePruning::Decode()

  // go through each stack
  size_t stackNo = 1;
  int timelimit = m_options.search.timeout;
  std::vector < HypothesisStack* >::iterator iterStack;
  for (iterStack = m_hypoStackColl.begin() + 1 ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
    // BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {

@@ -155,7 +155,9 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
                  m_xmlOptions,
                  m_reorderingConstraint,
                  xmlWalls, placeholders);
    UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
    if (!OK) {
      TRACE_ERR("Unable to parse XML in line: " << line);
    }
  }
}

@@ -76,8 +76,9 @@ private:
  MinHeapSorter hs(A);
  std::make_heap(A.begin(), A.begin() + n, hs);

  size_t h = n;
  size_t m1, m2;
  // marked volatile to prevent the intel compiler from generating bad code
  volatile size_t h = n;
  volatile size_t m1, m2;
  while(h > 1) {
    m1 = A[0];
    std::pop_heap(A.begin(), A.begin() + h, hs);

@@ -38,6 +38,7 @@ namespace Moses
  param.SetParameter(early_discarding_threshold, "early-discarding-threshold",
                     DEFAULT_EARLY_DISCARDING_THRESHOLD);
  param.SetParameter(timeout, "time-out", 0);
  param.SetParameter(segment_timeout, "segment-time-out", 0);
  param.SetParameter(max_phrase_length, "max-phrase-length",
                     DEFAULT_MAX_PHRASE_LENGTH);
  param.SetParameter(trans_opt_threshold, "translation-option-threshold",

@@ -25,6 +25,7 @@ namespace Moses
  float beam_width;

  int timeout;
  int segment_timeout;

  bool consensus; //! Use Consensus decoding (DeNero et al 2009)

@@ -240,7 +240,7 @@ sub train_transliteration_module{

  `$MOSES_SRC_DIR/scripts/ems/support/substitute-filtered-tables.perl $OUT_DIR/tuning/filtered/moses.ini < $OUT_DIR/model/moses.ini > $OUT_DIR/tuning/moses.filtered.ini`;

  `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
  `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/bin -threads=16 --no-filter-phrase-table`;

  `cp $OUT_DIR/tuning/tmp/moses.ini $OUT_DIR/tuning/moses.ini`;

@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
input-tokenizer = "$farasa-dir/farasa_moses.sh"
#input-tokenizer = "$farasa-dir/farasa_moses.sh"


# truecasers - comment out if you do not use the truecaser

@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
input-tokenizer = "$farasa-dir/farasa_moses.sh"
#input-tokenizer = "$farasa-dir/farasa_moses.sh"

# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl

@@ -57,7 +57,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
input-tokenizer = "$farasa-dir/farasa_moses.sh"
#input-tokenizer = "$farasa-dir/farasa_moses.sh"

# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl

@@ -57,7 +57,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
input-tokenizer = "$farasa-dir/farasa_moses.sh"
#input-tokenizer = "$farasa-dir/farasa_moses.sh"

# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl

@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
input-tokenizer = "$farasa-dir/farasa_moses.sh"
#input-tokenizer = "$farasa-dir/farasa_moses.sh"

# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl

@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
input-tokenizer = "$farasa-dir/farasa_moses.sh"
#input-tokenizer = "$farasa-dir/farasa_moses.sh"

# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl

@@ -827,7 +827,7 @@ create-config
    in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table-pruned sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
    out: config
    ignore-if: use-hiero thot
    rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature
    rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature operation-sequence-model-load-method
    default-name: model/moses.ini
    error: Unknown option
    error: requires an argument
@@ -1540,6 +1540,150 @@ analysis-precision
    rerun-on-change: precision-by-coverage-base
    final-model: yes

[QUALITY-ESTIMATION] single
tokenize-input
    in: raw-input
    out: tokenized-input
    default-name: quality-estimation/input.tok
    pass-unless: input-tokenizer
    template: $input-tokenizer < IN > OUT
tokenize-input-devtest
    in: raw-input-devtest
    out: tokenized-input-devtest
    default-name: quality-estimation/input.devtest.tok
    pass-unless: input-tokenizer
    template: $input-tokenizer < IN > OUT
lowercase-input
    in: tokenized-input
    out: truecased-input
    default-name: quality-estimation/input.lc
    pass-unless: input-lowercaser
    ignore-if: input-truecaser
    template: $input-lowercaser < IN > OUT
lowercase-input-devtest
    in: tokenized-input-devtest
    out: truecased-input-devtest
    default-name: quality-estimation/input.devtest.lc
    pass-unless: input-lowercaser
    ignore-if: input-truecaser
    template: $input-lowercaser < IN > OUT
truecase-input
    in: tokenized-input TRUECASER:truecase-model
    out: truecased-input
    rerun-on-change: input-truecaser
    default-name: quality-estimation/input.tc
    ignore-unless: input-truecaser
    template: $input-truecaser -model IN1.$input-extension < IN > OUT
truecase-input-devtest
    in: tokenized-input-devtest TRUECASER:truecase-model
    out: truecased-input-devtest
    rerun-on-change: input-truecaser
    ignore-unless: input-truecaser
    default-name: quality-estimation/input.devtest.tc
    template: $input-truecaser -model IN1.$input-extension < IN > OUT
split-input
    in: truecased-input SPLITTER:splitter-model
    out: split-input
    rerun-on-change: input-splitter
    default-name: quality-estimation/input.split
    pass-unless: input-splitter
    template: $input-splitter -model IN1.$input-extension < IN > OUT
split-input-devtest
    in: truecased-input-devtest SPLITTER:splitter-model
    out: split-input-devtest
    rerun-on-change: input-splitter
    default-name: quality-estimation/input.devtest.split
    pass-unless: input-splitter
    template: $input-splitter -model IN1.$input-extension < IN > OUT
tokenize-reference
    in: raw-reference
    out: tokenized-reference
    default-name: quality-estimation/reference.tok
    pass-unless: output-tokenizer
    multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
    template: $output-tokenizer < IN > OUT
tokenize-reference-devtest
    in: raw-reference-devtest
    out: tokenized-reference-devtest
    default-name: quality-estimation/reference.devtest.tok
    pass-unless: output-tokenizer
    multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
    template: $output-tokenizer < IN > OUT
lowercase-reference
    in: tokenized-reference
    out: truecased-reference
    default-name: quality-estimation/reference.lc
    pass-unless: output-lowercaser
    ignore-if: output-truecaser
    multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
    template: $output-lowercaser < IN > OUT
lowercase-reference-devtest
    in: tokenized-reference-devtest
    out: truecased-reference-devtest
    default-name: quality-estimation/reference.devtest.lc
    pass-unless: output-lowercaser
    ignore-if: output-truecaser
    multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
    template: $output-lowercaser < IN > OUT
truecase-reference
    in: tokenized-reference TRUECASER:truecase-model
    out: truecased-reference
    rerun-on-change: output-truecaser
    default-name: quality-estimation/reference.tc
    ignore-unless: output-truecaser
    multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
    template: $output-truecaser -model IN1.$output-extension < IN > OUT
truecase-reference-devtest
    in: tokenized-reference-devtest TRUECASER:truecase-model
    out: truecased-reference-devtest
    rerun-on-change: output-truecaser
    default-name: quality-estimation/reference.devtest.tc
    ignore-unless: output-truecaser
    multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
    template: $output-truecaser -model IN1.$output-extension < IN > OUT
decode
    in: TUNING:config-with-reused-weights split-input
    out: rich-output
    default-name: quality-estimation/output
    template: $decoder -v 0 -tt -f IN < IN1 > OUT
    error: Translation was not performed correctly
    not-error: trans: No such file or directory
decode-devtest
    in: TUNING:config-with-reused-weights split-input-devtest
    out: rich-output-devtest
    default-name: quality-estimation/output-devtest
    template: $decoder -v 0 -tt -f IN < IN1 > OUT
    error: Translation was not performed correctly
    not-error: trans: No such file or directory
remove-markup
    in: rich-output
    out: cleaned-output
    default-name: quality-estimation/tokenized-output
    template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
remove-markup-devtest
    in: rich-output-devtest
    out: cleaned-output-devtest
    default-name: quality-estimation/tokenized-output-devtest
    template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
score-output
    in: cleaned-output truecased-reference
    out: scored-output
    default-name: quality-estimation/output-scored
    tmp-name: quality-estimation/ter
    template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
score-output-devtest
    in: cleaned-output-devtest truecased-reference-devtest
    out: scored-output-devtest
    default-name: quality-estimation/output-scored-devtest
    tmp-name: quality-estimation/ter-devtest
    template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
train
    in: input rich-output scored-output input-devtest rich-output-devtest scored-output-devtest
    out: quality-estimation-model
    default-name: quality-estimation/model
    template: $trainer --train-rich IN1 --train-ter IN2 --eval-rich IN4 --eval-ter IN5 --model OUT
    final-model: yes

[REPORTING] single
report
    in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:multi-bleu-detok-score EVALUATION:multi-bleu-c-detok-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model EVALUATION:wade-analysis

@@ -2660,12 +2660,16 @@ sub define_training_create_config {

  if ($osm) {
    my $osm_settings = &get("TRAINING:operation-sequence-model-settings");
    if ($osm_settings =~ /-factor *(\S+)/){
    if ($osm_settings =~ /-factor *(\S+)/) {
      $cmd .= "-osm-model $osm/ -osm-setting $1 ";
    }
    else {
      $cmd .= "-osm-model $osm/operationLM.bin ";
    }
    my $osm_load_method = &get("TRAINING:operation-sequence-model-load-method");
    if (defined($osm_load_method)) {
      $cmd .= "-osm-load-method $osm_load_method ";
    }
  }

  if (&get("TRAINING:phrase-orientation")) {

scripts/ems/support/create-xml.perl (new executable file, 42 lines)
@@ -0,0 +1,42 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

use warnings;
use strict;

my ($type) = @ARGV;
if ($type =~ /^s/i) {
  print "<srcset setid=\"test\" srclang=\"any\">\n";
  print "<doc docid=\"doc\">\n";
}
elsif ($type =~ /^t/i) {
  print "<tstset setid=\"test\" tgtlang=\"any\" srclang=\"any\">\n";
  print "<doc sysid=\"moses\" docid=\"doc\">\n";
}
elsif ($type =~ /^r/i) {
  print "<refset setid=\"test\" tgtlang=\"any\" srclang=\"any\">\n";
  print "<doc sysid=\"ref\" docid=\"doc\">\n";
}
else {
  die("ERROR: specify source / target / ref");
}

my $i = 0;
while(<STDIN>) {
  chomp;
  print "<seg id=\"".(++$i)."\">$_</seg>\n";
}

print "</doc>\n";

if ($type =~ /^s/i) {
  print "</srcset>\n";
}
elsif ($type =~ /^t/i) {
  print "</tstset>\n";
}
elsif ($type =~ /^r/i) {
  print "</refset>\n";
}
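Usage note: the script wraps one plain-text segment per line in the SGML skeleton that tercom expects. As invoked by ter.perl below, `create-xml.perl test < hyp > hyp.sgm` emits a tstset (any argument starting with "t"), `create-xml.perl ref < ref > ref.sgm` a refset, and an argument starting with "s" a srcset.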
@@ -9,7 +9,16 @@ use strict;
$|++;

while(<STDIN>) {
  s/ \|\d+\-\d+\| / /g;
  s/ \|\d+\-\d+\|$//;
  print $_;
  chop;
  s/\|[^\|]+\|//g;
  s/\s+/ /g;
  s/^ //;
  s/ $//;
  print $_."\n";
}

#while(<STDIN>) {
#  s/ \|\d+\-\d+\| / /g;
#  s/ \|\d+\-\d+\|$//;
#  print $_;
#}
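For illustration: given decoder output such as `the house |0-1| is |2-2| small |3-3|`, the rewritten filter strips any `|...|` group and renormalizes whitespace, printing `the house is small`; the previous version, kept above as a comment, only handled numeric `|i-j|` spans surrounded by spaces.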
scripts/ems/support/ter.perl (new file, 15 lines)
@@ -0,0 +1,15 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

use strict;
use FindBin qw($RealBin);

my ($jar, $hyp, $ref, $tmp) = @ARGV;
`mkdir -p $tmp`;
`$RealBin/create-xml.perl test < $hyp > $tmp/hyp`;
`$RealBin/create-xml.perl ref < $ref > $tmp/ref`;
`java -jar $jar -h $tmp/hyp -r $tmp/ref -o ter -n $tmp/out`;
print `cat $tmp/out.ter`;
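Usage note: as wired into the [QUALITY-ESTIMATION] score-output steps above (template `mkdir TMP ; ter.perl $tercom IN IN1 TMP > OUT`), the script is invoked as `ter.perl <tercom.jar> <hypothesis> <reference> <tmp-dir>` and writes tercom's per-segment TER report to stdout.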
@@ -83,6 +83,7 @@ my($_EXTERNAL_BINDIR,
    $_CONFIG,
    $_OSM,
    $_OSM_FACTORS,
    $_OSM_LOAD_METHOD,
    $_POST_DECODING_TRANSLIT,
    $_TRANSLITERATION_PHRASE_TABLE,
    $_HIERARCHICAL,
@@ -238,6 +239,7 @@ $_HELP = 1
  'config=s' => \$_CONFIG,
  'osm-model=s' => \$_OSM,
  'osm-setting=s' => \$_OSM_FACTORS,
  'osm-load-method=s' => \$_OSM_LOAD_METHOD,
  'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
  'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
  'mmsapt' => \$_MMSAPT,
@@ -2249,6 +2251,8 @@ sub create_ini {

  if($_OSM)
  {
    my $load_method = "";
    $load_method = " load=$_OSM_LOAD_METHOD" if defined($_OSM_LOAD_METHOD);
    if (defined($_OSM_FACTORS))
    {
      my $count = 0;
@@ -2258,11 +2262,11 @@ sub create_ini {
      my ($factor_f,$factor_e) = split(/\-/,$factor_val);

      if($count == 0){
        $feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
        $feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
        $weight_spec .= "OpSequenceModel$count= 0.08 -0.02 0.02 -0.001 0.03\n";
      }
      else{
        $feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
        $feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
        $weight_spec .= "OpSequenceModel$count= 0.08 \n";

      }
@@ -2271,7 +2275,7 @@ sub create_ini {
    }
    else
    {
      $feature_spec .= "OpSequenceModel name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
      $feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
      $weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
    }
  }
|
||||
}
|
||||
$type = "KENLM" unless defined $type; # default to KENLM if no type given
|
||||
|
||||
if ($type =~ /^\d+$/) {
|
||||
if ($type =~ /^8-(.+)/) {
|
||||
$type = "KENLM load=$1";
|
||||
} elsif ($type =~ /^\d+$/) {
|
||||
# backwards compatibility if the type is given not as string but as a number
|
||||
if ($type == 0) {
|
||||
$type = "SRILM";
|
||||
|
@ -24,6 +24,8 @@ class ezexample;
|
||||
|
||||
namespace Discriminative
|
||||
{
|
||||
typedef std::pair<uint32_t, float> FeatureType; // feature hash (=ID) and value
|
||||
typedef std::vector<FeatureType> FeatureVector;
|
||||
|
||||
/**
|
||||
* Abstract class to be implemented by classifiers.
|
||||
@ -34,12 +36,22 @@ public:
|
||||
/**
|
||||
* Add a feature that does not depend on the class (label).
|
||||
*/
|
||||
virtual void AddLabelIndependentFeature(const StringPiece &name, float value) = 0;
|
||||
virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value) = 0;
|
||||
|
||||
/**
|
||||
* Add a feature that is specific for the given class.
|
||||
*/
|
||||
virtual void AddLabelDependentFeature(const StringPiece &name, float value) = 0;
|
||||
virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value) = 0;
|
||||
|
||||
/**
|
||||
* Efficient addition of features when their IDs are already computed.
|
||||
*/
|
||||
virtual void AddLabelIndependentFeatureVector(const FeatureVector &features) = 0;
|
||||
|
||||
/**
|
||||
* Efficient addition of features when their IDs are already computed.
|
||||
*/
|
||||
virtual void AddLabelDependentFeatureVector(const FeatureVector &features) = 0;
|
||||
|
||||
/**
|
||||
* Train using current example. Use loss to distinguish positive and negative training examples.
|
||||
@ -54,12 +66,12 @@ public:
|
||||
virtual float Predict(const StringPiece &label) = 0;
|
||||
|
||||
// helper methods for indicator features
|
||||
void AddLabelIndependentFeature(const StringPiece &name) {
|
||||
AddLabelIndependentFeature(name, 1.0);
|
||||
FeatureType AddLabelIndependentFeature(const StringPiece &name) {
|
||||
return AddLabelIndependentFeature(name, 1.0);
|
||||
}
|
||||
|
||||
void AddLabelDependentFeature(const StringPiece &name) {
|
||||
AddLabelDependentFeature(name, 1.0);
|
||||
FeatureType AddLabelDependentFeature(const StringPiece &name) {
|
||||
return AddLabelDependentFeature(name, 1.0);
|
||||
}
|
||||
|
||||
virtual ~Classifier() {}
|
||||
@ -95,8 +107,10 @@ public:
|
||||
VWTrainer(const std::string &outputFile);
|
||||
virtual ~VWTrainer();
|
||||
|
||||
virtual void AddLabelIndependentFeature(const StringPiece &name, float value);
|
||||
virtual void AddLabelDependentFeature(const StringPiece &name, float value);
|
||||
virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value);
|
||||
virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value);
|
||||
virtual void AddLabelIndependentFeatureVector(const FeatureVector &features);
|
||||
virtual void AddLabelDependentFeatureVector(const FeatureVector &features);
|
||||
virtual void Train(const StringPiece &label, float loss);
|
||||
virtual float Predict(const StringPiece &label);
|
||||
|
||||
@ -121,15 +135,17 @@ public:
|
||||
VWPredictor(const std::string &modelFile, const std::string &vwOptions);
|
||||
virtual ~VWPredictor();
|
||||
|
||||
virtual void AddLabelIndependentFeature(const StringPiece &name, float value);
|
||||
virtual void AddLabelDependentFeature(const StringPiece &name, float value);
|
||||
virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value);
|
||||
virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value);
|
||||
virtual void AddLabelIndependentFeatureVector(const FeatureVector &features);
|
||||
virtual void AddLabelDependentFeatureVector(const FeatureVector &features);
|
||||
virtual void Train(const StringPiece &label, float loss);
|
||||
virtual float Predict(const StringPiece &label);
|
||||
|
||||
friend class ClassifierFactory;
|
||||
|
||||
protected:
|
||||
void AddFeature(const StringPiece &name, float values);
|
||||
FeatureType AddFeature(const StringPiece &name, float values);
|
||||
|
||||
::vw *m_VWInstance, *m_VWParser;
|
||||
::ezexample *m_ex;
|
||||
|
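The point of returning FeatureType is to let callers hash each feature string once and replay the resulting IDs cheaply. A hypothetical helper against the interface declared above (the feature names are illustrative; note that the VWTrainer overrides below deliberately reject this path):

// Sketch only: extract once, replay later via the new vector-based calls.
void ExtractOnceReplayLater(Discriminative::Classifier &classifier) {
  Discriminative::FeatureVector cached;
  // hash the feature strings once, keeping the returned (id, value) pairs
  cached.push_back(classifier.AddLabelIndependentFeature("tcwin^-1^house"));
  cached.push_back(classifier.AddLabelIndependentFeature("tcwin^-2^green"));
  // ...on a later example, add the same features without re-hashing strings
  classifier.AddLabelIndependentFeatureVector(cached);
}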
@@ -2,6 +2,7 @@
#define moses_Normalizer_h

#include <vector>
#include <algorithm>
#include "Util.h"

namespace Discriminative
@@ -45,16 +46,25 @@ public:
  virtual ~SquaredLossNormalizer() {}
};

// safe softmax
class LogisticLossNormalizer : public Normalizer
{
public:
  virtual void operator()(std::vector<float> &losses) const {
    float sum = 0;
    std::vector<float>::iterator it;

    float sum = 0;
    float max = 0;
    for (it = losses.begin(); it != losses.end(); it++) {
      *it = exp(-*it);
      *it = -*it;
      max = std::max(max, *it);
    }

    for (it = losses.begin(); it != losses.end(); it++) {
      *it = exp(*it - max);
      sum += *it;
    }

    for (it = losses.begin(); it != losses.end(); it++) {
      *it /= sum;
    }
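The rewrite replaces the naive exp(-loss) normalization with a max-shifted softmax: since max is at least 0 and every shifted exponent is non-positive, exp() stays in (0, 1] and can no longer overflow. A standalone sketch with hypothetical loss values:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  // hypothetical raw losses; the old exp(-loss) would overflow to inf here
  // (exp(100) > FLT_MAX), turning every normalized value into nan
  std::vector<float> losses;
  losses.push_back(-100.0f);
  losses.push_back(-99.0f);

  // negate and track the maximum, as in the rewritten operator()
  float max = 0;
  for (size_t i = 0; i < losses.size(); i++) {
    losses[i] = -losses[i];
    max = std::max(max, losses[i]);
  }

  // shifted exponentials stay in (0, 1], so no overflow is possible
  float sum = 0;
  for (size_t i = 0; i < losses.size(); i++) {
    losses[i] = std::exp(losses[i] - max);
    sum += losses[i];
  }

  for (size_t i = 0; i < losses.size(); i++)
    std::cout << losses[i] / sum << " ";   // prints approx. 0.731 0.269
  std::cout << std::endl;
  return 0;
}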
@@ -36,7 +36,7 @@ VWPredictor::~VWPredictor()
  VW::finish(*m_VWInstance);
}

void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value)
FeatureType VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value)
{
  // label-independent features are kept in a different feature namespace ('s' = source)

@@ -48,10 +48,10 @@ void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float valu
    m_ex->addns('s');
    if (DEBUG) std::cerr << "VW :: Setting source namespace\n";
  }
  AddFeature(name, value); // namespace 's' is set up, add the feature
  return AddFeature(name, value); // namespace 's' is set up, add the feature
}

void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
FeatureType VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
{
  // VW does not use the label directly, instead, we do a Cartesian product between source and target feature
  // namespaces, where the source namespace ('s') contains label-independent features and the target
@@ -63,7 +63,37 @@ void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
    m_ex->addns('t');
    if (DEBUG) std::cerr << "VW :: Setting target namespace\n";
  }
  AddFeature(name, value);
  return AddFeature(name, value);
}

void VWPredictor::AddLabelIndependentFeatureVector(const FeatureVector &features)
{
  if (m_isFirstSource) {
    // the first feature of a new example => create the source namespace for
    // label-independent features to live in
    m_isFirstSource = false;
    m_ex->finish();
    m_ex->addns('s');
    if (DEBUG) std::cerr << "VW :: Setting source namespace\n";
  }

  // add each feature index using this "low level" call to VW
  for (FeatureVector::const_iterator it = features.begin(); it != features.end(); it++)
    m_ex->addf(it->first, it->second);
}

void VWPredictor::AddLabelDependentFeatureVector(const FeatureVector &features)
{
  if (m_isFirstTarget) {
    // the first target-side feature => create namespace 't'
    m_isFirstTarget = false;
    m_ex->addns('t');
    if (DEBUG) std::cerr << "VW :: Setting target namespace\n";
  }

  // add each feature index using this "low level" call to VW
  for (FeatureVector::const_iterator it = features.begin(); it != features.end(); it++)
    m_ex->addf(it->first, it->second);
}

void VWPredictor::Train(const StringPiece &label, float loss)
@@ -82,10 +112,10 @@ float VWPredictor::Predict(const StringPiece &label)
  return loss;
}

void VWPredictor::AddFeature(const StringPiece &name, float value)
FeatureType VWPredictor::AddFeature(const StringPiece &name, float value)
{
  if (DEBUG) std::cerr << "VW :: Adding feature: " << EscapeSpecialChars(name.as_string()) << ":" << value << "\n";
  m_ex->addf(EscapeSpecialChars(name.as_string()), value);
  return std::make_pair(m_ex->addf(EscapeSpecialChars(name.as_string()), value), value);
}

} // namespace Discriminative

@@ -25,7 +25,7 @@ VWTrainer::~VWTrainer()
  close(m_bfos);
}

void VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
FeatureType VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
{
  if (m_isFirstSource) {
    if (m_isFirstExample) {
@@ -43,9 +43,11 @@ void VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
  }

  AddFeature(name, value);

  return std::make_pair(0, value); // we don't hash features
}

void VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
FeatureType VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
{
  if (m_isFirstTarget) {
    m_isFirstTarget = false;
@@ -56,6 +58,18 @@ void VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
  }

  AddFeature(name, value);

  return std::make_pair(0, value); // we don't hash features
}

void VWTrainer::AddLabelIndependentFeatureVector(const FeatureVector &features)
{
  throw logic_error("VW trainer does not support feature IDs.");
}

void VWTrainer::AddLabelDependentFeatureVector(const FeatureVector &features)
{
  throw logic_error("VW trainer does not support feature IDs.");
}

void VWTrainer::Train(const StringPiece &label, float loss)