daily automatic beautifier

This commit is contained in:
MosesAdmin 2016-06-02 00:00:39 +01:00
parent 2ad1eacef7
commit ea306f62b7
9 changed files with 606 additions and 597 deletions

View File

@ -3,21 +3,23 @@
#include "M2.h"
namespace MosesTuning {
namespace MosesTuning
{
namespace M2 {
namespace M2
{
bool Annot::lowercase = true;
std::string Annot::transform(const std::string& e) {
std::string Annot::transform(const std::string& e)
{
std::string temp = e;
if(lowercase) {
boost::erase_all(temp, " ");
return ToLower(temp);
}
else
} else
return e;
}
}
const std::string ToLower(const std::string& str)
{
@ -27,7 +29,8 @@ const std::string ToLower(const std::string& str)
}
Edit operator+(Edit& e1, Edit& e2) {
Edit operator+(Edit& e1, Edit& e2)
{
std::string edit;
if(e1.edit.size() > 0 && e2.edit.size() > 0)
edit = e1.edit + " " + e2.edit;
@ -40,11 +43,13 @@ Edit operator+(Edit& e1, Edit& e2) {
}
Edge operator+(Edge e1, Edge e2) {
Edge operator+(Edge e1, Edge e2)
{
return Edge(e1.v, e2.u, e1.edit + e2.edit);
}
std::ostream& operator<<(std::ostream& o, Sentence s) {
std::ostream& operator<<(std::ostream& o, Sentence s)
{
for(Sentence::iterator it = s.begin(); it != s.end(); it++)
o << *it << " ";
return o;

View File

@ -16,9 +16,11 @@
namespace MosesTuning {
namespace MosesTuning
{
namespace M2 {
namespace M2
{
typedef std::vector<float> Stats;
@ -127,8 +129,9 @@ typedef std::vector<TrackRow> TrackMatrix;
typedef std::set<Vertex> Vertices;
typedef std::set<Edge> Edges;
class M2 {
private:
class M2
{
private:
M2File m_m2;
size_t m_max_unchanged;
@ -136,7 +139,7 @@ class M2 {
bool m_lowercase;
bool m_verbose;
public:
public:
M2() : m_max_unchanged(2), m_beta(0.5), m_lowercase(true), m_verbose(false) { }
M2(size_t max_unchanged, float beta, bool truecase, bool verbose = false)
: m_max_unchanged(max_unchanged), m_beta(beta), m_lowercase(!truecase), m_verbose(verbose) {
@ -190,8 +193,7 @@ class M2 {
unit.third.insert(a.annotator);
unit.second.insert(a);
}
else {
} else {
std::stringstream annotStr(annot[5]);
size_t annotator;
annotStr >> annotator;
@ -345,8 +347,7 @@ class M2 {
v = predecessor[v];
if(v == source)
break;
}
else {
} else {
std::cout << "Error" << std::endl;
break;
}
@ -430,8 +431,7 @@ class M2 {
stats[2] = 0;
stats[3] = unit.first.size();
return;
}
else if(distance > 0) {
} else if(distance > 0) {
for(size_t j = 0; j < unit.third.size(); j++) {
BuildGraph(bt, Vs[j], Es[j]);
AddWeights(Es[j], unit, j);

View File

@ -78,11 +78,13 @@ float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
return f;
}
float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const {
float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
{
return comps[3];
}
std::vector<ScoreStatsType> randomStats(float decay, int max) {
std::vector<ScoreStatsType> randomStats(float decay, int max)
{
int gold = rand() % max;
int prop = rand() % max;
int corr = 0.0;

View File

@ -23,7 +23,8 @@ namespace Moses
using namespace std;
std::string MakePair(const std::string &s1, const std::string &s2, bool general) {
std::string MakePair(const std::string &s1, const std::string &s2, bool general)
{
std::vector<std::string> sourceList;
std::vector<std::string> targetList;
@ -54,27 +55,23 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
sprintf((char*)temp.c_str(), "%d", count);
targetList.push_back("\\" + temp + "·");
count++;
}
else {
} else {
sourceList.push_back(match + "·");
targetList.push_back(match + "·");
}
match.clear();
}
else
} else
match.push_back(s1[i]);
i++;
j++;
}
else if(type == 'd') {
} else if(type == 'd') {
if(s1[i] == '+')
source += "·";
else
source.push_back(s1[i]);
i++;
}
else if(type == 'i') {
} else if(type == 'i') {
if(s2[j] == '+')
target += "·";
else
@ -88,8 +85,7 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
sprintf((char*)temp.c_str(), "%d", count);
targetList.push_back("\\" + temp);
count++;
}
else {
} else {
sourceList.push_back(match);
targetList.push_back(match);
}
@ -111,15 +107,13 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
sprintf((char*)temp.c_str(), "%d", count);
targetList.push_back("\\"+ temp);
count++;
}
else {
} else {
sourceList.push_back(match);
targetList.push_back(match);
}
}
match.clear();
}
else {
} else {
std::string cs1 = s1;
std::string cs2 = s2;
boost::replace_all(cs1, "+", "·");
@ -139,17 +133,16 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
return out.str();
}
std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const {
std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const
{
std::stringstream out;
if(s1.empty()) {
out << "ins(«" << boost::join(s2, "·") << "»)";
return out.str();
}
else if(s2.empty()) {
} else if(s2.empty()) {
out << "del(«" << boost::join(s1, "·") << "»)";
return out.str();
}
else {
} else {
typename Tokens::value_type v1 = boost::join(s1, "+");
typename Tokens::value_type v2 = boost::join(s2, "+");
out << MakePair(v1, v2, m_general);
@ -163,7 +156,8 @@ std::vector<std::string> GetContext(size_t pos,
const InputType &input,
const InputPath &inputPath,
const std::vector<FactorType>& factorTypes,
bool isRight) {
bool isRight)
{
const Sentence& sentence = static_cast<const Sentence&>(input);
const Range& range = inputPath.GetWordsRange();
@ -179,8 +173,7 @@ std::vector<std::string> GetContext(size_t pos,
for(int i = 0; i < length; i++) {
if(leftPos - i >= 0) {
current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false));
}
else {
} else {
current.push_back("<s>");
}
}
@ -195,8 +188,7 @@ std::vector<std::string> GetContext(size_t pos,
for(int i = 0; i < length; i++) {
if(rightPos + i < (int)sentence.GetSize()) {
current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false));
}
else {
} else {
current.push_back("</s>");
}
}
@ -214,7 +206,8 @@ std::vector<std::string>
CorrectionPattern::CreatePattern(const Tokens &s1,
const Tokens &s2,
const InputType &input,
const InputPath &inputPath) const {
const InputPath &inputPath) const
{
Diffs diffs = CreateDiff(s1, s2);
size_t i = 0, j = 0;
@ -250,12 +243,10 @@ CorrectionPattern::CreatePattern(const Tokens &s1,
}
i++;
j++;
}
else if(type == 'd') {
} else if(type == 'd') {
source.push_back(s1[i]);
i++;
}
else if(type == 'i') {
} else if(type == 'i') {
target.push_back(s2[j]);
j++;
}

View File

@ -17,16 +17,15 @@ void CreateDiffRec(size_t** c,
size_t i,
size_t j,
Diffs& diffs,
Pred pred) {
Pred pred)
{
if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
diffs.push_back(Diff('m'));
}
else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
} else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
diffs.push_back(Diff('i'));
}
else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
} else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
diffs.push_back(Diff('d'));
}
@ -35,7 +34,8 @@ void CreateDiffRec(size_t** c,
template <class Sequence, class Pred>
Diffs CreateDiff(const Sequence& s1,
const Sequence& s2,
Pred pred) {
Pred pred)
{
Diffs diffs;
@ -84,12 +84,14 @@ Diffs CreateDiff(const Sequence& s1,
}
template <class Sequence>
Diffs CreateDiff(const Sequence& s1, const Sequence& s2) {
Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
{
return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
}
template <class Sequence, class Sig, class Stats>
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) {
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
{
if(sig.size() != stats.size())
throw "Signature size differs from score array size.";
@ -109,18 +111,27 @@ void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& sta
k++;
}
j += k;
}
else if(diff[j] == 'i')
} else if(diff[j] == 'i')
i++;
}
for(size_t j = 0; j < sig.size(); ++j) {
switch (sig[j]) {
case 'l': stats[j] += d + i + s; break;
case 'm': stats[j] += m; break;
case 'd': stats[j] += d; break;
case 'i': stats[j] += i; break;
case 's': stats[j] += s; break;
case 'l':
stats[j] += d + i + s;
break;
case 'm':
stats[j] += m;
break;
case 'd':
stats[j] += d;
break;
case 'i':
stats[j] += i;
break;
case 's':
stats[j] += s;
break;
case 'r':
float macc = 1;
if (d + i + s + m)

View File

@ -21,7 +21,8 @@ namespace Moses
using namespace std;
std::string ParseScores(const std::string &line, const std::string& defaultScores) {
std::string ParseScores(const std::string &line, const std::string& defaultScores)
{
std::vector<std::string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.empty(), "Empty line");
@ -84,8 +85,7 @@ void EditOps::ComputeFeatures(
std::string targetStr = target.GetStringRep(factors);
AddStats(sourceStr, targetStr, m_scores, ops);
}
else {
} else {
std::vector<std::string> sourceTokens;
//std::cerr << "Ed src: ";
for(size_t i = 0; i < source.GetSize(); ++i) {