daily automatic beautifier

This commit is contained in:
MosesAdmin 2016-06-02 00:00:39 +01:00
parent 2ad1eacef7
commit ea306f62b7
9 changed files with 606 additions and 597 deletions

View File

@ -3,19 +3,21 @@
#include "M2.h" #include "M2.h"
namespace MosesTuning { namespace MosesTuning
{
namespace M2 { namespace M2
{
bool Annot::lowercase = true; bool Annot::lowercase = true;
std::string Annot::transform(const std::string& e) { std::string Annot::transform(const std::string& e)
{
std::string temp = e; std::string temp = e;
if(lowercase) { if(lowercase) {
boost::erase_all(temp, " "); boost::erase_all(temp, " ");
return ToLower(temp); return ToLower(temp);
} } else
else
return e; return e;
} }
@ -27,7 +29,8 @@ const std::string ToLower(const std::string& str)
} }
Edit operator+(Edit& e1, Edit& e2) { Edit operator+(Edit& e1, Edit& e2)
{
std::string edit; std::string edit;
if(e1.edit.size() > 0 && e2.edit.size() > 0) if(e1.edit.size() > 0 && e2.edit.size() > 0)
edit = e1.edit + " " + e2.edit; edit = e1.edit + " " + e2.edit;
@ -40,11 +43,13 @@ Edit operator+(Edit& e1, Edit& e2) {
} }
Edge operator+(Edge e1, Edge e2) { Edge operator+(Edge e1, Edge e2)
{
return Edge(e1.v, e2.u, e1.edit + e2.edit); return Edge(e1.v, e2.u, e1.edit + e2.edit);
} }
std::ostream& operator<<(std::ostream& o, Sentence s) { std::ostream& operator<<(std::ostream& o, Sentence s)
{
for(Sentence::iterator it = s.begin(); it != s.end(); it++) for(Sentence::iterator it = s.begin(); it != s.end(); it++)
o << *it << " "; o << *it << " ";
return o; return o;

View File

@ -16,9 +16,11 @@
namespace MosesTuning { namespace MosesTuning
{
namespace M2 { namespace M2
{
typedef std::vector<float> Stats; typedef std::vector<float> Stats;
@ -127,7 +129,8 @@ typedef std::vector<TrackRow> TrackMatrix;
typedef std::set<Vertex> Vertices; typedef std::set<Vertex> Vertices;
typedef std::set<Edge> Edges; typedef std::set<Edge> Edges;
class M2 { class M2
{
private: private:
M2File m_m2; M2File m_m2;
@ -190,8 +193,7 @@ class M2 {
unit.third.insert(a.annotator); unit.third.insert(a.annotator);
unit.second.insert(a); unit.second.insert(a);
} } else {
else {
std::stringstream annotStr(annot[5]); std::stringstream annotStr(annot[5]);
size_t annotator; size_t annotator;
annotStr >> annotator; annotStr >> annotator;
@ -345,8 +347,7 @@ class M2 {
v = predecessor[v]; v = predecessor[v];
if(v == source) if(v == source)
break; break;
} } else {
else {
std::cout << "Error" << std::endl; std::cout << "Error" << std::endl;
break; break;
} }
@ -430,8 +431,7 @@ class M2 {
stats[2] = 0; stats[2] = 0;
stats[3] = unit.first.size(); stats[3] = unit.first.size();
return; return;
} } else if(distance > 0) {
else if(distance > 0) {
for(size_t j = 0; j < unit.third.size(); j++) { for(size_t j = 0; j < unit.third.size(); j++) {
BuildGraph(bt, Vs[j], Es[j]); BuildGraph(bt, Vs[j], Es[j]);
AddWeights(Es[j], unit, j); AddWeights(Es[j], unit, j);

View File

@ -78,11 +78,13 @@ float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
return f; return f;
} }
float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const { float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
{
return comps[3]; return comps[3];
} }
std::vector<ScoreStatsType> randomStats(float decay, int max) { std::vector<ScoreStatsType> randomStats(float decay, int max)
{
int gold = rand() % max; int gold = rand() % max;
int prop = rand() % max; int prop = rand() % max;
int corr = 0.0; int corr = 0.0;

View File

@ -23,7 +23,8 @@ namespace Moses
using namespace std; using namespace std;
std::string MakePair(const std::string &s1, const std::string &s2, bool general) { std::string MakePair(const std::string &s1, const std::string &s2, bool general)
{
std::vector<std::string> sourceList; std::vector<std::string> sourceList;
std::vector<std::string> targetList; std::vector<std::string> targetList;
@ -54,27 +55,23 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
sprintf((char*)temp.c_str(), "%d", count); sprintf((char*)temp.c_str(), "%d", count);
targetList.push_back("\\" + temp + "·"); targetList.push_back("\\" + temp + "·");
count++; count++;
} } else {
else {
sourceList.push_back(match + "·"); sourceList.push_back(match + "·");
targetList.push_back(match + "·"); targetList.push_back(match + "·");
} }
match.clear(); match.clear();
} } else
else
match.push_back(s1[i]); match.push_back(s1[i]);
i++; i++;
j++; j++;
} } else if(type == 'd') {
else if(type == 'd') {
if(s1[i] == '+') if(s1[i] == '+')
source += "·"; source += "·";
else else
source.push_back(s1[i]); source.push_back(s1[i]);
i++; i++;
} } else if(type == 'i') {
else if(type == 'i') {
if(s2[j] == '+') if(s2[j] == '+')
target += "·"; target += "·";
else else
@ -88,8 +85,7 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
sprintf((char*)temp.c_str(), "%d", count); sprintf((char*)temp.c_str(), "%d", count);
targetList.push_back("\\" + temp); targetList.push_back("\\" + temp);
count++; count++;
} } else {
else {
sourceList.push_back(match); sourceList.push_back(match);
targetList.push_back(match); targetList.push_back(match);
} }
@ -111,15 +107,13 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
sprintf((char*)temp.c_str(), "%d", count); sprintf((char*)temp.c_str(), "%d", count);
targetList.push_back("\\"+ temp); targetList.push_back("\\"+ temp);
count++; count++;
} } else {
else {
sourceList.push_back(match); sourceList.push_back(match);
targetList.push_back(match); targetList.push_back(match);
} }
} }
match.clear(); match.clear();
} } else {
else {
std::string cs1 = s1; std::string cs1 = s1;
std::string cs2 = s2; std::string cs2 = s2;
boost::replace_all(cs1, "+", "·"); boost::replace_all(cs1, "+", "·");
@ -139,17 +133,16 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
return out.str(); return out.str();
} }
std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const { std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const
{
std::stringstream out; std::stringstream out;
if(s1.empty()) { if(s1.empty()) {
out << "ins(«" << boost::join(s2, "·") << "»)"; out << "ins(«" << boost::join(s2, "·") << "»)";
return out.str(); return out.str();
} } else if(s2.empty()) {
else if(s2.empty()) {
out << "del(«" << boost::join(s1, "·") << "»)"; out << "del(«" << boost::join(s1, "·") << "»)";
return out.str(); return out.str();
} } else {
else {
typename Tokens::value_type v1 = boost::join(s1, "+"); typename Tokens::value_type v1 = boost::join(s1, "+");
typename Tokens::value_type v2 = boost::join(s2, "+"); typename Tokens::value_type v2 = boost::join(s2, "+");
out << MakePair(v1, v2, m_general); out << MakePair(v1, v2, m_general);
@ -163,7 +156,8 @@ std::vector<std::string> GetContext(size_t pos,
const InputType &input, const InputType &input,
const InputPath &inputPath, const InputPath &inputPath,
const std::vector<FactorType>& factorTypes, const std::vector<FactorType>& factorTypes,
bool isRight) { bool isRight)
{
const Sentence& sentence = static_cast<const Sentence&>(input); const Sentence& sentence = static_cast<const Sentence&>(input);
const Range& range = inputPath.GetWordsRange(); const Range& range = inputPath.GetWordsRange();
@ -179,8 +173,7 @@ std::vector<std::string> GetContext(size_t pos,
for(int i = 0; i < length; i++) { for(int i = 0; i < length; i++) {
if(leftPos - i >= 0) { if(leftPos - i >= 0) {
current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false)); current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false));
} } else {
else {
current.push_back("<s>"); current.push_back("<s>");
} }
} }
@ -195,8 +188,7 @@ std::vector<std::string> GetContext(size_t pos,
for(int i = 0; i < length; i++) { for(int i = 0; i < length; i++) {
if(rightPos + i < (int)sentence.GetSize()) { if(rightPos + i < (int)sentence.GetSize()) {
current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false)); current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false));
} } else {
else {
current.push_back("</s>"); current.push_back("</s>");
} }
} }
@ -214,7 +206,8 @@ std::vector<std::string>
CorrectionPattern::CreatePattern(const Tokens &s1, CorrectionPattern::CreatePattern(const Tokens &s1,
const Tokens &s2, const Tokens &s2,
const InputType &input, const InputType &input,
const InputPath &inputPath) const { const InputPath &inputPath) const
{
Diffs diffs = CreateDiff(s1, s2); Diffs diffs = CreateDiff(s1, s2);
size_t i = 0, j = 0; size_t i = 0, j = 0;
@ -250,12 +243,10 @@ CorrectionPattern::CreatePattern(const Tokens &s1,
} }
i++; i++;
j++; j++;
} } else if(type == 'd') {
else if(type == 'd') {
source.push_back(s1[i]); source.push_back(s1[i]);
i++; i++;
} } else if(type == 'i') {
else if(type == 'i') {
target.push_back(s2[j]); target.push_back(s2[j]);
j++; j++;
} }

View File

@ -17,16 +17,15 @@ void CreateDiffRec(size_t** c,
size_t i, size_t i,
size_t j, size_t j,
Diffs& diffs, Diffs& diffs,
Pred pred) { Pred pred)
{
if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) { if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred); CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
diffs.push_back(Diff('m')); diffs.push_back(Diff('m'));
} } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred); CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
diffs.push_back(Diff('i')); diffs.push_back(Diff('i'));
} } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred); CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
diffs.push_back(Diff('d')); diffs.push_back(Diff('d'));
} }
@ -35,7 +34,8 @@ void CreateDiffRec(size_t** c,
template <class Sequence, class Pred> template <class Sequence, class Pred>
Diffs CreateDiff(const Sequence& s1, Diffs CreateDiff(const Sequence& s1,
const Sequence& s2, const Sequence& s2,
Pred pred) { Pred pred)
{
Diffs diffs; Diffs diffs;
@ -84,12 +84,14 @@ Diffs CreateDiff(const Sequence& s1,
} }
template <class Sequence> template <class Sequence>
Diffs CreateDiff(const Sequence& s1, const Sequence& s2) { Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
{
return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>()); return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
} }
template <class Sequence, class Sig, class Stats> template <class Sequence, class Sig, class Stats>
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) { void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
{
if(sig.size() != stats.size()) if(sig.size() != stats.size())
throw "Signature size differs from score array size."; throw "Signature size differs from score array size.";
@ -109,18 +111,27 @@ void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& sta
k++; k++;
} }
j += k; j += k;
} } else if(diff[j] == 'i')
else if(diff[j] == 'i')
i++; i++;
} }
for(size_t j = 0; j < sig.size(); ++j) { for(size_t j = 0; j < sig.size(); ++j) {
switch (sig[j]) { switch (sig[j]) {
case 'l': stats[j] += d + i + s; break; case 'l':
case 'm': stats[j] += m; break; stats[j] += d + i + s;
case 'd': stats[j] += d; break; break;
case 'i': stats[j] += i; break; case 'm':
case 's': stats[j] += s; break; stats[j] += m;
break;
case 'd':
stats[j] += d;
break;
case 'i':
stats[j] += i;
break;
case 's':
stats[j] += s;
break;
case 'r': case 'r':
float macc = 1; float macc = 1;
if (d + i + s + m) if (d + i + s + m)

View File

@ -21,7 +21,8 @@ namespace Moses
using namespace std; using namespace std;
std::string ParseScores(const std::string &line, const std::string& defaultScores) { std::string ParseScores(const std::string &line, const std::string& defaultScores)
{
std::vector<std::string> toks = Tokenize(line); std::vector<std::string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.empty(), "Empty line"); UTIL_THROW_IF2(toks.empty(), "Empty line");
@ -84,8 +85,7 @@ void EditOps::ComputeFeatures(
std::string targetStr = target.GetStringRep(factors); std::string targetStr = target.GetStringRep(factors);
AddStats(sourceStr, targetStr, m_scores, ops); AddStats(sourceStr, targetStr, m_scores, ops);
} } else {
else {
std::vector<std::string> sourceTokens; std::vector<std::string> sourceTokens;
//std::cerr << "Ed src: "; //std::cerr << "Ed src: ";
for(size_t i = 0; i < source.GetSize(); ++i) { for(size_t i = 0; i < source.GetSize(); ++i) {