mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-06 19:49:41 +03:00
Merge ../mosesdecoder into perf_moses2
This commit is contained in:
commit
b75ef6f619
21
mert/M2.cpp
21
mert/M2.cpp
@ -3,19 +3,21 @@
|
||||
|
||||
#include "M2.h"
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
namespace M2 {
|
||||
namespace M2
|
||||
{
|
||||
|
||||
bool Annot::lowercase = true;
|
||||
|
||||
std::string Annot::transform(const std::string& e) {
|
||||
std::string Annot::transform(const std::string& e)
|
||||
{
|
||||
std::string temp = e;
|
||||
if(lowercase) {
|
||||
boost::erase_all(temp, " ");
|
||||
return ToLower(temp);
|
||||
}
|
||||
else
|
||||
} else
|
||||
return e;
|
||||
}
|
||||
|
||||
@ -27,7 +29,8 @@ const std::string ToLower(const std::string& str)
|
||||
}
|
||||
|
||||
|
||||
Edit operator+(Edit& e1, Edit& e2) {
|
||||
Edit operator+(Edit& e1, Edit& e2)
|
||||
{
|
||||
std::string edit;
|
||||
if(e1.edit.size() > 0 && e2.edit.size() > 0)
|
||||
edit = e1.edit + " " + e2.edit;
|
||||
@ -40,11 +43,13 @@ Edit operator+(Edit& e1, Edit& e2) {
|
||||
}
|
||||
|
||||
|
||||
Edge operator+(Edge e1, Edge e2) {
|
||||
Edge operator+(Edge e1, Edge e2)
|
||||
{
|
||||
return Edge(e1.v, e2.u, e1.edit + e2.edit);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& o, Sentence s) {
|
||||
std::ostream& operator<<(std::ostream& o, Sentence s)
|
||||
{
|
||||
for(Sentence::iterator it = s.begin(); it != s.end(); it++)
|
||||
o << *it << " ";
|
||||
return o;
|
||||
|
18
mert/M2.h
18
mert/M2.h
@ -16,9 +16,11 @@
|
||||
|
||||
|
||||
|
||||
namespace MosesTuning {
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
namespace M2 {
|
||||
namespace M2
|
||||
{
|
||||
|
||||
typedef std::vector<float> Stats;
|
||||
|
||||
@ -127,7 +129,8 @@ typedef std::vector<TrackRow> TrackMatrix;
|
||||
typedef std::set<Vertex> Vertices;
|
||||
typedef std::set<Edge> Edges;
|
||||
|
||||
class M2 {
|
||||
class M2
|
||||
{
|
||||
private:
|
||||
M2File m_m2;
|
||||
|
||||
@ -190,8 +193,7 @@ class M2 {
|
||||
|
||||
unit.third.insert(a.annotator);
|
||||
unit.second.insert(a);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
std::stringstream annotStr(annot[5]);
|
||||
size_t annotator;
|
||||
annotStr >> annotator;
|
||||
@ -345,8 +347,7 @@ class M2 {
|
||||
v = predecessor[v];
|
||||
if(v == source)
|
||||
break;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
std::cout << "Error" << std::endl;
|
||||
break;
|
||||
}
|
||||
@ -430,8 +431,7 @@ class M2 {
|
||||
stats[2] = 0;
|
||||
stats[3] = unit.first.size();
|
||||
return;
|
||||
}
|
||||
else if(distance > 0) {
|
||||
} else if(distance > 0) {
|
||||
for(size_t j = 0; j < unit.third.size(); j++) {
|
||||
BuildGraph(bt, Vs[j], Es[j]);
|
||||
AddWeights(Es[j], unit, j);
|
||||
|
@ -78,11 +78,13 @@ float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
|
||||
return f;
|
||||
}
|
||||
|
||||
float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const {
|
||||
float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
|
||||
{
|
||||
return comps[3];
|
||||
}
|
||||
|
||||
std::vector<ScoreStatsType> randomStats(float decay, int max) {
|
||||
std::vector<ScoreStatsType> randomStats(float decay, int max)
|
||||
{
|
||||
int gold = rand() % max;
|
||||
int prop = rand() % max;
|
||||
int corr = 0.0;
|
||||
|
@ -23,7 +23,8 @@ namespace Moses
|
||||
|
||||
using namespace std;
|
||||
|
||||
std::string MakePair(const std::string &s1, const std::string &s2, bool general) {
|
||||
std::string MakePair(const std::string &s1, const std::string &s2, bool general)
|
||||
{
|
||||
std::vector<std::string> sourceList;
|
||||
std::vector<std::string> targetList;
|
||||
|
||||
@ -54,27 +55,23 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
|
||||
sprintf((char*)temp.c_str(), "%d", count);
|
||||
targetList.push_back("\\" + temp + "·");
|
||||
count++;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
sourceList.push_back(match + "·");
|
||||
targetList.push_back(match + "·");
|
||||
}
|
||||
match.clear();
|
||||
}
|
||||
else
|
||||
} else
|
||||
match.push_back(s1[i]);
|
||||
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
else if(type == 'd') {
|
||||
} else if(type == 'd') {
|
||||
if(s1[i] == '+')
|
||||
source += "·";
|
||||
else
|
||||
source.push_back(s1[i]);
|
||||
i++;
|
||||
}
|
||||
else if(type == 'i') {
|
||||
} else if(type == 'i') {
|
||||
if(s2[j] == '+')
|
||||
target += "·";
|
||||
else
|
||||
@ -88,8 +85,7 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
|
||||
sprintf((char*)temp.c_str(), "%d", count);
|
||||
targetList.push_back("\\" + temp);
|
||||
count++;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
sourceList.push_back(match);
|
||||
targetList.push_back(match);
|
||||
}
|
||||
@ -111,15 +107,13 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
|
||||
sprintf((char*)temp.c_str(), "%d", count);
|
||||
targetList.push_back("\\"+ temp);
|
||||
count++;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
sourceList.push_back(match);
|
||||
targetList.push_back(match);
|
||||
}
|
||||
}
|
||||
match.clear();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
std::string cs1 = s1;
|
||||
std::string cs2 = s2;
|
||||
boost::replace_all(cs1, "+", "·");
|
||||
@ -139,17 +133,16 @@ std::string MakePair(const std::string &s1, const std::string &s2, bool general)
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const {
|
||||
std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const
|
||||
{
|
||||
std::stringstream out;
|
||||
if(s1.empty()) {
|
||||
out << "ins(«" << boost::join(s2, "·") << "»)";
|
||||
return out.str();
|
||||
}
|
||||
else if(s2.empty()) {
|
||||
} else if(s2.empty()) {
|
||||
out << "del(«" << boost::join(s1, "·") << "»)";
|
||||
return out.str();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
typename Tokens::value_type v1 = boost::join(s1, "+");
|
||||
typename Tokens::value_type v2 = boost::join(s2, "+");
|
||||
out << MakePair(v1, v2, m_general);
|
||||
@ -163,7 +156,8 @@ std::vector<std::string> GetContext(size_t pos,
|
||||
const InputType &input,
|
||||
const InputPath &inputPath,
|
||||
const std::vector<FactorType>& factorTypes,
|
||||
bool isRight) {
|
||||
bool isRight)
|
||||
{
|
||||
|
||||
const Sentence& sentence = static_cast<const Sentence&>(input);
|
||||
const Range& range = inputPath.GetWordsRange();
|
||||
@ -179,8 +173,7 @@ std::vector<std::string> GetContext(size_t pos,
|
||||
for(int i = 0; i < length; i++) {
|
||||
if(leftPos - i >= 0) {
|
||||
current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
current.push_back("<s>");
|
||||
}
|
||||
}
|
||||
@ -195,8 +188,7 @@ std::vector<std::string> GetContext(size_t pos,
|
||||
for(int i = 0; i < length; i++) {
|
||||
if(rightPos + i < (int)sentence.GetSize()) {
|
||||
current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
current.push_back("</s>");
|
||||
}
|
||||
}
|
||||
@ -214,7 +206,8 @@ std::vector<std::string>
|
||||
CorrectionPattern::CreatePattern(const Tokens &s1,
|
||||
const Tokens &s2,
|
||||
const InputType &input,
|
||||
const InputPath &inputPath) const {
|
||||
const InputPath &inputPath) const
|
||||
{
|
||||
|
||||
Diffs diffs = CreateDiff(s1, s2);
|
||||
size_t i = 0, j = 0;
|
||||
@ -250,12 +243,10 @@ CorrectionPattern::CreatePattern(const Tokens &s1,
|
||||
}
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
else if(type == 'd') {
|
||||
} else if(type == 'd') {
|
||||
source.push_back(s1[i]);
|
||||
i++;
|
||||
}
|
||||
else if(type == 'i') {
|
||||
} else if(type == 'i') {
|
||||
target.push_back(s2[j]);
|
||||
j++;
|
||||
}
|
||||
|
@ -17,16 +17,15 @@ void CreateDiffRec(size_t** c,
|
||||
size_t i,
|
||||
size_t j,
|
||||
Diffs& diffs,
|
||||
Pred pred) {
|
||||
Pred pred)
|
||||
{
|
||||
if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
|
||||
CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
|
||||
diffs.push_back(Diff('m'));
|
||||
}
|
||||
else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
|
||||
} else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
|
||||
CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
|
||||
diffs.push_back(Diff('i'));
|
||||
}
|
||||
else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
|
||||
} else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
|
||||
CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
|
||||
diffs.push_back(Diff('d'));
|
||||
}
|
||||
@ -35,7 +34,8 @@ void CreateDiffRec(size_t** c,
|
||||
template <class Sequence, class Pred>
|
||||
Diffs CreateDiff(const Sequence& s1,
|
||||
const Sequence& s2,
|
||||
Pred pred) {
|
||||
Pred pred)
|
||||
{
|
||||
|
||||
Diffs diffs;
|
||||
|
||||
@ -84,12 +84,14 @@ Diffs CreateDiff(const Sequence& s1,
|
||||
}
|
||||
|
||||
template <class Sequence>
|
||||
Diffs CreateDiff(const Sequence& s1, const Sequence& s2) {
|
||||
Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
|
||||
{
|
||||
return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
|
||||
}
|
||||
|
||||
template <class Sequence, class Sig, class Stats>
|
||||
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) {
|
||||
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
|
||||
{
|
||||
if(sig.size() != stats.size())
|
||||
throw "Signature size differs from score array size.";
|
||||
|
||||
@ -109,18 +111,27 @@ void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& sta
|
||||
k++;
|
||||
}
|
||||
j += k;
|
||||
}
|
||||
else if(diff[j] == 'i')
|
||||
} else if(diff[j] == 'i')
|
||||
i++;
|
||||
}
|
||||
|
||||
for(size_t j = 0; j < sig.size(); ++j) {
|
||||
switch (sig[j]) {
|
||||
case 'l': stats[j] += d + i + s; break;
|
||||
case 'm': stats[j] += m; break;
|
||||
case 'd': stats[j] += d; break;
|
||||
case 'i': stats[j] += i; break;
|
||||
case 's': stats[j] += s; break;
|
||||
case 'l':
|
||||
stats[j] += d + i + s;
|
||||
break;
|
||||
case 'm':
|
||||
stats[j] += m;
|
||||
break;
|
||||
case 'd':
|
||||
stats[j] += d;
|
||||
break;
|
||||
case 'i':
|
||||
stats[j] += i;
|
||||
break;
|
||||
case 's':
|
||||
stats[j] += s;
|
||||
break;
|
||||
case 'r':
|
||||
float macc = 1;
|
||||
if (d + i + s + m)
|
||||
|
@ -21,7 +21,8 @@ namespace Moses
|
||||
|
||||
using namespace std;
|
||||
|
||||
std::string ParseScores(const std::string &line, const std::string& defaultScores) {
|
||||
std::string ParseScores(const std::string &line, const std::string& defaultScores)
|
||||
{
|
||||
std::vector<std::string> toks = Tokenize(line);
|
||||
UTIL_THROW_IF2(toks.empty(), "Empty line");
|
||||
|
||||
@ -84,8 +85,7 @@ void EditOps::ComputeFeatures(
|
||||
std::string targetStr = target.GetStringRep(factors);
|
||||
|
||||
AddStats(sourceStr, targetStr, m_scores, ops);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
std::vector<std::string> sourceTokens;
|
||||
//std::cerr << "Ed src: ";
|
||||
for(size_t i = 0; i < source.GetSize(); ++i) {
|
||||
|
@ -1540,6 +1540,150 @@ analysis-precision
|
||||
rerun-on-change: precision-by-coverage-base
|
||||
final-model: yes
|
||||
|
||||
[QUALITY-ESTIMATION] single
|
||||
tokenize-input
|
||||
in: raw-input
|
||||
out: tokenized-input
|
||||
default-name: quality-estimation/input.tok
|
||||
pass-unless: input-tokenizer
|
||||
template: $input-tokenizer < IN > OUT
|
||||
tokenize-input-devtest
|
||||
in: raw-input-devtest
|
||||
out: tokenized-input-devtest
|
||||
default-name: quality-estimation/input.devtest.tok
|
||||
pass-unless: input-tokenizer
|
||||
template: $input-tokenizer < IN > OUT
|
||||
lowercase-input
|
||||
in: tokenized-input
|
||||
out: truecased-input
|
||||
default-name: quality-estimation/input.lc
|
||||
pass-unless: input-lowercaser
|
||||
ignore-if: input-truecaser
|
||||
template: $input-lowercaser < IN > OUT
|
||||
lowercase-input-devtest
|
||||
in: tokenized-input-devtest
|
||||
out: truecased-input-devtest
|
||||
default-name: quality-estimation/input.devtest.lc
|
||||
pass-unless: input-lowercaser
|
||||
ignore-if: input-truecaser
|
||||
template: $input-lowercaser < IN > OUT
|
||||
truecase-input
|
||||
in: tokenized-input TRUECASER:truecase-model
|
||||
out: truecased-input
|
||||
rerun-on-change: input-truecaser
|
||||
default-name: quality-estimation/input.tc
|
||||
ignore-unless: input-truecaser
|
||||
template: $input-truecaser -model IN1.$input-extension < IN > OUT
|
||||
truecase-input-devtest
|
||||
in: tokenized-input-devtest TRUECASER:truecase-model
|
||||
out: truecased-input-devtest
|
||||
rerun-on-change: input-truecaser
|
||||
ignore-unless: input-truecaser
|
||||
default-name: quality-estimation/input.devtest.tc
|
||||
template: $input-truecaser -model IN1.$input-extension < IN > OUT
|
||||
split-input
|
||||
in: truecased-input SPLITTER:splitter-model
|
||||
out: split-input
|
||||
rerun-on-change: input-splitter
|
||||
default-name: quality-estimation/input.split
|
||||
pass-unless: input-splitter
|
||||
template: $input-splitter -model IN1.$input-extension < IN > OUT
|
||||
split-input-devtest
|
||||
in: truecased-input-devtest SPLITTER:splitter-model
|
||||
out: split-input-devtest
|
||||
rerun-on-change: input-splitter
|
||||
default-name: quality-estimation/input.devtest.split
|
||||
pass-unless: input-splitter
|
||||
template: $input-splitter -model IN1.$input-extension < IN > OUT
|
||||
tokenize-reference
|
||||
in: raw-reference
|
||||
out: tokenized-reference
|
||||
default-name: quality-estimation/reference.tok
|
||||
pass-unless: output-tokenizer
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-tokenizer < IN > OUT
|
||||
tokenize-reference-devtest
|
||||
in: raw-reference-devtest
|
||||
out: tokenized-reference-devtest
|
||||
default-name: quality-estimation/reference.devtest.tok
|
||||
pass-unless: output-tokenizer
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-tokenizer < IN > OUT
|
||||
lowercase-reference
|
||||
in: tokenized-reference
|
||||
out: truecased-reference
|
||||
default-name: quality-estimation/reference.lc
|
||||
pass-unless: output-lowercaser
|
||||
ignore-if: output-truecaser
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-lowercaser < IN > OUT
|
||||
lowercase-reference-devtest
|
||||
in: tokenized-reference-devtest
|
||||
out: truecased-reference-devtest
|
||||
default-name: quality-estimation/reference.devtest.lc
|
||||
pass-unless: output-lowercaser
|
||||
ignore-if: output-truecaser
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-lowercaser < IN > OUT
|
||||
truecase-reference
|
||||
in: tokenized-reference TRUECASER:truecase-model
|
||||
out: truecased-reference
|
||||
rerun-on-change: output-truecaser
|
||||
default-name: quality-estimation/reference.tc
|
||||
ignore-unless: output-truecaser
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-truecaser -model IN1.$output-extension < IN > OUT
|
||||
truecase-reference-devtest
|
||||
in: tokenized-reference-devtest TRUECASER:truecase-model
|
||||
out: truecased-reference-devtest
|
||||
rerun-on-change: output-truecaser
|
||||
default-name: quality-estimation/reference.devtest.tc
|
||||
ignore-unless: output-truecaser
|
||||
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
|
||||
template: $output-truecaser -model IN1.$output-extension < IN > OUT
|
||||
decode
|
||||
in: TUNING:config-with-reused-weights split-input
|
||||
out: rich-output
|
||||
default-name: quality-estimation/output
|
||||
template: $decoder -v 0 -tt -f IN < IN1 > OUT
|
||||
error: Translation was not performed correctly
|
||||
not-error: trans: No such file or directory
|
||||
decode-devtest
|
||||
in: TUNING:config-with-reused-weights split-input-devtest
|
||||
out: rich-output-devtest
|
||||
default-name: quality-estimation/output-devtest
|
||||
template: $decoder -v 0 -tt -f IN < IN1 > OUT
|
||||
error: Translation was not performed correctly
|
||||
not-error: trans: No such file or directory
|
||||
remove-markup
|
||||
in: rich-output
|
||||
out: cleaned-output
|
||||
default-name: quality-estimation/tokenized-output
|
||||
template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
|
||||
remove-markup-devtest
|
||||
in: rich-output-devtest
|
||||
out: cleaned-output-devtest
|
||||
default-name: quality-estimation/tokenized-output-devtest
|
||||
template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
|
||||
score-output
|
||||
in: cleaned-output truecased-reference
|
||||
out: scored-output
|
||||
default-name: quality-estimation/output-scored
|
||||
tmp-name: quality-estimation/ter
|
||||
template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
|
||||
score-output-devtest
|
||||
in: cleaned-output-devtest truecased-reference-devtest
|
||||
out: scored-output-devtest
|
||||
default-name: quality-estimation/output-scored-devtest
|
||||
tmp-name: quality-estimation/ter-devtest
|
||||
template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
|
||||
train
|
||||
in: input rich-output scored-output input-devtest rich-output-devtest scored-output-devtest
|
||||
out: quality-estimation-model
|
||||
default-name: quality-estimation/model
|
||||
template: $trainer --train-rich IN1 --train-ter IN2 --eval-rich IN4 --eval-ter IN5 --model OUT
|
||||
final-model: yes
|
||||
|
||||
[REPORTING] single
|
||||
report
|
||||
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:multi-bleu-detok-score EVALUATION:multi-bleu-c-detok-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model EVALUATION:wade-analysis
|
||||
|
42
scripts/ems/support/create-xml.perl
Executable file
42
scripts/ems/support/create-xml.perl
Executable file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# Usage: create-xml.perl source|target|ref < plain-text > xml
#
# Reads plain text from STDIN, one segment per line, and writes it to STDOUT
# wrapped in a <srcset>, <tstset> or <refset> envelope with a single <doc>.
# Only the first letter of the argument (s, t or r, in either case) is
# examined. Segment text is copied verbatim; no XML escaping is applied.

use warnings;
use strict;

# Envelope for each set type, keyed by the lowercased first letter of the
# command line argument: [ closing tag name, opening set line, opening doc line ].
my %envelope = (
  's' => [ 'srcset',
           "<srcset setid=\"test\" srclang=\"any\">\n",
           "<doc docid=\"doc\">\n" ],
  't' => [ 'tstset',
           "<tstset setid=\"test\" tgtlang=\"any\" srclang=\"any\">\n",
           "<doc sysid=\"moses\" docid=\"doc\">\n" ],
  'r' => [ 'refset',
           "<refset setid=\"test\" tgtlang=\"any\" srclang=\"any\">\n",
           "<doc sysid=\"ref\" docid=\"doc\">\n" ],
);

my ($type) = @ARGV;

# A missing argument is treated like an unknown one, without tripping
# "uninitialized value" warnings on the way to the error message.
my $key = defined($type) ? lc(substr($type, 0, 1)) : '';
my $set = $envelope{$key};
die("ERROR: specify source / target / ref") unless defined($set);

my ($tag, $open_set, $open_doc) = @{$set};

print $open_set;
print $open_doc;

# Segment ids are 1-based and follow the input line order.
my $i = 0;
while(<STDIN>) {
  chomp;
  print "<seg id=\"".(++$i)."\">$_</seg>\n";
}

print "</doc>\n";
print "</$tag>\n";
|
@ -9,7 +9,16 @@ use strict;
|
||||
$|++;
|
||||
|
||||
while(<STDIN>) {
|
||||
s/ \|\d+\-\d+\| / /g;
|
||||
s/ \|\d+\-\d+\|$//;
|
||||
print $_;
|
||||
chop;
|
||||
s/\|[^\|]+\|//g;
|
||||
s/\s+/ /g;
|
||||
s/^ //;
|
||||
s/ $//;
|
||||
print $_."\n";
|
||||
}
|
||||
|
||||
#while(<STDIN>) {
|
||||
# s/ \|\d+\-\d+\| / /g;
|
||||
# s/ \|\d+\-\d+\|$//;
|
||||
# print $_;
|
||||
#}
|
||||
|
15
scripts/ems/support/ter.perl
Normal file
15
scripts/ems/support/ter.perl
Normal file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# Usage: ter.perl TERCOM_JAR HYPOTHESIS REFERENCE TMP_DIR
#
# Wraps the plain-text HYPOTHESIS and REFERENCE files in XML using
# create-xml.perl (looked up in the directory of this script), runs the given
# jar on them via "java -jar", and prints the contents of TMP_DIR/out.ter on
# STDOUT.
#
# No command is passed through a shell, so file names containing spaces or
# shell metacharacters are handled as plain arguments.

use warnings;
use strict;
use FindBin qw($RealBin);
use File::Path qw(make_path);

my ($jar, $hyp, $ref, $tmp) = @ARGV;
die("ERROR: syntax: ter.perl tercom.jar hypothesis reference tmp-dir\n")
  unless defined($tmp);

# Same effect as "mkdir -p": an already existing directory is not an error.
make_path($tmp);

wrap_in_xml('test', $hyp, "$tmp/hyp");
wrap_in_xml('ref',  $ref, "$tmp/ref");

# The scorer's own STDOUT is read and discarded so that only the report file
# printed below reaches our STDOUT; its STDERR is passed through untouched.
my @scorer = ('java', '-jar', $jar,
              '-h', "$tmp/hyp", '-r', "$tmp/ref",
              '-o', 'ter', '-n', "$tmp/out");
open(my $scorer_out, '-|', @scorer)
  or die("ERROR: cannot run java: $!\n");
while (<$scorer_out>) { }
# A non-zero exit status is only reported: the report file check below decides
# whether the run produced something usable.
close($scorer_out)
  or warn("WARNING: java exited with status $?\n");

open(my $report, '<', "$tmp/out.ter")
  or die("ERROR: cannot read report $tmp/out.ter: $!\n");
print while <$report>;
close($report);

# Runs create-xml.perl with the given set type, reading the plain-text file
# $plain on its STDIN and writing the XML to $xml. Dies if the helper cannot
# be started or exits with a non-zero status.
sub wrap_in_xml {
  my ($type, $plain, $xml) = @_;

  my $pid = fork();
  die("ERROR: cannot fork: $!\n") unless defined($pid);

  if ($pid == 0) {
    # Child: redirect the standard streams, then replace ourselves with the
    # helper. The list form of exec bypasses the shell.
    open(STDIN,  '<', $plain) or die("ERROR: cannot read $plain: $!\n");
    open(STDOUT, '>', $xml)   or die("ERROR: cannot write $xml: $!\n");
    exec("$RealBin/create-xml.perl", $type)
      or die("ERROR: cannot run $RealBin/create-xml.perl: $!\n");
  }

  waitpid($pid, 0);
  die("ERROR: create-xml.perl failed for $plain (status $?)\n") if $? != 0;
}
|
||||
|
Loading…
Reference in New Issue
Block a user