mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-29 06:52:34 +03:00
add --MultiLabel
This commit is contained in:
parent
e389b8f51d
commit
b556cdc464
@ -54,6 +54,8 @@ void AlignedSentence::PopulateAlignment(const std::string &line)
|
||||
int sourcePos = alignPair[0];
|
||||
int targetPos = alignPair[1];
|
||||
|
||||
cerr << "m_source=" << m_source.size() << endl;
|
||||
|
||||
assert(sourcePos < m_source.size());
|
||||
assert(targetPos < m_target.size());
|
||||
Word *sourceWord = m_source[sourcePos];
|
||||
|
@ -32,7 +32,7 @@ void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const P
|
||||
// parse source and target string
|
||||
if (isSyntax) {
|
||||
line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
|
||||
XMLParse(phrase, tree, line);
|
||||
XMLParse(phrase, tree, line, params);
|
||||
|
||||
if (mixedSyntaxType != 0) {
|
||||
// mixed syntax. Always add [X] where there isn't 1
|
||||
@ -72,7 +72,10 @@ void Escape(string &text)
|
||||
|
||||
}
|
||||
|
||||
void AlignedSentenceSyntax::XMLParse(Phrase &output, SyntaxTree &tree, const pugi::xml_node &parentNode)
|
||||
void AlignedSentenceSyntax::XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const pugi::xml_node &parentNode,
|
||||
const Parameter &params)
|
||||
{
|
||||
int childNum = 0;
|
||||
for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling())
|
||||
@ -88,7 +91,7 @@ void AlignedSentenceSyntax::XMLParse(Phrase &output, SyntaxTree &tree, const pug
|
||||
label = attribute.as_string();
|
||||
|
||||
// recursively call this function. For proper recursive trees
|
||||
XMLParse(output, tree, childNode);
|
||||
XMLParse(output, tree, childNode, params);
|
||||
}
|
||||
|
||||
|
||||
@ -113,7 +116,8 @@ void AlignedSentenceSyntax::XMLParse(Phrase &output, SyntaxTree &tree, const pug
|
||||
// fill syntax labels
|
||||
if (!label.empty()) {
|
||||
label = "[" + label + "]";
|
||||
tree.Add(startPos, endPos, label);
|
||||
cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
|
||||
tree.Add(startPos, endPos, label, params);
|
||||
}
|
||||
|
||||
++childNum;
|
||||
@ -121,14 +125,17 @@ void AlignedSentenceSyntax::XMLParse(Phrase &output, SyntaxTree &tree, const pug
|
||||
|
||||
}
|
||||
|
||||
/**
 * Parse an XML-annotated sentence and hand its contents to the
 * node-walking XMLParse overload.
 *
 * @param output phrase forwarded to the node-walking overload
 * @param tree   syntax tree forwarded to the node-walking overload
 * @param input  XML text; only the children of the top-level <xml>
 *               element are processed
 * @param params extraction parameters, forwarded unchanged
 */
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const std::string input,
                                     const Parameter &params)
{
  pugi::xml_document doc;
  pugi::xml_parse_result result = doc.load(input.c_str(),
                                  pugi::parse_default | pugi::parse_comments);

  // Do not swallow a parse failure silently: report it, then carry on with
  // whatever pugixml managed to build (possibly nothing), as before.
  if (!result) {
    std::cerr << "WARNING: XML parse error: " << result.description() << std::endl;
  }

  pugi::xml_node topNode = doc.child("xml");
  XMLParse(output, tree, topNode, params);
}
|
||||
|
||||
void AlignedSentenceSyntax::CreateNonTerms()
|
||||
|
@ -26,8 +26,14 @@ protected:
|
||||
std::string m_sourceStr, m_targetStr, m_alignmentStr;
|
||||
SyntaxTree m_sourceTree, m_targetTree;
|
||||
|
||||
void XMLParse(Phrase &output, SyntaxTree &tree, const std::string input);
|
||||
void XMLParse(Phrase &output, SyntaxTree &tree, const pugi::xml_node &parentNode);
|
||||
void XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const std::string input,
|
||||
const Parameter ¶ms);
|
||||
void XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const pugi::xml_node &parentNode,
|
||||
const Parameter ¶ms);
|
||||
void CreateNonTerms();
|
||||
void CreateNonTerms(ConsistentPhrase &cp,
|
||||
const SyntaxTree::Labels &sourceLabels,
|
||||
|
@ -33,7 +33,8 @@ int main(int argc, char** argv)
|
||||
|
||||
("SourceSyntax", "Source sentence is a parse tree")
|
||||
("TargetSyntax", "Target sentence is a parse tree")
|
||||
("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere");
|
||||
("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
|
||||
("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most");
|
||||
|
||||
po::variables_map vm;
|
||||
try
|
||||
@ -70,6 +71,7 @@ int main(int argc, char** argv)
|
||||
if (vm.count("SourceSyntax")) params.sourceSyntax = true;
|
||||
if (vm.count("TargetSyntax")) params.targetSyntax = true;
|
||||
if (vm.count("MixedSyntaxType")) params.mixedSyntaxType = vm["MixedSyntaxType"].as<int>();
|
||||
if (vm.count("MultiLabel")) params.multiLabel = vm["MultiLabel"].as<int>();
|
||||
|
||||
// input files;
|
||||
string pathTarget = argv[1];
|
||||
@ -91,8 +93,11 @@ int main(int argc, char** argv)
|
||||
|
||||
|
||||
// MAIN LOOP
|
||||
int lineNum = 1;
|
||||
string lineTarget, lineSource, lineAlignment;
|
||||
while (getline(strmTarget, lineTarget)) {
|
||||
cerr << lineNum << " ";
|
||||
|
||||
bool success;
|
||||
success = getline(strmSource, lineSource);
|
||||
if (!success) {
|
||||
@ -130,6 +135,8 @@ int main(int argc, char** argv)
|
||||
rules.Output(extractInvFile, false);
|
||||
|
||||
delete alignedSentence;
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
|
||||
if (!params.gluePath.empty()) {
|
||||
|
@ -24,6 +24,7 @@ Parameter::Parameter()
|
||||
,targetSyntax(false)
|
||||
|
||||
,mixedSyntaxType(0)
|
||||
,multiLabel(0)
|
||||
,nonTermConsecSourceMixed(true)
|
||||
{}
|
||||
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
|
||||
bool sourceSyntax, targetSyntax;
|
||||
|
||||
int mixedSyntaxType;
|
||||
int mixedSyntaxType, multiLabel;
|
||||
bool nonTermConsecSourceMixed;
|
||||
|
||||
};
|
||||
|
@ -1,10 +1,28 @@
|
||||
#include <cassert>
|
||||
#include "SyntaxTree.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
void SyntaxTree::Add(int startPos, int endPos, const std::string &label)
|
||||
void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const Parameter ¶ms)
|
||||
{
|
||||
Range range(startPos, endPos);
|
||||
Labels &labels = m_coll[range];
|
||||
labels.push_back(label);
|
||||
|
||||
bool add = true;
|
||||
if (labels.size()) {
|
||||
if (params.multiLabel == 1) {
|
||||
// delete the label in collection and add new
|
||||
assert(labels.size() == 1);
|
||||
labels.clear();
|
||||
}
|
||||
else if (params.multiLabel == 2) {
|
||||
// ignore this label
|
||||
add = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (add) {
|
||||
labels.push_back(label);
|
||||
}
|
||||
}
|
||||
|
||||
void SyntaxTree::AddToAll(const std::string &label)
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
class Parameter;
|
||||
|
||||
class SyntaxTree
|
||||
{
|
||||
public:
|
||||
@ -11,7 +13,7 @@ public:
|
||||
typedef std::vector<std::string> Labels;
|
||||
typedef std::map<Range, Labels> Coll;
|
||||
|
||||
void Add(int startPos, int endPos, const std::string &label);
|
||||
void Add(int startPos, int endPos, const std::string &label, const Parameter ¶ms);
|
||||
void AddToAll(const std::string &label);
|
||||
|
||||
const Labels &Find(int startPos, int endPos) const;
|
||||
|
Loading…
Reference in New Issue
Block a user