mixed syntax type

This commit is contained in:
Hieu Hoang 2014-02-27 21:35:29 +00:00
parent 6ea84f2f13
commit 303c9cfa36
7 changed files with 21 additions and 12 deletions

View File

@ -95,7 +95,7 @@ std::vector<int> AlignedSentence::GetSourceAlignmentCount() const
// Calls CreateConsistentPhrases(params), then asks m_consistentPhrases to add
// its hiero non-terminals.
void AlignedSentence::Create(const Parameter &params)
{
CreateConsistentPhrases(params);
// NOTE(review): the two calls below look like the removed/added pair of a
// diff hunk (old call passing `false`, new call taking no arguments) --
// confirm that only one of them belongs in the real file.
m_consistentPhrases.AddHieroNonTerms(false);
m_consistentPhrases.AddHieroNonTerms();
}
void AlignedSentence::CreateConsistentPhrases(const Parameter &params)

View File

@ -28,24 +28,33 @@ AlignedSentenceSyntax::~AlignedSentenceSyntax() {
void AlignedSentenceSyntax::Create(const Parameter &params)
{
m_sourceTree.SetDefaultLabel("[X]");
m_targetTree.SetDefaultLabel("[X]");
// parse source and target string
if (params.sourceSyntax) {
m_sourceStr = "<xml>" + m_sourceStr + "</xml>";
XMLParse(m_source, m_sourceTree, m_sourceStr, params);
if (params.mixedSyntaxType != 0) {
// Mixed syntax: always add [X] where there isn't one.
m_sourceTree.SetDefaultLabel("[X]");
}
}
else {
PopulateWordVec(m_source, m_sourceStr);
m_sourceTree.SetDefaultLabel("[X]");
}
if (params.targetSyntax) {
m_targetStr = "<xml>" + m_targetStr + "</xml>";
XMLParse(m_target, m_targetTree, m_targetStr, params);
if (params.mixedSyntaxType != 0) {
// Mixed syntax: always add [X] where there isn't one.
m_targetTree.SetDefaultLabel("[X]");
}
}
else {
PopulateWordVec(m_target, m_targetStr);
m_targetTree.SetDefaultLabel("[X]");
}
PopulateAlignment(m_alignmentStr);

View File

@ -84,7 +84,7 @@ std::string ConsistentPhrases::Debug() const
return out.str();
}
void ConsistentPhrases::AddHieroNonTerms(bool onlyIfNoNonTerm)
void ConsistentPhrases::AddHieroNonTerms()
{
// add [X] labels everywhere
for (int i = 0; i < m_coll.size(); ++i) {
@ -94,10 +94,6 @@ void ConsistentPhrases::AddHieroNonTerms(bool onlyIfNoNonTerm)
ConsistentPhrases::Coll::iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
ConsistentPhrase &cp = **iter;
if (onlyIfNoNonTerm && cp.GetNonTerms().size()) {
continue;
}
cp.AddNonTerms("[X]", "[X]");
}
}

View File

@ -25,7 +25,7 @@ public:
void Add(int sourceStart, int sourceEnd,
int targetStart, int targetEnd);
void AddHieroNonTerms(bool onlyIfNoNonTerm);
void AddHieroNonTerms();
const Coll &GetColl(int sourceStart, int sourceEnd) const;
Coll &GetColl(int sourceStart, int sourceEnd);

View File

@ -25,7 +25,8 @@ int main(int argc, char** argv)
("help", "Print help messages")
("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
("SourceSyntax", "Source sentence is a parse tree")
("TargetSyntax", "Target sentence is a parse tree");
("TargetSyntax", "Target sentence is a parse tree")
("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere");
po::variables_map vm;
try
@ -52,9 +53,10 @@ int main(int argc, char** argv)
return EXIT_FAILURE;
}
if (vm.count("maxSpan")) params.maxSpan = vm["maxSpan"].as<int>();
if (vm.count("MaxSpan")) params.maxSpan = vm["MaxSpan"].as<int>();
if (vm.count("SourceSyntax")) params.sourceSyntax = true;
if (vm.count("TargetSyntax")) params.targetSyntax = true;
if (vm.count("MixedSyntaxType")) params.mixedSyntaxType = vm["MixedSyntaxType"].as<int>();
// input files;
string pathTarget = argv[1];

View File

@ -18,6 +18,7 @@ Parameter::Parameter()
,sourceSyntax(false)
,targetSyntax(false)
,mixedSyntaxType(0)
{}
Parameter::~Parameter() {

View File

@ -24,6 +24,7 @@ public:
bool fractionalCounting;
bool sourceSyntax, targetSyntax;
int mixedSyntaxType;
};