requireAlignedWord

This commit is contained in:
Hieu Hoang 2014-02-20 02:04:34 +00:00
parent c0a5c33d86
commit d6cd2b757b

View File

@ -54,71 +54,88 @@ bool Rule::CanExtend(const Parameter &params) const
void Rule::Prevalidate(const Parameter &params) void Rule::Prevalidate(const Parameter &params)
{ {
if (m_arcs.size() >= params.maxSymbolsSource) { if (m_arcs.size() >= params.maxSymbolsSource) {
m_canExtend = false; m_canExtend = false;
if (m_arcs.size() > params.maxSymbolsSource) { if (m_arcs.size() > params.maxSymbolsSource) {
m_isValid = false; m_isValid = false;
return;
}
}
// last word is a non-term
if (m_arcs.back()->IsNonTerm()) {
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(m_arcs.back());
// check number of non-terms
int numNonTerms = 0;
for (size_t i = 0; i < m_arcs.size(); ++i) {
const LatticeArc *arc = m_arcs[i];
if (arc->IsNonTerm()) {
++numNonTerms;
} }
} }
// last word is a non-term if (numNonTerms > params.maxNonTerm) {
if (m_arcs.back()->IsNonTerm()) { m_isValid = false;
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(m_arcs.back()); m_canExtend = false;
return;
}
// check number of non-terms // check if non-term is big enough
int numNonTerms = 0; if (sourceRange->GetWidth() < params.minHoleSource) {
for (size_t i = 0; i < m_arcs.size(); ++i) { m_isValid = false;
const LatticeArc *arc = m_arcs[i]; m_canExtend = false;
if (arc->IsNonTerm()) { return;
++numNonTerms; }
}
}
if (numNonTerms > params.maxNonTerm) { // check if 2 consecutive non-terms in source
if (!params.nonTermConsecSource) {
size_t numSymbols = m_arcs.size();
if (numSymbols > 1 && m_arcs[numSymbols - 2]->IsNonTerm()) {
m_isValid = false; m_isValid = false;
m_canExtend = false; m_canExtend = false;
return; return;
} }
}
// check if non-term is big enough //check to see if it overlaps with any other non-terms
if (sourceRange->GetWidth() < params.minHoleSource) { const ConsistentRange &lastTargetRange = sourceRange->GetOtherRange();
m_isValid = false;
m_canExtend = false;
return;
}
// check if 2 consecutive non-terms in source for (size_t i = 0; i < m_arcs.size() - 1; ++i) {
if (!params.nonTermConsecSource) { const LatticeArc *arc = m_arcs[i];
size_t numSymbols = m_arcs.size();
if (numSymbols > 1 && m_arcs[numSymbols - 2]->IsNonTerm()) { if (arc->IsNonTerm()) {
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(arc);
const ConsistentRange &targetRange = sourceRange->GetOtherRange();
if (lastTargetRange.Overlap(targetRange)) {
m_isValid = false; m_isValid = false;
m_canExtend = false; m_canExtend = false;
return; return;
} }
} }
}
}
//check to see if it overlaps with any other non-terms if (params.requireAlignedWord) {
const ConsistentRange &lastTargetRange = sourceRange->GetOtherRange(); bool ok = false;
for (size_t i = 0; i < m_arcs.size(); ++i) {
const LatticeArc *arc = m_arcs[i];
for (size_t i = 0; i < m_arcs.size() - 1; ++i) { if (!arc->IsNonTerm()) {
const LatticeArc *arc = m_arcs[i]; const Word *word = static_cast<const Word *>(arc);
if (word->GetAlignment().size()) {
if (arc->IsNonTerm()) { ok = true;
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(arc); break;
const ConsistentRange &targetRange = sourceRange->GetOtherRange();
if (lastTargetRange.Overlap(targetRange)) {
m_isValid = false;
m_canExtend = false;
return;
}
} }
} }
} }
if (params.requireAlignedWord) { if (!ok) {
m_isValid = false;
} }
}
} }
void Rule::Fillout(const ConsistentPhrases &consistentPhrases, void Rule::Fillout(const ConsistentPhrases &consistentPhrases,