mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-29 06:52:34 +03:00
requireAlignedWord
This commit is contained in:
parent
c0a5c33d86
commit
d6cd2b757b
@@ -54,71 +54,88 @@ bool Rule::CanExtend(const Parameter &params) const
|
|||||||
|
|
||||||
void Rule::Prevalidate(const Parameter &params)
|
void Rule::Prevalidate(const Parameter &params)
|
||||||
{
|
{
|
||||||
if (m_arcs.size() >= params.maxSymbolsSource) {
|
if (m_arcs.size() >= params.maxSymbolsSource) {
|
||||||
m_canExtend = false;
|
m_canExtend = false;
|
||||||
if (m_arcs.size() > params.maxSymbolsSource) {
|
if (m_arcs.size() > params.maxSymbolsSource) {
|
||||||
m_isValid = false;
|
m_isValid = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// last word is a non-term
|
||||||
|
if (m_arcs.back()->IsNonTerm()) {
|
||||||
|
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(m_arcs.back());
|
||||||
|
|
||||||
|
// check number of non-terms
|
||||||
|
int numNonTerms = 0;
|
||||||
|
for (size_t i = 0; i < m_arcs.size(); ++i) {
|
||||||
|
const LatticeArc *arc = m_arcs[i];
|
||||||
|
if (arc->IsNonTerm()) {
|
||||||
|
++numNonTerms;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// last word is a non-term
|
if (numNonTerms > params.maxNonTerm) {
|
||||||
if (m_arcs.back()->IsNonTerm()) {
|
m_isValid = false;
|
||||||
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(m_arcs.back());
|
m_canExtend = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// check number of non-terms
|
// check if non-term is big enough
|
||||||
int numNonTerms = 0;
|
if (sourceRange->GetWidth() < params.minHoleSource) {
|
||||||
for (size_t i = 0; i < m_arcs.size(); ++i) {
|
m_isValid = false;
|
||||||
const LatticeArc *arc = m_arcs[i];
|
m_canExtend = false;
|
||||||
if (arc->IsNonTerm()) {
|
return;
|
||||||
++numNonTerms;
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numNonTerms > params.maxNonTerm) {
|
// check if 2 consecutive non-terms in source
|
||||||
|
if (!params.nonTermConsecSource) {
|
||||||
|
size_t numSymbols = m_arcs.size();
|
||||||
|
if (numSymbols > 1 && m_arcs[numSymbols - 2]->IsNonTerm()) {
|
||||||
m_isValid = false;
|
m_isValid = false;
|
||||||
m_canExtend = false;
|
m_canExtend = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// check if non-term is big enough
|
//check to see if it overlaps with any other non-terms
|
||||||
if (sourceRange->GetWidth() < params.minHoleSource) {
|
const ConsistentRange &lastTargetRange = sourceRange->GetOtherRange();
|
||||||
m_isValid = false;
|
|
||||||
m_canExtend = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if 2 consecutive non-terms in source
|
for (size_t i = 0; i < m_arcs.size() - 1; ++i) {
|
||||||
if (!params.nonTermConsecSource) {
|
const LatticeArc *arc = m_arcs[i];
|
||||||
size_t numSymbols = m_arcs.size();
|
|
||||||
if (numSymbols > 1 && m_arcs[numSymbols - 2]->IsNonTerm()) {
|
if (arc->IsNonTerm()) {
|
||||||
|
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(arc);
|
||||||
|
const ConsistentRange &targetRange = sourceRange->GetOtherRange();
|
||||||
|
|
||||||
|
if (lastTargetRange.Overlap(targetRange)) {
|
||||||
m_isValid = false;
|
m_isValid = false;
|
||||||
m_canExtend = false;
|
m_canExtend = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//check to see if it overlaps with any other non-terms
|
if (params.requireAlignedWord) {
|
||||||
const ConsistentRange &lastTargetRange = sourceRange->GetOtherRange();
|
bool ok = false;
|
||||||
|
for (size_t i = 0; i < m_arcs.size(); ++i) {
|
||||||
|
const LatticeArc *arc = m_arcs[i];
|
||||||
|
|
||||||
for (size_t i = 0; i < m_arcs.size() - 1; ++i) {
|
if (!arc->IsNonTerm()) {
|
||||||
const LatticeArc *arc = m_arcs[i];
|
const Word *word = static_cast<const Word *>(arc);
|
||||||
|
if (word->GetAlignment().size()) {
|
||||||
if (arc->IsNonTerm()) {
|
ok = true;
|
||||||
const ConsistentRange *sourceRange = static_cast<const ConsistentRange *>(arc);
|
break;
|
||||||
const ConsistentRange &targetRange = sourceRange->GetOtherRange();
|
|
||||||
|
|
||||||
if (lastTargetRange.Overlap(targetRange)) {
|
|
||||||
m_isValid = false;
|
|
||||||
m_canExtend = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.requireAlignedWord) {
|
if (!ok) {
|
||||||
|
m_isValid = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rule::Fillout(const ConsistentPhrases &consistentPhrases,
|
void Rule::Fillout(const ConsistentPhrases &consistentPhrases,
|
||||||
|
Loading…
Reference in New Issue
Block a user