mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
197 lines
6.5 KiB
C++
197 lines
6.5 KiB
C++
/***********************************************************************
|
|
Moses - statistical machine translation system
|
|
Copyright (C) 2006-2012 University of Edinburgh
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
***********************************************************************/
|
|
|
|
#include "Parser.h"
|
|
|
|
#include "moses/ChartTranslationOptionList.h"
|
|
#include "moses/InputType.h"
|
|
#include "moses/NonTerminal.h"
|
|
#include "moses/TranslationModel/RuleTable/UTrieNode.h"
|
|
#include "moses/TranslationModel/RuleTable/UTrie.h"
|
|
#include "moses/StaticData.h"
|
|
#include "ApplicableRuleTrie.h"
|
|
#include "StackLattice.h"
|
|
#include "StackLatticeBuilder.h"
|
|
#include "StackLatticeSearcher.h"
|
|
#include "VarSpanTrieBuilder.h"
|
|
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
namespace Moses
|
|
{
|
|
|
|
void Scope3Parser::GetChartRuleCollection(
|
|
const WordsRange &range,
|
|
ChartParserCallback &outColl)
|
|
{
|
|
const size_t start = range.GetStartPos();
|
|
const size_t end = range.GetEndPos();
|
|
|
|
std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec = m_ruleApplications[start][end-start+1];
|
|
|
|
MatchCallback matchCB(range, outColl);
|
|
for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
|
|
const UTrieNode &ruleNode = *(p->first);
|
|
const VarSpanNode &varSpanNode = *(p->second);
|
|
|
|
const UTrieNode::LabelMap &labelMap = ruleNode.GetLabelMap();
|
|
|
|
if (varSpanNode.m_rank == 0) { // Purely lexical rule.
|
|
assert(labelMap.size() == 1);
|
|
const TargetPhraseCollection &tpc = labelMap.begin()->second;
|
|
matchCB.m_tpc = &tpc;
|
|
matchCB(m_emptyStackVec);
|
|
} else { // Rule has at least one non-terminal.
|
|
varSpanNode.CalculateRanges(start, end, m_ranges);
|
|
m_latticeBuilder.Build(start, end, ruleNode, varSpanNode, m_ranges,
|
|
*this, m_lattice,
|
|
m_quickCheckTable);
|
|
StackLatticeSearcher<MatchCallback> searcher(m_lattice, m_ranges);
|
|
UTrieNode::LabelMap::const_iterator p = labelMap.begin();
|
|
for (; p != labelMap.end(); ++p) {
|
|
const std::vector<int> &labels = p->first;
|
|
const TargetPhraseCollection &tpc = p->second;
|
|
assert(labels.size() == varSpanNode.m_rank);
|
|
bool failCheck = false;
|
|
for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
|
|
if (!m_quickCheckTable[i][labels[i]]) {
|
|
failCheck = true;
|
|
break;
|
|
}
|
|
}
|
|
if (failCheck) {
|
|
continue;
|
|
}
|
|
matchCB.m_tpc = &tpc;
|
|
searcher.Search(labels, matchCB);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void Scope3Parser::Init()
|
|
{
|
|
InitRuleApplicationVector();
|
|
|
|
const Sentence &sentence = dynamic_cast<const Sentence &>(GetSentence());
|
|
|
|
// Build a map from Words to index-sets.
|
|
SentenceMap sentMap;
|
|
FillSentenceMap(sentence, sentMap);
|
|
|
|
// Build a trie containing 'elastic' application contexts
|
|
const UTrieNode &rootNode = m_ruleTable.GetRootNode();
|
|
std::auto_ptr<ApplicableRuleTrie> art(new ApplicableRuleTrie(-1, -1, rootNode));
|
|
art->Extend(rootNode, -1, sentMap, false);
|
|
|
|
// Build a trie containing just the non-terminal contexts and insert pointers
|
|
// to its nodes back into the ART trie. Contiguous non-terminal contexts are
|
|
// merged and the number of split points is recorded.
|
|
VarSpanTrieBuilder vstBuilder;
|
|
m_varSpanTrie = vstBuilder.Build(*art);
|
|
|
|
// Fill each cell with a list of pointers to relevant ART nodes.
|
|
AddRulesToCells(*art, std::make_pair<int, int>(-1, -1), sentence.GetSize()-1, 0);
|
|
}
|
|
|
|
void Scope3Parser::InitRuleApplicationVector()
|
|
{
|
|
const size_t sourceSize = GetSentence().GetSize();
|
|
m_ruleApplications.resize(sourceSize);
|
|
for (size_t start = 0; start < sourceSize; ++start) {
|
|
size_t maxSpan = sourceSize-start+1;
|
|
m_ruleApplications[start].resize(maxSpan+1);
|
|
}
|
|
}
|
|
|
|
void Scope3Parser::FillSentenceMap(
|
|
const Sentence &sent, SentenceMap &sentMap)
|
|
{
|
|
for (size_t i = 0; i < sent.GetSize(); ++i) {
|
|
sentMap[sent.GetWord(i)].push_back(i);
|
|
}
|
|
}
|
|
|
|
void Scope3Parser::AddRulesToCells(
|
|
const ApplicableRuleTrie &node,
|
|
std::pair<int, int> start,
|
|
int maxPos,
|
|
int depth)
|
|
{
|
|
if (depth > 0) {
|
|
// Determine the start range for this path if not already known.
|
|
if (start.first == -1 && start.second == -1) {
|
|
assert(depth == 1);
|
|
start.first = std::max(0, node.m_start);
|
|
start.second = node.m_start;
|
|
} else if (start.second < 0) {
|
|
assert(depth > 1);
|
|
if (node.m_start == -1) {
|
|
--start.second; // Record split point
|
|
} else {
|
|
int numSplitPoints = -1 - start.second;
|
|
start.second = node.m_start - (numSplitPoints+1);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (node.m_node->HasRules()) {
|
|
assert(depth > 0);
|
|
assert(node.m_vstNode);
|
|
// Determine the end range for this path.
|
|
std::pair<int, int> end;
|
|
if (node.m_end == -1) {
|
|
end.first = (*(node.m_vstNode->m_label))[2];
|
|
end.second = (*(node.m_vstNode->m_label))[3];
|
|
assert(end.first != -1);
|
|
if (end.second == -1) {
|
|
end.second = maxPos;
|
|
}
|
|
} else {
|
|
assert(node.m_start == node.m_end); // Should be a terminal
|
|
end.first = end.second = node.m_start;
|
|
}
|
|
// Add a (rule trie node, VST node) pair for each cell in the range.
|
|
int s2 = start.second;
|
|
if (s2 < 0) {
|
|
int numSplitPoints = -1 - s2;
|
|
s2 = maxPos - numSplitPoints;
|
|
}
|
|
for (int i = start.first; i <= s2; ++i) {
|
|
int e1 = std::max(i+depth-1, end.first);
|
|
for (int j = e1; j <= end.second; ++j) {
|
|
size_t span = j-i+1;
|
|
assert(span >= 1);
|
|
if (m_maxChartSpan && span > m_maxChartSpan) {
|
|
break;
|
|
}
|
|
m_ruleApplications[i][span].push_back(std::make_pair(node.m_node,
|
|
node.m_vstNode));
|
|
}
|
|
}
|
|
}
|
|
|
|
for (std::vector<ApplicableRuleTrie*>::const_iterator p = node.m_children.begin(); p != node.m_children.end(); ++p) {
|
|
AddRulesToCells(**p, start, maxPos, depth+1);
|
|
}
|
|
}
|
|
|
|
} // namespace Moses
|