2010-04-12 19:22:50 +04:00
|
|
|
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
|
|
|
|
|
|
|
|
|
2015-05-29 22:57:25 +03:00
|
|
|
#include "SyntaxNodeCollection.h"
|
2012-04-23 17:24:54 +04:00
|
|
|
|
|
|
|
#include <cassert>
|
2010-04-12 19:22:50 +04:00
|
|
|
#include <iostream>
|
|
|
|
|
2012-06-30 18:43:47 +04:00
|
|
|
namespace MosesTraining
|
|
|
|
{
|
|
|
|
|
2015-05-29 20:46:02 +03:00
|
|
|
// Destructor: all cleanup is delegated to Clear().
SyntaxNodeCollection::~SyntaxNodeCollection()
{
  this->Clear();
}
|
|
|
|
|
2015-05-29 20:46:02 +03:00
|
|
|
void SyntaxNodeCollection::Clear()
|
2012-04-23 17:24:54 +04:00
|
|
|
{
|
|
|
|
m_top = 0;
|
2011-02-24 16:57:11 +03:00
|
|
|
// loop through all m_nodes, delete them
|
2012-05-10 16:48:51 +04:00
|
|
|
for(size_t i=0; i<m_nodes.size(); i++) {
|
2011-02-24 16:57:11 +03:00
|
|
|
delete m_nodes[i];
|
|
|
|
}
|
2012-04-23 17:24:54 +04:00
|
|
|
m_nodes.clear();
|
|
|
|
m_index.clear();
|
2010-04-12 19:22:50 +04:00
|
|
|
}
|
|
|
|
|
2015-05-29 22:57:25 +03:00
|
|
|
// Allocates a node labelled `label` for the span [startPos, endPos], records
// it in the flat node list and in the start/end span index, widens m_size so
// that it is at least endPos+1, and returns the new node.
SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
    const std::string &label)
{
  SyntaxNode *const created = new SyntaxNode(startPos, endPos, label);

  // Flat list of all nodes (used by Clear() to delete them).
  m_nodes.push_back(created);
  // Index keyed by start position, then by end position.
  m_index[startPos][endPos].push_back(created);

  // Keep m_size one past the largest end position seen so far.
  const int onePastEnd = endPos + 1;
  if (m_size < onePastEnd) {
    m_size = onePastEnd;
  }

  return created;
}
|
|
|
|
|
2015-05-29 20:46:02 +03:00
|
|
|
// For every span of length >= 2 that has a node, computes a list of split
// points by covering the span from left to right, each time taking the longest
// sub-span (starting at the first uncovered position) that has a node.
//
// Each entry in the result begins with the parent's start position, followed
// by the position just past each child that was found. If at some point no
// child can be found, the search for that parent stops and the split points
// gathered so far are still recorded.
ParentNodes SyntaxNodeCollection::Parse()
{
  ParentNodes result;

  // Visit spans from the shortest (2) up to the full width (m_size).
  for (int spanLen = 2; spanLen <= m_size; ++spanLen) {
    const int lastStart = m_size - spanLen;
    for (int begin = 0; begin <= lastStart; ++begin) {
      if (!HasNode(begin, begin + spanLen - 1)) {
        continue;  // no node on this span, so it is not a parent
      }

      // Process one parent span.
      SplitPoints boundaries;
      boundaries.push_back(begin);

      bool noChildYet = true;  // until a child is found, the candidate must
                               // be strictly shorter than the parent
      int done = 0;            // offset (from begin) of the first uncovered position
      bool progressed = true;  // cleared when a pass finds no child

      while (done < spanLen && progressed) {
        progressed = false;
        // Try the longest candidate first and shrink until a node matches.
        const int longestCut = noChildYet ? spanLen - 1 : spanLen;
        for (int cut = longestCut; cut > done; --cut) {
          if (HasNode(begin + done, begin + cut - 1)) {
            done = cut;
            boundaries.push_back(begin + done);
            noChildYet = false;
            progressed = true;
            break;  // once done == cut, no smaller cut can satisfy cut > done
          }
        }
      }

      result.push_back(boundaries);
    }
  }

  return result;
}
|
|
|
|
|
2015-05-29 20:46:02 +03:00
|
|
|
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
|
2010-04-12 19:22:50 +04:00
|
|
|
{
|
2011-02-24 16:57:11 +03:00
|
|
|
return GetNodes( startPos, endPos).size() > 0;
|
2010-04-12 19:22:50 +04:00
|
|
|
}
|
|
|
|
|
2015-05-29 20:46:02 +03:00
|
|
|
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( int startPos, int endPos ) const
|
2010-04-12 19:22:50 +04:00
|
|
|
{
|
2011-02-24 16:57:11 +03:00
|
|
|
SyntaxTreeIndexIterator startIndex = m_index.find( startPos );
|
|
|
|
if (startIndex == m_index.end() )
|
|
|
|
return m_emptyNode;
|
|
|
|
|
|
|
|
SyntaxTreeIndexIterator2 endIndex = startIndex->second.find( endPos );
|
|
|
|
if (endIndex == startIndex->second.end())
|
|
|
|
return m_emptyNode;
|
|
|
|
|
|
|
|
return endIndex->second;
|
2010-04-12 19:22:50 +04:00
|
|
|
}
|
|
|
|
|
2015-05-29 20:46:02 +03:00
|
|
|
void SyntaxNodeCollection::ConnectNodes()
|
2012-04-23 17:24:54 +04:00
|
|
|
{
|
|
|
|
typedef SyntaxTreeIndex2::const_reverse_iterator InnerIterator;
|
|
|
|
|
|
|
|
SyntaxNode *prev = 0;
|
|
|
|
// Iterate over all start indices from lowest to highest.
|
|
|
|
for (SyntaxTreeIndexIterator p = m_index.begin(); p != m_index.end(); ++p) {
|
|
|
|
const SyntaxTreeIndex2 &inner = p->second;
|
|
|
|
// Iterate over all end indices from highest to lowest.
|
|
|
|
for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) {
|
|
|
|
const std::vector<SyntaxNode*> &nodes = q->second;
|
|
|
|
// Iterate over all nodes that cover the same span in order of tree
|
|
|
|
// depth, top-most first.
|
|
|
|
for (std::vector<SyntaxNode*>::const_reverse_iterator r = nodes.rbegin();
|
|
|
|
r != nodes.rend(); ++r) {
|
|
|
|
SyntaxNode *node = *r;
|
|
|
|
if (!prev) {
|
|
|
|
// node is the root.
|
|
|
|
m_top = node;
|
|
|
|
node->SetParent(0);
|
|
|
|
} else if (prev->GetStart() == node->GetStart()) {
|
|
|
|
// prev is the parent of node.
|
|
|
|
assert(prev->GetEnd() >= node->GetEnd());
|
|
|
|
node->SetParent(prev);
|
|
|
|
prev->AddChild(node);
|
|
|
|
} else {
|
|
|
|
// prev is a descendant of node's parent. The lowest common
|
|
|
|
// ancestor of prev and node will be node's parent.
|
|
|
|
SyntaxNode *ancestor = prev->GetParent();
|
|
|
|
while (ancestor->GetEnd() < node->GetEnd()) {
|
|
|
|
ancestor = ancestor->GetParent();
|
|
|
|
}
|
|
|
|
assert(ancestor);
|
|
|
|
node->SetParent(ancestor);
|
|
|
|
ancestor->AddChild(node);
|
|
|
|
}
|
|
|
|
prev = node;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-29 22:57:25 +03:00
|
|
|
} // namespace MosesTraining
|