Ongoing moses/phrase-extract refactoring

This commit is contained in:
Phil Williams 2015-06-03 14:09:49 +01:00
parent ed321791a7
commit 9097fd8965
2 changed files with 39 additions and 25 deletions

View File

@ -47,7 +47,7 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
SyntaxNode* newNode = new SyntaxNode(label, startPos, endPos);
m_nodes.push_back( newNode );
m_index[ startPos ][ endPos ].push_back( newNode );
m_size = std::max(endPos+1, m_size);
m_numWords = std::max(endPos+1, m_numWords);
return newNode;
}
@ -56,8 +56,8 @@ ParentNodes SyntaxNodeCollection::Parse()
ParentNodes parents;
// looping through all spans of size >= 2
for( int length=2; length<=m_size; length++ ) {
for( int startPos = 0; startPos <= m_size-length; startPos++ ) {
for( int length=2; length<=m_numWords; length++ ) {
for( int startPos = 0; startPos <= m_numWords-length; startPos++ ) {
if (HasNode( startPos, startPos+length-1 )) {
// processing one (parent) span
@ -96,13 +96,14 @@ bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
return GetNodes( startPos, endPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( int startPos, int endPos ) const
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes(
int startPos, int endPos ) const
{
SyntaxTreeIndexIterator startIndex = m_index.find( startPos );
NodeIndex::const_iterator startIndex = m_index.find( startPos );
if (startIndex == m_index.end() )
return m_emptyNode;
SyntaxTreeIndexIterator2 endIndex = startIndex->second.find( endPos );
InnerNodeIndex::const_iterator endIndex = startIndex->second.find( endPos );
if (endIndex == startIndex->second.end())
return m_emptyNode;
@ -120,14 +121,15 @@ std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree()
}
// Connect the SyntaxTrees.
typedef SyntaxTreeIndex2::const_reverse_iterator InnerIterator;
typedef NodeIndex::const_iterator OuterIterator;
typedef InnerNodeIndex::const_reverse_iterator InnerIterator;
SyntaxTree *root = 0;
SyntaxNode *prevNode = 0;
SyntaxTree *prevTree = 0;
// Iterate over all start indices from lowest to highest.
for (SyntaxTreeIndexIterator p = m_index.begin(); p != m_index.end(); ++p) {
const SyntaxTreeIndex2 &inner = p->second;
for (OuterIterator p = m_index.begin(); p != m_index.end(); ++p) {
const InnerNodeIndex &inner = p->second;
// Iterate over all end indices from highest to lowest.
for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) {
const std::vector<SyntaxNode*> &nodes = q->second;

View File

@ -34,38 +34,50 @@ namespace MosesTraining
typedef std::vector< int > SplitPoints;
typedef std::vector< SplitPoints > ParentNodes;
/** A collection of SyntaxNodes organized by start and end position.
*
*/
class SyntaxNodeCollection
{
protected:
std::vector< SyntaxNode* > m_nodes;
typedef std::map< int, std::vector< SyntaxNode* > > SyntaxTreeIndex2;
typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2;
typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex;
typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator;
SyntaxTreeIndex m_index;
int m_size;
std::vector< SyntaxNode* > m_emptyNode;
public:
SyntaxNodeCollection() : m_size(0) {}
SyntaxNodeCollection() : m_numWords(0) {}
~SyntaxNodeCollection();
//! Construct and insert a new SyntaxNode.
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
// TODO Rename (and move?)
ParentNodes Parse();
//! Return true iff there are one or more SyntaxNodes with the given span.
bool HasNode( int startPos, int endPos ) const;
//! Look up the SyntaxNodes for a given span.
const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;
const std::vector< SyntaxNode* >& GetAllNodes() {
return m_nodes;
};
//! Get a vector of pointers to all SyntaxNodes (unordered).
const std::vector< SyntaxNode* >& GetAllNodes() { return m_nodes; };
size_t GetNumWords() const {
return m_size;
return m_numWords;
}
void Clear();
std::auto_ptr<SyntaxTree> ExtractTree();
private:
typedef std::map< int, std::vector< SyntaxNode* > > InnerNodeIndex;
typedef std::map< int, InnerNodeIndex > NodeIndex;
// Not copyable.
SyntaxNodeCollection(const SyntaxNodeCollection &);
SyntaxNodeCollection &operator=(const SyntaxNodeCollection &);
std::vector< SyntaxNode* > m_nodes;
NodeIndex m_index;
int m_numWords;
std::vector< SyntaxNode* > m_emptyNode;
};
} // namespace MosesTraining