Ongoing moses/phrase-extract refactoring

This commit is contained in:
Phil Williams 2015-06-03 14:20:00 +01:00
parent 9097fd8965
commit 8653bd8159
4 changed files with 50 additions and 50 deletions

View File

@ -51,46 +51,6 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
return newNode;
}
/** Compute split points for every span of two or more words that has a node.
 *
 * Spans are visited by increasing length and, within one length, by
 * increasing start position.  For each span that has a node, the returned
 * entry holds the span's start position followed by the position just past
 * each child sub-span found.  Children are chosen greedily: starting at the
 * last covered position, the longest sub-span that has a node is taken.  The
 * first child must be strictly shorter than the parent span.  If no further
 * child is found the search for that span stops, so an entry may end before
 * the span is fully covered (or contain the start position only).
 */
ParentNodes SyntaxNodeCollection::Parse()
{
  ParentNodes result;
  for (int spanLen = 2; spanLen <= m_numWords; ++spanLen) {
    for (int begin = 0; begin <= m_numWords - spanLen; ++begin) {
      // Only spans that carry a node act as parents.
      if (!HasNode(begin, begin + spanLen - 1)) {
        continue;
      }
      SplitPoints points;
      points.push_back(begin);
      // The first child may not be the parent span itself, so its length is
      // capped at spanLen-1; later children may extend to the span's end.
      int maxChildEnd = spanLen - 1;
      int done = 0;           // number of words of the span covered so far
      bool progressed = true; // false once a pass finds no child
      while (done < spanLen && progressed) {
        progressed = false;
        // Try the longest candidate first.
        for (int childEnd = maxChildEnd; childEnd > done; --childEnd) {
          if (HasNode(begin + done, begin + childEnd - 1)) {
            done = childEnd;
            points.push_back(begin + done);
            maxChildEnd = spanLen;
            progressed = true;
            break;
          }
        }
      }
      result.push_back(points);
    }
  }
  return result;
}
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
{
return GetNodes( startPos, endPos).size() > 0;

View File

@ -31,9 +31,6 @@
namespace MosesTraining
{
typedef std::vector< int > SplitPoints;
typedef std::vector< SplitPoints > ParentNodes;
/** A collection of SyntaxNodes organized by start and end position.
*
*/
@ -47,9 +44,6 @@ public:
//! Construct and insert a new SyntaxNode.
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
// TODO Rename (and move?)
ParentNodes Parse();
//! Return true iff there are one or more SyntaxNodes with the given span.
bool HasNode( int startPos, int endPos ) const;

View File

@ -50,7 +50,7 @@ int main(int argc, char* argv[])
// output tree
// cerr << "BEFORE:" << endl << tree;
ParentNodes parents = tree.Parse();
ParentNodes parents = determineSplitPoints(tree);
// execute selected grammar relaxation schemes
if (leftBinarizeFlag)
@ -271,3 +271,45 @@ void SAMT( SyntaxNodeCollection &tree, ParentNodes &parents )
tree.AddNode( nodes[i]->start, nodes[i]->end, nodes[i]->label);
}
}
/** Compute split points for every span of two or more words that has a node
 *  in nodeColl.
 *
 * Spans are visited by increasing length and, within one length, by
 * increasing start position.  For each span that has a node, one SplitPoints
 * entry is appended to the result: the span's start position followed by the
 * position just past each child sub-span found.  Children are chosen
 * greedily: starting at the last covered position, the longest sub-span that
 * has a node is taken.  The first child must be strictly shorter than the
 * parent span.  If no further child is found the search for that span stops,
 * so an entry may end before the span is fully covered (or contain the start
 * position only).
 *
 * \param nodeColl  collection queried via GetNumWords() and HasNode()
 * \return          one SplitPoints entry per span that has a node
 */
ParentNodes determineSplitPoints(const SyntaxNodeCollection &nodeColl)
{
  ParentNodes parents;
  // Convert the word count to int once: every position below is an int (as
  // HasNode expects), and comparing int counters against an unsigned count
  // would silently convert the signed operands.
  const int numWords = static_cast<int>(nodeColl.GetNumWords());

  // looping through all spans of size >= 2
  for (int length = 2; length <= numWords; ++length) {
    for (int startPos = 0; startPos <= numWords - length; ++startPos) {
      if (!nodeColl.HasNode(startPos, startPos + length - 1)) {
        continue;
      }
      // processing one (parent) span
      SplitPoints splitPoints;
      splitPoints.push_back(startPos);
      // The first child may not be the parent span itself, so the first
      // search starts one position short of the span's end.
      int searchFrom = length - 1;
      int covered = 0;
      bool foundSomething = true; // leave the loop if a pass finds nothing
      while (covered < length && foundSomething) {
        // find largest covering subspan (child)
        // starting at last covered position
        foundSomething = false;
        for (int midPos = searchFrom; midPos > covered; --midPos) {
          if (nodeColl.HasNode(startPos + covered, startPos + midPos - 1)) {
            covered = midPos;
            splitPoints.push_back(startPos + covered);
            searchFrom = length;
            foundSomething = true;
            break; // longest child found; resume after it
          }
        }
      }
      parents.push_back(splitPoints);
    }
  }
  return parents;
}

View File

@ -37,10 +37,14 @@ bool leftBinarizeFlag = false;
// NOTE(review): presumably enables right binarization, by analogy with
// leftBinarizeFlag; where it is set is outside this view -- confirm.
bool rightBinarizeFlag = false;
// NOTE(review): presumably the SAMT relaxation level, with 0 meaning
// "disabled" -- confirm against init() and main().
char SAMTLevel = 0;
// Word positions recorded for one span by determineSplitPoints(): the span's
// start position followed by the position just past each child found.
typedef std::vector< int > SplitPoints;
// One SplitPoints entry per span that has a node.
typedef std::vector< SplitPoints > ParentNodes;
// functions
void init(int argc, char* argv[]);
// Computes the split points of every span of two or more words that has a
// node in the given collection.
ParentNodes determineSplitPoints(const MosesTraining::SyntaxNodeCollection &);
void store( MosesTraining::SyntaxNodeCollection &tree, const std::vector<std::string> &words );
// Grammar relaxation schemes; each receives the tree and the split points
// computed for it.
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
void SAMT( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
void SAMT( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );