mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
Ongoing moses/phrase-extract refactoring
This commit is contained in:
parent
9097fd8965
commit
8653bd8159
@ -51,46 +51,6 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
|
||||
return newNode;
|
||||
}
|
||||
|
||||
//! For every span of two or more words that has a node, collect the
//! positions at which that span divides into child spans.
//!
//! Each returned SplitPoints entry begins with the span's start position and
//! is followed by the position just past each child found. Children are
//! picked left to right, always taking the longest sub-span with a node that
//! starts at the last covered position. If no child is found at some position
//! the search for that span stops early.
ParentNodes SyntaxNodeCollection::Parse()
{
  ParentNodes result;

  // Visit spans shortest first, and left to right within one length.
  for (int spanLen = 2; spanLen <= m_numWords; ++spanLen) {
    for (int begin = 0; begin <= m_numWords - spanLen; ++begin) {
      if (!HasNode(begin, begin + spanLen - 1)) {
        continue;  // no node with this span, so nothing to split
      }

      // The first split point is always the start of the parent span.
      SplitPoints points(1, begin);

      // Until the first child is found, the longest candidate is one word
      // shorter than the parent, so the parent is not picked as its own child.
      bool beforeFirstChild = true;
      int done = 0;            // number of words of the span covered so far
      bool progressed = true;  // false once a pass finds no child
      while (done < spanLen && progressed) {
        progressed = false;
        const int longest = beforeFirstChild ? spanLen - 1 : spanLen;
        for (int mid = longest; mid > done; --mid) {
          if (HasNode(begin + done, begin + mid - 1)) {
            done = mid;
            points.push_back(begin + done);
            beforeFirstChild = false;
            progressed = true;
            break;  // longest child from this position found
          }
        }
      }

      result.push_back(points);
    }
  }

  return result;
}
|
||||
|
||||
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
|
||||
{
|
||||
return GetNodes( startPos, endPos).size() > 0;
|
||||
|
@ -31,9 +31,6 @@
|
||||
namespace MosesTraining
|
||||
{
|
||||
|
||||
typedef std::vector< int > SplitPoints;
|
||||
typedef std::vector< SplitPoints > ParentNodes;
|
||||
|
||||
/** A collection of SyntaxNodes organized by start and end position.
|
||||
*
|
||||
*/
|
||||
@ -47,9 +44,6 @@ public:
|
||||
//! Construct and insert a new SyntaxNode.
|
||||
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
|
||||
|
||||
// TODO Rename (and move?)
|
||||
ParentNodes Parse();
|
||||
|
||||
//! Return true iff there are one or more SyntaxNodes with the given span.
|
||||
bool HasNode( int startPos, int endPos ) const;
|
||||
|
||||
|
@ -50,7 +50,7 @@ int main(int argc, char* argv[])
|
||||
// output tree
|
||||
// cerr << "BEFORE:" << endl << tree;
|
||||
|
||||
ParentNodes parents = tree.Parse();
|
||||
ParentNodes parents = determineSplitPoints(tree);
|
||||
|
||||
// execute selected grammar relaxation schemes
|
||||
if (leftBinarizeFlag)
|
||||
@ -271,3 +271,45 @@ void SAMT( SyntaxNodeCollection &tree, ParentNodes &parents )
|
||||
tree.AddNode( nodes[i]->start, nodes[i]->end, nodes[i]->label);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Determine, for every span of two or more words that has a node in
 * nodeColl, the positions at which that span splits into child spans.
 *
 * Spans are visited in order of increasing length and, within one length,
 * increasing start position. For each span [startPos, startPos+length-1]
 * that has a node, one SplitPoints entry is appended to the result. The
 * entry starts with startPos, followed by the position just past each child
 * found. Children are chosen greedily from left to right: starting at the
 * last covered position, the longest sub-span that has a node is taken. If
 * no child is found at some position the search stops, so the final split
 * point of an entry may fall short of startPos+length.
 *
 * \param nodeColl collection queried through GetNumWords() and HasNode()
 * \return one SplitPoints vector per parent span, in visiting order
 */
ParentNodes determineSplitPoints(const SyntaxNodeCollection &nodeColl)
{
  ParentNodes parents;

  // HasNode() takes int positions, so convert the word count once and do all
  // span arithmetic in int rather than mixing int counters with std::size_t.
  const int numWords = static_cast<int>(nodeColl.GetNumWords());

  // looping through all spans of size >= 2
  for (int length = 2; length <= numWords; ++length) {
    for (int startPos = 0; startPos <= numWords - length; ++startPos) {
      if (!nodeColl.HasNode(startPos, startPos + length - 1)) {
        continue;
      }

      // processing one (parent) span
      SplitPoints splitPoints;
      splitPoints.push_back(startPos);

      // Until the first child is found, the longest candidate considered is
      // one word shorter than the parent, so the parent span itself is not
      // selected as its own child.
      bool first = true;
      int covered = 0;             // words of the parent span covered so far
      bool foundSomething = true;  // break loop if nothing found
      while (covered < length && foundSomething) {
        // find largest covering subspan (child)
        // starting at last covered position
        foundSomething = false;
        const int longest = first ? length - 1 : length;
        for (int midPos = longest; midPos > covered; --midPos) {
          if (nodeColl.HasNode(startPos + covered, startPos + midPos - 1)) {
            covered = midPos;
            splitPoints.push_back(startPos + covered);
            first = false;
            foundSomething = true;
            break;  // restart the search from the new covered position
          }
        }
      }

      parents.push_back(splitPoints);
    }
  }

  return parents;
}
|
||||
|
@ -37,10 +37,14 @@ bool leftBinarizeFlag = false;
|
||||
bool rightBinarizeFlag = false;
|
||||
char SAMTLevel = 0;
|
||||
|
||||
// Positions recorded for one parent span: determineSplitPoints() stores the
// span's start position first, then the position just past each child found.
typedef std::vector< int > SplitPoints;
|
||||
// One SplitPoints entry per parent span, as returned by
// determineSplitPoints().
typedef std::vector< SplitPoints > ParentNodes;
|
||||
|
||||
// functions
|
||||
void init(int argc, char* argv[]);
|
||||
ParentNodes determineSplitPoints(const MosesTraining::SyntaxNodeCollection &);
|
||||
void store( MosesTraining::SyntaxNodeCollection &tree, const std::vector<std::string> &words );
|
||||
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
|
||||
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
|
||||
void SAMT( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
|
||||
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
|
||||
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
|
||||
void SAMT( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user