mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
Ongoing moses/phrase-extract refactoring
This commit is contained in:
parent
9097fd8965
commit
8653bd8159
@ -51,46 +51,6 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
|
|||||||
return newNode;
|
return newNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Build one SplitPoints list for every span of two or more words that has a
 * node in this collection.
 *
 * Spans are examined by increasing length and, for each length, by increasing
 * start position. Each list begins with the span's start position; every
 * further entry is the position just past a child span that was found. Children
 * are picked greedily: the longest span with a node that starts where the
 * previous child ended. The first child must be strictly shorter than the
 * parent span. If no child is found at some point, the search for that parent
 * stops and the list collected so far is kept.
 */
ParentNodes SyntaxNodeCollection::Parse()
{
  ParentNodes allSplits;

  for (int spanLen = 2; spanLen <= m_numWords; ++spanLen) {
    for (int left = 0; left <= m_numWords - spanLen; ++left) {
      // Only spans that carry a node act as parents.
      if (!HasNode(left, left + spanLen - 1)) {
        continue;
      }

      SplitPoints boundaries;
      boundaries.push_back(left);

      int done = 0;            // words of the parent span consumed so far
      bool childSeen = false;  // becomes true once the first child is found
      bool progressed = true;  // false when a pass finds no child at all

      while (progressed && done < spanLen) {
        progressed = false;

        // Until a child has been found, skip the candidate that would be
        // the parent span itself.
        int end = childSeen ? spanLen : spanLen - 1;
        for (; end > done; --end) {
          if (HasNode(left + done, left + end - 1)) {
            progressed = true;
            break;
          }
        }

        if (progressed) {
          done = end;
          boundaries.push_back(left + done);
          childSeen = true;
        }
      }

      allSplits.push_back(boundaries);
    }
  }

  return allSplits;
}
|
|
||||||
|
|
||||||
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
|
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
|
||||||
{
|
{
|
||||||
return GetNodes( startPos, endPos).size() > 0;
|
return GetNodes( startPos, endPos).size() > 0;
|
||||||
|
@ -31,9 +31,6 @@
|
|||||||
namespace MosesTraining
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
|
|
||||||
typedef std::vector< int > SplitPoints;
|
|
||||||
typedef std::vector< SplitPoints > ParentNodes;
|
|
||||||
|
|
||||||
/** A collection of SyntaxNodes organized by start and end position.
|
/** A collection of SyntaxNodes organized by start and end position.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -47,9 +44,6 @@ public:
|
|||||||
//! Construct and insert a new SyntaxNode.
|
//! Construct and insert a new SyntaxNode.
|
||||||
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
|
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
|
||||||
|
|
||||||
// TODO Rename (and move?)
|
|
||||||
ParentNodes Parse();
|
|
||||||
|
|
||||||
//! Return true iff there are one or more SyntaxNodes with the given span.
|
//! Return true iff there are one or more SyntaxNodes with the given span.
|
||||||
bool HasNode( int startPos, int endPos ) const;
|
bool HasNode( int startPos, int endPos ) const;
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ int main(int argc, char* argv[])
|
|||||||
// output tree
|
// output tree
|
||||||
// cerr << "BEFORE:" << endl << tree;
|
// cerr << "BEFORE:" << endl << tree;
|
||||||
|
|
||||||
ParentNodes parents = tree.Parse();
|
ParentNodes parents = determineSplitPoints(tree);
|
||||||
|
|
||||||
// execute selected grammar relaxation schemes
|
// execute selected grammar relaxation schemes
|
||||||
if (leftBinarizeFlag)
|
if (leftBinarizeFlag)
|
||||||
@ -271,3 +271,45 @@ void SAMT( SyntaxNodeCollection &tree, ParentNodes &parents )
|
|||||||
tree.AddNode( nodes[i]->start, nodes[i]->end, nodes[i]->label);
|
tree.AddNode( nodes[i]->start, nodes[i]->end, nodes[i]->label);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Compute the split points of every span of two or more words that has a
 * node in nodeColl.
 *
 * Spans are visited by increasing length and, for each length, by increasing
 * start position. For each span for which nodeColl.HasNode() is true, one
 * SplitPoints vector is appended to the result:
 *   - its first entry is the span's start position;
 *   - each further entry is the position just past a child span, where the
 *     child is the longest span with a node that starts at the previous
 *     entry. The first child must be strictly shorter than the parent.
 * If no child is found at some position, the search for that parent stops
 * and the entries collected so far are kept.
 *
 * @param nodeColl  collection queried through GetNumWords() and HasNode().
 * @return one SplitPoints vector per parent span, in visiting order.
 */
ParentNodes determineSplitPoints(const SyntaxNodeCollection &nodeColl)
{
  ParentNodes parents;

  // Keep the word count signed: HasNode() takes int positions and all loop
  // counters are int, so this avoids signed/unsigned comparisons and an
  // unsigned subtraction in the loop bounds.
  const int numWords = static_cast<int>(nodeColl.GetNumWords());

  // looping through all spans of size >= 2
  for (int length = 2; length <= numWords; ++length) {
    for (int startPos = 0; startPos <= numWords - length; ++startPos) {
      if (!nodeColl.HasNode(startPos, startPos + length - 1)) {
        continue;
      }

      // processing one (parent) span
      SplitPoints splitPoints;
      splitPoints.push_back(startPos);

      bool isFirstChild = true;    // first child may not be the parent span itself
      int covered = 0;             // words of the parent span covered so far
      bool foundSomething = true;  // break loop if nothing found
      while (covered < length && foundSomething) {
        // find largest covering subspan (child)
        // starting at last covered position
        foundSomething = false;
        const int longestEnd = isFirstChild ? length - 1 : length;
        for (int midPos = longestEnd; midPos > covered; --midPos) {
          if (nodeColl.HasNode(startPos + covered, startPos + midPos - 1)) {
            covered = midPos;
            splitPoints.push_back(startPos + covered);
            isFirstChild = false;
            foundSomething = true;
            break;  // longest child found; restart the search from its end
          }
        }
      }

      parents.push_back(splitPoints);
    }
  }

  return parents;
}
|
||||||
|
@ -37,10 +37,14 @@ bool leftBinarizeFlag = false;
|
|||||||
bool rightBinarizeFlag = false;
|
bool rightBinarizeFlag = false;
|
||||||
char SAMTLevel = 0;
|
char SAMTLevel = 0;
|
||||||
|
|
||||||
|
typedef std::vector< int > SplitPoints;
|
||||||
|
typedef std::vector< SplitPoints > ParentNodes;
|
||||||
|
|
||||||
// functions
|
// functions
|
||||||
void init(int argc, char* argv[]);
|
void init(int argc, char* argv[]);
|
||||||
|
ParentNodes determineSplitPoints(const MosesTraining::SyntaxNodeCollection &);
|
||||||
void store( MosesTraining::SyntaxNodeCollection &tree, const std::vector<std::string> &words );
|
void store( MosesTraining::SyntaxNodeCollection &tree, const std::vector<std::string> &words );
|
||||||
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
|
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
|
||||||
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
|
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
|
||||||
void SAMT( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
|
void SAMT( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user