Ongoing moses/phrase-extract refactoring

This commit is contained in:
Phil Williams 2015-06-01 16:40:35 +01:00
parent f61091e38d
commit f37415a259
36 changed files with 246 additions and 273 deletions

View File

@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) { if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
// Initialize phrase orientation scoring object // Initialize phrase orientation scoring object
Moses::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(), MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm()); targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData(); PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// LEFT-TO-RIGHT DIRECTION // LEFT-TO-RIGHT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R); MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary) if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule && (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned && if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) { reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous // discontinuous
l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT; l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else { } else {
reoClassData->firstNonTerminalIsBoundary = true; reoClassData->firstNonTerminalIsBoundary = true;
} }
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// RIGHT-TO-LEFT DIRECTION // RIGHT-TO-LEFT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L); MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary) if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule && (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned && if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) { reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous // discontinuous
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT; r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else { } else {
reoClassData->lastNonTerminalIsBoundary = true; reoClassData->lastNonTerminalIsBoundary = true;
} }
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// LEFT-TO-RIGHT DIRECTION // LEFT-TO-RIGHT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT]; MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
IFFEATUREVERBOSE(2) { IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "l2rOrientation "); FEATUREVERBOSE(2, "l2rOrientation ");
switch (l2rOrientation) { switch (l2rOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl); FEATUREVERBOSE2(2, "mono" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl); FEATUREVERBOSE2(2, "swap" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl); FEATUREVERBOSE2(2, "dleft" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl); FEATUREVERBOSE2(2, "dright" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR // modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl); FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break; break;
default: default:
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else { } else {
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) { if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()); newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
// if sub-derivation has left-boundary non-terminal: // if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation // add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator); LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) { } else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap()); newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
// if sub-derivation has left-boundary non-terminal: // if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation // add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator); LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) || } else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) || ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()); newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal: // if sub-derivation has left-boundary non-terminal:
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// RIGHT-TO-LEFT DIRECTION // RIGHT-TO-LEFT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT]; MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
IFFEATUREVERBOSE(2) { IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "r2lOrientation "); FEATUREVERBOSE(2, "r2lOrientation ");
switch (r2lOrientation) { switch (r2lOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl); FEATUREVERBOSE2(2, "mono" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl); FEATUREVERBOSE2(2, "swap" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl); FEATUREVERBOSE2(2, "dleft" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl); FEATUREVERBOSE2(2, "dright" << std::endl);
break; break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN: case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR // modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl); FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break; break;
default: default:
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else { } else {
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) { if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono()); newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
// if sub-derivation has right-boundary non-terminal: // if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation // add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator); RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) { } else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap()); newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
// if sub-derivation has right-boundary non-terminal: // if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation // add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator); RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) || } else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) || ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()); newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal: // if sub-derivation has right-boundary non-terminal:
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
} }
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
{ {
if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) { if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
return &MORIENT; return &MORIENT;
} else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) { } else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
return &SORIENT; return &SORIENT;
} else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) || } else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) || ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
return &DORIENT; return &DORIENT;
} else { } else {

View File

@ -302,8 +302,8 @@ public:
struct ReoClassData { struct ReoClassData {
public: public:
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R; std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L; std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
bool firstNonTerminalIsBoundary; bool firstNonTerminalIsBoundary;
bool firstNonTerminalPreviousSourceSpanIsAligned; bool firstNonTerminalPreviousSourceSpanIsAligned;
bool firstNonTerminalFollowingSourceSpanIsAligned; bool firstNonTerminalFollowingSourceSpanIsAligned;
@ -401,7 +401,7 @@ protected:
ScoreComponentCollection* scoreBreakdown, ScoreComponentCollection* scoreBreakdown,
const std::string* o) const; const std::string* o) const;
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const; const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
static const std::string MORIENT; static const std::string MORIENT;
static const std::string SORIENT; static const std::string SORIENT;

View File

@ -10,10 +10,6 @@
#include "ScoreFeature.h" #include "ScoreFeature.h"
#include "extract-ghkm/Node.h" #include "extract-ghkm/Node.h"
using namespace MosesTraining;
using namespace Moses;
using namespace GHKM;
namespace MosesTraining namespace MosesTraining
{ {

View File

@ -0,0 +1,12 @@
#pragma once
#include "syntax-common/tree.h"
#include "SyntaxNode.h"
namespace MosesTraining
{
typedef Syntax::Tree<SyntaxNode> SyntaxTree;
} // namespace MosesTraining

View File

@ -25,7 +25,7 @@
#include <cassert> #include <cassert>
#include <cstdlib> #include <cstdlib>
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -70,4 +70,4 @@ void FlipAlignment(Alignment &a)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -23,7 +23,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -35,5 +35,5 @@ void ReadAlignment(const std::string &, Alignment &);
void FlipAlignment(Alignment &); void FlipAlignment(Alignment &);
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -19,23 +19,24 @@
#include "AlignmentGraph.h" #include "AlignmentGraph.h"
#include "ComposedRule.h"
#include "Node.h"
#include "Options.h"
#include "ParseTree.h"
#include "Subgraph.h"
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <memory> #include <memory>
#include <stack> #include <stack>
namespace Moses #include "SyntaxTree.h"
#include "ComposedRule.h"
#include "Node.h"
#include "Options.h"
#include "Subgraph.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
AlignmentGraph::AlignmentGraph(const ParseTree *t, AlignmentGraph::AlignmentGraph(const SyntaxTree *t,
const std::vector<std::string> &s, const std::vector<std::string> &s,
const Alignment &a) const Alignment &a)
{ {
@ -208,7 +209,7 @@ void AlignmentGraph::ExtractComposedRules(Node *node, const Options &options)
} }
} }
Node *AlignmentGraph::CopyParseTree(const ParseTree *root) Node *AlignmentGraph::CopyParseTree(const SyntaxTree *root)
{ {
NodeType nodeType = (root->IsLeaf()) ? TARGET : TREE; NodeType nodeType = (root->IsLeaf()) ? TARGET : TREE;
@ -218,10 +219,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
n->SetPcfgScore(root->value().GetPcfgScore()); n->SetPcfgScore(root->value().GetPcfgScore());
} }
const std::vector<ParseTree *> &children = root->children(); const std::vector<SyntaxTree *> &children = root->children();
std::vector<Node *> childNodes; std::vector<Node *> childNodes;
childNodes.reserve(children.size()); childNodes.reserve(children.size());
for (std::vector<ParseTree *>::const_iterator p(children.begin()); for (std::vector<SyntaxTree *>::const_iterator p(children.begin());
p != children.end(); ++p) { p != children.end(); ++p) {
Node *child = CopyParseTree(*p); Node *child = CopyParseTree(*p);
child->AddParent(n.get()); child->AddParent(n.get());
@ -385,4 +386,4 @@ Node *AlignmentGraph::DetermineAttachmentPoint(int index)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -21,15 +21,16 @@
#ifndef EXTRACT_GHKM_ALIGNMENT_GRAPH_H_ #ifndef EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
#define EXTRACT_GHKM_ALIGNMENT_GRAPH_H_ #define EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
#include "Alignment.h"
#include "Options.h"
#include "ParseTree.h"
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
namespace Moses #include "SyntaxTree.h"
#include "Alignment.h"
#include "Options.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -40,7 +41,7 @@ class Subgraph;
class AlignmentGraph class AlignmentGraph
{ {
public: public:
AlignmentGraph(const ParseTree *, AlignmentGraph(const SyntaxTree *,
const std::vector<std::string> &, const std::vector<std::string> &,
const Alignment &); const Alignment &);
@ -61,7 +62,7 @@ private:
AlignmentGraph(const AlignmentGraph &); AlignmentGraph(const AlignmentGraph &);
AlignmentGraph &operator=(const AlignmentGraph &); AlignmentGraph &operator=(const AlignmentGraph &);
Node *CopyParseTree(const ParseTree *); Node *CopyParseTree(const SyntaxTree *);
void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const; void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const;
void CalcComplementSpans(Node *); void CalcComplementSpans(Node *);
void GetTargetTreeLeaves(Node *, std::vector<Node *> &); void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
@ -77,6 +78,6 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -19,15 +19,15 @@
#include "ComposedRule.h" #include "ComposedRule.h"
#include "Node.h"
#include "Options.h"
#include "Subgraph.h"
#include <set> #include <set>
#include <vector> #include <vector>
#include <queue> #include <queue>
namespace Moses #include "Node.h"
#include "Options.h"
#include "Subgraph.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -128,4 +128,4 @@ Subgraph ComposedRule::CreateSubgraph()
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -21,12 +21,12 @@
#ifndef EXTRACT_GHKM_COMPOSED_RULE_H_ #ifndef EXTRACT_GHKM_COMPOSED_RULE_H_
#define EXTRACT_GHKM_COMPOSED_RULE_H_ #define EXTRACT_GHKM_COMPOSED_RULE_H_
#include "Subgraph.h"
#include <vector> #include <vector>
#include <queue> #include <queue>
namespace Moses #include "Subgraph.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -67,6 +67,6 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -23,7 +23,7 @@
#include <string> #include <string>
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -41,6 +41,6 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -19,29 +19,6 @@
#include "ExtractGHKM.h" #include "ExtractGHKM.h"
#include "Alignment.h"
#include "AlignmentGraph.h"
#include "Exception.h"
#include "InputFileStream.h"
#include "Node.h"
#include "OutputFileStream.h"
#include "Options.h"
#include "ParseTree.h"
#include "PhraseOrientation.h"
#include "ScfgRule.h"
#include "ScfgRuleWriter.h"
#include "Span.h"
#include "StsgRule.h"
#include "StsgRuleWriter.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "XmlTreeParser.h"
#include <boost/program_options.hpp>
#include <cassert> #include <cassert>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
@ -51,13 +28,40 @@
#include <sstream> #include <sstream>
#include <vector> #include <vector>
namespace Moses #include <boost/program_options.hpp>
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "SyntaxTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "Alignment.h"
#include "AlignmentGraph.h"
#include "Exception.h"
#include "Node.h"
#include "Options.h"
#include "PhraseOrientation.h"
#include "ScfgRule.h"
#include "ScfgRuleWriter.h"
#include "Span.h"
#include "StsgRule.h"
#include "StsgRuleWriter.h"
#include "XmlTreeParser.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
int ExtractGHKM::Main(int argc, char *argv[]) int ExtractGHKM::Main(int argc, char *argv[])
{ {
using Moses::InputFileStream;
using Moses::OutputFileStream;
// Process command-line options. // Process command-line options.
Options options; Options options;
ProcessOptions(argc, argv, options); ProcessOptions(argc, argv, options);
@ -158,7 +162,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
std::cerr << "skipping line " << lineNum << " with empty target tree\n"; std::cerr << "skipping line " << lineNum << " with empty target tree\n";
continue; continue;
} }
std::auto_ptr<ParseTree> targetParseTree; std::auto_ptr<SyntaxTree> targetParseTree;
try { try {
targetParseTree = targetXmlTreeParser.Parse(targetLine); targetParseTree = targetXmlTreeParser.Parse(targetLine);
assert(targetParseTree.get()); assert(targetParseTree.get());
@ -173,8 +177,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Parse source tree and construct a SyntaxTree object. // Parse source tree and construct a SyntaxTree object.
MosesTraining::SyntaxNodeCollection sourceSyntaxTree; SyntaxNodeCollection sourceSyntaxTree;
MosesTraining::SyntaxNode *sourceSyntaxTreeRoot=NULL; SyntaxNode *sourceSyntaxTreeRoot=NULL;
if (options.sourceLabels) { if (options.sourceLabels) {
try { try {
@ -197,8 +201,9 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Read source tokens. // Read source tokens.
std::vector<std::string> sourceTokens(ReadTokens(sourceLine)); std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
// Construct a source ParseTree object from the SyntaxNodeCollection object. // Construct a source SyntaxTree object from the SyntaxNodeCollection
std::auto_ptr<ParseTree> sourceParseTree; // object.
std::auto_ptr<SyntaxTree> sourceParseTree;
if (options.sourceLabels) { if (options.sourceLabels) {
try { try {
@ -264,12 +269,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
const std::vector<const Subgraph *> &rules = (*p)->GetRules(); const std::vector<const Subgraph *> &rules = (*p)->GetRules();
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN; PhraseOrientation::REO_CLASS l2rOrientation=PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=PhraseOrientation::REO_CLASS_UNKNOWN;
if (options.phraseOrientation && !rules.empty()) { if (options.phraseOrientation && !rules.empty()) {
int sourceSpanBegin = *((*p)->GetSpan().begin()); int sourceSpanBegin = *((*p)->GetSpan().begin());
int sourceSpanEnd = *((*p)->GetSpan().rbegin()); int sourceSpanEnd = *((*p)->GetSpan().rbegin());
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_L2R); l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_L2R);
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_R2L); r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_R2L);
// std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl; // std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
// std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl; // std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
} }
@ -310,8 +315,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
fwdExtractStream << " "; fwdExtractStream << " ";
phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation); phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
fwdExtractStream << "}}"; fwdExtractStream << "}}";
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_L2R,l2rOrientation,1); phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_R2L,r2lOrientation,1); phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
} }
fwdExtractStream << std::endl; fwdExtractStream << std::endl;
invExtractStream << std::endl; invExtractStream << std::endl;
@ -400,7 +405,7 @@ void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
} }
void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename, void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
OutputFileStream &stream) Moses::OutputFileStream &stream)
{ {
bool ret = stream.Open(filename); bool ret = stream.Open(filename);
if (!ret) { if (!ret) {
@ -823,16 +828,16 @@ void ExtractGHKM::WriteSourceLabelSet(
} }
void ExtractGHKM::CollectWordLabelCounts( void ExtractGHKM::CollectWordLabelCounts(
ParseTree &root, SyntaxTree &root,
const Options &options, const Options &options,
std::map<std::string, int> &wordCount, std::map<std::string, int> &wordCount,
std::map<std::string, std::string> &wordLabel) std::map<std::string, std::string> &wordLabel)
{ {
for (ParseTree::ConstLeafIterator p(root); for (SyntaxTree::ConstLeafIterator p(root);
p != ParseTree::ConstLeafIterator(); ++p) { p != SyntaxTree::ConstLeafIterator(); ++p) {
const ParseTree &leaf = *p; const SyntaxTree &leaf = *p;
const std::string &word = leaf.value().GetLabel(); const std::string &word = leaf.value().GetLabel();
const ParseTree *ancestor = leaf.parent(); const SyntaxTree *ancestor = leaf.parent();
// If unary rule elimination is enabled and this word is at the end of a // If unary rule elimination is enabled and this word is at the end of a
// chain of unary rewrites, e.g. // chain of unary rewrites, e.g.
// PN-SB -> NE -> word // PN-SB -> NE -> word
@ -849,12 +854,12 @@ void ExtractGHKM::CollectWordLabelCounts(
} }
} }
std::vector<std::string> ExtractGHKM::ReadTokens(const ParseTree &root) const std::vector<std::string> ExtractGHKM::ReadTokens(const SyntaxTree &root) const
{ {
std::vector<std::string> tokens; std::vector<std::string> tokens;
for (ParseTree::ConstLeafIterator p(root); for (SyntaxTree::ConstLeafIterator p(root);
p != ParseTree::ConstLeafIterator(); ++p) { p != SyntaxTree::ConstLeafIterator(); ++p) {
const ParseTree &leaf = *p; const SyntaxTree &leaf = *p;
const std::string &word = leaf.value().GetLabel(); const std::string &word = leaf.value().GetLabel();
tokens.push_back(word); tokens.push_back(word);
} }
@ -956,4 +961,4 @@ void ExtractGHKM::StripBitParLabels(
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -25,13 +25,11 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "ParseTree.h" #include "OutputFileStream.h"
#include "SyntaxTree.h"
namespace Moses namespace MosesTraining
{ {
class OutputFileStream;
namespace GHKM namespace GHKM
{ {
@ -52,9 +50,9 @@ private:
void Error(const std::string &) const; void Error(const std::string &) const;
void OpenInputFileOrDie(const std::string &, std::ifstream &); void OpenInputFileOrDie(const std::string &, std::ifstream &);
void OpenOutputFileOrDie(const std::string &, std::ofstream &); void OpenOutputFileOrDie(const std::string &, std::ofstream &);
void OpenOutputFileOrDie(const std::string &, OutputFileStream &); void OpenOutputFileOrDie(const std::string &, Moses::OutputFileStream &);
void RecordTreeLabels(const ParseTree &, std::set<std::string> &); void RecordTreeLabels(const SyntaxTree &, std::set<std::string> &);
void CollectWordLabelCounts(ParseTree &, void CollectWordLabelCounts(SyntaxTree &,
const Options &, const Options &,
std::map<std::string, int> &, std::map<std::string, int> &,
std::map<std::string, std::string> &); std::map<std::string, std::string> &);
@ -78,7 +76,7 @@ private:
std::map<std::string, int> &outTopLabelSet) const; std::map<std::string, int> &outTopLabelSet) const;
std::vector<std::string> ReadTokens(const std::string &) const; std::vector<std::string> ReadTokens(const std::string &) const;
std::vector<std::string> ReadTokens(const ParseTree &root) const; std::vector<std::string> ReadTokens(const SyntaxTree &root) const;
void ProcessOptions(int, char *[], Options &) const; void ProcessOptions(int, char *[], Options &) const;
@ -86,5 +84,4 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -21,6 +21,6 @@
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
Moses::GHKM::ExtractGHKM tool; MosesTraining::GHKM::ExtractGHKM tool;
return tool.Main(argc, argv); return tool.Main(argc, argv);
} }

View File

@ -21,7 +21,7 @@
#include "Subgraph.h" #include "Subgraph.h"
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -70,4 +70,4 @@ void Node::GetTargetWords(std::vector<std::string> &targetWords) const
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -21,14 +21,14 @@
#ifndef EXTRACT_GHKM_NODE_H_ #ifndef EXTRACT_GHKM_NODE_H_
#define EXTRACT_GHKM_NODE_H_ #define EXTRACT_GHKM_NODE_H_
#include "Span.h"
#include <cassert> #include <cassert>
#include <iterator> #include <iterator>
#include <string> #include <string>
#include <vector> #include <vector>
namespace Moses #include "Span.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -215,6 +215,6 @@ Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -21,7 +21,7 @@
#include <string> #include <string>
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -89,5 +89,5 @@ public:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -1,38 +0,0 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef EXTRACT_GHKM_PARSE_TREE_H_
#define EXTRACT_GHKM_PARSE_TREE_H_
#include "syntax-common/tree.h"
#include "SyntaxNode.h"
namespace Moses
{
namespace GHKM
{
typedef MosesTraining::Syntax::Tree<MosesTraining::SyntaxNode> ParseTree;
} // namespace GHKM
} // namespace Moses
#endif

View File

@ -26,7 +26,7 @@
#include <boost/assign/list_of.hpp> #include <boost/assign/list_of.hpp>
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -469,5 +469,5 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -1,4 +1,3 @@
/*********************************************************************** /***********************************************************************
Moses - statistical machine translation system Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh Copyright (C) 2006-2011 University of Edinburgh
@ -20,16 +19,18 @@
#pragma once #pragma once
#include "Alignment.h"
#include "moses/AlignmentInfo.h"
#include <map> #include <map>
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
#include <boost/unordered_map.hpp> #include <boost/unordered_map.hpp>
namespace Moses #include "moses/AlignmentInfo.h"
#include "Alignment.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -53,8 +54,8 @@ public:
PhraseOrientation(int sourceSize, PhraseOrientation(int sourceSize,
int targetSize, int targetSize,
const AlignmentInfo &alignTerm, const Moses::AlignmentInfo &alignTerm,
const AlignmentInfo &alignNonTerm); const Moses::AlignmentInfo &alignNonTerm);
REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const; REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const; REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
@ -119,5 +120,4 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -3,7 +3,7 @@
#include "Node.h" #include "Node.h"
#include "Subgraph.h" #include "Subgraph.h"
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -38,4 +38,4 @@ bool Rule::PartitionOrderComp(const Node *a, const Node *b)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -7,7 +7,7 @@
#include "Alignment.h" #include "Alignment.h"
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -54,6 +54,6 @@ protected:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -26,13 +26,13 @@
#include "SyntaxNode.h" #include "SyntaxNode.h"
#include "SyntaxNodeCollection.h" #include "SyntaxNodeCollection.h"
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
ScfgRule::ScfgRule(const Subgraph &fragment, ScfgRule::ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree) const SyntaxNodeCollection *sourceSyntaxTree)
: m_graphFragment(fragment) : m_graphFragment(fragment)
, m_sourceLHS("X", NonTerminal) , m_sourceLHS("X", NonTerminal)
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal) , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
@ -134,13 +134,13 @@ ScfgRule::ScfgRule(const Subgraph &fragment,
} }
} }
void ScfgRule::PushSourceLabel( void ScfgRule::PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree, const Node *node,
const Node *node, const std::string &nonMatchingLabel) const std::string &nonMatchingLabel)
{ {
ContiguousSpan span = Closure(node->GetSpan()); ContiguousSpan span = Closure(node->GetSpan());
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span? if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?
std::vector<MosesTraining::SyntaxNode*> sourceLabels = std::vector<SyntaxNode*> sourceLabels =
sourceSyntaxTree->GetNodes(span.first,span.second); sourceSyntaxTree->GetNodes(span.first,span.second);
if (!sourceLabels.empty()) { if (!sourceLabels.empty()) {
// store the topmost matching label from the source syntax tree // store the topmost matching label from the source syntax tree
@ -197,4 +197,4 @@ void ScfgRule::UpdateSourceLabelCoocCounts(std::map< std::string, std::map<std::
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -29,7 +29,7 @@
#include "Rule.h" #include "Rule.h"
#include "SyntaxNodeCollection.h" #include "SyntaxNodeCollection.h"
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -41,7 +41,7 @@ class ScfgRule : public Rule
{ {
public: public:
ScfgRule(const Subgraph &fragment, ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree = 0); const SyntaxNodeCollection *sourceSyntaxTree = 0);
const Subgraph &GetGraphFragment() const { const Subgraph &GetGraphFragment() const {
return m_graphFragment; return m_graphFragment;
@ -78,8 +78,7 @@ public:
} }
private: private:
void PushSourceLabel( void PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
const Node *node, const std::string &nonMatchingLabel); const Node *node, const std::string &nonMatchingLabel);
const Subgraph& m_graphFragment; const Subgraph& m_graphFragment;
@ -94,4 +93,4 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -19,10 +19,6 @@
#include "ScfgRuleWriter.h" #include "ScfgRuleWriter.h"
#include "Alignment.h"
#include "Options.h"
#include "ScfgRule.h"
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <ostream> #include <ostream>
@ -30,7 +26,11 @@
#include <sstream> #include <sstream>
#include <vector> #include <vector>
namespace Moses #include "Alignment.h"
#include "Options.h"
#include "ScfgRule.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -229,4 +229,4 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -19,11 +19,11 @@
#pragma once #pragma once
#include "Subgraph.h"
#include <ostream> #include <ostream>
namespace Moses #include "Subgraph.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -57,5 +57,5 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -19,7 +19,7 @@
#include "Span.h" #include "Span.h"
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -45,4 +45,4 @@ ContiguousSpan Closure(const Span &s)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -24,7 +24,7 @@
#include <map> #include <map>
#include <set> #include <set>
namespace Moses namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -36,7 +36,7 @@ bool SpansIntersect(const Span &, const ContiguousSpan &);
ContiguousSpan Closure(const Span &); ContiguousSpan Closure(const Span &);
} // namespace Moses } // namespace MosesTraining
} // namespace GHKM } // namespace GHKM
#endif #endif

View File

@ -1,11 +1,11 @@
#include "StsgRule.h" #include "StsgRule.h"
#include <algorithm>
#include "Node.h" #include "Node.h"
#include "Subgraph.h" #include "Subgraph.h"
#include <algorithm> namespace MosesTraining
namespace Moses
{ {
namespace GHKM namespace GHKM
{ {
@ -91,4 +91,4 @@ StsgRule::StsgRule(const Subgraph &fragment)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -2,12 +2,12 @@
#ifndef EXTRACT_GHKM_STSG_RULE_H_ #ifndef EXTRACT_GHKM_STSG_RULE_H_
#define EXTRACT_GHKM_STSG_RULE_H_ #define EXTRACT_GHKM_STSG_RULE_H_
#include <vector>
#include "Rule.h" #include "Rule.h"
#include "Subgraph.h" #include "Subgraph.h"
#include <vector> namespace MosesTraining
namespace Moses
{ {
namespace GHKM namespace GHKM
{ {
@ -39,6 +39,6 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -1,9 +1,5 @@
#include "StsgRuleWriter.h" #include "StsgRuleWriter.h"
#include "Alignment.h"
#include "Options.h"
#include "StsgRule.h"
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <ostream> #include <ostream>
@ -11,7 +7,11 @@
#include <sstream> #include <sstream>
#include <vector> #include <vector>
namespace Moses #include "Alignment.h"
#include "Options.h"
#include "StsgRule.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -92,4 +92,4 @@ void StsgRuleWriter::Write(const StsgRule &rule)
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -2,11 +2,11 @@
#ifndef EXTRACT_GHKM_STSG_RULE_WRITER_H_ #ifndef EXTRACT_GHKM_STSG_RULE_WRITER_H_
#define EXTRACT_GHKM_STSG_RULE_WRITER_H_ #define EXTRACT_GHKM_STSG_RULE_WRITER_H_
#include "Subgraph.h"
#include <ostream> #include <ostream>
namespace Moses #include "Subgraph.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -36,6 +36,6 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif

View File

@ -18,10 +18,11 @@
***********************************************************************/ ***********************************************************************/
#include <iostream> #include <iostream>
#include "Subgraph.h"
#include "Node.h"
namespace Moses #include "Node.h"
#include "Subgraph.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -193,5 +194,5 @@ void Subgraph::RecursivelyGetPartsOfSpeech(const Node *n, std::vector<std::strin
} }
} }
} // namespace Moses } // namespace MosesTraining
} // namespace GHKM } // namespace GHKM

View File

@ -19,12 +19,12 @@
#pragma once #pragma once
#include "Node.h"
#include <set> #include <set>
#include <vector> #include <vector>
namespace Moses #include "Node.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -137,5 +137,5 @@ private:
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -19,18 +19,17 @@
#include "XmlTreeParser.h" #include "XmlTreeParser.h"
#include "ParseTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "util/tokenize.hh"
#include <cassert> #include <cassert>
#include <vector> #include <vector>
using namespace MosesTraining; #include "util/tokenize.hh"
namespace Moses #include "SyntaxTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
@ -42,7 +41,7 @@ XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
{ {
} }
std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line) std::auto_ptr<SyntaxTree> XmlTreeParser::Parse(const std::string &line)
{ {
m_line = line; m_line = line;
m_tree.Clear(); m_tree.Clear();
@ -61,12 +60,12 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
return ConvertTree(*root, m_words); return ConvertTree(*root, m_words);
} }
// Converts a SyntaxNode tree to a Moses::GHKM::ParseTree. // Converts a SyntaxNode tree to a MosesTraining::GHKM::SyntaxTree.
std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree( std::auto_ptr<SyntaxTree> XmlTreeParser::ConvertTree(
const SyntaxNode &tree, const SyntaxNode &tree,
const std::vector<std::string> &words) const std::vector<std::string> &words)
{ {
std::auto_ptr<ParseTree> root(new ParseTree(tree)); std::auto_ptr<SyntaxTree> root(new SyntaxTree(tree));
const std::vector<SyntaxNode*> &children = tree.GetChildren(); const std::vector<SyntaxNode*> &children = tree.GetChildren();
if (children.empty()) { if (children.empty()) {
if (tree.GetStart() != tree.GetEnd()) { if (tree.GetStart() != tree.GetEnd()) {
@ -76,14 +75,14 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
throw Exception(msg.str()); throw Exception(msg.str());
} }
SyntaxNode value(tree.GetStart(), tree.GetStart(), words[tree.GetStart()]); SyntaxNode value(tree.GetStart(), tree.GetStart(), words[tree.GetStart()]);
std::auto_ptr<ParseTree> leaf(new ParseTree(value)); std::auto_ptr<SyntaxTree> leaf(new SyntaxTree(value));
leaf->parent() = root.get(); leaf->parent() = root.get();
root->children().push_back(leaf.release()); root->children().push_back(leaf.release());
} else { } else {
for (std::vector<SyntaxNode*>::const_iterator p = children.begin(); for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
p != children.end(); ++p) { p != children.end(); ++p) {
assert(*p); assert(*p);
std::auto_ptr<ParseTree> child = ConvertTree(**p, words); std::auto_ptr<SyntaxTree> child = ConvertTree(**p, words);
child->parent() = root.get(); child->parent() = root.get();
root->children().push_back(child.release()); root->children().push_back(child.release());
} }
@ -92,4 +91,4 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
} }
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining

View File

@ -21,32 +21,32 @@
#ifndef EXTRACT_GHKM_XML_TREE_PARSER_H_ #ifndef EXTRACT_GHKM_XML_TREE_PARSER_H_
#define EXTRACT_GHKM_XML_TREE_PARSER_H_ #define EXTRACT_GHKM_XML_TREE_PARSER_H_
#include "Exception.h"
#include <map> #include <map>
#include <memory> #include <memory>
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
#include "ParseTree.h"
#include "SyntaxNode.h" #include "SyntaxNode.h"
#include "SyntaxNodeCollection.h" #include "SyntaxNodeCollection.h"
#include "SyntaxTree.h"
namespace Moses #include "Exception.h"
namespace MosesTraining
{ {
namespace GHKM namespace GHKM
{ {
// Parses a string in Moses' XML parse tree format and returns a ParseTree // Parses a string in Moses' XML parse tree format and returns a SyntaxTree
// object. // object.
class XmlTreeParser class XmlTreeParser
{ {
public: public:
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &); XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
std::auto_ptr<ParseTree> Parse(const std::string &); std::auto_ptr<SyntaxTree> Parse(const std::string &);
static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &, static std::auto_ptr<SyntaxTree> ConvertTree(const SyntaxNode &,
const std::vector<std::string> &); const std::vector<std::string> &);
const std::vector<std::string>& GetWords() { const std::vector<std::string>& GetWords() {
@ -58,11 +58,11 @@ private:
std::set<std::string> &m_labelSet; std::set<std::string> &m_labelSet;
std::map<std::string, int> &m_topLabelSet; std::map<std::string, int> &m_topLabelSet;
std::string m_line; std::string m_line;
MosesTraining::SyntaxNodeCollection m_tree; SyntaxNodeCollection m_tree;
std::vector<std::string> m_words; std::vector<std::string> m_words;
}; };
} // namespace GHKM } // namespace GHKM
} // namespace Moses } // namespace MosesTraining
#endif #endif