mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
Ongoing moses/phrase-extract refactoring
This commit is contained in:
parent
f61091e38d
commit
f37415a259
@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
|||||||
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
|
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
|
||||||
|
|
||||||
// Initialize phrase orientation scoring object
|
// Initialize phrase orientation scoring object
|
||||||
Moses::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||||
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
|
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
|
||||||
|
|
||||||
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
|
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
|
||||||
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
|||||||
|
|
||||||
// LEFT-TO-RIGHT DIRECTION
|
// LEFT-TO-RIGHT DIRECTION
|
||||||
|
|
||||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
|
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||||
|
|
||||||
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
|
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
|
||||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||||
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
|||||||
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
|
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
|
||||||
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
|
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
|
||||||
// discontinuous
|
// discontinuous
|
||||||
l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||||
} else {
|
} else {
|
||||||
reoClassData->firstNonTerminalIsBoundary = true;
|
reoClassData->firstNonTerminalIsBoundary = true;
|
||||||
}
|
}
|
||||||
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
|||||||
|
|
||||||
// RIGHT-TO-LEFT DIRECTION
|
// RIGHT-TO-LEFT DIRECTION
|
||||||
|
|
||||||
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
|
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||||
|
|
||||||
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
|
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
|
||||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||||
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
|||||||
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
|
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
|
||||||
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
|
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
|
||||||
// discontinuous
|
// discontinuous
|
||||||
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||||
} else {
|
} else {
|
||||||
reoClassData->lastNonTerminalIsBoundary = true;
|
reoClassData->lastNonTerminalIsBoundary = true;
|
||||||
}
|
}
|
||||||
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
|||||||
|
|
||||||
// LEFT-TO-RIGHT DIRECTION
|
// LEFT-TO-RIGHT DIRECTION
|
||||||
|
|
||||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||||
|
|
||||||
IFFEATUREVERBOSE(2) {
|
IFFEATUREVERBOSE(2) {
|
||||||
FEATUREVERBOSE(2, "l2rOrientation ");
|
FEATUREVERBOSE(2, "l2rOrientation ");
|
||||||
switch (l2rOrientation) {
|
switch (l2rOrientation) {
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||||
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
|
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
|
||||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||||
|
|
||||||
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
|
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
|
||||||
// if sub-derivation has left-boundary non-terminal:
|
// if sub-derivation has left-boundary non-terminal:
|
||||||
// add recursive actual score of boundary non-terminal from subderivation
|
// add recursive actual score of boundary non-terminal from subderivation
|
||||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||||
|
|
||||||
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
} else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||||
|
|
||||||
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
|
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
|
||||||
// if sub-derivation has left-boundary non-terminal:
|
// if sub-derivation has left-boundary non-terminal:
|
||||||
// add recursive actual score of boundary non-terminal from subderivation
|
// add recursive actual score of boundary non-terminal from subderivation
|
||||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||||
|
|
||||||
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
} else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||||
|
|
||||||
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
|
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
|
||||||
// if sub-derivation has left-boundary non-terminal:
|
// if sub-derivation has left-boundary non-terminal:
|
||||||
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
|||||||
|
|
||||||
// RIGHT-TO-LEFT DIRECTION
|
// RIGHT-TO-LEFT DIRECTION
|
||||||
|
|
||||||
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||||
|
|
||||||
IFFEATUREVERBOSE(2) {
|
IFFEATUREVERBOSE(2) {
|
||||||
FEATUREVERBOSE(2, "r2lOrientation ");
|
FEATUREVERBOSE(2, "r2lOrientation ");
|
||||||
switch (r2lOrientation) {
|
switch (r2lOrientation) {
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||||
break;
|
break;
|
||||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||||
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
|
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
|
||||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||||
|
|
||||||
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
|
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
|
||||||
// if sub-derivation has right-boundary non-terminal:
|
// if sub-derivation has right-boundary non-terminal:
|
||||||
// add recursive actual score of boundary non-terminal from subderivation
|
// add recursive actual score of boundary non-terminal from subderivation
|
||||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||||
|
|
||||||
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
} else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||||
|
|
||||||
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
|
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
|
||||||
// if sub-derivation has right-boundary non-terminal:
|
// if sub-derivation has right-boundary non-terminal:
|
||||||
// add recursive actual score of boundary non-terminal from subderivation
|
// add recursive actual score of boundary non-terminal from subderivation
|
||||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||||
|
|
||||||
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
} else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||||
|
|
||||||
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
|
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
|
||||||
// if sub-derivation has right-boundary non-terminal:
|
// if sub-derivation has right-boundary non-terminal:
|
||||||
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
|
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||||
{
|
{
|
||||||
if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||||
return &MORIENT;
|
return &MORIENT;
|
||||||
|
|
||||||
} else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||||
return &SORIENT;
|
return &SORIENT;
|
||||||
|
|
||||||
} else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||||
return &DORIENT;
|
return &DORIENT;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -302,8 +302,8 @@ public:
|
|||||||
|
|
||||||
struct ReoClassData {
|
struct ReoClassData {
|
||||||
public:
|
public:
|
||||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||||
bool firstNonTerminalIsBoundary;
|
bool firstNonTerminalIsBoundary;
|
||||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||||
@ -401,7 +401,7 @@ protected:
|
|||||||
ScoreComponentCollection* scoreBreakdown,
|
ScoreComponentCollection* scoreBreakdown,
|
||||||
const std::string* o) const;
|
const std::string* o) const;
|
||||||
|
|
||||||
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
|
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||||
|
|
||||||
static const std::string MORIENT;
|
static const std::string MORIENT;
|
||||||
static const std::string SORIENT;
|
static const std::string SORIENT;
|
||||||
|
@ -10,10 +10,6 @@
|
|||||||
#include "ScoreFeature.h"
|
#include "ScoreFeature.h"
|
||||||
#include "extract-ghkm/Node.h"
|
#include "extract-ghkm/Node.h"
|
||||||
|
|
||||||
using namespace MosesTraining;
|
|
||||||
using namespace Moses;
|
|
||||||
using namespace GHKM;
|
|
||||||
|
|
||||||
namespace MosesTraining
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
|
|
||||||
|
12
phrase-extract/SyntaxTree.h
Normal file
12
phrase-extract/SyntaxTree.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "syntax-common/tree.h"
|
||||||
|
|
||||||
|
#include "SyntaxNode.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
|
{
|
||||||
|
|
||||||
|
typedef Syntax::Tree<SyntaxNode> SyntaxTree;
|
||||||
|
|
||||||
|
} // namespace MosesTraining
|
@ -25,7 +25,7 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -70,4 +70,4 @@ void FlipAlignment(Alignment &a)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -35,5 +35,5 @@ void ReadAlignment(const std::string &, Alignment &);
|
|||||||
void FlipAlignment(Alignment &);
|
void FlipAlignment(Alignment &);
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -19,23 +19,24 @@
|
|||||||
|
|
||||||
#include "AlignmentGraph.h"
|
#include "AlignmentGraph.h"
|
||||||
|
|
||||||
#include "ComposedRule.h"
|
|
||||||
#include "Node.h"
|
|
||||||
#include "Options.h"
|
|
||||||
#include "ParseTree.h"
|
|
||||||
#include "Subgraph.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
|
|
||||||
namespace Moses
|
#include "SyntaxTree.h"
|
||||||
|
|
||||||
|
#include "ComposedRule.h"
|
||||||
|
#include "Node.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "Subgraph.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
|
|
||||||
AlignmentGraph::AlignmentGraph(const ParseTree *t,
|
AlignmentGraph::AlignmentGraph(const SyntaxTree *t,
|
||||||
const std::vector<std::string> &s,
|
const std::vector<std::string> &s,
|
||||||
const Alignment &a)
|
const Alignment &a)
|
||||||
{
|
{
|
||||||
@ -208,7 +209,7 @@ void AlignmentGraph::ExtractComposedRules(Node *node, const Options &options)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
|
Node *AlignmentGraph::CopyParseTree(const SyntaxTree *root)
|
||||||
{
|
{
|
||||||
NodeType nodeType = (root->IsLeaf()) ? TARGET : TREE;
|
NodeType nodeType = (root->IsLeaf()) ? TARGET : TREE;
|
||||||
|
|
||||||
@ -218,10 +219,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
|
|||||||
n->SetPcfgScore(root->value().GetPcfgScore());
|
n->SetPcfgScore(root->value().GetPcfgScore());
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<ParseTree *> &children = root->children();
|
const std::vector<SyntaxTree *> &children = root->children();
|
||||||
std::vector<Node *> childNodes;
|
std::vector<Node *> childNodes;
|
||||||
childNodes.reserve(children.size());
|
childNodes.reserve(children.size());
|
||||||
for (std::vector<ParseTree *>::const_iterator p(children.begin());
|
for (std::vector<SyntaxTree *>::const_iterator p(children.begin());
|
||||||
p != children.end(); ++p) {
|
p != children.end(); ++p) {
|
||||||
Node *child = CopyParseTree(*p);
|
Node *child = CopyParseTree(*p);
|
||||||
child->AddParent(n.get());
|
child->AddParent(n.get());
|
||||||
@ -385,4 +386,4 @@ Node *AlignmentGraph::DetermineAttachmentPoint(int index)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -21,15 +21,16 @@
|
|||||||
#ifndef EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
|
#ifndef EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
|
||||||
#define EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
|
#define EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
|
||||||
|
|
||||||
#include "Alignment.h"
|
|
||||||
#include "Options.h"
|
|
||||||
#include "ParseTree.h"
|
|
||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
#include "SyntaxTree.h"
|
||||||
|
|
||||||
|
#include "Alignment.h"
|
||||||
|
#include "Options.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -40,7 +41,7 @@ class Subgraph;
|
|||||||
class AlignmentGraph
|
class AlignmentGraph
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
AlignmentGraph(const ParseTree *,
|
AlignmentGraph(const SyntaxTree *,
|
||||||
const std::vector<std::string> &,
|
const std::vector<std::string> &,
|
||||||
const Alignment &);
|
const Alignment &);
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ private:
|
|||||||
AlignmentGraph(const AlignmentGraph &);
|
AlignmentGraph(const AlignmentGraph &);
|
||||||
AlignmentGraph &operator=(const AlignmentGraph &);
|
AlignmentGraph &operator=(const AlignmentGraph &);
|
||||||
|
|
||||||
Node *CopyParseTree(const ParseTree *);
|
Node *CopyParseTree(const SyntaxTree *);
|
||||||
void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const;
|
void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const;
|
||||||
void CalcComplementSpans(Node *);
|
void CalcComplementSpans(Node *);
|
||||||
void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
|
void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
|
||||||
@ -77,6 +78,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -19,15 +19,15 @@
|
|||||||
|
|
||||||
#include "ComposedRule.h"
|
#include "ComposedRule.h"
|
||||||
|
|
||||||
#include "Node.h"
|
|
||||||
#include "Options.h"
|
|
||||||
#include "Subgraph.h"
|
|
||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
namespace Moses
|
#include "Node.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "Subgraph.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -128,4 +128,4 @@ Subgraph ComposedRule::CreateSubgraph()
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -21,12 +21,12 @@
|
|||||||
#ifndef EXTRACT_GHKM_COMPOSED_RULE_H_
|
#ifndef EXTRACT_GHKM_COMPOSED_RULE_H_
|
||||||
#define EXTRACT_GHKM_COMPOSED_RULE_H_
|
#define EXTRACT_GHKM_COMPOSED_RULE_H_
|
||||||
|
|
||||||
#include "Subgraph.h"
|
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
namespace Moses
|
#include "Subgraph.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -67,6 +67,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -41,6 +41,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -19,29 +19,6 @@
|
|||||||
|
|
||||||
#include "ExtractGHKM.h"
|
#include "ExtractGHKM.h"
|
||||||
|
|
||||||
#include "Alignment.h"
|
|
||||||
#include "AlignmentGraph.h"
|
|
||||||
#include "Exception.h"
|
|
||||||
#include "InputFileStream.h"
|
|
||||||
#include "Node.h"
|
|
||||||
#include "OutputFileStream.h"
|
|
||||||
#include "Options.h"
|
|
||||||
#include "ParseTree.h"
|
|
||||||
#include "PhraseOrientation.h"
|
|
||||||
#include "ScfgRule.h"
|
|
||||||
#include "ScfgRuleWriter.h"
|
|
||||||
#include "Span.h"
|
|
||||||
#include "StsgRule.h"
|
|
||||||
#include "StsgRuleWriter.h"
|
|
||||||
#include "SyntaxNode.h"
|
|
||||||
#include "SyntaxNodeCollection.h"
|
|
||||||
#include "tables-core.h"
|
|
||||||
#include "XmlException.h"
|
|
||||||
#include "XmlTree.h"
|
|
||||||
#include "XmlTreeParser.h"
|
|
||||||
|
|
||||||
#include <boost/program_options.hpp>
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
@ -51,13 +28,40 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
#include <boost/program_options.hpp>
|
||||||
|
|
||||||
|
#include "InputFileStream.h"
|
||||||
|
#include "OutputFileStream.h"
|
||||||
|
#include "SyntaxNode.h"
|
||||||
|
#include "SyntaxNodeCollection.h"
|
||||||
|
#include "SyntaxTree.h"
|
||||||
|
#include "tables-core.h"
|
||||||
|
#include "XmlException.h"
|
||||||
|
#include "XmlTree.h"
|
||||||
|
|
||||||
|
#include "Alignment.h"
|
||||||
|
#include "AlignmentGraph.h"
|
||||||
|
#include "Exception.h"
|
||||||
|
#include "Node.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "PhraseOrientation.h"
|
||||||
|
#include "ScfgRule.h"
|
||||||
|
#include "ScfgRuleWriter.h"
|
||||||
|
#include "Span.h"
|
||||||
|
#include "StsgRule.h"
|
||||||
|
#include "StsgRuleWriter.h"
|
||||||
|
#include "XmlTreeParser.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
|
|
||||||
int ExtractGHKM::Main(int argc, char *argv[])
|
int ExtractGHKM::Main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
using Moses::InputFileStream;
|
||||||
|
using Moses::OutputFileStream;
|
||||||
|
|
||||||
// Process command-line options.
|
// Process command-line options.
|
||||||
Options options;
|
Options options;
|
||||||
ProcessOptions(argc, argv, options);
|
ProcessOptions(argc, argv, options);
|
||||||
@ -158,7 +162,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
|||||||
std::cerr << "skipping line " << lineNum << " with empty target tree\n";
|
std::cerr << "skipping line " << lineNum << " with empty target tree\n";
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
std::auto_ptr<ParseTree> targetParseTree;
|
std::auto_ptr<SyntaxTree> targetParseTree;
|
||||||
try {
|
try {
|
||||||
targetParseTree = targetXmlTreeParser.Parse(targetLine);
|
targetParseTree = targetXmlTreeParser.Parse(targetLine);
|
||||||
assert(targetParseTree.get());
|
assert(targetParseTree.get());
|
||||||
@ -173,8 +177,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
|||||||
|
|
||||||
|
|
||||||
// Parse source tree and construct a SyntaxTree object.
|
// Parse source tree and construct a SyntaxTree object.
|
||||||
MosesTraining::SyntaxNodeCollection sourceSyntaxTree;
|
SyntaxNodeCollection sourceSyntaxTree;
|
||||||
MosesTraining::SyntaxNode *sourceSyntaxTreeRoot=NULL;
|
SyntaxNode *sourceSyntaxTreeRoot=NULL;
|
||||||
|
|
||||||
if (options.sourceLabels) {
|
if (options.sourceLabels) {
|
||||||
try {
|
try {
|
||||||
@ -197,8 +201,9 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
|||||||
// Read source tokens.
|
// Read source tokens.
|
||||||
std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
|
std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
|
||||||
|
|
||||||
// Construct a source ParseTree object from the SyntaxNodeCollection object.
|
// Construct a source SyntaxTree object from the SyntaxNodeCollection
|
||||||
std::auto_ptr<ParseTree> sourceParseTree;
|
// object.
|
||||||
|
std::auto_ptr<SyntaxTree> sourceParseTree;
|
||||||
|
|
||||||
if (options.sourceLabels) {
|
if (options.sourceLabels) {
|
||||||
try {
|
try {
|
||||||
@ -264,12 +269,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
|||||||
|
|
||||||
const std::vector<const Subgraph *> &rules = (*p)->GetRules();
|
const std::vector<const Subgraph *> &rules = (*p)->GetRules();
|
||||||
|
|
||||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN;
|
PhraseOrientation::REO_CLASS l2rOrientation=PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=PhraseOrientation::REO_CLASS_UNKNOWN;
|
||||||
if (options.phraseOrientation && !rules.empty()) {
|
if (options.phraseOrientation && !rules.empty()) {
|
||||||
int sourceSpanBegin = *((*p)->GetSpan().begin());
|
int sourceSpanBegin = *((*p)->GetSpan().begin());
|
||||||
int sourceSpanEnd = *((*p)->GetSpan().rbegin());
|
int sourceSpanEnd = *((*p)->GetSpan().rbegin());
|
||||||
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
|
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_L2R);
|
||||||
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
|
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_R2L);
|
||||||
// std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
|
// std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
|
||||||
// std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
|
// std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
|
||||||
}
|
}
|
||||||
@ -310,8 +315,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
|||||||
fwdExtractStream << " ";
|
fwdExtractStream << " ";
|
||||||
phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
|
phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
|
||||||
fwdExtractStream << "}}";
|
fwdExtractStream << "}}";
|
||||||
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
|
phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
|
||||||
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
|
phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
|
||||||
}
|
}
|
||||||
fwdExtractStream << std::endl;
|
fwdExtractStream << std::endl;
|
||||||
invExtractStream << std::endl;
|
invExtractStream << std::endl;
|
||||||
@ -400,7 +405,7 @@ void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
|
void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
|
||||||
OutputFileStream &stream)
|
Moses::OutputFileStream &stream)
|
||||||
{
|
{
|
||||||
bool ret = stream.Open(filename);
|
bool ret = stream.Open(filename);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
@ -823,16 +828,16 @@ void ExtractGHKM::WriteSourceLabelSet(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ExtractGHKM::CollectWordLabelCounts(
|
void ExtractGHKM::CollectWordLabelCounts(
|
||||||
ParseTree &root,
|
SyntaxTree &root,
|
||||||
const Options &options,
|
const Options &options,
|
||||||
std::map<std::string, int> &wordCount,
|
std::map<std::string, int> &wordCount,
|
||||||
std::map<std::string, std::string> &wordLabel)
|
std::map<std::string, std::string> &wordLabel)
|
||||||
{
|
{
|
||||||
for (ParseTree::ConstLeafIterator p(root);
|
for (SyntaxTree::ConstLeafIterator p(root);
|
||||||
p != ParseTree::ConstLeafIterator(); ++p) {
|
p != SyntaxTree::ConstLeafIterator(); ++p) {
|
||||||
const ParseTree &leaf = *p;
|
const SyntaxTree &leaf = *p;
|
||||||
const std::string &word = leaf.value().GetLabel();
|
const std::string &word = leaf.value().GetLabel();
|
||||||
const ParseTree *ancestor = leaf.parent();
|
const SyntaxTree *ancestor = leaf.parent();
|
||||||
// If unary rule elimination is enabled and this word is at the end of a
|
// If unary rule elimination is enabled and this word is at the end of a
|
||||||
// chain of unary rewrites, e.g.
|
// chain of unary rewrites, e.g.
|
||||||
// PN-SB -> NE -> word
|
// PN-SB -> NE -> word
|
||||||
@ -849,12 +854,12 @@ void ExtractGHKM::CollectWordLabelCounts(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> ExtractGHKM::ReadTokens(const ParseTree &root) const
|
std::vector<std::string> ExtractGHKM::ReadTokens(const SyntaxTree &root) const
|
||||||
{
|
{
|
||||||
std::vector<std::string> tokens;
|
std::vector<std::string> tokens;
|
||||||
for (ParseTree::ConstLeafIterator p(root);
|
for (SyntaxTree::ConstLeafIterator p(root);
|
||||||
p != ParseTree::ConstLeafIterator(); ++p) {
|
p != SyntaxTree::ConstLeafIterator(); ++p) {
|
||||||
const ParseTree &leaf = *p;
|
const SyntaxTree &leaf = *p;
|
||||||
const std::string &word = leaf.value().GetLabel();
|
const std::string &word = leaf.value().GetLabel();
|
||||||
tokens.push_back(word);
|
tokens.push_back(word);
|
||||||
}
|
}
|
||||||
@ -956,4 +961,4 @@ void ExtractGHKM::StripBitParLabels(
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -25,13 +25,11 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "ParseTree.h"
|
#include "OutputFileStream.h"
|
||||||
|
#include "SyntaxTree.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
|
|
||||||
class OutputFileStream;
|
|
||||||
|
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -52,9 +50,9 @@ private:
|
|||||||
void Error(const std::string &) const;
|
void Error(const std::string &) const;
|
||||||
void OpenInputFileOrDie(const std::string &, std::ifstream &);
|
void OpenInputFileOrDie(const std::string &, std::ifstream &);
|
||||||
void OpenOutputFileOrDie(const std::string &, std::ofstream &);
|
void OpenOutputFileOrDie(const std::string &, std::ofstream &);
|
||||||
void OpenOutputFileOrDie(const std::string &, OutputFileStream &);
|
void OpenOutputFileOrDie(const std::string &, Moses::OutputFileStream &);
|
||||||
void RecordTreeLabels(const ParseTree &, std::set<std::string> &);
|
void RecordTreeLabels(const SyntaxTree &, std::set<std::string> &);
|
||||||
void CollectWordLabelCounts(ParseTree &,
|
void CollectWordLabelCounts(SyntaxTree &,
|
||||||
const Options &,
|
const Options &,
|
||||||
std::map<std::string, int> &,
|
std::map<std::string, int> &,
|
||||||
std::map<std::string, std::string> &);
|
std::map<std::string, std::string> &);
|
||||||
@ -78,7 +76,7 @@ private:
|
|||||||
std::map<std::string, int> &outTopLabelSet) const;
|
std::map<std::string, int> &outTopLabelSet) const;
|
||||||
|
|
||||||
std::vector<std::string> ReadTokens(const std::string &) const;
|
std::vector<std::string> ReadTokens(const std::string &) const;
|
||||||
std::vector<std::string> ReadTokens(const ParseTree &root) const;
|
std::vector<std::string> ReadTokens(const SyntaxTree &root) const;
|
||||||
|
|
||||||
void ProcessOptions(int, char *[], Options &) const;
|
void ProcessOptions(int, char *[], Options &) const;
|
||||||
|
|
||||||
@ -86,5 +84,4 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -21,6 +21,6 @@
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
Moses::GHKM::ExtractGHKM tool;
|
MosesTraining::GHKM::ExtractGHKM tool;
|
||||||
return tool.Main(argc, argv);
|
return tool.Main(argc, argv);
|
||||||
}
|
}
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
#include "Subgraph.h"
|
#include "Subgraph.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -70,4 +70,4 @@ void Node::GetTargetWords(std::vector<std::string> &targetWords) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -21,14 +21,14 @@
|
|||||||
#ifndef EXTRACT_GHKM_NODE_H_
|
#ifndef EXTRACT_GHKM_NODE_H_
|
||||||
#define EXTRACT_GHKM_NODE_H_
|
#define EXTRACT_GHKM_NODE_H_
|
||||||
|
|
||||||
#include "Span.h"
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
#include "Span.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -215,6 +215,6 @@ Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -89,5 +89,5 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -1,38 +0,0 @@
|
|||||||
/***********************************************************************
|
|
||||||
Moses - statistical machine translation system
|
|
||||||
Copyright (C) 2006-2011 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
#ifndef EXTRACT_GHKM_PARSE_TREE_H_
|
|
||||||
#define EXTRACT_GHKM_PARSE_TREE_H_
|
|
||||||
|
|
||||||
#include "syntax-common/tree.h"
|
|
||||||
|
|
||||||
#include "SyntaxNode.h"
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
namespace GHKM
|
|
||||||
{
|
|
||||||
|
|
||||||
typedef MosesTraining::Syntax::Tree<MosesTraining::SyntaxNode> ParseTree;
|
|
||||||
|
|
||||||
} // namespace GHKM
|
|
||||||
} // namespace Moses
|
|
||||||
|
|
||||||
#endif
|
|
@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
#include <boost/assign/list_of.hpp>
|
#include <boost/assign/list_of.hpp>
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -469,5 +469,5 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
Moses - statistical machine translation system
|
Moses - statistical machine translation system
|
||||||
Copyright (C) 2006-2011 University of Edinburgh
|
Copyright (C) 2006-2011 University of Edinburgh
|
||||||
@ -20,16 +19,18 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Alignment.h"
|
|
||||||
#include "moses/AlignmentInfo.h"
|
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/unordered_map.hpp>
|
#include <boost/unordered_map.hpp>
|
||||||
|
|
||||||
namespace Moses
|
#include "moses/AlignmentInfo.h"
|
||||||
|
|
||||||
|
#include "Alignment.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -53,8 +54,8 @@ public:
|
|||||||
|
|
||||||
PhraseOrientation(int sourceSize,
|
PhraseOrientation(int sourceSize,
|
||||||
int targetSize,
|
int targetSize,
|
||||||
const AlignmentInfo &alignTerm,
|
const Moses::AlignmentInfo &alignTerm,
|
||||||
const AlignmentInfo &alignNonTerm);
|
const Moses::AlignmentInfo &alignNonTerm);
|
||||||
|
|
||||||
REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
|
REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
|
||||||
REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
|
REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
|
||||||
@ -119,5 +120,4 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
#include "Subgraph.h"
|
#include "Subgraph.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -38,4 +38,4 @@ bool Rule::PartitionOrderComp(const Node *a, const Node *b)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "Alignment.h"
|
#include "Alignment.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -54,6 +54,6 @@ protected:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -26,13 +26,13 @@
|
|||||||
#include "SyntaxNode.h"
|
#include "SyntaxNode.h"
|
||||||
#include "SyntaxNodeCollection.h"
|
#include "SyntaxNodeCollection.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
|
|
||||||
ScfgRule::ScfgRule(const Subgraph &fragment,
|
ScfgRule::ScfgRule(const Subgraph &fragment,
|
||||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree)
|
const SyntaxNodeCollection *sourceSyntaxTree)
|
||||||
: m_graphFragment(fragment)
|
: m_graphFragment(fragment)
|
||||||
, m_sourceLHS("X", NonTerminal)
|
, m_sourceLHS("X", NonTerminal)
|
||||||
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
|
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
|
||||||
@ -134,13 +134,13 @@ ScfgRule::ScfgRule(const Subgraph &fragment,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScfgRule::PushSourceLabel(
|
void ScfgRule::PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
|
||||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
|
const Node *node,
|
||||||
const Node *node, const std::string &nonMatchingLabel)
|
const std::string &nonMatchingLabel)
|
||||||
{
|
{
|
||||||
ContiguousSpan span = Closure(node->GetSpan());
|
ContiguousSpan span = Closure(node->GetSpan());
|
||||||
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?
|
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?
|
||||||
std::vector<MosesTraining::SyntaxNode*> sourceLabels =
|
std::vector<SyntaxNode*> sourceLabels =
|
||||||
sourceSyntaxTree->GetNodes(span.first,span.second);
|
sourceSyntaxTree->GetNodes(span.first,span.second);
|
||||||
if (!sourceLabels.empty()) {
|
if (!sourceLabels.empty()) {
|
||||||
// store the topmost matching label from the source syntax tree
|
// store the topmost matching label from the source syntax tree
|
||||||
@ -197,4 +197,4 @@ void ScfgRule::UpdateSourceLabelCoocCounts(std::map< std::string, std::map<std::
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
#include "Rule.h"
|
#include "Rule.h"
|
||||||
#include "SyntaxNodeCollection.h"
|
#include "SyntaxNodeCollection.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -41,7 +41,7 @@ class ScfgRule : public Rule
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ScfgRule(const Subgraph &fragment,
|
ScfgRule(const Subgraph &fragment,
|
||||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree = 0);
|
const SyntaxNodeCollection *sourceSyntaxTree = 0);
|
||||||
|
|
||||||
const Subgraph &GetGraphFragment() const {
|
const Subgraph &GetGraphFragment() const {
|
||||||
return m_graphFragment;
|
return m_graphFragment;
|
||||||
@ -78,8 +78,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void PushSourceLabel(
|
void PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
|
||||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
|
|
||||||
const Node *node, const std::string &nonMatchingLabel);
|
const Node *node, const std::string &nonMatchingLabel);
|
||||||
|
|
||||||
const Subgraph& m_graphFragment;
|
const Subgraph& m_graphFragment;
|
||||||
@ -94,4 +93,4 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -19,10 +19,6 @@
|
|||||||
|
|
||||||
#include "ScfgRuleWriter.h"
|
#include "ScfgRuleWriter.h"
|
||||||
|
|
||||||
#include "Alignment.h"
|
|
||||||
#include "Options.h"
|
|
||||||
#include "ScfgRule.h"
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
@ -30,7 +26,11 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
#include "Alignment.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "ScfgRule.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -229,4 +229,4 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -19,11 +19,11 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Subgraph.h"
|
|
||||||
|
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
|
||||||
namespace Moses
|
#include "Subgraph.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -57,5 +57,5 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
|
|
||||||
#include "Span.h"
|
#include "Span.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -45,4 +45,4 @@ ContiguousSpan Closure(const Span &s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
namespace Moses
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -36,7 +36,7 @@ bool SpansIntersect(const Span &, const ContiguousSpan &);
|
|||||||
|
|
||||||
ContiguousSpan Closure(const Span &);
|
ContiguousSpan Closure(const Span &);
|
||||||
|
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
#include "StsgRule.h"
|
#include "StsgRule.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
#include "Subgraph.h"
|
#include "Subgraph.h"
|
||||||
|
|
||||||
#include <algorithm>
|
namespace MosesTraining
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -91,4 +91,4 @@ StsgRule::StsgRule(const Subgraph &fragment)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -2,12 +2,12 @@
|
|||||||
#ifndef EXTRACT_GHKM_STSG_RULE_H_
|
#ifndef EXTRACT_GHKM_STSG_RULE_H_
|
||||||
#define EXTRACT_GHKM_STSG_RULE_H_
|
#define EXTRACT_GHKM_STSG_RULE_H_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "Rule.h"
|
#include "Rule.h"
|
||||||
#include "Subgraph.h"
|
#include "Subgraph.h"
|
||||||
|
|
||||||
#include <vector>
|
namespace MosesTraining
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -39,6 +39,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,9 +1,5 @@
|
|||||||
#include "StsgRuleWriter.h"
|
#include "StsgRuleWriter.h"
|
||||||
|
|
||||||
#include "Alignment.h"
|
|
||||||
#include "Options.h"
|
|
||||||
#include "StsgRule.h"
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
@ -11,7 +7,11 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
#include "Alignment.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "StsgRule.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -92,4 +92,4 @@ void StsgRuleWriter::Write(const StsgRule &rule)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -2,11 +2,11 @@
|
|||||||
#ifndef EXTRACT_GHKM_STSG_RULE_WRITER_H_
|
#ifndef EXTRACT_GHKM_STSG_RULE_WRITER_H_
|
||||||
#define EXTRACT_GHKM_STSG_RULE_WRITER_H_
|
#define EXTRACT_GHKM_STSG_RULE_WRITER_H_
|
||||||
|
|
||||||
#include "Subgraph.h"
|
|
||||||
|
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
|
||||||
namespace Moses
|
#include "Subgraph.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -36,6 +36,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -18,10 +18,11 @@
|
|||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "Subgraph.h"
|
|
||||||
#include "Node.h"
|
|
||||||
|
|
||||||
namespace Moses
|
#include "Node.h"
|
||||||
|
#include "Subgraph.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -193,5 +194,5 @@ void Subgraph::RecursivelyGetPartsOfSpeech(const Node *n, std::vector<std::strin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
|
@ -19,12 +19,12 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Node.h"
|
|
||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Moses
|
#include "Node.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -137,5 +137,5 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
|
@ -19,18 +19,17 @@
|
|||||||
|
|
||||||
#include "XmlTreeParser.h"
|
#include "XmlTreeParser.h"
|
||||||
|
|
||||||
#include "ParseTree.h"
|
|
||||||
#include "tables-core.h"
|
|
||||||
#include "XmlException.h"
|
|
||||||
#include "XmlTree.h"
|
|
||||||
#include "util/tokenize.hh"
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
using namespace MosesTraining;
|
#include "util/tokenize.hh"
|
||||||
|
|
||||||
namespace Moses
|
#include "SyntaxTree.h"
|
||||||
|
#include "tables-core.h"
|
||||||
|
#include "XmlException.h"
|
||||||
|
#include "XmlTree.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
@ -42,7 +41,7 @@ XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
|
std::auto_ptr<SyntaxTree> XmlTreeParser::Parse(const std::string &line)
|
||||||
{
|
{
|
||||||
m_line = line;
|
m_line = line;
|
||||||
m_tree.Clear();
|
m_tree.Clear();
|
||||||
@ -61,12 +60,12 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
|
|||||||
return ConvertTree(*root, m_words);
|
return ConvertTree(*root, m_words);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Converts a SyntaxNode tree to a Moses::GHKM::ParseTree.
|
// Converts a SyntaxNode tree to a MosesTraining::GHKM::SyntaxTree.
|
||||||
std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
std::auto_ptr<SyntaxTree> XmlTreeParser::ConvertTree(
|
||||||
const SyntaxNode &tree,
|
const SyntaxNode &tree,
|
||||||
const std::vector<std::string> &words)
|
const std::vector<std::string> &words)
|
||||||
{
|
{
|
||||||
std::auto_ptr<ParseTree> root(new ParseTree(tree));
|
std::auto_ptr<SyntaxTree> root(new SyntaxTree(tree));
|
||||||
const std::vector<SyntaxNode*> &children = tree.GetChildren();
|
const std::vector<SyntaxNode*> &children = tree.GetChildren();
|
||||||
if (children.empty()) {
|
if (children.empty()) {
|
||||||
if (tree.GetStart() != tree.GetEnd()) {
|
if (tree.GetStart() != tree.GetEnd()) {
|
||||||
@ -76,14 +75,14 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
|||||||
throw Exception(msg.str());
|
throw Exception(msg.str());
|
||||||
}
|
}
|
||||||
SyntaxNode value(tree.GetStart(), tree.GetStart(), words[tree.GetStart()]);
|
SyntaxNode value(tree.GetStart(), tree.GetStart(), words[tree.GetStart()]);
|
||||||
std::auto_ptr<ParseTree> leaf(new ParseTree(value));
|
std::auto_ptr<SyntaxTree> leaf(new SyntaxTree(value));
|
||||||
leaf->parent() = root.get();
|
leaf->parent() = root.get();
|
||||||
root->children().push_back(leaf.release());
|
root->children().push_back(leaf.release());
|
||||||
} else {
|
} else {
|
||||||
for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
|
for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
|
||||||
p != children.end(); ++p) {
|
p != children.end(); ++p) {
|
||||||
assert(*p);
|
assert(*p);
|
||||||
std::auto_ptr<ParseTree> child = ConvertTree(**p, words);
|
std::auto_ptr<SyntaxTree> child = ConvertTree(**p, words);
|
||||||
child->parent() = root.get();
|
child->parent() = root.get();
|
||||||
root->children().push_back(child.release());
|
root->children().push_back(child.release());
|
||||||
}
|
}
|
||||||
@ -92,4 +91,4 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
@ -21,32 +21,32 @@
|
|||||||
#ifndef EXTRACT_GHKM_XML_TREE_PARSER_H_
|
#ifndef EXTRACT_GHKM_XML_TREE_PARSER_H_
|
||||||
#define EXTRACT_GHKM_XML_TREE_PARSER_H_
|
#define EXTRACT_GHKM_XML_TREE_PARSER_H_
|
||||||
|
|
||||||
#include "Exception.h"
|
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "ParseTree.h"
|
|
||||||
#include "SyntaxNode.h"
|
#include "SyntaxNode.h"
|
||||||
#include "SyntaxNodeCollection.h"
|
#include "SyntaxNodeCollection.h"
|
||||||
|
#include "SyntaxTree.h"
|
||||||
|
|
||||||
namespace Moses
|
#include "Exception.h"
|
||||||
|
|
||||||
|
namespace MosesTraining
|
||||||
{
|
{
|
||||||
namespace GHKM
|
namespace GHKM
|
||||||
{
|
{
|
||||||
|
|
||||||
// Parses a string in Moses' XML parse tree format and returns a ParseTree
|
// Parses a string in Moses' XML parse tree format and returns a SyntaxTree
|
||||||
// object.
|
// object.
|
||||||
class XmlTreeParser
|
class XmlTreeParser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
|
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
|
||||||
std::auto_ptr<ParseTree> Parse(const std::string &);
|
std::auto_ptr<SyntaxTree> Parse(const std::string &);
|
||||||
|
|
||||||
static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
|
static std::auto_ptr<SyntaxTree> ConvertTree(const SyntaxNode &,
|
||||||
const std::vector<std::string> &);
|
const std::vector<std::string> &);
|
||||||
|
|
||||||
const std::vector<std::string>& GetWords() {
|
const std::vector<std::string>& GetWords() {
|
||||||
@ -58,11 +58,11 @@ private:
|
|||||||
std::set<std::string> &m_labelSet;
|
std::set<std::string> &m_labelSet;
|
||||||
std::map<std::string, int> &m_topLabelSet;
|
std::map<std::string, int> &m_topLabelSet;
|
||||||
std::string m_line;
|
std::string m_line;
|
||||||
MosesTraining::SyntaxNodeCollection m_tree;
|
SyntaxNodeCollection m_tree;
|
||||||
std::vector<std::string> m_words;
|
std::vector<std::string> m_words;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace GHKM
|
} // namespace GHKM
|
||||||
} // namespace Moses
|
} // namespace MosesTraining
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user