mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
Ongoing moses/phrase-extract refactoring
This commit is contained in:
parent
f61091e38d
commit
f37415a259
@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
|
||||
|
||||
// Initialize phrase orientation scoring object
|
||||
Moses::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||
MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
|
||||
|
||||
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
|
||||
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
|
||||
// LEFT-TO-RIGHT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
|
||||
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
|
||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
|
||||
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
|
||||
// discontinuous
|
||||
l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
} else {
|
||||
reoClassData->firstNonTerminalIsBoundary = true;
|
||||
}
|
||||
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
|
||||
// RIGHT-TO-LEFT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
|
||||
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
|
||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
|
||||
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
|
||||
// discontinuous
|
||||
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
} else {
|
||||
reoClassData->lastNonTerminalIsBoundary = true;
|
||||
}
|
||||
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
// LEFT-TO-RIGHT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, "l2rOrientation ");
|
||||
switch (l2rOrientation) {
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
|
||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||
break;
|
||||
default:
|
||||
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
} else {
|
||||
|
||||
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
|
||||
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||
|
||||
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
|
||||
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||
|
||||
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
|
||||
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
// RIGHT-TO-LEFT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, "r2lOrientation ");
|
||||
switch (r2lOrientation) {
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
|
||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||
break;
|
||||
default:
|
||||
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
} else {
|
||||
|
||||
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
|
||||
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||
|
||||
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
|
||||
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||
|
||||
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
|
||||
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
|
||||
}
|
||||
|
||||
|
||||
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||
{
|
||||
if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
return &MORIENT;
|
||||
|
||||
} else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
return &SORIENT;
|
||||
|
||||
} else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
return &DORIENT;
|
||||
|
||||
} else {
|
||||
|
@ -302,8 +302,8 @@ public:
|
||||
|
||||
struct ReoClassData {
|
||||
public:
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
bool firstNonTerminalIsBoundary;
|
||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||
@ -401,7 +401,7 @@ protected:
|
||||
ScoreComponentCollection* scoreBreakdown,
|
||||
const std::string* o) const;
|
||||
|
||||
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
|
||||
static const std::string MORIENT;
|
||||
static const std::string SORIENT;
|
||||
|
@ -10,10 +10,6 @@
|
||||
#include "ScoreFeature.h"
|
||||
#include "extract-ghkm/Node.h"
|
||||
|
||||
using namespace MosesTraining;
|
||||
using namespace Moses;
|
||||
using namespace GHKM;
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
|
||||
|
12
phrase-extract/SyntaxTree.h
Normal file
12
phrase-extract/SyntaxTree.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include "syntax-common/tree.h"
|
||||
|
||||
#include "SyntaxNode.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
|
||||
// SyntaxTree: alias for the generic Syntax::Tree template (declared in
// syntax-common/tree.h) instantiated with SyntaxNode as its value type.
typedef Syntax::Tree<SyntaxNode> SyntaxTree;
|
||||
|
||||
} // namespace MosesTraining
|
@ -25,7 +25,7 @@
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -70,4 +70,4 @@ void FlipAlignment(Alignment &a)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -35,5 +35,5 @@ void ReadAlignment(const std::string &, Alignment &);
|
||||
void FlipAlignment(Alignment &);
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
|
@ -19,23 +19,24 @@
|
||||
|
||||
#include "AlignmentGraph.h"
|
||||
|
||||
#include "ComposedRule.h"
|
||||
#include "Node.h"
|
||||
#include "Options.h"
|
||||
#include "ParseTree.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
|
||||
namespace Moses
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
#include "ComposedRule.h"
|
||||
#include "Node.h"
|
||||
#include "Options.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
AlignmentGraph::AlignmentGraph(const ParseTree *t,
|
||||
AlignmentGraph::AlignmentGraph(const SyntaxTree *t,
|
||||
const std::vector<std::string> &s,
|
||||
const Alignment &a)
|
||||
{
|
||||
@ -208,7 +209,7 @@ void AlignmentGraph::ExtractComposedRules(Node *node, const Options &options)
|
||||
}
|
||||
}
|
||||
|
||||
Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
|
||||
Node *AlignmentGraph::CopyParseTree(const SyntaxTree *root)
|
||||
{
|
||||
NodeType nodeType = (root->IsLeaf()) ? TARGET : TREE;
|
||||
|
||||
@ -218,10 +219,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
|
||||
n->SetPcfgScore(root->value().GetPcfgScore());
|
||||
}
|
||||
|
||||
const std::vector<ParseTree *> &children = root->children();
|
||||
const std::vector<SyntaxTree *> &children = root->children();
|
||||
std::vector<Node *> childNodes;
|
||||
childNodes.reserve(children.size());
|
||||
for (std::vector<ParseTree *>::const_iterator p(children.begin());
|
||||
for (std::vector<SyntaxTree *>::const_iterator p(children.begin());
|
||||
p != children.end(); ++p) {
|
||||
Node *child = CopyParseTree(*p);
|
||||
child->AddParent(n.get());
|
||||
@ -385,4 +386,4 @@ Node *AlignmentGraph::DetermineAttachmentPoint(int index)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -21,15 +21,16 @@
|
||||
#ifndef EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
|
||||
#define EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "Options.h"
|
||||
#include "ParseTree.h"
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "Options.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -40,7 +41,7 @@ class Subgraph;
|
||||
class AlignmentGraph
|
||||
{
|
||||
public:
|
||||
AlignmentGraph(const ParseTree *,
|
||||
AlignmentGraph(const SyntaxTree *,
|
||||
const std::vector<std::string> &,
|
||||
const Alignment &);
|
||||
|
||||
@ -61,7 +62,7 @@ private:
|
||||
AlignmentGraph(const AlignmentGraph &);
|
||||
AlignmentGraph &operator=(const AlignmentGraph &);
|
||||
|
||||
Node *CopyParseTree(const ParseTree *);
|
||||
Node *CopyParseTree(const SyntaxTree *);
|
||||
void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const;
|
||||
void CalcComplementSpans(Node *);
|
||||
void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
|
||||
@ -77,6 +78,6 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -19,15 +19,15 @@
|
||||
|
||||
#include "ComposedRule.h"
|
||||
|
||||
#include "Node.h"
|
||||
#include "Options.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
|
||||
namespace Moses
|
||||
#include "Node.h"
|
||||
#include "Options.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -128,4 +128,4 @@ Subgraph ComposedRule::CreateSubgraph()
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -21,12 +21,12 @@
|
||||
#ifndef EXTRACT_GHKM_COMPOSED_RULE_H_
|
||||
#define EXTRACT_GHKM_COMPOSED_RULE_H_
|
||||
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
|
||||
namespace Moses
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -67,6 +67,6 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -41,6 +41,6 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -19,29 +19,6 @@
|
||||
|
||||
#include "ExtractGHKM.h"
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "AlignmentGraph.h"
|
||||
#include "Exception.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "Node.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "Options.h"
|
||||
#include "ParseTree.h"
|
||||
#include "PhraseOrientation.h"
|
||||
#include "ScfgRule.h"
|
||||
#include "ScfgRuleWriter.h"
|
||||
#include "Span.h"
|
||||
#include "StsgRule.h"
|
||||
#include "StsgRuleWriter.h"
|
||||
#include "SyntaxNode.h"
|
||||
#include "SyntaxNodeCollection.h"
|
||||
#include "tables-core.h"
|
||||
#include "XmlException.h"
|
||||
#include "XmlTree.h"
|
||||
#include "XmlTreeParser.h"
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
@ -51,13 +28,40 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include "InputFileStream.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "SyntaxNode.h"
|
||||
#include "SyntaxNodeCollection.h"
|
||||
#include "SyntaxTree.h"
|
||||
#include "tables-core.h"
|
||||
#include "XmlException.h"
|
||||
#include "XmlTree.h"
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "AlignmentGraph.h"
|
||||
#include "Exception.h"
|
||||
#include "Node.h"
|
||||
#include "Options.h"
|
||||
#include "PhraseOrientation.h"
|
||||
#include "ScfgRule.h"
|
||||
#include "ScfgRuleWriter.h"
|
||||
#include "Span.h"
|
||||
#include "StsgRule.h"
|
||||
#include "StsgRuleWriter.h"
|
||||
#include "XmlTreeParser.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
int ExtractGHKM::Main(int argc, char *argv[])
|
||||
{
|
||||
using Moses::InputFileStream;
|
||||
using Moses::OutputFileStream;
|
||||
|
||||
// Process command-line options.
|
||||
Options options;
|
||||
ProcessOptions(argc, argv, options);
|
||||
@ -158,7 +162,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
||||
std::cerr << "skipping line " << lineNum << " with empty target tree\n";
|
||||
continue;
|
||||
}
|
||||
std::auto_ptr<ParseTree> targetParseTree;
|
||||
std::auto_ptr<SyntaxTree> targetParseTree;
|
||||
try {
|
||||
targetParseTree = targetXmlTreeParser.Parse(targetLine);
|
||||
assert(targetParseTree.get());
|
||||
@ -173,8 +177,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
||||
|
||||
|
||||
// Parse source tree and construct a SyntaxNodeCollection object.
|
||||
MosesTraining::SyntaxNodeCollection sourceSyntaxTree;
|
||||
MosesTraining::SyntaxNode *sourceSyntaxTreeRoot=NULL;
|
||||
SyntaxNodeCollection sourceSyntaxTree;
|
||||
SyntaxNode *sourceSyntaxTreeRoot=NULL;
|
||||
|
||||
if (options.sourceLabels) {
|
||||
try {
|
||||
@ -197,8 +201,9 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
||||
// Read source tokens.
|
||||
std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
|
||||
|
||||
// Construct a source ParseTree object from the SyntaxNodeCollection object.
|
||||
std::auto_ptr<ParseTree> sourceParseTree;
|
||||
// Construct a source SyntaxTree object from the SyntaxNodeCollection
|
||||
// object.
|
||||
std::auto_ptr<SyntaxTree> sourceParseTree;
|
||||
|
||||
if (options.sourceLabels) {
|
||||
try {
|
||||
@ -264,12 +269,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
||||
|
||||
const std::vector<const Subgraph *> &rules = (*p)->GetRules();
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN;
|
||||
PhraseOrientation::REO_CLASS l2rOrientation=PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=PhraseOrientation::REO_CLASS_UNKNOWN;
|
||||
if (options.phraseOrientation && !rules.empty()) {
|
||||
int sourceSpanBegin = *((*p)->GetSpan().begin());
|
||||
int sourceSpanEnd = *((*p)->GetSpan().rbegin());
|
||||
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_L2R);
|
||||
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_R2L);
|
||||
// std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
|
||||
// std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
|
||||
}
|
||||
@ -310,8 +315,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
|
||||
fwdExtractStream << " ";
|
||||
phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
|
||||
fwdExtractStream << "}}";
|
||||
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
|
||||
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
|
||||
phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
|
||||
phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
|
||||
}
|
||||
fwdExtractStream << std::endl;
|
||||
invExtractStream << std::endl;
|
||||
@ -400,7 +405,7 @@ void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
|
||||
}
|
||||
|
||||
void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
|
||||
OutputFileStream &stream)
|
||||
Moses::OutputFileStream &stream)
|
||||
{
|
||||
bool ret = stream.Open(filename);
|
||||
if (!ret) {
|
||||
@ -823,16 +828,16 @@ void ExtractGHKM::WriteSourceLabelSet(
|
||||
}
|
||||
|
||||
void ExtractGHKM::CollectWordLabelCounts(
|
||||
ParseTree &root,
|
||||
SyntaxTree &root,
|
||||
const Options &options,
|
||||
std::map<std::string, int> &wordCount,
|
||||
std::map<std::string, std::string> &wordLabel)
|
||||
{
|
||||
for (ParseTree::ConstLeafIterator p(root);
|
||||
p != ParseTree::ConstLeafIterator(); ++p) {
|
||||
const ParseTree &leaf = *p;
|
||||
for (SyntaxTree::ConstLeafIterator p(root);
|
||||
p != SyntaxTree::ConstLeafIterator(); ++p) {
|
||||
const SyntaxTree &leaf = *p;
|
||||
const std::string &word = leaf.value().GetLabel();
|
||||
const ParseTree *ancestor = leaf.parent();
|
||||
const SyntaxTree *ancestor = leaf.parent();
|
||||
// If unary rule elimination is enabled and this word is at the end of a
|
||||
// chain of unary rewrites, e.g.
|
||||
// PN-SB -> NE -> word
|
||||
@ -849,12 +854,12 @@ void ExtractGHKM::CollectWordLabelCounts(
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> ExtractGHKM::ReadTokens(const ParseTree &root) const
|
||||
std::vector<std::string> ExtractGHKM::ReadTokens(const SyntaxTree &root) const
|
||||
{
|
||||
std::vector<std::string> tokens;
|
||||
for (ParseTree::ConstLeafIterator p(root);
|
||||
p != ParseTree::ConstLeafIterator(); ++p) {
|
||||
const ParseTree &leaf = *p;
|
||||
for (SyntaxTree::ConstLeafIterator p(root);
|
||||
p != SyntaxTree::ConstLeafIterator(); ++p) {
|
||||
const SyntaxTree &leaf = *p;
|
||||
const std::string &word = leaf.value().GetLabel();
|
||||
tokens.push_back(word);
|
||||
}
|
||||
@ -956,4 +961,4 @@ void ExtractGHKM::StripBitParLabels(
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -25,13 +25,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ParseTree.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
|
||||
class OutputFileStream;
|
||||
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
@ -52,9 +50,9 @@ private:
|
||||
void Error(const std::string &) const;
|
||||
void OpenInputFileOrDie(const std::string &, std::ifstream &);
|
||||
void OpenOutputFileOrDie(const std::string &, std::ofstream &);
|
||||
void OpenOutputFileOrDie(const std::string &, OutputFileStream &);
|
||||
void RecordTreeLabels(const ParseTree &, std::set<std::string> &);
|
||||
void CollectWordLabelCounts(ParseTree &,
|
||||
void OpenOutputFileOrDie(const std::string &, Moses::OutputFileStream &);
|
||||
void RecordTreeLabels(const SyntaxTree &, std::set<std::string> &);
|
||||
void CollectWordLabelCounts(SyntaxTree &,
|
||||
const Options &,
|
||||
std::map<std::string, int> &,
|
||||
std::map<std::string, std::string> &);
|
||||
@ -78,7 +76,7 @@ private:
|
||||
std::map<std::string, int> &outTopLabelSet) const;
|
||||
|
||||
std::vector<std::string> ReadTokens(const std::string &) const;
|
||||
std::vector<std::string> ReadTokens(const ParseTree &root) const;
|
||||
std::vector<std::string> ReadTokens(const SyntaxTree &root) const;
|
||||
|
||||
void ProcessOptions(int, char *[], Options &) const;
|
||||
|
||||
@ -86,5 +84,4 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
|
||||
} // namespace MosesTraining
|
||||
|
@ -21,6 +21,6 @@
|
||||
|
||||
// Program entry point: constructs an ExtractGHKM tool object and returns
// the result of its Main(argc, argv) call as the process exit status.
// NOTE(review): the two `tool` declarations below appear to be the removed
// (Moses::GHKM) and added (MosesTraining::GHKM) sides of this diff hunk, not
// two live declarations -- confirm against the actual file.
int main(int argc, char *argv[])
|
||||
{
|
||||
Moses::GHKM::ExtractGHKM tool;
|
||||
MosesTraining::GHKM::ExtractGHKM tool;
|
||||
return tool.Main(argc, argv);
|
||||
}
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -70,4 +70,4 @@ void Node::GetTargetWords(std::vector<std::string> &targetWords) const
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -21,14 +21,14 @@
|
||||
#ifndef EXTRACT_GHKM_NODE_H_
|
||||
#define EXTRACT_GHKM_NODE_H_
|
||||
|
||||
#include "Span.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
#include "Span.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -215,6 +215,6 @@ Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -89,5 +89,5 @@ public:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
|
@ -1,38 +0,0 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2011 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef EXTRACT_GHKM_PARSE_TREE_H_
|
||||
#define EXTRACT_GHKM_PARSE_TREE_H_
|
||||
|
||||
#include "syntax-common/tree.h"
|
||||
|
||||
#include "SyntaxNode.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
typedef MosesTraining::Syntax::Tree<MosesTraining::SyntaxNode> ParseTree;
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
@ -26,7 +26,7 @@
|
||||
|
||||
#include <boost/assign/list_of.hpp>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -469,5 +469,5 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2011 University of Edinburgh
|
||||
@ -20,16 +19,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "moses/AlignmentInfo.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
|
||||
namespace Moses
|
||||
#include "moses/AlignmentInfo.h"
|
||||
|
||||
#include "Alignment.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -53,8 +54,8 @@ public:
|
||||
|
||||
PhraseOrientation(int sourceSize,
|
||||
int targetSize,
|
||||
const AlignmentInfo &alignTerm,
|
||||
const AlignmentInfo &alignNonTerm);
|
||||
const Moses::AlignmentInfo &alignTerm,
|
||||
const Moses::AlignmentInfo &alignNonTerm);
|
||||
|
||||
REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
|
||||
REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
|
||||
@ -119,5 +120,4 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
|
||||
} // namespace MosesTraining
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include "Node.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -38,4 +38,4 @@ bool Rule::PartitionOrderComp(const Node *a, const Node *b)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#include "Alignment.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -54,6 +54,6 @@ protected:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -26,13 +26,13 @@
|
||||
#include "SyntaxNode.h"
|
||||
#include "SyntaxNodeCollection.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
ScfgRule::ScfgRule(const Subgraph &fragment,
|
||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree)
|
||||
const SyntaxNodeCollection *sourceSyntaxTree)
|
||||
: m_graphFragment(fragment)
|
||||
, m_sourceLHS("X", NonTerminal)
|
||||
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
|
||||
@ -134,13 +134,13 @@ ScfgRule::ScfgRule(const Subgraph &fragment,
|
||||
}
|
||||
}
|
||||
|
||||
void ScfgRule::PushSourceLabel(
|
||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
|
||||
const Node *node, const std::string &nonMatchingLabel)
|
||||
void ScfgRule::PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
|
||||
const Node *node,
|
||||
const std::string &nonMatchingLabel)
|
||||
{
|
||||
ContiguousSpan span = Closure(node->GetSpan());
|
||||
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?
|
||||
std::vector<MosesTraining::SyntaxNode*> sourceLabels =
|
||||
std::vector<SyntaxNode*> sourceLabels =
|
||||
sourceSyntaxTree->GetNodes(span.first,span.second);
|
||||
if (!sourceLabels.empty()) {
|
||||
// store the topmost matching label from the source syntax tree
|
||||
@ -197,4 +197,4 @@ void ScfgRule::UpdateSourceLabelCoocCounts(std::map< std::string, std::map<std::
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -29,7 +29,7 @@
|
||||
#include "Rule.h"
|
||||
#include "SyntaxNodeCollection.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -41,7 +41,7 @@ class ScfgRule : public Rule
|
||||
{
|
||||
public:
|
||||
ScfgRule(const Subgraph &fragment,
|
||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree = 0);
|
||||
const SyntaxNodeCollection *sourceSyntaxTree = 0);
|
||||
|
||||
const Subgraph &GetGraphFragment() const {
|
||||
return m_graphFragment;
|
||||
@ -78,9 +78,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
void PushSourceLabel(
|
||||
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
|
||||
const Node *node, const std::string &nonMatchingLabel);
|
||||
void PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
|
||||
const Node *node, const std::string &nonMatchingLabel);
|
||||
|
||||
const Subgraph& m_graphFragment;
|
||||
Symbol m_sourceLHS;
|
||||
@ -94,4 +93,4 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -19,10 +19,6 @@
|
||||
|
||||
#include "ScfgRuleWriter.h"
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "Options.h"
|
||||
#include "ScfgRule.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <ostream>
|
||||
@ -30,7 +26,11 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
#include "Alignment.h"
|
||||
#include "Options.h"
|
||||
#include "ScfgRule.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -229,4 +229,4 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -19,11 +19,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <ostream>
|
||||
|
||||
namespace Moses
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -57,5 +57,5 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
#include "Span.h"
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -45,4 +45,4 @@ ContiguousSpan Closure(const Span &s)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -36,7 +36,7 @@ bool SpansIntersect(const Span &, const ContiguousSpan &);
|
||||
|
||||
ContiguousSpan Closure(const Span &);
|
||||
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
} // namespace GHKM
|
||||
|
||||
#endif
|
||||
|
@ -1,11 +1,11 @@
|
||||
#include "StsgRule.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Node.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -91,4 +91,4 @@ StsgRule::StsgRule(const Subgraph &fragment)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -2,12 +2,12 @@
|
||||
#ifndef EXTRACT_GHKM_STSG_RULE_H_
|
||||
#define EXTRACT_GHKM_STSG_RULE_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "Rule.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -39,6 +39,6 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -1,9 +1,5 @@
|
||||
#include "StsgRuleWriter.h"
|
||||
|
||||
#include "Alignment.h"
|
||||
#include "Options.h"
|
||||
#include "StsgRule.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <ostream>
|
||||
@ -11,7 +7,11 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
#include "Alignment.h"
|
||||
#include "Options.h"
|
||||
#include "StsgRule.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -92,4 +92,4 @@ void StsgRuleWriter::Write(const StsgRule &rule)
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -2,11 +2,11 @@
|
||||
#ifndef EXTRACT_GHKM_STSG_RULE_WRITER_H_
|
||||
#define EXTRACT_GHKM_STSG_RULE_WRITER_H_
|
||||
|
||||
#include "Subgraph.h"
|
||||
|
||||
#include <ostream>
|
||||
|
||||
namespace Moses
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -36,6 +36,6 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
@ -18,10 +18,11 @@
|
||||
***********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include "Subgraph.h"
|
||||
#include "Node.h"
|
||||
|
||||
namespace Moses
|
||||
#include "Node.h"
|
||||
#include "Subgraph.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -193,5 +194,5 @@ void Subgraph::RecursivelyGetPartsOfSpeech(const Node *n, std::vector<std::strin
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
} // namespace GHKM
|
||||
|
@ -19,12 +19,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses
|
||||
#include "Node.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -137,5 +137,5 @@ private:
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
|
@ -19,18 +19,17 @@
|
||||
|
||||
#include "XmlTreeParser.h"
|
||||
|
||||
#include "ParseTree.h"
|
||||
#include "tables-core.h"
|
||||
#include "XmlException.h"
|
||||
#include "XmlTree.h"
|
||||
#include "util/tokenize.hh"
|
||||
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
using namespace MosesTraining;
|
||||
#include "util/tokenize.hh"
|
||||
|
||||
namespace Moses
|
||||
#include "SyntaxTree.h"
|
||||
#include "tables-core.h"
|
||||
#include "XmlException.h"
|
||||
#include "XmlTree.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
@ -42,7 +41,7 @@ XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
|
||||
{
|
||||
}
|
||||
|
||||
std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
|
||||
std::auto_ptr<SyntaxTree> XmlTreeParser::Parse(const std::string &line)
|
||||
{
|
||||
m_line = line;
|
||||
m_tree.Clear();
|
||||
@ -61,12 +60,12 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
|
||||
return ConvertTree(*root, m_words);
|
||||
}
|
||||
|
||||
// Converts a SyntaxNode tree to a Moses::GHKM::ParseTree.
|
||||
std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
||||
// Converts a SyntaxNode tree to a MosesTraining::GHKM::SyntaxTree.
|
||||
std::auto_ptr<SyntaxTree> XmlTreeParser::ConvertTree(
|
||||
const SyntaxNode &tree,
|
||||
const std::vector<std::string> &words)
|
||||
{
|
||||
std::auto_ptr<ParseTree> root(new ParseTree(tree));
|
||||
std::auto_ptr<SyntaxTree> root(new SyntaxTree(tree));
|
||||
const std::vector<SyntaxNode*> &children = tree.GetChildren();
|
||||
if (children.empty()) {
|
||||
if (tree.GetStart() != tree.GetEnd()) {
|
||||
@ -76,14 +75,14 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
||||
throw Exception(msg.str());
|
||||
}
|
||||
SyntaxNode value(tree.GetStart(), tree.GetStart(), words[tree.GetStart()]);
|
||||
std::auto_ptr<ParseTree> leaf(new ParseTree(value));
|
||||
std::auto_ptr<SyntaxTree> leaf(new SyntaxTree(value));
|
||||
leaf->parent() = root.get();
|
||||
root->children().push_back(leaf.release());
|
||||
} else {
|
||||
for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
|
||||
p != children.end(); ++p) {
|
||||
assert(*p);
|
||||
std::auto_ptr<ParseTree> child = ConvertTree(**p, words);
|
||||
std::auto_ptr<SyntaxTree> child = ConvertTree(**p, words);
|
||||
child->parent() = root.get();
|
||||
root->children().push_back(child.release());
|
||||
}
|
||||
@ -92,4 +91,4 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
@ -21,32 +21,32 @@
|
||||
#ifndef EXTRACT_GHKM_XML_TREE_PARSER_H_
|
||||
#define EXTRACT_GHKM_XML_TREE_PARSER_H_
|
||||
|
||||
#include "Exception.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ParseTree.h"
|
||||
#include "SyntaxNode.h"
|
||||
#include "SyntaxNodeCollection.h"
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
namespace Moses
|
||||
#include "Exception.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
// Parses a string in Moses' XML parse tree format and returns a ParseTree
|
||||
// Parses a string in Moses' XML parse tree format and returns a SyntaxTree
|
||||
// object.
|
||||
class XmlTreeParser
|
||||
{
|
||||
public:
|
||||
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
|
||||
std::auto_ptr<ParseTree> Parse(const std::string &);
|
||||
std::auto_ptr<SyntaxTree> Parse(const std::string &);
|
||||
|
||||
static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
|
||||
static std::auto_ptr<SyntaxTree> ConvertTree(const SyntaxNode &,
|
||||
const std::vector<std::string> &);
|
||||
|
||||
const std::vector<std::string>& GetWords() {
|
||||
@ -58,11 +58,11 @@ private:
|
||||
std::set<std::string> &m_labelSet;
|
||||
std::map<std::string, int> &m_topLabelSet;
|
||||
std::string m_line;
|
||||
MosesTraining::SyntaxNodeCollection m_tree;
|
||||
SyntaxNodeCollection m_tree;
|
||||
std::vector<std::string> m_words;
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Moses
|
||||
} // namespace MosesTraining
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user