Ongoing moses/phrase-extract refactoring

This commit is contained in:
Phil Williams 2015-06-01 16:40:35 +01:00
parent f61091e38d
commit f37415a259
36 changed files with 246 additions and 273 deletions

View File

@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
// Initialize phrase orientation scoring object
Moses::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// LEFT-TO-RIGHT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else {
reoClassData->firstNonTerminalIsBoundary = true;
}
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// RIGHT-TO-LEFT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else {
reoClassData->lastNonTerminalIsBoundary = true;
}
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// LEFT-TO-RIGHT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "l2rOrientation ");
switch (l2rOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break;
default:
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else {
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal:
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// RIGHT-TO-LEFT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "r2lOrientation ");
switch (r2lOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break;
default:
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else {
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal:
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
}
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
{
if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
return &MORIENT;
} else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
return &SORIENT;
} else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
return &DORIENT;
} else {

View File

@ -302,8 +302,8 @@ public:
struct ReoClassData {
public:
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
bool firstNonTerminalIsBoundary;
bool firstNonTerminalPreviousSourceSpanIsAligned;
bool firstNonTerminalFollowingSourceSpanIsAligned;
@ -401,7 +401,7 @@ protected:
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
static const std::string MORIENT;
static const std::string SORIENT;

View File

@ -10,10 +10,6 @@
#include "ScoreFeature.h"
#include "extract-ghkm/Node.h"
using namespace MosesTraining;
using namespace Moses;
using namespace GHKM;
namespace MosesTraining
{

View File

@ -0,0 +1,12 @@
#pragma once
#include "syntax-common/tree.h"
#include "SyntaxNode.h"
namespace MosesTraining
{
// SyntaxTree is shorthand for the Syntax::Tree template instantiated with
// SyntaxNode as its value type, so code in this namespace can refer to the
// tree type by a single name.
// NOTE(review): Syntax::Tree is presumably declared in "syntax-common/tree.h"
// and SyntaxNode in "SyntaxNode.h" (the two headers included above) - confirm.
typedef Syntax::Tree<SyntaxNode> SyntaxTree;
} // namespace MosesTraining

View File

@ -25,7 +25,7 @@
#include <cassert>
#include <cstdlib>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -70,4 +70,4 @@ void FlipAlignment(Alignment &a)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -23,7 +23,7 @@
#include <utility>
#include <vector>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -35,5 +35,5 @@ void ReadAlignment(const std::string &, Alignment &);
void FlipAlignment(Alignment &);
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -19,23 +19,24 @@
#include "AlignmentGraph.h"
#include "ComposedRule.h"
#include "Node.h"
#include "Options.h"
#include "ParseTree.h"
#include "Subgraph.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <stack>
namespace Moses
#include "SyntaxTree.h"
#include "ComposedRule.h"
#include "Node.h"
#include "Options.h"
#include "Subgraph.h"
namespace MosesTraining
{
namespace GHKM
{
AlignmentGraph::AlignmentGraph(const ParseTree *t,
AlignmentGraph::AlignmentGraph(const SyntaxTree *t,
const std::vector<std::string> &s,
const Alignment &a)
{
@ -208,7 +209,7 @@ void AlignmentGraph::ExtractComposedRules(Node *node, const Options &options)
}
}
Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
Node *AlignmentGraph::CopyParseTree(const SyntaxTree *root)
{
NodeType nodeType = (root->IsLeaf()) ? TARGET : TREE;
@ -218,10 +219,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
n->SetPcfgScore(root->value().GetPcfgScore());
}
const std::vector<ParseTree *> &children = root->children();
const std::vector<SyntaxTree *> &children = root->children();
std::vector<Node *> childNodes;
childNodes.reserve(children.size());
for (std::vector<ParseTree *>::const_iterator p(children.begin());
for (std::vector<SyntaxTree *>::const_iterator p(children.begin());
p != children.end(); ++p) {
Node *child = CopyParseTree(*p);
child->AddParent(n.get());
@ -385,4 +386,4 @@ Node *AlignmentGraph::DetermineAttachmentPoint(int index)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -21,15 +21,16 @@
#ifndef EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
#define EXTRACT_GHKM_ALIGNMENT_GRAPH_H_
#include "Alignment.h"
#include "Options.h"
#include "ParseTree.h"
#include <set>
#include <string>
#include <vector>
namespace Moses
#include "SyntaxTree.h"
#include "Alignment.h"
#include "Options.h"
namespace MosesTraining
{
namespace GHKM
{
@ -40,7 +41,7 @@ class Subgraph;
class AlignmentGraph
{
public:
AlignmentGraph(const ParseTree *,
AlignmentGraph(const SyntaxTree *,
const std::vector<std::string> &,
const Alignment &);
@ -61,7 +62,7 @@ private:
AlignmentGraph(const AlignmentGraph &);
AlignmentGraph &operator=(const AlignmentGraph &);
Node *CopyParseTree(const ParseTree *);
Node *CopyParseTree(const SyntaxTree *);
void ComputeFrontierSet(Node *, const Options &, std::set<Node *> &) const;
void CalcComplementSpans(Node *);
void GetTargetTreeLeaves(Node *, std::vector<Node *> &);
@ -77,6 +78,6 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -19,15 +19,15 @@
#include "ComposedRule.h"
#include "Node.h"
#include "Options.h"
#include "Subgraph.h"
#include <set>
#include <vector>
#include <queue>
namespace Moses
#include "Node.h"
#include "Options.h"
#include "Subgraph.h"
namespace MosesTraining
{
namespace GHKM
{
@ -128,4 +128,4 @@ Subgraph ComposedRule::CreateSubgraph()
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -21,12 +21,12 @@
#ifndef EXTRACT_GHKM_COMPOSED_RULE_H_
#define EXTRACT_GHKM_COMPOSED_RULE_H_
#include "Subgraph.h"
#include <vector>
#include <queue>
namespace Moses
#include "Subgraph.h"
namespace MosesTraining
{
namespace GHKM
{
@ -67,6 +67,6 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -23,7 +23,7 @@
#include <string>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -41,6 +41,6 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -19,29 +19,6 @@
#include "ExtractGHKM.h"
#include "Alignment.h"
#include "AlignmentGraph.h"
#include "Exception.h"
#include "InputFileStream.h"
#include "Node.h"
#include "OutputFileStream.h"
#include "Options.h"
#include "ParseTree.h"
#include "PhraseOrientation.h"
#include "ScfgRule.h"
#include "ScfgRuleWriter.h"
#include "Span.h"
#include "StsgRule.h"
#include "StsgRuleWriter.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "XmlTreeParser.h"
#include <boost/program_options.hpp>
#include <cassert>
#include <cstdlib>
#include <fstream>
@ -51,13 +28,40 @@
#include <sstream>
#include <vector>
namespace Moses
#include <boost/program_options.hpp>
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "SyntaxTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "Alignment.h"
#include "AlignmentGraph.h"
#include "Exception.h"
#include "Node.h"
#include "Options.h"
#include "PhraseOrientation.h"
#include "ScfgRule.h"
#include "ScfgRuleWriter.h"
#include "Span.h"
#include "StsgRule.h"
#include "StsgRuleWriter.h"
#include "XmlTreeParser.h"
namespace MosesTraining
{
namespace GHKM
{
int ExtractGHKM::Main(int argc, char *argv[])
{
using Moses::InputFileStream;
using Moses::OutputFileStream;
// Process command-line options.
Options options;
ProcessOptions(argc, argv, options);
@ -158,7 +162,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
std::cerr << "skipping line " << lineNum << " with empty target tree\n";
continue;
}
std::auto_ptr<ParseTree> targetParseTree;
std::auto_ptr<SyntaxTree> targetParseTree;
try {
targetParseTree = targetXmlTreeParser.Parse(targetLine);
assert(targetParseTree.get());
@ -173,8 +177,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Parse source tree and construct a SyntaxNodeCollection object.
MosesTraining::SyntaxNodeCollection sourceSyntaxTree;
MosesTraining::SyntaxNode *sourceSyntaxTreeRoot=NULL;
SyntaxNodeCollection sourceSyntaxTree;
SyntaxNode *sourceSyntaxTreeRoot=NULL;
if (options.sourceLabels) {
try {
@ -197,8 +201,9 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Read source tokens.
std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
// Construct a source ParseTree object from the SyntaxNodeCollection object.
std::auto_ptr<ParseTree> sourceParseTree;
// Construct a source SyntaxTree object from the SyntaxNodeCollection
// object.
std::auto_ptr<SyntaxTree> sourceParseTree;
if (options.sourceLabels) {
try {
@ -264,12 +269,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
const std::vector<const Subgraph *> &rules = (*p)->GetRules();
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN;
PhraseOrientation::REO_CLASS l2rOrientation=PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=PhraseOrientation::REO_CLASS_UNKNOWN;
if (options.phraseOrientation && !rules.empty()) {
int sourceSpanBegin = *((*p)->GetSpan().begin());
int sourceSpanEnd = *((*p)->GetSpan().rbegin());
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_L2R);
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,PhraseOrientation::REO_DIR_R2L);
// std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
// std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
}
@ -310,8 +315,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
fwdExtractStream << " ";
phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
fwdExtractStream << "}}";
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
}
fwdExtractStream << std::endl;
invExtractStream << std::endl;
@ -400,7 +405,7 @@ void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
}
void ExtractGHKM::OpenOutputFileOrDie(const std::string &filename,
OutputFileStream &stream)
Moses::OutputFileStream &stream)
{
bool ret = stream.Open(filename);
if (!ret) {
@ -823,16 +828,16 @@ void ExtractGHKM::WriteSourceLabelSet(
}
void ExtractGHKM::CollectWordLabelCounts(
ParseTree &root,
SyntaxTree &root,
const Options &options,
std::map<std::string, int> &wordCount,
std::map<std::string, std::string> &wordLabel)
{
for (ParseTree::ConstLeafIterator p(root);
p != ParseTree::ConstLeafIterator(); ++p) {
const ParseTree &leaf = *p;
for (SyntaxTree::ConstLeafIterator p(root);
p != SyntaxTree::ConstLeafIterator(); ++p) {
const SyntaxTree &leaf = *p;
const std::string &word = leaf.value().GetLabel();
const ParseTree *ancestor = leaf.parent();
const SyntaxTree *ancestor = leaf.parent();
// If unary rule elimination is enabled and this word is at the end of a
// chain of unary rewrites, e.g.
// PN-SB -> NE -> word
@ -849,12 +854,12 @@ void ExtractGHKM::CollectWordLabelCounts(
}
}
std::vector<std::string> ExtractGHKM::ReadTokens(const ParseTree &root) const
std::vector<std::string> ExtractGHKM::ReadTokens(const SyntaxTree &root) const
{
std::vector<std::string> tokens;
for (ParseTree::ConstLeafIterator p(root);
p != ParseTree::ConstLeafIterator(); ++p) {
const ParseTree &leaf = *p;
for (SyntaxTree::ConstLeafIterator p(root);
p != SyntaxTree::ConstLeafIterator(); ++p) {
const SyntaxTree &leaf = *p;
const std::string &word = leaf.value().GetLabel();
tokens.push_back(word);
}
@ -956,4 +961,4 @@ void ExtractGHKM::StripBitParLabels(
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -25,13 +25,11 @@
#include <string>
#include <vector>
#include "ParseTree.h"
#include "OutputFileStream.h"
#include "SyntaxTree.h"
namespace Moses
namespace MosesTraining
{
class OutputFileStream;
namespace GHKM
{
@ -52,9 +50,9 @@ private:
void Error(const std::string &) const;
void OpenInputFileOrDie(const std::string &, std::ifstream &);
void OpenOutputFileOrDie(const std::string &, std::ofstream &);
void OpenOutputFileOrDie(const std::string &, OutputFileStream &);
void RecordTreeLabels(const ParseTree &, std::set<std::string> &);
void CollectWordLabelCounts(ParseTree &,
void OpenOutputFileOrDie(const std::string &, Moses::OutputFileStream &);
void RecordTreeLabels(const SyntaxTree &, std::set<std::string> &);
void CollectWordLabelCounts(SyntaxTree &,
const Options &,
std::map<std::string, int> &,
std::map<std::string, std::string> &);
@ -78,7 +76,7 @@ private:
std::map<std::string, int> &outTopLabelSet) const;
std::vector<std::string> ReadTokens(const std::string &) const;
std::vector<std::string> ReadTokens(const ParseTree &root) const;
std::vector<std::string> ReadTokens(const SyntaxTree &root) const;
void ProcessOptions(int, char *[], Options &) const;
@ -86,5 +84,4 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -21,6 +21,6 @@
int main(int argc, char *argv[])
{
Moses::GHKM::ExtractGHKM tool;
MosesTraining::GHKM::ExtractGHKM tool;
return tool.Main(argc, argv);
}

View File

@ -21,7 +21,7 @@
#include "Subgraph.h"
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -70,4 +70,4 @@ void Node::GetTargetWords(std::vector<std::string> &targetWords) const
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -21,14 +21,14 @@
#ifndef EXTRACT_GHKM_NODE_H_
#define EXTRACT_GHKM_NODE_H_
#include "Span.h"
#include <cassert>
#include <iterator>
#include <string>
#include <vector>
namespace Moses
#include "Span.h"
namespace MosesTraining
{
namespace GHKM
{
@ -215,6 +215,6 @@ Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -21,7 +21,7 @@
#include <string>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -89,5 +89,5 @@ public:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -1,38 +0,0 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef EXTRACT_GHKM_PARSE_TREE_H_
#define EXTRACT_GHKM_PARSE_TREE_H_
#include "syntax-common/tree.h"
#include "SyntaxNode.h"
namespace Moses
{
namespace GHKM
{
typedef MosesTraining::Syntax::Tree<MosesTraining::SyntaxNode> ParseTree;
} // namespace GHKM
} // namespace Moses
#endif

View File

@ -26,7 +26,7 @@
#include <boost/assign/list_of.hpp>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -469,5 +469,5 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -1,4 +1,3 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
@ -20,16 +19,18 @@
#pragma once
#include "Alignment.h"
#include "moses/AlignmentInfo.h"
#include <map>
#include <set>
#include <string>
#include <vector>
#include <boost/unordered_map.hpp>
namespace Moses
#include "moses/AlignmentInfo.h"
#include "Alignment.h"
namespace MosesTraining
{
namespace GHKM
{
@ -53,8 +54,8 @@ public:
PhraseOrientation(int sourceSize,
int targetSize,
const AlignmentInfo &alignTerm,
const AlignmentInfo &alignNonTerm);
const Moses::AlignmentInfo &alignTerm,
const Moses::AlignmentInfo &alignNonTerm);
REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
@ -119,5 +120,4 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -3,7 +3,7 @@
#include "Node.h"
#include "Subgraph.h"
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -38,4 +38,4 @@ bool Rule::PartitionOrderComp(const Node *a, const Node *b)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -7,7 +7,7 @@
#include "Alignment.h"
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -54,6 +54,6 @@ protected:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -26,13 +26,13 @@
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
ScfgRule::ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree)
const SyntaxNodeCollection *sourceSyntaxTree)
: m_graphFragment(fragment)
, m_sourceLHS("X", NonTerminal)
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
@ -134,13 +134,13 @@ ScfgRule::ScfgRule(const Subgraph &fragment,
}
}
void ScfgRule::PushSourceLabel(
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
const Node *node, const std::string &nonMatchingLabel)
void ScfgRule::PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
const Node *node,
const std::string &nonMatchingLabel)
{
ContiguousSpan span = Closure(node->GetSpan());
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?
std::vector<MosesTraining::SyntaxNode*> sourceLabels =
std::vector<SyntaxNode*> sourceLabels =
sourceSyntaxTree->GetNodes(span.first,span.second);
if (!sourceLabels.empty()) {
// store the topmost matching label from the source syntax tree
@ -197,4 +197,4 @@ void ScfgRule::UpdateSourceLabelCoocCounts(std::map< std::string, std::map<std::
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -29,7 +29,7 @@
#include "Rule.h"
#include "SyntaxNodeCollection.h"
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -41,7 +41,7 @@ class ScfgRule : public Rule
{
public:
ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree = 0);
const SyntaxNodeCollection *sourceSyntaxTree = 0);
const Subgraph &GetGraphFragment() const {
return m_graphFragment;
@ -78,9 +78,8 @@ public:
}
private:
void PushSourceLabel(
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
const Node *node, const std::string &nonMatchingLabel);
void PushSourceLabel(const SyntaxNodeCollection *sourceSyntaxTree,
const Node *node, const std::string &nonMatchingLabel);
const Subgraph& m_graphFragment;
Symbol m_sourceLHS;
@ -94,4 +93,4 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -19,10 +19,6 @@
#include "ScfgRuleWriter.h"
#include "Alignment.h"
#include "Options.h"
#include "ScfgRule.h"
#include <cassert>
#include <cmath>
#include <ostream>
@ -30,7 +26,11 @@
#include <sstream>
#include <vector>
namespace Moses
#include "Alignment.h"
#include "Options.h"
#include "ScfgRule.h"
namespace MosesTraining
{
namespace GHKM
{
@ -229,4 +229,4 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -19,11 +19,11 @@
#pragma once
#include "Subgraph.h"
#include <ostream>
namespace Moses
#include "Subgraph.h"
namespace MosesTraining
{
namespace GHKM
{
@ -57,5 +57,5 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -19,7 +19,7 @@
#include "Span.h"
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -45,4 +45,4 @@ ContiguousSpan Closure(const Span &s)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -24,7 +24,7 @@
#include <map>
#include <set>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -36,7 +36,7 @@ bool SpansIntersect(const Span &, const ContiguousSpan &);
ContiguousSpan Closure(const Span &);
} // namespace Moses
} // namespace GHKM
} // namespace MosesTraining
#endif

View File

@ -1,11 +1,11 @@
#include "StsgRule.h"
#include <algorithm>
#include "Node.h"
#include "Subgraph.h"
#include <algorithm>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -91,4 +91,4 @@ StsgRule::StsgRule(const Subgraph &fragment)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -2,12 +2,12 @@
#ifndef EXTRACT_GHKM_STSG_RULE_H_
#define EXTRACT_GHKM_STSG_RULE_H_
#include <vector>
#include "Rule.h"
#include "Subgraph.h"
#include <vector>
namespace Moses
namespace MosesTraining
{
namespace GHKM
{
@ -39,6 +39,6 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -1,9 +1,5 @@
#include "StsgRuleWriter.h"
#include "Alignment.h"
#include "Options.h"
#include "StsgRule.h"
#include <cassert>
#include <cmath>
#include <ostream>
@ -11,7 +7,11 @@
#include <sstream>
#include <vector>
namespace Moses
#include "Alignment.h"
#include "Options.h"
#include "StsgRule.h"
namespace MosesTraining
{
namespace GHKM
{
@ -92,4 +92,4 @@ void StsgRuleWriter::Write(const StsgRule &rule)
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -2,11 +2,11 @@
#ifndef EXTRACT_GHKM_STSG_RULE_WRITER_H_
#define EXTRACT_GHKM_STSG_RULE_WRITER_H_
#include "Subgraph.h"
#include <ostream>
namespace Moses
#include "Subgraph.h"
namespace MosesTraining
{
namespace GHKM
{
@ -36,6 +36,6 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif

View File

@ -18,10 +18,11 @@
***********************************************************************/
#include <iostream>
#include "Subgraph.h"
#include "Node.h"
namespace Moses
#include "Node.h"
#include "Subgraph.h"
namespace MosesTraining
{
namespace GHKM
{
@ -193,5 +194,5 @@ void Subgraph::RecursivelyGetPartsOfSpeech(const Node *n, std::vector<std::strin
}
}
} // namespace Moses
} // namespace GHKM
} // namespace MosesTraining

View File

@ -19,12 +19,12 @@
#pragma once
#include "Node.h"
#include <set>
#include <vector>
namespace Moses
#include "Node.h"
namespace MosesTraining
{
namespace GHKM
{
@ -137,5 +137,5 @@ private:
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -19,18 +19,17 @@
#include "XmlTreeParser.h"
#include "ParseTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "util/tokenize.hh"
#include <cassert>
#include <vector>
using namespace MosesTraining;
#include "util/tokenize.hh"
namespace Moses
#include "SyntaxTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
namespace MosesTraining
{
namespace GHKM
{
@ -42,7 +41,7 @@ XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
{
}
std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
std::auto_ptr<SyntaxTree> XmlTreeParser::Parse(const std::string &line)
{
m_line = line;
m_tree.Clear();
@ -61,12 +60,12 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
return ConvertTree(*root, m_words);
}
// Converts a SyntaxNode tree to a Moses::GHKM::ParseTree.
std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
// Converts a SyntaxNode tree to a MosesTraining::GHKM::SyntaxTree.
std::auto_ptr<SyntaxTree> XmlTreeParser::ConvertTree(
const SyntaxNode &tree,
const std::vector<std::string> &words)
{
std::auto_ptr<ParseTree> root(new ParseTree(tree));
std::auto_ptr<SyntaxTree> root(new SyntaxTree(tree));
const std::vector<SyntaxNode*> &children = tree.GetChildren();
if (children.empty()) {
if (tree.GetStart() != tree.GetEnd()) {
@ -76,14 +75,14 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
throw Exception(msg.str());
}
SyntaxNode value(tree.GetStart(), tree.GetStart(), words[tree.GetStart()]);
std::auto_ptr<ParseTree> leaf(new ParseTree(value));
std::auto_ptr<SyntaxTree> leaf(new SyntaxTree(value));
leaf->parent() = root.get();
root->children().push_back(leaf.release());
} else {
for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
p != children.end(); ++p) {
assert(*p);
std::auto_ptr<ParseTree> child = ConvertTree(**p, words);
std::auto_ptr<SyntaxTree> child = ConvertTree(**p, words);
child->parent() = root.get();
root->children().push_back(child.release());
}
@ -92,4 +91,4 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
}
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining

View File

@ -21,32 +21,32 @@
#ifndef EXTRACT_GHKM_XML_TREE_PARSER_H_
#define EXTRACT_GHKM_XML_TREE_PARSER_H_
#include "Exception.h"
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "ParseTree.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "SyntaxTree.h"
namespace Moses
#include "Exception.h"
namespace MosesTraining
{
namespace GHKM
{
// Parses a string in Moses' XML parse tree format and returns a ParseTree
// Parses a string in Moses' XML parse tree format and returns a SyntaxTree
// object.
class XmlTreeParser
{
public:
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
std::auto_ptr<ParseTree> Parse(const std::string &);
std::auto_ptr<SyntaxTree> Parse(const std::string &);
static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
static std::auto_ptr<SyntaxTree> ConvertTree(const SyntaxNode &,
const std::vector<std::string> &);
const std::vector<std::string>& GetWords() {
@ -58,11 +58,11 @@ private:
std::set<std::string> &m_labelSet;
std::map<std::string, int> &m_topLabelSet;
std::string m_line;
MosesTraining::SyntaxNodeCollection m_tree;
SyntaxNodeCollection m_tree;
std::vector<std::string> m_words;
};
} // namespace GHKM
} // namespace Moses
} // namespace MosesTraining
#endif