mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
Hiero phrase orientation
This commit is contained in:
parent
831dc83778
commit
bd3f573452
@ -15,7 +15,6 @@
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
#include "moses/ChartManager.h"
|
||||
#include "phrase-extract/extract-ghkm/Alignment.h"
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
|
||||
@ -23,15 +22,59 @@ namespace Moses
|
||||
{
|
||||
size_t PhraseOrientationFeatureState::hash() const
|
||||
{
|
||||
UTIL_THROW2("TODO:Haven't figure this out yet");
|
||||
if (!m_distinguishStates) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ret = 0;
|
||||
|
||||
if (m_leftBoundaryIsSet) {
|
||||
HashCombineLeftBoundaryRecursive(ret, *this, m_useSparseNT);
|
||||
}
|
||||
if (m_rightBoundaryIsSet) {
|
||||
boost::hash_combine(ret, 42);
|
||||
HashCombineRightBoundaryRecursive(ret, *this, m_useSparseNT);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool PhraseOrientationFeatureState::operator==(const FFState& other) const
|
||||
{
|
||||
UTIL_THROW2("TODO:Haven't figure this out yet");
|
||||
if (!m_distinguishStates) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const PhraseOrientationFeatureState &otherState = static_cast<const PhraseOrientationFeatureState&>(other);
|
||||
|
||||
if (!m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet &&
|
||||
!m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
|
||||
return true;
|
||||
}
|
||||
if (m_leftBoundaryIsSet != otherState.m_leftBoundaryIsSet) {
|
||||
return false;
|
||||
}
|
||||
if (m_rightBoundaryIsSet != otherState.m_rightBoundaryIsSet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_leftBoundaryIsSet) {
|
||||
int compareLeft = CompareLeftBoundaryRecursive(*this, otherState, m_useSparseNT);
|
||||
if (compareLeft != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (m_rightBoundaryIsSet) {
|
||||
int compareRight = CompareRightBoundaryRecursive(*this, otherState, m_useSparseNT);
|
||||
if (compareRight != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const std::string PhraseOrientationFeature::MORIENT("M");
|
||||
const std::string PhraseOrientationFeature::SORIENT("S");
|
||||
const std::string PhraseOrientationFeature::DORIENT("D");
|
||||
@ -143,7 +186,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
|
||||
|
||||
// Initialize phrase orientation scoring object
|
||||
MosesTraining::Syntax::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||
MosesTraining::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
|
||||
|
||||
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
|
||||
@ -159,7 +202,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
|
||||
// LEFT-TO-RIGHT DIRECTION
|
||||
|
||||
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
MosesTraining::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::PhraseOrientation::REO_DIR_L2R);
|
||||
|
||||
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
|
||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||
@ -179,7 +222,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
|
||||
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
|
||||
// discontinuous
|
||||
l2rOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
l2rOrientation = MosesTraining::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
} else {
|
||||
reoClassData->firstNonTerminalIsBoundary = true;
|
||||
}
|
||||
@ -189,7 +232,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
|
||||
// RIGHT-TO-LEFT DIRECTION
|
||||
|
||||
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
MosesTraining::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::PhraseOrientation::REO_DIR_R2L);
|
||||
|
||||
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
|
||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||
@ -209,7 +252,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
|
||||
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
|
||||
// discontinuous
|
||||
r2lOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
r2lOrientation = MosesTraining::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
} else {
|
||||
reoClassData->lastNonTerminalIsBoundary = true;
|
||||
}
|
||||
@ -344,25 +387,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
// LEFT-TO-RIGHT DIRECTION
|
||||
|
||||
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||
MosesTraining::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, "l2rOrientation ");
|
||||
switch (l2rOrientation) {
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_LEFT:
|
||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::PhraseOrientation::REO_MSLR
|
||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||
break;
|
||||
default:
|
||||
@ -405,23 +448,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
} else {
|
||||
|
||||
if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
|
||||
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||
|
||||
} else if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
|
||||
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||
|
||||
} else if ( ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
|
||||
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
@ -446,25 +489,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
// RIGHT-TO-LEFT DIRECTION
|
||||
|
||||
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||
MosesTraining::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, "r2lOrientation ");
|
||||
switch (r2lOrientation) {
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_LEFT:
|
||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||
break;
|
||||
case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR
|
||||
case MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::PhraseOrientation::REO_MSLR
|
||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||
break;
|
||||
default:
|
||||
@ -507,23 +550,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
} else {
|
||||
|
||||
if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
|
||||
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||
|
||||
} else if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
|
||||
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||
|
||||
} else if ( ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
|
||||
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
@ -871,17 +914,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
|
||||
}
|
||||
|
||||
|
||||
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::PhraseOrientation::REO_CLASS o) const
|
||||
{
|
||||
if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( o == MosesTraining::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
return &MORIENT;
|
||||
|
||||
} else if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( o == MosesTraining::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
return &SORIENT;
|
||||
|
||||
} else if ( ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( o == MosesTraining::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
return &DORIENT;
|
||||
|
||||
} else {
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "FFState.h"
|
||||
#include "moses/Factor.h"
|
||||
#include "phrase-extract/extract-ghkm/PhraseOrientation.h"
|
||||
#include "phrase-extract/PhraseOrientation.h"
|
||||
#include "moses/PP/OrientationPhraseProperty.h"
|
||||
#include <boost/unordered_set.hpp>
|
||||
|
||||
@ -226,6 +226,54 @@ protected:
|
||||
return CompareRightBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
|
||||
};
|
||||
|
||||
|
||||
static void HashCombineLeftBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) {
|
||||
if (useSparseNT) {
|
||||
boost::hash_combine(hash, state.m_leftBoundaryNonTerminalSymbol);
|
||||
}
|
||||
// boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex);
|
||||
// boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations);
|
||||
|
||||
for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
|
||||
if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
|
||||
boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RScores[i]);
|
||||
} else {
|
||||
boost::hash_combine(hash, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!state.m_leftBoundaryRecursionGuard) {
|
||||
const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
|
||||
if (prevState->m_leftBoundaryIsSet) {
|
||||
HashCombineLeftBoundaryRecursive(hash, *prevState, useSparseNT);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void HashCombineRightBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) {
|
||||
if (useSparseNT) {
|
||||
boost::hash_combine(hash, state.m_rightBoundaryNonTerminalSymbol);
|
||||
}
|
||||
// boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex);
|
||||
// boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations);
|
||||
|
||||
for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
|
||||
if (state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
|
||||
boost::hash_combine(hash, state.m_rightBoundaryNonTerminalR2LScores[i]);
|
||||
} else {
|
||||
boost::hash_combine(hash, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!state.m_rightBoundaryRecursionGuard) {
|
||||
const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
|
||||
if (prevState->m_rightBoundaryIsSet) {
|
||||
HashCombineRightBoundaryRecursive(hash, *prevState, useSparseNT);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) {
|
||||
for (size_t i=0; i<N; ++i) {
|
||||
if (x[i] ^ y[i])
|
||||
@ -264,8 +312,8 @@ public:
|
||||
|
||||
struct ReoClassData {
|
||||
public:
|
||||
std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
bool firstNonTerminalIsBoundary;
|
||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||
@ -351,7 +399,7 @@ protected:
|
||||
ScoreComponentCollection* scoreBreakdown,
|
||||
const std::string* o) const;
|
||||
|
||||
const std::string* ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
const std::string* ToString(const MosesTraining::PhraseOrientation::REO_CLASS o) const;
|
||||
|
||||
static const std::string MORIENT;
|
||||
static const std::string SORIENT;
|
||||
|
@ -105,7 +105,7 @@ lib moses :
|
||||
TranslationModel/RuleTable/*.cpp
|
||||
TranslationModel/Scope3Parser/*.cpp
|
||||
TranslationModel/CYKPlusParser/*.cpp
|
||||
../phrase-extract/extract-ghkm/PhraseOrientation.cpp
|
||||
../phrase-extract/PhraseOrientation.cpp
|
||||
FF/*.cpp
|
||||
FF/bilingual-lm/*.cpp
|
||||
FF/OSM-Feature/*.cpp
|
||||
|
@ -27,10 +27,6 @@
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace Syntax
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
void ReadAlignment(const std::string &s, Alignment &a)
|
||||
{
|
||||
@ -46,7 +42,7 @@ void ReadAlignment(const std::string &s, Alignment &a)
|
||||
}
|
||||
int src = std::atoi(s.substr(begin, end-begin).c_str());
|
||||
if (end+1 == s.size()) {
|
||||
throw Exception("Target index missing");
|
||||
throw Syntax::Exception("Target index missing");
|
||||
}
|
||||
|
||||
begin = end+1;
|
||||
@ -71,6 +67,4 @@ void FlipAlignment(Alignment &a)
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Syntax
|
||||
} // namespace MosesTraining
|
@ -25,10 +25,6 @@
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace Syntax
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
typedef std::vector<std::pair<int, int> > Alignment;
|
||||
|
||||
@ -36,6 +32,4 @@ void ReadAlignment(const std::string &, Alignment &);
|
||||
|
||||
void FlipAlignment(Alignment &);
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Syntax
|
||||
} // namespace MosesTraining
|
@ -26,6 +26,8 @@
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
|
||||
#include "extract-ghkm/PhraseOrientation.h"
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
|
||||
@ -37,8 +39,6 @@ public:
|
||||
std::string target;
|
||||
std::string alignment;
|
||||
std::string alignmentInv;
|
||||
std::string orientation;
|
||||
std::string orientationForward;
|
||||
std::string sourceContextLeft;
|
||||
std::string sourceContextRight;
|
||||
std::string targetContextLeft;
|
||||
@ -51,14 +51,14 @@ public:
|
||||
int endS;
|
||||
float count;
|
||||
double pcfgScore;
|
||||
Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation;
|
||||
Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation;
|
||||
|
||||
ExtractedRule(int sT, int eT, int sS, int eS)
|
||||
: source()
|
||||
, target()
|
||||
, alignment()
|
||||
, alignmentInv()
|
||||
, orientation()
|
||||
, orientationForward()
|
||||
, sourceContextLeft()
|
||||
, sourceContextRight()
|
||||
, targetContextLeft()
|
||||
@ -70,8 +70,10 @@ public:
|
||||
, startS(sS)
|
||||
, endS(eS)
|
||||
, count(0)
|
||||
, pcfgScore(0.0) {
|
||||
}
|
||||
, pcfgScore(0.0)
|
||||
, l2rOrientation(Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN)
|
||||
, r2lOrientation(Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN)
|
||||
{ }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -28,10 +28,6 @@
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace Syntax
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
std::vector<float> PhraseOrientation::m_l2rOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
|
||||
std::vector<float> PhraseOrientation::m_r2lOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
|
||||
@ -100,6 +96,18 @@ PhraseOrientation::PhraseOrientation(int sourceSize,
|
||||
Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS);
|
||||
}
|
||||
|
||||
// Constructs the object from alignment structures supplied by the caller.
//
//   sourceSize    stored as m_countF
//   targetSize    stored as m_countE
//   alignedToT    copied into m_alignedToT
//   alignedToS    forwarded to Init()
//   alignedCountS forwarded to Init()
//
// Init() is handed the member copy m_alignedToT, not the caller's argument.
PhraseOrientation::PhraseOrientation(int sourceSize,
                                     int targetSize,
                                     const std::vector<std::vector<int> > &alignedToT,
                                     const std::vector<std::vector<int> > &alignedToS,
                                     const std::vector<int> &alignedCountS)
  : m_countF(sourceSize),
    m_countE(targetSize),
    m_alignedToT(alignedToT)
{
  Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS);
}
|
||||
|
||||
|
||||
void PhraseOrientation::Init(int sourceSize,
|
||||
int targetSize,
|
||||
@ -470,6 +478,4 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Syntax
|
||||
} // namespace MosesTraining
|
@ -32,10 +32,6 @@
|
||||
|
||||
namespace MosesTraining
|
||||
{
|
||||
namespace Syntax
|
||||
{
|
||||
namespace GHKM
|
||||
{
|
||||
|
||||
// The key of the map is the English index and the value is a set of the source ones
|
||||
typedef std::map <int, std::set<int> > HSentenceVertices;
|
||||
@ -49,6 +45,7 @@ public:
|
||||
enum REO_CLASS {REO_CLASS_LEFT, REO_CLASS_RIGHT, REO_CLASS_DLEFT, REO_CLASS_DRIGHT, REO_CLASS_UNKNOWN};
|
||||
enum REO_DIR {REO_DIR_L2R, REO_DIR_R2L, REO_DIR_BIDIR};
|
||||
|
||||
PhraseOrientation() {};
|
||||
|
||||
PhraseOrientation(int sourceSize,
|
||||
int targetSize,
|
||||
@ -59,6 +56,12 @@ public:
|
||||
const Moses::AlignmentInfo &alignTerm,
|
||||
const Moses::AlignmentInfo &alignNonTerm);
|
||||
|
||||
PhraseOrientation(int sourceSize,
|
||||
int targetSize,
|
||||
const std::vector<std::vector<int> > &alignedToT,
|
||||
const std::vector<std::vector<int> > &alignedToS,
|
||||
const std::vector<int> &alignedCountS);
|
||||
|
||||
REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
|
||||
REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
|
||||
const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=REO_DIR_BIDIR) const;
|
||||
@ -104,8 +107,8 @@ private:
|
||||
return first < second;
|
||||
};
|
||||
|
||||
const int m_countF;
|
||||
const int m_countE;
|
||||
int m_countF;
|
||||
int m_countE;
|
||||
|
||||
std::vector<std::vector<int> > m_alignedToT;
|
||||
|
||||
@ -121,6 +124,4 @@ private:
|
||||
static std::vector<float> m_r2lOrientationPriorCounts;
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
} // namespace Syntax
|
||||
} // namespace MosesTraining
|
@ -54,6 +54,7 @@ public:
|
||||
bool conditionOnTargetLhs;
|
||||
bool boundaryRules;
|
||||
bool flexScoreFlag;
|
||||
bool phraseOrientation;
|
||||
|
||||
RuleExtractionOptions()
|
||||
: maxSpan(10)
|
||||
@ -86,7 +87,8 @@ public:
|
||||
, unpairedExtractFormat(false)
|
||||
, conditionOnTargetLhs(false)
|
||||
, boundaryRules(false)
|
||||
, flexScoreFlag(false) {}
|
||||
, flexScoreFlag(false)
|
||||
, phraseOrientation(false) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include "XmlTree.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "extract-ghkm/PhraseOrientation.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace MosesTraining;
|
||||
@ -62,6 +63,7 @@ private:
|
||||
Moses::OutputFileStream& m_extractFileInv;
|
||||
Moses::OutputFileStream& m_extractFileContext;
|
||||
Moses::OutputFileStream& m_extractFileContextInv;
|
||||
Syntax::GHKM::PhraseOrientation m_phraseOrientation;
|
||||
|
||||
vector< ExtractedRule > m_extractedRules;
|
||||
|
||||
@ -109,6 +111,7 @@ public:
|
||||
void collectWordLabelCounts(SentenceAlignmentWithSyntax &sentence );
|
||||
void writeGlueGrammar(const string &, RuleExtractionOptions &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection);
|
||||
void writeUnknownWordLabel(const string &);
|
||||
void writePhraseOrientationPriors(const string &);
|
||||
|
||||
double getPcfgScore(const SyntaxNode &);
|
||||
|
||||
@ -142,7 +145,8 @@ int main(int argc, char* argv[])
|
||||
<< " | --UnpairedExtractFormat"
|
||||
<< " | --ConditionOnTargetLHS ]"
|
||||
<< " | --BoundaryRules[" << options.boundaryRules << "]"
|
||||
<< " | --FlexibilityScore\n";
|
||||
<< " | --FlexibilityScore"
|
||||
<< " | --PhraseOrientation\n";
|
||||
|
||||
exit(1);
|
||||
}
|
||||
@ -267,6 +271,8 @@ int main(int argc, char* argv[])
|
||||
options.conditionOnTargetLhs = true;
|
||||
} else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
|
||||
options.flexScoreFlag = true;
|
||||
} else if (strcmp(argv[i],"--PhraseOrientation") == 0) {
|
||||
options.phraseOrientation = true;
|
||||
} else if (strcmp(argv[i],"-threads") == 0 ||
|
||||
strcmp(argv[i],"--threads") == 0 ||
|
||||
strcmp(argv[i],"--Threads") == 0) {
|
||||
@ -377,6 +383,11 @@ int main(int argc, char* argv[])
|
||||
|
||||
if (options.unknownWordLabelFlag)
|
||||
writeUnknownWordLabel(fileNameUnknownWordLabel);
|
||||
|
||||
if (options.phraseOrientation) {
|
||||
std::string fileNamePhraseOrientationPriors = fileNameExtract + string(".phraseOrientationPriors");
|
||||
writePhraseOrientationPriors(fileNamePhraseOrientationPriors);
|
||||
}
|
||||
}
|
||||
|
||||
void ExtractTask::Run()
|
||||
@ -392,6 +403,12 @@ void ExtractTask::extractRules()
|
||||
int countT = m_sentence.target.size();
|
||||
int countS = m_sentence.source.size();
|
||||
|
||||
// initialize phrase orientation scoring object (for lexicalized reordering model)
|
||||
if (m_options.phraseOrientation) {
|
||||
m_sentence.invertAlignment(); // fill m_sentence.alignedToS
|
||||
m_phraseOrientation = Syntax::GHKM::PhraseOrientation(countS, countT, m_sentence.alignedToT, m_sentence.alignedToS, m_sentence.alignedCountS);
|
||||
}
|
||||
|
||||
// phrase repository for creating hiero phrases
|
||||
RuleExist ruleExist(countT);
|
||||
|
||||
@ -990,6 +1007,10 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, int count
|
||||
}
|
||||
}
|
||||
|
||||
rule.alignment.erase(rule.alignment.size()-1);
|
||||
if (!m_options.onlyDirectFlag)
|
||||
rule.alignmentInv.erase(rule.alignmentInv.size()-1);
|
||||
|
||||
// context (words to left and right)
|
||||
if (m_options.flexScoreFlag) {
|
||||
rule.sourceContextLeft = startS == 0 ? "<s>" : m_sentence.source[startS-1];
|
||||
@ -998,9 +1019,14 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, int count
|
||||
rule.targetContextRight = endT+1 == m_sentence.target.size() ? "<s>" : m_sentence.target[endT+1];
|
||||
}
|
||||
|
||||
rule.alignment.erase(rule.alignment.size()-1);
|
||||
if (!m_options.onlyDirectFlag)
|
||||
rule.alignmentInv.erase(rule.alignmentInv.size()-1);
|
||||
// phrase orientation (lexicalized reordering model)
|
||||
if (m_options.phraseOrientation) {
|
||||
rule.l2rOrientation = m_phraseOrientation.GetOrientationInfo(startS,endS,Syntax::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
rule.r2lOrientation = m_phraseOrientation.GetOrientationInfo(startS,endS,Syntax::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
// std::cerr << "span " << startS << " " << endS << std::endl;
|
||||
// std::cerr << "phraseOrientationL2R " << m_phraseOrientation.GetOrientationInfo(startS,endS,Syntax::GHKM::PhraseOrientation::REO_DIR_L2R) << std::endl;
|
||||
// std::cerr << "phraseOrientationR2L " << m_phraseOrientation.GetOrientationInfo(startS,endS,Syntax::GHKM::PhraseOrientation::REO_DIR_R2L) << std::endl;
|
||||
}
|
||||
|
||||
addRuleToCollection( rule );
|
||||
}
|
||||
@ -1070,6 +1096,15 @@ void ExtractTask::writeRulesToFile()
|
||||
if (m_options.pcfgScore) {
|
||||
out << " ||| " << rule->pcfgScore;
|
||||
}
|
||||
if (m_options.phraseOrientation) {
|
||||
out << " {{Orientation ";
|
||||
m_phraseOrientation.WriteOrientation(out,rule->l2rOrientation);
|
||||
out << " ";
|
||||
m_phraseOrientation.WriteOrientation(out,rule->r2lOrientation);
|
||||
m_phraseOrientation.IncrementPriorCount(Syntax::GHKM::PhraseOrientation::REO_DIR_L2R,rule->l2rOrientation,1);
|
||||
m_phraseOrientation.IncrementPriorCount(Syntax::GHKM::PhraseOrientation::REO_DIR_R2L,rule->r2lOrientation,1);
|
||||
out << "}}";
|
||||
}
|
||||
out << "\n";
|
||||
|
||||
if (!m_options.onlyDirectFlag) {
|
||||
@ -1171,8 +1206,7 @@ void collectWordLabelCounts( SentenceAlignmentWithSyntax &sentence )
|
||||
}
|
||||
}
|
||||
|
||||
void writeUnknownWordLabel(const string & fileName)
|
||||
{
|
||||
void writeUnknownWordLabel(const string & fileName) {
|
||||
ofstream outFile;
|
||||
outFile.open(fileName.c_str());
|
||||
typedef map<string,int>::const_iterator I;
|
||||
@ -1196,6 +1230,14 @@ void writeUnknownWordLabel(const string & fileName)
|
||||
outFile.close();
|
||||
}
|
||||
|
||||
void writePhraseOrientationPriors(const string &fileName)
|
||||
{
|
||||
ofstream outFile;
|
||||
outFile.open(fileName.c_str());
|
||||
Syntax::GHKM::PhraseOrientation::WritePriorCounts(outFile);
|
||||
outFile.close();
|
||||
}
|
||||
|
||||
double getPcfgScore(const SyntaxNode &node)
|
||||
{
|
||||
double score = 0.0f;
|
||||
|
@ -2364,6 +2364,12 @@ sub define_training_extract_phrases {
|
||||
$cmd .= "-unknown-word-soft-matches $unknown_word_soft_matches ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:phrase-orientation")) {
|
||||
$cmd .= "-phrase-orientation ";
|
||||
my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
|
||||
$cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:use-ghkm")) {
|
||||
$cmd .= "-ghkm ";
|
||||
}
|
||||
@ -2372,12 +2378,6 @@ sub define_training_extract_phrases {
|
||||
$cmd .= "-ghkm-tree-fragments ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:ghkm-phrase-orientation")) {
|
||||
$cmd .= "-ghkm-phrase-orientation ";
|
||||
my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
|
||||
$cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:ghkm-source-labels")) {
|
||||
$cmd .= "-ghkm-source-labels ";
|
||||
my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
|
||||
@ -2427,16 +2427,16 @@ sub define_training_build_ttable {
|
||||
|
||||
if (&get("TRAINING:hierarchical-rule-set")) {
|
||||
|
||||
if (&get("TRAINING:ghkm-tree-fragments")) {
|
||||
$cmd .= "-ghkm-tree-fragments ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:ghkm-phrase-orientation")) {
|
||||
$cmd .= "-ghkm-phrase-orientation ";
|
||||
if (&get("TRAINING:phrase-orientation")) {
|
||||
$cmd .= "-phrase-orientation ";
|
||||
my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
|
||||
$cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:ghkm-tree-fragments")) {
|
||||
$cmd .= "-ghkm-tree-fragments ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:ghkm-source-labels")) {
|
||||
$cmd .= "-ghkm-source-labels ";
|
||||
my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
|
||||
@ -2640,6 +2640,10 @@ sub define_training_create_config {
|
||||
}
|
||||
}
|
||||
|
||||
if (&get("TRAINING:phrase-orientation")) {
|
||||
$cmd .= "-phrase-orientation ";
|
||||
}
|
||||
|
||||
if (&get("TRAINING:ghkm-source-labels")) {
|
||||
$cmd .= "-ghkm-source-labels ";
|
||||
my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
|
||||
|
@ -93,10 +93,10 @@ my($_EXTERNAL_BINDIR,
|
||||
$_GLUE_GRAMMAR_FILE,
|
||||
$_DONT_TUNE_GLUE_GRAMMAR,
|
||||
$_UNKNOWN_WORD_LABEL_FILE,
|
||||
$_PHRASE_ORIENTATION,
|
||||
$_PHRASE_ORIENTATION_PRIORS_FILE,
|
||||
$_GHKM,
|
||||
$_GHKM_TREE_FRAGMENTS,
|
||||
$_GHKM_PHRASE_ORIENTATION,
|
||||
$_PHRASE_ORIENTATION_PRIORS_FILE,
|
||||
$_GHKM_SOURCE_LABELS,
|
||||
$_GHKM_SOURCE_LABELS_FILE,
|
||||
$_GHKM_PARTS_OF_SPEECH,
|
||||
@ -210,10 +210,10 @@ $_HELP = 1
|
||||
'dont-tune-glue-grammar' => \$_DONT_TUNE_GLUE_GRAMMAR,
|
||||
'unknown-word-label-file=s' => \$_UNKNOWN_WORD_LABEL_FILE,
|
||||
'unknown-word-soft-matches-file=s' => \$_UNKNOWN_WORD_SOFT_MATCHES_FILE, # give dummy label to unknown word, and allow soft matches to all other labels (with cost determined by sparse features)
|
||||
'phrase-orientation' => \$_PHRASE_ORIENTATION,
|
||||
'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for Hiero and GHKM extraction only; phrase orientation for PBT has different implementation
|
||||
'ghkm' => \$_GHKM,
|
||||
'ghkm-tree-fragments' => \$_GHKM_TREE_FRAGMENTS,
|
||||
'ghkm-phrase-orientation' => \$_GHKM_PHRASE_ORIENTATION,
|
||||
'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for GHKM extraction only; phrase orientation for PBT has different implementation
|
||||
'ghkm-source-labels' => \$_GHKM_SOURCE_LABELS,
|
||||
'ghkm-source-labels-file=s' => \$_GHKM_SOURCE_LABELS_FILE,
|
||||
'ghkm-parts-of-speech' => \$_GHKM_PARTS_OF_SPEECH,
|
||||
@ -1561,11 +1561,11 @@ sub extract_phrase {
|
||||
$cmd .= " --PCFG" if $_PCFG;
|
||||
$cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
|
||||
$cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
|
||||
$cmd .= " --PhraseOrientation" if $_PHRASE_ORIENTATION;
|
||||
$cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
|
||||
if (defined($_GHKM))
|
||||
{
|
||||
$cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
|
||||
$cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
|
||||
$cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
|
||||
$cmd .= " --SourceLabels" if $_GHKM_SOURCE_LABELS;
|
||||
$cmd .= " --PartsOfSpeech" if $_GHKM_PARTS_OF_SPEECH;
|
||||
$cmd .= " --PartsOfSpeechFactor" if $_GHKM_PARTS_OF_SPEECH_FACTOR;
|
||||
@ -1701,6 +1701,7 @@ sub score_phrase_phrase_extract {
|
||||
my $LOG_PROB = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /LogProb/);
|
||||
my $NEG_LOG_PROB = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NegLogProb/);
|
||||
my $NO_LEX = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NoLex/);
|
||||
my $MIN_COUNT = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCount ([\d\.]+)/) ? $1 : undef;
|
||||
my $MIN_COUNT_HIERARCHICAL = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCountHierarchical ([\d\.]+)/) ? $1 : undef;
|
||||
my $SOURCE_LABELS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabels/);
|
||||
my $SOURCE_LABEL_COUNTS_LHS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelCountsLHS/);
|
||||
@ -1748,13 +1749,14 @@ sub score_phrase_phrase_extract {
|
||||
$cmd .= " --SpanLength" if $SPAN_LENGTH && $inverse eq "";
|
||||
$cmd .= " --UnalignedPenalty" if $UNALIGNED_COUNT;
|
||||
$cmd .= " --UnalignedFunctionWordPenalty ".($inverse ? $UNALIGNED_FW_F : $UNALIGNED_FW_E) if $UNALIGNED_FW_COUNT;
|
||||
$cmd .= " --MinCount $MIN_COUNT" if $MIN_COUNT;
|
||||
$cmd .= " --MinCountHierarchical $MIN_COUNT_HIERARCHICAL" if $MIN_COUNT_HIERARCHICAL;
|
||||
$cmd .= " --PCFG" if $_PCFG;
|
||||
$cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
|
||||
$cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
|
||||
$cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
|
||||
$cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
|
||||
$cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_GHKM_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE);
|
||||
$cmd .= " --PhraseOrientation" if $_PHRASE_ORIENTATION;
|
||||
$cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE);
|
||||
$cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
|
||||
$cmd .= " --PartsOfSpeech $_GHKM_PARTS_OF_SPEECH_FILE" if $_GHKM_PARTS_OF_SPEECH && defined($_GHKM_PARTS_OF_SPEECH_FILE);
|
||||
$cmd .= " $DOMAIN" if $DOMAIN;
|
||||
@ -2365,6 +2367,7 @@ sub create_ini {
|
||||
print INI "PhrasePenalty\n";
|
||||
print INI "SoftMatchingFeature name=SM0 path=$_UNKNOWN_WORD_SOFT_MATCHES_FILE\n" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_SOFT_MATCHES_FILE);
|
||||
print INI "SoftSourceSyntacticConstraintsFeature sourceLabelSetFile=$_GHKM_SOURCE_LABELS_FILE\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
|
||||
print INI "PhraseOrientationFeature\n" if $_PHRASE_ORIENTATION;
|
||||
print INI $feature_spec;
|
||||
|
||||
print INI "\n# dense weights for feature functions\n";
|
||||
@ -2375,6 +2378,7 @@ sub create_ini {
|
||||
print INI "WordPenalty0= -1\n";
|
||||
print INI "PhrasePenalty0= 0.2\n";
|
||||
print INI "SoftSourceSyntacticConstraintsFeature0= -0.2 -0.2 -0.2 0.1 0.1 0.1\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
|
||||
print INI "PhraseOrientationFeature0= 0.05 0.05 0.05 0.05 0.05 0.05\n" if $_PHRASE_ORIENTATION;
|
||||
print INI $weight_spec;
|
||||
close(INI);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user