2014-12-03 23:04:26 +03:00
//
// REFERENCE
// ---------
2015-01-14 14:07:42 +03:00
// When using this feature, please cite:
//
2014-12-03 23:04:26 +03:00
// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
2015-01-14 14:07:42 +03:00
// A Phrase Orientation Model for Hierarchical Machine Translation.
// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
2014-12-03 23:04:26 +03:00
//
2014-09-09 21:17:05 +04:00
# include "PhraseOrientationFeature.h"
# include "moses/InputFileStream.h"
# include "moses/ScoreComponentCollection.h"
2014-09-12 20:17:40 +04:00
# include "moses/StaticData.h"
2014-09-09 21:17:05 +04:00
# include "moses/Hypothesis.h"
# include "moses/ChartHypothesis.h"
# include "moses/ChartManager.h"
2014-09-12 16:51:04 +04:00
# include "phrase-extract/extract-ghkm/Alignment.h"
2015-02-20 23:53:50 +03:00
# include <boost/shared_ptr.hpp>
2014-09-09 21:17:05 +04:00
namespace Moses
{
2015-01-28 23:50:24 +03:00
2015-01-29 23:23:41 +03:00
const std : : string PhraseOrientationFeature : : MORIENT ( " M " ) ;
const std : : string PhraseOrientationFeature : : SORIENT ( " S " ) ;
const std : : string PhraseOrientationFeature : : DORIENT ( " D " ) ;
2014-09-09 21:17:05 +04:00
// Builds the feature from its configuration line.
//
// The base class is set up with 6 dense score components; m_offsetR2LScores is
// half of that count, i.e. the position at which the right-to-left scores
// start. All switches start out disabled except m_distinguishStates.
// ReadParameters() (defined elsewhere) is run before the glue-rule LHS label is
// turned into a factor, so that a label changed through the " glueTargetLHS "
// key in SetParameter() can take effect -- presumably ReadParameters()
// dispatches to SetParameter(); confirm against the base class.
PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
  : StatefulFeatureFunction(6, line)
  , m_glueTargetLHSStr(" Q ")
  , m_distinguishStates(true)
  , m_useSparseWord(false)
  , m_useSparseNT(false)
  , m_offsetR2LScores(m_numScoreComponents / 2)
  , m_useTargetWordList(false)
  , m_useSourceWordList(false)
{
  VERBOSE(1, " Initializing feature " << GetScoreProducerDescription() << " ... ");

  ReadParameters();

  // Register the glue-rule left-hand side label with the global factor collection.
  m_glueTargetLHS = FactorCollection::Instance().AddFactor(m_glueTargetLHSStr, true);

  VERBOSE(1, " Done. " << std::endl);
}
2015-01-28 23:50:24 +03:00
2014-09-09 21:17:05 +04:00
void PhraseOrientationFeature : : SetParameter ( const std : : string & key , const std : : string & value )
{
2015-01-14 14:07:42 +03:00
if ( key = = " glueTargetLHS " ) {
2014-12-03 23:04:26 +03:00
m_glueTargetLHSStr = value ;
2015-01-16 20:48:58 +03:00
} else if ( key = = " distinguishStates " ) {
m_distinguishStates = Scan < bool > ( value ) ;
2015-01-29 23:23:41 +03:00
} else if ( key = = " sparseWord " ) {
2015-02-19 15:27:23 +03:00
m_useSparseWord = Scan < bool > ( value ) ;
2015-01-29 23:23:41 +03:00
} else if ( key = = " sparseNT " ) {
2015-02-19 15:27:23 +03:00
m_useSparseNT = Scan < bool > ( value ) ;
2015-01-28 23:50:24 +03:00
} else if ( key = = " targetWordList " ) {
2015-02-19 15:27:23 +03:00
m_filenameTargetWordList = value ;
2015-01-28 23:50:24 +03:00
} else if ( key = = " sourceWordList " ) {
2015-02-19 15:27:23 +03:00
m_filenameSourceWordList = value ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
StatefulFeatureFunction : : SetParameter ( key , value ) ;
2014-09-09 21:17:05 +04:00
}
}
2015-01-28 23:50:24 +03:00
void PhraseOrientationFeature : : Load ( )
{
if ( ! m_filenameTargetWordList . empty ( ) ) {
LoadWordList ( m_filenameTargetWordList , m_targetWordList ) ;
m_useTargetWordList = true ;
}
if ( ! m_filenameSourceWordList . empty ( ) ) {
LoadWordList ( m_filenameSourceWordList , m_sourceWordList ) ;
m_useSourceWordList = true ;
}
}
void PhraseOrientationFeature : : LoadWordList ( const std : : string & filename ,
2015-02-19 15:27:23 +03:00
boost : : unordered_set < const Factor * > & list )
2015-01-28 23:50:24 +03:00
{
FEATUREVERBOSE ( 2 , " Loading word list from file " < < filename < < std : : endl ) ;
FactorCollection & factorCollection = FactorCollection : : Instance ( ) ;
list . clear ( ) ;
std : : string line ;
InputFileStream inFile ( filename ) ;
while ( getline ( inFile , line ) ) {
const Factor * factor = factorCollection . AddFactor ( line , false ) ;
list . insert ( factor ) ;
}
inFile . Close ( ) ;
}
2015-02-19 15:27:23 +03:00
void PhraseOrientationFeature : : EvaluateInIsolation ( const Phrase & source ,
const TargetPhrase & targetPhrase ,
ScoreComponentCollection & scoreBreakdown ,
ScoreComponentCollection & estimatedFutureScore ) const
2015-01-27 00:11:37 +03:00
{
targetPhrase . SetRuleSource ( source ) ;
if ( const PhraseProperty * property = targetPhrase . GetProperty ( " Orientation " ) ) {
const OrientationPhraseProperty * orientationPhraseProperty = static_cast < const OrientationPhraseProperty * > ( property ) ;
LookaheadScore ( orientationPhraseProperty , scoreBreakdown ) ;
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2 ( ! targetPhrase . GetWord ( 0 ) . IsOOV ( ) , GetScoreProducerDescription ( )
< < " : Missing Orientation property. "
< < " Please check phrase table and glue rules. " ) ;
}
2015-02-20 23:53:50 +03:00
IFFEATUREVERBOSE ( 2 ) {
FEATUREVERBOSE ( 2 , " BEGIN========EvaluateInIsolation======== " < < std : : endl ) ;
FEATUREVERBOSE ( 2 , source < < std : : endl ) ;
FEATUREVERBOSE ( 2 , targetPhrase < < std : : endl ) ;
for ( AlignmentInfo : : const_iterator it = targetPhrase . GetAlignTerm ( ) . begin ( ) ;
it ! = targetPhrase . GetAlignTerm ( ) . end ( ) ; + + it ) {
FEATUREVERBOSE ( 2 , " alignTerm " < < it - > first < < " " < < it - > second < < std : : endl ) ;
}
for ( AlignmentInfo : : const_iterator it = targetPhrase . GetAlignNonTerm ( ) . begin ( ) ;
it ! = targetPhrase . GetAlignNonTerm ( ) . end ( ) ; + + it ) {
FEATUREVERBOSE ( 2 , " alignNonTerm " < < it - > first < < " " < < it - > second < < std : : endl ) ;
}
}
if ( targetPhrase . GetAlignNonTerm ( ) . GetSize ( ) ! = 0 ) {
// Initialize phrase orientation scoring object
Moses : : GHKM : : PhraseOrientation phraseOrientation ( source . GetSize ( ) , targetPhrase . GetSize ( ) ,
targetPhrase . GetAlignTerm ( ) , targetPhrase . GetAlignNonTerm ( ) ) ;
PhraseOrientationFeature : : ReoClassData * reoClassData = new PhraseOrientationFeature : : ReoClassData ( ) ;
// Determine orientation classes of non-terminals
const Factor * targetPhraseLHS = targetPhrase . GetTargetLHS ( ) [ 0 ] ;
for ( AlignmentInfo : : const_iterator it = targetPhrase . GetAlignNonTerm ( ) . begin ( ) ;
it ! = targetPhrase . GetAlignNonTerm ( ) . end ( ) ; + + it ) {
size_t sourceIndex = it - > first ;
size_t targetIndex = it - > second ;
// LEFT-TO-RIGHT DIRECTION
Moses : : GHKM : : PhraseOrientation : : REO_CLASS l2rOrientation = phraseOrientation . GetOrientationInfo ( sourceIndex , sourceIndex , Moses : : GHKM : : PhraseOrientation : : REO_DIR_L2R ) ;
if ( ( ( targetIndex = = 0 ) | | ! phraseOrientation . TargetSpanIsAligned ( 0 , targetIndex ) ) // boundary non-terminal in rule-initial position (left boundary)
& & ( targetPhraseLHS ! = m_glueTargetLHS ) ) { // and not glue rule
FEATUREVERBOSE ( 3 , " Left boundary: targetIndex== " < < targetIndex ) ;
if ( targetIndex ! = 0 ) {
FEATUREVERBOSE2 ( 3 , " (!=0) " ) ;
}
FEATUREVERBOSE2 ( 3 , std : : endl ) ;
reoClassData - > firstNonTerminalPreviousSourceSpanIsAligned = ( ( sourceIndex > 0 ) & & phraseOrientation . SourceSpanIsAligned ( 0 , sourceIndex - 1 ) ) ;
reoClassData - > firstNonTerminalFollowingSourceSpanIsAligned = ( ( sourceIndex < source . GetSize ( ) - 1 ) & & phraseOrientation . SourceSpanIsAligned ( sourceIndex , source . GetSize ( ) - 1 ) ) ;
FEATUREVERBOSE ( 4 , " firstNonTerminalPreviousSourceSpanIsAligned== " < < reoClassData - > firstNonTerminalPreviousSourceSpanIsAligned < < std : : endl ) ;
FEATUREVERBOSE ( 4 , " firstNonTerminalFollowingSourceSpanIsAligned== " < < reoClassData - > firstNonTerminalFollowingSourceSpanIsAligned < < std : : endl ; ) ;
if ( reoClassData - > firstNonTerminalPreviousSourceSpanIsAligned & &
reoClassData - > firstNonTerminalFollowingSourceSpanIsAligned ) {
// discontinuous
l2rOrientation = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT ;
} else {
reoClassData - > firstNonTerminalIsBoundary = true ;
}
}
reoClassData - > nonTerminalReoClassL2R . push_back ( l2rOrientation ) ;
// RIGHT-TO-LEFT DIRECTION
Moses : : GHKM : : PhraseOrientation : : REO_CLASS r2lOrientation = phraseOrientation . GetOrientationInfo ( sourceIndex , sourceIndex , Moses : : GHKM : : PhraseOrientation : : REO_DIR_R2L ) ;
if ( ( ( targetIndex = = targetPhrase . GetSize ( ) - 1 ) | | ! phraseOrientation . TargetSpanIsAligned ( targetIndex , targetPhrase . GetSize ( ) - 1 ) ) // boundary non-terminal in rule-final position (right boundary)
& & ( targetPhraseLHS ! = m_glueTargetLHS ) ) { // and not glue rule
FEATUREVERBOSE ( 3 , " Right boundary: targetIndex== " < < targetIndex ) ;
if ( targetIndex ! = targetPhrase . GetSize ( ) - 1 ) {
FEATUREVERBOSE2 ( 3 , " (!=targetPhrase.GetSize()-1) " ) ;
}
FEATUREVERBOSE2 ( 3 , std : : endl ) ;
reoClassData - > lastNonTerminalPreviousSourceSpanIsAligned = ( ( sourceIndex > 0 ) & & phraseOrientation . SourceSpanIsAligned ( 0 , sourceIndex - 1 ) ) ;
reoClassData - > lastNonTerminalFollowingSourceSpanIsAligned = ( ( sourceIndex < source . GetSize ( ) - 1 ) & & phraseOrientation . SourceSpanIsAligned ( sourceIndex , source . GetSize ( ) - 1 ) ) ;
FEATUREVERBOSE ( 4 , " lastNonTerminalPreviousSourceSpanIsAligned== " < < reoClassData - > lastNonTerminalPreviousSourceSpanIsAligned < < std : : endl ) ;
FEATUREVERBOSE ( 4 , " lastNonTerminalFollowingSourceSpanIsAligned== " < < reoClassData - > lastNonTerminalFollowingSourceSpanIsAligned < < std : : endl ; ) ;
if ( reoClassData - > lastNonTerminalPreviousSourceSpanIsAligned & &
reoClassData - > lastNonTerminalFollowingSourceSpanIsAligned ) {
// discontinuous
r2lOrientation = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT ;
} else {
reoClassData - > lastNonTerminalIsBoundary = true ;
}
}
reoClassData - > nonTerminalReoClassR2L . push_back ( r2lOrientation ) ;
}
bool inserted = targetPhrase . SetData ( " Orientation " , boost : : shared_ptr < void > ( reoClassData ) ) ;
UTIL_THROW_IF2 ( ! inserted , GetScoreProducerDescription ( )
< < " : Insertion of orientation data attempted repeatedly. " ) ;
}
FEATUREVERBOSE ( 2 , " END========EvaluateInIsolation======== " < < std : : endl ) ;
2015-01-27 00:11:37 +03:00
}
2015-01-28 23:50:24 +03:00
2015-02-19 15:27:23 +03:00
void PhraseOrientationFeature : : LookaheadScore ( const OrientationPhraseProperty * orientationPhraseProperty ,
ScoreComponentCollection & scoreBreakdown ,
bool subtract ) const
2015-01-27 00:11:37 +03:00
{
size_t ffScoreIndex = scoreBreakdown . GetIndexes ( this ) . first ;
std : : vector < float > scoresL2R ;
scoresL2R . push_back ( TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilityMono ( ) ) ) ;
scoresL2R . push_back ( TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilitySwap ( ) ) ) ;
scoresL2R . push_back ( TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilityDiscontinuous ( ) ) ) ;
size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex ( scoresL2R , 0 ) ;
if ( subtract ) {
2015-02-19 15:27:23 +03:00
scoreBreakdown . PlusEquals ( ffScoreIndex + heuristicScoreIndexL2R ,
2015-01-27 00:11:37 +03:00
- scoresL2R [ heuristicScoreIndexL2R ] ) ;
} else {
2015-02-19 15:27:23 +03:00
scoreBreakdown . PlusEquals ( ffScoreIndex + heuristicScoreIndexL2R ,
2015-01-27 00:11:37 +03:00
scoresL2R [ heuristicScoreIndexL2R ] ) ;
}
std : : vector < float > scoresR2L ;
scoresR2L . push_back ( TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilityMono ( ) ) ) ;
scoresR2L . push_back ( TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilitySwap ( ) ) ) ;
scoresR2L . push_back ( TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilityDiscontinuous ( ) ) ) ;
size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex ( scoresR2L , m_offsetR2LScores ) ;
if ( subtract ) {
2015-02-19 15:27:23 +03:00
scoreBreakdown . PlusEquals ( ffScoreIndex + m_offsetR2LScores + heuristicScoreIndexR2L ,
2015-01-27 00:11:37 +03:00
- scoresR2L [ heuristicScoreIndexR2L ] ) ;
} else {
2015-02-19 15:27:23 +03:00
scoreBreakdown . PlusEquals ( ffScoreIndex + m_offsetR2LScores + heuristicScoreIndexR2L ,
2015-01-27 00:11:37 +03:00
scoresR2L [ heuristicScoreIndexR2L ] ) ;
}
}
2014-09-12 16:51:04 +04:00
2015-01-28 23:50:24 +03:00
2014-12-03 23:04:26 +03:00
// Scores the orientation of every non-terminal of the rule applied by `hypo`
// (chart decoding) and builds the feature state for the new hypothesis.
//
// Parameters:
//   hypo        - hypothesis whose current target phrase (rule) is scored
//   featureID   - index used to fetch this feature's state from the previous
//                 (sub-derivation) hypotheses
//   accumulator - receives the dense scores (added in one go at the end from
//                 newScores), the lookahead corrections and the sparse scores
//
// Returns the newly allocated PhraseOrientationFeatureState.
//
// Throws (UTIL_THROW2 / UTIL_THROW_IF2) on missing orientation data, on a
// missing Orientation property of a non-OOV sub-derivation phrase and on
// unsupported orientation classes.
FFState * PhraseOrientationFeature : : EvaluateWhenApplied (
2014-09-09 21:17:05 +04:00
const ChartHypothesis & hypo ,
2014-12-03 23:04:26 +03:00
int featureID , // used to index the state in the previous hypotheses
2014-09-09 21:17:05 +04:00
ScoreComponentCollection * accumulator ) const
{
2014-09-12 20:50:02 +04:00
// Dense scores
2014-12-03 23:04:26 +03:00
std : : vector < float > newScores ( m_numScoreComponents , 0 ) ;
2014-09-12 20:50:02 +04:00
// Read Orientation property
2014-09-09 21:17:05 +04:00
const TargetPhrase & currTarPhr = hypo . GetCurrTargetPhrase ( ) ;
2014-09-12 16:51:04 +04:00
const Phrase * currSrcPhr = currTarPhr . GetRuleSource ( ) ;
2015-01-29 23:23:41 +03:00
// State: used to propagate orientation probabilities in case of boundary non-terminals
// NOTE(review): the state is held by a raw pointer until it is returned at the
// end of the function; nothing in this function frees it if one of the
// UTIL_THROW macros below fires first -- confirm whether that matters here.
PhraseOrientationFeatureState * state = new PhraseOrientationFeatureState ( m_distinguishStates , m_useSparseWord , m_useSparseNT ) ;
2015-01-14 14:07:42 +03:00
IFFEATUREVERBOSE ( 2 ) {
FEATUREVERBOSE ( 2 , * currSrcPhr < < std : : endl ) ;
FEATUREVERBOSE ( 2 , currTarPhr < < std : : endl ) ;
2015-01-09 17:03:18 +03:00
for ( AlignmentInfo : : const_iterator it = currTarPhr . GetAlignTerm ( ) . begin ( ) ;
2015-01-14 14:07:42 +03:00
it ! = currTarPhr . GetAlignTerm ( ) . end ( ) ; + + it ) {
2015-01-09 17:03:18 +03:00
FEATUREVERBOSE ( 2 , " alignTerm " < < it - > first < < " " < < it - > second < < std : : endl ) ;
}
for ( AlignmentInfo : : const_iterator it = currTarPhr . GetAlignNonTerm ( ) . begin ( ) ;
2015-01-14 14:07:42 +03:00
it ! = currTarPhr . GetAlignNonTerm ( ) . end ( ) ; + + it ) {
2015-01-09 17:03:18 +03:00
FEATUREVERBOSE ( 2 , " alignNonTerm " < < it - > first < < " " < < it - > second < < std : : endl ) ;
}
2014-09-12 16:51:04 +04:00
}
2015-02-20 23:53:50 +03:00
// Retrieve phrase orientation scoring object
// (the ReoClassData that EvaluateInIsolation attached to the target phrase).
// The pointer stays NULL for rules without non-terminals; it is only
// dereferenced inside the non-terminal loop below, which does not run then.
const PhraseOrientationFeature : : ReoClassData * reoClassData = NULL ;
if ( currTarPhr . GetAlignNonTerm ( ) . GetSize ( ) ! = 0 ) {
const boost : : shared_ptr < void > data = currTarPhr . GetData ( " Orientation " ) ;
UTIL_THROW_IF2 ( ! data , GetScoreProducerDescription ( )
< < " : Orientation data not set in target phrase. " ) ;
reoClassData = static_cast < const PhraseOrientationFeature : : ReoClassData * > ( data . get ( ) ) ;
}
2015-01-14 14:07:42 +03:00
2014-09-12 20:50:02 +04:00
// Get index map for underlying hypotheses
2014-09-09 21:17:05 +04:00
const AlignmentInfo : : NonTermIndexMap & nonTermIndexMap =
2015-01-14 14:07:42 +03:00
currTarPhr . GetAlignNonTerm ( ) . GetNonTermIndexMap ( ) ;
2014-09-09 21:17:05 +04:00
2015-02-20 23:53:50 +03:00
// Retrieve non-terminals orientation classes & score orientations
// nNT counts the non-terminals in iteration order; it indexes the orientation
// class vectors that EvaluateInIsolation filled in the same iteration order.
size_t nNT = 0 ;
2014-09-12 20:50:02 +04:00
2014-12-06 01:33:40 +03:00
for ( AlignmentInfo : : const_iterator it = currTarPhr . GetAlignNonTerm ( ) . begin ( ) ;
2015-01-14 14:07:42 +03:00
it ! = currTarPhr . GetAlignNonTerm ( ) . end ( ) ; + + it ) {
size_t sourceIndex = it - > first ;
size_t targetIndex = it - > second ;
size_t nonTermIndex = nonTermIndexMap [ targetIndex ] ;
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE ( 2 , " Scoring nonTermIndex== " < < nonTermIndex < < " targetIndex== " < < targetIndex < < " sourceIndex== " < < sourceIndex < < std : : endl ) ;
2015-01-14 14:07:42 +03:00
// consult subderivation
const ChartHypothesis * prevHypo = hypo . GetPrevHypo ( nonTermIndex ) ;
const TargetPhrase & prevTarPhr = prevHypo - > GetCurrTargetPhrase ( ) ;
2015-01-29 23:23:41 +03:00
const Factor * prevTarPhrLHS = prevTarPhr . GetTargetLHS ( ) [ 0 ] ;
2015-01-14 14:07:42 +03:00
if ( const PhraseProperty * property = prevTarPhr . GetProperty ( " Orientation " ) ) {
const OrientationPhraseProperty * orientationPhraseProperty = static_cast < const OrientationPhraseProperty * > ( property ) ;
FEATUREVERBOSE ( 5 , " orientationPhraseProperty: "
< < " L2R_Mono " < < orientationPhraseProperty - > GetLeftToRightProbabilityMono ( )
< < " L2R_Swap " < < orientationPhraseProperty - > GetLeftToRightProbabilitySwap ( )
< < " L2R_Dright " < < orientationPhraseProperty - > GetLeftToRightProbabilityDright ( )
< < " L2R_Dleft " < < orientationPhraseProperty - > GetLeftToRightProbabilityDleft ( )
< < " R2L_Mono " < < orientationPhraseProperty - > GetRightToLeftProbabilityMono ( )
< < " R2L_Swap " < < orientationPhraseProperty - > GetRightToLeftProbabilitySwap ( )
< < " R2L_Dright " < < orientationPhraseProperty - > GetRightToLeftProbabilityDright ( )
< < " R2L_Dleft " < < orientationPhraseProperty - > GetRightToLeftProbabilityDleft ( )
< < std : : endl ) ;
2015-02-19 15:27:23 +03:00
2015-01-27 00:11:37 +03:00
// Take the lookahead (heuristic) score of the sub-derivation's rule out of
// the accumulator again (subtract == true); the scores for this
// non-terminal are added to newScores below.
LookaheadScore ( orientationPhraseProperty , * accumulator , true ) ;
2015-01-14 14:07:42 +03:00
const PhraseOrientationFeatureState * prevState =
static_cast < const PhraseOrientationFeatureState * > ( prevHypo - > GetFFState ( featureID ) ) ;
// LEFT-TO-RIGHT DIRECTION
2015-02-20 23:53:50 +03:00
Moses : : GHKM : : PhraseOrientation : : REO_CLASS l2rOrientation = reoClassData - > nonTerminalReoClassL2R [ nNT ] ;
2015-01-14 14:07:42 +03:00
IFFEATUREVERBOSE ( 2 ) {
FEATUREVERBOSE ( 2 , " l2rOrientation " ) ;
switch ( l2rOrientation ) {
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_LEFT :
FEATUREVERBOSE2 ( 2 , " mono " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_RIGHT :
FEATUREVERBOSE2 ( 2 , " swap " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT :
FEATUREVERBOSE2 ( 2 , " dleft " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DRIGHT :
FEATUREVERBOSE2 ( 2 , " dright " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_UNKNOWN :
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2 ( 2 , " unknown->dleft " < < std : : endl ) ;
break ;
default :
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Unsupported orientation type. " ) ;
break ;
2014-12-06 01:33:40 +03:00
}
2015-01-14 14:07:42 +03:00
}
2014-12-03 23:04:26 +03:00
2015-02-20 23:53:50 +03:00
// NOTE(review): firstNonTerminalIsBoundary is a per-rule flag (set in
// EvaluateInIsolation) and is tested here for every non-terminal of the
// loop, independent of nNT -- confirm that rules with several non-terminals
// are meant to take the delayed branch for each of them.
if ( reoClassData - > firstNonTerminalIsBoundary ) {
2015-01-14 14:07:42 +03:00
// delay left-to-right scoring
2014-12-06 01:33:40 +03:00
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE ( 3 , " Delaying left-to-right scoring " < < std : : endl ) ;
2015-01-14 14:07:42 +03:00
2015-02-20 23:53:50 +03:00
// Bit i corresponds to entry i of scoresL2R below (0: mono, 1: swap,
// 2: discontinuous). Bit 0 is cleared if the preceding source span is
// aligned, bit 1 if the following source span is aligned; bit 2 stays set.
std : : bitset < 3 > possibleFutureOrientationsL2R ( 0x7 ) ;
possibleFutureOrientationsL2R [ 0 ] = ! reoClassData - > firstNonTerminalPreviousSourceSpanIsAligned ;
possibleFutureOrientationsL2R [ 1 ] = ! reoClassData - > firstNonTerminalFollowingSourceSpanIsAligned ;
2015-01-14 14:07:42 +03:00
2015-02-20 23:53:50 +03:00
// add heuristic scores
2015-01-14 14:07:42 +03:00
2015-02-20 23:53:50 +03:00
std : : vector < float > scoresL2R ;
scoresL2R . push_back ( TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilityMono ( ) ) ) ;
scoresL2R . push_back ( TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilitySwap ( ) ) ) ;
scoresL2R . push_back ( TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilityDiscontinuous ( ) ) ) ;
size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex ( scoresL2R , 0 , possibleFutureOrientationsL2R ) ;
newScores [ heuristicScoreIndexL2R ] + = scoresL2R [ heuristicScoreIndexL2R ] ;
// remember scores, chosen heuristic and possible orientations in the new state
state - > SetLeftBoundaryL2R ( scoresL2R , heuristicScoreIndexL2R , possibleFutureOrientationsL2R , prevTarPhrLHS , prevState ) ;
if ( ( possibleFutureOrientationsL2R & prevState - > m_leftBoundaryNonTerminalL2RPossibleFutureOrientations ) = = 0x4 ) {
// recursive: discontinuous orientation
FEATUREVERBOSE ( 5 , " previous state: L2R discontinuous orientation "
< < possibleFutureOrientationsL2R < < " & " < < prevState - > m_leftBoundaryNonTerminalL2RPossibleFutureOrientations
< < " = " < < ( possibleFutureOrientationsL2R & prevState - > m_leftBoundaryNonTerminalL2RPossibleFutureOrientations )
< < std : : endl ) ;
LeftBoundaryL2RScoreRecursive ( featureID , prevState , 0x4 , newScores , accumulator ) ;
state - > m_leftBoundaryRecursionGuard = true ; // prevent subderivation from being scored recursively multiple times
2014-12-06 01:33:40 +03:00
}
2014-12-03 23:04:26 +03:00
2015-02-20 23:53:50 +03:00
} else {
2015-01-29 23:23:41 +03:00
// orientation is known: add the matching dense score (index 0: mono,
// 1: swap, 2: discontinuous) and resolve delayed scores of the subderivation
if ( l2rOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_LEFT ) {
2015-01-16 20:48:58 +03:00
newScores [ 0 ] + = TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilityMono ( ) ) ;
2015-01-14 14:07:42 +03:00
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
2015-01-29 23:23:41 +03:00
LeftBoundaryL2RScoreRecursive ( featureID , prevState , 0x1 , newScores , accumulator ) ;
} else if ( l2rOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_RIGHT ) {
2015-01-16 20:48:58 +03:00
newScores [ 1 ] + = TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilitySwap ( ) ) ;
2015-01-14 14:07:42 +03:00
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
2015-01-29 23:23:41 +03:00
LeftBoundaryL2RScoreRecursive ( featureID , prevState , 0x2 , newScores , accumulator ) ;
} else if ( ( l2rOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT ) | |
( l2rOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DRIGHT ) | |
( l2rOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_UNKNOWN ) ) {
2015-01-16 20:48:58 +03:00
newScores [ 2 ] + = TransformScore ( orientationPhraseProperty - > GetLeftToRightProbabilityDiscontinuous ( ) ) ;
2015-01-14 14:07:42 +03:00
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
2015-01-29 23:23:41 +03:00
LeftBoundaryL2RScoreRecursive ( featureID , prevState , 0x4 , newScores , accumulator ) ;
} else {
2015-01-14 14:07:42 +03:00
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Unsupported orientation type. " ) ;
2015-01-29 23:23:41 +03:00
}
// sparse scores
if ( m_useSparseWord ) {
SparseWordL2RScore ( prevHypo , accumulator , ToString ( l2rOrientation ) ) ;
}
if ( m_useSparseNT ) {
SparseNonTerminalL2RScore ( prevTarPhrLHS , accumulator , ToString ( l2rOrientation ) ) ;
2014-12-06 01:33:40 +03:00
}
2015-01-14 14:07:42 +03:00
}
2014-12-03 23:04:26 +03:00
2015-01-14 14:07:42 +03:00
// RIGHT-TO-LEFT DIRECTION
2015-02-20 23:53:50 +03:00
Moses : : GHKM : : PhraseOrientation : : REO_CLASS r2lOrientation = reoClassData - > nonTerminalReoClassR2L [ nNT ] ;
2015-01-14 14:07:42 +03:00
IFFEATUREVERBOSE ( 2 ) {
FEATUREVERBOSE ( 2 , " r2lOrientation " ) ;
switch ( r2lOrientation ) {
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_LEFT :
FEATUREVERBOSE2 ( 2 , " mono " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_RIGHT :
FEATUREVERBOSE2 ( 2 , " swap " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT :
FEATUREVERBOSE2 ( 2 , " dleft " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DRIGHT :
FEATUREVERBOSE2 ( 2 , " dright " < < std : : endl ) ;
break ;
case Moses : : GHKM : : PhraseOrientation : : REO_CLASS_UNKNOWN :
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2 ( 2 , " unknown->dleft " < < std : : endl ) ;
break ;
default :
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Unsupported orientation type. " ) ;
break ;
}
}
2014-12-03 23:04:26 +03:00
2015-02-20 23:53:50 +03:00
// NOTE(review): as for the left boundary, lastNonTerminalIsBoundary is a
// per-rule flag that is tested for every non-terminal, not only for the
// last one of the loop -- confirm this is intended.
if ( reoClassData - > lastNonTerminalIsBoundary ) {
2015-01-14 14:07:42 +03:00
// delay right-to-left scoring
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE ( 3 , " Delaying right-to-left scoring " < < std : : endl ) ;
2014-12-06 01:33:40 +03:00
2015-02-20 23:53:50 +03:00
// Mirror image of the left-to-right case: here bit 0 (mono) depends on the
// following source span and bit 1 (swap) on the preceding one.
std : : bitset < 3 > possibleFutureOrientationsR2L ( 0x7 ) ;
possibleFutureOrientationsR2L [ 0 ] = ! reoClassData - > lastNonTerminalFollowingSourceSpanIsAligned ;
possibleFutureOrientationsR2L [ 1 ] = ! reoClassData - > lastNonTerminalPreviousSourceSpanIsAligned ;
2015-01-14 14:07:42 +03:00
2015-02-20 23:53:50 +03:00
// add heuristic scores
2015-01-14 14:07:42 +03:00
2015-02-20 23:53:50 +03:00
std : : vector < float > scoresR2L ;
scoresR2L . push_back ( TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilityMono ( ) ) ) ;
scoresR2L . push_back ( TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilitySwap ( ) ) ) ;
scoresR2L . push_back ( TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilityDiscontinuous ( ) ) ) ;
2015-01-14 14:07:42 +03:00
2015-02-20 23:53:50 +03:00
size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex ( scoresR2L , m_offsetR2LScores , possibleFutureOrientationsR2L ) ;
newScores [ m_offsetR2LScores + heuristicScoreIndexR2L ] + = scoresR2L [ heuristicScoreIndexR2L ] ;
// remember scores, chosen heuristic and possible orientations in the new state
state - > SetRightBoundaryR2L ( scoresR2L , heuristicScoreIndexR2L , possibleFutureOrientationsR2L , prevTarPhrLHS , prevState ) ;
if ( ( possibleFutureOrientationsR2L & prevState - > m_rightBoundaryNonTerminalR2LPossibleFutureOrientations ) = = 0x4 ) {
// recursive: discontinuous orientation
FEATUREVERBOSE ( 5 , " previous state: R2L discontinuous orientation "
< < possibleFutureOrientationsR2L < < " & " < < prevState - > m_rightBoundaryNonTerminalR2LPossibleFutureOrientations
< < " = " < < ( possibleFutureOrientationsR2L & prevState - > m_rightBoundaryNonTerminalR2LPossibleFutureOrientations )
< < std : : endl ) ;
RightBoundaryR2LScoreRecursive ( featureID , prevState , 0x4 , newScores , accumulator ) ;
state - > m_rightBoundaryRecursionGuard = true ; // prevent subderivation from being scored recursively multiple times
2014-12-06 01:33:40 +03:00
}
2014-09-09 21:17:05 +04:00
2015-02-20 23:53:50 +03:00
} else {
2015-01-29 23:23:41 +03:00
// orientation is known: add the matching dense score in the right-to-left
// part of the score vector (starting at m_offsetR2LScores)
if ( r2lOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_LEFT ) {
2015-01-16 20:48:58 +03:00
newScores [ m_offsetR2LScores + 0 ] + = TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilityMono ( ) ) ;
2015-01-14 14:07:42 +03:00
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
2015-01-29 23:23:41 +03:00
RightBoundaryR2LScoreRecursive ( featureID , prevState , 0x1 , newScores , accumulator ) ;
} else if ( r2lOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_RIGHT ) {
2015-01-16 20:48:58 +03:00
newScores [ m_offsetR2LScores + 1 ] + = TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilitySwap ( ) ) ;
2015-01-14 14:07:42 +03:00
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
2015-01-29 23:23:41 +03:00
RightBoundaryR2LScoreRecursive ( featureID , prevState , 0x2 , newScores , accumulator ) ;
} else if ( ( r2lOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT ) | |
( r2lOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DRIGHT ) | |
( r2lOrientation = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_UNKNOWN ) ) {
2015-01-16 20:48:58 +03:00
newScores [ m_offsetR2LScores + 2 ] + = TransformScore ( orientationPhraseProperty - > GetRightToLeftProbabilityDiscontinuous ( ) ) ;
2015-01-14 14:07:42 +03:00
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
2015-01-29 23:23:41 +03:00
RightBoundaryR2LScoreRecursive ( featureID , prevState , 0x4 , newScores , accumulator ) ;
} else {
2015-01-14 14:07:42 +03:00
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Unsupported orientation type. " ) ;
2015-01-29 23:23:41 +03:00
}
// sparse scores
if ( m_useSparseWord ) {
SparseWordR2LScore ( prevHypo , accumulator , ToString ( r2lOrientation ) ) ;
}
if ( m_useSparseNT ) {
SparseNonTerminalR2LScore ( prevTarPhrLHS , accumulator , ToString ( r2lOrientation ) ) ;
2014-12-06 01:33:40 +03:00
}
2014-09-09 21:17:05 +04:00
}
2015-01-29 23:23:41 +03:00
2015-01-14 14:07:42 +03:00
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2 ( ! prevTarPhr . GetWord ( 0 ) . IsOOV ( ) , GetScoreProducerDescription ( )
< < " : Missing Orientation property. "
< < " Please check phrase table and glue rules. " ) ;
}
2015-02-20 23:53:50 +03:00
+ + nNT ;
2014-09-09 21:17:05 +04:00
}
// add all dense scores collected for this rule application in one step
accumulator - > PlusEquals ( this , newScores ) ;
2014-12-03 23:04:26 +03:00
return state ;
}
2015-01-28 23:50:24 +03:00
2015-01-27 00:11:37 +03:00
size_t PhraseOrientationFeature : : GetHeuristicScoreIndex ( const std : : vector < float > & scores ,
2015-02-19 15:27:23 +03:00
size_t weightsVectorOffset ,
const std : : bitset < 3 > possibleFutureOrientations ) const
2015-01-27 00:11:37 +03:00
{
2015-02-20 23:53:50 +03:00
if ( m_weightsVector . empty ( ) ) {
m_weightsVector = StaticData : : Instance ( ) . GetAllWeights ( ) . GetScoresForProducer ( this ) ;
}
2015-01-27 00:11:37 +03:00
std : : vector < float > weightedScores ;
for ( size_t i = 0 ; i < 3 ; + + i ) {
weightedScores . push_back ( m_weightsVector [ weightsVectorOffset + i ] * scores [ i ] ) ;
}
size_t heuristicScoreIndex = 0 ;
for ( size_t i = 1 ; i < 3 ; + + i ) {
if ( possibleFutureOrientations [ i ] ) {
if ( weightedScores [ i ] > weightedScores [ heuristicScoreIndex ] ) {
heuristicScoreIndex = i ;
}
}
}
IFFEATUREVERBOSE ( 5 ) {
FEATUREVERBOSE ( 5 , " Heuristic score computation: "
2015-02-20 23:53:50 +03:00
< < " heuristicScoreIndex== " < < heuristicScoreIndex ) ;
2015-01-27 00:11:37 +03:00
for ( size_t i = 0 ; i < 3 ; + + i )
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE2 ( 5 , " m_weightsVector[ " < < weightsVectorOffset + i < < " ]== " < < m_weightsVector [ weightsVectorOffset + i ] ) ;
2015-01-27 00:11:37 +03:00
for ( size_t i = 0 ; i < 3 ; + + i )
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE2 ( 5 , " scores[ " < < i < < " ]== " < < scores [ i ] ) ;
2015-01-27 00:11:37 +03:00
for ( size_t i = 0 ; i < 3 ; + + i )
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE2 ( 5 , " weightedScores[ " < < i < < " ]== " < < weightedScores [ i ] ) ;
2015-01-27 00:11:37 +03:00
for ( size_t i = 0 ; i < 3 ; + + i )
2015-02-20 23:53:50 +03:00
FEATUREVERBOSE2 ( 5 , " possibleFutureOrientations[ " < < i < < " ]== " < < possibleFutureOrientations [ i ] ) ;
2015-01-27 00:11:37 +03:00
if ( possibleFutureOrientations = = 0x7 ) {
FEATUREVERBOSE2 ( 5 , " (all orientations possible) " ) ;
}
FEATUREVERBOSE2 ( 5 , std : : endl ) ;
}
return heuristicScoreIndex ;
}
2015-01-28 23:50:24 +03:00
2014-12-03 23:04:26 +03:00
void PhraseOrientationFeature : : LeftBoundaryL2RScoreRecursive ( int featureID ,
2015-01-14 14:07:42 +03:00
const PhraseOrientationFeatureState * state ,
const std : : bitset < 3 > orientation ,
2015-01-29 23:23:41 +03:00
std : : vector < float > & newScores ,
2015-02-19 15:27:23 +03:00
ScoreComponentCollection * scoreBreakdown ) const
// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
2014-12-03 23:04:26 +03:00
{
2015-01-14 14:07:42 +03:00
if ( state - > m_leftBoundaryIsSet ) {
2015-01-29 23:23:41 +03:00
const std : : string * recursiveOrientationString ;
2014-12-03 23:04:26 +03:00
// subtract heuristic score from subderivation
newScores [ state - > m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ] - = state - > m_leftBoundaryNonTerminalL2RScores [ state - > m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ] ;
// add actual score
std : : bitset < 3 > recursiveOrientation = orientation ;
2015-01-14 14:07:42 +03:00
if ( ( orientation = = 0x4 ) | | ( orientation = = 0x0 ) ) {
2014-12-03 23:04:26 +03:00
// discontinuous
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & DORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ 2 ] + = state - > GetLeftBoundaryL2RScoreDiscontinuous ( ) ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
recursiveOrientation & = state - > m_leftBoundaryNonTerminalL2RPossibleFutureOrientations ;
2015-01-14 14:07:42 +03:00
if ( recursiveOrientation = = 0x1 ) {
2014-12-03 23:04:26 +03:00
// monotone
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & MORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ 0 ] + = state - > GetLeftBoundaryL2RScoreMono ( ) ;
2015-01-14 14:07:42 +03:00
} else if ( recursiveOrientation = = 0x2 ) {
2014-12-03 23:04:26 +03:00
// swap
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & SORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ 1 ] + = state - > GetLeftBoundaryL2RScoreSwap ( ) ;
2015-01-14 14:07:42 +03:00
} else if ( recursiveOrientation = = 0x4 ) {
2014-12-03 23:04:26 +03:00
// discontinuous
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & DORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ 2 ] + = state - > GetLeftBoundaryL2RScoreDiscontinuous ( ) ;
2015-01-14 14:07:42 +03:00
} else if ( recursiveOrientation = = 0x0 ) {
2014-12-03 23:04:26 +03:00
// discontinuous
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & DORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ 2 ] + = state - > GetLeftBoundaryL2RScoreDiscontinuous ( ) ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Error in recursive scoring. " ) ;
}
}
2015-01-29 23:23:41 +03:00
if ( m_useSparseNT ) {
SparseNonTerminalL2RScore ( state - > m_leftBoundaryNonTerminalSymbol , scoreBreakdown , recursiveOrientationString ) ;
}
2014-12-03 23:04:26 +03:00
FEATUREVERBOSE ( 6 , " Left boundary recursion: " < < orientation < < " & " < < state - > m_leftBoundaryNonTerminalL2RPossibleFutureOrientations < < " = " < < recursiveOrientation
< < " --- Subtracted heuristic score: " < < state - > m_leftBoundaryNonTerminalL2RScores [ state - > m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ] < < std : : endl ) ;
2015-01-14 14:07:42 +03:00
if ( ! state - > m_leftBoundaryRecursionGuard ) {
2014-12-03 23:04:26 +03:00
// recursive call
2015-01-08 21:40:58 +03:00
const PhraseOrientationFeatureState * prevState = state - > m_leftBoundaryPrevState ;
2015-01-29 23:23:41 +03:00
LeftBoundaryL2RScoreRecursive ( featureID , prevState , recursiveOrientation , newScores , scoreBreakdown ) ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
FEATUREVERBOSE ( 6 , " m_leftBoundaryRecursionGuard " < < std : : endl ) ;
}
}
}
2015-01-28 23:50:24 +03:00
2014-12-03 23:04:26 +03:00
void PhraseOrientationFeature : : RightBoundaryR2LScoreRecursive ( int featureID ,
2015-01-14 14:07:42 +03:00
const PhraseOrientationFeatureState * state ,
const std : : bitset < 3 > orientation ,
2015-01-29 23:23:41 +03:00
std : : vector < float > & newScores ,
2015-02-19 15:27:23 +03:00
ScoreComponentCollection * scoreBreakdown ) const
// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
2014-12-03 23:04:26 +03:00
{
2015-01-14 14:07:42 +03:00
if ( state - > m_rightBoundaryIsSet ) {
2015-01-29 23:23:41 +03:00
const std : : string * recursiveOrientationString ;
2014-12-03 23:04:26 +03:00
// subtract heuristic score from subderivation
newScores [ m_offsetR2LScores + state - > m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ] - = state - > m_rightBoundaryNonTerminalR2LScores [ state - > m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ] ;
// add actual score
std : : bitset < 3 > recursiveOrientation = orientation ;
2015-01-14 14:07:42 +03:00
if ( ( orientation = = 0x4 ) | | ( orientation = = 0x0 ) ) {
2014-12-03 23:04:26 +03:00
// discontinuous
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & DORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ m_offsetR2LScores + 2 ] + = state - > GetRightBoundaryR2LScoreDiscontinuous ( ) ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
recursiveOrientation & = state - > m_rightBoundaryNonTerminalR2LPossibleFutureOrientations ;
2015-01-14 14:07:42 +03:00
if ( recursiveOrientation = = 0x1 ) {
2014-12-03 23:04:26 +03:00
// monotone
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & MORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ m_offsetR2LScores + 0 ] + = state - > GetRightBoundaryR2LScoreMono ( ) ;
2015-01-14 14:07:42 +03:00
} else if ( recursiveOrientation = = 0x2 ) {
2014-12-03 23:04:26 +03:00
// swap
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & SORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ m_offsetR2LScores + 1 ] + = state - > GetRightBoundaryR2LScoreSwap ( ) ;
2015-01-14 14:07:42 +03:00
} else if ( recursiveOrientation = = 0x4 ) {
2014-12-03 23:04:26 +03:00
// discontinuous
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & DORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ m_offsetR2LScores + 2 ] + = state - > GetRightBoundaryR2LScoreDiscontinuous ( ) ;
2015-01-14 14:07:42 +03:00
} else if ( recursiveOrientation = = 0x0 ) {
2014-12-03 23:04:26 +03:00
// discontinuous
2015-01-29 23:23:41 +03:00
recursiveOrientationString = & DORIENT ;
2014-12-03 23:04:26 +03:00
newScores [ m_offsetR2LScores + 2 ] + = state - > GetRightBoundaryR2LScoreDiscontinuous ( ) ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Error in recursive scoring. " ) ;
}
}
2015-01-29 23:23:41 +03:00
if ( m_useSparseNT ) {
SparseNonTerminalR2LScore ( state - > m_rightBoundaryNonTerminalSymbol , scoreBreakdown , recursiveOrientationString ) ;
}
2014-12-03 23:04:26 +03:00
FEATUREVERBOSE ( 6 , " Right boundary recursion: " < < orientation < < " & " < < state - > m_rightBoundaryNonTerminalR2LPossibleFutureOrientations < < " = " < < recursiveOrientation
< < " --- Subtracted heuristic score: " < < state - > m_rightBoundaryNonTerminalR2LScores [ state - > m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ] < < std : : endl ) ;
2015-01-14 14:07:42 +03:00
if ( ! state - > m_rightBoundaryRecursionGuard ) {
2014-12-03 23:04:26 +03:00
// recursive call
2015-01-08 21:40:58 +03:00
const PhraseOrientationFeatureState * prevState = state - > m_rightBoundaryPrevState ;
2015-01-29 23:23:41 +03:00
RightBoundaryR2LScoreRecursive ( featureID , prevState , recursiveOrientation , newScores , scoreBreakdown ) ;
2015-01-14 14:07:42 +03:00
} else {
2014-12-03 23:04:26 +03:00
FEATUREVERBOSE ( 6 , " m_rightBoundaryRecursionGuard " < < std : : endl ) ;
}
}
2014-09-09 21:17:05 +04:00
}
2014-09-12 16:51:04 +04:00
2015-01-14 14:07:42 +03:00
2015-01-29 23:23:41 +03:00
void PhraseOrientationFeature : : SparseWordL2RScore ( const ChartHypothesis * hypo ,
2015-02-19 15:27:23 +03:00
ScoreComponentCollection * scoreBreakdown ,
const std : : string * o ) const
2015-01-28 23:50:24 +03:00
{
// target word
const ChartHypothesis * currHypo = hypo ;
const TargetPhrase * targetPhrase = & currHypo - > GetCurrTargetPhrase ( ) ;
const Word * targetWord = & targetPhrase - > GetWord ( 0 ) ;
// TODO: boundary words in the feature state?
while ( targetWord - > IsNonTerminal ( ) ) {
const AlignmentInfo : : NonTermIndexMap & nonTermIndexMap =
targetPhrase - > GetAlignNonTerm ( ) . GetNonTermIndexMap ( ) ;
size_t nonTermIndex = nonTermIndexMap [ 0 ] ;
currHypo = currHypo - > GetPrevHypo ( nonTermIndex ) ;
targetPhrase = & currHypo - > GetCurrTargetPhrase ( ) ;
targetWord = & targetPhrase - > GetWord ( 0 ) ;
}
const std : : string & targetWordString = ( * targetWord ) [ 0 ] - > GetString ( ) . as_string ( ) ;
if ( targetWordString ! = " <s> " & & targetWordString ! = " </s> " ) {
if ( ! m_useTargetWordList | | m_targetWordList . find ( ( * targetWord ) [ 0 ] ) ! = m_targetWordList . end ( ) ) {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" L2R " + * o + " _tw_ " + targetWordString ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: L2R " + * o + " _tw_ " + targetWordString < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
} else {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" L2R " + * o + " _tw_OTHER " ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: L2R " + * o + " _tw_OTHER " < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
}
}
// source word
2015-02-19 15:27:23 +03:00
2015-01-28 23:50:24 +03:00
WordsRange sourceSpan = hypo - > GetCurrSourceRange ( ) ;
const InputType & input = hypo - > GetManager ( ) . GetSource ( ) ;
const Sentence & sourceSentence = static_cast < const Sentence & > ( input ) ;
const Word & sourceWord = sourceSentence . GetWord ( sourceSpan . GetStartPos ( ) ) ;
const std : : string & sourceWordString = sourceWord [ 0 ] - > GetString ( ) . as_string ( ) ;
if ( sourceWordString ! = " <s> " & & sourceWordString ! = " </s> " ) {
if ( ! m_useSourceWordList | | m_sourceWordList . find ( sourceWord [ 0 ] ) ! = m_sourceWordList . end ( ) ) {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" L2R " + * o + " _sw_ " + sourceWordString ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: L2R " + * o + " _sw_ " + sourceWordString < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
} else {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" L2R " + * o + " _sw_OTHER " ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: L2R " + * o + " _sw_OTHER " < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
}
}
}
2015-01-29 23:23:41 +03:00
void PhraseOrientationFeature : : SparseWordR2LScore ( const ChartHypothesis * hypo ,
2015-02-19 15:27:23 +03:00
ScoreComponentCollection * scoreBreakdown ,
const std : : string * o ) const
2015-01-28 23:50:24 +03:00
{
// target word
const ChartHypothesis * currHypo = hypo ;
const TargetPhrase * targetPhrase = & currHypo - > GetCurrTargetPhrase ( ) ;
const Word * targetWord = & targetPhrase - > GetWord ( targetPhrase - > GetSize ( ) - 1 ) ;
// TODO: boundary words in the feature state?
while ( targetWord - > IsNonTerminal ( ) ) {
const AlignmentInfo : : NonTermIndexMap & nonTermIndexMap =
targetPhrase - > GetAlignNonTerm ( ) . GetNonTermIndexMap ( ) ;
size_t nonTermIndex = nonTermIndexMap [ targetPhrase - > GetSize ( ) - 1 ] ;
currHypo = currHypo - > GetPrevHypo ( nonTermIndex ) ;
targetPhrase = & currHypo - > GetCurrTargetPhrase ( ) ;
targetWord = & targetPhrase - > GetWord ( targetPhrase - > GetSize ( ) - 1 ) ;
}
const std : : string & targetWordString = ( * targetWord ) [ 0 ] - > GetString ( ) . as_string ( ) ;
if ( targetWordString ! = " <s> " & & targetWordString ! = " </s> " ) {
if ( ! m_useTargetWordList | | m_targetWordList . find ( ( * targetWord ) [ 0 ] ) ! = m_targetWordList . end ( ) ) {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" R2L " + * o + " _tw_ " + targetWordString ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: R2L " + * o + " _tw_ " + targetWordString < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
} else {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" R2L " + * o + " _tw_OTHER " ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: R2L " + * o + " _tw_OTHER " < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
}
}
// source word
2015-02-19 15:27:23 +03:00
2015-01-28 23:50:24 +03:00
WordsRange sourceSpan = hypo - > GetCurrSourceRange ( ) ;
const InputType & input = hypo - > GetManager ( ) . GetSource ( ) ;
const Sentence & sourceSentence = static_cast < const Sentence & > ( input ) ;
const Word & sourceWord = sourceSentence . GetWord ( sourceSpan . GetEndPos ( ) ) ;
const std : : string & sourceWordString = sourceWord [ 0 ] - > GetString ( ) . as_string ( ) ;
if ( sourceWordString ! = " <s> " & & sourceWordString ! = " </s> " ) {
if ( ! m_useSourceWordList | | m_sourceWordList . find ( sourceWord [ 0 ] ) ! = m_sourceWordList . end ( ) ) {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" R2L " + * o + " _sw_ " + sourceWordString ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: R2L " + * o + " _sw_ " + sourceWordString < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
} else {
scoreBreakdown - > PlusEquals ( this ,
2015-01-29 23:23:41 +03:00
" R2L " + * o + " _sw_OTHER " ,
2015-01-28 23:50:24 +03:00
1 ) ;
2015-01-29 23:23:41 +03:00
FEATUREVERBOSE ( 3 , " Sparse: R2L " + * o + " _sw_OTHER " < < std : : endl ) ;
2015-01-28 23:50:24 +03:00
}
}
}
2015-01-29 23:23:41 +03:00
void PhraseOrientationFeature : : SparseNonTerminalL2RScore ( const Factor * nonTerminalSymbol ,
2015-02-19 15:27:23 +03:00
ScoreComponentCollection * scoreBreakdown ,
const std : : string * o ) const
2015-01-29 23:23:41 +03:00
{
if ( nonTerminalSymbol ! = m_glueTargetLHS ) {
const std : : string & nonTerminalString = nonTerminalSymbol - > GetString ( ) . as_string ( ) ;
scoreBreakdown - > PlusEquals ( this ,
" L2R " + * o + " _n_ " + nonTerminalString ,
1 ) ;
FEATUREVERBOSE ( 3 , " Sparse: L2R " + * o + " _n_ " + nonTerminalString < < std : : endl ) ;
}
}
void PhraseOrientationFeature : : SparseNonTerminalR2LScore ( const Factor * nonTerminalSymbol ,
2015-02-19 15:27:23 +03:00
ScoreComponentCollection * scoreBreakdown ,
const std : : string * o ) const
2015-01-29 23:23:41 +03:00
{
if ( nonTerminalSymbol ! = m_glueTargetLHS ) {
const std : : string & nonTerminalString = nonTerminalSymbol - > GetString ( ) . as_string ( ) ;
scoreBreakdown - > PlusEquals ( this ,
" R2L " + * o + " _n_ " + nonTerminalString ,
1 ) ;
FEATUREVERBOSE ( 3 , " Sparse: R2L " + * o + " _n_ " + nonTerminalString < < std : : endl ) ;
}
}
const std : : string * PhraseOrientationFeature : : ToString ( const Moses : : GHKM : : PhraseOrientation : : REO_CLASS o ) const
{
if ( o = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_LEFT ) {
return & MORIENT ;
} else if ( o = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_RIGHT ) {
return & SORIENT ;
} else if ( ( o = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DLEFT ) | |
( o = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_DRIGHT ) | |
( o = = Moses : : GHKM : : PhraseOrientation : : REO_CLASS_UNKNOWN ) ) {
return & DORIENT ;
} else {
UTIL_THROW2 ( GetScoreProducerDescription ( )
< < " : Unsupported orientation type. " ) ;
}
return NULL ;
}
2014-09-09 21:17:05 +04:00
}