/***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2006 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "util/exception.hh"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"

using namespace std;

namespace Moses
{
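
// PhraseDictionaryMultiModel: a phrase table that wraps several component
// phrase tables ("components") and combines their scores, currently by linear
// interpolation ("mode=interpolate") with per-model weights ("lambda").
// Illustrative moses.ini line (the component table names are invented):
//   PhraseDictionaryMultiModel mode=interpolate components=pt0,pt1 lambda=0.5,0.5 ...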
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
  : PhraseDictionary("PhraseDictionaryMultiModel", line)
{
  for (size_t i = 0; i < m_args.size(); ++i) {
    const vector<string> &args = m_args[i];
    if (args[0] == "mode") {
      m_mode = args[1];
      if (m_mode != "interpolate") {
        ostringstream msg;
        msg << "combination mode unknown: " << m_mode;
        throw runtime_error(msg.str());
      }
    } else if (args[0] == "components") {
      m_pdStr = Tokenize(args[1], ",");
      m_numModels = m_pdStr.size();
    } else if (args[0] == "lambda") {
      m_multimodelweights = Tokenize<float>(args[1], ",");
    }
  } // for

  size_t numWeights = m_numScoreComponents;
  if (m_mode == "interpolate") {
    numWeights--;
  }
  CHECK(m_pdStr.size() == m_multimodelweights.size() || m_pdStr.size()*numWeights == m_multimodelweights.size());
}
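
// Constructor used by subclasses such as PhraseDictionaryMultiModelCounts,
// which pass their own feature description through to PhraseDictionary.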
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &description, const std::string &line)
  : PhraseDictionary(description, line)
{
  for (size_t i = 0; i < m_args.size(); ++i) {
    const vector<string> &args = m_args[i];
    if (args[0] == "components") {
      m_pdStr = Tokenize(args[1], ",");
      m_numModels = m_pdStr.size();
    } else if (args[0] == "lambda") {
      m_multimodelweights = Tokenize<float>(args[1], ",");
    }
  } // for

  if (description == "PhraseDictionaryMultiModelCounts") {
    CHECK(m_pdStr.size() == m_multimodelweights.size() || m_pdStr.size()*4 == m_multimodelweights.size());
  }
}
PhraseDictionaryMultiModel::~PhraseDictionaryMultiModel()
{
}
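
// Resolve the component phrase tables listed in "components" by name and
// store pointers to them for later lookups.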
bool PhraseDictionaryMultiModel::InitDictionary()
{
  // since the top X target phrases of the final model are not the same as the top X phrases of each component model,
  // one could choose a higher value than tableLimit (or 0) here for maximal precision, at a cost of speed.
  for (size_t i = 0; i < m_numModels; ++i) {
    const string &ptName = m_pdStr[i];

    PhraseDictionary *pt = FindPhraseDictionary(ptName);
    CHECK(pt);
    m_pd.push_back(pt);
  }
  return true;
}
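
// Look up a component phrase table by its feature description among all
// phrase dictionaries registered with StaticData; returns NULL if not found.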
PhraseDictionary *PhraseDictionaryMultiModel::FindPhraseDictionary(const string &ptName) const
{
  const StaticData &staticData = StaticData::Instance();
  const std::vector<PhraseDictionary*> &pts = staticData.GetPhraseDictionaries();

  PhraseDictionary *pt = NULL;
  std::vector<PhraseDictionary*>::const_iterator iter;
  for (iter = pts.begin(); iter != pts.end(); ++iter) {
    PhraseDictionary *currPt = *iter;
    if (currPt->GetScoreProducerDescription() == ptName) {
      pt = currPt;
      break;
    }
  }

  return pt;
}
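
// Main lookup: collect the component models' statistics for src, combine them
// (currently only linear interpolation is supported), and return a freshly
// allocated collection that is cached for per-sentence cleanup.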
const TargetPhraseCollection *PhraseDictionaryMultiModel::GetTargetPhraseCollection(const Phrase &src) const
{
  std::vector<std::vector<float> > multimodelweights;

  if (m_mode == "interpolate") {
    //interpolation of phrase penalty is skipped, and fixed-value (2.718) is used instead. results will be screwed up if phrase penalty is not last feature
    size_t numWeights = m_numScoreComponents - 1;
    multimodelweights = getWeights(numWeights, true);
  }

  std::map<std::string, multiModelStatistics*> *allStats = new(std::map<std::string, multiModelStatistics*>);
  CollectSufficientStatistics(src, allStats);

  TargetPhraseCollection *ret = NULL;
  if (m_mode == "interpolate") {
    ret = CreateTargetPhraseCollectionLinearInterpolation(allStats, multimodelweights);
  }

  ret->NthElement(m_tableLimit); // sort the phrases for pruning later
  const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);

  RemoveAllInMap(*allStats);
  delete allStats;

  return ret;
}
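
// For each component model, look up src and record the per-model, per-feature
// probabilities (untransformed scores) of every target phrase in allStats,
// keyed by the target phrase's string representation.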
void PhraseDictionaryMultiModel::CollectSufficientStatistics(const Phrase &src, std::map<std::string, multiModelStatistics*> *allStats) const
{
  for (size_t i = 0; i < m_numModels; ++i) {
    const PhraseDictionary &pd = *m_pd[i];

    TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection(src);
    if (ret_raw != NULL) {

      TargetPhraseCollection::iterator iterTargetPhrase, iterLast;
      if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
        iterLast = ret_raw->begin() + m_tableLimit;
      } else {
        iterLast = ret_raw->end();
      }

      for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
        TargetPhrase *targetPhrase = *iterTargetPhrase;
        std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);

        std::string targetString = targetPhrase->GetStringRep(m_output);
        if (allStats->find(targetString) == allStats->end()) {

          multiModelStatistics *statistics = new multiModelStatistics;
          statistics->targetPhrase = new TargetPhrase(*targetPhrase); //make a copy so that we don't overwrite the original phrase table info

          // zero out scores from original phrase table
          statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);

          Scores scoreVector(m_numScoreComponents);
          statistics->p.resize(m_numScoreComponents);
          for (size_t j = 0; j < m_numScoreComponents; ++j) {
            statistics->p[j].resize(m_numModels);
            scoreVector[j] = -raw_scores[j];
          }

          statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector); // set scores to 0
          statistics->targetPhrase->Evaluate();

          (*allStats)[targetString] = statistics;
        }

        multiModelStatistics *statistics = (*allStats)[targetString];

        for (size_t j = 0; j < m_numScoreComponents; ++j) {
          statistics->p[j][i] = UntransformScore(raw_scores[j]);
        }

        (*allStats)[targetString] = statistics;
      }
    }
  }
}
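
// Linear interpolation: each feature value is the transformed weighted sum of
// the component models' probabilities; the last feature is assumed to be the
// phrase penalty and is fixed at 1.0 instead of being interpolated.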
TargetPhraseCollection *PhraseDictionaryMultiModel::CreateTargetPhraseCollectionLinearInterpolation(std::map<std::string, multiModelStatistics*> *allStats, std::vector<std::vector<float> > &multimodelweights) const
{
  TargetPhraseCollection *ret = new TargetPhraseCollection();
  for (std::map<std::string, multiModelStatistics*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter) {

    multiModelStatistics *statistics = iter->second;

    Scores scoreVector(m_numScoreComponents);

    for (size_t i = 0; i < m_numScoreComponents - 1; ++i) {
      scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
    }

    //assuming that last value is phrase penalty
    scoreVector[m_numScoreComponents - 1] = 1.0;

    for (size_t i = 0; i < scoreVector.size(); ++i) cerr << scoreVector[i] << " ";
    cerr << endl;

    statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
    statistics->targetPhrase->Evaluate();

    ret->Add(new TargetPhrase(*statistics->targetPhrase));
  }
  return ret;
}
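
// Build one weight vector per feature. Weights may come from the client
// (mosesserver, per sentence), from the "lambda" option in the config, or
// default to uniform; they may be specified either once per model or once per
// model and feature.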
//TODO: is it worth caching the results as long as weights don't change?
std::vector<std::vector<float> > PhraseDictionaryMultiModel::getWeights(size_t numWeights, bool normalize) const
{
  const std::vector<float> *weights_ptr;
  std::vector<float> raw_weights;
  const StaticData &staticData = StaticData::Instance();

  weights_ptr = staticData.GetTemporaryMultiModelWeightsVector();

  // HIEU - uninitialised variable.

  //checking weights passed to mosesserver; only valid for this sentence; *don't* raise exception if client weights are malformed
  if (weights_ptr == NULL || weights_ptr->size() == 0) {
    weights_ptr = &m_multimodelweights; //fall back to weights defined in config
  } else if (weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
    //TODO: can we pass error message to client if weights are malformed?
    std::stringstream strme;
    strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << " * " << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
    UserMessage::Add(strme.str());
    weights_ptr = &m_multimodelweights; //fall back to weights defined in config
  }

  //checking weights defined in config; only valid for this sentence; raise exception if config weights are malformed
  if (weights_ptr == NULL || weights_ptr->size() == 0) {
    for (size_t i = 0; i < m_numModels; i++) {
      raw_weights.push_back(1.0 / m_numModels); //uniform weights created online
    }
  } else if (weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
    std::stringstream strme;
    strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << " * " << m_numModels << "). You have " << weights_ptr->size() << ".";
    UTIL_THROW(util::Exception, strme.str());
  } else {
    raw_weights = *weights_ptr;
  }

  std::vector<std::vector<float> > multimodelweights(numWeights);

  for (size_t i = 0; i < numWeights; i++) {
    std::vector<float> weights_onefeature(m_numModels);
    if (raw_weights.size() == m_numModels) {
      weights_onefeature = raw_weights;
    } else {
      copy(raw_weights.begin() + i*m_numModels, raw_weights.begin() + (i+1)*m_numModels, weights_onefeature.begin());
    }
    if (normalize) {
      multimodelweights[i] = normalizeWeights(weights_onefeature);
    } else {
      multimodelweights[i] = weights_onefeature;
    }
  }

  return multimodelweights;
}
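
// Rescale a weight vector so that its entries sum to 1.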
std::vector<float> PhraseDictionaryMultiModel::normalizeWeights(std::vector<float> &weights) const
{
  std::vector<float> ret(m_numModels);
  float total = std::accumulate(weights.begin(), weights.end(), 0.0);
  for (size_t i = 0; i < weights.size(); i++) {
    ret[i] = weights[i] / total;
  }
  return ret;
}
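
// The multimodel phrase table is not implemented for the chart decoder;
// using it there is an error.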
ChartRuleLookupManager *PhraseDictionaryMultiModel::CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&)
{
  UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}
//copied from PhraseDictionaryCompact; free memory allocated to TargetPhraseCollection (and each TargetPhrase) at end of sentence
void PhraseDictionaryMultiModel::CacheForCleanup(TargetPhraseCollection *tpc)
{
#ifdef WITH_THREADS
  boost::mutex::scoped_lock lock(m_sentenceMutex);
  PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
  PhraseCache &ref = m_sentenceCache;
#endif
  ref.push_back(tpc);
}
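
// Free all phrase collections cached during the sentence, reset the component
// models, and clear any temporary (per-sentence) multimodel weights.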
void PhraseDictionaryMultiModel::CleanUpAfterSentenceProcessing(const InputType &source)
{
#ifdef WITH_THREADS
  boost::mutex::scoped_lock lock(m_sentenceMutex);
  PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
#else
  PhraseCache &ref = m_sentenceCache;
#endif
  for (PhraseCache::iterator it = ref.begin(); it != ref.end(); it++) {
    delete *it;
  }

  PhraseCache temp;
  temp.swap(ref);

  CleanUpComponentModels(source);

  const StaticData &staticData = StaticData::Instance();
  std::vector<float> empty_vector;
  (const_cast<StaticData&>(staticData)).SetTemporaryMultiModelWeightsVector(empty_vector);
}
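
// Forward per-sentence cleanup to every component phrase table.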
void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source)
{
  for (size_t i = 0; i < m_numModels; ++i) {
    m_pd[i]->CleanUpAfterSentenceProcessing(source);
  }
}
#ifdef WITH_DLIB
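// Perplexity minimization (only compiled WITH_DLIB): given a sample of
// (source, target) phrase pairs, find for each feature the per-model weights
// that minimize cross-entropy on the sample, using dlib's derivative-free
// BOBYQA optimizer.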
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
{
  const StaticData &staticData = StaticData::Instance();
  const string &factorDelimiter = staticData.GetFactorDelimiter();

  map<pair<string, string>, size_t> phrase_pair_map;

  for (vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter) {
    phrase_pair_map[*iter] += 1;
  }

  vector<multiModelStatisticsOptimization*> optimizerStats;

  for (map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter) {

    pair<string, string> phrase_pair = iter->first;
    string source_string = phrase_pair.first;
    string target_string = phrase_pair.second;

    vector<float> fs(m_numModels);
    map<string, multiModelStatistics*> *allStats = new(map<string, multiModelStatistics*>);

    Phrase sourcePhrase(0);
    sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL);

    CollectSufficientStatistics(sourcePhrase, allStats); //optimization potential: only call this once per source phrase

    //phrase pair not found; leave cache empty
    if (allStats->find(target_string) == allStats->end()) {
      RemoveAllInMap(*allStats);
      delete allStats;
      continue;
    }

    multiModelStatisticsOptimization *targetStatistics = new multiModelStatisticsOptimization();
    targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
    targetStatistics->p = (*allStats)[target_string]->p;
    targetStatistics->f = iter->second;
    optimizerStats.push_back(targetStatistics);

    RemoveAllInMap(*allStats);
    delete allStats;
  }

  Sentence sentence;
  CleanUpAfterSentenceProcessing(sentence); // free memory used by compact phrase tables

  size_t numWeights = m_numScoreComponents;
  if (m_mode == "interpolate") {
    //interpolation of phrase penalty is skipped, and fixed-value (2.718) is used instead. results will be screwed up if phrase penalty is not last feature
    numWeights = m_numScoreComponents - 1;
  }

  vector<float> ret(m_numModels * numWeights);
  for (size_t iFeature = 0; iFeature < numWeights; iFeature++) {

    CrossEntropy *ObjectiveFunction = new CrossEntropy(optimizerStats, this, iFeature);

    vector<float> weight_vector = Optimize(ObjectiveFunction, m_numModels);

    if (m_mode == "interpolate") {
      weight_vector = normalizeWeights(weight_vector);
    }

    cerr << "Weight vector for feature " << iFeature << ": ";
    for (size_t i = 0; i < m_numModels; i++) {
      ret[(iFeature * m_numModels) + i] = weight_vector[i];
      cerr << weight_vector[i] << " ";
    }
    cerr << endl;

    delete ObjectiveFunction;
  }

  RemoveAllInColl(optimizerStats);

  return ret;
}
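
// Run BOBYQA from a uniform starting point (all weights 1.0) with the bounds
// and stopping criteria given below. If the optimizer fails, the error is
// printed and whatever point dlib produced is returned.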
vector<float> PhraseDictionaryMultiModel::Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
{
  dlib::matrix<double, 0, 1> starting_point;
  starting_point.set_size(numModels);
  starting_point = 1.0;

  try {
    dlib::find_min_bobyqa(*ObjectiveFunction,
                          starting_point,
                          2*numModels + 1,    // number of interpolation points
                          dlib::uniform_matrix<double>(numModels, 1, 1e-09),  // lower bound constraint
                          dlib::uniform_matrix<double>(numModels, 1, 1e100),  // upper bound constraint
                          1.0,    // initial trust region radius
                          1e-5,   // stopping trust region radius
                          10000   // max number of objective function evaluations
                         );
  } catch (dlib::bobyqa_failure &e) {
    cerr << e.what() << endl;
  }

  vector<float> weight_vector(numModels);

  for (int i = 0; i < starting_point.nr(); i++) {
    weight_vector[i] = starting_point(i);
  }

  cerr << "Cross-entropy: " << (*ObjectiveFunction)(starting_point) << endl;
  return weight_vector;
}
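
// Objective for BOBYQA: the frequency-weighted cross-entropy of the sampled
// phrase pairs under the interpolated model, for the single feature m_iFeature
// (the log score is divided by TransformScore(2), i.e. measured in bits).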
double CrossEntropy::operator()(const dlib::matrix<double, 0, 1> &arg) const
{
  double total = 0.0;
  double n = 0.0;

  std::vector<float> weight_vector(m_model->m_numModels);

  for (int i = 0; i < arg.nr(); i++) {
    weight_vector[i] = arg(i);
  }
  if (m_model->m_mode == "interpolate") {
    weight_vector = m_model->normalizeWeights(weight_vector);
  }

  for (std::vector<multiModelStatisticsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter) {
    multiModelStatisticsOptimization *statistics = *iter;
    size_t f = statistics->f;

    double score;
    score = std::inner_product(statistics->p[m_iFeature].begin(), statistics->p[m_iFeature].end(), weight_vector.begin(), 0.0);

    total -= (FloorScore(TransformScore(score)) / TransformScore(2)) * f;
    n += f;
  }
  return total / n;
}
#endif

} //namespace