2012-07-19 20:56:46 +04:00
/***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2011 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
2014-03-21 14:53:15 +04:00
# include <iostream>
2012-07-19 20:56:46 +04:00
# include "ChartRuleLookupManagerMemoryPerSentence.h"
2012-11-12 23:56:18 +04:00
2014-03-21 14:53:15 +04:00
# include "moses/ChartParser.h"
2012-11-12 23:56:18 +04:00
# include "moses/InputType.h"
2014-09-01 21:07:30 +04:00
# include "moses/Terminal.h"
2012-11-12 23:56:18 +04:00
# include "moses/ChartParserCallback.h"
# include "moses/StaticData.h"
# include "moses/NonTerminal.h"
# include "moses/ChartCellCollection.h"
2014-09-01 21:07:30 +04:00
# include "moses/FactorCollection.h"
2014-03-21 14:53:15 +04:00
# include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
using namespace std ;
2012-07-19 20:56:46 +04:00
namespace Moses
{
// Constructs the lookup manager for one input sentence.
//   parser    - chart parser; forwarded to the CYK+ base class and queried for the input size
//   cellColl  - chart cell collection; forwarded to the CYK+ base class
//   ruleTable - fuzzy-match phrase dictionary whose rule trie is searched during lookup
// The soft-matching table is taken from the global StaticData instance.
ChartRuleLookupManagerMemoryPerSentence::ChartRuleLookupManagerMemoryPerSentence(
  const ChartParser &parser,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionaryFuzzyMatch &ruleTable)
  : ChartRuleLookupManagerCYKPlus(parser, cellColl)
  , m_ruleTable(ruleTable)
  , m_softMatchingMap(StaticData::Instance().GetSoftMatches())
{
  // soft matching is only attempted when the soft-matching table is non-empty
  m_isSoftMatching = !m_softMatchingMap.empty();

  // one collection of completed rules per input position (indexed by rule end position)
  m_completedRules.resize(parser.GetSize());
}
void ChartRuleLookupManagerMemoryPerSentence : : GetChartRuleCollection (
2015-01-07 11:59:08 +03:00
const InputPath & inputPath ,
2014-03-21 14:53:15 +04:00
size_t lastPos ,
2012-10-03 16:27:47 +04:00
ChartParserCallback & outColl )
2012-07-19 20:56:46 +04:00
{
2015-01-07 11:59:08 +03:00
const WordsRange & range = inputPath . GetWordsRange ( ) ;
2014-03-21 14:53:15 +04:00
size_t startPos = range . GetStartPos ( ) ;
2012-07-19 20:56:46 +04:00
size_t absEndPos = range . GetEndPos ( ) ;
2014-03-21 14:53:15 +04:00
m_lastPos = lastPos ;
m_stackVec . clear ( ) ;
2014-09-01 21:07:30 +04:00
m_stackScores . clear ( ) ;
2014-03-21 14:53:15 +04:00
m_outColl = & outColl ;
m_unaryPos = absEndPos - 1 ; // rules ending in this position are unary and should not be added to collection
2012-07-19 20:56:46 +04:00
2014-09-01 21:07:30 +04:00
// create/update data structure to quickly look up all chart cells that match start position and label.
UpdateCompressedMatrix ( startPos , absEndPos , lastPos ) ;
2014-03-21 14:53:15 +04:00
const PhraseDictionaryNodeMemory & rootNode = m_ruleTable . GetRootNode ( GetParser ( ) . GetTranslationId ( ) ) ;
2012-07-19 20:56:46 +04:00
2014-12-01 14:05:17 +03:00
// all rules starting with terminal
2014-03-21 14:53:15 +04:00
if ( startPos = = absEndPos ) {
2014-12-01 14:05:17 +03:00
GetTerminalExtension ( & rootNode , startPos ) ;
2014-03-21 14:53:15 +04:00
}
// all rules starting with nonterminal
else if ( absEndPos > startPos ) {
2014-09-01 21:07:30 +04:00
GetNonTerminalExtension ( & rootNode , startPos ) ;
2012-07-19 20:56:46 +04:00
}
2014-03-21 14:53:15 +04:00
// copy temporarily stored rules to out collection
2015-01-07 21:42:20 +03:00
CompletedRuleCollection & rules = m_completedRules [ absEndPos ] ;
2014-03-21 14:53:15 +04:00
for ( vector < CompletedRule * > : : const_iterator iter = rules . begin ( ) ; iter ! = rules . end ( ) ; + + iter ) {
outColl . Add ( ( * iter ) - > GetTPC ( ) , ( * iter ) - > GetStackVector ( ) , range ) ;
}
2012-07-19 20:56:46 +04:00
2015-01-07 21:42:20 +03:00
rules . Clear ( ) ;
2012-07-19 20:56:46 +04:00
2014-03-21 14:53:15 +04:00
}
2012-07-19 20:56:46 +04:00
2014-09-01 21:07:30 +04:00
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
void ChartRuleLookupManagerMemoryPerSentence : : UpdateCompressedMatrix ( size_t startPos ,
size_t origEndPos ,
size_t lastPos ) {
std : : vector < size_t > endPosVec ;
size_t numNonTerms = FactorCollection : : Instance ( ) . GetNumNonTerminals ( ) ;
m_compressedMatrixVec . resize ( lastPos + 1 ) ;
// we only need to update cell at [startPos, origEndPos-1] for initial lookup
if ( startPos < origEndPos ) {
endPosVec . push_back ( origEndPos - 1 ) ;
}
// update all cells starting from startPos+1 for lookup of rule extensions
else if ( startPos = = origEndPos )
{
startPos + + ;
for ( size_t endPos = startPos ; endPos < = lastPos ; endPos + + ) {
endPosVec . push_back ( endPos ) ;
}
//re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
for ( size_t pos = startPos + 1 ; pos < = lastPos ; pos + + ) {
CompressedMatrix & cellMatrix = m_compressedMatrixVec [ pos ] ;
cellMatrix . resize ( numNonTerms ) ;
for ( size_t i = 0 ; i < numNonTerms ; i + + ) {
if ( ! cellMatrix [ i ] . empty ( ) & & cellMatrix [ i ] . back ( ) . endPos > lastPos ) {
cellMatrix [ i ] . pop_back ( ) ;
}
}
}
}
if ( startPos > lastPos ) {
return ;
}
// populate compressed matrix with all chart cells that start at current start position
CompressedMatrix & cellMatrix = m_compressedMatrixVec [ startPos ] ;
cellMatrix . clear ( ) ;
cellMatrix . resize ( numNonTerms ) ;
for ( std : : vector < size_t > : : iterator p = endPosVec . begin ( ) ; p ! = endPosVec . end ( ) ; + + p ) {
size_t endPos = * p ;
// target non-terminal labels for the span
const ChartCellLabelSet & targetNonTerms = GetTargetLabelSet ( startPos , endPos ) ;
if ( targetNonTerms . GetSize ( ) = = 0 ) {
continue ;
}
# if !defined(UNLABELLED_SOURCE)
// source non-terminal labels for the span
const InputPath & inputPath = GetParser ( ) . GetInputPath ( startPos , endPos ) ;
// can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
if ( inputPath . GetNonTerminalSet ( ) . size ( ) = = 0 ) {
continue ;
}
# endif
for ( size_t i = 0 ; i < numNonTerms ; i + + ) {
const ChartCellLabel * cellLabel = targetNonTerms . Find ( i ) ;
if ( cellLabel ! = NULL ) {
float score = cellLabel - > GetBestScore ( m_outColl ) ;
cellMatrix [ i ] . push_back ( ChartCellCache ( endPos , cellLabel , score ) ) ;
}
}
}
}
2014-03-21 14:53:15 +04:00
// if a (partial) rule matches, add it to list completed rules (if non-unary and non-empty), and try find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemoryPerSentence : : AddAndExtend (
const PhraseDictionaryNodeMemory * node ,
2014-09-01 21:07:30 +04:00
size_t endPos ) {
2014-03-21 14:53:15 +04:00
const TargetPhraseCollection & tpc = node - > GetTargetPhraseCollection ( ) ;
2014-12-01 14:05:17 +03:00
// add target phrase collection (except if rule is empty or a unary non-terminal rule)
if ( ! tpc . IsEmpty ( ) & & ( m_stackVec . empty ( ) | | endPos ! = m_unaryPos ) ) {
2014-09-01 21:07:30 +04:00
m_completedRules [ endPos ] . Add ( tpc , m_stackVec , m_stackScores , * m_outColl ) ;
2014-03-21 14:53:15 +04:00
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
if ( endPos < m_lastPos ) {
if ( ! node - > GetTerminalMap ( ) . empty ( ) ) {
GetTerminalExtension ( node , endPos + 1 ) ;
}
if ( ! node - > GetNonTerminalMap ( ) . empty ( ) ) {
2014-09-01 21:07:30 +04:00
GetNonTerminalExtension ( node , endPos + 1 ) ;
2014-03-21 14:53:15 +04:00
}
}
2012-07-19 20:56:46 +04:00
}
2014-09-01 21:07:30 +04:00
2014-03-21 14:53:15 +04:00
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence : : GetTerminalExtension (
const PhraseDictionaryNodeMemory * node ,
size_t pos ) {
const Word & sourceWord = GetSourceAt ( pos ) . GetLabel ( ) ;
const PhraseDictionaryNodeMemory : : TerminalMap & terminals = node - > GetTerminalMap ( ) ;
// if node has small number of terminal edges, test word equality for each.
if ( terminals . size ( ) < 5 ) {
for ( PhraseDictionaryNodeMemory : : TerminalMap : : const_iterator iter = terminals . begin ( ) ; iter ! = terminals . end ( ) ; + + iter ) {
const Word & word = iter - > first ;
2014-09-01 21:07:30 +04:00
if ( TerminalEqualityPred ( ) ( word , sourceWord ) ) {
2014-03-21 14:53:15 +04:00
const PhraseDictionaryNodeMemory * child = & iter - > second ;
2014-09-01 21:07:30 +04:00
AddAndExtend ( child , pos ) ;
break ;
2014-03-21 14:53:15 +04:00
}
}
}
// else, do hash lookup
else {
const PhraseDictionaryNodeMemory * child = node - > GetChild ( sourceWord ) ;
if ( child ! = NULL ) {
2014-09-01 21:07:30 +04:00
AddAndExtend ( child , pos ) ;
2014-03-21 14:53:15 +04:00
}
}
}
2012-07-19 20:56:46 +04:00
2014-09-01 21:07:30 +04:00
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
2014-03-21 14:53:15 +04:00
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence : : GetNonTerminalExtension (
const PhraseDictionaryNodeMemory * node ,
2014-09-01 21:07:30 +04:00
size_t startPos ) {
2012-07-19 20:56:46 +04:00
2014-09-01 21:07:30 +04:00
const CompressedMatrix & compressedMatrix = m_compressedMatrixVec [ startPos ] ;
2014-03-21 14:53:15 +04:00
// non-terminal labels in phrase dictionary node
const PhraseDictionaryNodeMemory : : NonTerminalMap & nonTermMap = node - > GetNonTerminalMap ( ) ;
2014-09-01 21:07:30 +04:00
// make room for back pointer
m_stackVec . push_back ( NULL ) ;
m_stackScores . push_back ( 0 ) ;
2012-07-19 20:56:46 +04:00
// loop over possible expansions of the rule
2013-05-24 19:33:30 +04:00
PhraseDictionaryNodeMemory : : NonTerminalMap : : const_iterator p ;
2014-03-21 14:53:15 +04:00
PhraseDictionaryNodeMemory : : NonTerminalMap : : const_iterator end = nonTermMap . end ( ) ;
2012-07-19 20:56:46 +04:00
for ( p = nonTermMap . begin ( ) ; p ! = end ; + + p ) {
// does it match possible source and target non-terminals?
2014-03-21 14:53:15 +04:00
# if defined(UNLABELLED_SOURCE)
const Word & targetNonTerm = p - > first ;
# else
2014-09-01 21:07:30 +04:00
const Word & targetNonTerm = p - > first . second ;
2014-03-21 14:53:15 +04:00
# endif
2014-09-01 21:07:30 +04:00
const PhraseDictionaryNodeMemory * child = & p - > second ;
2014-03-21 14:53:15 +04:00
//soft matching of NTs
if ( m_isSoftMatching & & ! m_softMatchingMap [ targetNonTerm [ 0 ] - > GetId ( ) ] . empty ( ) ) {
const std : : vector < Word > & softMatches = m_softMatchingMap [ targetNonTerm [ 0 ] - > GetId ( ) ] ;
for ( std : : vector < Word > : : const_iterator softMatch = softMatches . begin ( ) ; softMatch ! = softMatches . end ( ) ; + + softMatch ) {
2014-09-01 21:07:30 +04:00
const CompressedColumn & matches = compressedMatrix [ ( * softMatch ) [ 0 ] - > GetId ( ) ] ;
for ( CompressedColumn : : const_iterator match = matches . begin ( ) ; match ! = matches . end ( ) ; + + match ) {
m_stackVec . back ( ) = match - > cellLabel ;
m_stackScores . back ( ) = match - > score ;
AddAndExtend ( child , match - > endPos ) ;
2014-03-21 14:53:15 +04:00
}
}
} // end of soft matches lookup
2014-09-01 21:07:30 +04:00
const CompressedColumn & matches = compressedMatrix [ targetNonTerm [ 0 ] - > GetId ( ) ] ;
for ( CompressedColumn : : const_iterator match = matches . begin ( ) ; match ! = matches . end ( ) ; + + match ) {
m_stackVec . back ( ) = match - > cellLabel ;
m_stackScores . back ( ) = match - > score ;
AddAndExtend ( child , match - > endPos ) ;
2012-07-19 20:56:46 +04:00
}
}
2014-09-01 21:07:30 +04:00
// remove last back pointer
m_stackVec . pop_back ( ) ;
m_stackScores . pop_back ( ) ;
2012-07-19 20:56:46 +04:00
}
} // namespace Moses