2008-06-11 14:52:57 +04:00
// $Id$
// vim::tabstop=2
/***********************************************************************
Moses - factored phrase - based language decoder
Copyright ( C ) 2006 University of Edinburgh
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2.1 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# include <sstream>
# include "memory.h"
# include "Word.h"
# include "TypeDef.h"
2013-07-04 18:13:29 +04:00
# include "FactorTypeSet.h"
2008-06-11 14:52:57 +04:00
# include "StaticData.h" // needed to determine the FactorDelimiter
2013-03-28 14:27:11 +04:00
# include "util/exception.hh"
2012-10-14 20:35:58 +04:00
# include "util/tokenize_piece.hh"
2008-06-11 14:52:57 +04:00
using namespace std ;
2008-10-09 03:51:26 +04:00
namespace Moses
{
2008-06-11 14:52:57 +04:00
// static
int Word : : Compare ( const Word & targetWord , const Word & sourceWord )
{
2011-02-24 16:14:42 +03:00
if ( targetWord . IsNonTerminal ( ) ! = sourceWord . IsNonTerminal ( ) ) {
return targetWord . IsNonTerminal ( ) ? - 1 : 1 ;
}
for ( size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType + + ) {
const Factor * targetFactor = targetWord [ factorType ]
, * sourceFactor = sourceWord [ factorType ] ;
2010-04-08 21:16:10 +04:00
2011-02-24 16:14:42 +03:00
if ( targetFactor = = NULL | | sourceFactor = = NULL )
continue ;
if ( targetFactor = = sourceFactor )
continue ;
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
return ( targetFactor < sourceFactor ) ? - 1 : + 1 ;
}
return 0 ;
2008-06-11 14:52:57 +04:00
}
void Word : : Merge ( const Word & sourceWord )
{
2011-02-24 16:14:42 +03:00
for ( unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor + + ) {
const Factor * sourcefactor = sourceWord . m_factorArray [ currFactor ]
, * targetFactor = this - > m_factorArray [ currFactor ] ;
if ( targetFactor = = NULL & & sourcefactor ! = NULL ) {
m_factorArray [ currFactor ] = sourcefactor ;
}
}
2008-06-11 14:52:57 +04:00
}
std : : string Word : : GetString ( const vector < FactorType > factorType , bool endWithBlank ) const
{
2011-02-24 16:14:42 +03:00
stringstream strme ;
2011-11-18 16:07:41 +04:00
CHECK ( factorType . size ( ) < = MAX_NUM_FACTORS ) ;
2011-02-24 16:14:42 +03:00
const std : : string & factorDelimiter = StaticData : : Instance ( ) . GetFactorDelimiter ( ) ;
bool firstPass = true ;
for ( unsigned int i = 0 ; i < factorType . size ( ) ; i + + ) {
const Factor * factor = m_factorArray [ factorType [ i ] ] ;
if ( factor ! = NULL ) {
if ( firstPass ) {
firstPass = false ;
} else {
strme < < factorDelimiter ;
}
strme < < factor - > GetString ( ) ;
}
}
if ( endWithBlank ) strme < < " " ;
return strme . str ( ) ;
2008-06-11 14:52:57 +04:00
}
2013-05-29 21:16:15 +04:00
/** Return the string of a single factor of this word.
 *
 * The factor pointer is dereferenced without a NULL check, so the slot
 * addressed by factorType must be set on this word.
 *
 * \param factorType index of the factor to read
 * \return the result of Factor::GetString() for that factor
 */
StringPiece Word::GetString(FactorType factorType) const
{
  const Factor *factor = m_factorArray[factorType];
  return factor->GetString();
}
2013-03-28 14:27:11 +04:00
class StrayFactorException : public util : : Exception { } ;
2010-04-08 21:16:10 +04:00
void Word : : CreateFromString ( FactorDirection direction
2011-02-24 16:14:42 +03:00
, const std : : vector < FactorType > & factorOrder
2012-10-14 20:35:58 +04:00
, const StringPiece & str
2011-02-24 16:14:42 +03:00
, bool isNonTerminal )
2010-04-08 21:16:10 +04:00
{
2011-02-24 16:14:42 +03:00
FactorCollection & factorCollection = FactorCollection : : Instance ( ) ;
2012-10-14 20:35:58 +04:00
util : : TokenIter < util : : MultiCharacter > fit ( str , StaticData : : Instance ( ) . GetFactorDelimiter ( ) ) ;
for ( size_t ind = 0 ; ind < factorOrder . size ( ) & & fit ; + + ind , + + fit ) {
m_factorArray [ factorOrder [ ind ] ] = factorCollection . AddFactor ( * fit ) ;
2011-02-24 16:14:42 +03:00
}
2013-03-28 14:27:11 +04:00
UTIL_THROW_IF ( fit , StrayFactorException , " You have configured " < < factorOrder . size ( ) < < " factors but the word " < < str < < " contains factor delimiter " < < StaticData : : Instance ( ) . GetFactorDelimiter ( ) < < " too many times. " ) ;
2011-02-24 16:14:42 +03:00
// assume term/non-term same for all factors
m_isNonTerminal = isNonTerminal ;
2010-04-08 21:16:10 +04:00
}
void Word : : CreateUnknownWord ( const Word & sourceWord )
{
2011-02-24 16:14:42 +03:00
FactorCollection & factorCollection = FactorCollection : : Instance ( ) ;
for ( unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor + + ) {
FactorType factorType = static_cast < FactorType > ( currFactor ) ;
const Factor * sourceFactor = sourceWord [ currFactor ] ;
if ( sourceFactor = = NULL )
SetFactor ( factorType , factorCollection . AddFactor ( Output , factorType , UNKNOWN_FACTOR ) ) ;
else
SetFactor ( factorType , factorCollection . AddFactor ( Output , factorType , sourceFactor - > GetString ( ) ) ) ;
}
m_isNonTerminal = sourceWord . IsNonTerminal ( ) ;
2010-04-08 21:16:10 +04:00
}
2011-02-24 16:14:42 +03:00
2013-07-04 18:13:29 +04:00
void Word : : OnlyTheseFactors ( const FactorMask & factors )
{
for ( unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor + + ) {
if ( ! factors [ currFactor ] ) {
SetFactor ( currFactor , NULL ) ;
}
}
}
2008-06-11 14:52:57 +04:00
// Instantiates the project-defined TO_STRING_BODY macro for Word.
// NOTE(review): presumably this expands to the definition of Word's
// string-conversion member in terms of operator<< - confirm against the
// macro's definition.
TO_STRING_BODY ( Word ) ;
// friend
/** Stream a word as its non-NULL factors joined by the factor delimiter.
 *
 * All factor slots are visited in index order; NULL slots are skipped and
 * produce no delimiter. The text is assembled in a temporary stringstream
 * and then written to out in one piece, followed by the same trailing
 * string literal as the original.
 *
 * \param out  destination stream
 * \param word word to print
 * \return out
 */
ostream &operator<<(ostream &out, const Word &word)
{
  const std::string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();

  stringstream strme;
  bool needDelimiter = false;
  for (unsigned int currFactor = 0; currFactor < MAX_NUM_FACTORS; ++currFactor) {
    const Factor *factor = word.GetFactor(static_cast<FactorType>(currFactor));
    if (factor == NULL) {
      continue;
    }

    // delimiter goes between factors, never before the first one
    if (needDelimiter) {
      strme << factorDelimiter;
    }
    needDelimiter = true;

    strme << *factor;
  }

  out << strme.str() << " ";
  return out;
}
2008-10-09 03:51:26 +04:00
}