2008-06-11 14:52:57 +04:00
// $Id$
/***********************************************************************
Moses - factored phrase - based language decoder
Copyright ( C ) 2006 University of Edinburgh
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2.1 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# pragma once
# include <cassert>
# include <cmath>
# include <cstring>
# include <iostream>
# include <limits>
# include <new>
# include <vector>
2009-07-23 14:29:30 +04:00
# include <cstdlib>
2008-06-11 14:52:57 +04:00
# include "TypeDef.h"
# include "WordsRange.h"
2008-10-09 03:51:26 +04:00
namespace Moses
{
2008-12-13 15:08:55 +03:00
//! integer identifier type returned by WordsBitmap::GetID() and WordsBitmap::GetIDPlus()
typedef unsigned long WordsBitmapID ;
2008-10-09 03:51:26 +04:00
2008-06-11 14:52:57 +04:00
/** vector of boolean used to represent whether a word has been translated or not
*/
class WordsBitmap
{
friend std : : ostream & operator < < ( std : : ostream & out , const WordsBitmap & wordsBitmap ) ;
protected :
const size_t m_size ; /**< number of words in sentence */
bool * m_bitmap ; /**< ticks of words that have been done */
WordsBitmap ( ) ; // not implemented
//! set all elements to false
void Initialize ( )
{
for ( size_t pos = 0 ; pos < m_size ; pos + + )
{
m_bitmap [ pos ] = false ;
}
}
public :
//! create WordsBitmap of length size and initialise
WordsBitmap ( size_t size )
: m_size ( size )
{
m_bitmap = ( bool * ) malloc ( sizeof ( bool ) * size ) ;
Initialize ( ) ;
}
//! deep copy
WordsBitmap ( const WordsBitmap & copy )
: m_size ( copy . m_size )
{
m_bitmap = ( bool * ) malloc ( sizeof ( bool ) * m_size ) ;
for ( size_t pos = 0 ; pos < copy . m_size ; pos + + )
{
m_bitmap [ pos ] = copy . GetValue ( pos ) ;
}
}
~ WordsBitmap ( )
{
free ( m_bitmap ) ;
}
//! count of words translated
size_t GetNumWordsCovered ( ) const
{
size_t count = 0 ;
for ( size_t pos = 0 ; pos < m_size ; pos + + )
{
if ( m_bitmap [ pos ] )
count + + ;
}
return count ;
}
//! position of 1st word not yet translated, or NOT_FOUND if everything already translated
size_t GetFirstGapPos ( ) const
{
for ( size_t pos = 0 ; pos < m_size ; pos + + )
{
if ( ! m_bitmap [ pos ] )
{
return pos ;
}
}
// no starting pos
return NOT_FOUND ;
}
//! position of last translated word
size_t GetLastPos ( ) const
{
for ( int pos = ( int ) m_size - 1 ; pos > = 0 ; pos - - )
{
if ( m_bitmap [ pos ] )
{
return pos ;
}
}
// no starting pos
return NOT_FOUND ;
}
//! whether a word has been translated at a particular position
bool GetValue ( size_t pos ) const
{
return m_bitmap [ pos ] ;
}
//! set value at a particular position
void SetValue ( size_t pos , bool value )
{
m_bitmap [ pos ] = value ;
}
//! set value between 2 positions, inclusive
void SetValue ( size_t startPos , size_t endPos , bool value )
{
for ( size_t pos = startPos ; pos < = endPos ; pos + + )
{
m_bitmap [ pos ] = value ;
}
}
//! whether every word has been translated
bool IsComplete ( ) const
{
return GetSize ( ) = = GetNumWordsCovered ( ) ;
}
//! whether the wordrange overlaps with any translated word in this bitmap
bool Overlap ( const WordsRange & compare ) const
{
for ( size_t pos = compare . GetStartPos ( ) ; pos < = compare . GetEndPos ( ) ; pos + + )
{
if ( m_bitmap [ pos ] )
return true ;
}
return false ;
}
//! number of elements
size_t GetSize ( ) const
{
return m_size ;
}
//! transitive comparison of WordsBitmap
inline int Compare ( const WordsBitmap & compare ) const
{
// -1 = less than
// +1 = more than
// 0 = same
size_t thisSize = GetSize ( )
, compareSize = compare . GetSize ( ) ;
if ( thisSize ! = compareSize )
{
return ( thisSize < compareSize ) ? - 1 : 1 ;
}
return std : : memcmp ( m_bitmap , compare . m_bitmap , thisSize * sizeof ( bool ) ) ;
}
bool operator < ( const WordsBitmap & compare ) const
{
return Compare ( compare ) < 0 ;
}
inline size_t GetEdgeToTheLeftOf ( size_t l ) const
{
if ( l = = 0 ) return l ;
while ( l & & ! m_bitmap [ l - 1 ] ) { - - l ; }
return l ;
}
inline size_t GetEdgeToTheRightOf ( size_t r ) const
{
if ( r + 1 = = m_size ) return r ;
2008-08-26 21:04:43 +04:00
while ( r + 1 < m_size & & ! m_bitmap [ r + 1 ] ) { + + r ; }
2008-06-11 14:52:57 +04:00
return r ;
}
//! TODO - ??? no idea
int GetFutureCosts ( int lastPos ) const ;
2009-01-01 21:16:54 +03:00
//! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
WordsBitmapID GetID ( ) const {
assert ( m_size < ( 1 < < 16 ) ) ;
2008-12-13 15:08:55 +03:00
2009-01-01 21:16:54 +03:00
size_t start = GetFirstGapPos ( ) ;
if ( start = = NOT_FOUND ) start = m_size ; // nothing left
2008-12-13 15:08:55 +03:00
2009-01-01 21:16:54 +03:00
size_t end = GetLastPos ( ) ;
if ( end = = NOT_FOUND ) end = 0 ; // nothing translated yet
2008-12-13 15:08:55 +03:00
2009-01-01 21:16:54 +03:00
assert ( end < start | | end - start < = 16 ) ;
WordsBitmapID id = 0 ;
for ( size_t pos = end ; pos > start ; pos - - ) {
id = id * 2 + ( int ) GetValue ( pos ) ;
}
return id + ( 1 < < 16 ) * start ;
}
//! converts bitmap into an integer ID, with an additional span covered
WordsBitmapID GetIDPlus ( size_t startPos , size_t endPos ) const {
assert ( m_size < ( 1 < < 16 ) ) ;
size_t start = GetFirstGapPos ( ) ;
if ( start = = NOT_FOUND ) start = m_size ; // nothing left
size_t end = GetLastPos ( ) ;
if ( end = = NOT_FOUND ) end = 0 ; // nothing translated yet
if ( start = = startPos ) start = endPos + 1 ;
if ( end < endPos ) end = endPos ;
assert ( end < start | | end - start < = 16 ) ;
WordsBitmapID id = 0 ;
for ( size_t pos = end ; pos > start ; pos - - ) {
id = id * 2 ;
if ( GetValue ( pos ) | | ( startPos < = pos & & pos < = endPos ) )
id + + ;
}
return id + ( 1 < < 16 ) * start ;
}
2008-12-13 15:08:55 +03:00
2009-01-01 21:16:54 +03:00
TO_STRING ( ) ;
2008-06-11 14:52:57 +04:00
} ;
// friend
inline std : : ostream & operator < < ( std : : ostream & out , const WordsBitmap & wordsBitmap )
{
for ( size_t i = 0 ; i < wordsBitmap . m_size ; i + + )
{
out < < ( wordsBitmap . GetValue ( i ) ? 1 : 0 ) ;
}
return out ;
}
2008-10-09 03:51:26 +04:00
}