2008-06-11 14:52:57 +04:00
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/
2010-02-24 14:15:44 +03:00
# ifndef moses_WordsBitmap_h
# define moses_WordsBitmap_h
2008-06-11 14:52:57 +04:00
# include <limits>
# include <vector>
# include <iostream>
# include <cstring>
# include <cmath>
2009-07-23 14:29:30 +04:00
# include <cstdlib>
2008-06-11 14:52:57 +04:00
# include "TypeDef.h"
# include "WordsRange.h"
2008-10-09 03:51:26 +04:00
namespace Moses
{
2008-12-13 15:08:55 +03:00
//! integer type of the identifiers returned by WordsBitmap::GetID() and WordsBitmap::GetIDPlus()
typedef unsigned long WordsBitmapID ;
2008-10-09 03:51:26 +04:00
2008-06-11 14:52:57 +04:00
/** vector of boolean used to represent whether a word has been translated or not
*/
2011-02-24 16:14:42 +03:00
class WordsBitmap
2008-06-11 14:52:57 +04:00
{
2011-02-24 16:14:42 +03:00
friend std : : ostream & operator < < ( std : : ostream & out , const WordsBitmap & wordsBitmap ) ;
2008-06-11 14:52:57 +04:00
protected :
2011-02-24 16:14:42 +03:00
const size_t m_size ; /**< number of words in sentence */
bool * m_bitmap ; /**< ticks of words that have been done */
WordsBitmap ( ) ; // not implemented
//! set all elements to false
void Initialize ( ) {
for ( size_t pos = 0 ; pos < m_size ; pos + + ) {
m_bitmap [ pos ] = false ;
}
}
//sets elements by vector
void Initialize ( std : : vector < bool > vector ) {
size_t vector_size = vector . size ( ) ;
for ( size_t pos = 0 ; pos < m_size ; pos + + ) {
if ( pos < vector_size & & vector [ pos ] = = true ) m_bitmap [ pos ] = true ;
else m_bitmap [ pos ] = false ;
}
}
2010-05-04 01:39:23 +04:00
2008-06-11 14:52:57 +04:00
public :
2011-02-24 16:14:42 +03:00
//! create WordsBitmap of length size and initialise with vector
WordsBitmap ( size_t size , std : : vector < bool > initialize_vector )
: m_size ( size ) {
m_bitmap = ( bool * ) malloc ( sizeof ( bool ) * size ) ;
Initialize ( initialize_vector ) ;
}
//! create WordsBitmap of length size and initialise
WordsBitmap ( size_t size )
: m_size ( size ) {
m_bitmap = ( bool * ) malloc ( sizeof ( bool ) * size ) ;
Initialize ( ) ;
}
//! deep copy
WordsBitmap ( const WordsBitmap & copy )
: m_size ( copy . m_size ) {
m_bitmap = ( bool * ) malloc ( sizeof ( bool ) * m_size ) ;
for ( size_t pos = 0 ; pos < copy . m_size ; pos + + ) {
m_bitmap [ pos ] = copy . GetValue ( pos ) ;
}
}
~ WordsBitmap ( ) {
free ( m_bitmap ) ;
}
//! count of words translated
size_t GetNumWordsCovered ( ) const {
size_t count = 0 ;
for ( size_t pos = 0 ; pos < m_size ; pos + + ) {
if ( m_bitmap [ pos ] )
count + + ;
}
return count ;
}
//! position of 1st word not yet translated, or NOT_FOUND if everything already translated
size_t GetFirstGapPos ( ) const {
for ( size_t pos = 0 ; pos < m_size ; pos + + ) {
if ( ! m_bitmap [ pos ] ) {
return pos ;
}
}
// no starting pos
return NOT_FOUND ;
}
//! position of last word not yet translated, or NOT_FOUND if everything already translated
size_t GetLastGapPos ( ) const {
for ( int pos = ( int ) m_size - 1 ; pos > = 0 ; pos - - ) {
if ( ! m_bitmap [ pos ] ) {
return pos ;
}
}
// no starting pos
return NOT_FOUND ;
}
//! position of last translated word
size_t GetLastPos ( ) const {
for ( int pos = ( int ) m_size - 1 ; pos > = 0 ; pos - - ) {
if ( m_bitmap [ pos ] ) {
return pos ;
}
}
// no starting pos
return NOT_FOUND ;
}
//! whether a word has been translated at a particular position
bool GetValue ( size_t pos ) const {
return m_bitmap [ pos ] ;
}
//! set value at a particular position
void SetValue ( size_t pos , bool value ) {
m_bitmap [ pos ] = value ;
}
//! set value between 2 positions, inclusive
void SetValue ( size_t startPos , size_t endPos , bool value ) {
for ( size_t pos = startPos ; pos < = endPos ; pos + + ) {
m_bitmap [ pos ] = value ;
}
}
//! whether every word has been translated
bool IsComplete ( ) const {
return GetSize ( ) = = GetNumWordsCovered ( ) ;
}
//! whether the wordrange overlaps with any translated word in this bitmap
bool Overlap ( const WordsRange & compare ) const {
for ( size_t pos = compare . GetStartPos ( ) ; pos < = compare . GetEndPos ( ) ; pos + + ) {
if ( m_bitmap [ pos ] )
return true ;
}
return false ;
}
//! number of elements
size_t GetSize ( ) const {
return m_size ;
}
//! transitive comparison of WordsBitmap
inline int Compare ( const WordsBitmap & compare ) const {
// -1 = less than
// +1 = more than
// 0 = same
size_t thisSize = GetSize ( )
, compareSize = compare . GetSize ( ) ;
if ( thisSize ! = compareSize ) {
return ( thisSize < compareSize ) ? - 1 : 1 ;
}
return std : : memcmp ( m_bitmap , compare . m_bitmap , thisSize * sizeof ( bool ) ) ;
}
bool operator < ( const WordsBitmap & compare ) const {
return Compare ( compare ) < 0 ;
}
inline size_t GetEdgeToTheLeftOf ( size_t l ) const {
if ( l = = 0 ) return l ;
while ( l & & ! m_bitmap [ l - 1 ] ) {
- - l ;
}
return l ;
}
inline size_t GetEdgeToTheRightOf ( size_t r ) const {
if ( r + 1 = = m_size ) return r ;
while ( r + 1 < m_size & & ! m_bitmap [ r + 1 ] ) {
+ + r ;
}
return r ;
}
//! TODO - ??? no idea
int GetFutureCosts ( int lastPos ) const ;
//! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
WordsBitmapID GetID ( ) const {
2011-11-18 16:07:41 +04:00
CHECK ( m_size < ( 1 < < 16 ) ) ;
2011-02-24 16:14:42 +03:00
size_t start = GetFirstGapPos ( ) ;
if ( start = = NOT_FOUND ) start = m_size ; // nothing left
size_t end = GetLastPos ( ) ;
if ( end = = NOT_FOUND ) end = 0 ; // nothing translated yet
2011-11-18 16:07:41 +04:00
CHECK ( end < start | | end - start < = 16 ) ;
2011-02-24 16:14:42 +03:00
WordsBitmapID id = 0 ;
for ( size_t pos = end ; pos > start ; pos - - ) {
id = id * 2 + ( int ) GetValue ( pos ) ;
}
return id + ( 1 < < 16 ) * start ;
}
//! converts bitmap into an integer ID, with an additional span covered
WordsBitmapID GetIDPlus ( size_t startPos , size_t endPos ) const {
2011-11-18 16:07:41 +04:00
CHECK ( m_size < ( 1 < < 16 ) ) ;
2011-02-24 16:14:42 +03:00
size_t start = GetFirstGapPos ( ) ;
if ( start = = NOT_FOUND ) start = m_size ; // nothing left
size_t end = GetLastPos ( ) ;
if ( end = = NOT_FOUND ) end = 0 ; // nothing translated yet
if ( start = = startPos ) start = endPos + 1 ;
if ( end < endPos ) end = endPos ;
2011-11-18 16:07:41 +04:00
CHECK ( end < start | | end - start < = 16 ) ;
2011-02-24 16:14:42 +03:00
WordsBitmapID id = 0 ;
for ( size_t pos = end ; pos > start ; pos - - ) {
id = id * 2 ;
if ( GetValue ( pos ) | | ( startPos < = pos & & pos < = endPos ) )
id + + ;
}
return id + ( 1 < < 16 ) * start ;
}
TO_STRING ( ) ;
2008-06-11 14:52:57 +04:00
} ;
2011-02-24 16:14:42 +03:00
// friend
2008-06-11 14:52:57 +04:00
inline std : : ostream & operator < < ( std : : ostream & out , const WordsBitmap & wordsBitmap )
{
2011-02-24 16:14:42 +03:00
for ( size_t i = 0 ; i < wordsBitmap . m_size ; i + + ) {
out < < ( wordsBitmap . GetValue ( i ) ? 1 : 0 ) ;
}
return out ;
2008-06-11 14:52:57 +04:00
}
2008-10-09 03:51:26 +04:00
}
2010-02-24 14:15:44 +03:00
# endif