2010-10-21 13:49:27 +04:00
|
|
|
#pragma once
|
|
|
|
|
2012-05-07 17:59:37 +04:00
|
|
|
#include "Vocabulary.h"
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2011-02-24 16:57:11 +03:00
|
|
|
/**
 * Holds alignment points for a set of sentences in one flat integer array
 * and provides per-sentence access to them.
 *
 * Storage layout, as established by the inline accessors below:
 *   - m_array stores alignment points as consecutive pairs of ints; for the
 *     point with index p inside a sentence, the first int of the pair is
 *     returned by GetSourceWord() and the second by GetTargetWord().
 *   - m_sentenceEnd[ s ] is an offset into m_array; the first pair of
 *     sentence s+1 starts at m_sentenceEnd[ s ] + 2.
 *
 * Instances cannot be copied: the copy constructor and the assignment
 * operator are declared private.
 */
class Alignment
{
public:
  /// Unsigned type used for sentence numbers and offsets into m_array.
  typedef unsigned int INDEX;

private:
  int *m_array;           ///< flat array of (source, target) pairs
  INDEX *m_sentenceEnd;   ///< per-sentence end offset into m_array
  INDEX m_size;           ///< presumably the total number of alignment points -- TODO confirm in Create()/Load()
  INDEX m_sentenceCount;  ///< presumably the number of entries in m_sentenceEnd -- TODO confirm
  char m_unaligned[ 256 ]; // here for speed (local to PhraseAlignment)

  // No copying allowed.
  Alignment(const Alignment&);
  void operator=(const Alignment&);

public:
  Alignment();
  ~Alignment();

  // The following members are only declared here; their behaviour is
  // determined by the implementation file.
  void Create(const std::string& fileName );
  bool PhraseAlignment( INDEX sentence, int target_length,
                        int source_start, int source_end,
                        int &target_start, int &target_end,
                        int &pre_null, int &post_null );
  void Load(const std::string& fileName );
  void Save(const std::string& fileName ) const;
  std::vector<std::string> Tokenize( const char input[] );

  /**
   * Offset in m_array of the first alignment pair of a sentence.
   *
   * @param sentence  zero-based sentence number; not range-checked
   * @return 0 for the first sentence, otherwise the previous sentence's
   *         end offset plus 2 (i.e. one pair further on)
   */
  INDEX GetSentenceStart( INDEX sentence ) const {
    if (sentence == 0) return 0;
    return m_sentenceEnd[ sentence-1 ] + 2;
  }

  /**
   * Half the distance between a sentence's end offset and its start offset.
   *
   * NOTE(review): GetSentenceStart() adds 2 to the previous end offset,
   * which suggests m_sentenceEnd points AT the last pair rather than past
   * it; if so this value is one less than the number of stored pairs, and
   * the unsigned subtraction wraps for a sentence with no pairs. Confirm
   * against how Create()/Load() fill m_sentenceEnd before relying on it.
   *
   * @param sentence  zero-based sentence number; not range-checked
   */
  INDEX GetNumberOfAlignmentPoints( INDEX sentence ) const {
    return ( m_sentenceEnd[ sentence ] - GetSentenceStart( sentence ) ) / 2;
  }

  /**
   * First element of the alignment pair number alignment_point (zero-based)
   * of the given sentence. No bounds checking is performed.
   */
  int GetSourceWord( INDEX sentence, INDEX alignment_point ) const {
    return m_array[ GetSentenceStart( sentence ) + alignment_point*2 ];
  }

  /**
   * Second element of the alignment pair number alignment_point (zero-based)
   * of the given sentence. No bounds checking is performed.
   */
  int GetTargetWord( INDEX sentence, INDEX alignment_point ) const {
    return m_array[ GetSentenceStart( sentence ) + alignment_point*2 + 1 ];
  }
};
|