Merge pull request #15 from tetsuok/fix-wcharsubscripts

Fix wcharsubscripts (GCC -Wchar-subscripts warnings: char values used as array subscripts). OK, I'll take your word for it. HH
This commit is contained in:
Hieu Hoang 2012-05-10 04:54:28 -07:00
commit a4702ef255
7 changed files with 40 additions and 42 deletions

View File

@ -40,7 +40,7 @@ void Alignment::Create(const string& fileName)
cerr << m_size << " alignment points" << endl;
// allocate memory
m_array = (char*) calloc( sizeof( char ), m_size*2 );
m_array = (int*) calloc( sizeof(int), m_size*2 );
m_sentenceEnd = (INDEX*) calloc( sizeof( INDEX ), m_sentenceCount );
if (m_array == NULL) {
@ -121,13 +121,11 @@ vector<string> Alignment::Tokenize( const char input[] )
return token;
}
bool Alignment::PhraseAlignment( INDEX sentence, char target_length,
char source_start, char source_end,
char &target_start, char &target_end,
char &pre_null, char &post_null )
bool Alignment::PhraseAlignment( INDEX sentence, int target_length,
int source_start, int source_end,
int &target_start, int &target_end,
int &pre_null, int &post_null )
{
vector< char > alignedTargetWords;
// get index for first alignment point
INDEX sentenceStart = 0;
if (sentence > 0) {
@ -138,9 +136,9 @@ bool Alignment::PhraseAlignment( INDEX sentence, char target_length,
target_start = target_length;
target_end = 0;
for(INDEX ap = sentenceStart; ap <= m_sentenceEnd[ sentence ]; ap += 2 ) {
char source = m_array[ ap ];
int source = m_array[ ap ];
if (source >= source_start && source <= source_end ) {
char target = m_array[ ap+1 ];
int target = m_array[ ap+1 ];
if (target < target_start) target_start = target;
if (target > target_end ) target_end = target;
}
@ -151,9 +149,9 @@ bool Alignment::PhraseAlignment( INDEX sentence, char target_length,
// check consistency
for(INDEX ap = sentenceStart; ap <= m_sentenceEnd[ sentence ]; ap += 2 ) {
char target = m_array[ ap+1 ];
int target = m_array[ ap+1 ];
if (target >= target_start && target <= target_end ) {
char source = m_array[ ap ];
int source = m_array[ ap ];
if (source < source_start || source > source_end) {
return false; // alignment point out of range
}
@ -165,19 +163,19 @@ bool Alignment::PhraseAlignment( INDEX sentence, char target_length,
m_unaligned[i] = true;
}
for(INDEX ap = sentenceStart; ap <= m_sentenceEnd[ sentence ]; ap += 2 ) {
char target = m_array[ ap+1 ];
int target = m_array[ ap+1 ];
m_unaligned[ target ] = false;
}
// prior unaligned words
pre_null = 0;
for(char target = target_start-1; target >= 0 && m_unaligned[ target ]; target--) {
for(int target = target_start-1; target >= 0 && m_unaligned[ target ]; target--) {
pre_null++;
}
// post unaligned words;
post_null = 0;
for(char target = target_end+1; target < target_length && m_unaligned[ target ]; target++) {
for(int target = target_end+1; target < target_length && m_unaligned[ target ]; target++) {
post_null++;
}
return true;
@ -192,7 +190,7 @@ void Alignment::Save(const string& fileName ) const
}
fwrite( &m_size, sizeof(INDEX), 1, pFile );
fwrite( m_array, sizeof(char), m_size*2, pFile ); // corpus
fwrite( m_array, sizeof(int), m_size*2, pFile ); // corpus
fwrite( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fwrite( m_sentenceEnd, sizeof(INDEX), m_sentenceCount, pFile); // sentence index
@ -211,8 +209,8 @@ void Alignment::Load(const string& fileName )
fread( &m_size, sizeof(INDEX), 1, pFile );
cerr << "alignment points in corpus: " << m_size << endl;
m_array = (char*) calloc( sizeof(char), m_size*2 );
fread( m_array, sizeof(char), m_size*2, pFile ); // corpus
m_array = (int*) calloc( sizeof(int), m_size*2 );
fread( m_array, sizeof(int), m_size*2, pFile ); // corpus
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile );
cerr << "sentences in corpus: " << m_sentenceCount << endl;

View File

@ -8,7 +8,7 @@ public:
typedef unsigned int INDEX;
private:
char *m_array;
int *m_array;
INDEX *m_sentenceEnd;
INDEX m_size;
INDEX m_sentenceCount;
@ -23,10 +23,10 @@ public:
~Alignment();
void Create(const std::string& fileName );
bool PhraseAlignment( INDEX sentence, char target_length,
char source_start, char source_end,
char &target_start, char &target_end,
char &pre_null, char &post_null );
bool PhraseAlignment( INDEX sentence, int target_length,
int source_start, int source_end,
int &target_start, int &target_end,
int &pre_null, int &post_null );
void Load(const std::string& fileName );
void Save(const std::string& fileName ) const;
std::vector<std::string> Tokenize( const char input[] );
@ -38,10 +38,10 @@ public:
// Returns the number of alignment points recorded for `sentence`.
// Computed as the distance between GetSentenceStart( sentence ) and
// m_sentenceEnd[ sentence ], halved with integer division: each alignment
// point occupies two consecutive m_array entries (source word, then target
// word), as the *2 / *2 + 1 indexing in GetSourceWord / GetTargetWord shows.
// NOTE(review): no bounds check on `sentence`; presumably callers pass an
// index below m_sentenceCount -- confirm.
INDEX GetNumberOfAlignmentPoints( INDEX sentence ) const {
return ( m_sentenceEnd[ sentence ] - GetSentenceStart( sentence ) ) / 2;
}
char GetSourceWord( INDEX sentence, INDEX alignment_point ) const {
int GetSourceWord( INDEX sentence, INDEX alignment_point ) const {
return m_array[ GetSentenceStart( sentence ) + alignment_point*2 ];
}
char GetTargetWord( INDEX sentence, INDEX alignment_point ) const {
int GetTargetWord( INDEX sentence, INDEX alignment_point ) const {
return m_array[ GetSentenceStart( sentence ) + alignment_point*2 + 1 ];
}
};

View File

@ -22,7 +22,7 @@ enum {
ALIGNED = 5
};
Mismatch::Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, char source_length, char target_length, char source_start, char source_end )
Mismatch::Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, int source_length, int target_length, int source_start, int source_end )
:m_suffixArray(sa)
,m_targetCorpus(tc)
,m_alignment(a)
@ -58,8 +58,8 @@ Mismatch::~Mismatch () {}
void Mismatch::PrintClippedHTML( ostream* out, int width )
{
char source_annotation[256], target_annotation[256];
vector< string > label_class;
int source_annotation[256], target_annotation[256];
vector< string > label_class;
label_class.push_back( "" );
label_class.push_back( "mismatch_pre_aligned" );
label_class.push_back( "mismatch_post_aligned" );
@ -281,7 +281,7 @@ void Mismatch::PrintClippedHTML( ostream* out, int width )
*out << "</td></tr>";
}
void Mismatch::LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label ) {
void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label ) {
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
source_annotation[ source_id ] = label;

View File

@ -17,11 +17,11 @@ private:
Alignment *m_alignment;
INDEX m_sentence_id;
INDEX m_num_alignment_points;
char m_source_length;
char m_target_length;
int m_source_length;
int m_target_length;
INDEX m_source_position;
char m_source_start;
char m_source_end;
int m_source_start;
int m_source_end;
bool m_source_unaligned[ 256 ];
bool m_target_unaligned[ 256 ];
bool m_unaligned;
@ -31,10 +31,10 @@ private:
void operator=(const Mismatch&);
public:
Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, char source_length, char target_length, char source_start, char source_end );
Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, int source_length, int target_length, int source_start, int source_end );
~Mismatch();
// Accessor for the m_unaligned flag; does not modify the object.
// NOTE(review): where the flag is set is not visible in this hunk --
// presumably in the constructor; confirm.
bool Unaligned() const { return m_unaligned; }
void PrintClippedHTML(std::ostream* out, int width );
void LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label );
void LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label );
};

View File

@ -50,16 +50,16 @@ bool PhrasePairCollection::GetCollection( const vector< string >& sourceString )
<< ", starting at word " << source_start
<< " of " << sentence_length
<< ". target sentence has " << target_length << " words.";
char target_start, target_end, pre_null, post_null;
int target_start, target_end, pre_null, post_null;
if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
bool null_boundary_words = false;
for( char pre = 0; pre <= pre_null && (pre==0||null_boundary_words); pre++ ) {
for( char post = 0; post <= post_null && (post==0||null_boundary_words); post++ ) {
for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
vector< WORD_ID > targetString;
cerr << "; ";
for( char target = target_start-pre; target <= target_end+post; target++ ) {
for (int target = target_start - pre; target <= target_end + post; target++) {
targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );
cerr << m_targetCorpus->GetWord( sentence_id, target) << " ";
}

View File

@ -97,12 +97,12 @@ WORD TargetCorpus::GetWordFromId( const WORD_ID id ) const
return m_vcb.GetWord( id );
}
WORD TargetCorpus::GetWord( INDEX sentence, char word ) const
WORD TargetCorpus::GetWord( INDEX sentence, int word ) const
{
return m_vcb.GetWord( GetWordId( sentence, word ) );
}
WORD_ID TargetCorpus::GetWordId( INDEX sentence, char word ) const
WORD_ID TargetCorpus::GetWordId( INDEX sentence, int word ) const
{
if (sentence == 0) {
return m_array[ word ];

View File

@ -24,8 +24,8 @@ public:
void Create(const std::string& fileName );
WORD GetWordFromId( const WORD_ID id ) const;
WORD GetWord( INDEX sentence, char word ) const;
WORD_ID GetWordId( INDEX sentence, char word ) const;
WORD GetWord( INDEX sentence, int word ) const;
WORD_ID GetWordId( INDEX sentence, int word ) const;
char GetSentenceLength( INDEX sentence ) const;
void Load(const std::string& fileName );
void Save(const std::string& fileName ) const;