2012-05-07 17:59:37 +04:00
|
|
|
#pragma once
|
|
|
|
|
2011-06-22 01:52:13 +04:00
|
|
|
#include <string>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <cstring>
|
|
|
|
#include <fstream>
|
|
|
|
#include <sstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include "SuffixArray.h"
|
|
|
|
#include "TargetCorpus.h"
|
|
|
|
#include "Alignment.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
class Mismatch
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef unsigned int INDEX;
|
|
|
|
|
|
|
|
private:
|
|
|
|
SuffixArray *m_suffixArray;
|
|
|
|
TargetCorpus *m_targetCorpus;
|
|
|
|
Alignment *m_alignment;
|
|
|
|
INDEX m_sentence_id;
|
|
|
|
INDEX m_num_alignment_points;
|
|
|
|
char m_source_length;
|
|
|
|
char m_target_length;
|
|
|
|
SuffixArray::INDEX m_source_position;
|
|
|
|
char m_source_start, m_source_end;
|
|
|
|
char m_source_unaligned[ 256 ];
|
|
|
|
char m_target_unaligned[ 256 ];
|
|
|
|
char m_unaligned;
|
|
|
|
|
|
|
|
public:
|
|
|
|
Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, char source_length, char target_length, char source_start, char source_end )
|
|
|
|
:m_suffixArray(sa)
|
|
|
|
,m_targetCorpus(tc)
|
|
|
|
,m_alignment(a)
|
|
|
|
,m_sentence_id(sentence_id)
|
2012-05-07 16:45:30 +04:00
|
|
|
,m_source_length(source_length)
|
2011-06-22 01:52:13 +04:00
|
|
|
,m_target_length(target_length)
|
2012-05-07 16:45:30 +04:00
|
|
|
,m_source_position(position)
|
2011-06-22 01:52:13 +04:00
|
|
|
,m_source_start(source_start)
|
|
|
|
,m_source_end(source_end)
|
|
|
|
{
|
|
|
|
// initialize unaligned indexes
|
|
|
|
for(char i=0; i<m_source_length; i++) {
|
|
|
|
m_source_unaligned[i] = true;
|
|
|
|
}
|
|
|
|
for(char i=0; i<m_target_length; i++) {
|
|
|
|
m_target_unaligned[i] = true;
|
|
|
|
}
|
|
|
|
m_num_alignment_points =
|
|
|
|
m_alignment->GetNumberOfAlignmentPoints( sentence_id );
|
|
|
|
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
|
|
|
|
m_source_unaligned[ m_alignment->GetSourceWord( sentence_id, ap ) ] = false;
|
|
|
|
m_target_unaligned[ m_alignment->GetTargetWord( sentence_id, ap ) ] = false;
|
|
|
|
}
|
|
|
|
m_unaligned = true;
|
|
|
|
for(char i=source_start; i<=source_end; i++) {
|
|
|
|
if (!m_source_unaligned[ i ]) {
|
|
|
|
m_unaligned = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~Mismatch () {}
|
|
|
|
|
|
|
|
bool Unaligned() { return m_unaligned; }
|
|
|
|
void PrintClippedHTML( ostream* out, int width );
|
|
|
|
void LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label );
|
|
|
|
};
|