// mosesdecoder/biconcor/PhrasePairCollection.cpp

#include "PhrasePairCollection.h"

#include <cstdlib>
#include <cstring>
#include <algorithm>

#include "Vocabulary.h"
#include "SuffixArray.h"
#include "TargetCorpus.h"
#include "Alignment.h"
#include "PhrasePair.h"
#include "Mismatch.h"

using namespace std;

/**
 * Creates an empty collection that reads from the given corpus structures.
 *
 * The three pointers are stored as passed; nothing is copied here.
 *
 * @param sa              suffix array used by GetCollection() to find matches
 * @param tc              target corpus the translations are read from
 * @param a               alignment used to map source spans to target spans
 * @param max_translation max number of distinct translations returned
 * @param max_example     max number of examples returned for each distinct
 *                        translation
 */
PhrasePairCollection::PhrasePairCollection( SuffixArray *sa, TargetCorpus *tc, Alignment *a, int max_translation, int max_example )
  : m_suffixArray(sa),
    m_targetCorpus(tc),
    m_alignment(a),
    m_size(0),                           // no phrase pairs collected yet
    m_max_lookup(10000),                 // maximum number of source occurrences sampled
    m_max_translation(max_translation),
    m_max_example(max_example)
{
}

/**
 * Frees every PhrasePair and Mismatch that GetCollection() allocated with
 * new and stored by raw pointer in m_collection, m_unaligned and m_mismatch.
 * Without this the objects are leaked when the collection goes away.
 *
 * NOTE(review): this makes the collection the owner of those objects, so a
 * shallow copy of a PhrasePairCollection would now delete them twice.
 * Confirm that instances are never copied (or disable copying in the header).
 */
PhrasePairCollection::~PhrasePairCollection()
{
  for (size_t t = 0; t < m_collection.size(); t++) {
    for (size_t e = 0; e < m_collection[t].size(); e++) {
      delete m_collection[t][e];
    }
  }
  for (size_t i = 0; i < m_unaligned.size(); i++) {
    delete m_unaligned[i];
  }
  for (size_t i = 0; i < m_mismatch.size(); i++) {
    delete m_mismatch[i];
  }
}

int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
{
  INDEX first_match, last_match;
  if (! m_suffixArray->FindMatches( sourceString, first_match, last_match )) {
    return 0;
  }
  //cerr << "\tfirst match " << first_match << endl;
  //cerr << "\tlast match " << last_match << endl;

  INDEX found = last_match - first_match +1;

  map< vector< WORD_ID >, INDEX > index;
  int real_count = 0;
  for( INDEX i=first_match; i<=last_match; i++ ) {
    int position = m_suffixArray->GetPosition( i );
    int source_start = m_suffixArray->GetWordInSentence( position );
    int source_end = source_start + sourceString.size()-1;
    INDEX sentence_id = m_suffixArray->GetSentence( position );
    int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );
    int target_length = m_targetCorpus->GetSentenceLength( sentence_id );
    //cerr << "match " << (i-first_match)
    //<< " in sentence " << sentence_id
    //<< ", starting at word " << source_start
    //<< " of " << sentence_length
    //<< ". target sentence has " << target_length << " words.";
    int target_start, target_end, pre_null, post_null;
    if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
      //cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
      //cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
      bool null_boundary_words = false;
      for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
        for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
          vector< WORD_ID > targetString;
          //cerr << "; ";
          for (int target = target_start - pre; target <= target_end + post; target++) {
            targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );
            //cerr << m_targetCorpus->GetWord( sentence_id, target) << " ";
          }
          PhrasePair *phrasePair = new PhrasePair( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, target_length, position, source_start, source_end, target_start-pre, target_end+post, pre, post, pre_null-pre, post_null-post);
          // matchCollection.Add( sentence_id, )
          if (index.find( targetString ) == index.end()) {
            index[targetString] = m_collection.size();
            vector< PhrasePair* > emptyVector;
            m_collection.push_back( emptyVector );
          }
          m_collection[ index[targetString] ].push_back( phrasePair );
          m_size++;
        }
      }
    } else {
      //cerr << "mismatch " << (i-first_match)
      //		 << " in sentence " << sentence_id
      //		 << ", starting at word " << source_start
      //		 << " of " << sentence_length
      //		 << ". target sentence has " << target_length << " words.";
      Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
      if (mismatch->Unaligned())
        m_unaligned.push_back( mismatch );
      else
        m_mismatch.push_back( mismatch );
    }
    //cerr << endl;

    if (found > (INDEX)m_max_lookup) {
      i += found/m_max_lookup-1;
    }
    real_count++;
  }
  sort(m_collection.begin(), m_collection.end(), CompareBySize());
  return real_count;
}

void PhrasePairCollection::Print(bool pretty) const
{
  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
  int i=0;
  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && i<m_max_translation; i++, ppWithSameTarget++ ) {
    (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
    int count = ppWithSameTarget->size();
    cout << "(" << count << ")" << endl;
    vector< PhrasePair* >::const_iterator p = ppWithSameTarget->begin();
    for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {
      if (pretty) {
        (*p)->PrintPretty( &cout, 100 );
      } else {
        (*p)->Print( &cout );
      }
      if (ppWithSameTarget->size() > m_max_example) {
        p += ppWithSameTarget->size()/m_max_example-1;
      }
    }
  }
}

void PhrasePairCollection::PrintHTML() const
{
  int pp_target = 0;
  bool singleton = false;
  // loop over all translations
  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {

    int count = ppWithSameTarget->size();
    if (!singleton) {
      if (count == 1) {
        singleton = true;
        cout << "<p class=\"pp_singleton_header\">singleton"
             << (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
             << (m_collection.end() - ppWithSameTarget)
             << "/" << m_size << ")</p>";
      } else {
        cout << "<p class=\"pp_target_header\">";
        (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
        cout << " (" << count << "/" << m_size << ")" << endl;
        cout << "<p><div id=\"pp_" << pp_target << "\">";
      }
      cout << "<table align=\"center\">";
    }

    vector< PhrasePair* >::const_iterator p;
    // loop over all sentences where translation occurs
    int pp=0;
    int i=0;
    for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
      (*p)->PrintClippedHTML( &cout, 160 );
      if (count > m_max_example) {
        p += count/m_max_example-1;
        pp += count/m_max_example-1;
      }
    }
    if (i == 10 && pp < count) {
      // extended table
      cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
      cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
      cout << "<table align=\"center\">";
      for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
        (*p)->PrintClippedHTML( &cout, 160 );
        if (count > m_max_example) {
          p += count/m_max_example-1;
          pp += count/m_max_example-1;
        }
      }
    }
    if (!singleton) cout << "</table></div>\n";

    if (!singleton && pp_target == 9) {
      cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
      cout << "<p class=\"pp_target_header\">(more)</p></div>";
      cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
    }
  }
  if (singleton) cout << "</table></div>\n";
  else if (pp_target > 9)	cout << "</div>";

  size_t max_mismatch = m_max_example/3;
  // unaligned phrases
  if (m_unaligned.size() > 0) {
    cout << "<p class=\"pp_singleton_header\">unaligned"
         << " (" << (m_unaligned.size()) << ")</p>";
    cout << "<table align=\"center\">";
    int step_size = 1;
    if (m_unaligned.size() > max_mismatch)
      step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
    for(size_t i=0; i<m_unaligned.size(); i+=step_size)
      m_unaligned[i]->PrintClippedHTML( &cout, 160 );
    cout << "</table>";
  }

  // mismatched phrases
  if (m_mismatch.size() > 0) {
    cout << "<p class=\"pp_singleton_header\">mismatched"
         << " (" << (m_mismatch.size()) << ")</p>";
    cout << "<table align=\"center\">";
    int step_size = 1;
    if (m_mismatch.size() > max_mismatch)
      step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
    for(size_t i=0; i<m_mismatch.size(); i+=step_size)
      m_mismatch[i]->PrintClippedHTML( &cout, 160 );
    cout << "</table>";
  }
}
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`#include "PhrasePairCollection.h"`
Clean up including headers; delete using namespace std in headers. 2012-05-07 18:41:18 +04:00
Modernize "C" includes in biconcor. This is one of those little chores in managing a long-lived C++ project: standard C headers like stdio.h and math.h now have their own place in the C++ standard as resp. cstdio, cmath, and so on. In this branch the #include names are updated for the biconcor/ subdirectory. C++11 adds cstdint, but to support compilation with the previous standard, that change is left for later. 2015-03-28 17:00:30 +03:00			`#include <cstdlib>`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`#include <cstring>`
			`#include <algorithm>`

Clean up including headers; delete using namespace std in headers. 2012-05-07 18:41:18 +04:00			`#include "Vocabulary.h"`
			`#include "SuffixArray.h"`
			`#include "TargetCorpus.h"`
			`#include "Alignment.h"`
			`#include "PhrasePair.h"`
			`#include "Mismatch.h"`

improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`using namespace std;`

extended display options for biconcor 2013-04-05 14:26:00 +04:00			`PhrasePairCollection::PhrasePairCollection( SuffixArray sa, TargetCorpus tc, Alignment *a, int max_translation, int max_example )`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`:m_suffixArray(sa)`
			`,m_targetCorpus(tc)`
			`,m_alignment(a)`
			`,m_size(0)`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`,m_max_lookup(10000) // maximum number of source occurrences sampled`
			`,m_max_translation(max_translation) // max number of different distinct translations returned`
			`,m_max_example(max_example) // max number of examples returned for each distinct translation`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`{}`

			`PhrasePairCollection::~PhrasePairCollection()`
			`{}`

extended display options for biconcor 2013-04-05 14:26:00 +04:00			`int PhrasePairCollection::GetCollection( const vector< string >& sourceString )`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`{`
			`INDEX first_match, last_match;`
			`if (! m_suffixArray->FindMatches( sourceString, first_match, last_match )) {`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`return 0;`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`//cerr << "\tfirst match " << first_match << endl;`
			`//cerr << "\tlast match " << last_match << endl;`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`INDEX found = last_match - first_match +1;`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`map< vector< WORD_ID >, INDEX > index;`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`int real_count = 0;`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`for( INDEX i=first_match; i<=last_match; i++ ) {`
			`int position = m_suffixArray->GetPosition( i );`
			`int source_start = m_suffixArray->GetWordInSentence( position );`
			`int source_end = source_start + sourceString.size()-1;`
			`INDEX sentence_id = m_suffixArray->GetSentence( position );`
			`int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );`
			`int target_length = m_targetCorpus->GetSentenceLength( sentence_id );`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`//cerr << "match " << (i-first_match)`
beautify 2013-05-29 21:16:15 +04:00			`//<< " in sentence " << sentence_id`
			`//<< ", starting at word " << source_start`
			`//<< " of " << sentence_length`
			`//<< ". target sentence has " << target_length << " words.";`
Fix warning: an array subscript is of type char (-Wchar-subscripts). According to gcc documentation, this is a common cause of error. See: http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html 2012-05-07 21:01:23 +04:00			`int target_start, target_end, pre_null, post_null;`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`//cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";`
			`//cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";`
beautify 2013-05-29 21:16:15 +04:00			`bool null_boundary_words = false;`
Fix warning: an array subscript is of type char (-Wchar-subscripts). According to gcc documentation, this is a common cause of error. See: http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html 2012-05-07 21:01:23 +04:00			`for (int pre = 0; pre <= pre_null && (pre == 0 \|\| null_boundary_words); pre++ ) {`
			`for (int post = 0; post <= post_null && (post == 0 \|\| null_boundary_words); post++ ) {`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`vector< WORD_ID > targetString;`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`//cerr << "; ";`
Fix warning: an array subscript is of type char (-Wchar-subscripts). According to gcc documentation, this is a common cause of error. See: http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html 2012-05-07 21:01:23 +04:00			`for (int target = target_start - pre; target <= target_end + post; target++) {`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`//cerr << m_targetCorpus->GetWord( sentence_id, target) << " ";`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
			`PhrasePair *phrasePair = new PhrasePair( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, target_length, position, source_start, source_end, target_start-pre, target_end+post, pre, post, pre_null-pre, post_null-post);`
			`// matchCollection.Add( sentence_id, )`
			`if (index.find( targetString ) == index.end()) {`
			`index[targetString] = m_collection.size();`
			`vector< PhrasePair* > emptyVector;`
			`m_collection.push_back( emptyVector );`
			`}`
			`m_collection[ index[targetString] ].push_back( phrasePair );`
			`m_size++;`
			`}`
			`}`
beautify 2013-05-29 21:16:15 +04:00			`} else {`
			`//cerr << "mismatch " << (i-first_match)`
			`// << " in sentence " << sentence_id`
			`// << ", starting at word " << source_start`
			`// << " of " << sentence_length`
			`// << ". target sentence has " << target_length << " words.";`
			`Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );`
			`if (mismatch->Unaligned())`
			`m_unaligned.push_back( mismatch );`
			`else`
			`m_mismatch.push_back( mismatch );`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`//cerr << endl;`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00
Merged in fixes for compiler warnings regarding comparing signed and unsigned integer expressions. 2012-05-10 16:48:51 +04:00			`if (found > (INDEX)m_max_lookup) {`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`i += found/m_max_lookup-1;`
			`}`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`real_count++;`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
			`sort(m_collection.begin(), m_collection.end(), CompareBySize());`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`return real_count;`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`}`

extended display options for biconcor 2013-04-05 14:26:00 +04:00			`void PhrasePairCollection::Print(bool pretty) const`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`{`
Add const to member functions. Replace define macros with enum. 2012-05-07 19:58:44 +04:00			`vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`int i=0;`
			`for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && i<m_max_translation; i++, ppWithSameTarget++ ) {`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`(*(ppWithSameTarget->begin()))->PrintTarget( &cout );`
			`int count = ppWithSameTarget->size();`
			`cout << "(" << count << ")" << endl;`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`vector< PhrasePair* >::const_iterator p = ppWithSameTarget->begin();`
			`for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {`
			`if (pretty) {`
			`(*p)->PrintPretty( &cout, 100 );`
beautify 2013-05-29 21:16:15 +04:00			`} else {`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`(*p)->Print( &cout );`
			`}`
			`if (ppWithSameTarget->size() > m_max_example) {`
			`p += ppWithSameTarget->size()/m_max_example-1;`
			`}`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
			`}`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`}`

Add const to member functions. Replace define macros with enum. 2012-05-07 19:58:44 +04:00			`void PhrasePairCollection::PrintHTML() const`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`{`
			`int pp_target = 0;`
beautify 2013-05-29 21:16:15 +04:00			`bool singleton = false;`
			`// loop over all translations`
Add const to member functions. Replace define macros with enum. 2012-05-07 19:58:44 +04:00			`vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {`
improvements to ems analysis git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4026 1f5c12ca-751b-0410-a591-d2e778427230 2011-06-22 01:52:13 +04:00
beautify 2013-05-29 21:16:15 +04:00			`int count = ppWithSameTarget->size();`
			`if (!singleton) {`
			`if (count == 1) {`
			`singleton = true;`
			`cout << "<p class=\"pp_singleton_header\">singleton"`
			`<< (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("`
			`<< (m_collection.end() - ppWithSameTarget)`
			`<< "/" << m_size << ")</p>";`
			`} else {`
			`cout << "<p class=\"pp_target_header\">";`
			`(*(ppWithSameTarget->begin()))->PrintTarget( &cout );`
			`cout << " (" << count << "/" << m_size << ")" << endl;`
			`cout << "<p><div id=\"pp_" << pp_target << "\">";`
			`}`
			`cout << "<table align=\"center\">";`
			`}`
Add const to member functions. Replace define macros with enum. 2012-05-07 19:58:44 +04:00
			`vector< PhrasePair* >::const_iterator p;`
beautify 2013-05-29 21:16:15 +04:00			`// loop over all sentences where translation occurs`
improvements to ems analysis git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4026 1f5c12ca-751b-0410-a591-d2e778427230 2011-06-22 01:52:13 +04:00			`int pp=0;`
beautify 2013-05-29 21:16:15 +04:00			`int i=0;`
improvements to ems analysis git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4026 1f5c12ca-751b-0410-a591-d2e778427230 2011-06-22 01:52:13 +04:00			`for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`(*p)->PrintClippedHTML( &cout, 160 );`
extended display options for biconcor 2013-04-05 14:26:00 +04:00			`if (count > m_max_example) {`
			`p += count/m_max_example-1;`
			`pp += count/m_max_example-1;`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
			`}`
beautify 2013-05-29 21:16:15 +04:00			`if (i == 10 && pp < count) {`
			`// extended table`
			`cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";`
			`cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";`
			`cout << "<table align=\"center\">";`
			`for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {`
			`(*p)->PrintClippedHTML( &cout, 160 );`
			`if (count > m_max_example) {`
			`p += count/m_max_example-1;`
			`pp += count/m_max_example-1;`
			`}`
			`}`
			`}`
			`if (!singleton) cout << "</table></div>\n";`

			`if (!singleton && pp_target == 9) {`
			`cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";`
			`cout << "<p class=\"pp_target_header\">(more)</p></div>";`
			`cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";`
			`}`
			`}`
			`if (singleton) cout << "</table></div>\n";`
			`else if (pp_target > 9) cout << "</div>";`

			`size_t max_mismatch = m_max_example/3;`
			`// unaligned phrases`
			`if (m_unaligned.size() > 0) {`
			`cout << "<p class=\"pp_singleton_header\">unaligned"`
			`<< " (" << (m_unaligned.size()) << ")</p>";`
			`cout << "<table align=\"center\">";`
			`int step_size = 1;`
			`if (m_unaligned.size() > max_mismatch)`
			`step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;`
			`for(size_t i=0; i<m_unaligned.size(); i+=step_size)`
			`m_unaligned[i]->PrintClippedHTML( &cout, 160 );`
			`cout << "</table>";`
			`}`

			`// mismatched phrases`
			`if (m_mismatch.size() > 0) {`
			`cout << "<p class=\"pp_singleton_header\">mismatched"`
			`<< " (" << (m_mismatch.size()) << ")</p>";`
			`cout << "<table align=\"center\">";`
			`int step_size = 1;`
			`if (m_mismatch.size() > max_mismatch)`
			`step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;`
			`for(size_t i=0; i<m_mismatch.size(); i+=step_size)`
			`m_mismatch[i]->PrintClippedHTML( &cout, 160 );`
			`cout << "</table>";`
run beautify.perl. Consistent formatting for .h & .cpp files git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3902 1f5c12ca-751b-0410-a591-d2e778427230 2011-02-24 16:57:11 +03:00			`}`
improvements to web analysis, fixes to syntax wrappers git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3633 1f5c12ca-751b-0410-a591-d2e778427230 2010-10-21 13:49:27 +04:00			`}`