Improvements to EMS analysis

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4026 1f5c12ca-751b-0410-a591-d2e778427230
phkoehn 2011-06-21 21:52:13 +00:00
parent 2cdc39f63f
commit 6acd6a8684
19 changed files with 1142 additions and 312 deletions

View File

@ -14,7 +14,7 @@ private:
INDEX *m_sentenceEnd;
INDEX m_size;
INDEX m_sentenceCount;
char m_unaligned[ 256 ];
char m_unaligned[ 256 ]; // here for speed (local to PhraseAlignment)
public:
~Alignment();
@ -27,4 +27,18 @@ public:
void Load( string fileName );
void Save( string fileName );
vector<string> Tokenize( const char input[] );
INDEX GetSentenceStart( INDEX sentence ) {
if (sentence == 0) return 0;
return m_sentenceEnd[ sentence-1 ] + 2;
}
INDEX GetNumberOfAlignmentPoints( INDEX sentence ) {
return ( m_sentenceEnd[ sentence ] - GetSentenceStart( sentence ) ) / 2;
}
char GetSourceWord( INDEX sentence, INDEX alignment_point ) {
return m_array[ GetSentenceStart( sentence ) + alignment_point*2 ];
}
char GetTargetWord( INDEX sentence, INDEX alignment_point ) {
return m_array[ GetSentenceStart( sentence ) + alignment_point*2 + 1 ];
}
};
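The accessors above hide the packed layout of the alignment array (per sentence, source/target word indexes stored as consecutive pairs). A minimal usage sketch, not part of the commit, assuming an Alignment object already loaded with Load() and that INDEX is the class's public index type:
// Sketch: dump all alignment points of one sentence as "src-tgt" pairs.
void DumpAlignmentPoints( Alignment &alignment, Alignment::INDEX sentence )
{
  Alignment::INDEX points = alignment.GetNumberOfAlignmentPoints( sentence );
  for( Alignment::INDEX ap = 0; ap < points; ap++ ) {
    cout << (int) alignment.GetSourceWord( sentence, ap ) << "-"
         << (int) alignment.GetTargetWord( sentence, ap ) << " ";
  }
  cout << endl;
}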

View File

@ -6,5 +6,5 @@ clean:
.cpp.o:
g++ -O6 -g -c $<
biconcor: Vocabulary.o SuffixArray.o TargetCorpus.o Alignment.o PhrasePair.o PhrasePairCollection.o biconcor.o
g++ Vocabulary.o SuffixArray.o TargetCorpus.o Alignment.o PhrasePair.o PhrasePairCollection.o biconcor.o -o biconcor
biconcor: Vocabulary.o SuffixArray.o TargetCorpus.o Alignment.o Mismatch.o PhrasePair.o PhrasePairCollection.o biconcor.o base64.o
g++ Vocabulary.o SuffixArray.o TargetCorpus.o Alignment.o Mismatch.o PhrasePair.o PhrasePairCollection.o biconcor.o base64.o -o biconcor

View File

@ -0,0 +1,246 @@
#include "Mismatch.h"
#include "Vocabulary.h"
using namespace std;
#define UNANNOTATED 0
#define PRE_ALIGNED 1
#define POST_ALIGNED 2
#define UNALIGNED 3
#define MISALIGNED 4
#define ALIGNED 5
void Mismatch::PrintClippedHTML( ostream* out, int width )
{
char source_annotation[256], target_annotation[256];
vector< string > label_class;
label_class.push_back( "" );
label_class.push_back( "mismatch_pre_aligned" );
label_class.push_back( "mismatch_post_aligned" );
label_class.push_back( "null_aligned" );
label_class.push_back( "mismatch_misaligned" );
label_class.push_back( "mismatch_aligned" );
for(int i=0; i<m_source_length;i++) source_annotation[i] = UNANNOTATED;
for(int i=0; i<m_target_length;i++) target_annotation[i] = UNANNOTATED;
if (m_unaligned) {
// find alignment points for prior and next word(s) and
// center target phrase around those.
bool found_aligned = false;
for(int i=1; i<m_source_length && !found_aligned; i++) {
if (m_source_start-i >= 0) {
int word_id = m_source_start-i;
source_annotation[ word_id ] = UNALIGNED;
if (!m_source_unaligned[ word_id ]) {
found_aligned = true;
LabelSourceMatches( source_annotation, target_annotation, word_id, PRE_ALIGNED );
}
}
if (m_source_end+i < m_source_length) {
int word_id = m_source_end+i;
source_annotation[ word_id ] = UNALIGNED;
if (!m_source_unaligned[ word_id ]) {
found_aligned = true;
LabelSourceMatches( source_annotation, target_annotation, word_id, POST_ALIGNED );
}
}
}
}
// misalignment
else {
// label aligned output words
for(int i=m_source_start; i<=m_source_end; i++)
LabelSourceMatches( source_annotation, target_annotation, i, ALIGNED );
// find first and last
int target_start = -1;
int target_end;
for(int i=0; i<m_target_length; i++)
if (target_annotation[i] == ALIGNED) {
if (target_start == -1)
target_start = i;
target_end = i;
}
// go over all enclosed target words
for(int i=target_start; i<=target_end; i++) {
// label other target words as unaligned or misaligned
if (m_target_unaligned[ i ])
target_annotation[ i ] = UNALIGNED;
else {
if (target_annotation[ i ] != ALIGNED)
target_annotation[ i ] = MISALIGNED;
// loop over aligned source words
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
if (m_alignment->GetTargetWord( m_sentence_id, ap ) == i) {
int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
// if not part of the source phrase -> also misaligned
if (source_word < m_source_start || source_word > m_source_end)
source_annotation[ source_word ] = MISALIGNED;
}
}
}
}
// closure
bool change = true;
while(change) {
change = false;
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
int target_word = m_alignment->GetTargetWord( m_sentence_id, ap );
if (source_annotation[source_word] != UNANNOTATED &&
target_annotation[target_word] == UNANNOTATED) {
target_annotation[target_word] = MISALIGNED;
change = true;
}
if (source_annotation[source_word] == UNANNOTATED &&
target_annotation[target_word] != UNANNOTATED) {
source_annotation[source_word] = MISALIGNED;
change = true;
}
}
}
}
// print source
// shorten source context if too long
int sentence_start = m_source_position - m_source_start;
int context_space = width/2;
for(int i=m_source_start;i<=m_source_end;i++)
context_space -= m_suffixArray->GetWord( sentence_start + i ).size() + 1;
context_space /= 2;
int remaining = context_space;
int start_word = m_source_start;
for(;start_word>0 && remaining>0; start_word--)
remaining -= m_suffixArray->GetWord( sentence_start + start_word-1 ).size() + 1;
if (remaining<0 || start_word == -1) start_word++;
remaining = context_space;
int end_word = m_source_end;
for(;end_word<m_source_length && remaining>0; end_word++)
remaining -= m_suffixArray->GetWord( sentence_start + end_word ).size() + 1;
end_word--;
// output with markup
*out << "<tr><td class=\"pp_source_left\">";
char current_label = UNANNOTATED;
if (start_word>0) {
current_label = source_annotation[start_word-1];
*out << "... ";
}
for(int i=start_word; i<=end_word; i++) {
// change to phrase block
if (i == m_source_start) {
if (current_label != UNANNOTATED && i!=start_word)
*out << "</span>";
*out << "</td><td class=\"pp_source\">";
current_label = UNANNOTATED;
}
// change to labeled word
else if (source_annotation[i] != current_label &&
source_annotation[i] != ALIGNED) {
if (current_label != UNANNOTATED && i!=start_word)
*out << "</span>";
if (source_annotation[i] != UNANNOTATED)
*out << "<span class=\""
<< label_class[ source_annotation[i] ]
<< "\">";
current_label = source_annotation[i];
}
// output word
*out << m_suffixArray->GetWord( sentence_start + i ) << " ";
// change to right context block
if (i == m_source_end) {
*out << "</td><td class=\"pp_source_right\">";
current_label = UNANNOTATED;
}
}
if (current_label != UNANNOTATED && end_word>m_source_end)
*out << "</span>";
if (end_word<m_source_length-1)
*out << "... ";
// print target
// shorten target context if too long
int target_start = -1;
int target_end;
for(int i=0; i<m_target_length; i++)
if (target_annotation[i] != UNANNOTATED) {
if (target_start == -1)
target_start = i;
target_end = i;
}
context_space = width/2;
for(int i=target_start;i<=target_end;i++)
context_space -= m_targetCorpus->GetWord( m_sentence_id, i ).size() + 1;
while (context_space < 0) { // shorten matched part, if too long
context_space +=
m_targetCorpus->GetWord( m_sentence_id, target_start ).size() +
m_targetCorpus->GetWord( m_sentence_id, target_end ).size() + 2;
target_start++;
target_end--;
}
context_space /= 2;
remaining = context_space;
start_word = target_start;
for(;start_word>0 && remaining>0; start_word--) {
//cerr << "remaining: " << remaining << ", start_word: " << start_word << endl;
remaining -= m_targetCorpus->GetWord( m_sentence_id, start_word-1 ).size() + 1;
}
if (remaining<0 || start_word == -1) start_word++;
remaining = context_space;
end_word = target_end;
for(;end_word<m_target_length && remaining>0; end_word++) {
//cerr << "remaining: " << remaining << ", end_word: " << end_word << endl;
remaining -= m_targetCorpus->GetWord( m_sentence_id, end_word ).size() + 1;
}
end_word--;
// output with markup
*out << "</td><td class=\"mismatch_target\">";
current_label = UNANNOTATED;
if (start_word>0) {
current_label = target_annotation[start_word-1];
*out << "... ";
}
for(int i=start_word; i<=end_word; i++) {
if (target_annotation[i] != current_label) {
if (current_label != UNANNOTATED && i!=start_word)
*out << "</span>";
if (target_annotation[i] != UNANNOTATED)
*out << "<span class=\""
<< label_class[ target_annotation[i] ]
<< "\">";
current_label = target_annotation[i];
}
// output word
*out << m_targetCorpus->GetWord( m_sentence_id, i ) << " ";
}
if (current_label != UNANNOTATED && end_word>target_end)
*out << "</span>";
if (end_word<m_target_length-1)
*out << "... ";
*out << "</td></tr>";
}
void Mismatch::LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label ) {
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
source_annotation[ source_id ] = label;
target_annotation[ m_alignment->GetTargetWord( m_sentence_id, ap ) ] = label;
}
}
}
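In short, PrintClippedHTML assigns each source and target word one of the labels defined at the top of the file: for an unaligned phrase it marks the surrounding unaligned words and the nearest aligned neighbors (PRE_ALIGNED/POST_ALIGNED); for a misaligned phrase it labels the words covered by the phrase alignment (ALIGNED), the words inside the enclosed target span (UNALIGNED or MISALIGNED), and then runs a fixed-point closure over all alignment points so that any word linked to an annotated word on the other side also ends up MISALIGNED. The labels map one-to-one onto the CSS classes in label_class.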

View File

@ -0,0 +1,70 @@
#include <string>
#include <stdlib.h>
#include <cstring>
#include <fstream>
#include <sstream>
#include <iostream>
#include "SuffixArray.h"
#include "TargetCorpus.h"
#include "Alignment.h"
#pragma once
using namespace std;
class Mismatch
{
public:
typedef unsigned int INDEX;
private:
SuffixArray *m_suffixArray;
TargetCorpus *m_targetCorpus;
Alignment *m_alignment;
INDEX m_sentence_id;
INDEX m_num_alignment_points;
char m_source_length;
char m_target_length;
SuffixArray::INDEX m_source_position;
char m_source_start, m_source_end;
char m_source_unaligned[ 256 ];
char m_target_unaligned[ 256 ];
char m_unaligned;
public:
Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, char source_length, char target_length, char source_start, char source_end )
:m_suffixArray(sa)
,m_targetCorpus(tc)
,m_alignment(a)
,m_sentence_id(sentence_id)
,m_source_position(position)
,m_source_length(source_length)
,m_target_length(target_length)
,m_source_start(source_start)
,m_source_end(source_end)
{
// initialize unaligned indexes
for(char i=0; i<m_source_length; i++) {
m_source_unaligned[i] = true;
}
for(char i=0; i<m_target_length; i++) {
m_target_unaligned[i] = true;
}
m_num_alignment_points =
m_alignment->GetNumberOfAlignmentPoints( sentence_id );
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
m_source_unaligned[ m_alignment->GetSourceWord( sentence_id, ap ) ] = false;
m_target_unaligned[ m_alignment->GetTargetWord( sentence_id, ap ) ] = false;
}
m_unaligned = true;
for(char i=source_start; i<=source_end; i++) {
if (!m_source_unaligned[ i ]) {
m_unaligned = false;
}
}
}
~Mismatch () {}
bool Unaligned() { return m_unaligned; }
void PrintClippedHTML( ostream* out, int width );
void LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label );
};
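For reference, the intended call pattern mirrors the new code in PhrasePairCollection::GetCollection and PrintHTML below: build a Mismatch for a sentence where phrase alignment failed, split the cases with Unaligned(), and render with PrintClippedHTML. A condensed sketch (container names assumed, not part of the commit):
Mismatch *mismatch = new Mismatch( suffixArray, targetCorpus, alignment,
                                   sentence_id, position,
                                   sentence_length, target_length,
                                   source_start, source_end );
if (mismatch->Unaligned())
  unaligned.push_back( mismatch );    // no alignment point touches the source phrase
else
  mismatched.push_back( mismatch );   // aligned, but not to a clean target phrase
// later, when building the HTML report:
mismatch->PrintClippedHTML( &cout, 160 );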

View File

@ -145,14 +145,27 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width )
int source_pre_width = (source_width-source.size())/2;
int source_post_width = (source_width-source.size()+1)/2;
// if phrase is too long, don't show any context
if (source.size() > width) {
source_pre_width = 0;
source_post_width = 0;
}
if (source_pre.size()>source_pre_width)
// too long -> truncate and add "..."
if (source_pre.size()>source_pre_width) {
// first skip up to a space
while(source_pre_width>0 &&
source_pre.substr(source_pre.size()-source_pre_width,1) != " ") {
source_pre_width--;
}
source_pre = "..." + source_pre.substr( source_pre.size()-source_pre_width, source_pre_width );
if (source_post.size() > source_post_width)
}
if (source_post.size() > source_post_width) {
while(source_post_width>0 &&
source_post.substr(source_post_width-1,1) != " ") {
source_post_width--;
}
source_post = source_post.substr( 0, source_post_width ) + "...";
}
*out << "<tr><td class=\"pp_source_left\">"
<< source_pre
@ -167,8 +180,13 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width )
string target_pre = "";
string target = "";
string target_post = "";
int target_pre_null_width = 0;
int target_post_null_width = 0;
for( char i=0; i<m_target_start; i++ ) {
target_pre += " " + m_targetCorpus->GetWord( m_sentence_id, i);
WORD word = m_targetCorpus->GetWord( m_sentence_id, i);
target_pre += " " + word;
if (i >= m_target_start-m_pre_null)
target_pre_null_width += word.size() + 1;
}
for( char i=m_target_start; i<=m_target_end; i++ ) {
if (i>m_target_start) target += " ";
@ -176,7 +194,11 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width )
}
for( char i=m_target_end+1; i<m_target_length; i++ ) {
if (i>m_target_end+1) target_post += " ";
target_post += m_targetCorpus->GetWord( m_sentence_id, i);
WORD word = m_targetCorpus->GetWord( m_sentence_id, i);
target_post += word;
if (i-(m_target_end+1) < m_post_null) {
target_post_null_width += word.size() + 1;
}
}
int target_pre_width = (target_width-target.size())/2;
@ -186,10 +208,47 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width )
target_pre_width = 0;
target_post_width = 0;
}
if (target_pre.size() > target_pre_width)
if (target_pre.size() < target_pre_width)
target_pre_width = target_pre.size();
else {
while(target_pre_width>0 &&
target_pre.substr(target_pre.size()-target_pre_width,1) != " ") {
target_pre_width--;
}
target_pre = "..." + target_pre.substr( target_pre.size()-target_pre_width, target_pre_width );
if (target_post.size() > target_post_width)
target_post = target_post.substr( 0, target_post_width ) + "...";
}
if (target_post.size() < target_post_width) {
target_post_width = target_post.size();
}
else {
while(target_post_width>0 &&
target_post.substr(target_post_width-1,1) != " ") {
target_post_width--;
}
target_post = target_post.substr( 0, target_post_width ) + "...";
}
if (m_pre_null) {
//cerr << endl << "target_pre_width=" << target_pre_width << ", target_pre_null_width=" << target_pre_null_width << ", target_pre.size()=" << target_pre.size() << endl;
if (target_pre_width < target_pre.size())
target_pre_null_width -= target_pre.size()-target_pre_width;
target_pre = target_pre.substr(0,target_pre_width-target_pre_null_width)
+ "<span class=\"null_aligned\">"
+ target_pre.substr(target_pre_width-target_pre_null_width)
+ "</span>";
}
if (m_post_null) {
//cerr << endl << "target_post_width=" << target_post_width << ", target_post_null_width=" << target_post_null_width << ", target_post.size()=" << target_post.size() << endl;
if (target_post_null_width>target_post.size()) {
target_post_null_width = target_post.size();
}
target_post = "<span class=\"null_aligned\">"
+ target_post.substr(0,target_post_null_width)
+ "</span>"
+ target_post.substr(target_post_null_width);
}
*out << "<td class=\"pp_target_left\">"
<< target_pre

View File

@ -47,8 +47,9 @@ bool PhrasePairCollection::GetCollection( const vector< string > sourceString )
if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
for( char pre = 0; pre <= pre_null; pre++ ) {
for( char post = 0; post <= post_null; post++ ) {
bool null_boundary_words = false;
for( char pre = 0; pre <= pre_null && (pre==0||null_boundary_words); pre++ ) {
for( char post = 0; post <= post_null && (post==0||null_boundary_words); post++ ) {
vector< WORD_ID > targetString;
cerr << "; ";
for( char target = target_start-pre; target <= target_end+post; target++ ) {
@ -67,6 +68,18 @@ bool PhrasePairCollection::GetCollection( const vector< string > sourceString )
}
}
}
else {
cerr << "mismatch " << (i-first_match)
<< " in sentence " << sentence_id
<< ", starting at word " << source_start
<< " of " << sentence_length
<< ". target sentence has " << target_length << " words.";
Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
if (mismatch->Unaligned())
m_unaligned.push_back( mismatch );
else
m_mismatch.push_back( mismatch );
}
cerr << endl;
if (found > m_max_lookup) {
@ -92,23 +105,89 @@ void PhrasePairCollection::Print()
void PhrasePairCollection::PrintHTML()
{
vector< vector<PhrasePair*> >::iterator ppWithSameTarget;
int pp_target = 0;
bool singleton = false;
// loop over all translations
vector< vector<PhrasePair*> >::iterator ppWithSameTarget;
for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_pp_target; ppWithSameTarget++, pp_target++ ) {
cout << "<p class=\"pp_target_header\">";
(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
int count = ppWithSameTarget->size();
cout << "(" << count << "/" << m_size << ")" << endl;
cout << "<p><table align=\"center\">";
int count = ppWithSameTarget->size();
if (!singleton) {
if (count == 1) {
singleton = true;
cout << "<p class=\"pp_singleton_header\">singleton"
<< (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
<< (m_collection.end() - ppWithSameTarget)
<< "/" << m_size << ")</p>";
}
else {
cout << "<p class=\"pp_target_header\">";
(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
cout << " (" << count << "/" << m_size << ")" << endl;
cout << "<p><div id=\"pp_" << pp_target << "\">";
}
cout << "<table align=\"center\">";
}
vector< PhrasePair* >::iterator p;
int pp = 0;
for(p = ppWithSameTarget->begin(); pp<count && p != ppWithSameTarget->end(); p++, pp++ ) {
// loop over all sentences where translation occurs
int pp=0;
int i=0;
for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
(*p)->PrintClippedHTML( &cout, 160 );
if (count > m_max_pp) {
p += count/m_max_pp-1;
pp += count/m_max_pp-1;
}
}
cout << "</table>\n";
if (i == 10 && pp < count) {
// extended table
cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
cout << "<table align=\"center\">";
for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_pp && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
(*p)->PrintClippedHTML( &cout, 160 );
if (count > m_max_pp) {
p += count/m_max_pp-1;
pp += count/m_max_pp-1;
}
}
}
if (!singleton) cout << "</table></div>\n";
if (!singleton && pp_target == 9) {
cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
cout << "<p class=\"pp_target_header\">(more)</p></div>";
cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
}
}
if (singleton) cout << "</table></div>\n";
else if (pp_target > 9) cout << "</div>";
int max_mismatch = m_max_pp/3;
// unaligned phrases
if (m_unaligned.size() > 0) {
cout << "<p class=\"pp_singleton_header\">unaligned"
<< " (" << (m_unaligned.size()) << ")</p>";
cout << "<table align=\"center\">";
int step_size = 1;
if (m_unaligned.size() > max_mismatch)
step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
for(int i=0;i<m_unaligned.size();i+=step_size)
m_unaligned[i]->PrintClippedHTML( &cout, 160 );
cout << "</table>";
}
// mismatched phrases
if (m_mismatch.size() > 0) {
cout << "<p class=\"pp_singleton_header\">mismatched"
<< " (" << (m_mismatch.size()) << ")</p>";
cout << "<table align=\"center\">";
int step_size = 1;
if (m_mismatch.size() > max_mismatch)
step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
for(int i=0;i<m_mismatch.size();i+=step_size)
m_mismatch[i]->PrintClippedHTML( &cout, 160 );
cout << "</table>";
}
}
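The step_size logic above keeps both tables bounded: ceiling division spreads at most max_mismatch (= m_max_pp/3) rows evenly over the whole list. Worked example with assumed numbers: 25 unaligned phrases and max_mismatch = 10 give step_size = (25 + 10 - 1) / 10 = 3, so entries 0, 3, 6, ..., 24 are printed, nine rows in total.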

View File

@ -3,6 +3,7 @@
#include "TargetCorpus.h"
#include "Alignment.h"
#include "PhrasePair.h"
#include "Mismatch.h"
#pragma once
@ -16,6 +17,7 @@ private:
TargetCorpus *m_targetCorpus;
Alignment *m_alignment;
vector< vector<PhrasePair*> > m_collection;
vector< Mismatch* > m_mismatch, m_unaligned;
int m_size;
int m_max_lookup;
int m_max_pp_target;

View File

@ -0,0 +1,123 @@
/*
base64.cpp and base64.h
Copyright (C) 2004-2008 René Nyffenegger
This source code is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this source code must not be misrepresented; you must not
claim that you wrote the original source code. If you use this source code
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original source code.
3. This notice may not be removed or altered from any source distribution.
René Nyffenegger rene.nyffenegger@adp-gmbh.ch
*/
#include "base64.h"
#include <iostream>
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static inline bool is_base64(unsigned char c) {
return (isalnum(c) || (c == '+') || (c == '/'));
}
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
std::string ret;
int i = 0;
int j = 0;
unsigned char char_array_3[3];
unsigned char char_array_4[4];
while (in_len--) {
char_array_3[i++] = *(bytes_to_encode++);
if (i == 3) {
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for(i = 0; (i <4) ; i++)
ret += base64_chars[char_array_4[i]];
i = 0;
}
}
if (i)
{
for(j = i; j < 3; j++)
char_array_3[j] = '\0';
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for (j = 0; (j < i + 1); j++)
ret += base64_chars[char_array_4[j]];
while((i++ < 3))
ret += '=';
}
return ret;
}
std::string base64_decode(std::string const& encoded_string) {
int in_len = encoded_string.size();
int i = 0;
int j = 0;
int in_ = 0;
unsigned char char_array_4[4], char_array_3[3];
std::string ret;
while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
if (i ==4) {
for (i = 0; i <4; i++)
char_array_4[i] = base64_chars.find(char_array_4[i]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
ret += char_array_3[i];
i = 0;
}
}
if (i) {
for (j = i; j <4; j++)
char_array_4[j] = 0;
for (j = 0; j <4; j++)
char_array_4[j] = base64_chars.find(char_array_4[j]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
}
return ret;
}

View File

@ -0,0 +1,4 @@
#include <string>
std::string base64_encode(unsigned char const* , unsigned int len);
std::string base64_decode(std::string const& s);
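base64 is used here to pass arbitrary query strings (spaces, quotes, non-ASCII bytes) safely through URLs and shell arguments between the web frontend and the biconcor binary. A minimal round-trip sketch, not part of the commit:
#include <iostream>
#include "base64.h"
int main() {
  std::string query = "ein Beispiel";
  std::string encoded = base64_encode(
      reinterpret_cast<const unsigned char*>(query.c_str()), query.size());
  std::cout << encoded << "\n";                 // prints ZWluIEJlaXNwaWVs
  std::cout << base64_decode(encoded) << "\n";  // prints ein Beispiel
  return 0;
}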

View File

@ -3,6 +3,7 @@
#include "Alignment.h"
#include "PhrasePairCollection.h"
#include <getopt.h>
#include "base64.h"
using namespace std;
@ -32,7 +33,7 @@ int main(int argc, char* argv[])
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "l:s:c:q:t:a:h", long_options, &option_index);
int c = getopt_long (argc, argv, "l:s:c:q:Q:t:a:h", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 'l':
@ -53,6 +54,10 @@ int main(int argc, char* argv[])
fileNameSource = string(optarg);
createFlag = true;
break;
case 'Q':
query = base64_decode(string(optarg));
queryFlag = true;
break;
case 'q':
query = string(optarg);
queryFlag = true;
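The new -Q option is the consumer of this: it takes the query base64-encoded and decodes it before lookup, so the web frontend can hand over phrases containing spaces or shell-unsafe characters without quoting trouble (the PHP change further down switches from escapeshellarg with -q to base64_encode with -Q). Illustrative invocation, with an assumed model path and the encoding of "ein Beispiel":
./biconcor -l model/biconcor.5 -Q ZWluIEJlaXNwaWVs
behaves like ./biconcor -l model/biconcor.5 -q "ein Beispiel".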

View File

@ -370,14 +370,14 @@ build-generation-custom
ignore-unless: AND generation-factors generation-corpus
default-name: model/generation-table
create-config
in: reordering-table phrase-translation-table generation-table LM:binlm biconcor-model
in: reordering-table phrase-translation-table generation-table LM:binlm
out: config
ignore-if: use-hiero INTERPOLATED-LM:script
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings
default-name: model/moses.ini
error: Unknown option
create-config-interpolated-lm
in: reordering-table phrase-translation-table generation-table INTERPOLATED-LM:binlm biconcor-model
in: reordering-table phrase-translation-table generation-table INTERPOLATED-LM:binlm
out: config
ignore-if: use-hiero
ignore-unless: INTERPOLATED-LM:script
@ -777,6 +777,6 @@ analysis-precision
[REPORTING] single
report
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
out: report
default-name: evaluation/report

View File

@ -1730,7 +1730,6 @@ sub define_training_create_config {
my ($config,
$reordering_table,$phrase_translation_table,$generation_table,@LM)
= &get_output_and_input($step_id);
if ($LM[$#LM] =~ /biconcor/ || $LM[$#LM] eq '') { pop @LM; }
my $cmd = &get_training_setting(9);

View File

@ -1,6 +1,6 @@
<?php
// main page frame, triggers the loading of parts
# main page frame, triggers the loading of parts
function show_analysis() {
global $task,$user,$setup,$id,$set;
global $dir;
@ -8,13 +8,14 @@ function show_analysis() {
head("Analysis: $task ($user), Set $set, Run $id");
?><script>
function show(field,sort,count) {
function show(field,sort,count,filter) {
var url = '?analysis=' + field + '_show'
+ '&setup=<?php print $setup ?>'
+ '&id=<?php print $id ?>'
+ '&set=<?php print $set ?>'
+ '&sort=' + sort
+ '&count=' + count;
+ '&count=' + count
+ '&filter=' + filter;
new Ajax.Updater(field, url, { method: 'get', evalScripts: true });
}
function ngram_show(type,order,count,sort,smooth) {
@ -61,7 +62,7 @@ function hide_word_info(sentence) {
function show_biconcor(sentence,phrase) {
var div = "biconcor-"+sentence;
var url = '?analysis=biconcor'
+ '&setup=<?php print $setup ?>&id=<?php print get_biconcor_version($dir,$id); ?>&set=<?php print $set ?>'
+ '&setup=<?php print $setup ?>&id=<?php print get_biconcor_version($dir,$set,$id); ?>&set=<?php print $set ?>'
+ '&sentence=' + sentence
+ '&phrase=' + encodeURIComponent(phrase);
document.getElementById(div).innerHTML = "<center><img src=\"spinner.gif\" width=48 height=48></center>";
@ -83,7 +84,7 @@ function close_biconcor(sentence) {
<div id="PrecisionRecallDetails"></div>
<div id="bleu">(loading...)</div>
<script language="javascript">
show('bleu','',5);
show('bleu','',5,'');
</script>
</body></html>
<?php
@ -93,13 +94,14 @@ function precision_by_coverage() {
global $experiment,$evalset,$dir,$set,$id;
$img_width = 1000;
print "<h3>Precision by Coverage</h3>";
print "<h3>Precision of Input Words by Coverage</h3>";
print "The graphs display what ratio of words of a specific type are translated correctly (yellow), and what ratio is deleted (blue).";
print " The extend of the boxes is scaled on the x-axis by the number of tokens of the displayed type.";
// load data
$data = file("$dir/evaluation/$set.analysis.$id/precision-by-corpus-coverage");
$data = file(get_current_analysis_filename("precision","precision-by-corpus-coverage"));
$total = 0;
$log_info = array();
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$info[$item[0]]["precision"] = $item[1];
@ -125,8 +127,8 @@ function precision_by_coverage() {
print "<h4>By log<sub>2</sub>-count in the training corpus</h4>";
precision_by_coverage_graph("byCoverage",$log_info,$total,$img_width,SORT_NUMERIC);
// load factored data
$d = dir("$dir/evaluation/$set.analysis.$id");
# load factored data
$d = dir("$dir/evaluation/$set.analysis.".get_precision_analysis_version($dir,$set,$id));
while (false !== ($file = $d->read())) {
if (preg_match('/precision-by-corpus-coverage.(.+)$/',$file, $match)) {
precision_by_coverage_factored($img_width,$total,$file,$match[1]);
@ -136,7 +138,7 @@ function precision_by_coverage() {
function precision_by_coverage_factored($img_width,$total,$file,$factor_id) {
global $dir,$set,$id;
$data = file("$dir/evaluation/$set.analysis.$id/$file");
$data = file(get_current_analysis_filename("precision",$file));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$factor = $item[0];
@ -187,7 +189,7 @@ function precision_by_word($type) {
$byFactor = $match[1];
}
$data = file("$dir/evaluation/$set.analysis.$id/precision-by-input-word");
$data = file(get_current_analysis_filename("precision","precision-by-input-word"));
for($i=0;$i<count($data);$i++) {
$line = rtrim($data[$i]);
$item = split("\t",$line);
@ -204,8 +206,7 @@ function precision_by_word($type) {
//# filter for factor
$word = $item[5];
$factor = $item[6];
if ($byFactor != "false" && $byFactor != $factor) {
if ($byFactor != "false" && $byFactor != $item[6]) {
continue;
}
@ -218,7 +219,7 @@ function precision_by_word($type) {
print "<table border=1><tr><td align=center>Count</td><td align=center colspan=2>Precision</td><td align=center colspan=2>Delete</td><td align=center>Length</td></tr>\n";
foreach ($info as $word => $wordinfo) {
print "<tr><td align=center>$word</td>";
print "<tr><td align=center><a href=\"javascript:show('bleu','order',5,'".base64_encode($word)."')\">$word</a></td>";
printf("<td align=right>%.1f%s</td><td align=right><font size=-1>%.1f/%d</font></td>",$wordinfo["precision"]/$wordinfo["total"]*100,"%",$wordinfo["precision"],$wordinfo["total"]);
printf("<td align=right>%.1f%s</td><td align=right><font size=-1>%d/%d</font></td>",$wordinfo["delete"]/$wordinfo["total"]*100,"%",$wordinfo["delete"],$wordinfo["total"]);
printf("<td align=right>%.3f</td>",$wordinfo["length"]/$wordinfo["total"]);
@ -361,7 +362,7 @@ ctx.font = '9px serif';
print "</script>";
}
// stats on precision and recall
//# stats on precision and recall
function precision_recall_details() {
?>
<table width=100%>
@ -389,20 +390,20 @@ ngram_show('recall',4,5,'',0);
<?php
}
// stats on ngram precision
//# stats on ngram precision
function ngram_summary() {
global $experiment,$evalset,$dir,$set,$id;
// load data
$data = file("$dir/evaluation/$set.analysis.$id/summary");
//# load data
$data = file(get_current_analysis_filename("basic","summary"));
for($i=0;$i<count($data);$i++) {
$item = split(": ",$data[$i]);
$info[$item[0]] = $item[1];
}
print "<table cellspacing=5 width=100%><tr><td valign=top align=center bgcolor=#eeeeee>";
//foreach (array("precision","recall") as $type) {
print "<b>Precision</b>\n";
//#foreach (array("precision","recall") as $type) {
print "<b>Precision of Output</b>\n";
$type = "precision";
print "<table><tr><td>$type</td><td>1-gram</td><td>2-gram</td><td>3-gram</td><td>4-gram</td></tr>\n";
printf("<tr><td>correct</td><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>\n",
@ -424,8 +425,8 @@ function ngram_summary() {
//}
print "<A HREF=\"javascript:generic_show('PrecisionRecallDetails','')\">details</A> ";
if (file_exists("$dir/evaluation/$set.analysis.$id/precision-by-corpus-coverage")) {
print "| <A HREF=\"javascript:generic_show('PrecisionByCoverage','')\">breakdown by coverage</A> ";
if (file_exists(get_current_analysis_filename("precision","precision-by-corpus-coverage"))) {
print "| <A HREF=\"javascript:generic_show('PrecisionByCoverage','')\">precision of input by coverage</A> ";
}
print "</td><td valign=top valign=top align=center bgcolor=#eeeeee>";
@ -445,8 +446,7 @@ function ngram_summary() {
printf("<p>length-diff: %d (%.1f%s)",$info["precision-1-total"]-$info["recall-1-total"],($info["precision-1-total"]-$info["recall-1-total"])/$info["recall-1-total"]*100,"%");
// coverage
$coverage_id = get_coverage_analysis_version($dir,$set,$id);
if (file_exists("$dir/evaluation/$set.analysis.$coverage_id/corpus-coverage-summary")) {
if (file_exists(get_current_analysis_filename("coverage","corpus-coverage-summary"))) {
print "</td><td valign=top align=center bgcolor=#eeeeee>";
print "<div id=\"CoverageSummary\">";
coverage_summary();
@ -454,8 +454,8 @@ function ngram_summary() {
}
// phrase segmentation
if (file_exists("$dir/evaluation/$set.analysis.$id/segmentation") ||
file_exists("$dir/evaluation/$set.analysis.$id/rule")) {
if (file_exists(get_current_analysis_filename("basic","segmentation")) ||
file_exists(get_current_analysis_filename("basic","rule"))) {
print "</td><td valign=top align=center bgcolor=#eeeeee>";
print "<div id=\"SegmentationSummary\">";
segmentation_summary();
@ -463,7 +463,7 @@ function ngram_summary() {
}
// rules
if (file_exists("$dir/evaluation/$set.analysis.$id/rule")) {
if (file_exists(get_current_analysis_filename("basic","rule"))) {
print "</td><td valign=top align=center bgcolor=#eeeeee>";
print "<div id=\"RuleSummary\">";
rule_summary();
@ -479,7 +479,7 @@ function ngram_show($type) {
// load data
$order = $_GET['order'];
$data = file("$dir/evaluation/$set.analysis.$id/n-gram-$type.$order");
$data = file(get_current_analysis_filename("basic","n-gram-$type.$order"));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$line["total"] = $item[0];
@ -572,7 +572,7 @@ function coverage_details() {
$total[$corpus][$b][$i] = 0;
}
}
$data = file(filename_fallback_to_factored("$dir/evaluation/$set.analysis.$id/$corpus-coverage-summary"));
$data = file(filename_fallback_to_factored(get_current_analysis_filename("coverage","$corpus-coverage-summary")));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
if ($item[1]>5) {
@ -614,7 +614,7 @@ function coverage_details() {
}
print "</tr></table>\n";
$data = file(filename_fallback_to_factored("$dir/evaluation/$set.analysis.$id/ttable-unknown"));
$data = file(filename_fallback_to_factored(get_current_analysis_filename("coverage","ttable-unknown")));
for($i=0;$i<count($data);$i++) {
list($word,$count) = split("\t",$data[$i]);
$item["word"] = $word;
@ -678,8 +678,7 @@ function filename_fallback_to_factored($file) {
function factor_name($input_output,$factor_id) {
global $dir,$set,$id;
$coverage_id = get_coverage_analysis_version($dir,$set,$id);
$file = "$dir/evaluation/$set.analysis.$coverage_id/factor-names";
$file = get_current_analysis_filename("coverage","factor-names");
if (!file_exists($file)) {
return $factor_id;
}
@ -703,8 +702,7 @@ function coverage_summary() {
}
$total[$corpus][$b] = 0;
}
$coverage_id = get_coverage_analysis_version($dir,$set,$id);
$data = file(filename_fallback_to_factored("$dir/evaluation/$set.analysis.$coverage_id/$corpus-coverage-summary"));
$data = file(filename_fallback_to_factored(get_current_analysis_filename("coverage","$corpus-coverage-summary")));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
if ($item[0] == 1) {
@ -768,8 +766,9 @@ function segmentation_summary() {
}
$total = 0;
if (file_exists("$dir/evaluation/$set.analysis.$id/segmentation")) {
$data = file("$dir/evaluation/$set.analysis.$id/segmentation");
$file = get_current_analysis_filename("basic","segmentation");
if (file_exists($file)) {
$data = file($file);
for($i=0;$i<count($data);$i++) {
list($in,$out,$c) = split("\t",$data[$i]);
if ($by == "word") { $c *= $in; }
@ -780,9 +779,12 @@ function segmentation_summary() {
}
}
else {
$data = file("$dir/evaluation/$set.analysis.$id/rule");
$data = file(get_current_analysis_filename("basic","rule"));
for($i=0;$i<count($data);$i++) {
list($type,$rule,$c) = split("\t",$data[$i]);
$field = split("\t",$data[$i]);
$type = $field[0];
$rule = $field[1];
if (count($field) > 2) { $c = $field[2]; } else { $c = 0; }
if ($type == "rule") {
list($rule_in,$in,$nt,$rule_out,$out) = split(":",$rule);
if ($by == "word") { $c *= $in; }
@ -822,9 +824,14 @@ function segmentation_summary() {
// hierarchical rules used in translation
function rule_summary() {
global $dir,$set,$id;
$data = file("$dir/evaluation/$set.analysis.$id/rule");
$data = file(get_current_analysis_filename("basic","rule"));
$rule = array(); $count = array(); $count_nt = array(); $count_w = array();
$nt_count = 0; $total = 0;
foreach ($data as $item) {
list($type,$d,$d2) = split("\t",$item);
$field = split("\t",$item);
$type = $field[0];
$d = $field[1];
if (count($field) > 2) { $d2 = $field[2]; } else { $d2 = 0; }
if ($type == "sentence-count") {
$sentence_count = $d;
}
@ -843,12 +850,16 @@ function rule_summary() {
$rule_out = preg_replace("/b/","y",$rule_out);
$rule_out = preg_replace("/c/","z",$rule_out);
$nt_count += $d2 * $nt;
if (!array_key_exists($d,$rule)) { $rule[$d] = 0; }
$rule[$d] += $d2;
if (!array_key_exists($nt,$count)) { $count[$nt] = 0; }
$count[$nt] += $d2;
$just_nt = preg_replace("/\d/","",$rule_in)."-".preg_replace("/\d/","",$rule_out);
$no_wc = preg_replace("/\d/","W",$rule_in)."-".preg_replace("/\d/","",$rule_out);
if ($just_nt == "-") { $just_nt = "lexical"; }
if (!array_key_exists($just_nt,$count_nt)) { $count_nt[$just_nt] = 0; }
$count_nt[$just_nt] += $d2;
if (!array_key_exists($no_wc,$count_w)) { $count_w[$no_wc] = 0; }
$count_w[$no_wc] += $d2;
$total += $d2;
}
@ -866,108 +877,189 @@ function rule_summary() {
// annotated sentences, navigation
function bleu_show() {
$count = $_GET['count'];
if ($count == 0) { $count = 5; }
print "<b>annotated sentences</b><br><font size=-1>sorted by ";
if ($_GET['sort'] == "order" || $_GET['sort'] == "") {
print "order ";
}
else {
print "<A HREF=\"javascript:show('bleu','order',$count)\">order</A> ";
}
if ($_GET['sort'] == "best") {
print "order ";
}
else {
print "<A HREF=\"javascript:show('bleu','best',$count)\">best</A> ";
}
if ($_GET['sort'] == "worst") {
print "order ";
}
else {
print "<A HREF=\"javascript:show('bleu','worst',$count)\">worst</A> ";
}
#print "display <A HREF=\"\">fullscreen</A> ";
$count = $_GET['count'];
if ($count == 0) { $count = 5; }
print "showing $count ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',5+$count)\">more</A> ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',9999)\">all</A> ";
print "</font><BR>\n";
$filter = "";
if (array_key_exists("filter",$_GET)) {
$filter = base64_decode($_GET['filter']);
}
sentence_annotation();
print "<p align=center><A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',5+$count)\">5 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',10+$count)\">10 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',20+$count)\">20 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',50+$count)\">50 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',100+$count)\">100 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',9999)\">all</A> ";
print "<b>annotated sentences</b><br><font size=-1>sorted by: ";
if ($_GET['sort'] == "order" || $_GET['sort'] == "") { print "order "; }
else {
print "<A HREF=\"javascript:show('bleu','order',$count,'".base64_encode($filter)."')\">order</A> ";
}
if ($_GET['sort'] == "best") { print "best "; }
else {
print "<A HREF=\"javascript:show('bleu','best',$count,'".base64_encode($filter)."')\">best</A> ";
}
if ($_GET['sort'] == "25") { print "25% "; }
else {
print "<A HREF=\"javascript:show('bleu','25',$count,'".base64_encode($filter)."')\">25%</A> ";
}
if ($_GET['sort'] == "avg") { print "avg "; }
else {
print "<A HREF=\"javascript:show('bleu','avg',$count,'".base64_encode($filter)."')\">avg</A> ";
}
if ($_GET['sort'] == "75") { print "75% "; }
else {
print "<A HREF=\"javascript:show('bleu','75',$count,'".base64_encode($filter)."')\">75%</A> ";
}
if ($_GET['sort'] == "worst") { print "worst; "; }
else {
print "<A HREF=\"javascript:show('bleu','worst',$count,'".base64_encode($filter)."')\">worst</A>; ";
}
print "showing: $count ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',5+$count,'".base64_encode($filter)."')\">more</A> ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',9999,'".base64_encode($filter)."')\">all</A>";
if ($filter != "") {
print "; filter: '$filter'";
}
sentence_annotation($count,$filter);
print "<p align=center><A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',5+$count,'".base64_encode($filter)."')\">5 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',10+$count,'".base64_encode($filter)."')\">10 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',20+$count,'".base64_encode($filter)."')\">20 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',50+$count,'".base64_encode($filter)."')\">50 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',100+$count,'".base64_encode($filter)."')\">100 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',9999,'".base64_encode($filter)."')\">all</A> ";
}
// annotated sentences core: reads data, sorts sentences, displays them
function sentence_annotation() {
function sentence_annotation($count,$filter) {
global $set,$id,$dir,$biconcor;
// load data
$data = file("$dir/evaluation/$set.analysis.$id/bleu-annotation");
# get input
$filtered = array();
$file = get_current_analysis_filename("coverage","input-annotation");
if (file_exists($file)) {
$input = file($file);
# filter if so specified
if ($filter != "") {
for($i=0;$i<count($input);$i++) {
$item = explode("\t",$input[$i]);
$word = explode(" ",$item[0]);
$keep = 0;
for($j=0;$j<count($word);$j++) {
if ($word[$j] == $filter) {
$keep = 1;
}
}
if (!$keep) { $filtered[$i] = 1; }
}
}
}
# load bleu scores
$data = file(get_current_analysis_filename("basic","bleu-annotation"));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$line["bleu"] = $item[0];
$line["id"] = $item[1];
$line["system"] = $item[2];
$line["reference"] = "";
for($j=3;$j<count($item);$j++) {
if ($j>3) { $line["reference"] .= "<br>"; };
$line["reference"] .= $item[$j];
}
$bleu[] = $line;
$item = split("\t",$data[$i]);
if (! array_key_exists($item[1],$filtered)) {
$line["bleu"] = $item[0];
$line["id"] = $item[1];
$line["system"] = $item[2];
$line["reference"] = "";
for($j=3;$j<count($item);$j++) {
if ($j>3) { $line["reference"] .= "<br>"; };
$line["reference"] .= $item[$j];
}
$bleu[] = $line;
}
}
$coverage_id = get_coverage_analysis_version($dir,$set,$id);
if (file_exists("$dir/evaluation/$set.analysis.$coverage_id/input-annotation")) {
$input = file("$dir/evaluation/$set.analysis.$coverage_id/input-annotation");
# sort and label additional sentences as filtered
global $sort;
function cmp($a, $b) {
global $sort;
if ($sort == "order") {
$a_idx = $a["id"];
$b_idx = $b["id"];
}
else if ($sort == "worst" || $sort == "75") {
$a_idx = $a["bleu"];
$b_idx = $b["bleu"];
if ($a_idx == $b_idx) {
$a_idx = $b["id"];
$b_idx = $a["id"];
}
}
else if ($sort == "best" || $sort == "avg" || $sort == "25") {
$a_idx = -$a["bleu"];
$b_idx = -$b["bleu"];
if ($a_idx == $b_idx) {
$a_idx = $a["id"];
$b_idx = $b["id"];
}
}
if ($a_idx == $b_idx) {
return 0;
}
return ($a_idx < $b_idx) ? -1 : 1;
}
$sort = $_GET['sort'];
if ($sort == '') {
$sort = "order";
}
usort($bleu, 'cmp');
$offset = 0;
if ($sort == "25" || $sort == "75") {
$offset = (int) (count($bleu)/4);
}
else if ($sort == "avg") {
$offset = (int) (count($bleu)/2);
}
if (file_exists("$dir/evaluation/$set.analysis.$id/segmentation-annotation")) {
$data = file("$dir/evaluation/$set.analysis.$id/segmentation-annotation");
for($i=0;$i<count($data);$i++) {
$segment = 0;
foreach (split(" ",$data[$i]) as $item) {
list($in_start,$in_end,$out_start,$out_end) = split(":",$item);
$segment++;
$segmentation[$i]["input_start"][$in_start] = $segment;
$segmentation[$i]["input_end"][$in_end] = $segment;
$segmentation[$i]["output_start"][$out_start] = $segment;
$segmentation[$i]["output_end"][$out_end+0] = $segment;
$retained = array();
for($i=$offset;$i<$count+$offset && $i<count($bleu);$i++) {
$line = $bleu[$i];
$retained[$line["id"]] = 1;
}
# get segmentation (phrase alignment)
$file = get_current_analysis_filename("basic","segmentation-annotation");
if (file_exists($file)) {
$data = file($file);
for($i=0;$i<count($data);$i++) {
if ($filter == "" || array_key_exists($i,$retained)) {
$segment = 0;
foreach (split(" ",$data[$i]) as $item) {
list($in_start,$in_end,$out_start,$out_end) = split(":",$item);
$segment++;
$segmentation[$i]["input_start"][$in_start] = $segment;
$segmentation[$i]["input_end"][$in_end] = $segment;
$segmentation[$i]["output_start"][$out_start] = $segment;
$segmentation[$i]["output_end"][$out_end+0] = $segment;
}
}
}
}
// hierarchical data
# get hierarchical data
$hierarchical = 0;
if (file_exists("$dir/evaluation/$set.analysis.$id/input-tree")) {
$data = file("$dir/evaluation/$set.analysis.$id/input-tree");
$span = 0;
$last_sentence = -1;
$nt_count = array();
for($i=0;$i<count($data);$i++) {
list($sentence,$brackets,$nt,$words) = split("\t",$data[$i]);
if ($sentence != $last_sentence) { $span = 0; }
$last_sentence = $sentence;
$segmentation[$sentence][$span]["brackets"] = $brackets;
$file = get_current_analysis_filename("basic","input-tree");
if (file_exists($file)) {
$data = file($file);
$span = 0;
$last_sentence = -1;
$nt_count = array();
for($i=0;$i<count($data);$i++) {
list($sentence,$brackets,$nt,$words) = split("\t",$data[$i]);
if ($sentence != $last_sentence) { $span = 0; }
$last_sentence = $sentence;
if ($filter == "" || array_key_exists($sentence,$retained)) {
$segmentation[$sentence][$span]["brackets"] = $brackets;
# $segmentation[$sentence][$span]["nt"] = $nt;
$segmentation[$sentence][$span]["words"] = rtrim($words);
if ($nt != "") { $nt_count[$nt]++; }
$span++;
$segmentation[$sentence][$span]["words"] = rtrim($words);
if ($nt != "") { $nt_count[$nt]=1; }
$span++;
}
$hierarchical = 1;
}
$hierarchical = 1;
# if (count($nt_count) <= 2) {
# foreach ($segmentation as $sentence => $segmentation_span) {
# foreach ($segmentation_span as $span => $type) {
@ -976,108 +1068,78 @@ function sentence_annotation() {
# }
# }
}
if (file_exists("$dir/evaluation/$set.analysis.$id/output-tree")) {
$data = file("$dir/evaluation/$set.analysis.$id/output-tree");
$span = 0;
$last_sentence = -1;
$nt_count = array();
for($i=0;$i<count($data);$i++) {
list($sentence,$brackets,$nt,$words) = split("\t",$data[$i]);
if ($sentence != $last_sentence) { $span = 0; }
$last_sentence = $sentence;
$segmentation_out[$sentence][$span]["brackets"] = $brackets;
$segmentation_out[$sentence][$span]["nt"] = $nt;
$segmentation_out[$sentence][$span]["words"] = rtrim($words);
if ($nt != "") { $nt_count[$nt]++; }
$span++;
$file = get_current_analysis_filename("basic","output-tree");
if (file_exists($file)) {
$data = file($file);
$span = 0;
$last_sentence = -1;
$nt_count = array();
for($i=0;$i<count($data);$i++) {
list($sentence,$brackets,$nt,$words) = split("\t",$data[$i]);
if ($sentence != $last_sentence) { $span = 0; }
$last_sentence = $sentence;
if ($filter == "" || array_key_exists($sentence,$retained)) {
$segmentation_out[$sentence][$span]["brackets"] = $brackets;
$segmentation_out[$sentence][$span]["nt"] = $nt;
$segmentation_out[$sentence][$span]["words"] = rtrim($words);
if ($nt != "") { $nt_count[$nt]=1; }
$span++;
}
if (count($nt_count) <= 2) {
foreach ($segmentation_out as $sentence => $segmentation_span) {
foreach ($segmentation_span as $span => $type) {
$segmentation_out[$sentence][$span]["nt"]="";
}
}
}
# no non-terminal markup, if there are two or less non-terminals (X,S)
if (count($nt_count) <= 2) {
foreach ($segmentation_out as $sentence => $segmentation_span) {
foreach ($segmentation_span as $span => $type) {
$segmentation_out[$sentence][$span]["nt"]="";
}
}
}
}
if (file_exists("$dir/evaluation/$set.analysis.$id/node")) {
$data = file("$dir/evaluation/$set.analysis.$id/node");
$n = 0;
$last_sentence = -1;
for($i=0;$i<count($data);$i++) {
list($sentence,$depth,$start_div,$end_div,$start_div_in,$end_div_in,$children) = split(" ",$data[$i]);
if ($sentence != $last_sentence) { $n = 0; }
$last_sentence = $sentence;
$node[$sentence][$n]['depth'] = $depth;
$node[$sentence][$n]['start_div'] = $start_div;
$node[$sentence][$n]['end_div'] = $end_div;
$node[$sentence][$n]['start_div_in'] = $start_div_in;
$node[$sentence][$n]['end_div_in'] = $end_div_in;
$node[$sentence][$n]['children'] = rtrim($children);
$n++;
$file = get_current_analysis_filename("basic","node");
if (file_exists($file)) {
$data = file($file);
$n = 0;
$last_sentence = -1;
for($i=0;$i<count($data);$i++) {
list($sentence,$depth,$start_div,$end_div,$start_div_in,$end_div_in,$children) = split(" ",$data[$i]);
if ($sentence != $last_sentence) { $n = 0; }
$last_sentence = $sentence;
if ($filter == "" || array_key_exists($sentence,$retained)) {
$node[$sentence][$n]['depth'] = $depth;
$node[$sentence][$n]['start_div'] = $start_div;
$node[$sentence][$n]['end_div'] = $end_div;
$node[$sentence][$n]['start_div_in'] = $start_div_in;
$node[$sentence][$n]['end_div_in'] = $end_div_in;
$node[$sentence][$n]['children'] = rtrim($children);
$n++;
}
}
}
$biconcor = get_biconcor_version($dir,$id);
// sort
global $sort;
$sort = $_GET['sort'];
if ($sort == '') {
$sort = "order";
# display
if ($filter != "") {
print " (".(count($input)-count($filtered))." retaining)";
}
function cmp($a, $b) {
global $sort;
if ($sort == "order") {
$a_idx = $a["id"];
$b_idx = $b["id"];
}
else if ($sort == "worst") {
$a_idx = $a["bleu"];
$b_idx = $b["bleu"];
if ($a_idx == $b_idx) {
$a_idx = $b["id"];
$b_idx = $a["id"];
}
}
else if ($sort == "best") {
$a_idx = -$a["bleu"];
$b_idx = -$b["bleu"];
if ($a_idx == $b_idx) {
$a_idx = $a["id"];
$b_idx = $b["id"];
}
}
print "</font><BR>\n";
if ($a_idx == $b_idx) {
return 0;
}
return ($a_idx < $b_idx) ? -1 : 1;
}
usort($bleu, 'cmp');
$count = $_GET['count'];
if ($count == 0) { $count = 5; }
// display
//print "<div id=\"debug\"></div>";
for($i=0;$i<$count && $i<count($bleu);$i++) {
$biconcor = get_biconcor_version($dir,$set,$id);
//print "<div id=\"debug\">$sort / $offset</div>";
for($i=$offset;$i<$count+$offset && $i<count($bleu);$i++) {
$line = $bleu[$i];
if ($hierarchical) {
annotation_hierarchical($line["id"],$segmentation[$line["id"]],$segmentation_out[$line["id"]],$node[$line["id"]]);
}
if ($input) {
print "<div id=\"info-$i\" style=\"border-color:black; background:#ffff80; opacity:0; width:100%; border:1px;\">8364 occ. in corpus, 56 translations, entropy: 5.54</div>\n";
print "<div id=\"info-".$line["id"]."\" style=\"border-color:black; background:#ffff80; opacity:0; width:100%; border:1px;\">0 occ. in corpus, 0 translations, entropy: 0.00</div>\n";
if ($biconcor) {
//print "<div id=\"biconcor-$i\" style=\"display: none;\">xxx</div>";
print "<div id=\"biconcor-$i\" class=\"biconcor\">xxx</div>";
print "<div id=\"biconcor-".$line["id"]."\" class=\"biconcor\"><font size=-2>(click on input phrase for bilingual concordancer)</font></div>";
}
if ($hierarchical) {
sentence_annotation_hierarchical("#".$line["id"],$line["id"],$input[$line["id"]],$segmentation[$line["id"]],"in");
}
else {
print "<font size=-2>[#".$line["id"]."]</font> ";
input_annotation($line["id"],$input[$line["id"]],$segmentation[$line["id"]]);
input_annotation($line["id"],$input[$line["id"]],$segmentation[$line["id"]],$filter);
}
}
//else {
@ -1099,19 +1161,20 @@ function coverage($coverage_vector) {
$coverage = array();
foreach (split(" ",$coverage_vector) as $item) {
if (preg_match("/[\-:]/",$item)) {
list($from,$to,$corpus_count,$ttable_count,$ttable_entropy) = preg_split("/[\-:]/",$item);
$coverage[$from][$to]["corpus_count"] = $corpus_count;
$coverage[$from][$to]["ttable_count"] = $ttable_count;
$coverage[$from][$to]["ttable_entropy"] = $ttable_entropy;
$field = preg_split("/[\-:]/",$item);
$from = $field[0];
$to = $field[1];
if (count($field)>2){ $coverage[$from][$to]["corpus_count"]=$field[2]; }
if (count($field)>3){ $coverage[$from][$to]["ttable_count"]=$field[3]; }
if (count($field)>4){ $coverage[$from][$to]["ttable_entropy"]=$field[4]; }
}
}
$word = split(" ",$words);
return $coverage;
}
// annotate an inpute sentence
function input_annotation($sentence,$input,$segmentation) {
function input_annotation($sentence,$input,$segmentation,$filter) {
global $biconcor;
list($words,$coverage_vector) = split("\t",$input);
@ -1187,10 +1250,10 @@ function input_annotation($sentence,$input,$segmentation) {
for($j=$from;$j<=$to;$j++) {
if ($j>$from) { $phrase .= " "; }
$phrase .= $word[$j];
$highlightwords .= " document.getElementById('inputword-$sentence-$j').style.backgroundColor='#ffff80';";
$lowlightwords .= " document.getElementById('inputword-$sentence-$j').style.backgroundColor='".coverage_color($coverage[$j][$j])."';";
$highlightwords .= " document.getElementById('inputword-$i-$j').style.backgroundColor='#ffff80';";
$lowlightwords .= " document.getElementById('inputword-$i-$j').style.backgroundColor='".coverage_color($coverage[$j][$j])."';";
}
print "<td colspan=$size><div style=\"background-color: $color; height:3px;\" onmouseover=\"show_word_info($sentence,".$coverage[$from][$to]["corpus_count"].",".$coverage[$from][$to]["ttable_count"].",".$coverage[$from][$to]["ttable_entropy"]."); this.style.backgroundColor='#ffff80';$highlightwords\" onmouseout=\"hide_word_info($sentence); this.style.backgroundColor='$color';$lowlightwords;\"".($biconcor?" onclick=\"show_biconcor($sentence,'".htmlspecialchars($phrase)."');\"":"").">";
print "<td colspan=$size><div style=\"background-color: $color; height:3px;\" onmouseover=\"show_word_info($sentence,".$coverage[$from][$to]["corpus_count"].",".$coverage[$from][$to]["ttable_count"].",".$coverage[$from][$to]["ttable_entropy"]."); this.style.backgroundColor='#ffff80';$highlightwords\" onmouseout=\"hide_word_info($sentence); this.style.backgroundColor='$color';$lowlightwords;\"".($biconcor?" onclick=\"show_biconcor($sentence,'".base64_encode($phrase)."');\"":"").">";
}
print "</div></td>";
$from += $size-1;
@ -1218,7 +1281,14 @@ function input_annotation($sentence,$input,$segmentation) {
$color = '#ffffff';
$cc = 0; $tc = 0; $te = 0;
}
print "<span id=\"inputword-$sentence-$j\" style=\"background-color: $color;\" onmouseover=\"show_word_info($sentence,$cc,$tc,$te); this.style.backgroundColor='#ffff80';\" onmouseout=\"hide_word_info($sentence); this.style.backgroundColor='$color';\"".($biconcor?" onclick=\"show_biconcor($sentence,'".htmlspecialchars($word[$j])."');\"":"").">$word[$j]</span>";
print "<span id=\"inputword-$sentence-$j\" style=\"background-color: $color;\" onmouseover=\"show_word_info($sentence,$cc,$tc,$te); this.style.backgroundColor='#ffff80';\" onmouseout=\"hide_word_info($sentence); this.style.backgroundColor='$color';\"".($biconcor?" onclick=\"show_biconcor($sentence,'".base64_encode($word[$j])."');\"":"").">";
if ($word[$j] == $filter) {
print "<b><font color=#ff0000>".$word[$j]."</font></b>";
}
else {
print $word[$j];
}
print "</span>";
if ($segmentation && array_key_exists($j,$segmentation["input_end"])) {
print "</span>";
}
@ -1295,7 +1365,7 @@ function annotation_hierarchical($sentence,$segmentation,$segmentation_out,$node
function sentence_annotation_hierarchical($info,$sentence,$sequence,$segmentation,$in_out) {
$In_Out = $in_out == "out" ? "Out" : "In";
list($words,$coverage_vector) = split("\t",$input);
#list($words,$coverage_vector) = split("\t",$input);
$coverage = coverage($sequence);
$word = preg_split("/\s/",$sequence);
@ -1322,7 +1392,8 @@ function annotation_hierarchical($sentence,$segmentation,$segmentation_out,$node
$words = $segmentation[$span]["words"];
# non terminal
if ($segmentation[$span]["nt"]) {
if (array_key_exists("nt",$segmentation[$span]) &&
$segmentation[$span]["nt"] != "") {
print $segmentation[$span]["nt"].": ";
}
@ -1359,16 +1430,16 @@ function annotation_hierarchical($sentence,$segmentation,$segmentation_out,$node
function biconcor($query) {
global $set,$id,$dir;
$sentence = $_GET['sentence'];
$biconcor = get_biconcor_version($dir,$id);
$biconcor = get_biconcor_version($dir,$set,$id);
print "<center>
<form action=\"...\" method=get>
<form method=get id=\"BiconcorForm\">
<img src=\"close.gif\" width=17 height=17 onClick=\"close_biconcor($sentence);\">
<input width=20 value=\"$query\">
<input type=submit value=\"look up\">
<input width=20 id=\"BiconcorQuery\" value=\"$query\">
<input type=submit onclick=\"show_biconcor($sentence,encodeBase64(document.getElementById('BiconcorQuery').value));\" value=\"look up\">
</form>
<div class=\"biconcor-content\">";
$cmd = "./biconcor -l $dir/model/biconcor.$biconcor -q ".escapeshellarg($query)." 2>/dev/null";
# print $cmd."<p>";
$cmd = "./biconcor -l $dir/model/biconcor.$biconcor -Q ".base64_encode($query)." 2>/dev/null";
#print $cmd."<p>";
system($cmd);
# print "<p>done.";
print "</div></center>";

View File

@ -73,8 +73,9 @@ function precision_by_coverage_diff() {
print "The graphs display what ratio of words of a specific type are translated correctly (yellow), and what ratio is deleted (blue).";
print " The extend of the boxes is scaled on the x-axis by the number of tokens of the displayed type.";
// load data
$data = file("$dir/evaluation/$set.analysis.$id2/precision-by-corpus-coverage");
$data = file(get_current_analysis_filename2("precision","precision-by-corpus-coverage"));
$total = 0;
$log_info = array();
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$info[$item[0]]["precision"] = $item[1];
@ -100,7 +101,7 @@ function precision_by_coverage_diff() {
$log_info_new = $log_info;
// load base data
$data = file("$dir/evaluation/$set.analysis.$id/precision-by-corpus-coverage");
$data = file(get_current_analysis_filename("precision","precision-by-corpus-coverage"));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$info[$item[0]]["precision"] -= $item[1];
@ -119,10 +120,10 @@ function precision_by_coverage_diff() {
precision_by_coverage_diff_graph("byCoverage",$log_info,$log_info_new,$total,$img_width,SORT_NUMERIC);
// load factored data
$d = dir("$dir/evaluation/$set.analysis.$id");
$d = dir("$dir/evaluation/$set.analysis.".get_precision_analysis_version($dir,$set,$id));
while (false !== ($file = $d->read())) {
if (preg_match('/precision-by-corpus-coverage.(.+)$/',$file, $match) &&
file_exists("$dir/evaluation/$set.analysis.$id2/precision-by-corpus-coverage.$match[1]")) {
file_exists(get_current_analysis_filename2("precision","precision-by-corpus-coverage.$match[1]"))) {
precision_by_coverage_diff_factored($img_width,$total,$file,$match[1]);
}
}
@ -130,7 +131,7 @@ function precision_by_coverage_diff() {
function precision_by_coverage_diff_factored($img_width,$total,$file,$factor_id) {
global $dir,$set,$id,$id2;
$data = file("$dir/evaluation/$set.analysis.$id2/$file");
$data = file(get_current_analysis_filename2("precision",$file));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$factor = $item[0];
@ -158,7 +159,7 @@ function precision_by_coverage_diff_factored($img_width,$total,$file,$factor_id)
$log_info_factored_new = $log_info_factored;
// baseline data
$data = file("$dir/evaluation/$set.analysis.$id/$file");
$data = file(get_current_analysis_filename("precision",$file));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$factor = $item[0];
@ -205,7 +206,9 @@ function precision_by_word_diff($type) {
$byFactor = $match[1];
}
$data = file("$dir/evaluation/$set.analysis.$id2/precision-by-input-word");
$data = file(get_current_analysis_filename2("precision","precision-by-input-word"));
$total = 0;
$info = array();
for($i=0;$i<count($data);$i++) {
$line = rtrim($data[$i]);
$item = split("\t",$line);
@ -215,19 +218,23 @@ function precision_by_word_diff($type) {
$count = $item[4];
$log_count = -1;
if ($count>0) {
$log_count = (int) (log($count)/log(2));
$log_count = (int) (log($count)/log(2));
}
if ($byCoverage != -2 && $byCoverage != $log_count) {
continue;
continue;
}
//# filter for factor
$word = $item[5];
$factor = $item[6];
if ($byFactor != "false" && $byFactor != $factor) {
continue;
if ($byFactor != "false" && $byFactor != $item[6]) {
continue;
}
if (!array_key_exists($word,$info)) {
$info[$word]["precision"] = 0;
$info[$word]["delete"] = 0;
$info[$word]["length"] = 0;
$info[$word]["total"] = 0;
}
$info[$word]["precision"] += $item[0];
$info[$word]["delete"] += $item[1];
$info[$word]["length"] += $item[2];
@ -235,7 +242,7 @@ function precision_by_word_diff($type) {
}
$info_new = $info;
$data = file("$dir/evaluation/$set.analysis.$id/precision-by-input-word");
$data = file(get_current_analysis_filename("precision","precision-by-input-word"));
for($i=0;$i<count($data);$i++) {
$line = rtrim($data[$i]);
$item = split("\t",$line);
@ -252,11 +259,19 @@ function precision_by_word_diff($type) {
//# filter for factor
$word = $item[5];
$factor = $item[6];
if ($byFactor != "false" && $byFactor != $factor) {
if ($byFactor != "false" && $byFactor != $item[6]) {
continue;
}
if (!array_key_exists($word,$info)) {
$info[$word]["precision"] = 0;
$info[$word]["delete"] = 0;
$info[$word]["length"] = 0;
$info_new[$word]["length"] = 0;
$info_new[$word]["delete"] = 0;
$info_new[$word]["precision"] = 0;
$info_new[$word]["total"] = 0;
$info[$word]["total"] = -$item[3];
}
$info[$word]["precision"] -= $item[0];
$info[$word]["delete"] -= $item[1];
$info[$word]["length"] -= $item[2];
@ -308,14 +323,14 @@ ctx.font = '9px serif';
$height = 90-$line/2*180;
print "ctx.moveTo(20, $height);\n";
print "ctx.lineTo($img_width, $height);\n";
print "ctx.fillText(\"".sprintf("%d",10*${line}*1.001)."\%\", 0, $height+4);";
print "ctx.fillText(\"".sprintf("%d",10 * $line * 1.001)."\%\", 0, $height+4);";
}
for($line=-0.4;$line<=0.4;$line+=.2) {
$height = 250+$line/2*180;
print "ctx.moveTo(20, $height);\n";
print "ctx.lineTo($img_width, $height);\n";
if ($line != 0) {
print "ctx.fillText(\"".sprintf("%d",10*${line}*1.001)."\%\", 0, $height+4);";
print "ctx.fillText(\"".sprintf("%d",10 * $line * 1.001)."\%\", 0, $height+4);";
}
}
print "ctx.strokeStyle = \"rgb(100,100,100)\"; ctx.stroke();\n";
@ -385,7 +400,7 @@ function ngram_summary_diff() {
// load data
for($idx=0;$idx<2;$idx++) {
$data = file("$dir/evaluation/$set.analysis.".($idx?$id2:$id)."/summary");
$data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","summary"));
for($i=0;$i<count($data);$i++) {
$item = split(": ",$data[$i]);
$info[$idx][$item[0]] = $item[1];
@ -393,7 +408,7 @@ function ngram_summary_diff() {
}
print "<table cellspacing=5 width=100%><tr><td valign=top align=center bgcolor=#eeeeee>";
print "<b>Precision</b><br>";
print "<b>Precision of Output</b><br>";
//foreach (array("precision","recall") as $type) {
$type = "precision";
print "<table><tr><td>$type</td><td>1-gram</td><td>2-gram</td><td>3-gram</td><td>4-gram</td></tr>\n";
@ -416,12 +431,11 @@ function ngram_summary_diff() {
//}
print "<A HREF=\"javascript:generic_show_diff('PrecisionRecallDetailsDiff','')\">details</A> ";
if (file_exists("$dir/evaluation/$set.analysis.$id/precision-by-corpus-coverage") &&
file_exists("$dir/evaluation/$set.analysis.$id2/precision-by-corpus-coverage")) {
print "| <A HREF=\"javascript:generic_show_diff('PrecisionByCoverageDiff','')\">breakdown by coverage</A> ";
if (file_exists(get_current_analysis_filename("precision","precision-by-corpus-coverage")) &&
file_exists(get_current_analysis_filename2("precision","precision-by-corpus-coverage"))) {
print "| <A HREF=\"javascript:generic_show_diff('PrecisionByCoverageDiff','')\">precision of input by coverage</A> ";
}
print "</td><td valign=top align=center bgcolor=#eeeeee>";
print "<b>Metrics</b><br>\n";
@ -434,6 +448,7 @@ function ngram_summary_diff() {
}
}
}
$header = ""; $score_line = ""; $diff_line = "";
foreach ($score as $name => $value) {
$header .= "<td>$name</td>";
$score_line .= "<td>".$score[$name][1]."</td>";
@ -494,7 +509,7 @@ function bleu_diff_annotation() {
// load data
for($idx=0;$idx<2;$idx++) {
$data = file("$dir/evaluation/$set.analysis.".($idx?$id2:$id)."/bleu-annotation");
$data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","bleu-annotation"));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$annotation[$item[1]]["bleu$idx"] = $item[0];
@ -505,6 +520,7 @@ function bleu_diff_annotation() {
}
$data = array();
$identical=0; $same=0; $better=0; $worse=0;
for($i=0;$i<count($annotation);$i++) {
if ($annotation[$i]["system1"] == $annotation[$i]["system0"]) {
$identical++;
@ -609,7 +625,7 @@ function ngram_diff($type) {
$order = $_GET['order'];
for($idx=0;$idx<2;$idx++) {
$data = file("$dir/evaluation/$set.analysis.".($idx?$id2:$id)."/n-gram-$type.$order");
$data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","n-gram-$type.$order"));
for($i=0;$i<count($data);$i++) {
$item = split("\t",$data[$i]);
$ngram_hash[$item[2]]["total$idx"] = $item[0];

View File

@ -1,11 +1,18 @@
.pp_head {
font-size: 150%;
font-size: 90%;
font-weight: bold;
text-align: center;
}
.pp_target_header {
font-size: 120%;
font-size: 80%;
font-weight: bold;
text-align: center;
}
.pp_singleton_header {
font-size: 80%;
font-variant: small-caps;
font-weight: bold;
text-align: center;
}
@ -29,23 +36,62 @@ td.biconcor {
}
td.pp_source_left {
font-size: 70%;
text-align: right;
}
td.pp_target_left {
font-size: 70%;
text-align: right;
}
td.pp_source {
font-size: 70%;
font-weight: bold;
}
td.pp_target {
font-size: 70%;
font-weight: bold;
}
td.mismatch_target {
font-size: 70%;
text-align: center;
}
td.pp_source_right {
font-size: 70%;
border-style:solid;
border-width:0px 2px 0px 0px ;
border-color: black;
}
td.pp_target_right {
font-size: 70%;
}
span.null_aligned {
color: blue;
}
span.mismatch_pre_aligned {
color: purple;
}
span.mismatch_post_aligned {
color: olive;
}
span.mismatch_misaligned {
color: red;
}
span.mismatch_aligned {
font-weight: bold;
}
td.pp_more {
font-size: 70%;
text-align: center;
}

View File

@ -1,4 +1,5 @@
h2 {
font:italic x-large/1.75 'Essays 1743','Times New Roman',serif;text-shadow:0 0 1px #667
font:italic x-large/1.75 'Essays 1743','Times New Roman',serif;
text-shadow:0 0 1px #667
}

View File

@ -12,6 +12,7 @@ function head($title) {
<script language="javascript" src="/javascripts/prototype.js"></script>
<script language="javascript" src="/javascripts/scriptaculous.js"></script>
<script language="javascript" src="hierarchical-segmentation.js"></script>
<script language="javascript" src="base64.js"></script>
<link href="general.css" rel="stylesheet" type="text/css">
<link href="hierarchical-segmentation.css" rel="stylesheet" type="text/css">
<link href="bilingual-concordance.css" rel="stylesheet" type="text/css">
@ -29,7 +30,7 @@ if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
$action = $_GET["analysis"];
$set = $_GET["set"];
$id = $_GET["id"];
$id2 = $_GET["id2"];
if (array_key_exists("id2",$_GET)) { $id2 = $_GET["id2"]; }
if ($action == "show") { show_analysis(); }
else if ($action == "bleu_show") { bleu_show(); }
else if ($action == "ngram_precision_show") { ngram_show("precision");}
@ -43,7 +44,7 @@ if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
else if (preg_match("/PrecisionByWord(.+)_show/",$action,$match)) { precision_by_word($match[1]); }
else if ($action == "CoverageDetails_show") { coverage_details(); }
else if ($action == "SegmentationSummary_show") { segmentation_summary(); }
else if ($action == "biconcor") { biconcor($_GET["phrase"]); }
else if ($action == "biconcor") { biconcor(base64_decode($_GET["phrase"])); }
else { print "ERROR! $action"; }
}
else if (array_key_exists("analysis_diff_home",$_GET)) {

View File

@ -124,48 +124,136 @@ function process_file_entry($dir,$entry) {
}
}
function get_coverage_analysis_version($dir,$set,$id) {
if (file_exists("$dir/evaluation/$set.analysis.$id/input-annotation")) {
return $id;
function get_analysis_version($dir,$set,$id) {
global $analysis_version;
if ($analysis_version
&& array_key_exists($id,$analysis_version)
&& array_key_exists($set,$analysis_version[$id])) {
#reset($analysis_version[$id][$set]);
#print "$id,$set ( ";
#while(list($type,$i) = each($analysis_version[$id][$set])) {
# print "$type=$i ";
#}
#print ") FROM CACHE<br>";
return $analysis_version[$id][$set];
}
$analysis_version[$id][$set]["basic"] = 0;
$analysis_version[$id][$set]["biconcor"] = 0;
$analysis_version[$id][$set]["coverage"] = 0;
$analysis_version[$id][$set]["precision"] = 0;
$prefix = "$dir/evaluation/$set.analysis";
# produced by the run itself ?
if (file_exists("$prefix.$id/summary")) {
$analysis_version[$id][$set]["basic"] = $id;
}
if (file_exists("$prefix.$id/input-annotation")) {
$analysis_version[$id][$set]["coverage"] = $id;
}
if (file_exists("$prefix.$id/precision-by-input-word")) {
$analysis_version[$id][$set]["precision"] = $id;
}
if (file_exists("$dir/model/biconcor.$id")) {
$analysis_version[$id][$set]["biconcor"] = $id;
}
# re-use ?
if (file_exists("$dir/steps/$id/re-use.$id")) {
$re_use = file("$dir/steps/$id/re-use.$id");
foreach($re_use as $line) {
if (preg_match("/EVALUATION:(.+):analysis-coverage (\d+)/",$line,$match) &&
if (preg_match("/EVALUATION:(.+):analysis (\d+)/",$line,$match) &&
$match[1] == $set &&
file_exists("$dir/evaluation/$set.analysis.$match[2]/input-annotation")) {
return $match[2];
file_exists("$prefix.$match[2]/summary")) {
$analysis_version[$id][$set]["basic"] = $match[2];
}
else if (preg_match("/EVALUATION:(.+):analysis-coverage (\d+)/",$line,$match) &&
$match[1] == $set &&
file_exists("$prefix.$match[2]/input-annotation")) {
$analysis_version[$id][$set]["coverage"] = $match[2];
}
else if (preg_match("/EVALUATION:(.+):analysis-precision (\d+)/",$line,$match) &&
$match[1] == $set &&
file_exists("$prefix.$match[2]/precision-by-input-word")) {
$analysis_version[$id][$set]["precision"] = $match[2];
}
else if (preg_match("/TRAINING:build-biconcor (\d+)/",$line,$match) &&
file_exists("$dir/model/biconcor.$match[1]")) {
$analysis_version[$id][$set]["biconcor"] = $match[1];
}
}
}
# legacy stuff below...
if (! file_exists("$dir/steps/$id/REPORTING_report.$id")) {
return 0;
}
$report = file("$dir/steps/$id/REPORTING_report.$id.INFO");
foreach ($report as $line) {
if (preg_match("/\# reuse run (\d+) for EVALUATION:(.+):analysis-coverage/",$line,$match) &&
if (file_exists("$dir/steps/$id/REPORTING_report.$id")) {
$report = file("$dir/steps/$id/REPORTING_report.$id.INFO");
foreach ($report as $line) {
if (preg_match("/\# reuse run (\d+) for EVALUATION:(.+):analysis/",$line,$match) &&
$match[2] == $set) {
$reuse_id = $match[1];
if (file_exists("$dir/evaluation/$set.analysis.$reuse_id/input-annotation")) {
return $reuse_id;
if (file_exists("$prefix.$match[1]/summary")) {
$analysis_version[$id][$set]["basic"] = $match[1];
}
}
}
return 0;
}
function get_biconcor_version($dir,$id) {
if (file_exists("$dir/model/biconcor.$id")) {
return $id;
}
$re_use = file("$dir/steps/$id/re-use.$id");
foreach($re_use as $line) {
if (preg_match("/TRAINING:build-biconcor (\d+)/",$line,$match) &&
file_exists("$dir/model/biconcor.$match[1]")) {
return $match[1];
if (preg_match("/\# reuse run (\d+) for EVALUATION:(.+):analysis-coverage/",$line,$match) &&
$match[2] == $set) {
if (file_exists("$prefix.$match[1]/input-annotation")) {
$analysis_version[$id][$set]["coverage"] = $match[1];
}
}
if (preg_match("/\# reuse run (\d+) for EVALUATION:(.+):analysis-precision/",$line,$match) &&
$match[2] == $set) {
if (file_exists("$prefix.$match[1]/precision-by-input-word")) {
$analysis_version[$id][$set]["precision"] = $match[1];
}
}
if (preg_match("/\# reuse run (\d+) for TRAINING:biconcor/",$line,$match)){
if (file_exists("$dir/model/biconcor.$match[1]")) {
$analysis_version[$id][$set]["biconcor"] = $match[1];
}
}
}
}
return 0;
#print "$id,$set ( ";
#reset($analysis_version[$id][$set]);
#while(list($type,$i) = each($analysis_version[$id][$set])) {
# print "$type=$i ";
#}
#print ") ZZ<br>";
return $analysis_version[$id][$set];
}
function get_precision_analysis_version($dir,$set,$id) {
$version = get_analysis_version($dir,$set,$id);
return $version["precision"];
}
function get_basic_analysis_version($dir,$set,$id) {
$version = get_analysis_version($dir,$set,$id);
return $version["basic"];
}
function get_coverage_analysis_version($dir,$set,$id) {
$version = get_analysis_version($dir,$set,$id);
return $version["coverage"];
}
function get_biconcor_version($dir,$set,$id) {
$version = get_analysis_version($dir,$set,$id);
return $version["biconcor"];
}
function get_analysis_filename($dir,$set,$id,$type,$file) {
$version = get_analysis_version($dir,$set,$id);
return "$dir/evaluation/$set.analysis.".$version[$type]."/".$file;
}
function get_current_analysis_filename($type,$file) {
global $dir,$set,$id;
$version = get_analysis_version($dir,$set,$id);
return "$dir/evaluation/$set.analysis.".$version[$type]."/".$file;
}
function get_current_analysis_filename2($type,$file) {
global $dir,$set,$id2;
$version = get_analysis_version($dir,$set,$id2);
return "$dir/evaluation/$set.analysis.".$version[$type]."/".$file;
}
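Taken together, the rewritten get_analysis_version caches, per run and test set, one version number for each analysis component (basic, coverage, precision, biconcor), and every caller now builds paths through get_analysis_filename or the get_current_analysis_filename wrappers. A minimal usage sketch, assuming the same globals ($dir, $set, $id) as the surrounding code; the version numbers in the comment are purely illustrative:

// Sketch: resolve which run produced each analysis component, then build paths
// with the helpers defined above (the version numbers below are illustrative only).
$version = get_analysis_version($dir, $set, $id);
// e.g. array("basic" => 12, "coverage" => 9, "precision" => 9, "biconcor" => 7)
$summary_file  = get_analysis_filename($dir, $set, $id, "basic", "summary");
$coverage_file = get_analysis_filename($dir, $set, $id, "coverage", "input-annotation");
if ($version["biconcor"]) {   // a value of 0 means no biconcor model was found
  $model_file = "$dir/model/biconcor.".$version["biconcor"];
}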

View File

@ -1,5 +1,7 @@
<?php
date_default_timezone_set('Europe/London');
function setup() {
$setup = file("setup");
head("All Experimental Setups");
@ -11,7 +13,7 @@ function setup() {
print "<TR><TD><A HREF=\"?setup=$dir[0]\">$dir[0]</A></TD><TD>$dir[1]</TD><TD>$dir[2]</TD><TD>$dir[3]</TD></TR>\n";
}
print "</TABLE>\n";
print "<P>To add experiment, edit setup file on web server";
print "<P>To add experiment, edit setup in web directory";
}
function overview() {
@ -29,10 +31,14 @@ function overview() {
print "<form action=\"\" method=get>\n";
output_state_for_form();
// count how many analyses there are for each test set
while (list($id,$info) = each($experiment)) {
reset($evalset);
while (list($set,$dummy) = each($evalset)) {
$analysis = "$dir/evaluation/$set.analysis.$id";
$report_info = "$dir/steps/$id/REPORTING_report.$id.INFO";
// does the analysis file exist?
if (file_exists($analysis)) {
if (!array_key_exists($set,$has_analysis)) {
$has_analysis[$set] = 0;
@ -117,7 +123,7 @@ function overview() {
list($score) = sscanf($info->result[$set],"%f%s");
if ($score > 0) {
print "score[$id][\"$set\"] = $score;\n";
if ($score > $best[$set]) {
if (!array_key_exists($set,$best) || $score > $best[$set]) {
$best[$set] = $score;
}
}
@ -303,8 +309,8 @@ function output_score($id,$info) {
if ($has_analysis && array_key_exists($set,$has_analysis)) {
print "<td align=center>";
global $dir;
$analysis = "$dir/evaluation/$set.analysis.$id";
if (file_exists($analysis)) {
$analysis = get_analysis_version($dir,$set,$id);
if ($analysis["basic"]) {
print "<a href=\"?analysis=show&setup=$setup&set=$set&id=$id\">&#x24B6;</a> <input type=checkbox name=analysis-$id-$set value=1>";
}
print "</td>";