remove zmert and cmert

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3397 1f5c12ca-751b-0410-a591-d2e778427230
bhaddow 2010-08-10 14:28:03 +00:00
parent 8616a2bdee
commit 321f528ff5
38 changed files with 1 addition and 4295 deletions

View File

@@ -26,7 +26,7 @@ RELEASEDIR=$(TARGETDIR)/scripts-$(TS)
all: compile
SUBDIRS=cmert-0.5 phrase-extract symal mbr lexical-reordering
SUBDIRS=phrase-extract symal mbr lexical-reordering
SUBDIRS_CLEAN=$(SUBDIRS) memscore
compile: compile-memscore

View File

@@ -63,23 +63,6 @@ training/absolutize_moses_model.pl
training/build-generation-table.perl
training/clean-corpus-n.perl
training/clone_moses_model.pl
training/cmert-0.5/bleu.py
training/cmert-0.5/dataset.py
training/cmert-0.5/log.py
training/cmert-0.5/mert
training/cmert-0.5/enhanced-mert
training/cmert-0.5/reduce-field.pl
training/cmert-0.5/extend-field.pl
training/cmert-0.5/python/psyco/classes.py
training/cmert-0.5/python/psyco/core.py
training/cmert-0.5/python/psyco/__init__.py
training/cmert-0.5/python/psyco/kdictproxy.py
training/cmert-0.5/python/psyco/logger.py
training/cmert-0.5/python/psyco/profiler.py
training/cmert-0.5/python/psyco/_psyco.so
training/cmert-0.5/python/psyco/support.py
training/cmert-0.5/README
training/cmert-0.5/score-nbest.py
training/mbr/mbr
training/filter-model-given-input.pl
training/filter-rule-table.py
@@ -87,7 +70,6 @@ training/lexical-reordering/score
training/memscore/memscore
training/zmert-moses.pl
training/mert-moses.pl
training/mert-moses-new.pl
training/phrase-extract/extract
training/phrase-extract/extract-rules
training/phrase-extract/score

View File

@@ -12,21 +12,6 @@ training/absolutize_moses_model.pl
training/build-generation-table.perl
training/clean-corpus-n.perl
training/clone_moses_model.pl
training/cmert-0.5/bleu.py
training/cmert-0.5/dataset.py
training/cmert-0.5/log.py
training/cmert-0.5/mert.exe
training/cmert-0.5/enhanced-mert
training/cmert-0.5/python/psyco/classes.py
training/cmert-0.5/python/psyco/core.py
training/cmert-0.5/python/psyco/__init__.py
training/cmert-0.5/python/psyco/kdictproxy.py
training/cmert-0.5/python/psyco/logger.py
training/cmert-0.5/python/psyco/profiler.py
training/cmert-0.5/python/psyco/_psyco.so
training/cmert-0.5/python/psyco/support.py
training/cmert-0.5/README
training/cmert-0.5/score-nbest.py
training/combine_factors.pl
training/filter-model-given-input.pl
training/lexical-reordering/score.exe

View File

@@ -1,15 +0,0 @@
OBJS=mert.o data.o point.o score.o
CFLAGS=-O3
LDFLAGS=
LDLIBS=-lm
all: mert
clean:
rm -f *.o
mert: $(OBJS)
$(CXX) $(OBJS) $(LDLIBS) -o $@
mert_p: $(OBJS)
$(CXX) $(LDFLAGS) $(OBJS) $(LDLIBS) -o $@

View File

@@ -1,10 +0,0 @@
CMERT 0.5
5 Nov 2005
Copyright (c) 2005 David Chiang. All rights reserved (for now).
Minimalist installation instructions:
- make
- set #! lines and sys.path lines in Python scripts
- see run-cmert for example

View File

@@ -1,179 +0,0 @@
#!/usr/bin/python
# $Id$
'''Provides:
cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test().
cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked().
score_cooked(alltest, n=4): Score a list of cooked test sentences.
score_set(s, testid, refids, n=4): Interface with dataset.py; calculate BLEU score of testid against refids.
The reason for breaking the BLEU computation into three phases cook_refs(), cook_test(), and score_cooked() is to allow the caller to calculate BLEU scores for multiple test sets as efficiently as possible.
'''
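# A minimal usage sketch of the three-phase interface described above, with
# made-up sentences: cook the references once, then score any number of test
# sentences against them.
#
#   refs = cook_refs(["the cat sat on the mat", "a cat is on the mat"])
#   cooked = [cook_test("the cat is on the mat", refs)]
#   print "BLEU:", score_cooked(cooked)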
import optparse
import sys, math, re, xml.sax.saxutils
sys.path.append('/fs/clip-mteval/Programs/hiero')
import dataset
import log
# Added to bypass NIST-style pre-processing of hyp and ref files -- wade
nonorm = 0
preserve_case = False
eff_ref_len = "shortest"
normalize1 = [
('<skipped>', ''), # strip "skipped" tags
(r'-\n', ''), # strip end-of-line hyphenation and join lines
(r'\n', ' '), # join lines
# (r'(\d)\s+(?=\d)', r'\1'), # join digits
]
normalize1 = [(re.compile(pattern), replace) for (pattern, replace) in normalize1]
normalize2 = [
(r'([\{-\~\[-\` -\&\(-\+\:-\@\/])',r' \1 '), # tokenize punctuation. apostrophe is missing
(r'([^0-9])([\.,])',r'\1 \2 '), # tokenize period and comma unless preceded by a digit
(r'([\.,])([^0-9])',r' \1 \2'), # tokenize period and comma unless followed by a digit
(r'([0-9])(-)',r'\1 \2 ') # tokenize dash when preceded by a digit
]
normalize2 = [(re.compile(pattern), replace) for (pattern, replace) in normalize2]
def normalize(s):
'''Normalize and tokenize text. This is lifted from NIST mteval-v11a.pl.'''
# Added to bypass NIST-style pre-processing of hyp and ref files -- wade
if (nonorm):
return s.split()
if type(s) is not str:
s = " ".join(s)
# language-independent part:
for (pattern, replace) in normalize1:
s = re.sub(pattern, replace, s)
s = xml.sax.saxutils.unescape(s, {'&quot;':'"'})
# language-dependent part (assuming Western languages):
s = " %s " % s
if not preserve_case:
s = s.lower() # this might not be identical to the original
for (pattern, replace) in normalize2:
s = re.sub(pattern, replace, s)
return s.split()
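# A worked example of the normalization above under the default settings
# (nonorm = 0, preserve_case = False); the sentence is made up:
#
#   normalize("Hello, World! It costs $5.75.")
#   # -> ['hello', ',', 'world', '!', 'it', 'costs', '$', '5.75', '.']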
def count_ngrams(words, n=4):
counts = {}
for k in xrange(1,n+1):
for i in xrange(len(words)-k+1):
ngram = tuple(words[i:i+k])
counts[ngram] = counts.get(ngram, 0)+1
return counts
def cook_refs(refs, n=4):
'''Takes a list of reference sentences for a single segment
and returns an object that encapsulates everything that BLEU
needs to know about them.'''
refs = [normalize(ref) for ref in refs]
maxcounts = {}
for ref in refs:
counts = count_ngrams(ref, n)
for (ngram,count) in counts.iteritems():
maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
return ([len(ref) for ref in refs], maxcounts)
def cook_test(test, (reflens, refmaxcounts), n=4):
'''Takes a test sentence and returns an object that
encapsulates everything that BLEU needs to know about it.'''
test = normalize(test)
result = {}
result["testlen"] = len(test)
# Calculate effective reference sentence length.
if eff_ref_len == "shortest":
result["reflen"] = min(reflens)
elif eff_ref_len == "average":
result["reflen"] = float(sum(reflens))/len(reflens)
elif eff_ref_len == "closest":
min_diff = None
for reflen in reflens:
if min_diff is None or abs(reflen-len(test)) < min_diff:
min_diff = abs(reflen-len(test))
result['reflen'] = reflen
result["guess"] = [max(len(test)-k+1,0) for k in xrange(1,n+1)]
result['correct'] = [0]*n
counts = count_ngrams(test, n)
for (ngram, count) in counts.iteritems():
result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram,0), count)
return result
def score_cooked(allcomps, n=4):
totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n}
for comps in allcomps:
for key in ['testlen','reflen']:
totalcomps[key] += comps[key]
for key in ['guess','correct']:
for k in xrange(n):
totalcomps[key][k] += comps[key][k]
logbleu = 0.0
for k in xrange(n):
if totalcomps['correct'][k] == 0:
return 0.0
log.write("%d-grams: %f\n" % (k,float(totalcomps['correct'][k])/totalcomps['guess'][k]))
logbleu += math.log(totalcomps['correct'][k])-math.log(totalcomps['guess'][k])
logbleu /= float(n)
log.write("Effective reference length: %d test length: %d\n" % (totalcomps['reflen'], totalcomps['testlen']))
logbleu += min(0,1-float(totalcomps['reflen'])/totalcomps['testlen'])
return math.exp(logbleu)
def score_set(set, testid, refids, n=4):
alltest = []
for seg in set.segs():
try:
test = seg.versions[testid].words
except KeyError:
log.write("Warning: missing test sentence\n")
continue
try:
refs = [seg.versions[refid].words for refid in refids]
except KeyError:
log.write("Warning: missing reference sentence, %s\n" % seg.id)
refs = cook_refs(refs, n)
alltest.append(cook_test(test, refs, n))
log.write("%d sentences\n" % len(alltest))
return score_cooked(alltest, n)
if __name__ == "__main__":
import psyco
psyco.full()
import getopt
raw_test = False
(opts,args) = getopt.getopt(sys.argv[1:], "rc", [])
for (opt,parm) in opts:
if opt == "-r":
raw_test = True
elif opt == "-c":
preserve_case = True
s = dataset.Dataset()
if args[0] == '-':
infile = sys.stdin
else:
infile = args[0]
if raw_test:
(root, testids) = s.read_raw(infile, docid='whatever', sysid='testsys')
else:
(root, testids) = s.read(infile)
print "Test systems: %s" % ", ".join(testids)
(root, refids) = s.read(args[1])
print "Reference systems: %s" % ", ".join(refids)
for testid in testids:
print "BLEU score: ", score_set(s, testid, refids)

View File

@@ -1,93 +0,0 @@
// $Id$
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "data.h"
#include "point.h"
extern int comps_n;
data_t *read_data(void) {
FILE *fp;
static char buf[1000];
char *tok, *s;
int field;
int sent_i, cand_i, cands_n;
int total_cands_n;
data_t *data;
candidate_t *cands;
data = malloc(sizeof(data_t));
data->sents_max = 100;
data->sents_n = 0;
data->cands_n = malloc(data->sents_max*sizeof(int));
total_cands_n = 0;
fp = fopen("cands.opt", "r");
while (fgets(buf, sizeof(buf), fp) != NULL) {
// should we check to make sure every sentence is accounted for?
sscanf(buf, "%d %d", &sent_i, &cands_n);
if (sent_i >= data->sents_n)
data->sents_n = sent_i+1;
if (sent_i >= data->sents_max) {
data->sents_max = (sent_i+1)*2;
data->cands_n = realloc(data->cands_n, data->sents_max*sizeof(int));
}
data->cands_n[sent_i] = cands_n;
total_cands_n += cands_n;
}
fclose(fp);
/* create master array for candidates and then set data->sents
to point into it */
cands = malloc(total_cands_n * sizeof(candidate_t));
data->sents = malloc(data->sents_n * sizeof(candidate_t *));
total_cands_n = 0;
for (sent_i=0; sent_i<data->sents_n; sent_i++) {
data->sents[sent_i] = cands+total_cands_n;
total_cands_n += data->cands_n[sent_i];
}
cand_i = 0;
fp = fopen("feats.opt", "r");
while (fgets(buf, sizeof(buf), fp) != NULL) {
cands[cand_i].features = malloc(dim*sizeof(float));
cands[cand_i].comps = malloc(comps_n*sizeof(int));
field = 0;
s = buf;
while ((tok = strsep(&s, " \t\n")) != NULL) {
if (!*tok) // empty token
continue;
// read dim floats and then comps_n ints
if (field < dim)
cands[cand_i].features[field] = -strtod(tok, NULL); // Venugopal format uses costs
else if (field < dim+comps_n)
cands[cand_i].comps[field-dim] = strtol(tok, NULL, 10);
else {
fprintf(stderr, "read_data(): too many fields in line in feats.opt\n");
return NULL;
}
field++;
}
if (field != dim+comps_n) {
fprintf(stderr, "read_data(): wrong number of fields in line in feats.opt - expected %d + %d and found %d on line %d\n",dim,comps_n,field,cand_i);
return NULL;
}
cand_i++;
}
if (cand_i != total_cands_n) {
fprintf(stderr, "read_data(): wrong number of lines in cands.opt\n");
return NULL;
}
fclose(fp);
return data;
}

View File

@@ -1,18 +0,0 @@
// $Id$
#ifndef DATA_H
#define DATA_H
typedef struct {
float *features;
int *comps;
float m, b; // slope and intercept, used as scratch space
} candidate_t;
typedef struct {
candidate_t **sents;
int sents_n, sents_max, *cands_n;
} data_t;
data_t *read_data(void);
#endif

View File

@@ -1,392 +0,0 @@
#!/usr/bin/python2.3
# $Id$
'''Decoder interface:
Dataset.process() expects a function, which in turn takes a Sentence as input
and produces a Sentence or list of Sentences as output.
The input Sentence will be marked with the <seg> tag it was found in
the input file with.
The output Sentences should be marked with <seg> tags if they are to
be marked as such in the output file.
'''
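# A minimal sketch of a processor compatible with the interface above (the
# function and file names are made up): it copies the input words and
# re-attaches the <seg> tag so the segment id survives into the output file.
#
#   def copy_processor(sent):
#       out = Sentence(sent.words)
#       mark = sent.getmark()
#       if mark is not None:
#           out.mark(*mark)    # keep the original <seg id=...> tag
#       return out
#
#   d = Dataset()
#   d.read_raw("hyps.txt", "doc1", sysid="src", lang="English")
#   d.process(copy_processor, sysid="out", lang="English", srcsysid="src")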
import sys, sgmllib, xml.sax.saxutils, log
def attrs_to_str(d):
if len(d) == 0:
return ""
l = [""]+["%s=%s" % (name, xml.sax.saxutils.quoteattr(value)) for (name, value) in d]
return " ".join(l)
def attrs_to_dict(a):
d = {}
for (name, value) in a:
if d.has_key(name.lower()):
raise ValueError, "duplicate attribute names"
d[name.lower()] = value
return d
def strip_newlines(s):
return " ".join(s.split())
class Sentence(object):
def __init__(self, words=None, meta=None):
if words is not None:
self.words = list(words)
else:
self.words = []
if meta is not None:
self.meta = meta
else:
self.meta = []
def mark(self, tag, attrs):
self.meta.append((tag, attrs, 0, len(self.words)))
def getmark(self):
if len(self.meta) > 0:
(tag, attrs, i, j) = self.meta[-1]
if i == 0 and j == len(self.words):
return (tag, attrs)
else:
return None
else:
return None
def unmark(self):
mark = self.getmark()
if mark is not None:
self.meta = self.meta[:-1]
return mark
def __cmp__(self, other):
return cmp((self.words, self.meta), (other.words, other.meta))
def __str__(self):
def cmp_spans((tag1,attr1,i1,j1),(tag2,attr2,i2,j2)):
if i1==i2<=j1==j2:
return 0
elif i2<=i1<=j1<=j2:
return -1
elif i1<=i2<=j2<=j1:
return 1
else:
return cmp((i1,j1),(i2,j2)) # don't care
# this guarantees that equal spans will come out nested
# we want the later spans to be outer
# this relies on stable sort
open = [[] for i in xrange(len(self.words)+1)]
# there seems to be a bug still with empty spans
empty = [[] for i in xrange(len(self.words)+1)]
close = [[] for j in xrange(len(self.words)+1)]
for (tag,attrs,i,j) in sorted(self.meta, cmp=cmp_spans):
if i == j:
# do we want these to nest?
empty[i].append("<%s%s/>" % (tag, attrs_to_str(attrs)))
open[i].append("<%s%s>" % (tag, attrs_to_str(attrs)))
close[j].append("</%s>" % tag)
result = []
if len(empty[0]) > 0:
result.extend(empty[0])
for i in xrange(len(self.words)):
if i > 0:
result.append(" ")
result.extend(reversed(open[i]))
result.append(self.words[i])
result.extend(close[i+1])
if len(empty[i+1]) > 0:
result.extend(empty[i+1])
return "".join(result)
def __add__(self, other):
if type(other) in (list, tuple):
return Sentence(self.words + list(other), self.meta)
else:
othermeta = [(tag, attrs, i+len(self.words), j+len(self.words)) for (tag, attrs, i, j) in other.meta]
return Sentence(self.words + other.words, self.meta+othermeta)
def read_raw(f):
"""Read a raw file into a list of Sentences."""
if type(f) is str:
f = file(f, "r")
inputs = []
i = 0
for line in f:
sent = process_sgml_line(line, i)
sent.mark('seg', [('id',str(i))])
inputs.append(sent)
i += 1
return inputs
class Dataset(object):
def __init__(self, id=None):
self.id = id
self.docs = {}
self.sysids = []
self.langs = {}
def read(self, f):
'''Read a file into the dataset. Returns (root, sysids)'''
if type(f) is str:
f = file(f, "r")
p = DatasetParser(self)
p.feed(f.read())
p.close()
return (p.root,p.sysids)
def read_raw(self, f, docid, setid=None, sysid=None, lang=None):
"""Read a raw file into the dataset."""
if setid is not None:
if self.id is not None and self.id != setid:
raise ValueError, "Set ID does not match"
else:
self.id = setid
if sysid not in self.sysids:
self.sysids.append(sysid)
self.langs[sysid] = lang
if type(f) is str:
f = file(f, "r")
doc = self.docs.setdefault(docid, Document(docid))
i = 0
for line in f:
if len(doc.segs)-1 < i:
doc.segs.append(Segment(i))
if doc.segs[i].versions.has_key(sysid):
raise ValueError, "multiple versions from same system"
doc.segs[i].versions[sysid] = process_sgml_line(line, i)
doc.segs[i].versions[sysid].mark('seg', [('id',str(i))])
i += 1
return (None, [sysid])
def write(self, f, tag, sysids=None):
if type(f) is str:
f = file(f, "w")
f.write(self.string(tag, sysids))
def write_raw(self, f, sysid=None):
if type(f) is str:
f = file(f, "w")
for seg in self.segs():
f.write(" ".join(seg.versions[sysid].words))
f.write("\n")
def string(self, tag, sysids=None):
if sysids is None:
sysids = self.sysids
elif type(sysids) is str:
sysids = [sysids]
attrs = [('setid', self.id)]
if self.langs.has_key(None):
attrs.append(('srclang', self.langs[None]))
trglangs = [self.langs[sysid] for sysid in sysids if sysid is not None]
for lang in trglangs[1:]:
if lang != trglangs[0]:
raise ValueError, "Inconsistent target language"
if len(trglangs) >= 1:
attrs.append(('trglang', trglangs[0]))
return "<%s%s>\n%s</%s>\n" % (tag,
attrs_to_str(attrs),
"".join([doc.string(sysid) for doc in self.docs.values() for sysid in sysids]),
tag)
def process(self, processor, sysid, lang, srcsysid=None):
if sysid in self.sysids:
raise ValueError, "sysid already in use"
else:
self.sysids.append(sysid)
self.langs[sysid] = lang
for seg in self.segs():
if log.level >= 2:
sys.stderr.write("Input: %s\n" % str(seg.versions[srcsysid]))
seg.versions[sysid] = processor(seg.versions[srcsysid])
if log.level >= 2:
if type(seg.versions[sysid]) is not list:
sys.stderr.write("Output: %s\n" % str(seg.versions[sysid]))
else:
sys.stderr.write("Output (1st): %s\n" % str(seg.versions[sysid][0]))
def segs(self):
for doc in self.docs.values():
for seg in doc.segs:
yield seg
class Document(object):
def __init__(self, id):
self.id = id
self.segs = []
def string(self, sysid):
attrs = [('docid', self.id)]
if sysid is not None:
attrs.append(('sysid', sysid))
return "<doc%s>\n%s</doc>\n" % (attrs_to_str(attrs),
"".join([seg.string(sysid) for seg in self.segs]))
class Segment(object):
def __init__(self, id=None):
self.id = id
self.versions = {}
def string(self, sysid):
v = self.versions[sysid]
if type(v) is not list:
v = [v]
output = []
for i in xrange(len(v)):
output.append(str(v[i]))
output.append('\n')
return "".join(output)
def process_sgml_line(line, id=None):
p = DatasetParser(None)
p.pos = 0
p.words = []
p.meta = []
p.feed(line)
p.close()
sent = Sentence(p.words, p.meta)
return sent
class DatasetParser(sgmllib.SGMLParser):
def __init__(self, set):
sgmllib.SGMLParser.__init__(self)
self.words = None
self.sysids = []
self.set = set
self.mystack = []
def handle_starttag(self, tag, method, attrs):
thing = method(attrs)
self.mystack.append(thing)
def handle_endtag(self, tag, method):
thing = self.mystack.pop()
method(thing)
def unknown_starttag(self, tag, attrs):
thing = self.start(tag, attrs)
self.mystack.append(thing)
def unknown_endtag(self, tag):
thing = self.mystack.pop()
self.end(tag, thing)
def start_srcset(self, attrs):
attrs = attrs_to_dict(attrs)
if self.set.id is None:
self.set.id = attrs['setid']
if 0 and self.set.id != attrs['setid']:
raise ValueError, "Set ID does not match"
self.lang = attrs['srclang']
self.root = 'srcset'
return None
def start_refset(self, attrs):
attrs = attrs_to_dict(attrs)
if self.set.id is None:
self.set.id = attrs['setid']
if 0 and self.set.id != attrs['setid']:
raise ValueError, "Set ID does not match"
if self.set.langs.setdefault(None, attrs['srclang']) != attrs['srclang']:
raise ValueError, "Source language does not match"
self.lang = attrs['trglang']
self.root = 'refset'
return None
def start_tstset(self, attrs):
attrs = attrs_to_dict(attrs)
if self.set.id is None:
self.set.id = attrs['setid']
if 0 and self.set.id != attrs['setid']:
raise ValueError, "Set ID does not match"
if 0 and self.set.langs.setdefault(None, attrs['srclang']) != attrs['srclang']:
raise ValueError, "Source language does not match"
self.lang = attrs['trglang']
self.root = 'tstset'
return None
def end_srcset(self, thing):
for sysid in self.sysids:
if sysid not in self.set.sysids:
self.set.sysids.append(sysid)
self.set.langs[sysid] = self.lang
end_refset = end_tstset = end_srcset
def start_doc(self, attrs):
attrs = attrs_to_dict(attrs)
self.doc = self.set.docs.setdefault(attrs['docid'], Document(attrs['docid']))
self.seg_i = 0
if self.root == 'srcset':
self.sysid = None
else:
self.sysid = attrs['sysid']
if self.sysid not in self.sysids:
self.sysids.append(self.sysid)
return None
def end_doc(self, thing):
pass
def start_seg(self, attrs):
thing = ('seg', attrs, 0, None)
attrs = attrs_to_dict(attrs)
if len(self.doc.segs)-1 < self.seg_i:
self.doc.segs.append(Segment(attrs.get('id', None)))
self.seg = self.doc.segs[self.seg_i]
if 0 and self.seg.id is not None and attrs.has_key('id') and self.seg.id != attrs['id']:
raise ValueError, "segment ids do not match (%s != %s)" % (str(self.seg.id), str(attrs.get('id', None)))
if self.seg.versions.has_key(self.sysid):
raise ValueError, "multiple versions from same system"
self.pos = 0
self.words = []
self.meta = []
return thing
def end_seg(self, thing):
(tag, attrs, i, j) = thing
self.meta.append((tag, attrs, i, self.pos))
self.seg_i += 1
self.seg.versions[self.sysid] = Sentence(self.words, self.meta)
self.words = None
"""# Special case for start and end of sentence
def start_s(self, attrs):
if self.words is not None:
self.pos += 1
self.words.append('<s>')
return None
def end_s(self, thing):
if self.words is not None:
self.pos += 1
self.words.append('</s>')"""
def start(self, tag, attrs):
if self.words is not None:
return (tag, attrs, self.pos, None)
else:
return None
def end(self, tag, thing):
if self.words is not None:
(tag, attrs, i, j) = thing
self.meta.append((tag, attrs, i, self.pos))
def handle_data(self, s):
if self.words is not None:
words = s.split()
self.pos += len(words)
self.words.extend(words)
if __name__ == "__main__":
s = Dataset()
for filename in sys.argv[1:]:
s.read_raw(filename, 'whatever', 'whatever', filename, 'English')
s.write(sys.stdout, 'tstset')

View File

@@ -1,119 +0,0 @@
#! /bin/bash
PrintUsageAndDie(){
echo "USAGE: enhanced-cmert.sh -d size [-active] [-help]"
echo " perform cmert on a subset of the feature scores"
echo " the ratios among not activated weights are not modified"
echo " Parameters (*=optional):"
echo " -d: the number of original features"
echo " -rootdir: the scripts root dir"
echo " -activate (*): comma-separated (or blank-separated) list of the indexes of active features"
echo " if not set, all features are optimized"
echo " -debug(*): debug information"
echo " -help(*): print his help"
echo
echo "Example: see examples in the directory example which are created with the script readme.txt"
exit
}
normalize_weights(){
perl -ne '{$tot=0;chomp;split;grep($tot+=($_>0)?$_:-$_,@_); grep($_/=$tot,@_); for ($i=0;$i<scalar(@_);$i++){printf STDOUT "%.6f ",$_[$i];};printf STDOUT "\n";}'
}
activeflag=0;
help=0
debug=""
if [ $# -lt 1 ] ; then PrintUsageAndDie ; fi
while [ $# -gt 0 ]
do
case $1 in
-help) help=1 ; shift 1 ; ;;
-d) size=$2 ; shift 2 ; ;;
-rootdir) SCRIPTS_ROOTDIR=$2 ; shift 2 ; ;;
-debug) debug="-debug"; shift 1 ; ;;
-activate) activeflag=1 ; activefields=$2 ; shift 2 ; ;;
*) shift $# ; ;;
esac
done
if [ $help == 1 ] ; then PrintUsageAndDie ; fi
# call the basic mert command
if [ $activeflag == 0 ] ; then
$SCRIPTS_ROOTDIR/training/cmert-0.5/mert -d $size
exit
fi
# else
if [ $debug ] ; then echo "names of active fields: $activefields" ; fi
#get indexes of active fields from file "names.txt"
oldname="__FALSE_NAME__"
name="__FALSE_NAME__"
separator="_"
i=1 lastj=1
for name in `cat names.txt` ; do
if [ $name == $oldname ] ; then i=$(( i + 1 )) ; else i=1 ; fi
arrayname[$lastj]=$name
arrayname2[$lastj]=$name$separator$i
lastj=$(( lastj + 1 ))
oldname=$name
done
#map feature names into feature indexes
out=""
for name in `echo $activefields | tr ',' ' ' ` ; do
match=0; j=1
while [ $j -lt $lastj ] ; do
if [ ${arrayname[$j]} == $name -o ${arrayname2[$j]} == "$name" ] ; then
match=$j
if [ $out ] ; then out="$out,$j" ; else out="$j" ; fi
fi
j=$(( j + 1 ))
done
if [ $match -eq 0 ] ; then echo "feature $name you are asking for is not present" ; fi
done
activefields=`echo $out | tr ',' '\012' | sort -nu | tr '\012' ',' | perl -pe 's/\,$//' `
if [ $debug ] ; then echo "indexes of active fields: $activefields" ; fi
#filter active fields, perform cmert and ...
tmpdir=tmp$$
mkdir -p $tmpdir
for file in feats.opt init.opt ; do
mv $file $tmpdir
done
cat $tmpdir/init.opt | tail -1 > $tmpdir/weight.opt
cat $tmpdir/init.opt | perl $SCRIPTS_ROOTDIR/training/cmert-0.5/reduce-field.pl $debug -weight $tmpdir/weight.opt -d $size -activate $activefields | perl -pe 's/^\S+ /1 /' > init.opt
cat $tmpdir/feats.opt | perl $SCRIPTS_ROOTDIR/training/cmert-0.5/reduce-field.pl $debug -weight $tmpdir/weight.opt -d $size -activate $activefields > feats.opt
active=`cat init.opt | head -1 | awk '{print NF}'`
$SCRIPTS_ROOTDIR/training/cmert-0.5/mert -d $active 2> reduced_cmert.log
for file in feats.opt init.opt; do
mv $file reduced_$file
mv $tmpdir/$file $file
done
mv weights.txt reduced_weights.txt
cat reduced_weights.txt | perl $SCRIPTS_ROOTDIR/training/cmert-0.5/extend-field.pl $debug -weight $tmpdir/weight.opt -d $size -activate $activefields | normalize_weights > weights.txt
rm -r $tmpdir
bestpointline=`echo "Best point:"`
bestpointline="$bestpointline "`cat weights.txt`
bestpointline="$bestpointline => "`cat reduced_cmert.log | grep -i "Best point:" | awk '{print $NF}'`
echo $bestpointline > /dev/stderr
exit

View File

@@ -1 +0,0 @@
0 10

View File

@@ -1,10 +0,0 @@
4.0 383.916 60.6749 113.308 28.7833 94.443 -27.9971 66.0 49 66 27 65 16 64 10 63 67
6.0 370.709 67.0555 105.849 37.0838 85.7675 -29.9969 64.0 49 64 29 63 17 62 10 61 67
10.0 415.511 57.7613 97.1628 27.7191 83.3125 -28.997 68.0 54 68 30 67 19 66 13 65 67
6.0 412.823 59.5607 99.215 28.2344 82.0559 -28.997 67.0 53 67 32 66 20 65 13 64 67
4.0 422.048 56.6241 97.204 28.6241 80.8079 -28.997 67.0 52 67 30 66 19 65 13 64 67
4.0 392.685 60.6979 105.33 28.4244 90.094 -28.997 66.0 51 66 29 65 17 64 11 63 67
6.0 365.877 69.0651 108.001 37.33 83.4477 -31.9967 63.0 49 63 29 62 17 61 10 60 67
6.0 418.054 57.5832 97.2047 26.9759 83.6841 -29.9969 68.0 54 68 32 67 20 66 13 65 67
6.0 375.021 64.0915 103.471 38.6084 84.3162 -29.9969 63.0 49 63 28 62 16 61 10 60 67
6.0 364.308 71.1182 110.425 35.7858 82.8551 -30.9968 63.0 49 63 29 62 17 61 10 60 67

View File

@@ -1,3 +0,0 @@
0 0 0 0 0 0 -1 -1
1 2 2 2 2 2 1 1
1 1 0.3 0.2 0.2 0.3 0 0

View File

@@ -1 +0,0 @@
d lm tm tm tm tm tm w

View File

@@ -1,12 +0,0 @@
mkdir -p example1
../enhanced-mert -d 8 >& cmert.log
mv cmert.log weights.txt example1
mkdir -p example2
../enhanced-mert -d 8 -activate lm,tm_2,tm_5,w >& cmert.log
mv cmert.log weights.txt reduced_* example2
mkdir -p example3
../enhanced-mert -d 8 -activate d,tm_1,tm_5 >& cmert.log
mv cmert.log weights.txt reduced_* example3

View File

@@ -1,80 +0,0 @@
#! /usr/bin/perl
sub PrintArgsAndDie () {
print stderr "USAGE: extend-field.pl [-h] \n";
print stderr "This scripts extend the number of active fields for the mert procedure. (See the dual script reduce-field.pl)\n";
exit(1);
}
my $weightfile="";
my $size=-1;
my $activefields="";
while (@ARGV){
if ($ARGV[0] eq "-h"){
&PrintArgsAndDie();
}
if ($ARGV[0] eq "-debug"){
$debug=1;
shift(@ARGV);
}
if ($ARGV[0] eq "-weight"){
$weightfile=$ARGV[1];
shift(@ARGV); shift(@ARGV);
}
if ($ARGV[0] eq "-d"){
$size=$ARGV[1];
shift(@ARGV); shift(@ARGV);
}
if ($ARGV[0] eq "-activate"){
$activefields=$ARGV[1];
shift(@ARGV); shift(@ARGV);
}
}
die "Cannot open/find weight file ($weightfile)\n" if ! -e $weightfile;
my @weight=();
open(IN,$weightfile);
chomp($weight=<IN>);
close(IN);
push @weight,(0,split(/[ \t]+/,$weight));
my @active=();
my @invertedactive=();
if ($activefields eq ""){
for (my $i=1; $i<=$size; $i++){ $active[$i]=1; };
}else{
@active=split(/,/,$activefields);
}
for (my $i=0; $i<=$size; $i++){ $invertedactive[$i]=0; };
for (my $i=0; $i<scalar(@active); $i++){ $invertedactive[$active[$i]]=1; };
my $j=0;
for (my $i=1; $i<=$size; $i++){ if (!$invertedactive[$i]){$notactive[$j]=$i; $j++}};
if ($debug>0){
print STDERR "ORIGINAL SIZE: $size\n";
print STDERR "ORIGINAL WEIGHTS: @weight\n";
print STDERR "ORIGINAL ACTIVE: @active\n";
print STDERR "ORIGINAL NOTACTIVE: @notactive\n";
print STDERR "ORIGINAL INVERTEDACTIVE: @invertedactive\n";
}
while(chomp($_=<STDIN>)){
@field=split(/[ \t]+/,$_);
my $j=1;
for (my $i=1; $i<=$size; $i++){
if ($invertedactive[$i]){
print STDOUT "$field[$j] ";
print STDERR "j:$j i:$i -> $field[$j]\n" if $debug>0;
$j++;
}else{
printf STDOUT "%.6f ",$field[0]*$weight[$i];
print STDERR "i:$i -> $field[0] $weight[$i]\n" if $debug>0;
}
};
print STDOUT "\n";
}

View File

@@ -1,19 +0,0 @@
#!/usr/bin/python
# $Id$
import sys
level = 1
file = sys.stderr
def writeln(s=""):
file.write("%s\n" % s)
file.flush()
def write(s):
file.write(s)
file.flush()

View File

@@ -1,27 +0,0 @@
#!/usr/bin/perl -w
if ($#ARGV != 2) {
die "usage: makeinitopt <ranges> <weightfile> <rangefile>"
}
$s = $ARGV[0];
$woutput = $ARGV[1];
$routput = $ARGV[2];
open WOUT, ">$woutput" || die "couldn't open $woutput";
open ROUT, ">$routput" || die "couldn't open $routput";
@w = ();
@lo = ();
@hi = ();
foreach $x (split(/;/, $s)) {
if ($x =~ /(.*),(-?[\d.]+)-(-?[\d.]+)/) {
push(@w, $1);
push(@lo, $2);
push(@hi, $3);
} else {
print STDERR "bad weight range: $x\n";
}
}
print WOUT join(" ", @w), "\n";
print ROUT join(" ", @lo), "\n";
print ROUT join(" ", @hi), "\n";

View File

@@ -1,88 +0,0 @@
#!/bin/sh
WORKDIR=$1
if [ ! -d $WORKDIR ]; then
mkdir -p $WORKDIR
fi
SRCFILE=$2
REFPREFIX=$3
REFFILES=$REFPREFIX[0-9]*
NBEST=$4
DECODER=$5
DECODEROPTS=$6
RANGES=$7
START=$8
#default pwdcmd is pwd
#pwdcmd is pawd if exists
PWDCMD="pwd"
___PWDCMD=`which pawd | head -1 | awk '{print $1}'`
if [ $___PWDCMD -a -e $___PWDCMD ] ; then PWDCMD=$___PWDCMD ; fi;
RUNDIR=`$PWDCMD`
makeinitopt "$RANGES" $WORKDIR/weights.txt $WORKDIR/ranges.txt
DIM=`cat $WORKDIR/weights.txt | awk '{print NF; exit}'`
echo $DIM dimensions
PATH=/group/project/statmt/pkoehn/user/abhishek:/group/project/statmt/pkoehn/user/abhishek/cmert-0.5:$PATH
export PATH
date
echo Reference sets: $REFFILES
if [ "x$START" == "x" ]; then
START=1
fi
I=$START
PREVLINECOUNT=0
#$DECODEROPTS =~ s / \-f / -config /;
#$DECODEROPTS =~ s/^\-f /-config /;
filename=$WORKDIR/run$I.best$NBEST.out
while true; do
echo Run decoder
WEIGHTS=`cat $WORKDIR/weights.txt`
###Changes - AA 29/11/05
#echo "$DECODER $NBEST \"$WEIGHTS\" $WORKDIR/run$I \"$DECODEROPTS\" < $SRCFILE > $WORKDIR/run$I.nbest"
#$DECODER $NBEST \"$WEIGHTS\" $WORKDIR/run$I \"$DECODEROPTS\" < $SRCFILE > $WORKDIR/run$I.nbest
echo "$DECODER $DECODEROPTS \"$WEIGHTS\" -n-best-list $filename $NBEST < $SRCFILE > $WORKDIR/run$I.nbest"
$DECODER $DECODEROPTS "$WEIGHTS" -n-best-list $filename $NBEST < $SRCFILE > $WORKDIR/run$I.nbest
echo Calculate BLEU component scores
sort -mn -t\| -k 1,1 $WORKDIR/run*.nbest | score-nbest.py $REFFILES $WORKDIR/
#LINECOUNT=`cat $WORKDIR/feats.opt | awk '{n++} END {print n}'`
LINECOUNT=`cat $WORKDIR/cands.opt | awk '{n += $2} END {print n}'`
echo $LINECOUNT accumulated translations
if [ $LINECOUNT -le $PREVLINECOUNT ]; then
echo "Training finished"
date
break
fi
echo Optimize feature weights
cd $WORKDIR
cat ranges.txt weights.txt > init.opt
rm -f weights.txt
mert -d$DIM
cd $RUNDIR
if [ "x`cat $WORKDIR/weights.txt`" == "x" ]; then
echo Optimization failed
break
fi
I=`expr $I + 1`
PREVLINECOUNT=$LINECOUNT
date
done

View File

@@ -1,432 +0,0 @@
// $Id$
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "data.h"
#include "point.h"
#include "score.h"
int verbose = 2;
float min_interval = 1e-3;
typedef struct {
float x;
int cand;
int *delta_comps;
} intersection_t;
intersection_t *new_intersection(float x, int cand, int *comps1, int *comps2) {
intersection_t *inter;
int i;
inter = malloc(sizeof(intersection_t));
inter->x = x;
inter->cand = cand; // this is not used but sometimes it's handy
inter->delta_comps = malloc(comps_n * sizeof(int));
for (i=0; i<comps_n; i++)
inter->delta_comps[i] = comps1[i]-comps2[i];
return inter;
}
void intersection_delete(intersection_t *inter) {
free(inter->delta_comps);
free(inter);
}
int compare_intersections(intersection_t **i1, intersection_t **i2) {
if ((*i1)->x == (*i2)->x)
return 0;
else if ((*i1)->x < (*i2)->x)
return -1;
else
return 1;
}
float slow_bleu(data_t *data, point_t *point) {
int sent_i, cand_i, cand_n, i;
candidate_t *cands;
float p, best_p;
int best;
int *comps;
float score;
int ties, totalties;
comps = calloc(comps_n, sizeof(int));
totalties = 0;
for (sent_i = 0; sent_i < data->sents_n; sent_i++) {
cands = data->sents[sent_i];
cand_n = data->cands_n[sent_i];
ties = 0;
best = 0;
best_p = point_dotproduct(point, cands[0].features);
for (cand_i = 1; cand_i < cand_n; cand_i++) {
p = point_dotproduct(point, cands[cand_i].features);
if (p > best_p) {
best_p = p;
best = cand_i;
ties = 0;
} else if (p == best_p) {
ties++;
}
}
totalties += ties;
comps_addto(comps, cands[best].comps);
}
//point_print(point, stderr, 1);
//fprintf(stderr, "\n");
//fprintf(stderr, "slow bleu => %f\n", compute_score(comps));
score = compute_score(comps);
free(comps);
return score;
}
/* Global optimization along a line (Och, 2004) */
point_t *line_optimize(data_t *data, point_t *origin, point_t *dir) {
int sent_i, cand_i, cand_n, intersection_i;
candidate_t *cands;
static intersection_t **intersections = NULL;
intersection_t *inter;
static int intersection_max;
int intersection_n = 0;
int prev, leftmost;
float x, leftmost_x, prev_x, best_x;
float score, best_score;
int *comps;
point_t *point;
int first;
if (!origin->has_score)
point_set_score(origin, slow_bleu(data, origin));
if (verbose >= 2) {
fprintf(stderr, "starting point: ");
point_print(origin, stderr, 1);
fprintf(stderr, "\n direction: ");
point_print(dir, stderr, 1);
fprintf(stderr, "\n");
}
comps = calloc(comps_n, sizeof(int));
if (intersections == NULL) {
intersection_max = 10;
intersections = malloc(intersection_max*sizeof(intersection_t *));
}
for (sent_i = 0; sent_i < data->sents_n; sent_i++) {
cands = data->sents[sent_i];
cand_n = data->cands_n[sent_i];
if (verbose >= 3)
fprintf(stderr, "sentence %d\n", sent_i);
if (cand_n < 1)
continue;
/* calculate slopes and intercepts */
for (cand_i = 0; cand_i < cand_n; cand_i++) {
cands[cand_i].m = point_dotproduct(dir, cands[cand_i].features);
cands[cand_i].b = point_dotproduct(origin, cands[cand_i].features);
}
/* find intersection points */
/* find best candidate for x -> -inf */
prev = -1;
for (cand_i = 0; cand_i < cand_n; cand_i++)
if (prev < 0 ||
cands[cand_i].m < cands[prev].m ||
cands[cand_i].m == cands[prev].m && cands[prev].b < cands[cand_i].b)
prev = cand_i;
if (verbose >= 3) {
fprintf(stderr, "x->-inf cand %d\n", prev);
}
comps_addto(comps, cands[prev].comps);
first = 1;
while (1) {
// find leftmost intersection
leftmost = -1;
for (cand_i = 0; cand_i < cand_n; cand_i++) {
if (cands[prev].m == cands[cand_i].m) {
if (cands[cand_i].b > cands[cand_i].b)
fprintf(stderr, "two parallel lines and discarding the higher -- this shouldn't happen\n");
continue; // no intersection
}
/* optimization: piecewise linear function must be concave up.
Maybe it would be still faster to sort by slope beforehand */
if (cands[cand_i].m < cands[prev].m)
continue;
x = -(cands[prev].b-cands[cand_i].b)/(cands[prev].m-cands[cand_i].m);
if (leftmost < 0 || x < leftmost_x) {
leftmost = cand_i;
leftmost_x = x;
}
}
if (leftmost < 0)
break; // no more intersections
/* Require that the intersection point be at least min_interval
to the right of the previous one. If not, we replace the
previous intersection point with this one. Yes, it can even
happen that the new intersection point is slightly to the
left of the old one, because of numerical imprecision. We
don't check that the new point is also min_interval to the
right of the penultimate one. In that case, the points would
switch places in the sort, resulting in a bogus score for
that interval. */
if (first || leftmost_x - prev_x > min_interval) {
if (intersection_n == intersection_max) {
intersection_max *= 2;
intersections = realloc(intersections, intersection_max*sizeof(intersection_t));
if (intersections == NULL)
fprintf(stderr, "couldn't realloc intersections\n");
}
intersections[intersection_n++] = new_intersection(leftmost_x, leftmost, cands[leftmost].comps, cands[prev].comps);
} else {
// replace the old one
inter = new_intersection(leftmost_x, leftmost, cands[leftmost].comps, cands[prev].comps);
comps_addto(inter->delta_comps, intersections[intersection_n-1]->delta_comps);
intersection_delete(intersections[intersection_n-1]);
intersections[intersection_n-1] = inter;
}
if (verbose >= 3)
fprintf(stderr, "found intersection point: %f, right cand %d\n", leftmost_x, leftmost);
prev = leftmost;
prev_x = leftmost_x;
first = 0;
}
}
best_score = compute_score(comps);
//fprintf(stderr, "x->-inf => %f\n", best_score);
if (intersection_n == 0)
best_x = 0.0;
else {
qsort(intersections, intersection_n, sizeof(intersection_t *), (int(*)(const void *, const void *))compare_intersections);
best_x = intersections[0]->x - 1000.0; // whatever
}
for (intersection_i = 0; intersection_i < intersection_n; intersection_i++) {
comps_addto(comps, intersections[intersection_i]->delta_comps);
score = compute_score(comps);
//fprintf(stderr, "x=%f => %f\n", intersections[intersection_i]->x, score);
if (score > best_score) {
best_score = score;
if (intersection_i+1 < intersection_n)
// what if interval is zero-width?
best_x = 0.5*(intersections[intersection_i]->x + intersections[intersection_i+1]->x);
else
best_x = intersections[intersection_i]->x + 0.1; // whatever
}
}
//fprintf(stderr, "best_x = %f\n", best_x);
point = point_copy(dir);
point_multiplyby(point, best_x);
point_addto(point, origin);
point_set_score(point, best_score);
if (verbose >= 2) {
fprintf(stderr, " ending point: ");
point_print(point, stderr, 1);
fprintf(stderr, "\n");
//check_comps(data, point, comps);
}
for (intersection_i = 0; intersection_i < intersection_n; intersection_i++)
intersection_delete(intersections[intersection_i]);
free(comps);
if (best_score < origin->score) {
/* this can happen in the case of a tie between two candidates with different bleu component scores. just trash the point and return the starting point */
point_delete(point);
return point_copy(origin);
}
return point;
}
point_t *optimize_powell(data_t *data, point_t *point) {
int i;
point_t **u, **p;
float biggestwin, totalwin, extrapolatedwin;
int biggestwin_i;
point_t *point_e;
u = malloc(dim*sizeof(point_t *));
p = malloc(dim*sizeof(point_t *));
point = point_copy(point);
if (!point->has_score)
point_set_score(point, slow_bleu(data, point));
for (i=0; i<dim; i++) {
u[i] = new_point();
u[i]->weights[i] = 1.0;
}
while (1) {
p[0] = line_optimize(data, point, u[0]);
biggestwin_i = 0;
biggestwin = p[0]->score - point->score;
for (i=1; i<dim; i++) {
p[i] = line_optimize(data, p[i-1], u[i]);
if (p[i]->score - p[i-1]->score > biggestwin) {
biggestwin_i = i;
biggestwin = p[i]->score - p[i-1]->score;
}
}
totalwin = p[dim-1]->score - point->score;
if (totalwin < 0.000001)
break;
// last point minus first point
point_multiplyby(point, -1.0);
point_addto(point, p[dim-1]);
point_e = point_copy(point);
point_addto(point_e, p[dim-1]);
point_set_score(point_e, slow_bleu(data, point_e));
extrapolatedwin = point_e->score - point->score; // point->score is the original point
if (extrapolatedwin > 0 &&
2*(2*totalwin - extrapolatedwin) *
powf(totalwin - biggestwin, 2.0f) <
powf(extrapolatedwin, 2.0f)*biggestwin) {
// replace dominant direction vector with sum vector
point_delete(u[biggestwin_i]);
point_normalize(point);
u[biggestwin_i] = point;
}
point_delete(point_e);
// optimization continues with last point
point = p[dim-1];
for (i=0; i<dim-1; i++)
if (i != biggestwin_i)
point_delete(p[i]);
}
for (i=0; i<dim; i++)
point_delete(u[i]);
free(u);
free(p);
point_normalize(point);
return point;
}
point_t *optimize_koehn(data_t *data, point_t *point) {
point_t *dir, **newpoints;
int dir_i;
int best_dir = -1;
dir = new_point();
newpoints = malloc(dim*sizeof(point_t *));
point = point_copy(point);
while (1) {
for (dir_i = 0; dir_i < dim; dir_i++) {
dir->weights[dir_i] = 1.0;
newpoints[dir_i] = line_optimize(data, point, dir);
if (best_dir < 0 || newpoints[dir_i]->score > newpoints[best_dir]->score)
best_dir = dir_i;
dir->weights[dir_i] = 0.0;
}
if (point->has_score && newpoints[best_dir]->score - point->score < 0.000001)
break;
point_delete(point);
point = newpoints[best_dir];
// discard the other points
for (dir_i = 0; dir_i < dim; dir_i++)
if (dir_i != best_dir)
point_delete(newpoints[dir_i]);
}
point_delete(dir);
free(newpoints);
point_normalize(point);
return point;
}
void usage(void) {
fprintf(stderr, "usage: mert -d <dimensions>\n");
exit(1);
}
int main (int argc, char **argv) {
int point_i;
int points_n = 20;
point_t *min, *max;
data_t *data;
point_t *bestpoint, *newpoint, *startpoint;
int i, c;
FILE *fp;
while ((c = getopt(argc, argv, "d:n:")) != -1) {
switch (c) {
case 'd':
dim = strtol(optarg, NULL, 10);
break;
case 'n':
points_n = strtol(optarg, NULL, 10);
break;
default:
usage();
}
}
argc -= optind;
argv += optind;
if (dim < 0)
usage();
if ((data = read_data()) == NULL) exit(1);
fp = fopen("init.opt", "r");
if ((min = read_point(fp)) == NULL) exit(1);
if ((max = read_point(fp)) == NULL) exit(1);
if ((startpoint = read_point(fp)) == NULL) exit(1);
fclose(fp);
bestpoint = NULL;
for (point_i=0; point_i<points_n; point_i++) {
fprintf(stderr, "*** point %d ***\n", point_i);
if (point_i == 0)
newpoint = startpoint;
else
newpoint = random_point(min, max);
newpoint = optimize_koehn(data, newpoint);
if (bestpoint == NULL || newpoint->score > bestpoint->score)
bestpoint = newpoint; // who cares about the leak
}
fprintf(stderr, "Best point: ");
point_print(bestpoint, stderr, 1);
fprintf(stderr, "\n");
fp = fopen("weights.txt", "w");
point_print(bestpoint, fp, 0);
fprintf(fp, "\n");
fclose(fp);
}

View File

@@ -1,117 +0,0 @@
// $Id$
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "point.h"
int dim = -1;
point_t *new_point() {
point_t *point;
point = malloc(sizeof(point_t));
point->score = 0.0;
point->weights = calloc(dim, sizeof(float));
point->has_score = 0;
return point;
}
void point_set_score(point_t *point, float score) {
point->has_score = 1;
point->score = score;
}
void point_delete(point_t *point) {
free(point->weights);
free(point);
}
point_t *random_point(point_t *min, point_t *max) {
int i;
point_t *point = new_point();
for (i=0; i<dim; i++)
point->weights[i] = min->weights[i] + (float)random()/RAND_MAX * (max->weights[i]-min->weights[i]);
return point;
}
point_t *point_copy(point_t *point) {
point_t *newpoint;
int i;
newpoint = new_point();
newpoint->score = point->score;
newpoint->has_score = point->has_score;
for (i=0; i<dim; i++)
newpoint->weights[i] = point->weights[i];
return newpoint;
}
float point_dotproduct(point_t *point, float *y) {
float result;
int i;
result = 0.0;
for (i=0; i<dim; i++)
result += point->weights[i] * y[i];
return result;
}
/* Destructive operations */
void point_multiplyby(point_t *point, float k) {
int i;
for (i=0; i<dim; i++)
point->weights[i] *= k;
}
void point_addto(point_t *point1, point_t *point2) {
int i;
for (i=0; i<dim; i++)
point1->weights[i] += point2->weights[i];
}
void point_normalize(point_t *point) {
int i;
float norm = 0.0;
for (i=0; i<dim; i++)
//norm += point->weights[i] * point->weights[i];
norm += fabs(point->weights[i]);
// norm = sqrt(norm);
for (i=0; i<dim; i++)
point->weights[i] /= norm;
}
void point_print(point_t *point, FILE *fp, int with_score) {
int i;
fprintf(fp, "%f", point->weights[0]);
for (i=1; i<dim; i++)
fprintf(fp, " %f", point->weights[i]);
if (point->has_score && with_score)
fprintf(fp, " => %f", point->score);
}
point_t *read_point(FILE *fp) {
static char buf[1000];
char *tok, *s;
int field;
point_t *point;
point = new_point();
fgets(buf, sizeof(buf), fp);
s = buf;
field = 0;
while ((tok = strsep(&s, " \t\n")) != NULL) {
if (!*tok) // empty token
continue;
if (field >= dim) {
fprintf(stderr, "read_point(): too many fields in line\n");
return NULL;
} else
point->weights[field] = strtod(tok, NULL);
field++;
}
if (field < dim) {
fprintf(stderr, "read_point(): wrong number of fields in line\n");
return NULL;
}
return point;
}

View File

@@ -1,26 +0,0 @@
// $Id$
#ifndef POINT_H
#define POINT_H
typedef struct {
float *weights;
int has_score;
float score;
} point_t;
extern int dim;
point_t *new_point();
void point_set_score(point_t *point, float score);
void point_delete(point_t *point);
point_t *point_copy(point_t *point);
point_t *random_point(point_t *min, point_t *max);
float point_dotproduct(point_t *point, float *y);
void point_multiplyby(point_t *point, float k);
void point_normalize(point_t *point);
void point_addto(point_t *point1, point_t *point2);
#include <stdio.h>
point_t *read_point(FILE *fp);
void point_print(point_t *point, FILE *fp, int with_score);
#endif

View File

@@ -1,57 +0,0 @@
###########################################################################
#
# Psyco top-level file of the Psyco package.
# Copyright (C) 2001-2002 Armin Rigo et.al.
"""Psyco -- the Python Specializing Compiler.
Typical usage: add the following lines to your application's main module:
try:
import psyco
psyco.profile()
except:
print 'Psyco not found, ignoring it'
"""
###########################################################################
#
# This module is present to make 'psyco' a package and to
# publish the main functions and variables.
#
# More documentation can be found in core.py.
#
# Try to import the dynamic-loading _psyco and report errors
try:
import _psyco
except ImportError, e:
extramsg = ''
import sys, imp
try:
file, filename, (suffix, mode, type) = imp.find_module('_psyco', __path__)
except ImportError:
ext = [suffix for suffix, mode, type in imp.get_suffixes()
if type == imp.C_EXTENSION]
if ext:
extramsg = (" (cannot locate the compiled extension '_psyco%s' "
"in the package path '%s')" % (ext[0], '; '.join(__path__)))
else:
extramsg = (" (check that the compiled extension '%s' is for "
"the correct Python version; this is Python %s)" %
(filename, sys.version.split()[0]))
raise ImportError, str(e) + extramsg
# Publish important data by importing them in the package
from support import __version__, error, warning, _getrealframe, _getemulframe
from support import version_info, __version__ as hexversion
from core import full, profile, background, runonly, stop, cannotcompile
from core import log, bind, unbind, proxy, unproxy, dumpcodebuf
from _psyco import setfilter
try:
from _psyco import compact, compacttype # Python 2.2 and above only
except ImportError:
pass

View File

@@ -1,53 +0,0 @@
###########################################################################
#
# Psyco class support module.
# Copyright (C) 2001-2002 Armin Rigo et.al.
"""Psyco class support module.
'psyco.classes.psyobj' is an alternate Psyco-optimized root for classes.
Any class inheriting from it or using the metaclass '__metaclass__' might
get optimized specifically for Psyco. It is equivalent to call
psyco.bind() on the class object after its creation.
Note that this module has no effect with Python version 2.1 or earlier.
Importing everything from psyco.classes in a module will import the
'__metaclass__' name, so all classes defined after a
from psyco.classes import *
will automatically use the Psyco-optimized metaclass.
"""
###########################################################################
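# A minimal sketch of the two idioms described in the docstring above; the
# class names are made up:
#
#   from psyco.classes import psyobj
#   class Grid(psyobj):            # this one class is Psyco-optimized
#       pass
#
#   from psyco.classes import *    # or: the imported __metaclass__ makes every
#   class Cell:                    # class defined after it Psyco-optimized
#       pass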
__all__ = ['psyobj', 'psymetaclass', '__metaclass__']
# Python version check
try:
from _psyco import compacttype
except ImportError:
class psyobj: # compatibility
pass
psymetaclass = None
else:
# version >= 2.2 only
import core
from types import FunctionType
class psymetaclass(compacttype):
"Psyco-optimized meta-class. Turns all methods into Psyco proxies."
def __new__(cls, name, bases, dict):
bindlist = dict.get('__psyco__bind__')
if bindlist is None:
bindlist = [key for key, value in dict.items()
if isinstance(value, FunctionType)]
for attr in bindlist:
dict[attr] = core.proxy(dict[attr])
return super(psymetaclass, cls).__new__(cls, name, bases, dict)
psyobj = psymetaclass("psyobj", (), {})
__metaclass__ = psymetaclass

View File

@@ -1,232 +0,0 @@
###########################################################################
#
# Psyco main functions.
# Copyright (C) 2001-2002 Armin Rigo et.al.
"""Psyco main functions.
Here are the routines that you can use from your applications.
These are mostly interfaces to the C core, but they depend on
the Python version.
You can use these functions from the 'psyco' module instead of
'psyco.core', e.g.
import psyco
psyco.log('/tmp/psyco.log')
psyco.profile()
"""
###########################################################################
import _psyco
import types, new
from support import *
# Default charge profiler values
default_watermark = 0.09 # between 0.0 (0%) and 1.0 (100%)
default_halflife = 0.5 # seconds
default_pollfreq_profile = 20 # Hz
default_pollfreq_background = 100 # Hz -- a maximum for sleep's resolution
default_parentframe = 0.25 # should not be more than 0.5 (50%)
def full(memory=None, time=None, memorymax=None, timemax=None):
"""Compile as much as possible.
Typical use is for small scripts performing intensive computations
or string handling."""
import profiler
if PYTHON_SUPPORT:
p = profiler.FullCompiler()
else:
p = profiler.ActiveProfiler(0.0, 0.5)
p.run(memory, time, memorymax, timemax)
def profile(watermark = default_watermark,
halflife = default_halflife,
pollfreq = default_pollfreq_profile,
parentframe = default_parentframe,
memory=None, time=None, memorymax=None, timemax=None):
"""Turn on profiling.
The 'watermark' parameter controls how easily running functions will
be compiled. The smaller the value, the more functions are compiled."""
import profiler
p = profiler.ActivePassiveProfiler(watermark, halflife,
pollfreq, parentframe)
p.run(memory, time, memorymax, timemax)
def background(watermark = default_watermark,
halflife = default_halflife,
pollfreq = default_pollfreq_background,
parentframe = default_parentframe,
memory=None, time=None, memorymax=None, timemax=None):
"""Turn on passive profiling.
This is a very lightweight mode in which only intensively computing
functions can be detected. The smaller the 'watermark', the more functions
are compiled."""
import profiler
p = profiler.PassiveProfiler(watermark, halflife, pollfreq, parentframe)
p.run(memory, time, memorymax, timemax)
def runonly(memory=None, time=None, memorymax=None, timemax=None):
"""Nonprofiler.
XXX check if this is useful and document."""
if PYTHON_SUPPORT:
import profiler
p = profiler.RunOnly()
p.run(memory, time, memorymax, timemax)
def stop():
"""Turn off all automatic compilation. bind() calls remain in effect."""
import profiler
profiler.go([])
def log(logfile='', mode='w', top=10):
"""Enable logging to the given file.
If the file name is unspecified, a default name is built by appending
a 'log-psyco' extension to the main script name.
Mode is 'a' to append to a possibly existing file or 'w' to overwrite
an existing file. Note that the log file may grow quickly in 'a' mode."""
import profiler, logger
if not logfile:
import os
logfile, dummy = os.path.splitext(sys.argv[0])
if os.path.basename(logfile):
logfile += '.'
logfile += 'log-psyco'
if hasattr(_psyco, 'VERBOSE_LEVEL'):
print >> sys.stderr, 'psyco: logging to', logfile
# logger.current should be a real file object; subtle problems
# will show up if its write() and flush() methods are written
# in Python, as Psyco will invoke them while compiling.
logger.current = open(logfile, mode)
logger.print_charges = top
profiler.logger = logger
logger.writedate('Logging started')
cannotcompile(logger.psycowrite)
_psyco.statwrite(logger=logger.psycowrite)
def bind(x, rec=None):
"""Enable compilation of the given function, method, or class object.
If C is a class (or anything with a '__dict__' attribute), bind(C) will
rebind all functions and methods found in C.__dict__ (which means, for
classes, all methods defined in the class but not in its parents).
The optional second argument specifies the number of recursive
compilation levels: all functions called by func are compiled
up to the given depth of indirection."""
if isinstance(x, types.MethodType):
x = x.im_func
if isinstance(x, types.FunctionType):
if rec is None:
x.func_code = _psyco.proxycode(x)
else:
x.func_code = _psyco.proxycode(x, rec)
return
if hasattr(x, '__dict__'):
funcs = [o for o in x.__dict__.values()
if isinstance(o, types.MethodType)
or isinstance(o, types.FunctionType)]
if not funcs:
raise error, ("nothing bindable found in %s object" %
type(x).__name__)
for o in funcs:
bind(o, rec)
return
raise TypeError, "cannot bind %s objects" % type(x).__name__
def unbind(x):
"""Reverse of bind()."""
if isinstance(x, types.MethodType):
x = x.im_func
if isinstance(x, types.FunctionType):
try:
f = _psyco.unproxycode(x.func_code)
except error:
pass
else:
x.func_code = f.func_code
return
if hasattr(x, '__dict__'):
for o in x.__dict__.values():
if (isinstance(o, types.MethodType)
or isinstance(o, types.FunctionType)):
unbind(o)
return
raise TypeError, "cannot unbind %s objects" % type(x).__name__
def proxy(x, rec=None):
"""Return a Psyco-enabled copy of the function.
The original function is still available for non-compiled calls.
The optional second argument specifies the number of recursive
compilation levels: all functions called by func are compiled
up to the given depth of indirection."""
if isinstance(x, types.FunctionType):
if rec is None:
code = _psyco.proxycode(x)
else:
code = _psyco.proxycode(x, rec)
return new.function(code, x.func_globals, x.func_name)
if isinstance(x, types.MethodType):
p = proxy(x.im_func, rec)
return new.instancemethod(p, x.im_self, x.im_class)
raise TypeError, "cannot proxy %s objects" % type(x).__name__
def unproxy(proxy):
"""Return a new copy of the original function of method behind a proxy.
The result behaves like the original function in that calling it
does not trigger compilation nor execution of any compiled code."""
if isinstance(proxy, types.FunctionType):
return _psyco.unproxycode(proxy.func_code)
if isinstance(proxy, types.MethodType):
f = unproxy(proxy.im_func)
return new.instancemethod(f, proxy.im_self, proxy.im_class)
raise TypeError, "%s objects cannot be proxies" % type(proxy).__name__
def cannotcompile(x):
"""Instruct Psyco never to compile the given function, method
or code object."""
if isinstance(x, types.MethodType):
x = x.im_func
if isinstance(x, types.FunctionType):
x = x.func_code
if isinstance(x, types.CodeType):
_psyco.cannotcompile(x)
else:
raise TypeError, "unexpected %s object" % type(x).__name__
def dumpcodebuf():
"""Write in file psyco.dump a copy of the emitted machine code,
provided Psyco was compiled with a non-zero CODE_DUMP.
See py-utils/httpxam.py to examine psyco.dump."""
if hasattr(_psyco, 'dumpcodebuf'):
_psyco.dumpcodebuf()
###########################################################################
# Psyco variables
# error * the error raised by Psyco
# warning * the warning raised by Psyco
# __in_psyco__ * a new built-in variable which is always zero, but which
# Psyco special-cases by returning 1 instead. So
# __in_psyco__ can be used in a function to know if
# that function is being executed by Psyco or not.

View File

@@ -1,133 +0,0 @@
###########################################################################
#
# Support code for the 'psyco.compact' type.
from __future__ import generators
try:
from UserDict import DictMixin
except ImportError:
# backported from Python 2.3 to Python 2.2
class DictMixin:
# Mixin defining all dictionary methods for classes that already have
# a minimum dictionary interface including getitem, setitem, delitem,
# and keys. Without knowledge of the subclass constructor, the mixin
# does not define __init__() or copy(). In addition to the four base
# methods, progressively more efficiency comes with defining
# __contains__(), __iter__(), and iteritems().
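# A minimal made-up example of that interface: the four base methods are
# defined, everything else (get, items, update, has_key, ...) comes from
# the mixin.
#
#   class LowerDict(DictMixin):
#       def __init__(self): self._d = {}
#       def __getitem__(self, key): return self._d[key.lower()]
#       def __setitem__(self, key, value): self._d[key.lower()] = value
#       def __delitem__(self, key): del self._d[key.lower()]
#       def keys(self): return self._d.keys()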
# second level definitions support higher levels
def __iter__(self):
for k in self.keys():
yield k
def has_key(self, key):
try:
value = self[key]
except KeyError:
return False
return True
def __contains__(self, key):
return self.has_key(key)
# third level takes advantage of second level definitions
def iteritems(self):
for k in self:
yield (k, self[k])
def iterkeys(self):
return self.__iter__()
# fourth level uses definitions from lower levels
def itervalues(self):
for _, v in self.iteritems():
yield v
def values(self):
return [v for _, v in self.iteritems()]
def items(self):
return list(self.iteritems())
def clear(self):
for key in self.keys():
del self[key]
def setdefault(self, key, default):
try:
return self[key]
except KeyError:
self[key] = default
return default
def pop(self, key, *args):
if len(args) > 1:
raise TypeError, "pop expected at most 2 arguments, got "\
+ repr(1 + len(args))
try:
value = self[key]
except KeyError:
if args:
return args[0]
raise
del self[key]
return value
def popitem(self):
try:
k, v = self.iteritems().next()
except StopIteration:
raise KeyError, 'container is empty'
del self[k]
return (k, v)
def update(self, other):
# Make progressively weaker assumptions about "other"
if hasattr(other, 'iteritems'): # iteritems saves memory and lookups
for k, v in other.iteritems():
self[k] = v
elif hasattr(other, '__iter__'): # iter saves memory
for k in other:
self[k] = other[k]
else:
for k in other.keys():
self[k] = other[k]
def get(self, key, default=None):
try:
return self[key]
except KeyError:
return default
def __repr__(self):
return repr(dict(self.iteritems()))
def __cmp__(self, other):
if other is None:
return 1
if isinstance(other, DictMixin):
other = dict(other.iteritems())
return cmp(dict(self.iteritems()), other)
def __len__(self):
return len(self.keys())
###########################################################################
from _psyco import compact # Python 2.2 and above only
class compactdictproxy(DictMixin):
def __init__(self, ko):
self._ko = ko # compact object of which 'self' is the dict
def __getitem__(self, key):
return compact.__getslot__(self._ko, key)
def __setitem__(self, key, value):
compact.__setslot__(self._ko, key, value)
def __delitem__(self, key):
compact.__delslot__(self._ko, key)
def keys(self):
return compact.__members__.__get__(self._ko)
def clear(self):
keys = self.keys()
keys.reverse()
for key in keys:
del self[key]
def __repr__(self):
keys = ', '.join(self.keys())
return '<compactdictproxy object {%s}>' % (keys,)

View File

@ -1,90 +0,0 @@
###########################################################################
#
# Psyco logger.
# Copyright (C) 2001-2002 Armin Rigo et.al.

"""Psyco logger.
See log() in core.py.
"""
###########################################################################

import _psyco
from time import time, localtime, strftime

current = None
print_charges = 10
dump_delay = 0.2
dump_last = 0.0

def write(s, level):
    t = time()
    f = t-int(t)
    current.write("%s.%02d %-*s%s\n" % (
        strftime("%X", localtime(int(t))),
        int(f*100.0), 63-level, s,
        "%"*level))
    current.flush()

def psycowrite(s):
    t = time()
    f = t-int(t)
    current.write("%s.%02d %-*s%s\n" % (
        strftime("%X", localtime(int(t))),
        int(f*100.0), 60, s.strip(),
        "% %"))
    current.flush()

##def writelines(lines, level=0):
## if lines:
## t = time()
## f = t-int(t)
## timedesc = strftime("%x %X", localtime(int(t)))
## print >> current, "%s.%03d %-*s %s" % (
## timedesc, int(f*1000),
## 50-level, lines[0],
## "+"*level)
## timedesc = " " * (len(timedesc)+5)
## for line in lines[1:]:
## print >> current, timedesc, line

def writememory():
    write("memory usage: %d+ kb" % _psyco.memory(), 1)

def dumpcharges():
    global dump_last
    if print_charges:
        t = time()
        if not (dump_last <= t < dump_last+dump_delay):
            if t <= dump_last+1.5*dump_delay:
                dump_last += dump_delay
            else:
                dump_last = t
            #write("%s: charges:" % who, 0)
            lst = _psyco.stattop(print_charges)
            if lst:
                f = t-int(t)
                lines = ["%s.%02d ______\n" % (
                    strftime("%X", localtime(int(t))),
                    int(f*100.0))]
                i = 1
                for co, charge in lst:
                    detail = co.co_filename
                    if len(detail) > 19:
                        detail = '...' + detail[-17:]
                    lines.append(" #%-3d |%4.1f %%| %-26s%20s:%d\n" %
                                 (i, charge*100.0, co.co_name, detail,
                                  co.co_firstlineno))
                    i += 1
                current.writelines(lines)
                current.flush()

def writefinalstats():
    dumpcharges()
    writememory()
    writedate("program exit")

def writedate(msg):
    write('%s, %s' % (msg, strftime("%x")), 20)

View File

@ -1,388 +0,0 @@
###########################################################################
#
# Psyco profiler (Python part).
# Copyright (C) 2001-2002 Armin Rigo et.al.

"""Psyco profiler (Python part).
The implementation of the non-time-critical parts of the profiler.
See profile() and full() in core.py for the easy interface.
"""
###########################################################################

import _psyco
from support import *
import math, time, types, atexit
now = time.time
try:
    import thread
except ImportError:
    import dummy_thread as thread

# current profiler instance
current = None

# enabled profilers, in order of priority
profilers = []

# logger module (when enabled by core.log())
logger = None

# a lock for a thread-safe go()
go_lock = thread.allocate_lock()

def go(stop=0):
    # run the highest-priority profiler in 'profilers'
    global current
    go_lock.acquire()
    try:
        prev = current
        if stop:
            del profilers[:]
        if prev:
            if profilers and profilers[0] is prev:
                return # best profiler already running
            prev.stop()
            current = None
        for p in profilers[:]:
            if p.start():
                current = p
                if logger: # and p is not prev:
                    logger.write("%s: starting" % p.__class__.__name__, 5)
                return
    finally:
        go_lock.release()
    # no profiler is running now
    if stop:
        if logger:
            logger.writefinalstats()
    else:
        tag2bind()

atexit.register(go, 1)

def buildfncache(globals, cache):
    if hasattr(types.IntType, '__dict__'):
        clstypes = (types.ClassType, types.TypeType)
    else:
        clstypes = types.ClassType
    for x in globals.values():
        if isinstance(x, types.MethodType):
            x = x.im_func
        if isinstance(x, types.FunctionType):
            cache[x.func_code] = x, ''
        elif isinstance(x, clstypes):
            for y in x.__dict__.values():
                if isinstance(y, types.MethodType):
                    y = y.im_func
                if isinstance(y, types.FunctionType):
                    cache[y.func_code] = y, x.__name__

# code-to-function mapping (cache)
function_cache = {}

def trytobind(co, globals, log=1):
    try:
        f, clsname = function_cache[co]
    except KeyError:
        buildfncache(globals, function_cache)
        try:
            f, clsname = function_cache[co]
        except KeyError:
            if logger:
                logger.write('warning: cannot find function %s in %s' %
                             (co.co_name, globals.get('__name__', '?')), 3)
            return # give up
    if logger and log:
        modulename = globals.get('__name__', '?')
        if clsname:
            modulename += '.' + clsname
        logger.write('bind function: %s.%s' % (modulename, co.co_name), 1)
    f.func_code = _psyco.proxycode(f)

if PYTHON_SUPPORT:
    # the list of code objects that have been tagged
    tagged_codes = []

    def tag(co, globals):
        if logger:
            try:
                f, clsname = function_cache[co]
            except KeyError:
                buildfncache(globals, function_cache)
                try:
                    f, clsname = function_cache[co]
                except KeyError:
                    clsname = '' # give up
            modulename = globals.get('__name__', '?')
            if clsname:
                modulename += '.' + clsname
            logger.write('tag function: %s.%s' % (modulename, co.co_name), 1)
        tagged_codes.append((co, globals))
        _psyco.turbo_frame(co)
        _psyco.turbo_code(co)

    def tag2bind():
        if tagged_codes:
            if logger:
                logger.write('profiling stopped, binding %d functions' %
                             len(tagged_codes), 2)
            for co, globals in tagged_codes:
                trytobind(co, globals, 0)
            function_cache.clear()
            del tagged_codes[:]

else:
    # tagging is impossible, always bind
    tag = trytobind
    def tag2bind():
        pass

class Profiler:
    MemoryTimerResolution = 0.103

    def run(self, memory, time, memorymax, timemax):
        self.memory = memory
        self.memorymax = memorymax
        self.time = time
        if timemax is None:
            self.endtime = None
        else:
            self.endtime = now() + timemax
        self.alarms = []
        profilers.append(self)
        go()

    def start(self):
        curmem = _psyco.memory()
        memlimits = []
        if self.memorymax is not None:
            if curmem >= self.memorymax:
                if logger:
                    logger.writememory()
                return self.limitreached('memorymax')
            memlimits.append(self.memorymax)
        if self.memory is not None:
            if self.memory <= 0:
                if logger:
                    logger.writememory()
                return self.limitreached('memory')
            memlimits.append(curmem + self.memory)
            self.memory_at_start = curmem
        curtime = now()
        timelimits = []
        if self.endtime is not None:
            if curtime >= self.endtime:
                return self.limitreached('timemax')
            timelimits.append(self.endtime - curtime)
        if self.time is not None:
            if self.time <= 0.0:
                return self.limitreached('time')
            timelimits.append(self.time)
            self.time_at_start = curtime
        try:
            self.do_start()
        except error, e:
            if logger:
                logger.write('%s: disabled by psyco.error:' % (
                    self.__class__.__name__), 4)
                logger.write(' %s' % str(e), 3)
            return 0
        if memlimits:
            self.memlimits_args = (time.sleep, (self.MemoryTimerResolution,),
                                   self.check_memory, (min(memlimits),))
            self.alarms.append(_psyco.alarm(*self.memlimits_args))
        if timelimits:
            self.alarms.append(_psyco.alarm(time.sleep, (min(timelimits),),
                                            self.time_out))
        return 1

    def stop(self):
        for alarm in self.alarms:
            alarm.stop(0)
        for alarm in self.alarms:
            alarm.stop(1) # wait for parallel threads to stop
        del self.alarms[:]
        if self.time is not None:
            self.time -= now() - self.time_at_start
        if self.memory is not None:
            self.memory -= _psyco.memory() - self.memory_at_start
        try:
            self.do_stop()
        except error:
            return 0
        return 1

    def check_memory(self, limit):
        if _psyco.memory() < limit:
            return self.memlimits_args
        go()

    def time_out(self):
        self.time = 0.0
        go()

    def limitreached(self, limitname):
        try:
            profilers.remove(self)
        except ValueError:
            pass
        if logger:
            logger.write('%s: disabled (%s limit reached)' % (
                self.__class__.__name__, limitname), 4)
        return 0

class FullCompiler(Profiler):

    def do_start(self):
        _psyco.profiling('f')

    def do_stop(self):
        _psyco.profiling('.')

class RunOnly(Profiler):

    def do_start(self):
        _psyco.profiling('n')

    def do_stop(self):
        _psyco.profiling('.')

class ChargeProfiler(Profiler):

    def __init__(self, watermark, parentframe):
        self.watermark = watermark
        self.parent2 = parentframe * 2.0
        self.lock = thread.allocate_lock()

    def init_charges(self):
        _psyco.statwrite(watermark = self.watermark,
                         parent2 = self.parent2)

    def do_stop(self):
        _psyco.profiling('.')
        _psyco.statwrite(callback = None)

class ActiveProfiler(ChargeProfiler):

    def active_start(self):
        _psyco.profiling('p')

    def do_start(self):
        self.init_charges()
        self.active_start()
        _psyco.statwrite(callback = self.charge_callback)

    def charge_callback(self, frame, charge):
        tag(frame.f_code, frame.f_globals)

class PassiveProfiler(ChargeProfiler):

    initial_charge_unit = _psyco.statread('unit')
    reset_stats_after = 120 # half-lives (maximum 200!)
    reset_limit = initial_charge_unit * (2.0 ** reset_stats_after)

    def __init__(self, watermark, halflife, pollfreq, parentframe):
        ChargeProfiler.__init__(self, watermark, parentframe)
        self.pollfreq = pollfreq
        # self.progress is slightly more than 1.0, and computed so that
        # do_profile() will double the change_unit every 'halflife' seconds.
        self.progress = 2.0 ** (1.0 / (halflife * pollfreq))

    def reset(self):
        _psyco.statwrite(unit = self.initial_charge_unit, callback = None)
        _psyco.statreset()
        if logger:
            logger.write("%s: resetting stats" % self.__class__.__name__, 1)

    def passive_start(self):
        self.passivealarm_args = (time.sleep, (1.0 / self.pollfreq,),
                                  self.do_profile)
        self.alarms.append(_psyco.alarm(*self.passivealarm_args))

    def do_start(self):
        tag2bind()
        self.init_charges()
        self.passive_start()

    def do_profile(self):
        _psyco.statcollect()
        if logger:
            logger.dumpcharges()
        nunit = _psyco.statread('unit') * self.progress
        if nunit > self.reset_limit:
            self.reset()
        else:
            _psyco.statwrite(unit = nunit, callback = self.charge_callback)
        return self.passivealarm_args

    def charge_callback(self, frame, charge):
        trytobind(frame.f_code, frame.f_globals)

class ActivePassiveProfiler(PassiveProfiler, ActiveProfiler):

    def do_start(self):
        self.init_charges()
        self.active_start()
        self.passive_start()

    def charge_callback(self, frame, charge):
        tag(frame.f_code, frame.f_globals)

#
# we register our own version of sys.settrace(), sys.setprofile()
# and thread.start_new_thread().
#

def psyco_settrace(*args, **kw):
    "This is the Psyco-aware version of sys.settrace()."
    result = original_settrace(*args, **kw)
    go()
    return result

def psyco_setprofile(*args, **kw):
    "This is the Psyco-aware version of sys.setprofile()."
    result = original_setprofile(*args, **kw)
    go()
    return result

def psyco_thread_stub(callable, args, kw):
    _psyco.statcollect()
    if kw is None:
        return callable(*args)
    else:
        return callable(*args, **kw)

def psyco_start_new_thread(callable, args, kw=None):
    "This is the Psyco-aware version of thread.start_new_thread()."
    return original_start_new_thread(psyco_thread_stub, (callable, args, kw))

original_settrace = sys.settrace
original_setprofile = sys.setprofile
original_start_new_thread = thread.start_new_thread
sys.settrace = psyco_settrace
sys.setprofile = psyco_setprofile

if PYTHON_SUPPORT:
    thread.start_new_thread = psyco_start_new_thread
    # hack to patch threading._start_new_thread if the module is
    # already loaded
    if (sys.modules.has_key('threading') and
        hasattr(sys.modules['threading'], '_start_new_thread')):
        sys.modules['threading']._start_new_thread = psyco_start_new_thread

View File

@ -1,196 +0,0 @@
###########################################################################
#
# Psyco general support module.
# Copyright (C) 2001-2002 Armin Rigo et.al.

"""Psyco general support module.
For internal use.
"""
###########################################################################

import sys, _psyco, __builtin__

error = _psyco.error

class warning(Warning):
    pass

_psyco.NoLocalsWarning = warning

def warn(msg):
    from warnings import warn
    warn(msg, warning, stacklevel=2)

#
# Version checks
#
__version__ = 0x010500f0
if _psyco.PSYVER != __version__:
    raise error, "version mismatch between Psyco parts, reinstall it"

version_info = (__version__ >> 24,
                (__version__ >> 16) & 0xff,
                (__version__ >> 8) & 0xff,
                {0xa0: 'alpha',
                 0xb0: 'beta',
                 0xc0: 'candidate',
                 0xf0: 'final'}[__version__ & 0xf0],
                __version__ & 0xf)

VERSION_LIMITS = [0x02010000, # 2.1
                  0x02020000, # 2.2
                  0x02020200, # 2.2.2
                  0x02030000, # 2.3
                  0x02040000] # 2.4

if ([v for v in VERSION_LIMITS if v <= sys.hexversion] !=
    [v for v in VERSION_LIMITS if v <= _psyco.PYVER ]):
    if sys.hexversion < VERSION_LIMITS[0]:
        warn("Psyco requires Python version 2.1 or later")
    else:
        warn("Psyco version does not match Python version. "
             "Psyco must be updated or recompiled")

PYTHON_SUPPORT = hasattr(_psyco, 'turbo_code')

if hasattr(_psyco, 'ALL_CHECKS') and hasattr(_psyco, 'VERBOSE_LEVEL'):
    print >> sys.stderr, ('psyco: running in debugging mode on %s' %
                          _psyco.PROCESSOR)

###########################################################################
# sys._getframe() gives strange results on a mixed Psyco- and Python-style
# stack frame. Psyco provides a replacement that partially emulates Python
# frames from Psyco frames. The new sys._getframe() may return objects of
# a custom "Psyco frame" type, which with Python >=2.2 is a subtype of the
# normal frame type.
#
# The same problems require some other built-in functions to be replaced
# as well. Note that the local variables are not available in any
# dictionary with Psyco.

class Frame:
    pass

class PythonFrame(Frame):

    def __init__(self, frame):
        self.__dict__.update({
            '_frame': frame,
            })

    def __getattr__(self, attr):
        if attr == 'f_back':
            try:
                result = embedframe(_psyco.getframe(self._frame))
            except ValueError:
                result = None
            except error:
                warn("f_back is skipping dead Psyco frames")
                result = self._frame.f_back
            self.__dict__['f_back'] = result
            return result
        else:
            return getattr(self._frame, attr)

    def __setattr__(self, attr, value):
        setattr(self._frame, attr, value)

    def __delattr__(self, attr):
        delattr(self._frame, attr)

class PsycoFrame(Frame):

    def __init__(self, tag):
        self.__dict__.update({
            '_tag' : tag,
            'f_code' : tag[0],
            'f_globals': tag[1],
            })

    def __getattr__(self, attr):
        if attr == 'f_back':
            try:
                result = embedframe(_psyco.getframe(self._tag))
            except ValueError:
                result = None
        elif attr == 'f_lineno':
            result = self.f_code.co_firstlineno # better than nothing
        elif attr == 'f_builtins':
            result = self.f_globals['__builtins__']
        elif attr == 'f_restricted':
            result = self.f_builtins is not __builtins__
        elif attr == 'f_locals':
            raise AttributeError, ("local variables of functions run by Psyco "
                                   "cannot be accessed in any way, sorry")
        else:
            raise AttributeError, ("emulated Psyco frames have "
                                   "no '%s' attribute" % attr)
        self.__dict__[attr] = result
        return result

    def __setattr__(self, attr, value):
        raise AttributeError, "Psyco frame objects are read-only"

    def __delattr__(self, attr):
        if attr == 'f_trace':
            # for bdb which relies on CPython frames exhibiting a slightly
            # buggy behavior: you can 'del f.f_trace' as often as you like
            # even without having set it previously.
            return
        raise AttributeError, "Psyco frame objects are read-only"

def embedframe(result):
    if type(result) is type(()):
        return PsycoFrame(result)
    else:
        return PythonFrame(result)

def _getframe(depth=0):
    """Return a frame object from the call stack. This is a replacement for
    sys._getframe() which is aware of Psyco frames.
    The returned objects are instances of either PythonFrame or PsycoFrame
    instead of being real Python-level frame object, so that they can emulate
    the common attributes of frame objects.
    The original sys._getframe() ignoring Psyco frames altogether is stored in
    psyco._getrealframe(). See also psyco._getemulframe()."""
    # 'depth+1' to account for this _getframe() Python function
    return embedframe(_psyco.getframe(depth+1))

def _getemulframe(depth=0):
    """As _getframe(), but the returned objects are real Python frame objects
    emulating Psyco frames. Some of their attributes can be wrong or missing,
    however."""
    # 'depth+1' to account for this _getemulframe() Python function
    return _psyco.getframe(depth+1, 1)

def patch(name, module=__builtin__):
    f = getattr(_psyco, name)
    org = getattr(module, name)
    if org is not f:
        setattr(module, name, f)
        setattr(_psyco, 'original_' + name, org)

_getrealframe = sys._getframe
sys._getframe = _getframe
patch('globals')
patch('eval')
patch('execfile')
patch('locals')
patch('vars')
patch('dir')
patch('input')
_psyco.original_raw_input = raw_input
__builtin__.__in_psyco__ = 0==1 # False

if hasattr(_psyco, 'compact'):
    import kdictproxy
    _psyco.compactdictproxy = kdictproxy.compactdictproxy

View File

@ -1,88 +0,0 @@
#! /usr/bin/perl

sub PrintArgsAndDie () {
  print stderr "USAGE: reduce-field.pl [-h] \n";
  print stderr "This scripts reduce the number of active fields for the mert procedure.\n";
  exit(1);
}

my $weightfile="";
my $size=-1;
my $activefields="";
my $debug=0;

while (@ARGV){
  if ($ARGV[0] eq "-h"){
    &PrintArgsAndDie();
  }
  if ($ARGV[0] eq "-debug"){
    $debug=1;
    shift(@ARGV);
  }
  if ($ARGV[0] eq "-weight"){
    $weightfile=$ARGV[1];
    shift(@ARGV); shift(@ARGV);
  }
  if ($ARGV[0] eq "-d"){
    $size=$ARGV[1];
    shift(@ARGV); shift(@ARGV);
  }
  if ($ARGV[0] eq "-activate"){
    $activefields=$ARGV[1];
    shift(@ARGV); shift(@ARGV);
  }
}

die "Cannot open/find weight file ($weightfile)\n" if ! -e $weightfile;

my @weight=();
open(IN,$weightfile);
chomp($weight=<IN>);
close(IN);
push @weight,split(/[ \t]+/,"1 $weight");

my @active=();
my @invertedactive=();
if ($activefields eq ""){
  for (my $i=1; $i<=$size; $i++){ $active[$i]=1; };
}else{
  @active=split(/,/,$activefields);
}

for (my $i=0; $i<=$size; $i++){ $invertedactive[$i]=0; };
for (my $i=0; $i<scalar(@active); $i++){ $invertedactive[$active[$i]]=1; };

my $j=0;
for (my $i=1; $i<=$size; $i++){ if (!$invertedactive[$i]){$notactive[$j]=$i; $j++}};

if ($debug>0){
  print STDERR "ORIGINAL SIZE: $size\n";
  print STDERR "ORIGINAL WEIGHTS: @weight\n";
  print STDERR "ORIGINAL ACTIVE: @active\n";
  print STDERR "ORIGINAL NOTACTIVE: @notactive\n";
  print STDERR "ORIGINAL INVERTEDACTIVE: @invertedactive\n";
}

while(chomp($_=<STDIN>)){
  my @field=(0,split(/[ \t]+/,$_));
  my $notactivedweightedsum=0.0;
  my $j;
  for (my $i=0; $i<scalar(@notactive); $i++){
    $j=$notactive[$i];
    $notactivedweightedsum+=($weight[$j]*$field[$j]);
    printf STDERR "notactive -> i:$i j:$j -> $weight[$j] - $field[$j] -> $notactivedweightedsum\n" if $debug>0;
  };
  printf STDOUT "%.3f",$notactivedweightedsum;
  printf STDERR "sum not active features: %.3f\n",$notactivedweightedsum if $debug>0;
  for (my $i=0; $i<scalar(@active); $i++){
    print STDOUT " $field[$active[$i]]";
    printf STDERR "active -> i:$i j:$active[$i] -> $field[$active[$i]]\n" if $debug>0;
  };
  for (my $i=scalar(@active)+scalar(@notactive)+1; $i< scalar(@field); $i++){
    print STDOUT " $field[$i]";
    printf STDERR "extra -> i:$i -> $field[$i]\n" if $debug>0;
  };
  print STDOUT "\n";
}

View File

@ -1,8 +0,0 @@
#!/bin/sh
unset LANG
export PATH=$PATH:/group/project/statmt/pkoehn/user/abhishek:/group/project/statmt/pkoehn/user/abhishek/cmert-0.5
export EVAL=/group/project/statmt/pkoehn/user/abhishek/WST05/fr-en-train/dev
mert-driver cmert-work $EVAL/low.test400.fr.rest $EVAL/low.test400.en 100 pharaoh.2005-07-21 "-config /group/project/statmt/pkoehn/user/abhishek/WST05/fr-en-train/model/pharaoh.ini -dl 4 -b 0.1 -ttable-limit 100" "0.2,0-1;0.2,0.2-0.2;0.2,0-1;0.2,0-1;0.2,0-1;0.2,0-1;0.2,-1-1;0.2,-1-1"

View File

@ -1,109 +0,0 @@
#!/usr/bin/python

# $Id$

"""Convert n-best list in mert.perl format to format required by
Venugopal's MER trainer. This entails calculating the BLEU component scores."""

"""usage: score-nbest.py <reffile>+ <outprefix>
The input should be sorted by sentence number and piped into stdin
Run it like this: sort -mnk 1,1 *.nbest | score-nbest.py ...
"""

import sys, itertools, re
import bleu
#Comment out this line when moving to python 2.4
from sets import Set as set

def process(sentnum, testsents):
    candsfile.write("%d %d\n" % (cur_sentnum, len(testsents)))
    for (sent,vector) in testsents:
        comps = bleu.cook_test(sent, cookedrefs[sentnum])
        if comps['testlen'] != comps['guess'][0]:
            sys.stderr.write("ERROR: test length != guessed 1-grams\n")
        featsfile.write("%s %s %d\n" % (" ".join([str(v) for v in vector]),
                                        " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])]),
                                        comps['reflen']))

if __name__ == "__main__":
    import os
    machtype=os.environ.get("MACHTYPE")
    if machtype == "i386":
        #import psyco
        #psyco.full()
        sys.stderr.write("psyco library is NOT imported. Uncomment code in score-nbest.py if you wish to enable it\n")
    else:
        sys.stderr.write("psyco library is not imported because it is not available for %s \n" % machtype)

    import getopt
    (opts,args) = getopt.getopt(sys.argv[1:], "casen", [])
    for (opt,parm) in opts:
        if opt == "-c":
            bleu.preserve_case = True
        if opt == "-a":
            bleu.eff_ref_len = "average"
        if opt == "-s":
            bleu.eff_ref_len = "shortest"
        if opt == "-e":
            bleu.eff_ref_len = "closest"
        if opt == "-n":
            bleu.nonorm = 1

    print args
    cookedrefs = []
    reffiles = [file(name) for name in args[:-1]]
    print reffiles
    for refs in itertools.izip(*reffiles):
        cookedrefs.append(bleu.cook_refs(refs))

    outprefix = args[-1]
    featsfile = file(outprefix+"feats.opt", "w")
    candsfile = file(outprefix+"cands.opt", "w")

    cur_sentnum = None
    testsents = set()
    progress = 0
    infile = sys.stdin

    # function that recognizes floats
    re_float=re.compile(r'^-?[-0-9.e\+]+$')
    is_float=lambda(x):re_float.match(x)

    for line in infile:
        try:
            ##Changed to add a further field - AA 29/11/05
            #(sentnum, sent, vector) = line.split('|||')
            (sentnum, sent, vector, prob ) = line.split('|||')
        except:
            sys.stderr.write("ERROR: bad input line %s\n" % line)
        sentnum = int(sentnum)
        sent = " ".join(sent.split())
        # filter out score labels (keep only floats) and convert numbers to floats
        vector = tuple(map(lambda(s): -float(s), filter(is_float, vector.split())))
        if sentnum != cur_sentnum:
            if cur_sentnum is not None:
                process(cur_sentnum, testsents)
            cur_sentnum = sentnum
            testsents = set()
        testsents.add((sent,vector))
        if progress % 10000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
        progress += 1

    process(cur_sentnum, testsents)
    sys.stdout.write("\n")
    featsfile.close()
    candsfile.close()

View File

@ -1,34 +0,0 @@
// $Id$

#include <math.h>
#include <stdio.h>

#include "score.h"

int comps_n = 9;

void comps_addto(int *comps1, int *comps2) {
  int i;
  for (i=0; i<comps_n; i++)
    comps1[i] += comps2[i];
}

float compute_score(int *comps) {
  float logbleu = 0.0, brevity;
  int i;
  int n = (comps_n-1)/2;

  /*for (i=0; i<comps_n; i++)
      fprintf(stderr, " %d", comps[i]);
    fprintf(stderr, "\n");*/

  for (i=0; i<n; i++) {
    if (comps[2*i] == 0)
      return 0.0;
    logbleu += log(comps[2*i])-log(comps[2*i+1]);
  }
  logbleu /= n;
  brevity = 1.0-(float)comps[comps_n-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
  if (brevity < 0.0)
    logbleu += brevity;
  return exp(logbleu);
}

View File

@ -1,10 +0,0 @@
// $Id$
#ifndef SCORE_H
#define SCORE_H
extern int comps_n;
void comps_addto(int *comps1, int *comps2);
float compute_score(int *comps);
#endif

File diff suppressed because it is too large

Binary file not shown.

Binary file not shown.