Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-26 13:23:25 +03:00)
Fix more Python lint.
Most of the complaints fixed here were from Pocketlint, but many were also from Syntastic, the vim plugin.
This commit is contained in:
parent c07ade8142
commit 61162dd242
@ -1,53 +1,48 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
#
|
||||
# Version of ConfigParser which accepts default values
|
||||
#
|
||||
"""Version of ConfigParser which accepts default values."""
|
||||
|
||||
|
||||
import ConfigParser
|
||||
|
||||
|
||||
class Config:
|
||||
def __init__(self,filename):
|
||||
self.config = ConfigParser.SafeConfigParser()
|
||||
cfh = open(filename)
|
||||
self.config.readfp(cfh)
|
||||
cfh.close()
|
||||
"""Version of ConfigParser which accepts default values."""
|
||||
|
||||
def get(self,section,name,default=None):
|
||||
if default == None or self.config.has_option(section,name):
|
||||
return self.config.get(section,name)
|
||||
else:
|
||||
return default
|
||||
def __init__(self, filename):
|
||||
self.config = ConfigParser.SafeConfigParser()
|
||||
cfh = open(filename)
|
||||
self.config.readfp(cfh)
|
||||
cfh.close()
|
||||
|
||||
def getint(self,section,name,default=None):
|
||||
if default == None or self.config.has_option(section,name):
|
||||
return self.config.getint(section,name)
|
||||
else:
|
||||
return default
|
||||
def get(self, section, name, default=None):
|
||||
if default is None or self.config.has_option(section, name):
|
||||
return self.config.get(section, name)
|
||||
else:
|
||||
return default
|
||||
|
||||
def getint(self, section, name, default=None):
|
||||
if default is None or self.config.has_option(section, name):
|
||||
return self.config.getint(section, name)
|
||||
else:
|
||||
return default
|
||||
|
||||
def getboolean(self,section,name,default=None):
|
||||
if default == None or self.config.has_option(section,name):
|
||||
return self.config.getboolean(section,name)
|
||||
else:
|
||||
return default
|
||||
|
||||
|
||||
def getfloat(self,section,name,default=None):
|
||||
if default == None or self.config.has_option(section,name):
|
||||
return self.config.getfloat(section,name)
|
||||
else:
|
||||
return default
|
||||
|
||||
|
||||
def __str__(self):
|
||||
ret = ""
|
||||
for section in self.config.sections():
|
||||
for option in self.config.options(section):
|
||||
ret = ret + "%s:%s = %s\n" % (section,option,self.config.get(section,option))
|
||||
return ret
|
||||
|
||||
def getboolean(self, section, name, default=None):
|
||||
if default is None or self.config.has_option(section, name):
|
||||
return self.config.getboolean(section, name)
|
||||
else:
|
||||
return default
|
||||
|
||||
def getfloat(self, section, name, default=None):
|
||||
if default is None or self.config.has_option(section, name):
|
||||
return self.config.getfloat(section, name)
|
||||
else:
|
||||
return default
|
||||
|
||||
def __str__(self):
|
||||
ret = ""
|
||||
for section in self.config.sections():
|
||||
for option in self.config.options(section):
|
||||
ret = ret + "%s:%s = %s\n" % (
|
||||
section, option, self.config.get(section, option))
|
||||
return ret
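As a usage sketch of this Config wrapper (Python 2; the config file name and the "general"/"verbose" option are invented for illustration):

# Hypothetical usage of defaultconfig.Config.
from defaultconfig import Config

config = Config("filter.cfg")  # invented file name
# Falls back to the supplied default when the option is absent.
threshold = config.getfloat("random", "threshold", 0.1)
verbose = config.getboolean("general", "verbose", False)
print "threshold=%s verbose=%s" % (threshold, verbose)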
|
||||
|
@ -1,156 +1,171 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
#
|
||||
# Filter a parallel corpus
|
||||
#
|
||||
"""Filter a parallel corpus."""
|
||||
|
||||
|
||||
import heapq
|
||||
import logging
|
||||
import math
|
||||
import optparse
|
||||
import random
|
||||
import sys
|
||||
|
||||
from defaultconfig import Config
|
||||
|
||||
logging.basicConfig(format = "%(asctime)-15s %(message)s")
|
||||
|
||||
logging.basicConfig(format="%(asctime)-15s %(message)s")
|
||||
log = logging.getLogger("filter")
|
||||
log.setLevel(logging.DEBUG)
|
||||
|
||||
class FilterStrategy(object):
|
||||
def __init__(self,config):
|
||||
pass
|
||||
|
||||
def filter(self,source,target):
|
||||
return True
|
||||
class FilterStrategy(object):
|
||||
def __init__(self, config):
|
||||
pass
|
||||
|
||||
def filter(self, source, target):
|
||||
return True
|
||||
|
||||
|
||||
class RandomFilterStrategy(FilterStrategy):
|
||||
def __init__(self,config):
|
||||
self.threshold = config.getfloat("random", "threshold", 0.1)
|
||||
random.seed()
|
||||
def __init__(self, config):
|
||||
self.threshold = config.getfloat("random", "threshold", 0.1)
|
||||
random.seed()
|
||||
|
||||
def filter(self, source, target):
|
||||
return random.random() < self.threshold
|
||||
def filter(self, source, target):
|
||||
return random.random() < self.threshold
|
||||
|
||||
|
||||
class ScoreFilterStrategy(FilterStrategy):
|
||||
"""Filter strategy that is based on a file with sentence scores. There are three
|
||||
possible ways of specifying how to filter:
|
||||
i) threshold - filter all sentence pairs whose score is less than the threshold
|
||||
ii) proportion - filter all but a certain proportion (eg a tenth) of the sentences
|
||||
"""Filter strategy that is based on a file with sentence scores.
|
||||
|
||||
There are three possible ways of specifying how to filter:
|
||||
i) threshold - filter all sentence pairs whose score is less than the
|
||||
threshold.
|
||||
ii) proportion - filter all but a certain proportion (eg a tenth) of the
|
||||
sentences.
|
||||
iii) count - filter all but a given count of the sentences.
|
||||
"""
|
||||
def __init__(self,config):
|
||||
section = "score"
|
||||
self.score_file = config.get(section,"score_file")
|
||||
self.ignore_score = config.get(section, "ignore_score", "99999")
|
||||
option_names = ("threshold", "proportion", "count")
|
||||
options = [config.config.has_option(section,o) for o in option_names]
|
||||
if sum(options) != 1:
|
||||
raise RuntimeError("Must specify exactly one of %s for score filter" % str(option_names))
|
||||
if options[0]:
|
||||
# threshold
|
||||
self.threshold = config.getfloat(section,option_names[0])
|
||||
else:
|
||||
# proportion or count
|
||||
if options[2]:
|
||||
count = config.getint(section,option_names[2])
|
||||
else:
|
||||
# need to count entries
|
||||
count = 0
|
||||
ignore_count = 0
|
||||
for line in open(self.score_file):
|
||||
if line[:-1] != self.ignore_score:
|
||||
count = count + 1
|
||||
else:
|
||||
ignore_count = ignore_count + 1
|
||||
count = int(count * config.getfloat(section,option_names[1]))
|
||||
log.info("Retaining at least %d entries and ignoring %d" % (count, ignore_count))
|
||||
# Find the threshold
|
||||
self.threshold = sorted(\
|
||||
[float(line[:-1]) for line in open(self.score_file)], reverse=True)[ignore_count + count]
|
||||
#self.threshold = heapq.nlargest(count, \
|
||||
# [float(line[:-1]) for line in open(self.score_file)])[-1]
|
||||
|
||||
def __init__(self, config):
|
||||
section = "score"
|
||||
self.score_file = config.get(section, "score_file")
|
||||
self.ignore_score = config.get(section, "ignore_score", "99999")
|
||||
option_names = ("threshold", "proportion", "count")
|
||||
options = [config.config.has_option(section, o) for o in option_names]
|
||||
if sum(options) != 1:
|
||||
raise RuntimeError(
|
||||
"Must specify exactly one of %s for score filter"
|
||||
% str(option_names))
|
||||
if options[0]:
|
||||
# Threshold.
|
||||
self.threshold = config.getfloat(section, option_names[0])
|
||||
else:
|
||||
# proportion or count
|
||||
if options[2]:
|
||||
count = config.getint(section, option_names[2])
|
||||
else:
|
||||
# Need to count entries.
|
||||
count = 0
|
||||
ignore_count = 0
|
||||
for line in open(self.score_file):
|
||||
if line[:-1] != self.ignore_score:
|
||||
count += 1
|
||||
else:
|
||||
ignore_count = ignore_count + 1
|
||||
count = int(count * config.getfloat(section, option_names[1]))
|
||||
log.info(
|
||||
"Retaining at least %d entries and ignoring %d"
|
||||
% (count, ignore_count))
|
||||
# Find the threshold.
|
||||
self.threshold = sorted([
|
||||
float(line[:-1])
|
||||
for line in open(self.score_file)],
|
||||
reverse=True)[ignore_count + count]
|
||||
# import heapq
|
||||
# self.threshold = heapq.nlargest(
|
||||
# count,
|
||||
# [float(line[:-1]) for line in open(self.score_file)])[-1]
|
||||
|
||||
self.sfh = open(self.score_file)
|
||||
log.info("Thresholding scores at " + str(self.threshold))
|
||||
self.sfh = open(self.score_file)
|
||||
log.info("Thresholding scores at " + str(self.threshold))
|
||||
|
||||
def filter(self, source, target):
|
||||
score = self.sfh.readline()
|
||||
if not score:
|
||||
raise RuntimeError("score file truncated")
|
||||
return (
|
||||
score[:-1] == self.ignore_score or
|
||||
float(score[:-1]) >= self.threshold
|
||||
)
|
||||
|
||||
def filter(self,source,target):
|
||||
score = self.sfh.readline()
|
||||
if not score:
|
||||
raise RuntimeError("score file truncated")
|
||||
return score[:-1] == self.ignore_score or float(score[:-1]) >= self.threshold
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage = "Usage: %prog [options] config-file")
|
||||
(options,args) = parser.parse_args()
|
||||
if len(args) < 1:
|
||||
parser.error("No configuration file specified")
|
||||
parser = optparse.OptionParser(usage="Usage: %prog [options] config-file")
|
||||
(options, args) = parser.parse_args()
|
||||
if len(args) < 1:
|
||||
parser.error("No configuration file specified")
|
||||
|
||||
log.info("Loading configuration from " + args[0])
|
||||
config = Config(args[0])
|
||||
log.debug("Configuration:\n" + str(config))
|
||||
log.info("Loading configuration from " + args[0])
|
||||
config = Config(args[0])
|
||||
log.debug("Configuration:\n" + str(config))
|
||||
|
||||
# Required general parameters
|
||||
source_lang = config.get("general", "source_language")
|
||||
target_lang = config.get("general", "target_language")
|
||||
input_stem = config.get("general", "input_stem")
|
||||
output_stem = config.get("general", "output_stem")
|
||||
strategy = config.get("general", "strategy", "")
|
||||
# Required general parameters
|
||||
source_lang = config.get("general", "source_language")
|
||||
target_lang = config.get("general", "target_language")
|
||||
input_stem = config.get("general", "input_stem")
|
||||
output_stem = config.get("general", "output_stem")
|
||||
strategy = config.get("general", "strategy", "")
|
||||
|
||||
# Optional general parameters
|
||||
alignment_stem = config.get("general", "alignment_stem", "")
|
||||
alignment_type = config.get("general", "alignment_type", "grow-diag-final-and")
|
||||
domain_file_in = config.get("general", "domain_file", "")
|
||||
domain_file_out = config.get("general", "domain_file_out", "")
|
||||
# Optional general parameters
|
||||
alignment_stem = config.get("general", "alignment_stem", "")
|
||||
alignment_type = config.get(
|
||||
"general", "alignment_type", "grow-diag-final-and")
|
||||
domain_file_in = config.get("general", "domain_file", "")
|
||||
domain_file_out = config.get("general", "domain_file_out", "")
|
||||
|
||||
strategy_class = globals()[strategy + "FilterStrategy"]
|
||||
strategy = strategy_class(config)
|
||||
strategy_class = globals()[strategy + "FilterStrategy"]
|
||||
strategy = strategy_class(config)
|
||||
|
||||
source_input_fh = open(input_stem + "." + source_lang)
|
||||
target_input_fh = open(input_stem + "." + target_lang)
|
||||
source_output_fh = open(output_stem + "." + source_lang, "w")
|
||||
target_output_fh = open(output_stem + "." + target_lang, "w")
|
||||
source_input_fh = open(input_stem + "." + source_lang)
|
||||
target_input_fh = open(input_stem + "." + target_lang)
|
||||
source_output_fh = open(output_stem + "." + source_lang, "w")
|
||||
target_output_fh = open(output_stem + "." + target_lang, "w")
|
||||
|
||||
alignment_input_fh = None
|
||||
alignment_output_fh = None
|
||||
if alignment_stem:
|
||||
alignment_input_fh = open(alignment_stem + "." + alignment_type)
|
||||
alignment_output_fh = open(output_stem + "." + alignment_type,"w")
|
||||
alignment_input_fh = None
|
||||
alignment_output_fh = None
|
||||
if alignment_stem:
|
||||
alignment_input_fh = open(alignment_stem + "." + alignment_type)
|
||||
alignment_output_fh = open(output_stem + "." + alignment_type, "w")
|
||||
|
||||
domain_boundaries = {}
|
||||
if domain_file_in:
|
||||
dfh = open(domain_file_in)
|
||||
for line in dfh:
|
||||
line_no,name = line[:-1].split()
|
||||
domain_boundaries[int(line_no)] = name
|
||||
|
||||
domain_output_fh = None
|
||||
if domain_file_out:
|
||||
domain_output_fh = open(domain_file_out, "w")
|
||||
domain_boundaries = {}
|
||||
if domain_file_in:
|
||||
dfh = open(domain_file_in)
|
||||
for line in dfh:
|
||||
line_no, name = line[:-1].split()
|
||||
domain_boundaries[int(line_no)] = name
|
||||
|
||||
#log.info(str(domain_boundaries))
|
||||
domain_output_fh = None
|
||||
if domain_file_out:
|
||||
domain_output_fh = open(domain_file_out, "w")
|
||||
|
||||
# log.info(str(domain_boundaries))
|
||||
|
||||
retained = 0
|
||||
line_no = 0
|
||||
for source_line in source_input_fh:
|
||||
target_line = target_input_fh.readline()
|
||||
if alignment_input_fh:
|
||||
align_line = alignment_input_fh.readline()
|
||||
if strategy.filter(source_line, target_line):
|
||||
retained = retained + 1
|
||||
print>>source_output_fh, source_line,
|
||||
print>>target_output_fh, target_line,
|
||||
if alignment_input_fh:
|
||||
print>>alignment_output_fh, align_line,
|
||||
line_no = line_no + 1
|
||||
# Check if this is a domain boundary.
|
||||
if domain_boundaries and line_no in domain_boundaries:
|
||||
print >>domain_output_fh, (
|
||||
"%d %s" % (retained, domain_boundaries[line_no]))
|
||||
log.info("Lines retained: %d", retained)
|
||||
|
||||
retained = 0
|
||||
line_no = 0
|
||||
for source_line in source_input_fh:
|
||||
target_line = target_input_fh.readline()
|
||||
if alignment_input_fh:
|
||||
align_line = alignment_input_fh.readline()
|
||||
if strategy.filter(source_line,target_line):
|
||||
retained = retained + 1
|
||||
print>>source_output_fh, source_line,
|
||||
print>>target_output_fh, target_line,
|
||||
if alignment_input_fh:
|
||||
print>>alignment_output_fh, align_line,
|
||||
line_no = line_no + 1
|
||||
# check if this is a domain boundary
|
||||
if domain_boundaries and domain_boundaries.has_key(line_no):
|
||||
print>>domain_output_fh,"%d %s" % (retained,domain_boundaries[line_no])
|
||||
log.info("Lines retained: %d" % retained)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
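For orientation, a sketch of a configuration file this filter script could read, using the section and option names referenced in the code above (the strategy name selects RandomFilterStrategy or ScoreFilterStrategy); the languages, paths, and values are invented:

# Illustrative config for the corpus filter; all values are invented.
[general]
source_language = fr
target_language = en
input_stem = corpus/train
output_stem = corpus/train.filtered
strategy = Score

[score]
score_file = corpus/train.scores
ignore_score = 99999
# Exactly one of threshold, proportion or count must be set.
proportion = 0.9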
|
||||
|
@ -2,73 +2,73 @@
|
||||
# compute BLEU scores with confidence intervals via bootstrap resampling
|
||||
# written by Ulrich Germann
|
||||
|
||||
import math,sys,os
|
||||
from argparse import ArgumentParser
|
||||
from operator import itemgetter
|
||||
from random import randint
|
||||
from operator import itemgetter
|
||||
import math
|
||||
import os
|
||||
from random import randint
|
||||
import sys
|
||||
|
||||
def count_ngrams(snt,max_n):
|
||||
|
||||
def count_ngrams(snt, max_n):
|
||||
"""
|
||||
Return a dictionary of ngram counts (up to length /max_n/)
|
||||
for sentence (list of words) /snt/.
|
||||
Return a dictionary of ngram counts (up to length /max_n/)
|
||||
for sentence (list of words) /snt/.
|
||||
"""
|
||||
ret = {}
|
||||
for i in xrange(len(snt)):
|
||||
for k in xrange(i+1,min(i+max_n+1,len(snt)+1)):
|
||||
for k in xrange(i + 1, min(i + max_n + 1, len(snt) + 1)):
|
||||
key = tuple(snt[i:k])
|
||||
ret[key] = ret.get(key,0) + 1
|
||||
pass
|
||||
pass
|
||||
ret[key] = ret.get(key, 0) + 1
|
||||
return ret
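A tiny worked example of count_ngrams on an invented three-word sentence:

# count_ngrams("the cat sat".split(), 2) returns
#   {('the',): 1, ('cat',): 1, ('sat',): 1, ('the', 'cat'): 1, ('cat', 'sat'): 1}
# i.e. every unigram and bigram occurs exactly once.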
|
||||
|
||||
def max_counts(ng1,ng2):
|
||||
|
||||
def max_counts(ng1, ng2):
|
||||
"""
|
||||
Return a dictionary of ngram counts such that
|
||||
Return a dictionary of ngram counts such that
|
||||
each count is the greater of the two individual counts
|
||||
for each ngram in the input ngram count dictionaries
|
||||
for each ngram in the input ngram count dictionaries
|
||||
/ng1/ and /ng2/.
|
||||
"""
|
||||
ret = ng1.copy()
|
||||
for k,v in ng2.items():
|
||||
ret[k] = max(ret.get(k,0),v)
|
||||
pass
|
||||
for k, v in ng2.items():
|
||||
ret[k] = max(ret.get(k, 0), v)
|
||||
return ret
|
||||
|
||||
def ng_hits(hyp,ref,max_n):
|
||||
|
||||
def ng_hits(hyp, ref, max_n):
|
||||
"""
|
||||
return a list of ngram counts such that each ngram count
|
||||
is the minimum of the counts in hyp and ref, up to ngram
|
||||
length /max_n/
|
||||
Return a list of ngram counts such that each ngram count
|
||||
is the minimum of the counts in hyp and ref, up to ngram
|
||||
length /max_n/.
|
||||
"""
|
||||
ret = [0 for i in xrange(max_n)]
|
||||
for ng,cnt in hyp.items():
|
||||
for ng, cnt in hyp.items():
|
||||
k = ng
|
||||
if len(k) <= max_n:
|
||||
ret[len(k)-1] += min(cnt,ref.get(ng,0))
|
||||
pass
|
||||
pass
|
||||
ret[len(k) - 1] += min(cnt, ref.get(ng, 0))
|
||||
return ret
|
||||
|
||||
|
||||
class BleuScore:
|
||||
def __init__(self,hyp,ref,max_n=4,bootstrap=1000):
|
||||
# print len(hyp.ngrams),len(ref.ngrams),"X"
|
||||
self.hits = [ng_hits(hyp.ngrams[i],ref.ngrams[i],max_n)
|
||||
for i in xrange(len(hyp.ngrams))]
|
||||
self.max_n = max_n
|
||||
self.hyp = hyp
|
||||
self.ref = ref
|
||||
self.lower = None
|
||||
self.upper = None
|
||||
def __init__(self, hyp, ref, max_n=4, bootstrap=1000):
|
||||
# print len(hyp.ngrams), len(ref.ngrams), "X"
|
||||
self.hits = [
|
||||
ng_hits(hyp.ngrams[i], ref.ngrams[i], max_n)
|
||||
for i in xrange(len(hyp.ngrams))]
|
||||
self.max_n = max_n
|
||||
self.hyp = hyp
|
||||
self.ref = ref
|
||||
self.lower = None
|
||||
self.upper = None
|
||||
self.median = None
|
||||
self.bootstrap = [self.score([randint(0,len(hyp.snt)-1) for s in hyp.snt])
|
||||
for i in xrange(1000)]
|
||||
self.bootstrap = [
|
||||
self.score([randint(0, len(hyp.snt) - 1) for s in hyp.snt])
|
||||
for i in xrange(1000)]
|
||||
self.bootstrap.sort()
|
||||
self.actual = self.score([i for i in xrange(len(hyp.snt))])
|
||||
return
|
||||
|
||||
def score(self,sample):
|
||||
hits = [0 for i in xrange(self.max_n)]
|
||||
|
||||
def score(self, sample):
|
||||
hits = [0 for i in xrange(self.max_n)]
|
||||
self.hyplen = 0
|
||||
self.reflen = 0
|
||||
for i in sample:
|
||||
@ -76,94 +76,89 @@ class BleuScore:
|
||||
self.reflen += len(self.ref.snt[i])
|
||||
for n in xrange(self.max_n):
|
||||
hits[n] += self.hits[i][n]
|
||||
pass
|
||||
pass
|
||||
self.prec = [float(hits[n])/(self.hyplen-n*len(sample))
|
||||
self.prec = [float(hits[n]) / (self.hyplen - n * len(sample))
|
||||
for n in xrange(self.max_n)]
|
||||
ret = sum([math.log(x) for x in self.prec])/self.max_n
|
||||
self.BP = min(1,math.exp(1.-float(self.reflen)/float(self.hyplen)))
|
||||
ret = sum([math.log(x) for x in self.prec]) / self.max_n
|
||||
self.BP = min(
|
||||
1, math.exp(1. - float(self.reflen) / float(self.hyplen)))
|
||||
ret += math.log(self.BP)
|
||||
return math.exp(ret)
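In formula form, the value returned by score() above is standard BLEU with a brevity penalty, where p_n are the modified n-gram precisions stored in self.prec:

BLEU = BP * exp( (1 / max_n) * sum_{n=1..max_n} log p_n )
BP   = min(1, exp(1 - reflen / hyplen))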
|
||||
|
||||
|
||||
|
||||
class Document:
|
||||
def __init__(self,fname=None):
|
||||
def __init__(self, fname=None):
|
||||
self.fname = fname
|
||||
if fname:
|
||||
self.snt = [line.strip().split() for line in open(fname)]
|
||||
self.ngrams = [count_ngrams(snt,4) for snt in self.snt]
|
||||
self.ngrams = [count_ngrams(snt, 4) for snt in self.snt]
|
||||
else:
|
||||
self.snt = None
|
||||
self.ngrams = None
|
||||
pass
|
||||
return
|
||||
|
||||
def merge(self,R):
|
||||
def merge(self, R):
|
||||
self.fname = "multi-ref"
|
||||
self.ngrams = [x for x in R[0].ngrams]
|
||||
self.snt = [x for x in R[0].snt]
|
||||
for i in xrange(len(R[0].ngrams)):
|
||||
for k in xrange(1,len(R)):
|
||||
self.ngrams[i] = max_counts(self.ngrams[i],R[k].ngrams[i])
|
||||
pass
|
||||
pass
|
||||
return
|
||||
for k in xrange(1, len(R)):
|
||||
self.ngrams[i] = max_counts(self.ngrams[i], R[k].ngrams[i])
|
||||
|
||||
def update(self,hyp,R):
|
||||
for i in xrange(len(hyp.snt)):
|
||||
clen = len(hyp.snt[i])
|
||||
def update(self, hyp, R):
|
||||
for i, hyp_snt in enumerate(hyp.snt):
|
||||
clen = len(hyp_snt)
|
||||
K = 0
|
||||
for k in xrange(1,len(R)):
|
||||
assert len(R[k].snt) == len(hyp.snt),\
|
||||
"Mismatch in numer of sentences " +\
|
||||
"between reference and candidate"
|
||||
if abs(len(R[k].snt[i]) - clen) == abs(len(R[K].snt[i]) - clen):
|
||||
if len(R[k].snt[i]) < len(R[K].snt[i]):
|
||||
for k in xrange(1, len(R)):
|
||||
k_snt = R[k].snt[i]
|
||||
assert len(R[k].snt) == len(hyp.snt), (
|
||||
"Mismatch in number of sentences " +
|
||||
"between reference and candidate")
|
||||
if abs(len(k_snt) - clen) == abs(len(R[K].snt[i]) - clen):
|
||||
if len(k_snt) < len(R[K].snt[i]):
|
||||
K = k
|
||||
pass
|
||||
pass
|
||||
elif abs(len(R[k].snt[i]) - clen) < abs(len(R[K].snt[i]) - clen):
|
||||
elif abs(len(k_snt) - clen) < abs(len(R[K].snt[i]) - clen):
|
||||
K = k
|
||||
pass
|
||||
pass
|
||||
self.snt[i] = R[K].snt[i]
|
||||
pass
|
||||
return
|
||||
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
argparser = ArgumentParser()
|
||||
argparser.add_argument("-r","--ref",nargs='+',help="reference translation(s)")
|
||||
argparser.add_argument("-c","--cand",nargs='+',help="candidate translations")
|
||||
argparser.add_argument("-i","--individual",action='store_true',
|
||||
help="compute BLEU scores for individual references")
|
||||
argparser.add_argument("-b","--bootstrap",type=int,default=1000,
|
||||
help="sample size for bootstrap resampling")
|
||||
argparser.add_argument("-a","--alpha",help="1-alpha = confidence interval",type=float,default=.05)
|
||||
argparser.add_argument(
|
||||
"-r", "--ref", nargs='+', help="Reference translation(s).")
|
||||
argparser.add_argument(
|
||||
"-c", "--cand", nargs='+', help="Candidate translations.")
|
||||
argparser.add_argument(
|
||||
"-i", "--individual", action='store_true',
|
||||
help="Compute BLEU scores for individual references.")
|
||||
argparser.add_argument(
|
||||
"-b", "--bootstrap", type=int, default=1000,
|
||||
help="Sample size for bootstrap resampling.")
|
||||
argparser.add_argument(
|
||||
"-a", "--alpha", type=float, default=.05,
|
||||
help="1-alpha = confidence interval.")
|
||||
args = argparser.parse_args(sys.argv[1:])
|
||||
R = [ Document(fname) for fname in args.ref]
|
||||
C = [ Document(fname) for fname in args.cand]
|
||||
Rx = Document() # for multi-reference BLEU
|
||||
R = [Document(fname) for fname in args.ref]
|
||||
C = [Document(fname) for fname in args.cand]
|
||||
Rx = Document() # for multi-reference BLEU
|
||||
Rx.merge(R)
|
||||
for c in C:
|
||||
# compute multi-reference BLEU
|
||||
Rx.update(c,R)
|
||||
bleu = BleuScore(c,Rx,bootstrap=args.bootstrap)
|
||||
print "%5.2f %s [%5.2f-%5.2f; %5.2f] %s"%\
|
||||
(100*bleu.actual,
|
||||
os.path.basename(Rx.fname),
|
||||
100*bleu.bootstrap[int((args.alpha/2)*args.bootstrap)],
|
||||
100*bleu.bootstrap[int((1-(args.alpha/2))*args.bootstrap)],
|
||||
100*bleu.bootstrap[int(.5*args.bootstrap)],
|
||||
c.fname) # os.path.basename(c.fname))
|
||||
Rx.update(c, R)
|
||||
bleu = BleuScore(c, Rx, bootstrap=args.bootstrap)
|
||||
print "%5.2f %s [%5.2f-%5.2f; %5.2f] %s" % (
|
||||
100 * bleu.actual,
|
||||
os.path.basename(Rx.fname),
|
||||
100 * bleu.bootstrap[int((args.alpha / 2) * args.bootstrap)],
|
||||
100 * bleu.bootstrap[int((1 - (args.alpha / 2)) * args.bootstrap)],
|
||||
100 * bleu.bootstrap[int(.5 * args.bootstrap)],
|
||||
c.fname) # os.path.basename(c.fname))
|
||||
|
||||
if args.individual:
|
||||
for r in R:
|
||||
bleu = BleuScore(c,r,bootstrap=args.bootstrap)
|
||||
print " %5.2f %s"%(100*bleu.actual,os.path.basename(r.fname))
|
||||
# print bleu.prec,bleu.hyplen,bleu.reflen,bleu.BP
|
||||
pass
|
||||
pass
|
||||
bleu = BleuScore(c, r, bootstrap=args.bootstrap)
|
||||
print " %5.2f %s" % (
|
||||
100 * bleu.actual, os.path.basename(r.fname))
|
||||
# print bleu.prec, bleu.hyplen, bleu.reflen, bleu.BP
|
||||
|
||||
# print [sum([bleu.hits[i][n] for i in xrange(len(bleu.hits))]) for n in xrange(4)]
|
||||
pass
|
||||
# print [
|
||||
# sum([bleu.hits[i][n] for i in xrange(len(bleu.hits))])
|
||||
# for n in xrange(4)]
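As a sketch of how the confidence interval is read off the sorted bootstrap scores above (the numbers generated here are random stand-ins, not real BLEU values):

# Percentile read-off from sorted bootstrap scores (Python 2 sketch).
import random

random.seed(0)
bootstrap = sorted(random.gauss(0.30, 0.01) for _ in xrange(1000))
alpha = 0.05
lower = bootstrap[int((alpha / 2) * len(bootstrap))]          # 2.5th percentile
upper = bootstrap[int((1 - (alpha / 2)) * len(bootstrap))]    # 97.5th percentile
median = bootstrap[int(.5 * len(bootstrap))]
print "%5.2f [%5.2f-%5.2f]" % (100 * median, 100 * lower, 100 * upper)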
|
||||
|
@ -1,237 +1,225 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Python utilities for moses
|
||||
#
|
||||
# This package mostly wraps standard Moses utilities into pipes.
|
||||
#
|
||||
# Written by Ulrich Germann
|
||||
#
|
||||
# This package borrows from scripts written by Christian Buck
|
||||
#
|
||||
# The package assumes that there is a complete moses installation
|
||||
# (including scripts) under one root directory,
|
||||
# e.g., via
|
||||
# bjam --with-xmlrpc-c=... [...] --install-scripts --prefix=${HOME}/moses
|
||||
# By default, this root directory is "${HOME}/moses".
|
||||
"""
|
||||
Python utilities for moses
|
||||
|
||||
This package mostly wraps standard Moses utilities into pipes.
|
||||
|
||||
Written by Ulrich Germann
|
||||
|
||||
This package borrows from scripts written by Christian Buck
|
||||
|
||||
The package assumes that there is a complete moses installation
|
||||
(including scripts) under one root directory,
|
||||
e.g., via ::
|
||||
bjam --with-xmlrpc-c=... [...] --install-scripts --prefix=${HOME}/moses
|
||||
By default, this root directory is "${HOME}/moses".
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import xmlrpclib
|
||||
from subprocess import (
|
||||
PIPE,
|
||||
Popen,
|
||||
)
|
||||
|
||||
|
||||
moses_root = os.environ.get('MOSES_ROOT', os.environ.get('HOME') + "/moses")
|
||||
|
||||
import xmlrpclib,datetime,argparse,time,os,sys
|
||||
from subprocess import *
|
||||
from unicodedata import normalize
|
||||
|
||||
moses_root = os.environ.get('MOSES_ROOT',os.environ.get('HOME')+"/moses")
|
||||
|
||||
class ProcessWrapper:
|
||||
|
||||
def __init__(self,cmd=[]):
|
||||
self.process = None
|
||||
self.cmd = cmd
|
||||
return
|
||||
def __init__(self, cmd=[]):
|
||||
self.process = None
|
||||
self.cmd = cmd
|
||||
|
||||
def start(self, stdin=PIPE, stdout=PIPE):
|
||||
if self.process:
|
||||
raise Exception("Process is already running")
|
||||
self.process = Popen(self.cmd, stdin = stdin, stdout = stdout)
|
||||
return
|
||||
def start(self, stdin=PIPE, stdout=PIPE):
|
||||
if self.process:
|
||||
raise Exception("Process is already running")
|
||||
self.process = Popen(self.cmd, stdin=stdin, stdout=stdout)
|
||||
|
||||
def __del__(self):
|
||||
if self.process:
|
||||
self.process.terminate()
|
||||
|
||||
def __del__(self):
|
||||
if self.process:
|
||||
self.process.terminate()
|
||||
pass
|
||||
return
|
||||
pass
|
||||
|
||||
class LineProcessor(ProcessWrapper):
|
||||
|
||||
def __call__(self,input):
|
||||
if not self.process: self.start()
|
||||
self.process.stdin.write("%s\n"%input.strip())
|
||||
self.process.stdin.flush()
|
||||
return self.process.stdout.readline().strip()
|
||||
pass
|
||||
def __call__(self, input):
|
||||
if not self.process:
|
||||
self.start()
|
||||
self.process.stdin.write("%s\n" % input.strip())
|
||||
self.process.stdin.flush()
|
||||
return self.process.stdout.readline().strip()
|
||||
|
||||
|
||||
class SentenceSplitter(ProcessWrapper):
|
||||
"""
|
||||
Wrapper for standard Moses sentence splitter
|
||||
"""
|
||||
def __init__(self,lang):
|
||||
ssplit_cmd = moses_root+"/scripts/ems/support/split-sentences.perl"
|
||||
self.cmd = [ssplit_cmd, "-b", "-q", "-l",lang]
|
||||
self.process = None
|
||||
return
|
||||
"""Wrapper for standard Moses sentence splitter."""
|
||||
|
||||
def __init__(self, lang):
|
||||
ssplit_cmd = moses_root + "/scripts/ems/support/split-sentences.perl"
|
||||
self.cmd = [ssplit_cmd, "-b", "-q", "-l", lang]
|
||||
self.process = None
|
||||
|
||||
def __call__(self, input):
|
||||
if not self.process:
|
||||
self.start()
|
||||
self.process.stdin.write(input.strip() + "\n<P>\n")
|
||||
self.process.stdin.flush()
|
||||
x = self.process.stdout.readline().strip()
|
||||
ret = []
|
||||
while x != '<P>' and x != '':
|
||||
ret.append(x)
|
||||
x = self.process.stdout.readline().strip()
|
||||
return ret
|
||||
|
||||
def __call__(self,input):
|
||||
if not self.process:
|
||||
self.start()
|
||||
pass
|
||||
self.process.stdin.write(input.strip() + "\n<P>\n")
|
||||
self.process.stdin.flush()
|
||||
x = self.process.stdout.readline().strip()
|
||||
ret = []
|
||||
while x != '<P>' and x != '':
|
||||
ret.append(x)
|
||||
x = self.process.stdout.readline().strip()
|
||||
pass
|
||||
return ret
|
||||
|
||||
class Pretokenizer(LineProcessor):
|
||||
"""
|
||||
Pretokenizer wrapper; the pretokenizer fixes known issues with the input.
|
||||
"""
|
||||
def __init__(self,lang):
|
||||
pretok_cmd = moses_root+"/scripts/tokenizer/pre-tokenizer.perl"
|
||||
self.cmd = [pretok_cmd,"-b", "-q", "-l",lang]
|
||||
self.process = None
|
||||
return
|
||||
pass
|
||||
"""Pretokenizer wrapper.
|
||||
|
||||
The pretokenizer fixes known issues with the input.
|
||||
"""
|
||||
def __init__(self, lang):
|
||||
pretok_cmd = moses_root + "/scripts/tokenizer/pre-tokenizer.perl"
|
||||
self.cmd = [pretok_cmd, "-b", "-q", "-l", lang]
|
||||
self.process = None
|
||||
|
||||
|
||||
class Tokenizer(LineProcessor):
|
||||
"""
|
||||
Tokenizer wrapper; the pretokenizer fixes known issues with the input.
|
||||
"""
|
||||
def __init__(self,lang,args=["-a","-no-escape"]):
|
||||
tok_cmd = moses_root+"/scripts/tokenizer/tokenizer.perl"
|
||||
self.cmd = [tok_cmd,"-b", "-q", "-l", lang] + args
|
||||
self.process = None
|
||||
return
|
||||
|
||||
"""Tokenizer wrapper.
|
||||
|
||||
The pretokenizer fixes known issues with the input.
|
||||
"""
|
||||
def __init__(self, lang, args=["-a", "-no-escape"]):
|
||||
tok_cmd = moses_root + "/scripts/tokenizer/tokenizer.perl"
|
||||
self.cmd = [tok_cmd, "-b", "-q", "-l", lang] + args
|
||||
self.process = None
|
||||
|
||||
|
||||
class Truecaser(LineProcessor):
|
||||
"""
|
||||
Truecaser wrapper.
|
||||
"""
|
||||
def __init__(self,model):
|
||||
truecase_cmd = moses_root+"/scripts/recaser/truecase.perl"
|
||||
self.cmd = [truecase_cmd,"-b", "--model",model]
|
||||
self.process = None
|
||||
return
|
||||
pass
|
||||
"""Truecaser wrapper."""
|
||||
def __init__(self, model):
|
||||
truecase_cmd = moses_root + "/scripts/recaser/truecase.perl"
|
||||
self.cmd = [truecase_cmd, "-b", "--model", model]
|
||||
self.process = None
|
||||
|
||||
|
||||
class LineProcessorPipeline:
|
||||
"""
|
||||
Line processor: one line in, one line out
|
||||
"""
|
||||
def __init__(self,parts=[]):
|
||||
self.chain = [LineProcessor(p.cmd) for p in parts]
|
||||
return
|
||||
|
||||
def start(self):
|
||||
if len(self.chain) == 0:
|
||||
return
|
||||
if self.chain[0].process:
|
||||
return
|
||||
self.chain[0].start()
|
||||
for i in xrange(1,len(self.chain)):
|
||||
self.chain[i].start(stdin = self.chain[i-1].process.stdout)
|
||||
pass
|
||||
return
|
||||
"""Line processor: one line in, one line out."""
|
||||
def __init__(self, parts=[]):
|
||||
self.chain = [LineProcessor(p.cmd) for p in parts]
|
||||
|
||||
def __call__(self,input):
|
||||
if len(self.chain) == 0:
|
||||
return input
|
||||
self.start()
|
||||
self.chain[0].process.stdin.write("%s\n"%input.strip())
|
||||
self.chain[0].process.stdin.flush()
|
||||
return self.chain[0].process.stdout.readline().strip()
|
||||
def start(self):
|
||||
if len(self.chain) == 0:
|
||||
return
|
||||
if self.chain[0].process:
|
||||
return
|
||||
self.chain[0].start()
|
||||
for i in xrange(1, len(self.chain)):
|
||||
self.chain[i].start(stdin=self.chain[i - 1].process.stdout)
|
||||
|
||||
def __call__(self, input):
|
||||
if len(self.chain) == 0:
|
||||
return input
|
||||
self.start()
|
||||
self.chain[0].process.stdin.write("%s\n" % input.strip())
|
||||
self.chain[0].process.stdin.flush()
|
||||
return self.chain[0].process.stdout.readline().strip()
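As a usage sketch of the wrappers above (assumes a complete Moses installation under MOSES_ROOT; the truecasing model path is invented):

# Usage sketch, Python 2; requires the Moses scripts under MOSES_ROOT.
from moses import SentenceSplitter, Tokenizer, Truecaser, LineProcessorPipeline

split = SentenceSplitter("en")
pipeline = LineProcessorPipeline(
    [Tokenizer("en"), Truecaser("/path/to/truecase-model.en")])  # invented path
for sentence in split("This is one sentence. Here is another."):
    print pipeline(sentence)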
|
||||
|
||||
pass
|
||||
|
||||
def find_free_port(p):
|
||||
"""
|
||||
Find a free port, starting at /p/.
|
||||
Return the free port, or False if none found.
|
||||
"""
|
||||
ret = p
|
||||
while ret - p < 20:
|
||||
devnull = open(os.devnull,"w")
|
||||
n = Popen(["netstat","-tnp"],stdout=PIPE,stderr=devnull)
|
||||
if n.communicate()[0].find(":%d "%ret) < 0:
|
||||
return p
|
||||
ret += 1
|
||||
pass
|
||||
return False
|
||||
"""Find a free port, starting at /p/.
|
||||
|
||||
:return: The free port, or False if none found.
|
||||
"""
|
||||
ret = p
|
||||
while ret - p < 20:
|
||||
devnull = open(os.devnull, "w")
|
||||
n = Popen(["netstat", "-tnp"], stdout=PIPE, stderr=devnull)
|
||||
if n.communicate()[0].find(":%d " % ret) < 0:
|
||||
return p
|
||||
ret += 1
|
||||
return False
|
||||
|
||||
|
||||
class MosesServer(ProcessWrapper):
|
||||
|
||||
def __init__(self,args=[]):
|
||||
self.process = None
|
||||
mserver_cmd = moses_root+"/bin/mosesserver"
|
||||
self.cmd = [mserver_cmd] + args
|
||||
self.url = None
|
||||
self.proxy = None
|
||||
return
|
||||
|
||||
def start(self,config=None,args=[],port=7447,debug=False):
|
||||
self.cmd.extend(args)
|
||||
if config:
|
||||
if "-f" in args:
|
||||
raise Exception("Config file specified twice")
|
||||
else:
|
||||
self.cmd.extend(["-f",config])
|
||||
pass
|
||||
pass
|
||||
self.port = port # find_free_port(port)
|
||||
if not self.port:
|
||||
raise Excpetion("Cannot find free port for moses server!")
|
||||
self.cmd.extend(["--server-port", "%d"%self.port])
|
||||
if debug:
|
||||
print >>sys.stderr,self.cmd
|
||||
# self.stderr = open("mserver.%d.stderr"%self.port,'w')
|
||||
# self.stdout = open("mserver.%d.stdout"%self.port,'w')
|
||||
# self.process = Popen(self.cmd,stderr = self.stderr,stdout = self.stdout)
|
||||
self.process = Popen(self.cmd)
|
||||
else:
|
||||
devnull = open(os.devnull,"w")
|
||||
self.process = Popen(self.cmd, stderr=devnull, stdout=devnull)
|
||||
pass
|
||||
|
||||
if self.process.poll():
|
||||
raise Exception("FATAL ERROR: Could not launch moses server!")
|
||||
if debug:
|
||||
print >>sys.stderr,"MOSES port is %d."%self.port
|
||||
print >>sys.stderr,"Moses poll status is", self.process.poll()
|
||||
pass
|
||||
|
||||
self.url = "http://localhost:%d/RPC2"%self.port
|
||||
self.connect(self.url)
|
||||
|
||||
return True
|
||||
|
||||
def connect(self,url):
|
||||
if url[:4] != "http": url = "http://%s"%url
|
||||
if url[-5:] != "/RPC2": url += "/RPC2"
|
||||
self.url = url
|
||||
self.proxy = xmlrpclib.ServerProxy(self.url)
|
||||
return
|
||||
|
||||
def translate(self,input):
|
||||
attempts = 0
|
||||
while attempts < 100:
|
||||
try:
|
||||
if type(input) is unicode:
|
||||
# if the server does not expect unicode, provide a
|
||||
# properly encoded string!
|
||||
param = {'text': input.strip().encode('utf8')}
|
||||
return self.proxy.translate(param)['text'].decode('utf8')
|
||||
|
||||
elif type(input) is str:
|
||||
param = {'text': input.strip()}
|
||||
return self.proxy.translate(param)['text']
|
||||
|
||||
elif type(input) is list:
|
||||
return [self.translate(x) for x in input]
|
||||
|
||||
elif type(input) is dict:
|
||||
return self.proxy.translate(input)
|
||||
def __init__(self, args=[]):
|
||||
self.process = None
|
||||
mserver_cmd = moses_root + "/bin/mosesserver"
|
||||
self.cmd = [mserver_cmd] + args
|
||||
self.url = None
|
||||
self.proxy = None
|
||||
|
||||
def start(self, config=None, args=[], port=7447, debug=False):
|
||||
self.cmd.extend(args)
|
||||
if config:
|
||||
if "-f" in args:
|
||||
raise Exception("Config file specified twice")
|
||||
else:
|
||||
self.cmd.extend(["-f", config])
|
||||
self.port = port # find_free_port(port)
|
||||
if not self.port:
|
||||
raise Exception("Cannot find free port for moses server!")
|
||||
self.cmd.extend(["--server-port", "%d" % self.port])
|
||||
if debug:
|
||||
print >>sys.stderr, self.cmd
|
||||
# self.stderr = open("mserver.%d.stderr"%self.port,'w')
|
||||
# self.stdout = open("mserver.%d.stdout"%self.port,'w')
|
||||
# self.process = Popen(
|
||||
# self.cmd, stderr=self.stderr, stdout=self.stdout)
|
||||
self.process = Popen(self.cmd)
|
||||
else:
|
||||
raise Exception("Can't handle input of this type!")
|
||||
devnull = open(os.devnull, "w")
|
||||
self.process = Popen(self.cmd, stderr=devnull, stdout=devnull)
|
||||
|
||||
except:
|
||||
attempts += 1
|
||||
print >>sys.stderr, "WAITING", attempts
|
||||
time.sleep(1)
|
||||
pass
|
||||
pass
|
||||
raise Exception("Translation request failed")
|
||||
pass
|
||||
if self.process.poll():
|
||||
raise Exception("FATAL ERROR: Could not launch moses server!")
|
||||
if debug:
|
||||
print >>sys.stderr, "MOSES port is %d." % self.port
|
||||
print >>sys.stderr, "Moses poll status is", self.process.poll()
|
||||
|
||||
self.url = "http://localhost:%d/RPC2" % self.port
|
||||
self.connect(self.url)
|
||||
|
||||
return True
|
||||
|
||||
def connect(self, url):
|
||||
if url[:4] != "http":
|
||||
url = "http://%s" % url
|
||||
if url[-5:] != "/RPC2":
|
||||
url += "/RPC2"
|
||||
self.url = url
|
||||
self.proxy = xmlrpclib.ServerProxy(self.url)
|
||||
|
||||
def translate(self, input):
|
||||
attempts = 0
|
||||
while attempts < 100:
|
||||
try:
|
||||
if type(input) is unicode:
|
||||
# If the server does not expect unicode, provide a
|
||||
# properly encoded string!
|
||||
param = {'text': input.strip().encode('utf8')}
|
||||
return self.proxy.translate(param)['text'].decode('utf8')
|
||||
|
||||
elif type(input) is str:
|
||||
param = {'text': input.strip()}
|
||||
return self.proxy.translate(param)['text']
|
||||
|
||||
elif type(input) is list:
|
||||
return [self.translate(x) for x in input]
|
||||
|
||||
elif type(input) is dict:
|
||||
return self.proxy.translate(input)
|
||||
|
||||
else:
|
||||
raise Exception("Can't handle input of this type!")
|
||||
|
||||
except:
|
||||
attempts += 1
|
||||
print >>sys.stderr, "WAITING", attempts
|
||||
time.sleep(1)
|
||||
raise Exception("Translation request failed")
|
||||
|
@ -5,29 +5,39 @@
|
||||
# This script simulates post-editing of MT output and incrementally
|
||||
# updates the dynamic phrase tables in the moses server.
|
||||
|
||||
import xmlrpclib,datetime,argparse,sys,os,time
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import xmlrpclib
|
||||
import moses
|
||||
from moses import MosesServer
|
||||
from subprocess import *
|
||||
from subprocess import (
|
||||
PIPE,
|
||||
Popen,
|
||||
)
|
||||
|
||||
|
||||
mserver = moses.MosesServer()
|
||||
|
||||
# We must perform some custom argument processing, as moses parameter
|
||||
# specifications do not comply with the conventions assumed by standard
|
||||
# argument-parsing packages; an isolated double dash separates script
|
||||
# arguments from moses arguments.
|
||||
|
||||
|
||||
def split_args(all_args):
|
||||
"""
|
||||
Split argument list all_args into arguments specific to this script and
|
||||
arguments relating to the moses server. An isolated double dash acts as
|
||||
the separator between the two types of arguments.
|
||||
arguments relating to the moses server. An isolated double dash acts as
|
||||
the separator between the two types of arguments.
|
||||
"""
|
||||
my_args = []
|
||||
mo_args = []
|
||||
arglist = mo_args
|
||||
i = 0
|
||||
# IMPORTANT: the code below must be coordinated with
|
||||
# IMPORTANT: the code below must be coordinated with
|
||||
# - the evolution of moses command line arguments
|
||||
# - mert-moses.pl
|
||||
# - mert-moses.pl
|
||||
while i < len(all_args):
|
||||
# print i,"MY_ARGS", my_args
|
||||
# print i,"MO_ARGS", mo_args
|
||||
@ -36,14 +46,16 @@ def split_args(all_args):
|
||||
elif all_args[i] == "--]":
|
||||
arglist = mo_args
|
||||
elif all_args[i] == "-i" or all_args[i] == "-input-file":
|
||||
my_args.extend(["-i",all_args[i+1]])
|
||||
my_args.extend(["-i", all_args[i + 1]])
|
||||
i += 1
|
||||
elif all_args[i] == "-inputtype":
|
||||
if all_args[i+1] != "0":
|
||||
# not yet supported! Therefore:
|
||||
errmsg = "FATAL ERROR: %s "%sys.argv[0]
|
||||
errmsg += "only supports plain text input at this point."
|
||||
raise Exception(errsmg)
|
||||
if all_args[i + 1] != "0":
|
||||
# Not yet supported! Therefore:
|
||||
errmsg = (
|
||||
"FATAL ERROR: "
|
||||
"%s only supports plain text input at this point."
|
||||
% sys.argv[0])
|
||||
raise Exception(errmsg)
|
||||
# my_args.extend(["--input-type",all_args[i+1]])
|
||||
i += 1
|
||||
elif all_args[i] == "-lattice-samples":
|
||||
@ -52,13 +64,14 @@ def split_args(all_args):
|
||||
# mo_args[i:i+3] = []
|
||||
# i += 2
|
||||
# This is not yet supported! Therefore:
|
||||
errmsg = "FATAL ERROR: %s "%sys.argv[0]
|
||||
errmsg += "does not yet support lattice sampling."
|
||||
raise Exception(errsmg)
|
||||
|
||||
errmsg = (
|
||||
"FATAL ERROR: %s does not yet support lattice sampling."
|
||||
% sys.argv[0])
|
||||
raise Exception(errmsg)
|
||||
|
||||
elif all_args[i] == "-n-best-list":
|
||||
my_args.extend(["--nbest",all_args[i+2]])
|
||||
my_args.extend(["--nbest-file",all_args[i+1]])
|
||||
my_args.extend(["--nbest", all_args[i + 2]])
|
||||
my_args.extend(["--nbest-file", all_args[i + 1]])
|
||||
i += 2
|
||||
|
||||
elif all_args[i] == "-n-best-distinct":
|
||||
@ -70,128 +83,148 @@ def split_args(all_args):
|
||||
|
||||
i += 1
|
||||
pass
|
||||
return my_args,mo_args
|
||||
|
||||
return my_args, mo_args
|
||||
|
||||
|
||||
def interpret_args(my_args):
|
||||
"""
|
||||
Parse script-specific argument list.
|
||||
"""
|
||||
aparser = argparse.ArgumentParser()
|
||||
|
||||
aparser.add_argument("-s","--server-cmd",default="mosesserver",
|
||||
dest="servercmd", help="path to moses server command")
|
||||
aparser.add_argument("--url",help="URL of external moses server.")
|
||||
aparser.add_argument("-p","--port", type=int, default=7447,
|
||||
help="port number to be used for server")
|
||||
|
||||
# input / output
|
||||
aparser.add_argument("-i","--input",help="source file",default="-")
|
||||
aparser.add_argument("-r","--ref",help="reference translation",default=None)
|
||||
aparser.add_argument("-a","--aln",help="alignment",default=None)
|
||||
aparser.add_argument("-o","--output",default="-",help="output file")
|
||||
aparser.add_argument("-d","--debug",action="store_true",help="debug mode")
|
||||
|
||||
# moses reporting options
|
||||
aparser.add_argument("-A","--with-alignment", dest="A",
|
||||
help="include alignment in output", action="store_true")
|
||||
aparser.add_argument("-G","--with-graph",type=bool, default=False, dest="G",
|
||||
help="include search graph info in output")
|
||||
aparser.add_argument("-T","--with-transopt",type=bool, default=False, dest = "T",
|
||||
help="include translation options info in output")
|
||||
aparser.add_argument("-F","--report-all-factors", action="store_true",dest="F",
|
||||
help="report all factors")
|
||||
aparser.add_argument("-n","--nbest",type=int,dest="nbest",default=0,
|
||||
help="size of nbest list")
|
||||
aparser.add_argument("-N","--nbest-file",dest="nbestFile",default=0,
|
||||
help="output file for nbest list")
|
||||
aparser.add_argument("-u","--nbest-distinct",type=bool,dest="U",default=False,
|
||||
help="report all factors")
|
||||
aparser.add_argument(
|
||||
"-s", "--server-cmd", default="mosesserver", dest="servercmd",
|
||||
help="Path to moses server command.")
|
||||
aparser.add_argument(
|
||||
"--url", help="URL of external moses server.")
|
||||
aparser.add_argument(
|
||||
"-p", "--port", type=int, default=7447,
|
||||
help="Port number to be used for server.")
|
||||
|
||||
# Input / output.
|
||||
aparser.add_argument(
|
||||
"-i", "--input", default='-', help="source file")
|
||||
aparser.add_argument(
|
||||
"-r", "--ref", default=None, help="Reference translation.")
|
||||
aparser.add_argument(
|
||||
"-a", "--aln", default=None, help="Alignment.")
|
||||
aparser.add_argument(
|
||||
"-o", "--output", default="-", help="Output file.")
|
||||
aparser.add_argument(
|
||||
"-d", "--debug", action='store_true', help="Debug mode.")
|
||||
|
||||
# Moses reporting options.
|
||||
aparser.add_argument(
|
||||
"-A", "--with-alignment", dest="A", action='store_true',
|
||||
help="Include alignment in output.")
|
||||
aparser.add_argument(
|
||||
"-G", "--with-graph", type=bool, default=False, dest="G",
|
||||
help="Include search graph info in output.")
|
||||
aparser.add_argument(
|
||||
"-T", "--with-transopt", type=bool, default=False, dest="T",
|
||||
help="Include translation options info in output.")
|
||||
aparser.add_argument(
|
||||
"-F", "--report-all-factors", action="store_true", dest="F",
|
||||
help="Report all factors.")
|
||||
aparser.add_argument(
|
||||
"-n", "--nbest", type=int, dest="nbest", default=0,
|
||||
help="Size of nbest list.")
|
||||
aparser.add_argument(
|
||||
"-N", "--nbest-file", dest="nbestFile", default=0,
|
||||
help="Output file for nbest list.")
|
||||
aparser.add_argument(
|
||||
"-u", "--nbest-distinct", type=bool, dest="U", default=False,
|
||||
help="Report all factors.")
|
||||
|
||||
return aparser.parse_args(my_args)
|
||||
|
||||
|
||||
|
||||
def translate(proxy, args, line):
|
||||
if type(line) is unicode:
|
||||
param = { 'text' : line.strip().encode('utf8') }
|
||||
param = {'text': line.strip().encode('utf8')}
|
||||
elif type(line) is str:
|
||||
param = { 'text' : line.strip() }
|
||||
param = {'text': line.strip()}
|
||||
else:
|
||||
raise Exception("Can't handle input")
|
||||
if args.A: param['align'] = True
|
||||
if args.T: param['topt'] = True
|
||||
if args.F: param['report-all-factors'] = True
|
||||
if args.nbest:
|
||||
if args.A:
|
||||
param['align'] = True
|
||||
if args.T:
|
||||
param['topt'] = True
|
||||
if args.F:
|
||||
param['report-all-factors'] = True
|
||||
if args.nbest:
|
||||
param['nbest'] = int(args.nbest)
|
||||
param['add-score-breakdown'] = True
|
||||
pass
|
||||
if args.U:
|
||||
if args.U:
|
||||
param['nbest-distinct'] = True
|
||||
pass
|
||||
attempts = 0
|
||||
while attempts < 20:
|
||||
t1 = time.time()
|
||||
try:
|
||||
return proxy.translate(param)
|
||||
return proxy.translate(param)
|
||||
|
||||
# except xmlrpclib.Fault as e:
|
||||
# except xmlrpclib.ProtocolError as e:
|
||||
# except xmlrpclib.ResponseError as e:
|
||||
except xmlrpclib.Error as e:
|
||||
time.sleep(2) # give all the stderr stuff a chance to be flushed
|
||||
print >>sys.stderr," XMLRPC error:",e
|
||||
sys.stderr.flush()
|
||||
print >>sys.stderr, " XMLRPC error:", e
|
||||
print >>sys.stderr, "Input was"
|
||||
print >>sys.stderr, param
|
||||
sys.exit(1)
|
||||
|
||||
except IOError as e:
|
||||
print >>sys.stderr,"I/O error({0}): {1}".format(e.errno, e.strerror)
|
||||
print >>sys.stderr, (
|
||||
"I/O error({0}): {1}".format(e.errno, e.strerror))
|
||||
time.sleep(5)
|
||||
|
||||
except:
|
||||
serverstatus = mserver.process.poll()
|
||||
if serverstatus == None:
|
||||
print >>sys.stderr, "Connection failed after %f seconds"%(time.time()-t1)
|
||||
if serverstatus is None:
|
||||
print >>sys.stderr, (
|
||||
"Connection failed after %f seconds" % (time.time() - t1))
|
||||
attempts += 1
|
||||
if attempts > 10:
|
||||
time.sleep(10)
|
||||
else:
|
||||
time.sleep(5)
|
||||
pass
|
||||
else:
|
||||
|
||||
print >>sys.stderr, "Oopsidaisy, server exited with code %d (signal %d)"\
|
||||
%(serverstatus/256,serverstatus%256)
|
||||
print >>sys.stderr, (
|
||||
"Oopsidaisy, server exited with code %d (signal %d)"
|
||||
% (serverstatus / 256, serverstatus % 256))
|
||||
pass
|
||||
pass
|
||||
pass
|
||||
raise Exception("Exception: could not reach translation server.")
|
||||
|
||||
|
||||
|
||||
def read_data(fname):
|
||||
"""
|
||||
Read and return data (source, target or alignment) from file fname.
|
||||
"""
|
||||
if fname[-3:] == ".gz":
|
||||
foo = Popen(["zcat",fname],stdout=PIPE)\
|
||||
.communicate()[0]\
|
||||
.strip().split('\n')
|
||||
process = Popen(["zcat", fname], stdout=PIPE)
|
||||
stdout, _ = process.communicate()
|
||||
foo = stdout.strip().split('\n')
|
||||
else:
|
||||
foo = [x.strip() for x in open(fname).readlines()]
|
||||
pass
|
||||
return foo
|
||||
|
||||
def repack_result(idx,result):
|
||||
|
||||
def repack_result(idx, result):
|
||||
global args
|
||||
if args.nbest:
|
||||
for h in result['nbest']:
|
||||
fields = [idx,h['hyp'],h['fvals'],h['totalScore']]
|
||||
fields = [idx, h['hyp'], h['fvals'], h['totalScore']]
|
||||
for i in xrange(len(fields)):
|
||||
if type(fields[i]) is unicode:
|
||||
fields[i] = fields[i].encode('utf-8')
|
||||
pass
|
||||
pass
|
||||
# print fields
|
||||
print >>NBestFile,"%d ||| %s ||| %s ||| %f"%tuple(fields)
|
||||
pass
|
||||
# Print fields.
|
||||
print >>NBestFile, "%d ||| %s ||| %s ||| %f" % tuple(fields)
|
||||
pass
|
||||
if 'align' in result:
|
||||
t = result['text'].split()
|
||||
@ -200,16 +233,14 @@ def repack_result(idx,result):
|
||||
k = 0
|
||||
for a in result['align']:
|
||||
k = a['tgt-start']
|
||||
if k: print " ".join(t[i:k]).encode('utf8'),span,
|
||||
if k:
|
||||
print " ".join(t[i:k]).encode('utf8'), span,
|
||||
i = k
|
||||
span = "|%d %d|"%(a['src-start'],a['src-end'])
|
||||
pass
|
||||
print " ".join(t[k:]).encode('utf8'),span
|
||||
pass
|
||||
span = "|%d %d|" % (a['src-start'], a['src-end'])
|
||||
print " ".join(t[k:]).encode('utf8'), span
|
||||
else:
|
||||
print result['text'].encode('utf8')
|
||||
pass
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
my_args, mo_args = split_args(sys.argv[1:])
|
||||
@ -221,17 +252,17 @@ if __name__ == "__main__":
|
||||
args = interpret_args(my_args)
|
||||
|
||||
if "-show-weights" in mo_args:
|
||||
# this is for use during tuning, where moses is called to get a list of
|
||||
# feature names
|
||||
devnull = open(os.devnull,"w")
|
||||
mo = Popen(mserver.cmd + mo_args,stdout=PIPE,stderr=devnull)
|
||||
# This is for use during tuning, where moses is called to get a list
|
||||
# of feature names.
|
||||
devnull = open(os.devnull, "w")
|
||||
mo = Popen(mserver.cmd + mo_args, stdout=PIPE, stderr=devnull)
|
||||
print mo.communicate()[0].strip()
|
||||
sys.exit(0)
|
||||
pass
|
||||
|
||||
if args.nbest:
|
||||
if args.nbestFile:
|
||||
NBestFile = open(args.nbestFile,"w")
|
||||
NBestFile = open(args.nbestFile, "w")
|
||||
else:
|
||||
NBestFile = sys.stdout
|
||||
pass
|
||||
@ -239,8 +270,10 @@ if __name__ == "__main__":
|
||||
|
||||
ref = None
|
||||
aln = None
|
||||
if args.ref: ref = read_data(args.ref)
|
||||
if args.aln: aln = read_data(args.aln)
|
||||
if args.ref:
|
||||
ref = read_data(args.ref)
|
||||
if args.aln:
|
||||
aln = read_data(args.aln)
|
||||
|
||||
if ref and aln:
|
||||
try:
|
||||
@ -260,25 +293,21 @@ if __name__ == "__main__":
|
||||
line = sys.stdin.readline()
|
||||
idx = 0
|
||||
while line:
|
||||
result = translate(mserver.proxy,args,line)
|
||||
repack_result(idx,result)
|
||||
result = translate(mserver.proxy, args, line)
|
||||
repack_result(idx, result)
|
||||
line = sys.stdin.readline()
|
||||
idx += 1
|
||||
pass
|
||||
pass
|
||||
else:
|
||||
src = read_data(args.input)
|
||||
for i in xrange(len(src)):
|
||||
result = translate(mserver.proxy,args,src[i])
|
||||
repack_result(i,result)
|
||||
result = translate(mserver.proxy, args, src[i])
|
||||
repack_result(i, result)
|
||||
if args.debug:
|
||||
print >>sys.stderr, result['text'].encode('utf-8')
|
||||
pass
|
||||
if ref and aln:
|
||||
result = mserver.proxy.updater({'source' : src[i],
|
||||
'target' : ref[i],
|
||||
'alignment' : aln[i]})
|
||||
pass
|
||||
pass
|
||||
pass
|
||||
pass
|
||||
if ref and aln:
|
||||
result = mserver.proxy.updater({
|
||||
'source': src[i],
|
||||
'target': ref[i],
|
||||
'alignment': aln[i],
|
||||
})
|
||||
|
@ -2,12 +2,12 @@
|
||||
|
||||
"""
|
||||
The Gacha filter cleans out sentence pairs that have global character mean
|
||||
lower than a certain threshold.
|
||||
|
||||
Use this cleaner to produce low quantity of high quality sentence pairs.
|
||||
lower than a certain threshold.
|
||||
|
||||
It is an aggressive cleaner that cleaned out ~64% of the HindEnCorp during
|
||||
WMT14 when threshold is set at 20% (Tan and Pal, 2014); achieving lowest TER.
|
||||
Use this cleaner to produce low quantity of high quality sentence pairs.
|
||||
|
||||
It is an aggressive cleaner that cleaned out ~64% of the HindEnCorp during
|
||||
WMT14 when threshold is set at 20% (Tan and Pal, 2014); achieving lowest TER.
|
||||
(see http://www.aclweb.org/anthology/W/W14/W14-3323.pdf)
|
||||
|
||||
This is inspired by the global character mean that is used in the Gale-Church
|
||||
@ -24,17 +24,24 @@ where:
|
||||
(For details on Gale-Church, see http://www.aclweb.org/anthology/J93-1004.pdf)
|
||||
"""
|
||||
|
||||
import io, subprocess
|
||||
import io
|
||||
import subprocess
|
||||
|
||||
|
||||
red = '\033[01;31m'
|
||||
native = '\033[m'
|
||||
|
||||
|
||||
def err_msg(txt):
|
||||
return red+txt+native
|
||||
return red + txt + native
|
||||
|
||||
|
||||
def num_char(filename):
|
||||
return float(subprocess.Popen(["wc", "-m", filename],
|
||||
stdout=subprocess.PIPE).stdout.read().split()[0])
|
||||
process = subprocess.Popen(
|
||||
["wc", "-m", filename], stdout=subprocess.PIPE)
|
||||
# TODO: Was this meant to call communicate()?
|
||||
return float(process.stdout.read().split()[0])
|
||||
|
||||
|
||||
def gacha_mean(sourcefile, targetfile):
|
||||
"""
|
||||
@ -43,36 +50,44 @@ def gacha_mean(sourcefile, targetfile):
|
||||
"""
|
||||
sys.stderr.write(err_msg('Calculating Gacha mean, please wait ...\n'))
|
||||
c = num_char(sourcefile) / num_char(targetfile)
|
||||
sys.stderr.write(err_msg('Gacha mean = '+str(c)+'\n'))
|
||||
sys.stderr.write(err_msg('Gacha mean = ' + str(c) + '\n'))
|
||||
sys.stderr.write(err_msg('Filtering starts ...\n'))
|
||||
return c
|
||||
|
||||
|
||||
def io_open(path):
|
||||
"""Open file `path` for reading, as a UTF-8 text file."""
|
||||
return io.open(path, 'r', encoding='utf8')
|
||||
|
||||
|
||||
def main(sourcefile, targetfile, threshold=0.2):
|
||||
# Calculates Gacha mean.
|
||||
c = gacha_mean(sourcefile, targetfile)
|
||||
# Calculates lower and upperbound for filtering
|
||||
threshold = float(threshold)
|
||||
lowerbound = (1-threshold) * c
|
||||
upperbound = (1+threshold) * c
|
||||
|
||||
lowerbound = (1 - threshold) * c
|
||||
upperbound = (1 + threshold) * c
|
||||
|
||||
# Start filtering sentences.
|
||||
with io.open(sourcefile, 'r', encoding='utf8') as srcfin, \
|
||||
io.open(targetfile, 'r', encoding='utf8') as trgfin:
|
||||
with io_open(sourcefile) as srcfin, io_open(targetfile) as trgfin:
|
||||
for s, t in zip(srcfin, trgfin):
|
||||
if lowerbound < len(s) / float(len(t)) < upperbound:
|
||||
print(u"{}\t{}\n".format(s.strip(),t.strip()))
|
||||
print(u"{}\t{}\n".format(s.strip(), t.strip()))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
if len(sys.argv) not in range(3,5):
|
||||
usage_msg = err_msg('Usage: python %s srcfile trgfile (threshold)\n'
|
||||
% sys.argv[0])
|
||||
|
||||
example_msg = err_msg('Example: gacha_cleaning.py ~/Europarl.de-en.de '
|
||||
'~/Europarl.de-en.en 0.4\n'
|
||||
% sys.argv[0])
|
||||
if len(sys.argv) not in range(3, 5):
|
||||
usage_msg = err_msg(
|
||||
"Usage: python %s srcfile trgfile (threshold)\n"
|
||||
% sys.argv[0])
|
||||
|
||||
example_msg = err_msg(
|
||||
"Example: "
|
||||
"gacha_cleaning.py ~/Europarl.de-en.de ~/Europarl.de-en.en 0.4\n"
|
||||
% sys.argv[0])
|
||||
sys.stderr.write(usage_msg)
|
||||
sys.stderr.write(example_msg)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
main(*sys.argv[1:])
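A small worked example of the bounds computed in main() above (all numbers invented):

# Worked example of the Gacha bounds.
c = 100000.0 / 95000.0            # gacha_mean: source chars / target chars, ~1.05
threshold = 0.2
lowerbound = (1 - threshold) * c  # ~0.84
upperbound = (1 + threshold) * c  # ~1.26
# A pair with 52 source characters and 50 target characters has ratio 1.04,
# which lies inside (lowerbound, upperbound), so the pair is kept.
print lowerbound < 52 / 50.0 < upperbound   # True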
|
||||
|
@ -24,9 +24,11 @@
|
||||
import optparse
|
||||
import sys
|
||||
|
||||
|
||||
class NGram(tuple):
|
||||
pass
|
||||
|
||||
|
||||
class Gap:
|
||||
def __init__(self, minSpan):
|
||||
self.minSpan = minSpan
|
||||
@ -34,8 +36,12 @@ class Gap:
|
||||
def getMinSpan(self):
|
||||
return self.minSpan
|
||||
|
||||
|
||||
def printUsage():
|
||||
sys.stderr.write("Usage: filter-rule-table.py [--min-non-initial-rule-count=N] INPUT")
|
||||
sys.stderr.write(
|
||||
"Usage: "
|
||||
"filter-rule-table.py [--min-non-initial-rule-count=N] INPUT")
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser()
|
||||
@ -54,14 +60,15 @@ def main():
|
||||
inputSentences.append(line.split())
|
||||
filterRuleTable(sys.stdin, inputSentences, N, options)
|
||||
|
||||
|
||||
def filterRuleTable(ruleTable, inputSentences, N, options):
|
||||
# Map each input n-gram (n = 1..N) to a map from sentence indices to
|
||||
# lists of intra-sentence indices.
|
||||
occurrences = {}
|
||||
for i, sentence in enumerate(inputSentences):
|
||||
for n in range(1, N+1):
|
||||
for j in range(0, len(sentence)-n+1):
|
||||
ngram = NGram(sentence[j:j+n])
|
||||
for n in range(1, N + 1):
|
||||
for j in range(0, len(sentence) - n + 1):
|
||||
ngram = NGram(sentence[j:j + n])
|
||||
innerMap = occurrences.setdefault(ngram, {})
|
||||
indices = innerMap.setdefault(i, [])
|
||||
indices.append(j)
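# To illustrate the index structure built above (invented input): with
# inputSentences = [["a", "b", "a"]] and N = 2, this loop produces
#     occurrences = {
#         ('a',):     {0: [0, 2]},
#         ('b',):     {0: [1]},
#         ('a', 'b'): {0: [0]},
#         ('b', 'a'): {0: [1]},
#     }
# i.e. each n-gram maps the sentence index to the positions where it starts.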
|
||||
@ -70,15 +77,16 @@ def filterRuleTable(ruleTable, inputSentences, N, options):
|
||||
prevRuleIncluded = None
|
||||
for line in ruleTable:
|
||||
rhs, count = parseRule(line)
|
||||
below_threshold = (count is not None and count < options.minCount)
|
||||
# Prune non-initial rule if count is below threshold.
|
||||
if count != None and count < options.minCount and isNonInitialRule(rhs):
|
||||
if below_threshold and isNonInitialRule(rhs):
|
||||
if prevRHS != rhs:
|
||||
prevRuleIncluded = None
|
||||
prevRHS = rhs
|
||||
continue
|
||||
# If source RHS is same as last rule's then we already know whether to
|
||||
# filter or not (unless it was pruned before checking).
|
||||
if rhs == prevRHS and prevRuleIncluded != None:
|
||||
if rhs == prevRHS and prevRuleIncluded is not None:
|
||||
if prevRuleIncluded:
|
||||
print line,
|
||||
continue
|
||||
@ -89,7 +97,10 @@ def filterRuleTable(ruleTable, inputSentences, N, options):
|
||||
prevRuleIncluded = True
|
||||
continue
|
||||
segments = segmentRHS(rhs, N)
|
||||
ngramMaps = [occurrences.get(s, {}) for s in segments if isinstance(s, NGram)]
|
||||
ngramMaps = [
|
||||
occurrences.get(s, {})
|
||||
for s in segments
|
||||
if isinstance(s, NGram)]
|
||||
if len(ngramMaps) == 0:
|
||||
print line,
|
||||
prevRuleIncluded = True
|
||||
@ -111,9 +122,13 @@ def filterRuleTable(ruleTable, inputSentences, N, options):
|
||||
break
|
||||
prevRuleIncluded = match
|
||||
|
||||
# Parse a line of the rule table and return a tuple containing two items,
|
||||
# the list of RHS source symbols and the rule count (if present).
|
||||
|
||||
def parseRule(line):
|
||||
"""Parse a line of the rule table.
|
||||
|
||||
:return: A tuple containing two items: the list of RHS source symbols,
|
||||
and the rule count (if present).
|
||||
"""
|
||||
cols = line.split("|||")
|
||||
rhsSourceSymbols = cols[0].split()[:-1]
|
||||
ruleCount = None
|
||||
@ -123,15 +138,18 @@ def parseRule(line):
|
||||
ruleCount = float(counts[2])
|
||||
return (rhsSourceSymbols, ruleCount)
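To make the column handling concrete, here is a rough illustration. The rule-table line is invented, and the exact "|||" field that holds the counts sits in the elided part of this hunk, so treat the count lookup as an assumption:

# Hypothetical Moses rule-table line (source ||| target ||| scores ||| alignment ||| counts):
line = "ein [X][X] haus [X] ||| a [X][X] house [X] ||| 0.6 0.5 ||| 0-0 2-2 ||| 10 12 8"
# cols[0].split()[:-1] drops the trailing source-side LHS label '[X]':
#   rhsSourceSymbols == ['ein', '[X][X]', 'haus']
# If a counts field is present, its third value would become the rule count:
#   ruleCount == 8.0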
|
||||
|
||||
|
||||
def isNT(symbol):
|
||||
return symbol[0] == '[' and symbol[-1] == ']'
|
||||
|
||||
|
||||
def isNonInitialRule(rhs):
|
||||
for symbol in rhs:
|
||||
if isNT(symbol):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def segmentRHS(rhs, N):
|
||||
segments = []
|
||||
terminals = []
|
||||
@ -159,13 +177,14 @@ def segmentRHS(rhs, N):
|
||||
segments.append(NGram(terminals))
|
||||
return segments
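Most of the segmentation logic sits in the elided lines, so the following only sketches the intended shape; the exact Gap spans are an assumption:

# For N = 2, a source RHS such as ['the', '[X][X]', 'cat', 'sat'] would be
# segmented into something like
#   [NGram(('the',)), Gap(1), NGram(('cat', 'sat'))]
# i.e. runs of terminals become n-grams of length <= N and each non-terminal
# becomes a Gap that must cover at least one input word.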
|
||||
|
||||
|
||||
def matchSegments(segments, indexSeq, sentenceLength):
|
||||
assert len(segments) > 0
|
||||
firstSegment = segments[0]
|
||||
i = 0
|
||||
if isinstance(firstSegment, Gap):
|
||||
minPos = firstSegment.getMinSpan()
|
||||
maxPos = sentenceLength-1
|
||||
maxPos = sentenceLength - 1
|
||||
else:
|
||||
minPos = indexSeq[i] + len(firstSegment)
|
||||
i += 1
|
||||
@ -175,7 +194,7 @@ def matchSegments(segments, indexSeq, sentenceLength):
|
||||
if minPos + segment.getMinSpan() > sentenceLength:
|
||||
return False
|
||||
minPos = minPos + segment.getMinSpan()
|
||||
maxPos = sentenceLength-1
|
||||
maxPos = sentenceLength - 1
|
||||
else:
|
||||
pos = indexSeq[i]
|
||||
i += 1
|
||||
@ -185,6 +204,7 @@ def matchSegments(segments, indexSeq, sentenceLength):
|
||||
maxPos = minPos
|
||||
return True
|
||||
|
||||
|
||||
def enumerateIndexSeqs(ngramMaps, sentenceIndex, minFirstIndex):
|
||||
assert len(ngramMaps) > 0
|
||||
if len(ngramMaps) == 1:
|
||||
@ -195,7 +215,7 @@ def enumerateIndexSeqs(ngramMaps, sentenceIndex, minFirstIndex):
|
||||
for index in ngramMaps[0][sentenceIndex]:
|
||||
if index < minFirstIndex:
|
||||
continue
|
||||
for seq in enumerateIndexSeqs(ngramMaps[1:], sentenceIndex, index+1):
|
||||
for seq in enumerateIndexSeqs(ngramMaps[1:], sentenceIndex, index + 1):
|
||||
assert seq[0] > index
|
||||
yield [index] + seq
|
||||
|
||||
|
@ -2,18 +2,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Author: Rico Sennrich
|
||||
|
||||
# average embeddings of special null words for RDLM.
|
||||
# Usage: average_null_embedding.py NPLM_PATH INPUT_MODEL TRAINING_FILE OUTPUT_MODEL
|
||||
"""Average embeddings of special null words for RDLM.
|
||||
|
||||
Usage:
|
||||
average_null_embedding.py NPLM_PATH INPUT_MODEL TRAINING_FILE OUTPUT_MODEL
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import numpy
|
||||
|
||||
|
||||
def load_model(model_file):
|
||||
return nplm.NeuralLM.from_file(model_file)
|
||||
|
||||
|
||||
def get_weights(path, vocab, len_context):
|
||||
d = [[0]*vocab for i in range(len_context)]
|
||||
d = [[0] * vocab for i in range(len_context)]
|
||||
for line in open(path):
|
||||
for i, word in enumerate(line.split()[:-1]):
|
||||
d[i][int(word)] += 1
|
||||
@ -26,20 +31,23 @@ if __name__ == "__main__":
|
||||
training_instances = sys.argv[3]
|
||||
model_output = sys.argv[4]
|
||||
|
||||
sys.path.append(os.path.join(nplm_path,'python'))
|
||||
sys.path.append(os.path.join(nplm_path, 'python'))
|
||||
import nplm
|
||||
|
||||
model = load_model(model_input)
|
||||
|
||||
len_context = len(open(training_instances).readline().split())-1
|
||||
len_context = len(open(training_instances).readline().split()) - 1
|
||||
|
||||
sys.stderr.write('reading ngrams...')
|
||||
weights = numpy.array(get_weights(training_instances, len(model.input_embeddings), len_context))
|
||||
weights = numpy.array(
|
||||
get_weights(
|
||||
training_instances, len(model.input_embeddings), len_context))
|
||||
sys.stderr.write('done\n')
|
||||
|
||||
for i in range(len_context):
|
||||
index = model.word_to_index_input['<null_{0}>'.format(i)]
|
||||
model.input_embeddings[index] = numpy.average(numpy.array(model.input_embeddings), weights=weights[i], axis=0)
|
||||
model.input_embeddings[index] = numpy.average(
|
||||
numpy.array(model.input_embeddings), weights=weights[i], axis=0)
|
||||
sys.stderr.write('writing model...')
|
||||
model.to_file(open(model_output,'w'))
|
||||
model.to_file(open(model_output, 'w'))
|
||||
sys.stderr.write('done\n')
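A toy sketch of what the loop above computes; the sizes and counts are made up and only numpy is assumed:

import numpy

vocab_size, dim = 4, 3
embeddings = numpy.arange(vocab_size * dim, dtype=float).reshape(vocab_size, dim)
# weights[w] = how often word id w occurred at one fixed context position
# in the numberized training file (this is what get_weights() tallies).
weights = numpy.array([1.0, 0.0, 2.0, 1.0])
# The embedding assigned to the corresponding <null_i> word is the
# usage-weighted mean of all input embeddings at that position.
null_embedding = numpy.average(embeddings, weights=weights, axis=0)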
|
||||
|
@ -2,17 +2,25 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Author: Rico Sennrich
|
||||
|
||||
# extract syntactic n-grams from dependency treebank in Moses XML format for training RDLM
|
||||
# expected format can be produced with mosesdecoder/scripts/training/wrapper/conll2mosesxml.py
|
||||
# OOV terminal symbols are mapped to preterminal; OOV nonterminals are mapped to 0 (<unk>)
|
||||
"""
|
||||
Extract syntactic n-grams from dependency treebank in Moses XML format for
|
||||
training RDLM.
|
||||
|
||||
Expected format can be produced with
|
||||
mosesdecoder/scripts/training/wrapper/conll2mosesxml.py
|
||||
|
||||
OOV terminal symbols are mapped to preterminal; OOV nonterminals are mapped
|
||||
to 0 (<unk>)
|
||||
"""
|
||||
|
||||
from __future__ import print_function, unicode_literals, division
|
||||
import sys
|
||||
import codecs
|
||||
import argparse
|
||||
|
||||
# hack for python2/3 compatibility
|
||||
# Hack for python2/3 compatibility
|
||||
from io import open
|
||||
|
||||
argparse.open = open
|
||||
|
||||
try:
|
||||
@ -20,46 +28,84 @@ try:
|
||||
except ImportError:
|
||||
from xml.etree import cElementTree as ET
|
||||
|
||||
def create_parser():
|
||||
parser = argparse.ArgumentParser(description="extract syntactic n-grams from parsed corpus in Moses XML format for training RDLM")
|
||||
|
||||
parser.add_argument('--input', '-i', type=argparse.FileType('r'), default=sys.stdin, metavar='PATH',
|
||||
help='input file (default: standard input).')
|
||||
parser.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout, metavar='PATH',
|
||||
help='output file (default: standard output).')
|
||||
parser.add_argument('--mode', type=str, help='predict terminals (head) or dependency labels (label)',
|
||||
choices=['label', 'head'], required=True)
|
||||
parser.add_argument('--vocab', metavar='PATH', type=str, required=True,
|
||||
help='input layer vocabulary file (one item per line; first line \'<unk>\')')
|
||||
parser.add_argument('--output_vocab', metavar='PATH', type=str,
|
||||
help='output layer vocabulary file (default: use input layer vocabulary)')
|
||||
parser.add_argument('--left_context', metavar='INT', type=int,
|
||||
help='size of context vector for left siblings (default: %(default)s)', default=3)
|
||||
parser.add_argument('--right_context', metavar='INT', type=int,
|
||||
help='size of context vector for right siblings (default: %(default)s)', default=0)
|
||||
parser.add_argument('--up_context', metavar='INT', type=int,
|
||||
help='size of context vector for ancestors (default: %(default)s)', default=2)
|
||||
parser.add_argument('--glue_symbol', metavar='STR', type=str, default='Q',
|
||||
help='glue symbol. Will be skipped during extraction (default: %(default)s)')
|
||||
parser.add_argument('--start_symbol', metavar='STR', type=str, default='SSTART',
|
||||
help='sentence start symbol. Will be skipped during extraction (default: %(default)s)')
|
||||
parser.add_argument('--end_symbol', metavar='STR', type=str, default='SEND',
|
||||
help='sentence end symbol. Will be skipped during extraction (default: %(default)s)')
|
||||
parser.add_argument('--ptkvz', action='store_true',
|
||||
help='special rule for German dependency trees: concatenate separable verb prefix and verb')
|
||||
def create_parser():
|
||||
parser = argparse.ArgumentParser(
|
||||
description=(
|
||||
"Extract syntactic n-grams from parsed corpus in "
|
||||
"Moses XML format for training RDLM"))
|
||||
|
||||
parser.add_argument(
|
||||
'--input', '-i', type=argparse.FileType('r'), default=sys.stdin,
|
||||
metavar='PATH',
|
||||
help='Input file (default: standard input).')
|
||||
parser.add_argument(
|
||||
'--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
|
||||
metavar='PATH',
|
||||
help='Output file (default: standard output).')
|
||||
parser.add_argument(
|
||||
'--mode', type=str, choices=['label', 'head'], required=True,
|
||||
help='Predict terminals (head) or dependency labels (label).')
|
||||
parser.add_argument(
|
||||
'--vocab', metavar='PATH', type=str, required=True,
|
||||
help=(
|
||||
"Input layer vocabulary file (one item per line; "
|
||||
"first line '<unk>')"))
|
||||
parser.add_argument(
|
||||
'--output_vocab', metavar='PATH', type=str,
|
||||
help=(
|
||||
"Output layer vocabulary file "
|
||||
"(default: use input layer vocabulary)"))
|
||||
parser.add_argument(
|
||||
'--left_context', metavar='INT', type=int, default=3,
|
||||
help=(
|
||||
"Size of context vector for left siblings "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
'--right_context', metavar='INT', type=int, default=0,
|
||||
help=(
|
||||
"Size of context vector for right siblings "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
'--up_context', metavar='INT', type=int, default=2,
|
||||
help=(
|
||||
"Size of context vector for ancestors "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
'--glue_symbol', metavar='STR', type=str, default='Q',
|
||||
help=(
|
||||
"Glue symbol. Will be skipped during extraction "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
'--start_symbol', metavar='STR', type=str, default='SSTART',
|
||||
help=(
|
||||
"Sentence start symbol. Will be skipped during extraction "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
'--end_symbol', metavar='STR', type=str, default='SEND',
|
||||
help=(
|
||||
"Sentence end symbol. Will be skipped during extraction "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
'--ptkvz', action='store_true',
|
||||
help=(
|
||||
"Special rule for German dependency trees: "
|
||||
"concatenate separable verb prefix and verb."))
|
||||
return parser
|
||||
|
||||
|
||||
def escape_text(s):
|
||||
|
||||
s = s.replace('|','&#124;') # factor separator
s = s.replace('[','&#91;') # syntax non-terminal
s = s.replace(']','&#93;') # syntax non-terminal
s = s.replace('\'','&apos;') # xml special character
s = s.replace('"','&quot;') # xml special character
s = s.replace('|', '&#124;') # factor separator
s = s.replace('[', '&#91;') # syntax non-terminal
s = s.replace(']', '&#93;') # syntax non-terminal
s = s.replace('\'', '&apos;') # xml special character
s = s.replace('"', '&quot;') # xml special character
|
||||
return s
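Assuming the replacement targets are the usual Moses escape entities (as restored above), the effect is, for example:

# escape_text("a|b [NP] 's") == "a&#124;b &#91;NP&#93; &apos;s"

i.e. factor separators, syntax brackets, and XML-sensitive quotes are turned into entities before tokens are written out.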
|
||||
|
||||
# deterministic heuristic to get head of subtree
|
||||
|
||||
def get_head(xml, add_ptkvz):
|
||||
"""Deterministic heuristic to get head of subtree."""
|
||||
head = None
|
||||
preterminal = None
|
||||
for child in xml:
|
||||
@ -77,23 +123,38 @@ def get_head(xml, add_ptkvz):
|
||||
|
||||
return head, preterminal
|
||||
|
||||
def get_syntactic_ngrams(xml, options, vocab, output_vocab, parent_heads=None, parent_labels=None):
|
||||
|
||||
def get_syntactic_ngrams(xml, options, vocab, output_vocab,
|
||||
parent_heads=None, parent_labels=None):
|
||||
|
||||
if len(xml):
|
||||
|
||||
# skip glue rules
|
||||
if xml.get('label') == options.glue_symbol or xml.get('label') == options.start_symbol or xml.get('label') == options.end_symbol:
|
||||
for child in xml:
|
||||
get_syntactic_ngrams(child, options, vocab, output_vocab, parent_heads, parent_labels)
|
||||
return
|
||||
# Skip glue rules.
|
||||
skip_glue_labels = [
|
||||
options.glue_symbol,
|
||||
options.start_symbol,
|
||||
options.end_symbol,
|
||||
]
|
||||
if xml.get('label') in skip_glue_labels:
|
||||
for child in xml:
|
||||
get_syntactic_ngrams(
|
||||
child, options, vocab, output_vocab, parent_heads,
|
||||
parent_labels)
|
||||
return
|
||||
|
||||
# skip virtual nodes
|
||||
if xml.get('label') == '<stop_label>' or xml.get('label') == '<start_label>':
|
||||
return
|
||||
# Skip virtual nodes.
|
||||
skip_virtual_labels = [
|
||||
'<stop_label>',
|
||||
'<start_label>',
|
||||
]
|
||||
if xml.get('label') in skip_virtual_labels:
|
||||
return
|
||||
|
||||
if not parent_heads:
|
||||
parent_heads = [vocab.get('<root_head>', 0)] * options.up_context
|
||||
parent_labels = [vocab.get('<root_label>', 0)] * options.up_context
|
||||
parent_heads = (
|
||||
[vocab.get('<root_head>', 0)] * options.up_context)
|
||||
parent_labels = (
|
||||
[vocab.get('<root_label>', 0)] * options.up_context)
|
||||
|
||||
head, preterminal = get_head(xml, options.ptkvz)
|
||||
if not head:
|
||||
@ -119,7 +180,8 @@ def get_syntactic_ngrams(xml, options, vocab, output_vocab, parent_heads=None, p
|
||||
options.output.write(' '.join(map(str, int_list)) + '\n')
|
||||
elif options.mode == 'head' and not head == '<dummy_head>':
|
||||
int_list.append(vocab.get(label, 0))
|
||||
int_list.append(output_vocab.get(head, output_vocab.get(preterminal, 0)))
|
||||
int_list.append(
|
||||
output_vocab.get(head, output_vocab.get(preterminal, 0)))
|
||||
options.output.write(' '.join(map(str, int_list)) + '\n')
|
||||
|
||||
parent_heads.append(vocab.get(head, 0))
|
||||
@ -130,28 +192,29 @@ def get_syntactic_ngrams(xml, options, vocab, output_vocab, parent_heads=None, p
|
||||
if options.right_context:
|
||||
start = ET.Element('tree')
|
||||
start2 = ET.Element('tree')
|
||||
start.set('label','<start_label>')
|
||||
start2.set('label','XY')
|
||||
start.set('label', '<start_label>')
|
||||
start2.set('label', 'XY')
|
||||
start2.text = '<start_head>'
|
||||
start.append(start2)
|
||||
xml.insert(0,start)
|
||||
xml.insert(0, start)
|
||||
if options.left_context:
|
||||
end = ET.Element('tree')
|
||||
end2 = ET.Element('tree')
|
||||
end.set('label','<stop_label>')
|
||||
end2.set('label','XY')
|
||||
end.set('label', '<stop_label>')
|
||||
end2.set('label', 'XY')
|
||||
end2.text = '<stop_head>'
|
||||
end.append(end2)
|
||||
xml.append(end)
|
||||
|
||||
|
||||
heads = []
|
||||
preterminals = []
|
||||
labels = []
|
||||
|
||||
for child in xml:
|
||||
if not len(child):
|
||||
# mark that the previous sibling is the head of the structure (the head/label are not repeated because they're also head/label of the parent)
|
||||
# Mark that the previous sibling is the head of the
|
||||
# structure (the head/label are not repeated because they're
|
||||
# also head/label of the parent).
|
||||
head_child = '<head_head>'
|
||||
preterminal_child = head_child
|
||||
child_label = '<head_label>'
|
||||
@ -166,37 +229,60 @@ def get_syntactic_ngrams(xml, options, vocab, output_vocab, parent_heads=None, p
|
||||
preterminals.append(preterminal_child)
|
||||
labels.append(child_label)
|
||||
|
||||
heads_idx = [vocab.get(heads[i], vocab.get(preterminals[i], 0)) for i in range(len(heads))]
|
||||
labels_idx = [vocab.get(labels[i], 0) for i in range(len(labels))]
|
||||
heads_idx = [
|
||||
vocab.get(heads[i], vocab.get(preterminals[i], 0))
|
||||
for i in range(len(heads))]
|
||||
labels_idx = [
|
||||
vocab.get(labels[i], 0)
|
||||
for i in range(len(labels))]
|
||||
|
||||
#ancestor context is same for all children
|
||||
# Ancestor context is the same for all children.
|
||||
up_heads = parent_heads[-options.up_context:]
|
||||
up_labels = parent_labels[-options.up_context:]
|
||||
|
||||
for i,child in enumerate(xml):
|
||||
skip_special_heads = [
|
||||
'<dummy_head>',
|
||||
'<head_head>',
|
||||
'<stop_head>',
|
||||
'<start_head>',
|
||||
]
|
||||
for i, child in enumerate(xml):
|
||||
|
||||
# skip some special symbols, but recursively extract n-grams for its children
|
||||
if options.mode == 'head' and (heads[i] == '<dummy_head>' or heads[i] == '<head_head>' or heads[i] == '<stop_head>' or heads[i] == '<start_head>'):
|
||||
# Skip some special symbols, but recursively extract n-grams
|
||||
# for its children.
|
||||
if options.mode == 'head' and heads[i] in skip_special_heads:
|
||||
parent_heads.append(vocab.get(heads[i], 0))
|
||||
parent_labels.append(vocab.get(labels[i], 0))
|
||||
get_syntactic_ngrams(child, options, vocab, output_vocab, parent_heads, parent_labels)
|
||||
get_syntactic_ngrams(
|
||||
child, options, vocab, output_vocab, parent_heads,
|
||||
parent_labels)
|
||||
parent_heads.pop()
|
||||
parent_labels.pop()
|
||||
continue
|
||||
|
||||
previous_heads = heads_idx[max(0,i-options.left_context):i]
|
||||
previous_labels = labels_idx[max(0,i-options.left_context):i]
|
||||
previous_heads = heads_idx[max(0, i - options.left_context):i]
|
||||
previous_labels = labels_idx[max(0, i - options.left_context):i]
|
||||
|
||||
subsequent_heads = heads_idx[i+1:i+options.right_context+1]
|
||||
subsequent_labels = labels_idx[i+1:i+options.right_context+1]
|
||||
subsequent_heads = heads_idx[i + 1:i + options.right_context + 1]
|
||||
subsequent_labels = labels_idx[i + 1:i + options.right_context + 1]
|
||||
|
||||
if len(previous_heads) < options.left_context:
|
||||
previous_heads = [start_head_idx] * (options.left_context-len(previous_heads)) + previous_heads
|
||||
previous_labels = [start_label_idx] * (options.left_context-len(previous_labels)) + previous_labels
|
||||
previous_heads = (
|
||||
[start_head_idx] *
|
||||
(options.left_context - len(previous_heads)) +
|
||||
previous_heads)
|
||||
previous_labels = (
|
||||
[start_label_idx] *
|
||||
(options.left_context - len(previous_labels)) +
|
||||
previous_labels)
|
||||
|
||||
if len(subsequent_heads) < options.right_context:
|
||||
subsequent_heads = subsequent_heads + [stop_head_idx] * (options.right_context-len(subsequent_heads))
|
||||
subsequent_labels = subsequent_labels + [stop_label_idx] * (options.right_context-len(subsequent_labels))
|
||||
subsequent_heads += (
|
||||
[stop_head_idx] *
|
||||
(options.right_context - len(subsequent_heads)))
|
||||
subsequent_labels += (
|
||||
[stop_label_idx] *
|
||||
(options.right_context - len(subsequent_labels)))
|
||||
|
||||
int_list = []
|
||||
int_list.extend(previous_heads)
|
||||
@ -209,14 +295,19 @@ def get_syntactic_ngrams(xml, options, vocab, output_vocab, parent_heads=None, p
|
||||
int_list.append(output_vocab.get(labels[i], 0))
|
||||
elif options.mode == 'head':
|
||||
int_list.append(vocab.get(labels[i], 0))
|
||||
int_list.append(output_vocab.get(heads[i], output_vocab.get(preterminals[i], 0)))
|
||||
int_list.append(
|
||||
output_vocab.get(
|
||||
heads[i], output_vocab.get(preterminals[i], 0)))
|
||||
|
||||
options.output.write(' '.join(map(str, int_list)) + '\n')
|
||||
|
||||
parent_heads.append(vocab.get(heads[i], vocab.get(preterminals[i], 0)))
|
||||
parent_heads.append(
|
||||
vocab.get(heads[i], vocab.get(preterminals[i], 0)))
|
||||
parent_labels.append(vocab.get(labels[i], 0))
|
||||
|
||||
get_syntactic_ngrams(child, options, vocab, output_vocab, parent_heads, parent_labels)
|
||||
get_syntactic_ngrams(
|
||||
child, options, vocab, output_vocab, parent_heads,
|
||||
parent_labels)
|
||||
|
||||
parent_heads.pop()
|
||||
parent_labels.pop()
|
||||
@ -224,15 +315,17 @@ def get_syntactic_ngrams(xml, options, vocab, output_vocab, parent_heads=None, p
|
||||
|
||||
def load_vocab(path):
|
||||
v = {}
|
||||
for i,line in enumerate(open(path, encoding="UTF-8")):
|
||||
for i, line in enumerate(open(path, encoding="UTF-8")):
|
||||
v[line.strip()] = i
|
||||
return v
|
||||
|
||||
|
||||
def main(options):
|
||||
vocab = load_vocab(options.vocab)
|
||||
|
||||
if options.output_vocab is None:
|
||||
sys.stderr.write('no output vocabulary specified; using input vocabulary\n')
|
||||
sys.stderr.write(
|
||||
"No output vocabulary specified; using input vocabulary.\n")
|
||||
output_vocab = vocab
|
||||
else:
|
||||
output_vocab = load_vocab(options.output_vocab)
|
||||
@ -275,4 +368,4 @@ if __name__ == '__main__':
|
||||
parser = create_parser()
|
||||
options = parser.parse_args()
|
||||
|
||||
main(options)
|
||||
main(options)
|
||||
|
@ -9,6 +9,7 @@ import sys
|
||||
import codecs
|
||||
import argparse
|
||||
from collections import Counter
|
||||
from textwrap import dedent
|
||||
|
||||
# hack for python2/3 compatibility
|
||||
from io import open
|
||||
@ -19,37 +20,49 @@ try:
|
||||
except ImportError:
|
||||
from xml.etree import cElementTree as ET
|
||||
|
||||
|
||||
HELP_TEXT = dedent("""\
|
||||
generate 5 vocabulary files from parsed corpus in moses XML format
|
||||
[PREFIX].special: around 40 symbols reserved for RDLM
|
||||
[PREFIX].preterminals: preterminal symbols
|
||||
[PREFIX].nonterminals: nonterminal symbols (which are not preterminal)
|
||||
[PREFIX].terminals: terminal symbols
|
||||
[PREFIX].all: all of the above
|
||||
""")
|
||||
|
||||
|
||||
def create_parser():
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description=HELP_TEXT)
|
||||
|
||||
help_text = "generate 5 vocabulary files from parsed corpus in moses XML format\n"
|
||||
help_text += " [PREFIX].special: around 40 symbols reserved for RDLM\n";
|
||||
help_text += " [PREFIX].preterminals: preterminal symbols\n";
|
||||
help_text += " [PREFIX].nonterminals: nonterminal symbols (which are not preterminal)\n";
|
||||
help_text += " [PREFIX].terminals: terminal symbols\n";
|
||||
help_text += " [PREFIX].all: all of the above\n"
|
||||
|
||||
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=help_text)
|
||||
|
||||
parser.add_argument('--input', '-i', type=argparse.FileType('r'), default=sys.stdin, metavar='PATH',
|
||||
help='input text (default: standard input).')
|
||||
parser.add_argument('--output', '-o', type=str, default='vocab', metavar='PREFIX',
|
||||
help='output prefix (default: "vocab")')
|
||||
parser.add_argument('--ptkvz', action="store_true",
|
||||
help='special rule for German dependency trees: attach separable verb prefixes to verb')
|
||||
parser.add_argument(
|
||||
'--input', '-i', type=argparse.FileType('r'), default=sys.stdin,
|
||||
metavar='PATH',
|
||||
help="Input text (default: standard input).")
|
||||
parser.add_argument(
|
||||
'--output', '-o', type=str, default='vocab', metavar='PREFIX',
|
||||
help="Output prefix (default: 'vocab')")
|
||||
parser.add_argument(
|
||||
'--ptkvz', action="store_true",
|
||||
help=(
|
||||
"Special rule for German dependency trees: attach separable "
|
||||
"verb prefixes to verb."))
|
||||
|
||||
return parser
|
||||
|
||||
def escape_text(s):
|
||||
|
||||
s = s.replace('|','&#124;') # factor separator
s = s.replace('[','&#91;') # syntax non-terminal
s = s.replace(']','&#93;') # syntax non-terminal
s = s.replace('\'','&apos;') # xml special character
s = s.replace('"','&quot;') # xml special character
def escape_text(s):
s = s.replace('|', '&#124;') # factor separator
s = s.replace('[', '&#91;') # syntax non-terminal
s = s.replace(']', '&#93;') # syntax non-terminal
s = s.replace('\'', '&apos;') # xml special character
s = s.replace('"', '&quot;') # xml special character
|
||||
return s
|
||||
|
||||
# deterministic heuristic to get head of subtree
|
||||
|
||||
def get_head(xml, args):
|
||||
"""Deterministic heuristic to get head of subtree."""
|
||||
head = None
|
||||
preterminal = None
|
||||
for child in xml:
|
||||
@ -67,6 +80,7 @@ def get_head(xml, args):
|
||||
|
||||
return head, preterminal
|
||||
|
||||
|
||||
def get_vocab(xml, args):
|
||||
|
||||
if len(xml):
|
||||
@ -88,6 +102,7 @@ def get_vocab(xml, args):
|
||||
continue
|
||||
get_vocab(child, args)
|
||||
|
||||
|
||||
def main(args):
|
||||
|
||||
global heads
|
||||
@ -111,10 +126,24 @@ def main(args):
|
||||
get_vocab(xml, args)
|
||||
i += 1
|
||||
|
||||
special_tokens = ['<unk>', '<null>', '<null_label>', '<null_head>', '<head_label>', '<root_label>', '<start_label>', '<stop_label>', '<head_head>', '<root_head>', '<start_head>', '<dummy_head>', '<stop_head>']
|
||||
special_tokens = [
|
||||
'<unk>',
|
||||
'<null>',
|
||||
'<null_label>',
|
||||
'<null_head>',
|
||||
'<head_label>',
|
||||
'<root_label>',
|
||||
'<start_label>',
|
||||
'<stop_label>',
|
||||
'<head_head>',
|
||||
'<root_head>',
|
||||
'<start_head>',
|
||||
'<dummy_head>',
|
||||
'<stop_head>',
|
||||
]
|
||||
|
||||
for i in range(30):
|
||||
special_tokens.append('<null_{0}>'.format(i))
|
||||
special_tokens.append('<null_{0}>'.format(i))
|
||||
|
||||
f = open(args.output + '.special', 'w', encoding='UTF-8')
|
||||
for item in special_tokens:
|
||||
@ -158,7 +187,6 @@ def main(args):
|
||||
f.close()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
|
@ -9,7 +9,6 @@ import subprocess
|
||||
import sys
|
||||
import os
|
||||
import codecs
|
||||
import copy
|
||||
|
||||
# ../bilingual-lm
|
||||
sys.path.append(os.path.join(os.path.dirname(sys.path[0]), 'bilingual-lm'))
|
||||
@ -17,143 +16,224 @@ import train_nplm
|
||||
import extract_vocab
|
||||
import extract_syntactic_ngrams
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG)
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s %(levelname)s: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG)
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--working-dir", dest="working_dir", metavar="PATH")
|
||||
parser.add_argument("--corpus", dest="corpus_stem", metavar="PATH", help="input file")
|
||||
parser.add_argument("--nplm-home", dest="nplm_home", metavar="PATH", help="location of NPLM", required=True)
|
||||
parser.add_argument("--epochs", dest="epochs", type=int, metavar="INT", help="number of training epochs (default: %(default)s)")
|
||||
parser.add_argument("--up-context-size", dest="up_context_size", type=int, metavar="INT", help="size of ancestor context (default: %(default)s)")
|
||||
parser.add_argument("--left-context-size", dest="left_context_size", type=int, metavar="INT", help="size of sibling context (left) (default: %(default)s)")
|
||||
parser.add_argument("--right-context-size", dest="right_context_size", type=int, metavar="INT", help="size of sibling context (right) (default: %(default)s)")
|
||||
parser.add_argument("--mode", dest="mode", choices=['head', 'label'], help="type of RDLM to train (both are required for decoding)", required=True)
|
||||
parser.add_argument("--minibatch-size", dest="minibatch_size", type=int, metavar="INT", help="minibatch size (default: %(default)s)")
|
||||
parser.add_argument("--noise", dest="noise", type=int, metavar="INT", help="number of noise samples for NCE (default: %(default)s)")
|
||||
parser.add_argument("--hidden", dest="hidden", type=int, metavar="INT", help="size of hidden layer (0 for single hidden layer) (default: %(default)s)")
|
||||
parser.add_argument("--input-embedding", dest="input_embedding", type=int, metavar="INT", help="size of input embedding layer (default: %(default)s)")
|
||||
parser.add_argument("--output-embedding", dest="output_embedding", type=int, metavar="INT", help="size of output embedding layer (default: %(default)s)")
|
||||
parser.add_argument("--threads", "-t", dest="threads", type=int, metavar="INT", help="number of threads (default: %(default)s)")
|
||||
parser.add_argument("--output-model", dest="output_model", metavar="PATH", help="name of output model (default: %(default)s)")
|
||||
parser.add_argument("--output-dir", dest="output_dir", metavar="PATH", help="output directory (default: same as working-dir)")
|
||||
parser.add_argument("--config-options-file", dest="config_options_file", metavar="PATH")
|
||||
parser.add_argument("--log-file", dest="log_file", metavar="PATH", help="log file to write to (default: %(default)s)")
|
||||
parser.add_argument("--validation-corpus", dest="validation_corpus", metavar="PATH", help="validation file (default: %(default)s)")
|
||||
parser.add_argument("--activation-function", dest="activation_fn", choices=['identity', 'rectifier', 'tanh', 'hardtanh'], help="activation function (default: %(default)s)")
|
||||
parser.add_argument("--learning-rate", dest="learning_rate", type=float, metavar="FLOAT", help="learning rate (default: %(default)s)")
|
||||
parser.add_argument("--input-words-file", dest="input_words_file", metavar="PATH", help="input vocabulary (default: %(default)s)")
|
||||
parser.add_argument("--output-words-file", dest="output_words_file", metavar="PATH", help="output vocabulary (default: %(default)s)")
|
||||
parser.add_argument("--input_vocab_size", dest="input_vocab_size", type=int, metavar="INT", help="input vocabulary size (default: %(default)s)")
|
||||
parser.add_argument("--output-vocab-size", dest="output_vocab_size", type=int, metavar="INT", help="output vocabulary size (default: %(default)s)")
|
||||
parser.add_argument(
|
||||
"--working-dir", dest="working_dir", metavar="PATH")
|
||||
parser.add_argument(
|
||||
"--corpus", dest="corpus_stem", metavar="PATH", help="Input file.")
|
||||
parser.add_argument(
|
||||
"--nplm-home", dest="nplm_home", metavar="PATH", required=True,
|
||||
help="Location of NPLM.")
|
||||
parser.add_argument(
|
||||
"--epochs", dest="epochs", type=int, metavar="INT",
|
||||
help="Number of training epochs (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--up-context-size", dest="up_context_size", type=int, metavar="INT",
|
||||
help="Size of ancestor context (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--left-context-size", dest="left_context_size", type=int, metavar="INT",
|
||||
help="Size of sibling context (left) (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--right-context-size", dest="right_context_size", type=int,
|
||||
metavar="INT",
|
||||
help="Size of sibling context (right) (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--mode", dest="mode", choices=['head', 'label'], required=True,
|
||||
help="Type of RDLM to train (both are required for decoding).")
|
||||
parser.add_argument(
|
||||
"--minibatch-size", dest="minibatch_size", type=int, metavar="INT",
|
||||
help="Minibatch size (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--noise", dest="noise", type=int, metavar="INT",
|
||||
help="Number of noise samples for NCE (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--hidden", dest="hidden", type=int, metavar="INT",
|
||||
help=(
|
||||
"Size of hidden layer (0 for single hidden layer) "
|
||||
"(default: %(default)s)"))
|
||||
parser.add_argument(
|
||||
"--input-embedding", dest="input_embedding", type=int, metavar="INT",
|
||||
help="Size of input embedding layer (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--output-embedding", dest="output_embedding", type=int, metavar="INT",
|
||||
help="Size of output embedding layer (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--threads", "-t", dest="threads", type=int, metavar="INT",
|
||||
help="Number of threads (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--output-model", dest="output_model", metavar="PATH",
|
||||
help="Name of output model (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--output-dir", dest="output_dir", metavar="PATH",
|
||||
help="Output directory (default: same as working-dir).")
|
||||
parser.add_argument(
|
||||
"--config-options-file", dest="config_options_file", metavar="PATH")
|
||||
parser.add_argument(
|
||||
"--log-file", dest="log_file", metavar="PATH",
|
||||
help="Log file to write to (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--validation-corpus", dest="validation_corpus", metavar="PATH",
|
||||
help="Validation file (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--activation-function", dest="activation_fn",
|
||||
choices=['identity', 'rectifier', 'tanh', 'hardtanh'],
|
||||
help="Activation function (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--learning-rate", dest="learning_rate", type=float, metavar="FLOAT",
|
||||
help="Learning rate (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--input-words-file", dest="input_words_file", metavar="PATH",
|
||||
help="Input vocabulary (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--output-words-file", dest="output_words_file", metavar="PATH",
|
||||
help="Output vocabulary (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--input_vocab_size", dest="input_vocab_size", type=int, metavar="INT",
|
||||
help="Input vocabulary size (default: %(default)s).")
|
||||
parser.add_argument(
|
||||
"--output-vocab-size", dest="output_vocab_size", type=int, metavar="INT",
|
||||
help="Output vocabulary size (default: %(default)s).")
|
||||
|
||||
|
||||
parser.set_defaults(
|
||||
working_dir = "working"
|
||||
,corpus_stem = "train"
|
||||
,nplm_home = "/home/bhaddow/tools/nplm"
|
||||
,epochs = 2
|
||||
,up_context_size = 2
|
||||
,left_context_size = 3
|
||||
,right_context_size = 0
|
||||
,minibatch_size=1000
|
||||
,noise=100
|
||||
,hidden=0
|
||||
,mode='head'
|
||||
,input_embedding=150
|
||||
,output_embedding=750
|
||||
,threads=4
|
||||
,output_model = "train"
|
||||
,output_dir = None
|
||||
,config_options_file = "config"
|
||||
,log_file = "log"
|
||||
,validation_corpus = None
|
||||
,activation_fn = "rectifier"
|
||||
,learning_rate = 1
|
||||
,input_words_file = None
|
||||
,output_words_file = None
|
||||
,input_vocab_size = 500000
|
||||
,output_vocab_size = 500000
|
||||
)
|
||||
working_dir="working",
|
||||
corpus_stem="train",
|
||||
nplm_home="/home/bhaddow/tools/nplm",
|
||||
epochs=2,
|
||||
up_context_size=2,
|
||||
left_context_size=3,
|
||||
right_context_size=0,
|
||||
minibatch_size=1000,
|
||||
noise=100,
|
||||
hidden=0,
|
||||
mode='head',
|
||||
input_embedding=150,
|
||||
output_embedding=750,
|
||||
threads=4,
|
||||
output_model="train",
|
||||
output_dir=None,
|
||||
config_options_file="config",
|
||||
log_file="log",
|
||||
validation_corpus=None,
|
||||
activation_fn="rectifier",
|
||||
learning_rate=1,
|
||||
input_words_file=None,
|
||||
output_words_file=None,
|
||||
input_vocab_size=500000,
|
||||
output_vocab_size=500000)
|
||||
|
||||
|
||||
def prepare_vocabulary(options):
|
||||
vocab_prefix = os.path.join(options.working_dir, 'vocab')
|
||||
extract_vocab_options = extract_vocab.create_parser().parse_args(['--input', options.corpus_stem, '--output', vocab_prefix])
|
||||
extract_vocab.main(extract_vocab_options)
|
||||
vocab_prefix = os.path.join(options.working_dir, 'vocab')
|
||||
extract_vocab_options = extract_vocab.create_parser().parse_args(
|
||||
['--input', options.corpus_stem, '--output', vocab_prefix])
|
||||
extract_vocab.main(extract_vocab_options)
|
||||
|
||||
if options.input_words_file is None:
|
||||
options.input_words_file = vocab_prefix + '.input'
|
||||
orig = vocab_prefix + '.all'
|
||||
filtered_vocab = open(orig).readlines()
|
||||
if options.input_vocab_size:
|
||||
filtered_vocab = filtered_vocab[:options.input_vocab_size]
|
||||
open(options.input_words_file,'w').writelines(filtered_vocab)
|
||||
if options.input_words_file is None:
|
||||
options.input_words_file = vocab_prefix + '.input'
|
||||
orig = vocab_prefix + '.all'
|
||||
filtered_vocab = open(orig).readlines()
|
||||
if options.input_vocab_size:
|
||||
filtered_vocab = filtered_vocab[:options.input_vocab_size]
|
||||
open(options.input_words_file, 'w').writelines(filtered_vocab)
|
||||
|
||||
if options.output_words_file is None:
|
||||
options.output_words_file = vocab_prefix + '.output'
|
||||
if options.mode == 'label':
|
||||
blacklist = [
|
||||
'<null',
|
||||
'<root',
|
||||
'<start_head',
|
||||
'<dummy',
|
||||
'<head_head',
|
||||
'<stop_head',
|
||||
]
|
||||
orig = vocab_prefix + '.special'
|
||||
filtered_vocab = open(orig).readlines()
|
||||
orig = vocab_prefix + '.nonterminals'
|
||||
filtered_vocab += open(orig).readlines()
|
||||
filtered_vocab = [
|
||||
word
|
||||
for word in filtered_vocab
|
||||
if not any(word.startswith(prefix) for prefix in blacklist)]
|
||||
if options.output_vocab_size:
|
||||
filtered_vocab = filtered_vocab[:options.output_vocab_size]
|
||||
else:
|
||||
orig = vocab_prefix + '.all'
|
||||
filtered_vocab = open(orig).readlines()[:options.output_vocab_size]
|
||||
open(options.output_words_file, 'w').writelines(filtered_vocab)
|
||||
|
||||
if options.output_words_file is None:
|
||||
options.output_words_file = vocab_prefix + '.output'
|
||||
if options.mode == 'label':
|
||||
blacklist = ['<null', '<root', '<start_head', '<dummy', '<head_head', '<stop_head']
|
||||
orig = vocab_prefix + '.special'
|
||||
filtered_vocab = open(orig).readlines()
|
||||
orig = vocab_prefix + '.nonterminals'
|
||||
filtered_vocab += open(orig).readlines()
|
||||
filtered_vocab = [word for word in filtered_vocab if not any(word.startswith(prefix) for prefix in blacklist)]
|
||||
if options.output_vocab_size:
|
||||
filtered_vocab = filtered_vocab[:options.output_vocab_size]
|
||||
else:
|
||||
orig = vocab_prefix + '.all'
|
||||
filtered_vocab = open(orig).readlines()[:options.output_vocab_size]
|
||||
open(options.output_words_file,'w').writelines(filtered_vocab)
|
||||
|
||||
def main(options):
|
||||
|
||||
options.ngram_size = 2*options.up_context_size + 2*options.left_context_size + 2*options.right_context_size
|
||||
if options.mode == 'head':
|
||||
options.ngram_size += 2
|
||||
elif options.mode == 'label':
|
||||
options.ngram_size += 1
|
||||
options.ngram_size = (
|
||||
2 * options.up_context_size +
|
||||
2 * options.left_context_size +
|
||||
2 * options.right_context_size
|
||||
)
|
||||
if options.mode == 'head':
|
||||
options.ngram_size += 2
|
||||
elif options.mode == 'label':
|
||||
options.ngram_size += 1
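With the defaults set further down in this file (up_context_size=2, left_context_size=3, right_context_size=0), the arithmetic works out as:

# ngram_size = 2*2 + 2*3 + 2*0 = 10  (one head and one label per context position)
# head mode:  10 + 2 = 12
# label mode: 10 + 1 = 11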
|
||||
|
||||
if options.input_words_file is None or options.output_words_file is None:
|
||||
sys.stderr.write('either input vocabulary or output vocabulary not specified: extracting vocabulary from training text\n')
|
||||
prepare_vocabulary(options)
|
||||
if options.input_words_file is None or options.output_words_file is None:
|
||||
sys.stderr.write(
|
||||
"Either input vocabulary or output vocabulary not specified: "
|
||||
"extracting vocabulary from training text.\n")
|
||||
prepare_vocabulary(options)
|
||||
|
||||
extract_options = extract_syntactic_ngrams.create_parser().parse_args(['--input', options.corpus_stem,
|
||||
'--output', os.path.join(options.working_dir, os.path.basename(options.corpus_stem) + '.numberized'),
|
||||
'--vocab', options.input_words_file,
|
||||
'--output_vocab', options.output_words_file,
|
||||
'--right_context', str(options.right_context_size),
|
||||
'--left_context', str(options.left_context_size),
|
||||
'--up_context', str(options.up_context_size),
|
||||
'--mode', options.mode
|
||||
])
|
||||
sys.stderr.write('extracting syntactic n-grams\n')
|
||||
extract_syntactic_ngrams.main(extract_options)
|
||||
|
||||
if options.validation_corpus:
|
||||
extract_options.input = open(options.validation_corpus)
|
||||
options.validation_file = os.path.join(options.working_dir, os.path.basename(options.validation_corpus))
|
||||
extract_options.output = open(options.validation_file + '.numberized', 'w')
|
||||
sys.stderr.write('extracting syntactic n-grams (validation file)\n')
|
||||
extract_options = extract_syntactic_ngrams.create_parser().parse_args([
|
||||
'--input', options.corpus_stem,
|
||||
'--output', os.path.join(
|
||||
options.working_dir,
|
||||
os.path.basename(options.corpus_stem) + '.numberized'),
|
||||
'--vocab', options.input_words_file,
|
||||
'--output_vocab', options.output_words_file,
|
||||
'--right_context', str(options.right_context_size),
|
||||
'--left_context', str(options.left_context_size),
|
||||
'--up_context', str(options.up_context_size),
|
||||
'--mode', options.mode
|
||||
])
|
||||
sys.stderr.write('extracting syntactic n-grams\n')
|
||||
extract_syntactic_ngrams.main(extract_options)
|
||||
extract_options.output.close()
|
||||
|
||||
sys.stderr.write('training neural network\n')
|
||||
train_nplm.main(options)
|
||||
if options.validation_corpus:
|
||||
extract_options.input = open(options.validation_corpus)
|
||||
options.validation_file = os.path.join(
|
||||
options.working_dir, os.path.basename(options.validation_corpus))
|
||||
extract_options.output = open(
|
||||
options.validation_file + '.numberized', 'w')
|
||||
sys.stderr.write('extracting syntactic n-grams (validation file)\n')
|
||||
extract_syntactic_ngrams.main(extract_options)
|
||||
extract_options.output.close()
|
||||
|
||||
sys.stderr.write('training neural network\n')
|
||||
train_nplm.main(options)
|
||||
|
||||
sys.stderr.write('averaging null words\n')
|
||||
ret = subprocess.call([
|
||||
os.path.join(sys.path[0], 'average_null_embedding.py'),
|
||||
options.nplm_home,
|
||||
os.path.join(
|
||||
options.output_dir,
|
||||
options.output_model + '.model.nplm.' + str(options.epochs)),
|
||||
os.path.join(
|
||||
options.working_dir,
|
||||
os.path.basename(options.corpus_stem) + '.numberized'),
|
||||
os.path.join(options.output_dir, options.output_model + '.model.nplm')
|
||||
])
|
||||
if ret:
|
||||
raise Exception("averaging null words failed")
|
||||
|
||||
sys.stderr.write('averaging null words\n')
|
||||
ret = subprocess.call([os.path.join(sys.path[0], 'average_null_embedding.py'),
|
||||
options.nplm_home,
|
||||
os.path.join(options.output_dir, options.output_model + '.model.nplm.' + str(options.epochs)),
|
||||
os.path.join(options.working_dir, os.path.basename(options.corpus_stem) + '.numberized'),
|
||||
os.path.join(options.output_dir, options.output_model + '.model.nplm')
|
||||
])
|
||||
if ret:
|
||||
raise Exception("averaging null words failed")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if sys.version_info < (3, 0):
|
||||
sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
|
||||
sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
|
||||
sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
|
||||
|
||||
options = parser.parse_args()
|
||||
main(options)
|
||||
if sys.version_info < (3, 0):
|
||||
sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
|
||||
sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
|
||||
sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
|
||||
|
||||
options = parser.parse_args()
|
||||
main(options)
|
||||
|
@ -2,42 +2,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Author: Rico Sennrich
|
||||
|
||||
# takes a file in the CoNLL dependency format (from the CoNLL-X shared task on dependency parsing; http://ilk.uvt.nl/conll/#dataformat )
|
||||
# and produces Moses XML format. Note that the structure is built based on fields 9 and 10 (projective HEAD and RELATION),
|
||||
# which not all parsers produce.
|
||||
"""
|
||||
Takes a file in the CoNLL dependency format (from the CoNLL-X shared task on
|
||||
dependency parsing; http://ilk.uvt.nl/conll/#dataformat ) and produces
|
||||
Moses XML format.
|
||||
|
||||
# usage: conll2mosesxml.py [--brackets] < input_file > output_file
|
||||
Note that the structure is built based on fields 9 and 10 (projective HEAD
|
||||
and RELATION), which not all parsers produce.
|
||||
|
||||
Usage: conll2mosesxml.py [--brackets] < input_file > output_file
|
||||
"""
|
||||
|
||||
from __future__ import print_function, unicode_literals
|
||||
import sys
|
||||
import re
|
||||
import codecs
|
||||
from collections import namedtuple,defaultdict
|
||||
from collections import (
|
||||
namedtuple,
|
||||
defaultdict,
|
||||
)
|
||||
from lxml import etree as ET
|
||||
|
||||
|
||||
Word = namedtuple('Word', ['pos','word','lemma','tag','head','func', 'proj_head', 'proj_func'])
|
||||
Word = namedtuple(
|
||||
'Word',
|
||||
['pos', 'word', 'lemma', 'tag', 'head', 'func', 'proj_head', 'proj_func'])
|
||||
|
||||
|
||||
def main(output_format='xml'):
|
||||
sentence = []
|
||||
|
||||
for line in sys.stdin:
|
||||
|
||||
# process sentence
|
||||
# Process sentence.
|
||||
if line == "\n":
|
||||
sentence.insert(0,[])
|
||||
sentence.insert(0, [])
|
||||
if is_projective(sentence):
|
||||
write(sentence,output_format)
|
||||
write(sentence, output_format)
|
||||
else:
|
||||
sys.stderr.write(' '.join(w.word for w in sentence[1:]) + '\n')
|
||||
sys.stderr.write(
|
||||
' '.join(w.word for w in sentence[1:]) + '\n')
|
||||
sys.stdout.write('\n')
|
||||
sentence = []
|
||||
continue
|
||||
|
||||
try:
|
||||
pos, word, lemma, tag, tag2, morph, head, func, proj_head, proj_func = line.split()
|
||||
except ValueError: # word may be unicode whitespace
|
||||
pos, word, lemma, tag, tag2, morph, head, func, proj_head, proj_func = re.split(' *\t*',line.strip())
|
||||
(
|
||||
pos,
|
||||
word,
|
||||
lemma,
|
||||
tag,
|
||||
tag2,
|
||||
morph,
|
||||
head,
|
||||
func,
|
||||
proj_head,
|
||||
proj_func,
|
||||
) = line.split()
|
||||
except ValueError: # Word may be unicode whitespace.
|
||||
(
|
||||
pos,
|
||||
word,
|
||||
lemma,
|
||||
tag,
|
||||
tag2,
|
||||
morph,
|
||||
head,
|
||||
func,
|
||||
proj_head,
|
||||
proj_func,
|
||||
) = re.split(' *\t*', line.strip())
|
||||
|
||||
word = escape_special_chars(word)
|
||||
lemma = escape_special_chars(lemma)
|
||||
@ -46,17 +80,20 @@ def main(output_format='xml'):
|
||||
proj_head = head
|
||||
proj_func = func
|
||||
|
||||
sentence.append(Word(int(pos), word, lemma, tag2,int(head), func, int(proj_head), proj_func))
|
||||
sentence.append(
|
||||
Word(
|
||||
int(pos), word, lemma, tag2, int(head), func, int(proj_head),
|
||||
proj_func))
|
||||
|
||||
|
||||
# this script performs the same escaping as escape-special-chars.perl in Moses.
|
||||
# most of it is done in function write(), but quotation marks need to be processed first
|
||||
# This script performs the same escaping as escape-special-chars.perl in
|
||||
# Moses. Most of it is done in function write(), but quotation marks need
|
||||
# to be processed first.
|
||||
def escape_special_chars(line):
|
||||
|
||||
line = line.replace('\'','&apos;') # xml
line = line.replace('"','&quot;') # xml
line = line.replace('[','&#91;') # syntax non-terminal
line = line.replace(']','&#93;') # syntax non-terminal
line = line.replace('\'', '&apos;') # xml
line = line.replace('"', '&quot;') # xml
line = line.replace('[', '&#91;') # syntax non-terminal
line = line.replace(']', '&#93;') # syntax non-terminal
|
||||
|
||||
return line
|
||||
|
||||
@ -64,7 +101,7 @@ def escape_special_chars(line):
|
||||
# make a check if structure is projective
|
||||
def is_projective(sentence):
|
||||
dominates = defaultdict(set)
|
||||
for i,w in enumerate(sentence):
|
||||
for i, w in enumerate(sentence):
|
||||
dominates[i].add(i)
|
||||
if not i:
|
||||
continue
|
||||
@ -77,7 +114,7 @@ def is_projective(sentence):
|
||||
|
||||
for i in dominates:
|
||||
dependents = dominates[i]
|
||||
if max(dependents) - min(dependents) != len(dependents)-1:
|
||||
if max(dependents) - min(dependents) != len(dependents) - 1:
|
||||
sys.stderr.write("error: non-projective structure.\n")
|
||||
return False
|
||||
return True
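Put differently, the check requires every head's transitive set of dependents to occupy a contiguous block of positions; a minimal made-up counterexample:

# 1-based heads: word 1 -> 3, word 2 -> root, word 3 -> 2
# dominates[3] == {1, 3}: max - min == 2 but the set has only 2 elements,
# so position 2 is skipped and the sentence is reported as non-projective.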
|
||||
@ -86,24 +123,28 @@ def is_projective(sentence):
|
||||
def write(sentence, output_format='xml'):
|
||||
|
||||
if output_format == 'xml':
|
||||
tree = create_subtree(0,sentence)
|
||||
out = ET.tostring(tree, encoding = 'UTF-8').decode('UTF-8')
|
||||
tree = create_subtree(0, sentence)
|
||||
out = ET.tostring(tree, encoding='UTF-8').decode('UTF-8')
|
||||
|
||||
if output_format == 'brackets':
|
||||
out = create_brackets(0,sentence)
|
||||
out = create_brackets(0, sentence)
|
||||
|
||||
out = out.replace('|','&#124;') # factor separator
out = out.replace('|', '&#124;') # factor separator

out = out.replace('&amp;apos;','&apos;') # lxml is buggy if input is escaped
out = out.replace('&amp;quot;','&quot;') # lxml is buggy if input is escaped
out = out.replace('&amp;#91;','&#91;') # lxml is buggy if input is escaped
out = out.replace('&amp;#93;','&#93;') # lxml is buggy if input is escaped
# lxml is buggy if input is escaped:
out = out.replace('&amp;apos;', '&apos;')
# lxml is buggy if input is escaped:
out = out.replace('&amp;quot;', '&quot;')
# lxml is buggy if input is escaped:
out = out.replace('&amp;#91;', '&#91;')
# lxml is buggy if input is escaped:
out = out.replace('&amp;#93;', '&#93;')
|
||||
|
||||
print(out)
|
||||
|
||||
# write node in Moses XML format
|
||||
def create_subtree(position, sentence):
|
||||
|
||||
def create_subtree(position, sentence):
|
||||
""""Write node in Moses XML format."""
|
||||
element = ET.Element('tree')
|
||||
|
||||
if position:
|
||||
@ -111,7 +152,7 @@ def create_subtree(position, sentence):
|
||||
else:
|
||||
element.set('label', 'sent')
|
||||
|
||||
for i in range(1,position):
|
||||
for i in range(1, position):
|
||||
if sentence[i].proj_head == position:
|
||||
element.append(create_subtree(i, sentence))
|
||||
|
||||
@ -144,7 +185,7 @@ def create_brackets(position, sentence):
|
||||
else:
|
||||
element = "[ sent "
|
||||
|
||||
for i in range(1,position):
|
||||
for i in range(1, position):
|
||||
if sentence[i].proj_head == position:
|
||||
element += create_brackets(i, sentence)
|
||||
|
||||
@ -167,7 +208,7 @@ def create_brackets(position, sentence):
|
||||
return element
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.version_info < (3,0,0):
|
||||
if sys.version_info < (3, 0, 0):
|
||||
sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
|
||||
sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
|
||||
sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
|
||||
|
@ -10,17 +10,21 @@ import codecs
|
||||
|
||||
from lxml import etree as ET
|
||||
|
||||
|
||||
def escape(word):
|
||||
word = word.replace('|','&#124;') # factor separator
word = word.replace('[','&#91;') # syntax non-terminal
word = word.replace(']','&#93;') # syntax non-terminal
word = word.replace('\'','&apos;')
word = word.replace('\"','&quot;')
# Factor separator:
word = word.replace('|', '&#124;')
# Syntax non-terminal:
word = word.replace('[', '&#91;')
# Syntax non-terminal:
word = word.replace(']', '&#93;')
word = word.replace('\'', '&apos;')
word = word.replace('\"', '&quot;')
|
||||
|
||||
return word
|
||||
|
||||
def make_brackets(xml):
|
||||
|
||||
def make_brackets(xml):
|
||||
out = ' [' + xml.get('label')
|
||||
|
||||
if xml.text and xml.text.strip():
|
||||
|