N-best re-ranker and trainer

2024-08-17 07:20:48 +03:00 · 2016-02-23 11:35:52 -05:00 · 2016-02-23 11:35:52 -05:00 · c6314d927d
commit c6314d927d
parent 2d6f616480
4 changed files with 267 additions and 0 deletions
--- a/scripts/nbest-rescore/README.md
+++ b/scripts/nbest-rescore/README.md
@ -0,0 +1,65 @@
+# N-best List Re-Scorer
+
+Written by Michael Denkowski
+
+These scripts simplify running N-best re-ranking experiments with Moses.  You
+can score N-best lists with external tools (such as models that would be very
+costly to integrate with Moses just for feasibility experiments), then use the
+extended feature set to select translations that may be of a higher quality than
+those preferred by the Moses features alone.  In some cases, training a
+re-ranker even without any new features can yield improvement.
+
+### Training
+
+* Use Moses to generate large N-best lists for a dev set.  Use a config file
+(moses.ini) that has been optimized with MERT, MIRA, or similar:
+
+    cat dev-src.txt |moses -f moses.ini -n-best-list dev.best1000.out 1000 distinct
+
+* (Optionally) add new feature scores to the N-best list using any external
+tools.  Make sure the features are added to the correct field using the correct
+format.  You don't need to update the final scores (right now your new features
+have zero weight):
+
+    0 ||| some translation ||| Feature0= -1.75645 Feature1= -1.38629 -2.19722 -2.31428 -0.81093 AwesomeNewFeature= -1.38629 ||| -4.42063
+
+* Run the optimizer (currently K-best MIRA) to learn new re-ranking weights for
+all features in your N-best list.  Supply the reference translation for the dev
+set:
+
+    python train.py --nbest dev.best1000.with-new-features --ref dev-ref.txt --working-dir rescore-work
+
+* You now have a new config file that contains N-best re-scoring weights:
+
+    rescore-work/rescore.ini
+
+### Test
+
+* Use the **original** config file to generate N-best lists for the test set:
+
+    cat test-src.txt |moses -f moses.ini -n-best-list test.best1000.out 1000 distinct
+
+* Add the same new features that you added for training, using the same
+feature names and format.
+
+* Re-score the N-best list (update total scores) using the **re-scoring**
+weights file:
+
+    python rescore.py rescore-work/rescore.ini <test.best1000.with-new-features >test.best1000.rescored
+
+* The N-best list is **not** re-sorted, so the entries will be out of order.
+Use the top-best script to extract the highest scoring entry for each sentence:
+
+    python topbest.py <test.best1000.rescored >test.topbest
+
+### Not implemented yet
+
+The following could be relatively easily implemented by replicating the
+behavior of mert-moses.pl:
+
+* Sparse features (sparse weight file)
+
+* Other optimizers (MERT, PRO, etc.)
+
+* Other objective functions (TER, Meteor, etc.)
+
+* Multiple reference translations
--- a/scripts/nbest-rescore/rescore.py
+++ b/scripts/nbest-rescore/rescore.py
@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+import sys
+
+# Index of the feature-score field in a "|||"-separated N-best entry
+FEAT_FIELD = 2
+# Index of the total-score field; main() overwrites it with the new score
+SCORE_FIELD = 3
+
+def _die(message):
+    """Write "Error: <message>" to stderr and exit with status 1."""
+    sys.stderr.write('Error: {}\n'.format(message))
+    sys.exit(1)
+
+
+def _read_weights(ini_path):
+    """Read the [weight] section of a moses.ini-style file.
+
+    Returns a dict mapping each feature name (the first token on a line,
+    e.g. "Feature0=") to its list of float weights.  Reading stops at the
+    next "[section]" header or at end of file.  Exits with status 1 if the
+    file contains no [weight] section.
+    """
+    weights = {}
+    in_section = False
+    # "with" guarantees the file is closed (the handle used to be leaked)
+    with open(ini_path) as ini:
+        for line in ini:
+            line = line.strip()
+            if not in_section:
+                in_section = (line == '[weight]')
+                continue
+            # The next section header ends the weight list
+            if line.startswith('['):
+                break
+            # Blank lines and comments carry no weights
+            if not line or line.startswith('#'):
+                continue
+            fields = line.split()
+            weights[fields[0]] = [float(f) for f in fields[1:]]
+    if not in_section:
+        _die('no [weight] section')
+    return weights
+
+
+def _score(feats, weights, line_no):
+    """Return the weighted sum of the feature values in one N-best entry.
+
+    feats is the token list of the feature field: names end with "=" and
+    each name is followed by its values.  The i-th value after a name is
+    multiplied by the i-th weight stored for that name.  line_no is only
+    used in error messages.  Exits with status 1 if a value has no
+    matching weight.
+    """
+    key = None
+    i = 0
+    score = 0
+    for f in feats:
+        if f.endswith('='):
+            # New feature name: following values index its weights from 0
+            key = f
+            i = 0
+            continue
+        if key is None:
+            _die('N-best line {}: value "{}" precedes any feature name'.format(line_no, f))
+        if key not in weights:
+            _die('N-best line {}: no weights for feature "{}"'.format(line_no, key))
+        if i >= len(weights[key]):
+            _die('N-best line {}: feature "{}" has more values than weights'.format(line_no, key))
+        score += float(f) * weights[key][i]
+        i += 1
+    return score
+
+
+def main():
+    """Re-score an N-best list read from stdin with weights from an ini file.
+
+    Takes exactly one command-line argument, the weights file.  Every
+    entry is written to stdout in its original order with the score field
+    replaced by the newly computed weighted sum; blank input lines are
+    skipped.
+    """
+    if len(sys.argv[1:]) != 1:
+        sys.stderr.write('Usage: {} moses.ini <nbest.with-new-features >nbest.rescored\n'.format(sys.argv[0]))
+        sys.stderr.write('Entries are _not_ re-sorted based on new score.  Use topbest.py\n')
+        sys.exit(2)
+
+    # moses.ini
+    weights = _read_weights(sys.argv[1])
+
+    # N-best
+    for line_no, line in enumerate(sys.stdin, 1):
+        if not line.strip():
+            continue
+        fields = [f.strip() for f in line.split('|||')]
+        if len(fields) <= SCORE_FIELD:
+            _die('N-best line {}: expected at least {} "|||"-separated fields'.format(line_no, SCORE_FIELD + 1))
+        score = _score(fields[FEAT_FIELD].split(), weights, line_no)
+        fields[SCORE_FIELD] = str(score)
+        sys.stdout.write('{}\n'.format(' ||| '.join(fields)))
+
+if __name__ == '__main__':
+    main()
--- a/scripts/nbest-rescore/topbest.py
+++ b/scripts/nbest-rescore/topbest.py
@ -0,0 +1,30 @@
+#!/usr/bin/env python
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+import sys
+
+# Index of the total-score field in a "|||"-separated N-best entry
+SCORE_FIELD = 3
+
+def main():
+    """Print the highest-scoring hypothesis for each sentence of an N-best list.
+
+    Reads "|||"-separated N-best entries from stdin.  Consecutive entries
+    that share the same first field (the sentence id) form one group; for
+    each group the hypothesis (second field) with the largest score is
+    written to stdout, one per line.  On ties the earliest entry wins.
+    Blank lines are skipped and empty input produces no output.
+    """
+    current_id = None  # None until the first entry has been read
+    best_hyp = ''
+    best_score = 0.0
+
+    for line in sys.stdin:
+        if not line.strip():
+            continue
+        fields = [f.strip() for f in line.split('|||')]
+        sent_id = fields[0]
+        score = float(fields[SCORE_FIELD])
+        if sent_id != current_id:
+            # A new sentence starts: flush the winner of the previous one
+            if current_id is not None:
+                sys.stdout.write('{}\n'.format(best_hyp))
+            current_id = sent_id
+            best_hyp = fields[1]
+            best_score = score
+        elif score > best_score:
+            best_hyp = fields[1]
+            best_score = score
+
+    # Flush the last sentence, but only if any entry was read at all
+    if current_id is not None:
+        sys.stdout.write('{}\n'.format(best_hyp))
+
+if __name__ == '__main__':
+    main()
--- a/scripts/nbest-rescore/train.py
+++ b/scripts/nbest-rescore/train.py
@ -0,0 +1,116 @@
+#!/usr/bin/env python
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+import argparse
+import os
+import subprocess
+import sys
+
+# Feature field in N-best format
+FEAT_FIELD = 2
+
+# Location of mert, kbmira, etc. in relation to this script: the "bin"
+# directory three levels up from this file.  __file__ can be a relative path
+# (e.g. just "train.py" when run as "python train.py"), in which case the
+# nested dirname() calls collapse to '' and the default would point at "bin"
+# under the current directory, so make the path absolute first.
+BIN_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'bin')
+
+def _fatal(message):
+    """Write "Error: <message>" to stderr and exit with status 1."""
+    sys.stderr.write('Error: {}\n'.format(message))
+    sys.exit(1)
+
+
+def _read_init_weights(nbest_path):
+    """Build the initial weight list from the first entry of an N-best list.
+
+    Returns a list of [feature_name, [0, 0, ...]] pairs in file order, with
+    one zero per value that follows the name in the feature field.  Only
+    the first line is inspected, so all features are assumed to be dense
+    (present for each entry).  Exits with status 1 if the file is empty or
+    the first entry has no feature field.
+    """
+    # "with" closes the file (the handle used to be leaked)
+    with open(nbest_path) as inp:
+        fields = [f.strip() for f in inp.readline().split('|||')]
+    if len(fields) <= FEAT_FIELD:
+        _fatal('cannot read features from first entry of "{}"'.format(nbest_path))
+    init_weights = []
+    for tok in fields[FEAT_FIELD].split():
+        if tok.endswith('='):
+            init_weights.append([tok, []])
+        elif init_weights:
+            # Start all weights at 0
+            init_weights[-1][1].append(0)
+    if not init_weights:
+        _fatal('no features found in first entry of "{}"'.format(nbest_path))
+    return init_weights
+
+
+def _run(cmd):
+    """Run an external command; exit with status 1 if it does not succeed."""
+    status = subprocess.call(cmd)
+    if status != 0:
+        _fatal('command failed with exit status {}: {}'.format(status, ' '.join(cmd)))
+
+
+def main():
+    """Learn N-best re-scoring weights with K-best MIRA.
+
+    Runs "extractor" on the N-best list and reference, runs "kbmira" on
+    the extracted data starting from all-zero weights, normalizes the
+    learned weights by the sum of their absolute values, and writes them
+    to "rescore.ini" in the working directory.
+    """
+    # Args
+    parser = argparse.ArgumentParser(description='Learn N-best rescoring weights')
+    parser.add_argument('--nbest', metavar='nbest',
+            help='Dev set N-best list augmented with new features', required=True)
+    parser.add_argument('--ref', metavar='ref',
+            help='Dev set reference translation', required=True)
+    parser.add_argument('--working-dir', metavar='rescore-work',
+            help='Optimizer working directory', required=True)
+    parser.add_argument('--bin-dir', metavar='DIR',
+            help='Moses bin dir, containing kbmira, evaluator, etc.', default=BIN_DIR)
+    # Since we're starting with uniform weights and only running kbmira once,
+    # run a gratuitous number of iterations.  (mert-moses.pl default is 60
+    # iterations for each Moses run)
+    parser.add_argument('--iterations', metavar='N', type=int,
+            help='Number of K-best MIRA iterations to run (default: 300)', default=300)
+    args = parser.parse_args()
+
+    # Find executables
+    extractor = os.path.join(args.bin_dir, 'extractor')
+    kbmira = os.path.join(args.bin_dir, 'kbmira')
+    for exe in (extractor, kbmira):
+        if not os.path.exists(exe):
+            sys.stderr.write('Error: cannot find executable "{}" in "{}", please specify --bin-dir\n'.format(exe, args.bin_dir))
+            sys.exit(1)
+
+    # rescore-work dir (makedirs so that nested paths work too)
+    if not os.path.isdir(args.working_dir):
+        os.makedirs(args.working_dir)
+    scores_dat = os.path.join(args.working_dir, 'scores.dat')
+    features_dat = os.path.join(args.working_dir, 'features.dat')
+    init_dense = os.path.join(args.working_dir, 'init.dense')
+    mert_out = os.path.join(args.working_dir, 'mert.out')
+    rescore_ini = os.path.join(args.working_dir, 'rescore.ini')
+
+    # Feature names and numbers of weights from N-best list
+    # Assume all features are dense (present for each entry)
+    init_weights = _read_init_weights(args.nbest)
+
+    # Extract score and feature data from N-best list
+    _run([extractor,
+            '--sctype', 'BLEU', '--scconfig', 'case:true',
+            '--scfile', scores_dat,
+            '--ffile', features_dat,
+            '-r', args.ref,
+            '-n', args.nbest])
+
+    # Write dense feature list
+    with open(init_dense, 'w') as out:
+        for (feat, weights) in init_weights:
+            for w in weights:
+                out.write('{} {}\n'.format(feat, w))
+
+    # Run K-best MIRA optimizer
+    _run([kbmira,
+            '--dense-init', init_dense,
+            '--ffile', features_dat,
+            '--scfile', scores_dat,
+            '-o', mert_out,
+            '--iters', str(args.iterations)])
+
+    # Read optimized weights, sum for normalization
+    opt_weights = []
+    total = 0
+    with open(mert_out) as inp:
+        # Same structure as original weight list: one output line per weight,
+        # with the value in the second column
+        for (feat, weights) in init_weights:
+            learned = []
+            for _ in weights:
+                tokens = inp.readline().split()
+                if len(tokens) < 2:
+                    _fatal('"{}" has fewer weights than the N-best list has feature values'.format(mert_out))
+                w = float(tokens[1])
+                learned.append(w)
+                # Sum for normalization
+                total += abs(w)
+            opt_weights.append([feat, learned])
+
+    # Normalize weights (skipped when every weight is zero, which would
+    # otherwise divide by zero)
+    if total > 0:
+        for (_, weights) in opt_weights:
+            weights[:] = [w / total for w in weights]
+    else:
+        sys.stderr.write('Warning: all optimized weights are zero, not normalizing\n')
+
+    # Generate rescore.ini
+    with open(rescore_ini, 'w') as out:
+        out.write('# For use with Moses N-best rescorer "scripts/nbest-rescore/rescore.py"\n')
+        out.write('\n')
+        out.write('[weight]\n')
+        for (feat, weights) in opt_weights:
+            out.write('{} {}\n'.format(feat, ' '.join(str(w) for w in weights)))
+
+if __name__ == '__main__':
+    main()