From dbcc264506ca26b471b821ab8fc0b78d88457185 Mon Sep 17 00:00:00 2001 From: Jeroen Vermeulen Date: Tue, 9 Jun 2015 23:10:27 +0700 Subject: [PATCH] Remove unneeded script. Tom Hoar, the author of this script, asked me to remove it because it doesn't actually do what the current name says, and can't work without an additional script which isn't in the repository. --- .../training/convert-moses-ini-v2-to-v1.py | 266 ------------------ 1 file changed, 266 deletions(-) delete mode 100755 scripts/training/convert-moses-ini-v2-to-v1.py diff --git a/scripts/training/convert-moses-ini-v2-to-v1.py b/scripts/training/convert-moses-ini-v2-to-v1.py deleted file mode 100755 index 4b7cfa5fa..000000000 --- a/scripts/training/convert-moses-ini-v2-to-v1.py +++ /dev/null @@ -1,266 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf8 -*- -# -# This file is part of moses. Its use is licensed under the GNU Lesser General -# Public License version 3 or, at your option, any later version. - - -from __future__ import ( - absolute_import, - print_function, - unicode_literals, - ) - -__version__ = '1.0' -__license__ = 'LGPL3' -__source__ = 'Precision Translation Tools Pte Lte' - -import errno -from sys import stdout -from copy import deepcopy -from os.path import ( - dirname, - basename, - exists, - realpath, - ) -from os import makedirs - - -root_escape = '%(escape-prefix)s' - - -class moses2_to_ini(object): - - def __init__(self, inp, out, escape_prefix): - self.inp = inp - self.out = out - self.escape_prefix = escape_prefix - self._config = {} - - def parse(self): - key = '' - section = None - self._config = {} - counter = 0 - - with open(self.inp, 'rb') as f: - contents = f.read().decode('utf8') - - lines = contents.splitlines() - - # Known feature/functions without attributes. - attrless_ffs = [ - 'UnknownWordPenalty', - 'WordPenalty', - 'PhrasePenalty', - 'Distortion', - ] - - # Retrieve all values except feature/functions with attributes. - for i, line in [(i, line.strip()) for i, line in enumerate(lines) - if line.strip() and not line.strip().startswith('#')]: - - if line.startswith('[') and line.endswith(']'): - - section = line.strip('] [') - - if section not in self._config.keys() + ['feature', 'weight']: - # New section not in config and not a reserved section. - counter = 0 - key = section - self._config[key] = {} - - elif section == 'feature' and line in attrless_ffs: - # Known feature/funcions without attributes. - key = '%s0' % line - if key not in self._config: - self._config[key] = {} - self._config[key]['feature'] = line - - elif section == 'feature': - # Skip feature/funcions with arguments. - continue - - elif section == 'weight': - # Add weight value to feature sections. - config_items = [ - (key.strip(), value.strip()) - for key, value in [line.split('=', 1)] - ] - for key, value in config_items: - if key not in self._config: - self._config[key] = {} - self._config[key]['weight'] = value - - else: - self._config[key][counter] = line - counter += 0 - - lines[i] = '' - - # Second, match feature/functions attributes to [weight] section - # values. - stripped_lines = [line.strip() for line in lines] - nonempty_lines = [ - line - for line in stripped_lines - if line != '' and not line.startswith('#') - ] - for i, line in enumerate(nonempty_lines): - # Add "feature" to assist creating tmpdict for feature/functions. - line = 'feature=%s' % line - tmpdict = dict([key.split('=', 1) for key in line.split()]) - - # Feature/functions 'name' attribute must match an entry in - # [weight]. - if tmpdict.get('name') not in self._config: - raise RuntimeError('malformed moses.ini v2 file') - - config_items = [ - (key.strip(), value.strip()) - for key, value in tmpdict.items() - if key.strip() != 'name' - ] - for key, value in config_items: - self._config[tmpdict['name']][key] = value - - return deepcopy(self._config) - - def render(self, config): - self._config = deepcopy(config) - _config = deepcopy(config) - lines = _tolines(_config, self.escape_prefix) - if self.out == '-': - stdout.write('\n'.join(lines)) - else: - contents = '\r\n'.join(lines) - makedir(dirname(self.out)) - with open(self.out, 'wb') as f: - f.write(contents.encode('utf8')) - - def __str__(self): - return '\n'.join(_tolines(self._config, self.escape_prefix)) - - @property - def config(self): - return deepcopy(self._config) - - -def _tolines(config, escape_prefix): - - section_names = sorted(config) - lines = [] - - # Group feature/functions first. - group_ffs = [ - name - for name in section_names - if name[-1].isdigit() - ] - for sectionname in group_ffs: - section = config[sectionname] - lines.append('[%s]' % sectionname) - for option, value in section.items(): - if option == 'path' \ - and escape_prefix is not None \ - and value.startswith(escape_prefix): - value = value.replace(escape_prefix, root_escape, 1) - lines.append('%s=%s' % (option, value)) - lines.append('') - - other_ffs = [ - name - for name in section_names - if not name[-1].isdigit() - ] - for sectionname in other_ffs: - section = config[sectionname] - lines.append('[%s]' % sectionname) - for option, value in section.items(): - lines.append('%s=%s' % (option, value)) - lines.append('') - - return deepcopy(lines) - - -def makedir(path, mode=0o777): - try: - makedirs(path, mode) - except OSError as e: - accepted_errors = [ - errno.EEXIST, - errno.EPERM, - errno.EACCES, - errno.ENOENT, - ] - if e.errno not in accepted_errors: - raise - - -def get_args(): - '''Parse command-line arguments - - Uses the API compatibility between the legacy - argparse.OptionParser and its replacement argparse.ArgumentParser - for functional equivelancy and nearly identical help prompt. - ''' - - description = 'Convert Moses.ini v2 file to standard INI format' - usage = '%s [arguments]' % basename(__file__) - - try: - from argparse import ArgumentParser - except ImportError: - from optparse import OptionParser - argparser = False - escape_help = ( - "Optional. Path of SMT model. If provided, " - "escapes \"escape-prefix\" with \"%(escape-prefix)s\"") - parser = OptionParser(usage=usage, description=description) - add_argument = parser.add_option - else: - argparser = True - escape_help = ( - "Optional. Path of SMT model. If provided, " - "escape \"escape-prefix\" with \"%%(escape-prefix)s\"") - parser = ArgumentParser(usage=usage, description=description) - add_argument = parser.add_argument - - add_argument( - '-i', '--inp', action='store', - help="moses.ini v2 file to convert (required)") - - add_argument( - '-o', '--out', action='store', default='-', - help="standard INI file (default: '-' outputs to stdout)") - - add_argument('-r', '--escape-prefix', action='store', help=escape_help) - - if argparser: - args = vars(parser.parse_args()) - else: - opts = parser.parse_args() - args = vars(opts[0]) - - if args['inp'] is None: - parser.error('argument -i/--inp required') - - args['inp'] = realpath(args['inp']) - - if not exists(args['inp']): - parser.error( - "argument -i/--inp invalid.\n" - "reference: %s" % args['inp']) - - if args['out'] != '-': - args['out'] = realpath(args['out']) - - return args - - -if __name__ == '__main__': - args = get_args() - converter = moses2_to_ini(**args) - config = converter.parse() - converter.render(config)