#!/usr/bin/env python3 # # This file is part of moses. Its use is licensed under the GNU Lesser General # Public License version 2.1 or, at your option, any later version. """Reduce an ngrams file for training nplm to a smaller version of it. The smaller version will have fewer ngrams. """ from sys import argv if len(argv) != 5: print("Wrong number of args, got: " + str(len(argv) - 1) + " expected 4.") print("Usage: reduce_ngrams.py INFILE OUTFILE START_IDX NGRAMS") exit() INFILE = open(argv[1], 'r') OUTFILE = open(argv[2], 'w') START_IDX = int(argv[3]) NGRAMS = int(argv[4]) for line in INFILE: line = line.split() line = line[START_IDX:START_IDX + NGRAMS] linetowrite = "" for token in line: linetowrite = linetowrite + token + " " # Strip final empty space and add newline. linetowrite = linetowrite[:-1] linetowrite = linetowrite + '\n' OUTFILE.write(linetowrite) INFILE.close() OUTFILE.close()