mosesdecoder/scripts/training/bilingual-lm/reduce_ngrams.py
2014-11-26 10:32:37 +00:00

29 lines
781 B
Python
Executable File

#!/usr/bin/env python3
"""Shorten every n-gram in an ngrams file used for training nplm.

Each input line is split on whitespace and only the NGRAMS tokens starting
at offset START_IDX are kept.  One output line is written per input line
(the number of lines is unchanged), with the kept tokens separated by
single spaces.

Usage: reduce_ngrams.py INFILE OUTFILE START_IDX NGRAMS
"""
from sys import argv


def reduce_line(line, start_idx, ngrams):
    """Return the selected window of tokens from *line*, joined by spaces.

    Args:
        line: One line of the input file; split on any whitespace, so a
            trailing newline is ignored.
        start_idx: Index of the first token to keep.
        ngrams: Maximum number of tokens to keep.

    Returns:
        The tokens ``line.split()[start_idx:start_idx + ngrams]`` joined
        with single spaces and without a trailing newline.  Ordinary slice
        semantics apply, so a window that lies outside the line yields an
        empty string instead of raising.
    """
    tokens = line.split()
    return " ".join(tokens[start_idx:start_idx + ngrams])


def main(args):
    """Run the command-line tool.

    Args:
        args: Full argument vector, program name included, i.e.
            ``[prog, INFILE, OUTFILE, START_IDX, NGRAMS]``.

    Returns:
        Process exit status: 0 on success, 1 when the argument count is
        wrong.

    Raises:
        ValueError: If START_IDX or NGRAMS is not an integer literal.
        OSError: If INFILE cannot be read or OUTFILE cannot be written.
    """
    if len(args) != 5:
        print("Wrong number of args, got: " + str(len(args) - 1) + " expected 4.")
        print("Usage: reduce_ngrams.py INFILE OUTFILE START_IDX NGRAMS")
        # A usage error must not look like success to a calling pipeline.
        return 1

    # Parse the numeric arguments before OUTFILE is opened for writing, so a
    # malformed value cannot truncate an existing output file.
    start_idx = int(args[3])
    ngrams = int(args[4])

    # The context manager closes both files even if a line fails midway.
    with open(args[1], 'r') as infile, open(args[2], 'w') as outfile:
        for line in infile:
            outfile.write(reduce_line(line, start_idx, ngrams) + '\n')
    return 0


if __name__ == "__main__":
    raise SystemExit(main(argv))