mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
29 lines
781 B
Python
Executable File
29 lines
781 B
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
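"""Reduce an n-gram file for NPLM training to a smaller version of it.

Every input line is cut down to at most NGRAMS whitespace-separated tokens,
starting at token index START_IDX; the number of lines is unchanged.
"""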
|
|
from sys import argv
|
|
|
|
def reduce_line(line, start_idx, ngrams):
    """Return the selected token window of *line* as a space-joined string.

    The line is split on whitespace and the tokens at positions
    ``start_idx`` up to (but not including) ``start_idx + ngrams`` are kept.
    Ordinary slice semantics apply, so a line with too few tokens yields a
    shorter (possibly empty) result instead of raising. The result carries
    no trailing space and no newline.
    """
    return " ".join(line.split()[start_idx:start_idx + ngrams])


def main(args):
    """Run the reduction for a command line given as an ``argv``-style list.

    ``args[1]`` is the input path, ``args[2]`` the output path, ``args[3]``
    the index of the first token to keep and ``args[4]`` the number of
    tokens to keep per line. One output line is written per input line.

    Raises:
        SystemExit: with status 1 when the number of arguments is wrong.
        ValueError: when START_IDX or NGRAMS is not an integer.
    """
    if len(args) != 5:
        print("Wrong number of args, got: " + str(len(args) - 1) + " expected 4.")
        print("Usage: reduce_ngrams.py INFILE OUTFILE START_IDX NGRAMS")
        # A usage error must not look like success to a calling shell or
        # pipeline; raising SystemExit also works when the site-provided
        # exit() helper is unavailable (e.g. under ``python -S``).
        raise SystemExit(1)

    # Convert the numeric arguments before touching the file system so that
    # a malformed number cannot truncate an existing OUTFILE.
    start_idx = int(args[3])
    ngrams = int(args[4])

    # The context managers close both files even if reading or writing fails.
    with open(args[1], 'r') as infile, open(args[2], 'w') as outfile:
        for line in infile:
            outfile.write(reduce_line(line, start_idx, ngrams) + '\n')


if __name__ == "__main__":
    main(argv)
|