mirror of
https://github.com/rsennrich/subword-nmt.git
synced 2024-11-27 02:53:55 +03:00
fix merge conflict
This commit is contained in:
parent
b481fdc4c0
commit
90fa4afd13
@ -176,7 +176,7 @@ def prune_stats(stats, big_stats, threshold):
|
||||
big_stats[item] = freq
|
||||
|
||||
|
||||
def main(vocab, outfile, num_symbols, min_frequency=2, verbose=False):
|
||||
def main(infile, outfile, num_symbols, min_frequency=2, verbose=False, is_dict=False):
|
||||
"""Learn num_symbols BPE operations from vocabulary, and write to outfile.
|
||||
"""
|
||||
|
||||
@ -184,7 +184,7 @@ def main(vocab, outfile, num_symbols, min_frequency=2, verbose=False):
|
||||
# version numbering allows backward compatibility
|
||||
outfile.write('#version: 0.2\n')
|
||||
|
||||
vocab = get_vocabulary(args.input, is_dict = args.dict_input)
|
||||
vocab = get_vocabulary(infile, is_dict)
|
||||
vocab = dict([(tuple(x[:-1])+(x[-1]+'</w>',) ,y) for (x,y) in vocab.items()])
|
||||
sorted_vocab = sorted(vocab.items(), key=lambda x: x[1], reverse=True)
|
||||
|
||||
@ -240,4 +240,4 @@ if __name__ == '__main__':
|
||||
if args.output.name != '<stdout>':
|
||||
args.output = codecs.open(args.output.name, 'w', encoding='utf-8')
|
||||
|
||||
main(vocab, args.output, args.symbols, args.min_frequency, args.verbose)
|
||||
main(args.input, args.output, args.symbols, args.min_frequency, args.verbose, is_dict=args.dict_input)
|
||||
|
@ -91,9 +91,11 @@ if __name__ == '__main__':
|
||||
full_vocab += learn_bpe.get_vocabulary(f)
|
||||
f.seek(0)
|
||||
|
||||
vocab_list = ['{0} {1}'.format(key, freq) for (key, freq) in full_vocab.items()]
|
||||
|
||||
# learn BPE on combined vocabulary
|
||||
with codecs.open(args.output.name, 'w', encoding='UTF-8') as output:
|
||||
learn_bpe.main(full_vocab, output, args.symbols, args.min_frequency, args.verbose)
|
||||
learn_bpe.main(vocab_list, output, args.symbols, args.min_frequency, args.verbose, is_dict=True)
|
||||
|
||||
with codecs.open(args.output.name, encoding='UTF-8') as codes:
|
||||
bpe = apply_bpe.BPE(codes, args.separator, None)
|
||||
|
Loading…
Reference in New Issue
Block a user