fix merge conflict

This commit is contained in:
Rico Sennrich 2017-04-01 21:25:05 +01:00
parent b481fdc4c0
commit 90fa4afd13
2 changed files with 6 additions and 4 deletions

View File

@ -176,7 +176,7 @@ def prune_stats(stats, big_stats, threshold):
big_stats[item] = freq
def main(vocab, outfile, num_symbols, min_frequency=2, verbose=False):
def main(infile, outfile, num_symbols, min_frequency=2, verbose=False, is_dict=False):
"""Learn num_symbols BPE operations from vocabulary, and write to outfile.
"""
@ -184,7 +184,7 @@ def main(vocab, outfile, num_symbols, min_frequency=2, verbose=False):
# version numbering allows backward compatibility
outfile.write('#version: 0.2\n')
vocab = get_vocabulary(args.input, is_dict = args.dict_input)
vocab = get_vocabulary(infile, is_dict)
vocab = dict([(tuple(x[:-1])+(x[-1]+'</w>',) ,y) for (x,y) in vocab.items()])
sorted_vocab = sorted(vocab.items(), key=lambda x: x[1], reverse=True)
@ -240,4 +240,4 @@ if __name__ == '__main__':
if args.output.name != '<stdout>':
args.output = codecs.open(args.output.name, 'w', encoding='utf-8')
main(vocab, args.output, args.symbols, args.min_frequency, args.verbose)
main(args.input, args.output, args.symbols, args.min_frequency, args.verbose, is_dict=args.dict_input)

View File

@ -91,9 +91,11 @@ if __name__ == '__main__':
full_vocab += learn_bpe.get_vocabulary(f)
f.seek(0)
vocab_list = ['{0} {1}'.format(key, freq) for (key, freq) in full_vocab.items()]
# learn BPE on combined vocabulary
with codecs.open(args.output.name, 'w', encoding='UTF-8') as output:
learn_bpe.main(full_vocab, output, args.symbols, args.min_frequency, args.verbose)
learn_bpe.main(vocab_list, output, args.symbols, args.min_frequency, args.verbose, is_dict=True)
with codecs.open(args.output.name, encoding='UTF-8') as codes:
bpe = apply_bpe.BPE(codes, args.separator, None)