Use strip() before split() when parsing BPE codes and input sentences

This commit is contained in:
Kyunghyun Cho 2015-11-25 18:43:44 -05:00
parent 3028cc660d
commit b1e99d9829

View File

@ -30,7 +30,7 @@ if sys.version_info < (3, 0):
class BPE(object):
def __init__(self, codes, separator='@@'):
self.bpe_codes = [tuple(item.split()) for item in codes]
self.bpe_codes = [tuple(item.strip().split()) for item in codes]
# some hacking to deal with duplicates (only consider first instance)
self.bpe_codes = dict([(code,i) for (i,code) in reversed(list(enumerate(self.bpe_codes)))])
@ -40,7 +40,7 @@ class BPE(object):
"""segment single sentence (whitespace-tokenized string) with BPE encoding"""
output = []
for word in sentence.split():
for word in sentence.strip().split():
new_word = encode(word, self.bpe_codes)
for item in new_word[:-1]: