mirror of
https://github.com/rsennrich/subword-nmt.git
synced 2024-11-27 12:42:07 +03:00
use strip
This commit is contained in:
parent
3028cc660d
commit
b1e99d9829
@ -30,7 +30,7 @@ if sys.version_info < (3, 0):
|
||||
class BPE(object):
|
||||
|
||||
def __init__(self, codes, separator='@@'):
|
||||
self.bpe_codes = [tuple(item.split()) for item in codes]
|
||||
self.bpe_codes = [tuple(item.strip().split()) for item in codes]
|
||||
# some hacking to deal with duplicates (only consider first instance)
|
||||
self.bpe_codes = dict([(code,i) for (i,code) in reversed(list(enumerate(self.bpe_codes)))])
|
||||
|
||||
@ -40,7 +40,7 @@ class BPE(object):
|
||||
"""segment single sentence (whitespace-tokenized string) with BPE encoding"""
|
||||
|
||||
output = []
|
||||
for word in sentence.split():
|
||||
for word in sentence.strip().split():
|
||||
new_word = encode(word, self.bpe_codes)
|
||||
|
||||
for item in new_word[:-1]:
|
||||
|
Loading…
Reference in New Issue
Block a user