Rename data.transforms -> data.encoders

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/747

Differential Revision: D16403464

Pulled By: myleott

fbshipit-source-id: ee3b4184f129a02be833c7bdc00685978b4de883
Myle Ott 2019-07-21 03:48:34 -07:00 committed by Facebook Github Bot
parent 69d0f7f826
commit f812e52956
9 changed files with 14 additions and 14 deletions
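
For downstream code, this is a pure rename: every fairseq.data.transforms import becomes fairseq.data.encoders, with no change in behavior. A minimal before/after sketch of the import paths touched by the diffs below:

    # before this commit
    from fairseq.data import transforms
    from fairseq.data.transforms import register_bpe, register_tokenizer

    # after this commit
    from fairseq.data import encoders
    from fairseq.data.encoders import register_bpe, register_tokenizer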

View File

@@ -24,8 +24,8 @@ build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry(
 )
 
-# automatically import any Python files in the transforms/ directory
+# automatically import any Python files in the encoders/ directory
 for file in os.listdir(os.path.dirname(__file__)):
     if file.endswith('.py') and not file.startswith('_'):
         module = file[:file.find('.py')]
-        importlib.import_module('fairseq.data.transforms.' + module)
+        importlib.import_module('fairseq.data.encoders.' + module)
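
For context, this hunk is the tail of the registry setup: registry.setup_registry yields a (builder, decorator, registry-dict) triple per extension point, and the loop imports every module in the package so their @register_* decorators run at import time. A sketch of the whole pattern, assuming the '--tokenizer' and '--bpe' flag names (they are not visible in this hunk):

    import importlib
    import os

    from fairseq import registry

    # each call returns (build_fn, register_decorator, registry_dict)
    build_tokenizer, register_tokenizer, TOKENIZER_REGISTRY = registry.setup_registry(
        '--tokenizer',
        default=None,
    )
    build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry(
        '--bpe',
        default=None,
    )

    # automatically import any Python files in the encoders/ directory so
    # that @register_tokenizer/@register_bpe decorators execute on import
    for file in os.listdir(os.path.dirname(__file__)):
        if file.endswith('.py') and not file.startswith('_'):
            module = file[:file.find('.py')]
            importlib.import_module('fairseq.data.encoders.' + module)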

View File

@@ -6,7 +6,7 @@
 # can be found in the PATENTS file in the same directory.
 
 from fairseq import file_utils
-from fairseq.data.transforms import register_bpe
+from fairseq.data.encoders import register_bpe
 
 
 @register_bpe('gpt2')
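
Each file below follows the same plugin shape: import the decorator from fairseq.data.encoders and register a class under a name selectable from the command line. A minimal hypothetical plugin as a template (the class and name are illustrative, not part of this PR):

    from fairseq.data.encoders import register_bpe


    @register_bpe('identity_demo')
    class IdentityDemoBPE(object):
        """Hypothetical no-op BPE, showing the interface the registry expects."""

        def __init__(self, args):
            self.args = args

        def encode(self, x):
            return x  # a real scheme would segment words into subword units

        def decode(self, x):
            return x  # and merge subword units back into words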

View File

@@ -5,7 +5,7 @@
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.
 
-from fairseq.data.transforms import register_tokenizer
+from fairseq.data.encoders import register_tokenizer
 
 
 @register_tokenizer('moses')

View File

@@ -5,7 +5,7 @@
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.
 
-from fairseq.data.transforms import register_tokenizer
+from fairseq.data.encoders import register_tokenizer
 
 
 @register_tokenizer('nltk')

View File

@@ -6,7 +6,7 @@
 # can be found in the PATENTS file in the same directory.
 
 from fairseq import file_utils
-from fairseq.data.transforms import register_bpe
+from fairseq.data.encoders import register_bpe
 
 
 @register_bpe('sentencepiece')

View File

@@ -7,7 +7,7 @@
 
 import re
 
-from fairseq.data.transforms import register_tokenizer
+from fairseq.data.encoders import register_tokenizer
 
 
 @register_tokenizer('space')

View File

@@ -6,7 +6,7 @@
 # can be found in the PATENTS file in the same directory.
 
 from fairseq import file_utils
-from fairseq.data.transforms import register_bpe
+from fairseq.data.encoders import register_bpe
 
 
 @register_bpe('subword_nmt')

View File

@@ -9,7 +9,7 @@
 import torch
 
 from fairseq import utils
-from fairseq.data import transforms
+from fairseq.data import encoders
 
 
 class Generator(object):
@@ -44,8 +44,8 @@ class Generator(object):
         # (None if no unknown word replacement, empty if no path to align dictionary)
         self.align_dict = utils.load_align_dict(getattr(args, 'replace_unk', None))
 
-        self.tokenizer = transforms.build_tokenizer(args)
-        self.bpe = transforms.build_bpe(args)
+        self.tokenizer = encoders.build_tokenizer(args)
+        self.bpe = encoders.build_bpe(args)
 
     def generate(self, src_str, verbose=False):
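
Given the attributes set above, the natural pre/post-processing pipeline chains the two stages: tokenize then BPE-encode on the way in, and invert in reverse order on the way out. A sketch of such helper methods (names are illustrative; this PR only renames the builders):

    def encode(self, sentence):
        # tokenize first (e.g. Moses), then apply BPE segmentation
        if self.tokenizer is not None:
            sentence = self.tokenizer.encode(sentence)
        if self.bpe is not None:
            sentence = self.bpe.encode(sentence)
        return sentence

    def decode(self, sentence):
        # undo BPE first, then detokenize
        if self.bpe is not None:
            sentence = self.bpe.decode(sentence)
        if self.tokenizer is not None:
            sentence = self.tokenizer.decode(sentence)
        return sentence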

View File

@@ -15,7 +15,7 @@ import fileinput
 import torch
 
 from fairseq import checkpoint_utils, options, tasks, utils
-from fairseq.data import transforms
+from fairseq.data import encoders
 
 
 Batch = namedtuple('Batch', 'ids src_tokens src_lengths')
@@ -103,8 +103,8 @@ def main(args):
     generator = task.build_generator(args)
 
     # Handle tokenization and BPE
-    tokenizer = transforms.build_tokenizer(args)
-    bpe = transforms.build_bpe(args)
+    tokenizer = encoders.build_tokenizer(args)
+    bpe = encoders.build_bpe(args)
 
     def encode_fn(x):
         if tokenizer is not None:
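
The hunk cuts off mid-function; a sketch of how encode_fn plausibly continues (not shown in this diff), mirroring the tokenize-then-BPE order used elsewhere:

    def encode_fn(x):
        # pre-process raw input: tokenize, then BPE-encode
        if tokenizer is not None:
            x = tokenizer.encode(x)
        if bpe is not None:
            x = bpe.encode(x)
        return x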