diff --git a/fairseq/optim/fp16_optimizer.py b/fairseq/optim/fp16_optimizer.py
index e621a6f11..edb4f536e 100644
--- a/fairseq/optim/fp16_optimizer.py
+++ b/fairseq/optim/fp16_optimizer.py
@@ -34,13 +34,17 @@ class _FP16OptimizerMixin(object):
     def build_fp32_params(cls, args, params, flatten=True):
         # create FP32 copy of parameters and grads
         if flatten:
+            is_pipeline_parallel = (
+                getattr(args, 'pipeline_model_parallel', False)
+                and getattr(args, 'distributed_no_spawn', False)
+            )
             total_param_size = sum(p.data.numel() for p in params)
             devices = [torch.cuda.current_device()]
-            if args.pipeline_model_parallel and args.distributed_no_spawn:
+            if is_pipeline_parallel:
                 devices = list(set(args.pipeline_devices))
             fp32_params = {}
             for device in devices:
-                if args.pipeline_model_parallel and args.distributed_no_spawn:
+                if is_pipeline_parallel:
                     device_param_size = sum(p.data.numel() for p in params if p.device.index == device)
                     device_params = [p for p in params if p.device.index == device]
                 else:
diff --git a/hubconf.py b/hubconf.py
index 99d9cabbd..c63fa8ae8 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -4,15 +4,12 @@
 # LICENSE file in the root directory of this source tree.
 
 import functools
-
-from fairseq.hub_utils import BPEHubInterface as bpe  # noqa
-from fairseq.hub_utils import TokenizerHubInterface as tokenizer  # noqa
-from fairseq.models import MODEL_REGISTRY
+import importlib
 
 
 dependencies = [
     'dataclasses',
-    'hydra-core',
+    'hydra',
     'numpy',
     'regex',
     'requests',
@@ -20,31 +17,51 @@ dependencies = [
 ]
 
+
+# Check for required dependencies and raise a RuntimeError if any are missing.
+missing_deps = []
+for dep in dependencies:
+    try:
+        importlib.import_module(dep)
+    except ImportError:
+        # Hack: the hydra package is provided under the "hydra-core" name in
+        # pypi. We don't want the user mistakenly calling `pip install hydra`
+        # since that will install an unrelated package.
+        if dep == 'hydra':
+            dep = 'hydra-core'
+        missing_deps.append(dep)
+if len(missing_deps) > 0:
+    raise RuntimeError('Missing dependencies: {}'.format(', '.join(missing_deps)))
+
+
 # torch.hub doesn't build Cython components, so if they are not found then try
 # to build them here
 try:
-    import fairseq.data.token_block_utils_fast
-except (ImportError, ModuleNotFoundError):
+    import fairseq.data.token_block_utils_fast  # noqa
+except ImportError:
     try:
-        import cython
+        import cython  # noqa
         import os
         from setuptools import sandbox
         sandbox.run_setup(
             os.path.join(os.path.dirname(__file__), 'setup.py'),
             ['build_ext', '--inplace'],
         )
-    except (ImportError, ModuleNotFoundError):
+    except ImportError:
         print(
             'Unable to build Cython components. Please make sure Cython is '
             'installed if the torch.hub model you are loading depends on it.'
         )
 
 
+from fairseq.hub_utils import BPEHubInterface as bpe  # noqa
+from fairseq.hub_utils import TokenizerHubInterface as tokenizer  # noqa
+from fairseq.models import MODEL_REGISTRY  # noqa
+
+
+# automatically expose models defined in FairseqModel::hub_models
 for _model_type, _cls in MODEL_REGISTRY.items():
     for model_name in _cls.hub_models().keys():
         globals()[model_name] = functools.partial(
             _cls.from_pretrained,
             model_name,
         )
-    # to simplify the interface we only expose named models
-    # globals()[_model_type] = _cls.from_pretrained
diff --git a/tests/gpu/test_binaries_gpu.py b/tests/gpu/test_binaries_gpu.py
index b65b545a4..e3fadef9f 100644
--- a/tests/gpu/test_binaries_gpu.py
+++ b/tests/gpu/test_binaries_gpu.py
@@ -64,9 +64,9 @@ class TestTranslationGPU(unittest.TestCase):
                         "--decoder-layers",
                         "2",
                         "--encoder-embed-dim",
-                        "8",
+                        "64",
                         "--decoder-embed-dim",
-                        "8",
+                        "64",
                         "--fp16",
                     ],
                     run_validation=True,
diff --git a/tests/test_fp16_optimizer.py b/tests/test_fp16_optimizer.py
index ae7b797ec..bca341af1 100644
--- a/tests/test_fp16_optimizer.py
+++ b/tests/test_fp16_optimizer.py
@@ -63,7 +63,10 @@ class TestGradientScaling(unittest.TestCase):
         optimizer = FP16Optimizer.build_optimizer(self.namespace_dls, params)
 
         self.run_iter(model, params, optimizer)
-        self.assertTrue(torch.all(optimizer.fp32_params.eq(torch.tensor([3.1000, 5.1000], device='cuda:0', requires_grad=True))))
+        self.assertTrue(all(
+            torch.all(fp32_params.eq(torch.tensor([3.1000, 5.1000], device='cuda:0', requires_grad=True)))
+            for fp32_params in optimizer.fp32_params.values()
+        ))
 
     def test_memory_efficient(self):
         model = copy.deepcopy(self.model)
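For context on the test change above: with this patch, the flattened FP32 copy built by `build_fp32_params(..., flatten=True)` holds one flat FP32 tensor per device (a single entry on one GPU, one per device under pipeline model parallelism), which is why the assertion in `tests/test_fp16_optimizer.py` now iterates over `optimizer.fp32_params.values()` instead of treating `fp32_params` as a single tensor. A minimal sketch of an equivalent check; the helper name `fp32_params_match` is illustrative and not part of fairseq:

    import torch

    def fp32_params_match(fp32_params, expected_values):
        # fp32_params: dict mapping a CUDA device index to a flat FP32 tensor,
        # as produced by build_fp32_params(..., flatten=True) after this change.
        return all(
            torch.all(flat.eq(torch.tensor(expected_values, device=flat.device)))
            for flat in fp32_params.values()
        )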