Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/2687

Reviewed By: alexeib

Differential Revision: D24095130

Pulled By: myleott

fbshipit-source-id: 7d371bccb550ec68b2b9b39dfa4c0718356508d6
This commit is contained in:
    Myle Ott, 2020-10-02 19:00:29 -07:00 (committed by Facebook GitHub Bot)
    parent: 7c392f7d0e
    commit: 7c292af66f

4 changed files with 40 additions and 16 deletions


@@ -34,13 +34,17 @@ class _FP16OptimizerMixin(object):
     def build_fp32_params(cls, args, params, flatten=True):
         # create FP32 copy of parameters and grads
         if flatten:
+            is_pipeline_parallel = (
+                getattr(args, 'pipeline_model_parallel', False)
+                and getattr(args, 'distributed_no_spawn', False)
+            )
             total_param_size = sum(p.data.numel() for p in params)
             devices = [torch.cuda.current_device()]
-            if args.pipeline_model_parallel and args.distributed_no_spawn:
+            if is_pipeline_parallel:
                 devices = list(set(args.pipeline_devices))
             fp32_params = {}
             for device in devices:
-                if args.pipeline_model_parallel and args.distributed_no_spawn:
+                if is_pipeline_parallel:
                     device_param_size = sum(p.data.numel() for p in params if p.device.index == device)
                     device_params = [p for p in params if p.device.index == device]
                 else:
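
The hunk above replaces two copies of the same compound test with a single is_pipeline_parallel flag, and switches to getattr(..., False) so the check degrades gracefully when an older args namespace lacks the pipeline attributes. Below is a minimal, standalone sketch of the per-device grouping idea it feeds into (not fairseq's actual implementation; group_params_by_device is a hypothetical name, and the toy CPU tensors stand in for parameters spread across pipeline GPUs):

import torch

def group_params_by_device(params):
    # Bucket parameters by the device that holds them, then build one flat
    # FP32 master copy per device; a simplified version of what
    # build_fp32_params does when pipeline model parallelism is enabled.
    grouped = {}
    for p in params:
        grouped.setdefault(p.device, []).append(p)
    return {
        device: torch.cat([p.detach().float().view(-1) for p in ps])
        for device, ps in grouped.items()
    }

params = [torch.ones(3, dtype=torch.half), torch.zeros(2, dtype=torch.half)]
flat = group_params_by_device(params)
assert flat[torch.device('cpu')].numel() == 5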


@@ -4,15 +4,12 @@
 # LICENSE file in the root directory of this source tree.
 
 import functools
-
-from fairseq.hub_utils import BPEHubInterface as bpe  # noqa
-from fairseq.hub_utils import TokenizerHubInterface as tokenizer  # noqa
-from fairseq.models import MODEL_REGISTRY
+import importlib
 
 
 dependencies = [
     'dataclasses',
-    'hydra-core',
+    'hydra',
     'numpy',
     'regex',
     'requests',
@@ -20,31 +17,51 @@ dependencies = [
 ]
 
+# Check for required dependencies and raise a RuntimeError if any are missing.
+missing_deps = []
+for dep in dependencies:
+    try:
+        importlib.import_module(dep)
+    except ImportError:
+        # Hack: the hydra package is provided under the "hydra-core" name in
+        # pypi. We don't want the user mistakenly calling `pip install hydra`
+        # since that will install an unrelated package.
+        if dep == 'hydra':
+            dep = 'hydra-core'
+        missing_deps.append(dep)
+if len(missing_deps) > 0:
+    raise RuntimeError('Missing dependencies: {}'.format(', '.join(missing_deps)))
+
+
 # torch.hub doesn't build Cython components, so if they are not found then try
 # to build them here
 try:
-    import fairseq.data.token_block_utils_fast
-except (ImportError, ModuleNotFoundError):
+    import fairseq.data.token_block_utils_fast  # noqa
+except ImportError:
     try:
-        import cython
+        import cython  # noqa
         import os
         from setuptools import sandbox
         sandbox.run_setup(
             os.path.join(os.path.dirname(__file__), 'setup.py'),
             ['build_ext', '--inplace'],
         )
-    except (ImportError, ModuleNotFoundError):
+    except ImportError:
         print(
             'Unable to build Cython components. Please make sure Cython is '
             'installed if the torch.hub model you are loading depends on it.'
         )
 
+from fairseq.hub_utils import BPEHubInterface as bpe  # noqa
+from fairseq.hub_utils import TokenizerHubInterface as tokenizer  # noqa
+from fairseq.models import MODEL_REGISTRY  # noqa
+
 # automatically expose models defined in FairseqModel::hub_models
 for _model_type, _cls in MODEL_REGISTRY.items():
     for model_name in _cls.hub_models().keys():
         globals()[model_name] = functools.partial(
             _cls.from_pretrained,
             model_name,
         )
 
 # to simplify the interface we only expose named models
 # globals()[_model_type] = _cls.from_pretrained
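
The hubconf.py rewrite defers every fairseq import until after an explicit dependency check, so a missing third-party package surfaces as one actionable RuntimeError listing everything to install, instead of an opaque ImportError from deep inside fairseq; the 'hydra' entry is remapped to 'hydra-core' because that is the package's distribution name on PyPI. A standalone sketch of the same pattern (check_dependencies and PYPI_NAMES are illustrative names, not part of fairseq):

import importlib

# Import names that differ from their PyPI distribution names.
PYPI_NAMES = {'hydra': 'hydra-core'}

def check_dependencies(deps):
    missing = []
    for dep in deps:
        try:
            importlib.import_module(dep)
        except ImportError:
            # Report the pip-installable name, not the import name.
            missing.append(PYPI_NAMES.get(dep, dep))
    if missing:
        raise RuntimeError('Missing dependencies: {}'.format(', '.join(missing)))

check_dependencies(['numpy', 'regex'])  # raises only if a package is absent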


@@ -64,9 +64,9 @@ class TestTranslationGPU(unittest.TestCase):
                 "--decoder-layers",
                 "2",
                 "--encoder-embed-dim",
-                "8",
+                "64",
                 "--decoder-embed-dim",
-                "8",
+                "64",
                 "--fp16",
             ],
             run_validation=True,


@@ -63,7 +63,10 @@ class TestGradientScaling(unittest.TestCase):
         optimizer = FP16Optimizer.build_optimizer(self.namespace_dls, params)
 
         self.run_iter(model, params, optimizer)
-        self.assertTrue(torch.all(optimizer.fp32_params.eq(torch.tensor([3.1000, 5.1000], device='cuda:0', requires_grad=True))))
+        self.assertTrue(all(
+            torch.all(fp32_params.eq(torch.tensor([3.1000, 5.1000], device='cuda:0', requires_grad=True)))
+            for fp32_params in optimizer.fp32_params.values()
+        ))
 
     def test_memory_efficient(self):
         model = copy.deepcopy(self.model)
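
The updated assertion mirrors the new shape of optimizer.fp32_params from the first hunk: a dict mapping each device to its flat FP32 master copy, rather than a single flattened tensor, so the test now checks every per-device tensor. A CPU-only sketch of the same check (the dict below is an illustrative stand-in, not a real optimizer's state):

import torch

# Stand-in for optimizer.fp32_params after the change: one flat FP32 master
# copy per device, keyed by device.
fp32_params = {0: torch.tensor([3.1000, 5.1000])}
expected = torch.tensor([3.1000, 5.1000])

# Every device's master copy must equal the expected parameters.
assert all(torch.all(flat.eq(expected)) for flat in fp32_params.values())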