Mirror of https://github.com/facebookresearch/fairseq.git (synced 2024-10-26 17:32:57 +03:00)
Fix hub (#2687)
Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/2687

Reviewed By: alexeib
Differential Revision: D24095130
Pulled By: myleott
fbshipit-source-id: 7d371bccb550ec68b2b9b39dfa4c0718356508d6
parent 7c392f7d0e
commit 7c292af66f
```diff
@@ -34,13 +34,17 @@ class _FP16OptimizerMixin(object):
     def build_fp32_params(cls, args, params, flatten=True):
         # create FP32 copy of parameters and grads
         if flatten:
+            is_pipeline_parallel = (
+                getattr(args, 'pipeline_model_parallel', False)
+                and getattr(args, 'distributed_no_spawn', False)
+            )
             total_param_size = sum(p.data.numel() for p in params)
             devices = [torch.cuda.current_device()]
-            if args.pipeline_model_parallel and args.distributed_no_spawn:
+            if is_pipeline_parallel:
                 devices = list(set(args.pipeline_devices))
             fp32_params = {}
             for device in devices:
-                if args.pipeline_model_parallel and args.distributed_no_spawn:
+                if is_pipeline_parallel:
                     device_param_size = sum(p.data.numel() for p in params if p.device.index == device)
                     device_params = [p for p in params if p.device.index == device]
                 else:
```
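The notable change here is that the pipeline-parallel check now goes through `getattr` with a `False` default. Presumably this is what fixes hub loading: an args namespace that predates these flags would otherwise raise `AttributeError` on direct attribute access. A minimal sketch of the difference, using a hypothetical namespace (not fairseq code):

```python
# Sketch: why getattr with a default is safer than direct attribute access
# on an argparse-style namespace that may lack newer flags.
from argparse import Namespace

# Hypothetical older config: the pipeline flags are simply absent.
old_args = Namespace(fp16=True)

# Direct access would raise AttributeError:
#   old_args.pipeline_model_parallel

# The patched form degrades gracefully to False instead:
is_pipeline_parallel = (
    getattr(old_args, 'pipeline_model_parallel', False)
    and getattr(old_args, 'distributed_no_spawn', False)
)
print(is_pipeline_parallel)  # False
```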
hubconf.py (39 changed lines)
```diff
@@ -4,15 +4,12 @@
 # LICENSE file in the root directory of this source tree.
 
 import functools
-
-from fairseq.hub_utils import BPEHubInterface as bpe  # noqa
-from fairseq.hub_utils import TokenizerHubInterface as tokenizer  # noqa
-from fairseq.models import MODEL_REGISTRY
+import importlib
 
 
 dependencies = [
     'dataclasses',
-    'hydra-core',
+    'hydra',
     'numpy',
     'regex',
     'requests',
```
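The `'hydra-core'` to `'hydra'` rename matters because the new dependency check in the next hunk feeds each entry to `importlib.import_module`, which takes the importable module name, whereas pip installs the distribution name `hydra-core`. A standalone toy demonstration of that check (not the hubconf.py code itself; `some_missing_module` is made up):

```python
# importlib resolves importable module names, not PyPI distribution names.
import importlib

for module_name in ['numpy', 'some_missing_module']:
    try:
        importlib.import_module(module_name)
        print('{} is importable'.format(module_name))
    except ImportError:
        print('{} is missing'.format(module_name))
```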
```diff
@@ -20,31 +17,51 @@ dependencies = [
 ]
 
 
+# Check for required dependencies and raise a RuntimeError if any are missing.
+missing_deps = []
+for dep in dependencies:
+    try:
+        importlib.import_module(dep)
+    except ImportError:
+        # Hack: the hydra package is provided under the "hydra-core" name in
+        # pypi. We don't want the user mistakenly calling `pip install hydra`
+        # since that will install an unrelated package.
+        if dep == 'hydra':
+            dep = 'hydra-core'
+        missing_deps.append(dep)
+if len(missing_deps) > 0:
+    raise RuntimeError('Missing dependencies: {}'.format(', '.join(missing_deps)))
+
+
 # torch.hub doesn't build Cython components, so if they are not found then try
 # to build them here
 try:
-    import fairseq.data.token_block_utils_fast
-except (ImportError, ModuleNotFoundError):
+    import fairseq.data.token_block_utils_fast  # noqa
+except ImportError:
     try:
-        import cython
+        import cython  # noqa
         import os
         from setuptools import sandbox
         sandbox.run_setup(
             os.path.join(os.path.dirname(__file__), 'setup.py'),
             ['build_ext', '--inplace'],
         )
-    except (ImportError, ModuleNotFoundError):
+    except ImportError:
         print(
             'Unable to build Cython components. Please make sure Cython is '
             'installed if the torch.hub model you are loading depends on it.'
         )
+
+
+from fairseq.hub_utils import BPEHubInterface as bpe  # noqa
+from fairseq.hub_utils import TokenizerHubInterface as tokenizer  # noqa
+from fairseq.models import MODEL_REGISTRY  # noqa
 
 
 # automatically expose models defined in FairseqModel::hub_models
 for _model_type, _cls in MODEL_REGISTRY.items():
     for model_name in _cls.hub_models().keys():
         globals()[model_name] = functools.partial(
             _cls.from_pretrained,
             model_name,
         )
     # to simplify the interface we only expose named models
     # globals()[_model_type] = _cls.from_pretrained
```
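For context, the loop at the end of this hunk is what makes `torch.hub.load('pytorch/fairseq', '<model_name>')` work: every name returned by `hub_models()` becomes a module-level callable bound to its class's `from_pretrained`. A self-contained sketch of the same pattern, where `FakeModel` and the `demo.model.en-fr` entry are invented stand-ins for real registry contents:

```python
# Sketch of the hub-exposure pattern; registry contents are illustrative.
import functools

class FakeModel:
    @classmethod
    def hub_models(cls):
        # name -> checkpoint URL, mirroring FairseqModel.hub_models()
        return {'demo.model.en-fr': 'http://example.com/demo.tar.gz'}

    @classmethod
    def from_pretrained(cls, model_name, *args, **kwargs):
        return 'loaded {}'.format(model_name)

MODEL_REGISTRY = {'fake': FakeModel}

# Same loop as the hunk: each hub model becomes a module-level callable.
for _model_type, _cls in MODEL_REGISTRY.items():
    for model_name in _cls.hub_models().keys():
        globals()[model_name] = functools.partial(
            _cls.from_pretrained,
            model_name,
        )

# torch.hub dispatches by attribute name, so this is what a hub load resolves to:
print(globals()['demo.model.en-fr']())  # -> loaded demo.model.en-fr
```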
```diff
@@ -64,9 +64,9 @@ class TestTranslationGPU(unittest.TestCase):
                     "--decoder-layers",
                     "2",
                     "--encoder-embed-dim",
-                    "8",
+                    "64",
                     "--decoder-embed-dim",
-                    "8",
+                    "64",
                     "--fp16",
                 ],
                 run_validation=True,
```
```diff
@@ -63,7 +63,10 @@ class TestGradientScaling(unittest.TestCase):
         optimizer = FP16Optimizer.build_optimizer(self.namespace_dls, params)
 
         self.run_iter(model, params, optimizer)
-        self.assertTrue(torch.all(optimizer.fp32_params.eq(torch.tensor([3.1000, 5.1000], device='cuda:0', requires_grad=True))))
+        self.assertTrue(all(
+            torch.all(fp32_params.eq(torch.tensor([3.1000, 5.1000], device='cuda:0', requires_grad=True)))
+            for fp32_params in optimizer.fp32_params.values()
+        ))
 
     def test_memory_efficient(self):
         model = copy.deepcopy(self.model)
```
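This test change tracks the new shape of `optimizer.fp32_params`: with the per-device flattening introduced in the first hunk, it is a dict mapping a device index to the flattened FP32 master copy, rather than a single flat tensor, so the assertion now iterates over `.values()`. A CPU-only sketch of that structure (values are illustrative):

```python
# fp32_params maps a device index to the flattened FP32 master copy.
import torch

fp32_params = {0: torch.tensor([3.1000, 5.1000])}  # one entry per device

expected = torch.tensor([3.1000, 5.1000])
assert all(torch.all(p.eq(expected)) for p in fp32_params.values())
print('all device copies match')
```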