fix flake8 issues (#2570)
Summary:

# Before submitting

- [ ] Was this discussed/approved via a GitHub issue? (no need for typos, doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?

## What does this PR do?

- [x] Applies flake8 fixes to the main branch (https://github.com/fairinternal/fairseq-py/issues/2546) - more remain to be fixed

Fixes for the GPU tests:

- [x] When the `torch.ao.quantization` import doesn't work, fall back to `torch.quantization`
- [x] Build apex from an earlier commit in CircleCI so that it is compatible with PyTorch 1.8 and 1.9

## PR review

Anyone in the community is free to review the PR once the tests have passed. If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.

## Did you have fun?

Make sure you had fun coding!

Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/2570
Reviewed By: Mortimerp9
Differential Revision: D32955312
Pulled By: dianaml0
fbshipit-source-id: e163cbd4998f171f819e31b0682c1c0f1986f9e1
This commit is contained in: parent c620ed066f, commit 88e7d2586b
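The core of the GPU-test fix is an import fallback: newer PyTorch (1.10+) exposes the quantization toolkit under `torch.ao.quantization`, while 1.8/1.9 only ship `torch.quantization`. The pattern the hunks below apply throughout is:

```python
# Import fallback used across the diffs below: alias whichever quantization
# namespace this PyTorch build provides.
try:
    import torch.ao.quantization as quantization  # PyTorch >= 1.10
    from torch.ao.quantization.qconfig import (
        default_dynamic_qconfig,
        per_channel_dynamic_qconfig,
    )
except ImportError:
    import torch.quantization as quantization  # PyTorch 1.8 / 1.9
    from torch.quantization.qconfig import (
        default_dynamic_qconfig,
        per_channel_dynamic_qconfig,
    )
```

Call sites then go through the `quantization` alias instead of hard-coding either module path.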
```diff
@@ -10,7 +10,7 @@ gpu: &gpu
   machine:
     image: ubuntu-1604-cuda-11.1:202012-01
   resource_class: gpu.nvidia.medium.multi

 # -------------------------------------------------------------------------------------
 # Re-usable commands
@@ -25,7 +25,7 @@ install_dep_common: &install_dep_common
       pip install --upgrade setuptools
       pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
       pip install --progress-bar off pytest
-      pip install --progress-bar off fairscale==0.4.1
+      pip install --progress-bar off fairscale
       pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
       python -c 'import torch; print("Torch version:", torch.__version__)'
       python -m torch.utils.collect_env
@@ -38,6 +38,7 @@ install_dep_fused_ops: &install_dep_fused_ops
       source activate fairseq
       git clone https://github.com/NVIDIA/apex
       cd apex
+      git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
       pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
       cd ~/
       git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
```
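Pinning apex to a fixed commit keeps its CUDA extensions buildable against PyTorch 1.8/1.9. A hypothetical sanity check (assumed helper, not part of this diff) that a CI step could run before launching the GPU tests:

```python
# Hypothetical check (not part of this diff): confirm the apex modules that the
# fused-ops jobs rely on are importable before running GPU tests.
def apex_fused_ops_available() -> bool:
    try:
        from apex.normalization import FusedLayerNorm  # noqa: F401
        from apex.optimizers import FusedAdam  # noqa: F401
        return True
    except ImportError:
        return False
```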
```diff
@@ -17,7 +17,7 @@ repos:
   - id: end-of-file-fixer

 - repo: https://github.com/ambv/black
-  rev: 20.8b1
+  rev: 21.12b0
   hooks:
   - id: black
     language_version: python3.8
```
```diff
@@ -8,7 +8,6 @@ import time
 from collections import OrderedDict
 from typing import Dict, Optional

-
 try:
     import torch
@@ -18,7 +17,6 @@ try:
         else:
             return a

-
 except ImportError:
     torch = None
```
```diff
@@ -14,23 +14,30 @@ from typing import List, Optional, Tuple

 import torch
 import torch.nn as nn
-from fairseq.models import (
-    FairseqEncoder,
-)
-from torch import Tensor
-from torch import device as Device

+from fairseq.models import FairseqEncoder
 from fairseq.models.speech_to_text.utils import (
     NoOp,
     attention_suppression,
     layer_norm_backward_hook,
     lengths_to_padding_mask,
     segments_to_sequence,
 )
-from fairseq.models.speech_to_text.utils import (
-    attention_suppression,
-    layer_norm_backward_hook,
-)
+from torch import Tensor, device as Device
-from torch.ao.quantization.qconfig import (
-    default_dynamic_qconfig,
-    per_channel_dynamic_qconfig,
-)

+try:
+    import torch.ao.quantization as quantization
+    from torch.ao.quantization.qconfig import (
+        default_dynamic_qconfig,
+        per_channel_dynamic_qconfig,
+    )
+except ImportError:
+    import torch.quantization as quantization
+    from torch.quantization.qconfig import (
+        default_dynamic_qconfig,
+        per_channel_dynamic_qconfig,
+    )


 class RelativePositionEmbedding(nn.Module):
```
```diff
@@ -140,7 +147,7 @@ class PositionwiseFF(nn.Module):
             qconfig = per_channel_dynamic_qconfig
         else:
             qconfig = default_dynamic_qconfig
-        torch.ao.quantization.quantize_dynamic(
+        quantization.quantize_dynamic(
             self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
         )
         return self
```
```diff
@@ -728,7 +735,7 @@ class NoSegAugmentedMemoryMultiheadAttentionBmm(nn.Module):
             qconfig = per_channel_dynamic_qconfig
         else:
             qconfig = default_dynamic_qconfig
-        torch.ao.quantization.quantize_dynamic(
+        quantization.quantize_dynamic(
             self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
         )
         return self
```
```diff
@@ -1771,7 +1778,7 @@ class NoSegAugmentedMemoryTransformerEncoderLayer(FairseqEncoder):
             qconfig = per_channel_dynamic_qconfig
         else:
             qconfig = default_dynamic_qconfig
-        torch.ao.quantization.quantize_dynamic(
+        quantization.quantize_dynamic(
             self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
         )
         return self
```
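All three hunks above are the same one-line change: route the call through the `quantization` alias. For context, a minimal self-contained sketch (assumed example, not fairseq code) of what `quantize_dynamic` does with a qconfig mapping:

```python
# Minimal sketch (assumed example): dynamically quantize the nn.Linear modules
# of a model to int8, in place, using whichever quantization namespace exists.
import torch

try:
    import torch.ao.quantization as quantization
    from torch.ao.quantization.qconfig import default_dynamic_qconfig
except ImportError:
    import torch.quantization as quantization
    from torch.quantization.qconfig import default_dynamic_qconfig

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU())
# Only the module types listed in the dict (here nn.Linear) are replaced by
# dynamically quantized equivalents; other modules are left untouched.
quantization.quantize_dynamic(
    model, {torch.nn.Linear: default_dynamic_qconfig}, dtype=torch.qint8, inplace=True
)
```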
```diff
@@ -8,7 +8,6 @@ import logging
 import torch
 import torch.nn.functional as F

-
 logger = logging.getLogger(__name__)
@@ -54,7 +53,6 @@ try:
         else:
             raise NotImplementedError

-
 except ImportError:

     def cross_entropy(logits, target, ignore_index=-100, reduction="mean"):
```
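The `except ImportError:` branch above defines a plain-PyTorch substitute for apex's fused xentropy kernel. The hunk only shows the signature; a hedged sketch of a fallback body consistent with it:

```python
# Hedged sketch (assumed body; the diff only shows the signature): without
# apex's fused kernels, delegate to the stock PyTorch cross entropy.
import torch.nn.functional as F

def cross_entropy(logits, target, ignore_index=-100, reduction="mean"):
    return F.cross_entropy(
        logits, target, ignore_index=ignore_index, reduction=reduction
    )
```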
```diff
@@ -7,7 +7,6 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F

-
 try:
     from apex.normalization import FusedLayerNorm as _FusedLayerNorm
@@ -22,7 +21,6 @@ try:
             with torch.cuda.device(x.device):
                 return super().forward(x)

-
 except ImportError:
     has_fused_layernorm = False
```
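Same optional-dependency pattern as elsewhere in the file: prefer apex's fused CUDA kernel, fall back to stock PyTorch. A minimal sketch (assumed example) of the selection logic:

```python
# Minimal sketch (assumed example) of the apex fallback pattern: use the fused
# layer norm when apex is installed, otherwise the standard nn.LayerNorm.
import torch.nn as nn

try:
    from apex.normalization import FusedLayerNorm as _LayerNorm
    has_fused_layernorm = True
except ImportError:
    _LayerNorm = nn.LayerNorm
    has_fused_layernorm = False

def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True):
    # Both classes share this constructor signature, so callers don't care
    # which backend they got.
    return _LayerNorm(normalized_shape, eps=eps, elementwise_affine=elementwise_affine)
```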
```diff
@@ -5,6 +5,11 @@
 import torch
+
+try:
+    import torch.ao.quantization as quantization
+except ImportError:
+    import torch.quantization as quantization


 def emulate_int(w, bits, method, scale=None, zero_point=None):
     q = globals()[f"emulate_int8_{method}"]
@@ -21,7 +26,7 @@ def quantize(w, scale, zero_point, bits=8):

 def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):
     if scale is None:
-        obs = torch.ao.quantization.observer.HistogramObserver()
+        obs = quantization.observer.HistogramObserver()
         obs.to(device=w.device)
         _ = obs(w.float())
         scale, zero_point = obs.calculate_qparams()
@@ -32,7 +37,7 @@ def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):

 def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):
     if scale is None:
-        obs = torch.ao.quantization.observer.PerChannelMinMaxObserver(
+        obs = quantization.observer.PerChannelMinMaxObserver(
             ch_axis=-1, qscheme=torch.per_channel_symmetric
         )
         obs.to(device=w.device)
@@ -45,7 +50,7 @@ def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):

 def emulate_int8_tensor(w, scale=None, zero_point=None, bits=8):
     if scale is None:
-        obs = torch.ao.quantization.observer.MinMaxObserver()
+        obs = quantization.observer.MinMaxObserver()
         obs.to(device=w.device)
         _ = obs(w)
         scale, zero_point = obs.calculate_qparams()
```
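These observers drive the int8 emulation: run the weight tensor through an observer, then read back the scale and zero point it derived. A small usage sketch (assumed example, not fairseq code):

```python
# Usage sketch (assumed example) of the observer flow these hunks touch:
# observing a tensor records its value range; calculate_qparams() then yields
# the quantization parameters for that range.
import torch

try:
    import torch.ao.quantization as quantization
except ImportError:
    import torch.quantization as quantization

w = torch.randn(64, 64)
obs = quantization.observer.MinMaxObserver()
_ = obs(w)  # forward pass records min/max of w
scale, zero_point = obs.calculate_qparams()
print(scale.item(), zero_point.item())
```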
```diff
@@ -27,8 +27,8 @@ def get_fused_adam_class():
     except ImportError:
         try:
             # fallback to the newer interface
-            from apex.optimizers import FusedAdam as _FusedAdam  # noqa
             from apex.multi_tensor_apply import multi_tensor_applier
+            from apex.optimizers import FusedAdam as _FusedAdam  # noqa

             if multi_tensor_applier.available:
                 return FusedAdamV2
@@ -252,8 +252,8 @@ class FusedAdamV1(torch.optim.Optimizer):

 try:
-    from apex.optimizers import FusedAdam
     from apex.multi_tensor_apply import multi_tensor_applier
+    from apex.optimizers import FusedAdam

     class FusedAdamV2(FusedAdam):
         """
@@ -382,6 +382,5 @@ try:

             return loss

-
 except ImportError:
     pass
```
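These hunks only reorder imports for isort; the probing logic is unchanged: `get_fused_adam_class()` returns a fused optimizer class when apex (or the old `fused_adam_cuda` extension) is importable, and `None` otherwise. A hedged usage sketch (assumed example; `build_adam` is hypothetical):

```python
# Hedged usage sketch (assumed example): fall back to torch.optim.Adam when no
# fused Adam kernel is available on this machine.
import torch
from fairseq.optim.fused_adam import get_fused_adam_class

def build_adam(params, lr=1e-3):
    fused_adam_cls = get_fused_adam_class()  # None without apex/fused kernels
    if fused_adam_cls is not None and torch.cuda.is_available():
        return fused_adam_cls(params, lr=lr)
    return torch.optim.Adam(params, lr=lr)
```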
```diff
@@ -108,16 +108,18 @@ def main():
                         help='Write the new checkpoint containing the averaged weights to this path.')
     num_group = parser.add_mutually_exclusive_group()
     num_group.add_argument('--num-epoch-checkpoints', type=int,
-                           help='if set, will try to find checkpoints with names checkpoint_xx.pt in the path specified by input, '
-                                'and average last this many of them.')
+                           help='if set, will try to find checkpoints with names checkpoint_xx.pt in the '
+                                'path specified by input, and average last this many of them.')
     num_group.add_argument('--num-update-checkpoints', type=int,
-                           help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by input, '
-                                'and average last this many of them.')
+                           help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by'
+                                ' input, and average last this many of them.')
     parser.add_argument('--checkpoint-upper-bound', type=int,
                         help='when using --num-epoch-checkpoints, this will set an upper bound on which epoch to use, '
                              'when using --num-update-checkpoints, this will set an upper bound on which update to use'
-                             'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be averaged.'
-                             'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would be averaged assuming --save-interval-updates 500'
+                             'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be'
+                             ' averaged.'
+                             'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would'
+                             ' be averaged assuming --save-interval-updates 500'
                         )
     # fmt: on
     args = parser.parse_args()
```
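The two `--num-*-checkpoints` flags live in an argparse mutually exclusive group, so callers must pick exactly one selection mode. A minimal sketch (assumed example, values hypothetical) of that mechanism:

```python
# Minimal sketch (assumed example) of the mutually exclusive group used above:
# argparse rejects invocations that set both flags at once.
import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
group.add_argument('--num-epoch-checkpoints', type=int)
group.add_argument('--num-update-checkpoints', type=int)

args = parser.parse_args(['--num-epoch-checkpoints', '10'])  # OK
# parser.parse_args(['--num-epoch-checkpoints', '10',
#                    '--num-update-checkpoints', '5'])  # exits with an error
```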
```diff
@@ -11,8 +11,6 @@ import argparse
 import random
 import sys

-
 from sacrebleu import extract_ngrams

-
 def get_phrase(words, index, length):
     assert index < len(words) - length + 1
```
```diff
@@ -26,13 +26,13 @@ def main():

     if args.input_format == "piece":

-        def decode(l):
-            return "".join(sp.DecodePieces(l))
+        def decode(input):
+            return "".join(sp.DecodePieces(input))

     elif args.input_format == "id":

-        def decode(l):
-            return "".join(sp.DecodeIds(l))
+        def decode(input):
+            return "".join(sp.DecodeIds(input))

     else:
         raise NotImplementedError
```

```diff
@@ -49,13 +49,13 @@ def main():

     if args.output_format == "piece":

-        def encode(l):
-            return sp.EncodeAsPieces(l)
+        def encode(input):
+            return sp.EncodeAsPieces(input)

     elif args.output_format == "id":

-        def encode(l):
-            return list(map(str, sp.EncodeAsIds(l)))
+        def encode(input):
+            return list(map(str, sp.EncodeAsIds(input)))

     else:
         raise NotImplementedError
```
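These renames fix flake8 E741: `l` is flagged as ambiguous because it reads like `1` or `I`. Any descriptive parameter name passes; note that the diff's choice, `input`, shadows a Python builtin, which core flake8 does not flag. A trivial illustration (assumed example):

```python
# Hedged illustration (assumed example) of the E741 fix: rename the ambiguous
# parameter `l` to something readable; behavior is unchanged.
def decode(pieces):  # hypothetical name; the diff itself uses `input`
    return "".join(pieces)

assert decode(["he", "llo"]) == "hello"
```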
```diff
@@ -1,3 +1,4 @@
 [flake8]
 max-line-length = 127
 extend-ignore = E203, W503
+extend-exclude = fairseq/model_parallel/megatron
```
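E203 and W503 are ignored because black's output deliberately violates both, and the vendored Megatron tree is excluded rather than reformatted. A short illustration (assumed example) of code black produces that would otherwise trip flake8:

```python
# Assumed example: black-formatted code that E203/W503 would flag.
numbers = list(range(10))
offset = 2
# black puts spaces around ':' in complex slices; E203 would complain about
# "whitespace before ':'".
chunk = numbers[offset + 1 : offset + 4]
# black breaks long conditions *before* binary operators; W503 would complain
# about "line break before binary operator".
ok = (
    len(numbers) > 5
    and len(chunk) == 3
)
```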
```diff
@@ -140,7 +140,6 @@ def setup_args():
 @unittest.skipIf(torch.cuda.device_count() < 2, "test requires 2 GPUs")
 class TestBMUF(unittest.TestCase):
     def bmuf_process(self, cfg, args, iterations):
-        processes = []
         results = Manager().dict()
         torch.multiprocessing.spawn(
             fn=functools.partial(single_gpu_training, cfg, args),
```
```diff
@@ -399,6 +399,9 @@ def _quantize_language_model(data_dir, arch, extra_flags=None, run_validation=Fa
     train.main(quantize_args)


+@unittest.skipIf(
+    int(torch.__version__[2]) < 10, reason="quantized kernels are only supported on CPU"
+)
 @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
 class TestQuantization(unittest.TestCase):
     def setUp(self):
```
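An aside on the new skip condition: indexing `torch.__version__` by character is brittle (`"1.10.0"[2]` is `"1"`, not `"10"`). A sturdier version gate, as a hedged sketch (assumed example, not part of this diff), parses the numeric components:

```python
# Hedged sketch (assumed example): compare parsed (major, minor) tuples instead
# of single characters of the version string.
import torch

major, minor = (int(p) for p in torch.__version__.split(".")[:2])
torch_is_at_least_1_10 = (major, minor) >= (1, 10)
```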
```diff
@@ -3,11 +3,17 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.

 import sys
 import unittest
 from typing import List

 import torch
-from fairseq.token_generation_constraints import *

+from fairseq.token_generation_constraints import (
+    ConstraintNode,
+    OrderedConstraintState,
+    UnorderedConstraintState,
+    pack_constraints,
+)


 def tensorize(constraints: List[List[int]]) -> torch.Tensor:
```

```diff
@@ -53,7 +59,7 @@ class TestUnorderedConstraintState(unittest.TestCase):
         self.examples = [
             (
                 tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
-                "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
+                "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",  # noqa
                 {1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
             ),
             ([], "[None].False#0", {}),
```
```diff
@@ -49,7 +49,7 @@ class TestFileIO(unittest.TestCase):
     def test_file_io_async(self):
         # ioPath `PathManager` is initialized after the first `opena` call.
         try:
-            from fairseq.file_io import IOPathManager, PathManager
+            from fairseq.file_io import PathManager

             _asyncfile = os.path.join(self._tmpdir, "async.txt")
             f = PathManager.opena(_asyncfile, "wb")
```
```diff
@@ -3,7 +3,6 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.

-
 import argparse
 import copy
 import logging
 import unittest
```
```diff
@@ -79,7 +79,7 @@ class TestMultiCorpusSampledDataset(unittest.TestCase):

     def test_multi_corpus_sampled_dataset_weighted_sample(self):
         def naive_weighted_sample(weights):
-            def f(l):
+            def f(input):
                 v = np.random.random()
                 agg = 0
                 for i, weight in enumerate(weights):
```
```diff
@@ -3,12 +3,10 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.

-import contextlib
 import json
 import os
 import tempfile
 import unittest
 from io import StringIO

-
 import torch
```
```diff
@@ -292,18 +292,18 @@ class RobertaTest(unittest.TestCase):
         # Decode with incremental state
         inc_state = {}
         ro_dec_inc = []
-        for l in range(tgt_len):
+        for i in range(tgt_len):
             ro, _ = model.decoder.forward(
-                ro_tokens[:, : l + 1], encoder_out=en_enc, incremental_state=inc_state
+                ro_tokens[:, : i + 1], encoder_out=en_enc, incremental_state=inc_state
             )
             self.assertEqual(ro.shape, (bs, 1, VOCAB_SIZE))
             ro_dec_inc.append(ro)

-        for l in range(tgt_len):
+        for i in range(tgt_len):
             # Intra-batch
-            self.assertTensorEqual(ro_dec_inc[l][0], ro_dec_inc[l][1])
+            self.assertTensorEqual(ro_dec_inc[i][0], ro_dec_inc[i][1])
             # Incremental vs non-incremental
-            self.assertTensorEqual(ro_dec_inc[l][:, 0], ro_dec[:, l])
+            self.assertTensorEqual(ro_dec_inc[i][:, 0], ro_dec[:, i])


 def params(model, name):
```
```diff
@@ -320,7 +320,7 @@ class TestSequenceGenerator(TestSequenceGeneratorBase):
         sample = self.sample.copy()
         sample["net_input"]["fancy_other_input"] = sample["net_input"]["src_tokens"]
         hypos = generator.forward(self.sample)
-        eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2
+        eos, w1 = self.tgt_dict.eos(), self.w1
         # sentence 1, beam 1
         self.assertHypoTokens(hypos[0][0], [w1, eos])
         self.assertHypoScore(hypos[0][0], [0.9, 1.0])
```