fix flake8 issues (#2570)

Summary:
# Before submitting

- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?

## What does this PR do?
- [x] applies flake8 fixes to the main branch (https://github.com/fairinternal/fairseq-py/issues/2546); more remain to be fixed (an illustrative sketch of the patterns involved follows this item)
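
A small, purely illustrative sketch of the flake8 findings cleaned up in this PR (none of this is fairseq code): E741 ambiguous single-letter names such as `l`, F401 unused imports, and F403 star imports; all three patterns appear in the hunks below.

```python
# Purely illustrative sketch (not fairseq code) of the flake8 patterns fixed in this PR.

# F401: drop imports that are never used (e.g. a stray `import copy`).
import logging

# F403: import names explicitly rather than `from module import *`
# (mirrors the tests/test_constraints.py change below).
from collections import OrderedDict

# E741: avoid the ambiguous variable name `l`
# (mirrors the `def decode(l)` -> `def decode(input)` changes below).
def decode(pieces):
    return "".join(str(p) for p in pieces)


logger = logging.getLogger(__name__)
logger.info("decoded: %s", decode(["a", "b", "c"]))
print(OrderedDict(example=1))
```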

Fix GPU tests:
- [x] fall back to `torch.quantization` when the `torch.ao.quantization` import fails (a sketch of the pattern follows this list)
- [x] build apex from an earlier commit in CircleCI so that it is compatible with PyTorch 1.8 and 1.9
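
The `torch.ao.quantization` fix is just a guarded import; a minimal sketch of the pattern is below (the full versions, including the qconfig imports, appear in the hunks later in this diff):

```python
# Minimal sketch of the guarded import used in this PR: prefer the newer
# torch.ao.quantization namespace (PyTorch >= 1.10) and fall back to the
# older torch.quantization namespace on PyTorch 1.8/1.9.
try:
    import torch.ao.quantization as quantization
except ImportError:
    import torch.quantization as quantization

# Call sites then go through the alias, for example:
#   quantization.quantize_dynamic(module, {torch.nn.Linear: qconfig},
#                                 dtype=torch.qint8, inplace=True)
print("using", quantization.__name__)
```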

## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.

## Did you have fun?
Make sure you had fun coding 🙃

Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/2570

Reviewed By: Mortimerp9

Differential Revision: D32955312

Pulled By: dianaml0

fbshipit-source-id: e163cbd4998f171f819e31b0682c1c0f1986f9e1
dianaml0 authored on 2021-12-09 02:33:35 -08:00, committed by Facebook GitHub Bot
parent c620ed066f
commit 88e7d2586b
22 changed files with 73 additions and 61 deletions

View File

@@ -10,7 +10,7 @@ gpu: &gpu
machine:
image: ubuntu-1604-cuda-11.1:202012-01
resource_class: gpu.nvidia.medium.multi
# -------------------------------------------------------------------------------------
# Re-usable commands
@@ -25,7 +25,7 @@ install_dep_common: &install_dep_common
pip install --upgrade setuptools
pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
pip install --progress-bar off pytest
pip install --progress-bar off fairscale==0.4.1
pip install --progress-bar off fairscale
pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
python -c 'import torch; print("Torch version:", torch.__version__)'
python -m torch.utils.collect_env
@@ -38,6 +38,7 @@ install_dep_fused_ops: &install_dep_fused_ops
source activate fairseq
git clone https://github.com/NVIDIA/apex
cd apex
git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
cd ~/
git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git

View File

@@ -17,7 +17,7 @@ repos:
- id: end-of-file-fixer
- repo: https://github.com/ambv/black
rev: 20.8b1
rev: 21.12b0
hooks:
- id: black
language_version: python3.8

View File

@@ -8,7 +8,6 @@ import time
from collections import OrderedDict
from typing import Dict, Optional
try:
import torch
@@ -18,7 +17,6 @@ try:
else:
return a
except ImportError:
torch = None

View File

@@ -14,23 +14,30 @@ from typing import List, Optional, Tuple
import torch
import torch.nn as nn
from fairseq.models import (
FairseqEncoder,
)
from torch import Tensor
from torch import device as Device
from fairseq.models import FairseqEncoder
from fairseq.models.speech_to_text.utils import (
NoOp,
attention_suppression,
layer_norm_backward_hook,
lengths_to_padding_mask,
segments_to_sequence,
)
from fairseq.models.speech_to_text.utils import (
attention_suppression,
layer_norm_backward_hook,
)
from torch import Tensor, device as Device
from torch.ao.quantization.qconfig import (
default_dynamic_qconfig,
per_channel_dynamic_qconfig,
)
try:
import torch.ao.quantization as quantization
from torch.ao.quantization.qconfig import (
default_dynamic_qconfig,
per_channel_dynamic_qconfig,
)
except ImportError:
import torch.quantization as quantization
from torch.quantization.qconfig import (
default_dynamic_qconfig,
per_channel_dynamic_qconfig,
)
class RelativePositionEmbedding(nn.Module):
@@ -140,7 +147,7 @@ class PositionwiseFF(nn.Module):
qconfig = per_channel_dynamic_qconfig
else:
qconfig = default_dynamic_qconfig
torch.ao.quantization.quantize_dynamic(
quantization.quantize_dynamic(
self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
)
return self
@@ -728,7 +735,7 @@ class NoSegAugmentedMemoryMultiheadAttentionBmm(nn.Module):
qconfig = per_channel_dynamic_qconfig
else:
qconfig = default_dynamic_qconfig
torch.ao.quantization.quantize_dynamic(
quantization.quantize_dynamic(
self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
)
return self
@@ -1771,7 +1778,7 @@ class NoSegAugmentedMemoryTransformerEncoderLayer(FairseqEncoder):
qconfig = per_channel_dynamic_qconfig
else:
qconfig = default_dynamic_qconfig
torch.ao.quantization.quantize_dynamic(
quantization.quantize_dynamic(
self, {torch.nn.Linear: qconfig}, dtype=torch.qint8, inplace=True
)
return self

View File

@@ -8,7 +8,6 @@ import logging
import torch
import torch.nn.functional as F
logger = logging.getLogger(__name__)
@@ -54,7 +53,6 @@ try:
else:
raise NotImplementedError
except ImportError:
def cross_entropy(logits, target, ignore_index=-100, reduction="mean"):

View File

@@ -7,7 +7,6 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
try:
from apex.normalization import FusedLayerNorm as _FusedLayerNorm
@@ -22,7 +21,6 @@ try:
with torch.cuda.device(x.device):
return super().forward(x)
except ImportError:
has_fused_layernorm = False

View File

@@ -5,6 +5,11 @@
import torch
try:
import torch.ao.quantization as quantization
except ImportError:
import torch.quantization as quantization
def emulate_int(w, bits, method, scale=None, zero_point=None):
q = globals()[f"emulate_int8_{method}"]
@@ -21,7 +26,7 @@ def quantize(w, scale, zero_point, bits=8):
def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):
if scale is None:
obs = torch.ao.quantization.observer.HistogramObserver()
obs = quantization.observer.HistogramObserver()
obs.to(device=w.device)
_ = obs(w.float())
scale, zero_point = obs.calculate_qparams()
@@ -32,7 +37,7 @@ def emulate_int8_histogram(w, scale=None, zero_point=None, bits=8):
def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):
if scale is None:
obs = torch.ao.quantization.observer.PerChannelMinMaxObserver(
obs = quantization.observer.PerChannelMinMaxObserver(
ch_axis=-1, qscheme=torch.per_channel_symmetric
)
obs.to(device=w.device)
@@ -45,7 +50,7 @@ def emulate_int8_channel(w, scale=None, zero_point=None, bits=8):
def emulate_int8_tensor(w, scale=None, zero_point=None, bits=8):
if scale is None:
obs = torch.ao.quantization.observer.MinMaxObserver()
obs = quantization.observer.MinMaxObserver()
obs.to(device=w.device)
_ = obs(w)
scale, zero_point = obs.calculate_qparams()

View File

@@ -27,8 +27,8 @@ def get_fused_adam_class():
except ImportError:
try:
# fallback to the newer interface
from apex.optimizers import FusedAdam as _FusedAdam # noqa
from apex.multi_tensor_apply import multi_tensor_applier
from apex.optimizers import FusedAdam as _FusedAdam # noqa
if multi_tensor_applier.available:
return FusedAdamV2
@@ -252,8 +252,8 @@ class FusedAdamV1(torch.optim.Optimizer):
try:
from apex.optimizers import FusedAdam
from apex.multi_tensor_apply import multi_tensor_applier
from apex.optimizers import FusedAdam
class FusedAdamV2(FusedAdam):
"""
@@ -382,6 +382,5 @@ try:
return loss
except ImportError:
pass

View File

@@ -108,16 +108,18 @@ def main():
help='Write the new checkpoint containing the averaged weights to this path.')
num_group = parser.add_mutually_exclusive_group()
num_group.add_argument('--num-epoch-checkpoints', type=int,
help='if set, will try to find checkpoints with names checkpoint_xx.pt in the path specified by input, '
'and average last this many of them.')
help='if set, will try to find checkpoints with names checkpoint_xx.pt in the '
'path specified by input, and average last this many of them.')
num_group.add_argument('--num-update-checkpoints', type=int,
help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by input, '
'and average last this many of them.')
help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by'
' input, and average last this many of them.')
parser.add_argument('--checkpoint-upper-bound', type=int,
help='when using --num-epoch-checkpoints, this will set an upper bound on which epoch to use, '
'when using --num-update-checkpoints, this will set an upper bound on which update to use'
'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be averaged.'
'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would be averaged assuming --save-interval-updates 500'
'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be'
' averaged.'
'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would'
' be averaged assuming --save-interval-updates 500'
)
# fmt: on
args = parser.parse_args()

View File

@@ -11,8 +11,6 @@ import argparse
import random
import sys
from sacrebleu import extract_ngrams
def get_phrase(words, index, length):
assert index < len(words) - length + 1

View File

@@ -26,13 +26,13 @@ def main():
if args.input_format == "piece":
def decode(l):
return "".join(sp.DecodePieces(l))
def decode(input):
return "".join(sp.DecodePieces(input))
elif args.input_format == "id":
def decode(l):
return "".join(sp.DecodeIds(l))
def decode(input):
return "".join(sp.DecodeIds(input))
else:
raise NotImplementedError

View File

@@ -49,13 +49,13 @@ def main():
if args.output_format == "piece":
def encode(l):
return sp.EncodeAsPieces(l)
def encode(input):
return sp.EncodeAsPieces(input)
elif args.output_format == "id":
def encode(l):
return list(map(str, sp.EncodeAsIds(l)))
def encode(input):
return list(map(str, sp.EncodeAsIds(input)))
else:
raise NotImplementedError

View File

@@ -1,3 +1,4 @@
[flake8]
max-line-length = 127
extend-ignore = E203, W503
extend-exclude = fairseq/model_parallel/megatron

View File

@@ -140,7 +140,6 @@ def setup_args():
@unittest.skipIf(torch.cuda.device_count() < 2, "test requires 2 GPUs")
class TestBMUF(unittest.TestCase):
def bmuf_process(self, cfg, args, iterations):
processes = []
results = Manager().dict()
torch.multiprocessing.spawn(
fn=functools.partial(single_gpu_training, cfg, args),

View File

@@ -399,6 +399,9 @@ def _quantize_language_model(data_dir, arch, extra_flags=None, run_validation=Fa
train.main(quantize_args)
@unittest.skipIf(
int(torch.__version__[2]) < 10, reason="quantized kernels are only supported on CPU"
)
@unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
class TestQuantization(unittest.TestCase):
def setUp(self):

View File

@@ -3,11 +3,17 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import unittest
from typing import List
import torch
from fairseq.token_generation_constraints import *
from fairseq.token_generation_constraints import (
ConstraintNode,
OrderedConstraintState,
UnorderedConstraintState,
pack_constraints,
)
def tensorize(constraints: List[List[int]]) -> torch.Tensor:
@@ -53,7 +59,7 @@ class TestUnorderedConstraintState(unittest.TestCase):
self.examples = [
(
tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
"([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
"([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))", # noqa
{1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
),
([], "[None].False#0", {}),

View File

@@ -49,7 +49,7 @@ class TestFileIO(unittest.TestCase):
def test_file_io_async(self):
# ioPath `PathManager` is initialized after the first `opena` call.
try:
from fairseq.file_io import IOPathManager, PathManager
from fairseq.file_io import PathManager
_asyncfile = os.path.join(self._tmpdir, "async.txt")
f = PathManager.opena(_asyncfile, "wb")

View File

@@ -3,7 +3,6 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import copy
import logging
import unittest

View File

@@ -79,7 +79,7 @@ class TestMultiCorpusSampledDataset(unittest.TestCase):
def test_multi_corpus_sampled_dataset_weighted_sample(self):
def naive_weighted_sample(weights):
def f(l):
def f(input):
v = np.random.random()
agg = 0
for i, weight in enumerate(weights):

View File

@@ -3,12 +3,10 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import contextlib
import json
import os
import tempfile
import unittest
from io import StringIO
import torch

View File

@@ -292,18 +292,18 @@ class RobertaTest(unittest.TestCase):
# Decode with incremental state
inc_state = {}
ro_dec_inc = []
for l in range(tgt_len):
for i in range(tgt_len):
ro, _ = model.decoder.forward(
ro_tokens[:, : l + 1], encoder_out=en_enc, incremental_state=inc_state
ro_tokens[:, : i + 1], encoder_out=en_enc, incremental_state=inc_state
)
self.assertEqual(ro.shape, (bs, 1, VOCAB_SIZE))
ro_dec_inc.append(ro)
for l in range(tgt_len):
for i in range(tgt_len):
# Intra-batch
self.assertTensorEqual(ro_dec_inc[l][0], ro_dec_inc[l][1])
self.assertTensorEqual(ro_dec_inc[i][0], ro_dec_inc[i][1])
# Incremental vs non-incremental
self.assertTensorEqual(ro_dec_inc[l][:, 0], ro_dec[:, l])
self.assertTensorEqual(ro_dec_inc[i][:, 0], ro_dec[:, i])
def params(model, name):

View File

@@ -320,7 +320,7 @@ class TestSequenceGenerator(TestSequenceGeneratorBase):
sample = self.sample.copy()
sample["net_input"]["fancy_other_input"] = sample["net_input"]["src_tokens"]
hypos = generator.forward(self.sample)
eos, w1, w2 = self.tgt_dict.eos(), self.w1, self.w2
eos, w1 = self.tgt_dict.eos(), self.w1
# sentence 1, beam 1
self.assertHypoTokens(hypos[0][0], [w1, eos])
self.assertHypoScore(hypos[0][0], [0.9, 1.0])