From 6ce55e4b011275e43404034832b40648b1483ff6 Mon Sep 17 00:00:00 2001
From: Myle Ott <myleott@fb.com>
Date: Mon, 19 Aug 2019 15:04:41 -0700
Subject: [PATCH] Small fixes

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/835

Differential Revision: D16904038

Pulled By: myleott

fbshipit-source-id: 2c9d0b913f8d688297ac80fcabd905bd1397f66a
---
 eval_lm.py                                    |  6 ++--
 examples/__init__.py                          | 12 ++++----
 examples/noisychannel/__init__.py             | 10 +++----
 examples/noisychannel/rerank.py               |  8 ++++--
 examples/noisychannel/rerank_generate.py      | 28 +++++++++----------
 examples/noisychannel/rerank_options.py       |  8 ++----
 examples/noisychannel/rerank_tune.py          | 14 ++++++----
 examples/noisychannel/rerank_utils.py         |  4 +--
 examples/roberta/wsc/wsc_task.py              |  4 +--
 .../models/vggtransformer.py                  |  4 +--
 fairseq/models/lightconv.py                   |  2 +-
 fairseq/modules/__init__.py                   |  4 ---
 fairseq/modules/cuda_utils.cu                 |  7 +++--
 fairseq/modules/dynamic_convolution.py        |  4 ++-
 fairseq/modules/dynamicconv_layer/__init__.py | 10 +++----
 .../dynamicconv_layer/cuda_function_gen.py    | 18 ++++++------
 .../dynamicconv_layer/dynamicconv_cuda.cpp    |  7 +++++
 .../dynamicconv_layer/dynamicconv_cuda.cuh    |  8 ++++--
 .../dynamicconv_cuda_kernel.cu                |  7 +++--
 .../dynamicconv_layer/dynamicconv_layer.py    | 11 ++++++--
 fairseq/modules/dynamicconv_layer/setup.py    |  6 ++++
 fairseq/modules/lightconv_layer/__init__.py   | 10 +++----
 .../lightconv_layer/cuda_function_gen.py      | 18 ++++++------
 .../lightconv_layer/lightconv_cuda.cpp        |  7 +++++
 .../lightconv_layer/lightconv_cuda.cuh        |  7 +++--
 .../lightconv_layer/lightconv_cuda_kernel.cu  |  7 +++--
 .../lightconv_layer/lightconv_layer.py        | 10 +++++--
 fairseq/modules/lightconv_layer/setup.py      |  6 ++++
 fairseq/modules/lightweight_convolution.py    |  2 ++
 fairseq/modules/unfold.py                     |  1 +
 scripts/average_checkpoints.py                |  4 +--
 scripts/compare_namespaces.py                 |  2 +-
 scripts/count_docs.py                         |  1 -
 scripts/shard_docs.py                         |  2 --
 scripts/split_train_valid_docs.py             |  8 ++++--
 scripts/spm_encode.py                         |  2 +-
 scripts/wav2vec_featurize.py                  | 23 +++++++++------
 tests/speech_recognition/test_collaters.py    |  8 ++----
 .../speech_recognition/test_cross_entropy.py  |  8 ++----
 tests/test_average_checkpoints.py             |  1 -
 validate.py                                   |  9 +++---
 41 files changed, 180 insertions(+), 138 deletions(-)

diff --git a/eval_lm.py b/eval_lm.py
index febed5ac8..f7add27eb 100644
--- a/eval_lm.py
+++ b/eval_lm.py
@@ -200,8 +200,10 @@ def main(parsed_args):
                             is_bpe = False
                             w = ''
                     if args.output_word_probs:
-                        print(str(int(sample_id)) + " " +
-                                  ('\t'.join('{} [{:2f}]'.format(x[0], x[1]) for x in word_prob)))
+                        print(
+                            str(int(sample_id)) + " "
+                            + ('\t'.join('{} [{:2f}]'.format(x[0], x[1]) for x in word_prob))
+                        )
 
             wps_meter.update(sample['ntokens'])
             t.log({'wps': round(wps_meter.avg)})
diff --git a/examples/__init__.py b/examples/__init__.py
index 906098c1e..35b0568cb 100644
--- a/examples/__init__.py
+++ b/examples/__init__.py
@@ -1,10 +1,8 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
-__version__ = '0.7.2'
+__version__ = '0.8.0'
 
-import examples.noisychannel # noqa
+import examples.noisychannel  # noqa
diff --git a/examples/noisychannel/__init__.py b/examples/noisychannel/__init__.py
index b10ddbd81..89f1aef4f 100644
--- a/examples/noisychannel/__init__.py
+++ b/examples/noisychannel/__init__.py
@@ -1,8 +1,6 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
-from .rerank_options import *
+from .rerank_options import *  # noqa
diff --git a/examples/noisychannel/rerank.py b/examples/noisychannel/rerank.py
index c17d64b4a..46f31b25c 100644
--- a/examples/noisychannel/rerank.py
+++ b/examples/noisychannel/rerank.py
@@ -77,9 +77,11 @@ def score_target_hypo(args, a, b, c, lenpen, target_outfile, hypo_outfile, write
 
         for key in range(len(gen_keys)):
             if args.prefix_len is None:
-                assert hypo_lst[key] in gen_output.no_bpe_hypo[gen_keys[key]], \
-                    ("pred and rescore hypo mismatch: i: " + str(key) + ", " + str(hypo_lst[key]) + str(gen_keys[key]) +
-                    str(gen_output.no_bpe_hypo[key]))
+                assert hypo_lst[key] in gen_output.no_bpe_hypo[gen_keys[key]], (
+                    "pred and rescore hypo mismatch: i: " + str(key) + ", "
+                    + str(hypo_lst[key]) + str(gen_keys[key])
+                    + str(gen_output.no_bpe_hypo[key])
+                )
                 sys_tok = dict.encode_line(hypo_lst[key])
                 ref_tok = dict.encode_line(gen_output.no_bpe_target[gen_keys[key]])
                 scorer.add(ref_tok, sys_tok)
diff --git a/examples/noisychannel/rerank_generate.py b/examples/noisychannel/rerank_generate.py
index 27dcdb599..3d692b6cc 100644
--- a/examples/noisychannel/rerank_generate.py
+++ b/examples/noisychannel/rerank_generate.py
@@ -1,24 +1,24 @@
 #!/usr/bin/env python3 -u
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
-
-import rerank_utils
-import os
-import subprocess
-from examples.noisychannel import rerank_options
-from fairseq import options
-import generate
-import preprocess
-from contextlib import redirect_stdout
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 """
 Generate n-best translations using a trained model.
 """
 
+from contextlib import redirect_stdout
+import os
+import subprocess
+
+import rerank_utils
+from examples.noisychannel import rerank_options
+from fairseq import options
+import generate
+import preprocess
+
+
 def gen_and_reprocess_nbest(args):
     if args.score_dict_dir is None:
         args.score_dict_dir = args.data
diff --git a/examples/noisychannel/rerank_options.py b/examples/noisychannel/rerank_options.py
index 1f8c748b9..41a80d88d 100644
--- a/examples/noisychannel/rerank_options.py
+++ b/examples/noisychannel/rerank_options.py
@@ -1,9 +1,7 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 from fairseq import options
 
diff --git a/examples/noisychannel/rerank_tune.py b/examples/noisychannel/rerank_tune.py
index 805d87579..437f056cb 100644
--- a/examples/noisychannel/rerank_tune.py
+++ b/examples/noisychannel/rerank_tune.py
@@ -27,12 +27,14 @@ def random_search(args):
     param_values += initial_params
     random.seed(args.seed)
 
-    random_params = np.array([[random.uniform(args.lower_bound[i], args.upper_bound[i])
-                               for i in range(len(args.tune_param))]
-                               for k in range(args.num_trials)])
-    set_params = np.array([[initial_params[i][0]
-                            for i in range(len(tuneable_parameters))]
-                            for k in range(args.num_trials)])
+    random_params = np.array([
+        [random.uniform(args.lower_bound[i], args.upper_bound[i]) for i in range(len(args.tune_param))]
+        for k in range(args.num_trials)
+    ])
+    set_params = np.array([
+        [initial_params[i][0] for i in range(len(tuneable_parameters))]
+        for k in range(args.num_trials)
+    ])
     random_params = np.concatenate((random_params, set_params), 1)
 
     rerank_args = vars(args).copy()
diff --git a/examples/noisychannel/rerank_utils.py b/examples/noisychannel/rerank_utils.py
index 9b8bb7bec..c64b18216 100644
--- a/examples/noisychannel/rerank_utils.py
+++ b/examples/noisychannel/rerank_utils.py
@@ -128,8 +128,8 @@ def write_reprocessed(sources, hypos, targets, source_outfile,
         "in writing reprocessed, only one type of prefix may be used"
 
     with open(source_outfile, 'w') as source_file, \
-         open(hypo_outfile, 'w') as hypo_file, \
-         open(target_outfile, 'w') as target_file:
+            open(hypo_outfile, 'w') as hypo_file, \
+            open(target_outfile, 'w') as target_file:
 
         assert len(sources) == len(hypos), "sources and hypos list length mismatch"
         if right_to_left:
diff --git a/examples/roberta/wsc/wsc_task.py b/examples/roberta/wsc/wsc_task.py
index 2af2b338c..312361fae 100644
--- a/examples/roberta/wsc/wsc_task.py
+++ b/examples/roberta/wsc/wsc_task.py
@@ -270,6 +270,7 @@ class WinograndeTask(WSCTask):
     Task for WinoGrande dataset. Efficient implementation for Winograd schema
     tasks with exactly two candidates, one of which is correct.
     """
+
     @classmethod
     def setup_task(cls, args, **kwargs):
         assert args.criterion == 'winogrande', 'Must set --criterion=winogrande'
@@ -280,7 +281,6 @@ class WinograndeTask(WSCTask):
 
         return cls(args, vocab)
 
-
     def load_dataset(self, split, epoch=0, combine=False, data_path=None, return_only=False, **kwargs):
         """Load a given dataset split.
 
@@ -299,7 +299,7 @@ class WinograndeTask(WSCTask):
         candidate_masks = []
         candidate_lengths = []
 
-        itr = wsc_utils.winogrande_jsonl_iterator(data_path, eval=split=='test')
+        itr = wsc_utils.winogrande_jsonl_iterator(data_path, eval=(split == 'test'))
 
         for sample in itr:
             sentence, pronoun_span, query, cand_text = sample
diff --git a/examples/speech_recognition/models/vggtransformer.py b/examples/speech_recognition/models/vggtransformer.py
index 7b208a3b9..3a078ec6f 100644
--- a/examples/speech_recognition/models/vggtransformer.py
+++ b/examples/speech_recognition/models/vggtransformer.py
@@ -13,7 +13,7 @@ from fairseq import utils
 from fairseq.models import (
     FairseqEncoder,
     FairseqIncrementalDecoder,
-    FairseqModel,
+    FairseqEncoderDecoderModel,
     register_model,
     register_model_architecture,
 )
@@ -23,7 +23,7 @@ from fairseq.modules import TransformerDecoderLayer, TransformerEncoderLayer, VG
 
 
 @register_model("asr_vggtransformer")
-class VGGTransformerModel(FairseqModel):
+class VGGTransformerModel(FairseqEncoderDecoderModel):
     """
     Transformers with convolutional context for ASR
     https://arxiv.org/abs/1904.11660
diff --git a/fairseq/models/lightconv.py b/fairseq/models/lightconv.py
index 44d52dcd8..087eee8a2 100644
--- a/fairseq/models/lightconv.py
+++ b/fairseq/models/lightconv.py
@@ -4,7 +4,6 @@
 # LICENSE file in the root directory of this source tree.
 
 import math
-import sys
 
 import torch
 import torch.nn as nn
@@ -174,6 +173,7 @@ class LightConvModel(FairseqEncoderDecoderModel):
         decoder = LightConvDecoder(args, tgt_dict, decoder_embed_tokens)
         return LightConvModel(encoder, decoder)
 
+
 class LightConvEncoder(FairseqEncoder):
     """
     LightConv encoder consisting of *args.encoder_layers* layers. Each layer
diff --git a/fairseq/modules/__init__.py b/fairseq/modules/__init__.py
index ecfdc3d69..7b38fdae3 100644
--- a/fairseq/modules/__init__.py
+++ b/fairseq/modules/__init__.py
@@ -10,14 +10,12 @@ from .character_token_embedder import CharacterTokenEmbedder
 from .conv_tbc import ConvTBC
 from .downsampled_multihead_attention import DownsampledMultiHeadAttention
 from .dynamic_convolution import DynamicConv, DynamicConv1dTBC
-#from .dynamicconv_layer import DynamicconvLayer
 from .gelu import gelu, gelu_accurate
 from .grad_multiply import GradMultiply
 from .highway import Highway
 from .layer_norm import LayerNorm
 from .learned_positional_embedding import LearnedPositionalEmbedding
 from .lightweight_convolution import LightweightConv, LightweightConv1dTBC
-#from .lightconv_layer import LightconvLayer
 from .linearized_convolution import LinearizedConvolution
 from .logsumexp_moe import LogSumExpMoE
 from .mean_pool_gating_network import MeanPoolGatingNetwork
@@ -38,7 +36,6 @@ __all__ = [
     'CharacterTokenEmbedder',
     'ConvTBC',
     'DownsampledMultiHeadAttention',
-#    'DyamicconvLayer',
     'DynamicConv1dTBC',
     'DynamicConv',
     'gelu',
@@ -47,7 +44,6 @@ __all__ = [
     'Highway',
     'LayerNorm',
     'LearnedPositionalEmbedding',
-#    'LightconvLayer',
     'LightweightConv1dTBC',
     'LightweightConv',
     'LinearizedConvolution',
diff --git a/fairseq/modules/cuda_utils.cu b/fairseq/modules/cuda_utils.cu
index 596ff125f..516f1d924 100644
--- a/fairseq/modules/cuda_utils.cu
+++ b/fairseq/modules/cuda_utils.cu
@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 
diff --git a/fairseq/modules/dynamic_convolution.py b/fairseq/modules/dynamic_convolution.py
index 7fbd3f37e..19ffb0bf5 100644
--- a/fairseq/modules/dynamic_convolution.py
+++ b/fairseq/modules/dynamic_convolution.py
@@ -10,6 +10,7 @@ import torch.nn.functional as F
 from fairseq import utils
 from .unfold import unfold1d
 
+
 def DynamicConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                 weight_dropout=0., weight_softmax=False,
                 renorm_padding=False, bias=False, conv_bias=False,
@@ -28,6 +29,7 @@ def DynamicConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                             weight_dropout=weight_dropout,
                             weight_softmax=weight_softmax, bias=bias)
 
+
 def Linear(in_features, out_features, bias=True):
     m = nn.Linear(in_features, out_features, bias)
     nn.init.xavier_uniform_(m.weight)
@@ -209,7 +211,7 @@ class DynamicConv1dTBC(nn.Module):
             # turn the convolution filters into band matrices
             weight_expanded = weight.new_zeros(B*H, T, T+K-1, requires_grad=False)
             weight_expanded.as_strided((B*H, T, K), (T*(T+K-1), T+K, 1)).copy_(weight)
-            weight_expanded = weight_expanded.narrow(2, P, T) # B*H x T x T
+            weight_expanded = weight_expanded.narrow(2, P, T)  # B*H x T x T
         output = torch.bmm(weight_expanded, x)
         output = output.transpose(0, 1).contiguous().view(T, B, C)
         return output
diff --git a/fairseq/modules/dynamicconv_layer/__init__.py b/fairseq/modules/dynamicconv_layer/__init__.py
index c62ffac86..22dc6f403 100644
--- a/fairseq/modules/dynamicconv_layer/__init__.py
+++ b/fairseq/modules/dynamicconv_layer/__init__.py
@@ -1,8 +1,6 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
-from .dynamicconv_layer import DynamicconvLayer
+from .dynamicconv_layer import DynamicconvLayer  # noqa
diff --git a/fairseq/modules/dynamicconv_layer/cuda_function_gen.py b/fairseq/modules/dynamicconv_layer/cuda_function_gen.py
index caf151e4a..926d6ca84 100644
--- a/fairseq/modules/dynamicconv_layer/cuda_function_gen.py
+++ b/fairseq/modules/dynamicconv_layer/cuda_function_gen.py
@@ -1,9 +1,7 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 
 def gen_forward():
@@ -13,9 +11,10 @@ def gen_forward():
 
     head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
  *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include "dynamicconv_cuda.cuh"
@@ -103,9 +102,10 @@ def gen_backward():
 
     head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
  *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include "dynamicconv_cuda.cuh"
diff --git a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp
index b76c9e7fe..ebd4df0e9 100644
--- a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp
@@ -1,3 +1,10 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #include <torch/extension.h>
 #include <vector>
 
diff --git a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh
index 5d6ed575f..219625943 100644
--- a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh
@@ -1,8 +1,10 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
+
 #include <ATen/ATen.h>
 #include <c10/cuda/CUDAStream.h>
 
diff --git a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu
index f29e6ded0..300d35b64 100644
--- a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu
@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ *
+ * 
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include "dynamicconv_cuda.cuh"
diff --git a/fairseq/modules/dynamicconv_layer/dynamicconv_layer.py b/fairseq/modules/dynamicconv_layer/dynamicconv_layer.py
index d50e13c0d..3e51f09fa 100644
--- a/fairseq/modules/dynamicconv_layer/dynamicconv_layer.py
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_layer.py
@@ -1,9 +1,16 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 import torch
 from torch import nn
 from torch.autograd import Function
 import torch.nn.functional as F
+
 import dynamicconv_cuda
 from fairseq import utils
+from fairseq.modules.unfold import unfold1d
 
 
 class dynamicconvFunction(Function):
@@ -68,7 +75,7 @@ class DynamicconvLayer(nn.Module):
 
         T, B, C = x.size()
         K, H = self.kernel_size, self.num_heads
-        R = C // H
+        # R = C // H
 
         # during inference time, incremental BMM is faster
         if incremental_state is not None:
@@ -199,7 +206,7 @@ class DynamicconvLayer(nn.Module):
             # turn the convolution filters into band matrices
             weight_expanded = weight.new_zeros(B*H, T, T+K-1, requires_grad=False)
             weight_expanded.as_strided((B*H, T, K), (T*(T+K-1), T+K, 1)).copy_(weight)
-            weight_expanded = weight_expanded.narrow(2, P, T) # B*H x T x T
+            weight_expanded = weight_expanded.narrow(2, P, T)  # B*H x T x T
         output = torch.bmm(weight_expanded, x)
         output = output.transpose(0, 1).contiguous().view(T, B, C)
         return output
diff --git a/fairseq/modules/dynamicconv_layer/setup.py b/fairseq/modules/dynamicconv_layer/setup.py
index 00ce29bc7..4d789c328 100644
--- a/fairseq/modules/dynamicconv_layer/setup.py
+++ b/fairseq/modules/dynamicconv_layer/setup.py
@@ -1,3 +1,9 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 from setuptools import setup
 from torch.utils.cpp_extension import CUDAExtension, BuildExtension
 
diff --git a/fairseq/modules/lightconv_layer/__init__.py b/fairseq/modules/lightconv_layer/__init__.py
index 95fe76c7c..3b2a99c12 100644
--- a/fairseq/modules/lightconv_layer/__init__.py
+++ b/fairseq/modules/lightconv_layer/__init__.py
@@ -1,8 +1,6 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
-from .lightconv_layer import LightconvLayer
+from .lightconv_layer import LightconvLayer  # noqa
diff --git a/fairseq/modules/lightconv_layer/cuda_function_gen.py b/fairseq/modules/lightconv_layer/cuda_function_gen.py
index 1bb3a1a0d..afec9e19e 100644
--- a/fairseq/modules/lightconv_layer/cuda_function_gen.py
+++ b/fairseq/modules/lightconv_layer/cuda_function_gen.py
@@ -1,9 +1,7 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 
 def gen_forward():
@@ -13,9 +11,10 @@ def gen_forward():
 
     head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
  *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include "lightconv_cuda.cuh"
@@ -118,9 +117,10 @@ def gen_backward():
 
     head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
  *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include "lightconv_cuda.cuh"
diff --git a/fairseq/modules/lightconv_layer/lightconv_cuda.cpp b/fairseq/modules/lightconv_layer/lightconv_cuda.cpp
index 3dc1765bf..4bf6b5ad3 100644
--- a/fairseq/modules/lightconv_layer/lightconv_cuda.cpp
+++ b/fairseq/modules/lightconv_layer/lightconv_cuda.cpp
@@ -1,3 +1,10 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #include <torch/extension.h>
 #include <vector>
 
diff --git a/fairseq/modules/lightconv_layer/lightconv_cuda.cuh b/fairseq/modules/lightconv_layer/lightconv_cuda.cuh
index f4c5fec43..3cae57b68 100644
--- a/fairseq/modules/lightconv_layer/lightconv_cuda.cuh
+++ b/fairseq/modules/lightconv_layer/lightconv_cuda.cuh
@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include <ATen/ATen.h>
diff --git a/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu b/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu
index 8e17e27af..8ee83a56c 100644
--- a/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu
+++ b/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu
@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
  */
 
 #include "lightconv_cuda.cuh"
diff --git a/fairseq/modules/lightconv_layer/lightconv_layer.py b/fairseq/modules/lightconv_layer/lightconv_layer.py
index 872812827..3daff29d0 100644
--- a/fairseq/modules/lightconv_layer/lightconv_layer.py
+++ b/fairseq/modules/lightconv_layer/lightconv_layer.py
@@ -1,12 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 import torch
 from torch import nn
 from torch.autograd import Function
 import torch.nn.functional as F
-import time
 
 import lightconv_cuda
 from fairseq import utils
 
+
 class lightconvFunction(Function):
 
     @staticmethod
@@ -26,6 +31,7 @@ class lightconvFunction(Function):
         grad_input, grad_weights = outputs
         return grad_input, grad_weights, None
 
+
 class LightconvLayer(nn.Module):
     def __init__(
             self,
@@ -82,7 +88,7 @@ class LightconvLayer(nn.Module):
             weight = weight.view(1, H, K).expand(T*B, H, K).contiguous().view(T*B*H, K, 1)
 
             weight = F.dropout(weight, self.weight_dropout, training=self.training)
-            output = torch.bmm(x_unfold, weight) # T*B*H x R x 1
+            output = torch.bmm(x_unfold, weight)  # T*B*H x R x 1
             output = output.view(T, B, C)
             return output
 
diff --git a/fairseq/modules/lightconv_layer/setup.py b/fairseq/modules/lightconv_layer/setup.py
index c2a928ed8..0eac1df03 100644
--- a/fairseq/modules/lightconv_layer/setup.py
+++ b/fairseq/modules/lightconv_layer/setup.py
@@ -1,3 +1,9 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 from setuptools import setup
 from torch.utils.cpp_extension import CUDAExtension, BuildExtension
 
diff --git a/fairseq/modules/lightweight_convolution.py b/fairseq/modules/lightweight_convolution.py
index 95d0418af..037dd1792 100644
--- a/fairseq/modules/lightweight_convolution.py
+++ b/fairseq/modules/lightweight_convolution.py
@@ -10,6 +10,7 @@ import torch.nn.functional as F
 from fairseq import utils
 from fairseq.modules.unfold import unfold1d
 
+
 def LightweightConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                     weight_dropout=0., weight_softmax=False, bias=False):
     if torch.cuda.is_available():
@@ -26,6 +27,7 @@ def LightweightConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                                 weight_dropout=weight_dropout,
                                 weight_softmax=weight_softmax, bias=bias)
 
+
 class LightweightConv1d(nn.Module):
     '''Lightweight Convolution assuming the input is BxCxT
     This is just an example that explains LightConv clearer than the TBC version.
diff --git a/fairseq/modules/unfold.py b/fairseq/modules/unfold.py
index eff6ab575..3a142db69 100644
--- a/fairseq/modules/unfold.py
+++ b/fairseq/modules/unfold.py
@@ -5,6 +5,7 @@
 
 import torch.nn.functional as F
 
+
 def unfold1d(x, kernel_size, padding_l, pad_value=0):
     '''unfold T x B x C to T x B x C x K'''
     if kernel_size > 1:
diff --git a/scripts/average_checkpoints.py b/scripts/average_checkpoints.py
index e5e9bce15..190c59ad0 100644
--- a/scripts/average_checkpoints.py
+++ b/scripts/average_checkpoints.py
@@ -121,9 +121,9 @@ def main():
         num = args.num_epoch_checkpoints
 
     assert args.checkpoint_upper_bound is None or args.num_epoch_checkpoints is not None, \
-            '--checkpoint-upper-bound requires --num-epoch-checkpoints'
+        '--checkpoint-upper-bound requires --num-epoch-checkpoints'
     assert args.num_epoch_checkpoints is None or args.num_update_checkpoints is None, \
-            'Cannot combine --num-epoch-checkpoints and --num-update-checkpoints'
+        'Cannot combine --num-epoch-checkpoints and --num-update-checkpoints'
 
     if num is not None:
         args.inputs = last_n_checkpoints(
diff --git a/scripts/compare_namespaces.py b/scripts/compare_namespaces.py
index 52ba2b9fb..db5121189 100644
--- a/scripts/compare_namespaces.py
+++ b/scripts/compare_namespaces.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 """Helper script to compare two argparse.Namespace objects."""
 
-from argparse import Namespace
+from argparse import Namespace  # noqa
 
 
 def main():
diff --git a/scripts/count_docs.py b/scripts/count_docs.py
index 13640f4b6..8d185398a 100644
--- a/scripts/count_docs.py
+++ b/scripts/count_docs.py
@@ -10,7 +10,6 @@ document in a large file. Documents should be separated by a single empty line.
 
 import argparse
 import gzip
-import random
 import sys
 
 import numpy as np
diff --git a/scripts/shard_docs.py b/scripts/shard_docs.py
index f1adac72a..e30d4a122 100644
--- a/scripts/shard_docs.py
+++ b/scripts/shard_docs.py
@@ -10,8 +10,6 @@ should be separated by a single empty line.
 
 import argparse
 import contextlib
-import random
-import sys
 
 
 def main():
diff --git a/scripts/split_train_valid_docs.py b/scripts/split_train_valid_docs.py
index 41fb979ad..9adf99634 100644
--- a/scripts/split_train_valid_docs.py
+++ b/scripts/split_train_valid_docs.py
@@ -19,6 +19,8 @@ def main():
     parser.add_argument('sample_output', help='train output file')
     parser.add_argument('remainder_output', help='valid output file')
     parser.add_argument('-k', type=int, help="remainder size")
+    parser.add_argument('--lines', action='store_true',
+                        help='split lines instead of docs')
     args = parser.parse_args()
 
     assert args.k is not None
@@ -48,6 +50,8 @@ def main():
                 update_sample(doc)
             else:
                 doc.append(line)
+            if args.lines:
+                update_sample(doc)
             if i % 1000000 == 0:
                 print(i, file=sys.stderr, end="", flush=True)
             elif i % 100000 == 0:
@@ -61,7 +65,7 @@ def main():
     with open(args.sample_output, 'w', encoding='utf-8') as out:
         first = True
         for doc in sample:
-            if not first:
+            if not first and not args.lines:
                 out.write("\n")
             first = False
             for line in doc:
@@ -70,7 +74,7 @@ def main():
     with open(args.remainder_output, 'w', encoding='utf-8') as out:
         first = True
         for doc in remainder:
-            if not first:
+            if not first and not args.lines:
                 out.write("\n")
             first = False
             for line in doc:
diff --git a/scripts/spm_encode.py b/scripts/spm_encode.py
index eda9d6220..e1cb54192 100644
--- a/scripts/spm_encode.py
+++ b/scripts/spm_encode.py
@@ -30,7 +30,7 @@ def main():
     args = parser.parse_args()
 
     assert len(args.inputs) == len(args.outputs), \
-            "number of input and output paths should match"
+        "number of input and output paths should match"
 
     sp = spm.SentencePieceProcessor()
     sp.Load(args.model)
diff --git a/scripts/wav2vec_featurize.py b/scripts/wav2vec_featurize.py
index 70764d793..31e12433f 100644
--- a/scripts/wav2vec_featurize.py
+++ b/scripts/wav2vec_featurize.py
@@ -1,22 +1,27 @@
-""" Helper script to pre-compute embeddings for a wav2letter++ dataset
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Helper script to pre-compute embeddings for a wav2letter++ dataset
 """
 
-import glob, os
-import tqdm
+import argparse
+import glob
+import os
 from shutil import copy
 
-import soundfile as sf
-
 import h5py
+import soundfile as sf
 import numpy as np
-
 import torch
 from torch import nn
+import tqdm
 
 from fairseq.models.wav2vec import Wav2VecModel
 
-import argparse
-
 
 def read_audio(fname):
     """ Load an audio file and return PCM along with the sample rate """
@@ -228,4 +233,4 @@ if __name__ == "__main__":
         if not args.no_copy_labels:
             print("Copying label data...")
             writer.copy_labels()
-            print("Done.")
\ No newline at end of file
+            print("Done.")
diff --git a/tests/speech_recognition/test_collaters.py b/tests/speech_recognition/test_collaters.py
index efb0e5879..6a5029a48 100644
--- a/tests/speech_recognition/test_collaters.py
+++ b/tests/speech_recognition/test_collaters.py
@@ -1,10 +1,8 @@
 #!/usr/bin/env python3
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 import unittest
 
diff --git a/tests/speech_recognition/test_cross_entropy.py b/tests/speech_recognition/test_cross_entropy.py
index 11daf4166..508d490e0 100644
--- a/tests/speech_recognition/test_cross_entropy.py
+++ b/tests/speech_recognition/test_cross_entropy.py
@@ -1,10 +1,8 @@
 #!/usr/bin/env python3
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 from examples.speech_recognition.criterions.cross_entropy_acc import CrossEntropyWithAccCriterion
 from .asr_test_base import CrossEntropyCriterionTestBase
diff --git a/tests/test_average_checkpoints.py b/tests/test_average_checkpoints.py
index 21f12cb42..8ed298c3c 100644
--- a/tests/test_average_checkpoints.py
+++ b/tests/test_average_checkpoints.py
@@ -14,7 +14,6 @@ import torch
 from torch import nn
 
 
-
 from scripts.average_checkpoints import average_checkpoints
 
 
diff --git a/validate.py b/validate.py
index ed8f41e40..f768e8cce 100644
--- a/validate.py
+++ b/validate.py
@@ -1,10 +1,8 @@
 #!/usr/bin/env python3 -u
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
 
 import torch