Small fixes

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/835

Differential Revision: D16904038

Pulled By: myleott

fbshipit-source-id: 2c9d0b913f8d688297ac80fcabd905bd1397f66a
2024-08-16 20:10:40 +03:00 · 2019-08-19 15:04:41 -07:00 · 2019-08-19 15:04:41 -07:00 · 6ce55e4b01
commit 6ce55e4b01
parent 2eb53b8ef1
41 changed files with 180 additions and 138 deletions
--- a/eval_lm.py
+++ b/eval_lm.py
@ -200,8 +200,10 @@ def main(parsed_args):
                            is_bpe = False
                            w = ''
                    if args.output_word_probs:
-                        print(str(int(sample_id)) + " " +
-                                  ('\t'.join('{} [{:2f}]'.format(x[0], x[1]) for x in word_prob)))
+                        print(
+                            str(int(sample_id)) + " "
+                            + ('\t'.join('{} [{:2f}]'.format(x[0], x[1]) for x in word_prob))
+                        )

            wps_meter.update(sample['ntokens'])
            t.log({'wps': round(wps_meter.avg)})
--- a/examples/__init__.py
+++ b/examples/__init__.py
@ -1,10 +1,8 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

-__version__ = '0.7.2'
+__version__ = '0.8.0'

-import examples.noisychannel # noqa
+import examples.noisychannel  # noqa
--- a/examples/noisychannel/__init__.py
+++ b/examples/noisychannel/__init__.py
@ -1,8 +1,6 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

-from .rerank_options import *
+from .rerank_options import *  # noqa
--- a/examples/noisychannel/rerank.py
+++ b/examples/noisychannel/rerank.py
@ -77,9 +77,11 @@ def score_target_hypo(args, a, b, c, lenpen, target_outfile, hypo_outfile, write

        for key in range(len(gen_keys)):
            if args.prefix_len is None:
-                assert hypo_lst[key] in gen_output.no_bpe_hypo[gen_keys[key]], \
-                    ("pred and rescore hypo mismatch: i: " + str(key) + ", " + str(hypo_lst[key]) + str(gen_keys[key]) +
-                    str(gen_output.no_bpe_hypo[key]))
+                assert hypo_lst[key] in gen_output.no_bpe_hypo[gen_keys[key]], (
+                    "pred and rescore hypo mismatch: i: " + str(key) + ", "
+                    + str(hypo_lst[key]) + str(gen_keys[key])
+                    + str(gen_output.no_bpe_hypo[key])
+                )
                sys_tok = dict.encode_line(hypo_lst[key])
                ref_tok = dict.encode_line(gen_output.no_bpe_target[gen_keys[key]])
                scorer.add(ref_tok, sys_tok)
--- a/examples/noisychannel/rerank_generate.py
+++ b/examples/noisychannel/rerank_generate.py
@ -1,24 +1,24 @@
 #!/usr/bin/env python3 -u
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
-
-import rerank_utils
-import os
-import subprocess
-from examples.noisychannel import rerank_options
-from fairseq import options
-import generate
-import preprocess
-from contextlib import redirect_stdout
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

 """
 Generate n-best translations using a trained model.
 """

+from contextlib import redirect_stdout
+import os
+import subprocess
+
+import rerank_utils
+from examples.noisychannel import rerank_options
+from fairseq import options
+import generate
+import preprocess
+
+
 def gen_and_reprocess_nbest(args):
    if args.score_dict_dir is None:
        args.score_dict_dir = args.data
--- a/examples/noisychannel/rerank_options.py
+++ b/examples/noisychannel/rerank_options.py
@ -1,9 +1,7 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

 from fairseq import options

--- a/examples/noisychannel/rerank_tune.py
+++ b/examples/noisychannel/rerank_tune.py
@ -27,12 +27,14 @@ def random_search(args):
    param_values += initial_params
    random.seed(args.seed)

-    random_params = np.array([[random.uniform(args.lower_bound[i], args.upper_bound[i])
-                               for i in range(len(args.tune_param))]
-                               for k in range(args.num_trials)])
-    set_params = np.array([[initial_params[i][0]
-                            for i in range(len(tuneable_parameters))]
-                            for k in range(args.num_trials)])
+    random_params = np.array([
+        [random.uniform(args.lower_bound[i], args.upper_bound[i]) for i in range(len(args.tune_param))]
+        for k in range(args.num_trials)
+    ])
+    set_params = np.array([
+        [initial_params[i][0] for i in range(len(tuneable_parameters))]
+        for k in range(args.num_trials)
+    ])
    random_params = np.concatenate((random_params, set_params), 1)

    rerank_args = vars(args).copy()
--- a/examples/noisychannel/rerank_utils.py
+++ b/examples/noisychannel/rerank_utils.py
@ -128,8 +128,8 @@ def write_reprocessed(sources, hypos, targets, source_outfile,
        "in writing reprocessed, only one type of prefix may be used"

    with open(source_outfile, 'w') as source_file, \
-         open(hypo_outfile, 'w') as hypo_file, \
-         open(target_outfile, 'w') as target_file:
+            open(hypo_outfile, 'w') as hypo_file, \
+            open(target_outfile, 'w') as target_file:

        assert len(sources) == len(hypos), "sources and hypos list length mismatch"
        if right_to_left:
--- a/examples/roberta/wsc/wsc_task.py
+++ b/examples/roberta/wsc/wsc_task.py
@ -270,6 +270,7 @@ class WinograndeTask(WSCTask):
    Task for WinoGrande dataset. Efficient implementation for Winograd schema
    tasks with exactly two candidates, one of which is correct.
    """
+
    @classmethod
    def setup_task(cls, args, **kwargs):
        assert args.criterion == 'winogrande', 'Must set --criterion=winogrande'
@ -280,7 +281,6 @@ class WinograndeTask(WSCTask):

        return cls(args, vocab)

-
    def load_dataset(self, split, epoch=0, combine=False, data_path=None, return_only=False, **kwargs):
        """Load a given dataset split.

@ -299,7 +299,7 @@ class WinograndeTask(WSCTask):
        candidate_masks = []
        candidate_lengths = []

-        itr = wsc_utils.winogrande_jsonl_iterator(data_path, eval=split=='test')
+        itr = wsc_utils.winogrande_jsonl_iterator(data_path, eval=(split == 'test'))

        for sample in itr:
            sentence, pronoun_span, query, cand_text = sample
--- a/examples/speech_recognition/models/vggtransformer.py
+++ b/examples/speech_recognition/models/vggtransformer.py
@ -13,7 +13,7 @@ from fairseq import utils
 from fairseq.models import (
    FairseqEncoder,
    FairseqIncrementalDecoder,
-    FairseqModel,
+    FairseqEncoderDecoderModel,
    register_model,
    register_model_architecture,
 )
@ -23,7 +23,7 @@ from fairseq.modules import TransformerDecoderLayer, TransformerEncoderLayer, VG


@register_model("asr_vggtransformer")
-class VGGTransformerModel(FairseqModel):
+class VGGTransformerModel(FairseqEncoderDecoderModel):
    """
    Transformers with convolutional context for ASR
    https://arxiv.org/abs/1904.11660
--- a/fairseq/models/lightconv.py
+++ b/fairseq/models/lightconv.py
@ -4,7 +4,6 @@
 # LICENSE file in the root directory of this source tree.

 import math
-import sys

 import torch
 import torch.nn as nn
@ -174,6 +173,7 @@ class LightConvModel(FairseqEncoderDecoderModel):
        decoder = LightConvDecoder(args, tgt_dict, decoder_embed_tokens)
        return LightConvModel(encoder, decoder)

+
 class LightConvEncoder(FairseqEncoder):
    """
    LightConv encoder consisting of *args.encoder_layers* layers. Each layer
--- a/fairseq/modules/__init__.py
+++ b/fairseq/modules/__init__.py
@ -10,14 +10,12 @@ from .character_token_embedder import CharacterTokenEmbedder
 from .conv_tbc import ConvTBC
 from .downsampled_multihead_attention import DownsampledMultiHeadAttention
 from .dynamic_convolution import DynamicConv, DynamicConv1dTBC
-#from .dynamicconv_layer import DynamicconvLayer
 from .gelu import gelu, gelu_accurate
 from .grad_multiply import GradMultiply
 from .highway import Highway
 from .layer_norm import LayerNorm
 from .learned_positional_embedding import LearnedPositionalEmbedding
 from .lightweight_convolution import LightweightConv, LightweightConv1dTBC
-#from .lightconv_layer import LightconvLayer
 from .linearized_convolution import LinearizedConvolution
 from .logsumexp_moe import LogSumExpMoE
 from .mean_pool_gating_network import MeanPoolGatingNetwork
@ -38,7 +36,6 @@ __all__ = [
    'CharacterTokenEmbedder',
    'ConvTBC',
    'DownsampledMultiHeadAttention',
-#    'DyamicconvLayer',
    'DynamicConv1dTBC',
    'DynamicConv',
    'gelu',
@ -47,7 +44,6 @@ __all__ = [
    'Highway',
    'LayerNorm',
    'LearnedPositionalEmbedding',
-#    'LightconvLayer',
    'LightweightConv1dTBC',
    'LightweightConv',
    'LinearizedConvolution',
--- a/fairseq/modules/cuda_utils.cu
+++ b/fairseq/modules/cuda_utils.cu
@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * 
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */


--- a/fairseq/modules/dynamic_convolution.py
+++ b/fairseq/modules/dynamic_convolution.py
@ -10,6 +10,7 @@ import torch.nn.functional as F
 from fairseq import utils
 from .unfold import unfold1d

+
 def DynamicConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                weight_dropout=0., weight_softmax=False,
                renorm_padding=False, bias=False, conv_bias=False,
@ -28,6 +29,7 @@ def DynamicConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                            weight_dropout=weight_dropout,
                            weight_softmax=weight_softmax, bias=bias)

+
 def Linear(in_features, out_features, bias=True):
    m = nn.Linear(in_features, out_features, bias)
    nn.init.xavier_uniform_(m.weight)
@ -209,7 +211,7 @@ class DynamicConv1dTBC(nn.Module):
            # turn the convolution filters into band matrices
            weight_expanded = weight.new_zeros(B*H, T, T+K-1, requires_grad=False)
            weight_expanded.as_strided((B*H, T, K), (T*(T+K-1), T+K, 1)).copy_(weight)
-            weight_expanded = weight_expanded.narrow(2, P, T) # B*H x T x T
+            weight_expanded = weight_expanded.narrow(2, P, T)  # B*H x T x T
        output = torch.bmm(weight_expanded, x)
        output = output.transpose(0, 1).contiguous().view(T, B, C)
        return output
--- a/fairseq/modules/dynamicconv_layer/__init__.py
+++ b/fairseq/modules/dynamicconv_layer/__init__.py
@ -1,8 +1,6 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

-from .dynamicconv_layer import DynamicconvLayer
+from .dynamicconv_layer import DynamicconvLayer  # noqa
--- a/fairseq/modules/dynamicconv_layer/cuda_function_gen.py
+++ b/fairseq/modules/dynamicconv_layer/cuda_function_gen.py
@ -1,9 +1,7 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.


 def gen_forward():
@ -13,9 +11,10 @@ def gen_forward():

    head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
 *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include "dynamicconv_cuda.cuh"
@ -103,9 +102,10 @@ def gen_backward():

    head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
 *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include "dynamicconv_cuda.cuh"
--- a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cpp
@ -1,3 +1,10 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #include <torch/extension.h>
 #include <vector>

--- a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda.cuh
@ -1,8 +1,10 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * 
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */
+
 #include <ATen/ATen.h>
 #include <c10/cuda/CUDAStream.h>

--- a/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_cuda_kernel.cu
@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * 
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include "dynamicconv_cuda.cuh"
--- a/fairseq/modules/dynamicconv_layer/dynamicconv_layer.py
+++ b/fairseq/modules/dynamicconv_layer/dynamicconv_layer.py
@ -1,9 +1,16 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 import torch
 from torch import nn
 from torch.autograd import Function
 import torch.nn.functional as F
+
 import dynamicconv_cuda
 from fairseq import utils
+from fairseq.modules.unfold import unfold1d


 class dynamicconvFunction(Function):
@ -68,7 +75,7 @@ class DynamicconvLayer(nn.Module):

        T, B, C = x.size()
        K, H = self.kernel_size, self.num_heads
-        R = C // H
+        # R = C // H

        # during inference time, incremental BMM is faster
        if incremental_state is not None:
@ -199,7 +206,7 @@ class DynamicconvLayer(nn.Module):
            # turn the convolution filters into band matrices
            weight_expanded = weight.new_zeros(B*H, T, T+K-1, requires_grad=False)
            weight_expanded.as_strided((B*H, T, K), (T*(T+K-1), T+K, 1)).copy_(weight)
-            weight_expanded = weight_expanded.narrow(2, P, T) # B*H x T x T
+            weight_expanded = weight_expanded.narrow(2, P, T)  # B*H x T x T
        output = torch.bmm(weight_expanded, x)
        output = output.transpose(0, 1).contiguous().view(T, B, C)
        return output
--- a/fairseq/modules/dynamicconv_layer/setup.py
+++ b/fairseq/modules/dynamicconv_layer/setup.py
@ -1,3 +1,9 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 from setuptools import setup
 from torch.utils.cpp_extension import CUDAExtension, BuildExtension

--- a/fairseq/modules/lightconv_layer/__init__.py
+++ b/fairseq/modules/lightconv_layer/__init__.py
@ -1,8 +1,6 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

-from .lightconv_layer import LightconvLayer
+from .lightconv_layer import LightconvLayer  # noqa
--- a/fairseq/modules/lightconv_layer/cuda_function_gen.py
+++ b/fairseq/modules/lightconv_layer/cuda_function_gen.py
@ -1,9 +1,7 @@
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.


 def gen_forward():
@ -13,9 +11,10 @@ def gen_forward():

    head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
 *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include "lightconv_cuda.cuh"
@ -118,9 +117,10 @@ def gen_backward():

    head = """
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
+ * Copyright (c) Facebook, Inc. and its affiliates.
 *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include "lightconv_cuda.cuh"
--- a/fairseq/modules/lightconv_layer/lightconv_cuda.cpp
+++ b/fairseq/modules/lightconv_layer/lightconv_cuda.cpp
@ -1,3 +1,10 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #include <torch/extension.h>
 #include <vector>

--- a/fairseq/modules/lightconv_layer/lightconv_cuda.cuh
+++ b/fairseq/modules/lightconv_layer/lightconv_cuda.cuh
@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * 
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include <ATen/ATen.h>
--- a/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu
+++ b/fairseq/modules/lightconv_layer/lightconv_cuda_kernel.cu
@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018-present, Facebook, Inc.
- * All rights reserved.
- *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * 
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
 */

 #include "lightconv_cuda.cuh"
--- a/fairseq/modules/lightconv_layer/lightconv_layer.py
+++ b/fairseq/modules/lightconv_layer/lightconv_layer.py
@ -1,12 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 import torch
 from torch import nn
 from torch.autograd import Function
 import torch.nn.functional as F
-import time

 import lightconv_cuda
 from fairseq import utils

+
 class lightconvFunction(Function):

    @staticmethod
@ -26,6 +31,7 @@ class lightconvFunction(Function):
        grad_input, grad_weights = outputs
        return grad_input, grad_weights, None

+
 class LightconvLayer(nn.Module):
    def __init__(
            self,
@ -82,7 +88,7 @@ class LightconvLayer(nn.Module):
            weight = weight.view(1, H, K).expand(T*B, H, K).contiguous().view(T*B*H, K, 1)

            weight = F.dropout(weight, self.weight_dropout, training=self.training)
-            output = torch.bmm(x_unfold, weight) # T*B*H x R x 1
+            output = torch.bmm(x_unfold, weight)  # T*B*H x R x 1
            output = output.view(T, B, C)
            return output

--- a/fairseq/modules/lightconv_layer/setup.py
+++ b/fairseq/modules/lightconv_layer/setup.py
@ -1,3 +1,9 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
 from setuptools import setup
 from torch.utils.cpp_extension import CUDAExtension, BuildExtension

--- a/fairseq/modules/lightweight_convolution.py
+++ b/fairseq/modules/lightweight_convolution.py
@ -10,6 +10,7 @@ import torch.nn.functional as F
 from fairseq import utils
 from fairseq.modules.unfold import unfold1d

+
 def LightweightConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                    weight_dropout=0., weight_softmax=False, bias=False):
    if torch.cuda.is_available():
@ -26,6 +27,7 @@ def LightweightConv(input_size, kernel_size=1, padding_l=None, num_heads=1,
                                weight_dropout=weight_dropout,
                                weight_softmax=weight_softmax, bias=bias)

+
 class LightweightConv1d(nn.Module):
    '''Lightweight Convolution assuming the input is BxCxT
    This is just an example that explains LightConv clearer than the TBC version.
--- a/fairseq/modules/unfold.py
+++ b/fairseq/modules/unfold.py
@ -5,6 +5,7 @@

 import torch.nn.functional as F

+
 def unfold1d(x, kernel_size, padding_l, pad_value=0):
    '''unfold T x B x C to T x B x C x K'''
    if kernel_size > 1:
--- a/scripts/average_checkpoints.py
+++ b/scripts/average_checkpoints.py
@ -121,9 +121,9 @@ def main():
        num = args.num_epoch_checkpoints

    assert args.checkpoint_upper_bound is None or args.num_epoch_checkpoints is not None, \
-            '--checkpoint-upper-bound requires --num-epoch-checkpoints'
+        '--checkpoint-upper-bound requires --num-epoch-checkpoints'
    assert args.num_epoch_checkpoints is None or args.num_update_checkpoints is None, \
-            'Cannot combine --num-epoch-checkpoints and --num-update-checkpoints'
+        'Cannot combine --num-epoch-checkpoints and --num-update-checkpoints'

    if num is not None:
        args.inputs = last_n_checkpoints(
--- a/scripts/compare_namespaces.py
+++ b/scripts/compare_namespaces.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python
 """Helper script to compare two argparse.Namespace objects."""

-from argparse import Namespace
+from argparse import Namespace  # noqa


 def main():
--- a/scripts/count_docs.py
+++ b/scripts/count_docs.py
@ -10,7 +10,6 @@ document in a large file. Documents should be separated by a single empty line.

 import argparse
 import gzip
-import random
 import sys

 import numpy as np
--- a/scripts/shard_docs.py
+++ b/scripts/shard_docs.py
@ -10,8 +10,6 @@ should be separated by a single empty line.

 import argparse
 import contextlib
-import random
-import sys


 def main():
--- a/scripts/split_train_valid_docs.py
+++ b/scripts/split_train_valid_docs.py
@ -19,6 +19,8 @@ def main():
    parser.add_argument('sample_output', help='train output file')
    parser.add_argument('remainder_output', help='valid output file')
    parser.add_argument('-k', type=int, help="remainder size")
+    parser.add_argument('--lines', action='store_true',
+                        help='split lines instead of docs')
    args = parser.parse_args()

    assert args.k is not None
@ -48,6 +50,8 @@ def main():
                update_sample(doc)
            else:
                doc.append(line)
+            if args.lines:
+                update_sample(doc)
            if i % 1000000 == 0:
                print(i, file=sys.stderr, end="", flush=True)
            elif i % 100000 == 0:
@ -61,7 +65,7 @@ def main():
    with open(args.sample_output, 'w', encoding='utf-8') as out:
        first = True
        for doc in sample:
-            if not first:
+            if not first and not args.lines:
                out.write("\n")
            first = False
            for line in doc:
@ -70,7 +74,7 @@ def main():
    with open(args.remainder_output, 'w', encoding='utf-8') as out:
        first = True
        for doc in remainder:
-            if not first:
+            if not first and not args.lines:
                out.write("\n")
            first = False
            for line in doc:
--- a/scripts/spm_encode.py
+++ b/scripts/spm_encode.py
@ -30,7 +30,7 @@ def main():
    args = parser.parse_args()

    assert len(args.inputs) == len(args.outputs), \
-            "number of input and output paths should match"
+        "number of input and output paths should match"

    sp = spm.SentencePieceProcessor()
    sp.Load(args.model)
--- a/scripts/wav2vec_featurize.py
+++ b/scripts/wav2vec_featurize.py
@ -1,22 +1,27 @@
-""" Helper script to pre-compute embeddings for a wav2letter++ dataset
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Helper script to pre-compute embeddings for a wav2letter++ dataset
 """

-import glob, os
-import tqdm
+import argparse
+import glob
+import os
 from shutil import copy

-import soundfile as sf
-
 import h5py
+import soundfile as sf
 import numpy as np
-
 import torch
 from torch import nn
+import tqdm

 from fairseq.models.wav2vec import Wav2VecModel

-import argparse
-

 def read_audio(fname):
    """ Load an audio file and return PCM along with the sample rate """
@ -228,4 +233,4 @@ if __name__ == "__main__":
        if not args.no_copy_labels:
            print("Copying label data...")
            writer.copy_labels()
-            print("Done.")
+            print("Done.")
--- a/tests/speech_recognition/test_collaters.py
+++ b/tests/speech_recognition/test_collaters.py
@ -1,10 +1,8 @@
 #!/usr/bin/env python3
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

 import unittest

--- a/tests/speech_recognition/test_cross_entropy.py
+++ b/tests/speech_recognition/test_cross_entropy.py
@ -1,10 +1,8 @@
 #!/usr/bin/env python3
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

 from examples.speech_recognition.criterions.cross_entropy_acc import CrossEntropyWithAccCriterion
 from .asr_test_base import CrossEntropyCriterionTestBase
--- a/tests/test_average_checkpoints.py
+++ b/tests/test_average_checkpoints.py
@ -14,7 +14,6 @@ import torch
 from torch import nn


-
 from scripts.average_checkpoints import average_checkpoints


--- a/validate.py
+++ b/validate.py
@ -1,10 +1,9 @@
 #!/usr/bin/env python3 -u
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
+#!/usr/bin/env python3 -u
+# Copyright (c) Facebook, Inc. and its affiliates.
 #
-# This source code is licensed under the license found in the LICENSE file in
-# the root directory of this source tree. An additional grant of patent rights
-# can be found in the PATENTS file in the same directory.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.

 import torch