From 699ab190147e3afff0201c956d8145d158bc6f28 Mon Sep 17 00:00:00 2001
From: Guillaume Wenzek <gwenzek@users.noreply.github.com>
Date: Fri, 23 Sep 2022 18:40:50 +0200
Subject: [PATCH] run all tests (#4733)

* run all tests

* make torch a build-time dependency

* add 'dev' extra deps to install black, flake, pytest at once

* Build docs in CI

This should also help catch some import bugs, since Sphinx inspects a lot of code.

* CI should do a real install, not an "--editable" one

* check installation succeeded

* add missing __init__.py file

* add check installation

* move check_installation.py to its own script

* fix pytest import mode, force recent numpy, torch

* run black before flake and tests

* torch >= 1.10.0

* use torch 1.10 for GPU tests
---
 .circleci/config.yml                          |  22 ++--
 .github/workflows/build.yml                   |  32 ++++--
 .isort.cfg                                    |   2 -
 README.md                                     |   4 +-
 docs/_static/theme_overrides.css              |   9 --
 docs/conf.py                                  |  38 +------
 docs/requirements.txt                         |   2 -
 .../models/speech_to_text/modules/__init__.py |   0
 pyproject.toml                                |  22 +++-
 scripts/check_installation.py                 |  36 ++++++
 setup.py                                      | 106 ++++++++----------
 11 files changed, 136 insertions(+), 137 deletions(-)
 delete mode 100644 .isort.cfg
 delete mode 100644 docs/_static/theme_overrides.css
 delete mode 100644 docs/requirements.txt
 create mode 100644 fairseq/models/speech_to_text/modules/__init__.py
 create mode 100644 scripts/check_installation.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 59a244f34..6187ccc68 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,6 +31,7 @@ install_dep_common: &install_dep_common
         python -m torch.utils.collect_env
 
 install_dep_fused_ops: &install_dep_fused_ops
+  # this version of Apex is from Feb 2021 and doesn't work with torch>=1.12
   - run:
       name: Install Megatron/Apex Dependencies
       working_directory: ~/
@@ -57,22 +58,22 @@ install_dep_xformers: &install_dep_xformers
         pip install -r requirements.txt
         pip install -e .
 
-install_dep_pt19: &install_dep_pt19
+install_dep_pt1_10: &install_dep_pt1_10
   - run:
       name: Install Pytorch Dependencies
       command: |
         source activate fairseq
         pip install --upgrade setuptools
-        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==1.10.1+cu111 torchaudio==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
         python -c 'import torch; print("Torch version:", torch.__version__)'
 
-install_dep_pt18: &install_dep_pt18
+install_dep_pt1_12: &install_dep_pt1_12
   - run:
       name: Install Pytorch Dependencies
       command: |
         source activate fairseq
         pip install --upgrade setuptools
-        pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==1.12.1+cu116 torchaudio==0.12.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html
         python -c 'import torch; print("Torch version:", torch.__version__)'
 
 install_repo: &install_repo
@@ -121,7 +122,7 @@ create_conda_env: &create_conda_env
 
 jobs:
 
-  gpu_tests_pt19:
+  gpu_tests_pt1_10:
     <<: *gpu
 
     working_directory: ~/fairseq-py
@@ -132,7 +133,7 @@ jobs:
       - <<: *create_conda_env
       - restore_cache:
           key: *cache_key
-      - <<: *install_dep_pt19
+      - <<: *install_dep_pt1_10
       - <<: *install_dep_common
       - <<: *install_dep_fused_ops
       - save_cache:
@@ -142,7 +143,7 @@ jobs:
       - <<: *install_repo
       - <<: *run_unittests
 
-  gpu_tests_pt18:
+  gpu_tests_pt1_12:
     <<: *gpu
 
     working_directory: ~/fairseq-py
@@ -153,7 +154,7 @@ jobs:
       - <<: *create_conda_env
       - restore_cache:
           key: *cache_key
-      - <<: *install_dep_pt18
+      - <<: *install_dep_pt1_12
       - <<: *install_dep_common
       - <<: *install_dep_fused_ops
       - save_cache:
@@ -167,5 +168,6 @@ workflows:
   version: 2
   build:
     jobs:
-      - gpu_tests_pt18
-      - gpu_tests_pt19
+      # TODO: Figure out how to run APEX on torch 1.12
+      # - gpu_tests_pt1_12
+      - gpu_tests_pt1_10
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6f8d90e0e..036233d8c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -34,14 +34,17 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         git submodule update --init --recursive
-        python setup.py build_ext --inplace
-        python -m pip install --editable .
+        python -m pip install .
+
+    - name: Check installation
+      working-directory: /tmp
+      run: python $GITHUB_WORKSPACE/scripts/check_installation.py
 
     - name: Install optional test requirements
       run: |
+        python -m pip install '.[dev,docs]'
         python -m pip install iopath transformers pyarrow
         python -m pip install git+https://github.com/facebookresearch/fairscale.git@main
-        python -m pip install pytest
         python -m pip install pygit2 pgzip
         
     - name: Install xformers for Macos
@@ -55,19 +58,24 @@ jobs:
       run: |
         python -m pip install --progress-bar off git+https://github.com/facebookresearch/xformers.git@main
 
+    - name: Lint with black
+      run: black --check --diff .
+
     - name: Lint with flake8
       run: |
-        pip install flake8
         # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --extend-exclude fairseq/model_parallel/megatron
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --extend-exclude fairseq/model_parallel/megatron
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+    - name: Build doc
+      run: make singlehtml
+      working-directory: docs/
 
     - name: Run tests
-      run: |
-         python setup.py test
+      # When installing in non-editable mode, the .so files are generated in 'site-packages/fairseq'.
+      # But by default, pytest's import machinery will load the local fairseq, and won't see the .so files.
+      # Use --import-mode=append to favor the copy in 'site-packages/fairseq'.
+      # https://docs.pytest.org/en/7.1.x/explanation/pythonpath.html
+      run: pytest --import-mode=append -vvv tests/
 
-    - name: Lint with black
-      run: |
-        pip install black==22.3.0
-        black --check . --extend-exclude 'examples|fairseq\/model_parallel\/megatron'
diff --git a/.isort.cfg b/.isort.cfg
deleted file mode 100644
index aed482f47..000000000
--- a/.isort.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[settings]
-known_third_party = _cffi_backend,agg_results,aml,bitarray,boto3,botocore,dump_hubert_feature,dynamicconv_cuda,editdistance,faiss,fasttext,feature_utils,ffmpeg,g2p_en,h5py,hydra,hypothesis,indicnlp,inflect,iopath,joblib,kaldi_io,kenlm,libfb,librosa,lightconv_cuda,matplotlib,misc,mmpt,mmpt_cli,model,nltk,npy_append_array,numpy,omegaconf,pandas,pathbuilder,preprocessing,progressbar,pythainlp,random_sequence_shuffler,regex,sacrebleu,sacremoses,scipy,sentencepiece,setuptools,six,sklearn,soundfile,sweep,sweep_wmt_en2de_transformer_big_common,tabulate,torch,torchaudio,tqdm,unidecode,utils,videoreader,wav2vec_cluster_faiss,wget,yaml
diff --git a/README.md b/README.md
index 2dfd9e96f..047e1b768 100644
--- a/README.md
+++ b/README.md
@@ -147,8 +147,8 @@ and [RoBERTa](https://pytorch.org/hub/pytorch_fairseq_roberta/) for more example
 
 # Requirements and Installation
 
-* [PyTorch](http://pytorch.org/) version >= 1.5.0
-* Python version >= 3.6
+* [PyTorch](http://pytorch.org/) version >= 1.10.0
+* Python version >= 3.8
 * For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl)
 * **To install fairseq** and develop locally:
 
diff --git a/docs/_static/theme_overrides.css b/docs/_static/theme_overrides.css
deleted file mode 100644
index 2a0764193..000000000
--- a/docs/_static/theme_overrides.css
+++ /dev/null
@@ -1,9 +0,0 @@
-.wy-table-responsive table td kbd {
-    white-space: nowrap;
-}
-.wy-table-responsive table td {
-    white-space: normal !important;
-}
-.wy-table-responsive {
-    overflow: visible !important;
-}
diff --git a/docs/conf.py b/docs/conf.py
index 87b0db98c..0bc049f80 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -88,43 +88,7 @@ todo_include_todos = False
 
 # -- Options for HTML output ----------------------------------------------
 
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = "sphinx_rtd_theme"
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
-
-html_context = {
-    "css_files": [
-        "_static/theme_overrides.css",  # override wide tables in RTD theme
-    ],
-}
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# This is required for the alabaster theme
-# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
-# html_sidebars = {
-#    '**': [
-#        'about.html',
-#        'navigation.html',
-#        'relations.html',  # needs 'show_related': True theme option to display
-#        'searchbox.html',
-#        'donate.html',
-#    ]
-# }
-
+html_theme = "classic"
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index c734a1f04..000000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-sphinx<2.0
-sphinx-argparse
diff --git a/fairseq/models/speech_to_text/modules/__init__.py b/fairseq/models/speech_to_text/modules/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/pyproject.toml b/pyproject.toml
index 6d1b4c5b6..f4cb74ba1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,23 @@
 [build-system]
-requires = ["setuptools", "wheel", "cython"]
+requires = [
+  "setuptools>=18.0",
+  "wheel",
+  "cython",
+  "numpy>=1.23.3",
+  "torch>=1.10",
+]
 build-backend = "setuptools.build_meta"
+
+[tool.black]
+extend-exclude = '''
+(
+^/examples/|
+^/fairseq/model_parallel/megatron|
+^/build/
+)
+'''
+
+[tool.isort]
+profile = "black"
+known_third_party = "_cffi_backend,agg_results,aml,bitarray,boto3,botocore,dump_hubert_feature,dynamicconv_cuda,editdistance,faiss,fasttext,feature_utils,ffmpeg,g2p_en,h5py,hydra,hypothesis,indicnlp,inflect,iopath,joblib,kaldi_io,kenlm,libfb,librosa,lightconv_cuda,matplotlib,misc,mmpt,mmpt_cli,model,nltk,npy_append_array,numpy,omegaconf,pandas,pathbuilder,preprocessing,progressbar,pythainlp,random_sequence_shuffler,regex,sacrebleu,sacremoses,scipy,sentencepiece,setuptools,six,sklearn,soundfile,sweep,sweep_wmt_en2de_transformer_big_common,tabulate,torch,torchaudio,tqdm,unidecode,utils,videoreader,wav2vec_cluster_faiss,wget,yaml"
+skip_gitignore = true
diff --git a/scripts/check_installation.py b/scripts/check_installation.py
new file mode 100644
index 000000000..e5a9d9dd4
--- /dev/null
+++ b/scripts/check_installation.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+import os
+
+cwd = Path(".").resolve()
+print("running 'check_installation.py' from:", cwd)
+
+# Old versions of numpy/torch can prevent loading the .so files
+import torch
+
+print("torch:", torch.__version__)
+import numpy
+
+print("numpy:", numpy.__version__)
+
+import fairseq
+
+print("Fairseq installed at:", fairseq.__file__)
+import fairseq.criterions
+import fairseq.dataclass.configs
+
+import _imp
+
+print("Should load following .so suffixes:", _imp.extension_suffixes())
+
+so_files = list(Path(fairseq.__file__).parent.glob("*.so"))
+so_files.extend(Path(fairseq.__file__).parent.glob("data/*.so"))
+print("Found following .so files:")
+for so_file in so_files:
+    print(f"- {so_file}")
+
+from fairseq import libbleu
+
+print("Found libbleu at", libbleu.__file__)
+from fairseq.data import data_utils_fast
+
+print("Found data_utils_fast at", data_utils_fast.__file__)
diff --git a/setup.py b/setup.py
index a7ce61a89..ace36e980 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,7 @@ import subprocess
 import sys
 
 from setuptools import Extension, find_packages, setup
+from torch.utils import cpp_extension
 
 if sys.version_info < (3, 6):
     sys.exit("Sorry, Python >= 3.6 is required for fairseq.")
@@ -79,71 +80,56 @@ extensions = [
 ]
 
 
-cmdclass = {}
-
-
-try:
-    # torch is not available when generating docs
-    from torch.utils import cpp_extension
-
+extensions.extend(
+    [
+        cpp_extension.CppExtension(
+            "fairseq.libbase",
+            sources=[
+                "fairseq/clib/libbase/balanced_assignment.cpp",
+            ],
+        ),
+        cpp_extension.CppExtension(
+            "fairseq.libnat",
+            sources=[
+                "fairseq/clib/libnat/edit_dist.cpp",
+            ],
+        ),
+        cpp_extension.CppExtension(
+            "alignment_train_cpu_binding",
+            sources=[
+                "examples/operators/alignment_train_cpu.cpp",
+            ],
+        ),
+    ]
+)
+if "CUDA_HOME" in os.environ:
     extensions.extend(
         [
             cpp_extension.CppExtension(
-                "fairseq.libbase",
+                "fairseq.libnat_cuda",
                 sources=[
-                    "fairseq/clib/libbase/balanced_assignment.cpp",
-                ],
-            )
-        ]
-    )
-
-    extensions.extend(
-        [
-            cpp_extension.CppExtension(
-                "fairseq.libnat",
-                sources=[
-                    "fairseq/clib/libnat/edit_dist.cpp",
+                    "fairseq/clib/libnat_cuda/edit_dist.cu",
+                    "fairseq/clib/libnat_cuda/binding.cpp",
                 ],
             ),
             cpp_extension.CppExtension(
-                "alignment_train_cpu_binding",
+                "fairseq.ngram_repeat_block_cuda",
                 sources=[
-                    "examples/operators/alignment_train_cpu.cpp",
+                    "fairseq/clib/cuda/ngram_repeat_block_cuda.cpp",
+                    "fairseq/clib/cuda/ngram_repeat_block_cuda_kernel.cu",
+                ],
+            ),
+            cpp_extension.CppExtension(
+                "alignment_train_cuda_binding",
+                sources=[
+                    "examples/operators/alignment_train_kernel.cu",
+                    "examples/operators/alignment_train_cuda.cpp",
                 ],
             ),
         ]
     )
-    if "CUDA_HOME" in os.environ:
-        extensions.extend(
-            [
-                cpp_extension.CppExtension(
-                    "fairseq.libnat_cuda",
-                    sources=[
-                        "fairseq/clib/libnat_cuda/edit_dist.cu",
-                        "fairseq/clib/libnat_cuda/binding.cpp",
-                    ],
-                ),
-                cpp_extension.CppExtension(
-                    "fairseq.ngram_repeat_block_cuda",
-                    sources=[
-                        "fairseq/clib/cuda/ngram_repeat_block_cuda.cpp",
-                        "fairseq/clib/cuda/ngram_repeat_block_cuda_kernel.cu",
-                    ],
-                ),
-                cpp_extension.CppExtension(
-                    "alignment_train_cuda_binding",
-                    sources=[
-                        "examples/operators/alignment_train_kernel.cu",
-                        "examples/operators/alignment_train_cuda.cpp",
-                    ],
-                ),
-            ]
-        )
-    cmdclass["build_ext"] = cpp_extension.BuildExtension
-
-except ImportError:
-    pass
 
+cmdclass = {"build_ext": cpp_extension.BuildExtension}
 
 if "READTHEDOCS" in os.environ:
     # don't build extensions when generating docs
@@ -190,27 +176,23 @@ def do_setup(package_data):
         ],
         long_description=readme,
         long_description_content_type="text/markdown",
-        setup_requires=[
-            "cython",
-            'numpy<1.20.0; python_version<"3.7"',
-            'numpy; python_version>="3.7"',
-            "setuptools>=18.0",
-        ],
         install_requires=[
             "cffi",
             "cython",
-            'dataclasses; python_version<"3.7"',
             "hydra-core>=1.0.7,<1.1",
             "omegaconf<2.1",
-            'numpy<1.20.0; python_version<"3.7"',
-            'numpy; python_version>="3.7"',
+            "numpy>=1.23.3",
             "regex",
             "sacrebleu>=1.4.12",
-            "torch",
+            "torch>=1.10",
             "tqdm",
             "bitarray",
             "torchaudio>=0.8.0",
         ],
+        extras_require={
+            "dev": ["flake8", "pytest", "black==22.3.0"],
+            "docs": ["sphinx", "sphinx-argparse"],
+        },
         dependency_links=dependency_links,
         packages=find_packages(
             exclude=[