Package config and examples with fairseq (#1356)

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/1356

Reviewed By: alexeib

Differential Revision: D24385688

Pulled By: myleott

fbshipit-source-id: 72c4a702d93d2854a6409d42913d7413207cb61e
Author: Myle Ott
Date: 2020-10-19 09:22:28 -07:00
Committed by: Facebook GitHub Bot
Parent: e3168f74a8
Commit: 9b8b464070

4 changed files with 94 additions and 51 deletions

examples/linformer/README.md

@@ -6,7 +6,7 @@ This example contains code to train Linformer models as described in our paper
 ## Training a new Linformer RoBERTa model
 You can mostly follow the [RoBERTa pretraining README](/examples/roberta/README.pretraining.md),
-but replace the architecture with `--arch linformer_roberta_base` in your training command.
+updating your training command with `--user-dir examples/linformer/src --arch linformer_roberta_base`.
 ## Citation
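In practice the updated instruction just adds `--user-dir examples/linformer/src --arch linformer_roberta_base` to the usual RoBERTa pretraining command. Below is a minimal, illustrative Python sketch of the equivalent programmatic launch via the `fairseq-train` entry point (`fairseq_cli.train:cli_main`, listed in setup.py further down); the data path and every hyperparameter are placeholders, not values from this commit.

# Illustrative sketch only: programmatic equivalent of the README's training command.
# "/path/to/data-bin" and the hyperparameters are placeholders.
import sys

from fairseq_cli.train import cli_main  # same function the fairseq-train script calls

sys.argv = [
    "fairseq-train",
    "/path/to/data-bin",                     # preprocessed data directory (placeholder)
    "--user-dir", "examples/linformer/src",  # flag added by this README change
    "--arch", "linformer_roberta_base",
    "--task", "masked_lm",
    "--criterion", "masked_lm",
    "--optimizer", "adam",
    "--lr", "0.0005",
    "--max-tokens", "4096",
]

if __name__ == "__main__":
    cli_main()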

fairseq/utils.py

@@ -433,11 +433,17 @@ def import_user_module(args):
     if module_path is not None:
         module_path = os.path.abspath(args.user_dir)
         if not os.path.exists(module_path):
-            fairseq_rel_path = os.path.join(
-                os.path.dirname(__file__), "..", args.user_dir
-            )
+            fairseq_rel_path = os.path.join(os.path.dirname(__file__), args.user_dir)
             if os.path.exists(fairseq_rel_path):
                 module_path = fairseq_rel_path
             else:
-                raise FileNotFoundError(module_path)
+                fairseq_rel_path = os.path.join(
+                    os.path.dirname(__file__), "..", args.user_dir
+                )
+                if os.path.exists(fairseq_rel_path):
+                    module_path = fairseq_rel_path
+                else:
+                    raise FileNotFoundError(module_path)
         # We want to import the module under a unique name so that it doesn't
         # collide with existing modules. At the same time we don't want to
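The net effect of this hunk is a three-step lookup for `--user-dir`: use the path as given if it exists, otherwise try it relative to the installed `fairseq/` package (where `examples/` and `config/` now live after this commit), and finally relative to the repository root. The sketch below restates that order with a hypothetical `resolve_user_dir` helper that is not part of fairseq:

import os


def resolve_user_dir(user_dir: str, fairseq_dir: str) -> str:
    """Illustrative only: mirrors the lookup order in import_user_module above."""
    candidates = [
        os.path.abspath(user_dir),                  # 1. the path exactly as given
        os.path.join(fairseq_dir, user_dir),        # 2. inside the installed fairseq package
        os.path.join(fairseq_dir, "..", user_dir),  # 3. relative to a source checkout's root
    ]
    for path in candidates:
        if os.path.exists(path):
            return path
    raise FileNotFoundError(user_dir)


# e.g. resolve_user_dir("examples/linformer/src", os.path.dirname(fairseq.__file__))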

setup.py

@@ -127,51 +127,86 @@ if "clean" in sys.argv[1:]:
     )
 
-setup(
-    name="fairseq",
-    version="0.9.0",
-    description="Facebook AI Research Sequence-to-Sequence Toolkit",
-    url="https://github.com/pytorch/fairseq",
-    classifiers=[
-        "Intended Audience :: Science/Research",
-        "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.6",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
-    long_description=readme,
-    long_description_content_type="text/markdown",
-    setup_requires=[
-        "cython",
-        "numpy",
-        "setuptools>=18.0",
-    ],
-    install_requires=[
-        "cffi",
-        "cython",
-        "dataclasses",
-        "editdistance",
-        "hydra-core",
-        "numpy",
-        "regex",
-        "sacrebleu>=1.4.12",
-        "torch",
-        "tqdm",
-    ],
-    dependency_links=dependency_links,
-    packages=find_packages(exclude=["scripts", "tests"]),
-    ext_modules=extensions,
-    test_suite="tests",
-    entry_points={
-        "console_scripts": [
-            "fairseq-eval-lm = fairseq_cli.eval_lm:cli_main",
-            "fairseq-generate = fairseq_cli.generate:cli_main",
-            "fairseq-interactive = fairseq_cli.interactive:cli_main",
-            "fairseq-preprocess = fairseq_cli.preprocess:cli_main",
-            "fairseq-score = fairseq_cli.score:cli_main",
-            "fairseq-train = fairseq_cli.train:cli_main",
-            "fairseq-validate = fairseq_cli.validate:cli_main",
-        ],
-    },
-    cmdclass=cmdclass,
-    zip_safe=False,
-)
+def do_setup(package_data):
+    setup(
+        name="fairseq",
+        version="0.9.0",
+        description="Facebook AI Research Sequence-to-Sequence Toolkit",
+        url="https://github.com/pytorch/fairseq",
+        classifiers=[
+            "Intended Audience :: Science/Research",
+            "License :: OSI Approved :: MIT License",
+            "Programming Language :: Python :: 3.6",
+            "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        ],
+        long_description=readme,
+        long_description_content_type="text/markdown",
+        setup_requires=[
+            "cython",
+            "numpy",
+            "setuptools>=18.0",
+        ],
+        install_requires=[
+            "cffi",
+            "cython",
+            "dataclasses",
+            "editdistance",
+            "hydra-core",
+            "numpy",
+            "regex",
+            "sacrebleu>=1.4.12",
+            "torch",
+            "tqdm",
+        ],
+        dependency_links=dependency_links,
+        packages=find_packages(
+            exclude=[
+                "examples",
+                "examples.*",
+                "scripts",
+                "scripts.*",
+                "tests",
+                "tests.*",
+            ]
+        ),
+        package_data=package_data,
+        ext_modules=extensions,
+        test_suite="tests",
+        entry_points={
+            "console_scripts": [
+                "fairseq-eval-lm = fairseq_cli.eval_lm:cli_main",
+                "fairseq-generate = fairseq_cli.generate:cli_main",
+                "fairseq-interactive = fairseq_cli.interactive:cli_main",
+                "fairseq-preprocess = fairseq_cli.preprocess:cli_main",
+                "fairseq-score = fairseq_cli.score:cli_main",
+                "fairseq-train = fairseq_cli.train:cli_main",
+                "fairseq-validate = fairseq_cli.validate:cli_main",
+            ],
+        },
+        cmdclass=cmdclass,
+        zip_safe=False,
+    )
+
+
+def get_files(path, relative_to="fairseq"):
+    all_files = []
+    for root, _dirs, files in os.walk(path, followlinks=True):
+        root = os.path.relpath(root, relative_to)
+        for file in files:
+            if file.endswith(".pyc"):
+                continue
+            all_files.append(os.path.join(root, file))
+    return all_files
+
+
+try:
+    # symlink config and examples into fairseq package so package_data accepts them
+    os.symlink(os.path.join("..", "config"), "fairseq/config")
+    os.symlink(os.path.join("..", "examples"), "fairseq/examples")
+    package_data = {
+        "fairseq": get_files("fairseq/config") + get_files("fairseq/examples"),
+    }
+    do_setup(package_data)
+finally:
+    os.unlink("fairseq/config")
+    os.unlink("fairseq/examples")
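Because `config/` and `examples/` are symlinked under `fairseq/` at build time and listed in `package_data`, both directories ship inside the installed package. The sketch below shows one way an installed copy could be located at runtime; the directory layout is an assumption based on the symlinks above, not a documented fairseq API.

# Illustrative only: locate the packaged config/ and examples/ directories in an
# installed fairseq; the paths are assumed from the setup.py symlinks above.
import os

import fairseq

fairseq_root = os.path.dirname(fairseq.__file__)
config_dir = os.path.join(fairseq_root, "config")
examples_dir = os.path.join(fairseq_root, "examples")

print(sorted(os.listdir(config_dir))[:5])                      # a few packaged config files
print(os.path.isdir(os.path.join(examples_dir, "linformer")))  # True if examples shipped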

tests/test_binaries.py

@@ -7,6 +7,7 @@ import contextlib
 import logging
 import os
 import random
+import sys
 import tempfile
 import unittest
 from io import StringIO
@@ -294,6 +295,7 @@ class TestTranslation(unittest.TestCase):
             + dec_ltok_flag,
         )
 
+    @unittest.skipIf(sys.platform.lower() == "darwin", "skip latent depth test on MacOS")
     def test_multilingual_translation_latent_depth(self):
         # test with latent depth in encoder, decoder, or both
         encoder_latent_layer = [[], ["--encoder-latent-layer"]]