Simplify --user-dir and require user-dir module name to be globally unique (#2815)

Summary:
This PR reverts recent changes that attempted to make `--user-dir` work with non-unique module names. But that new approach introduced other issues (e.g., poor compatibility with multiprocessing and Windows), so let's revert to the previous simpler implementation.

Pull Request resolved: https://github.com/pytorch/fairseq/pull/2815

Reviewed By: alexeib

Differential Revision: D24611571

Pulled By: myleott

fbshipit-source-id: cecfe28395585ca0401f844f10bd0d49d014c4d8
This commit is contained in:
Myle Ott 2020-10-29 17:07:12 -07:00 committed by Facebook GitHub Bot
parent 6debe29150
commit a4356b1da2
33 changed files with 36 additions and 33 deletions

View File

@@ -14,7 +14,7 @@ lang_pairs_str="eng-aze,eng-bel,eng-ces,eng-glg,eng-por,eng-rus,eng-slk,eng-tur"
databin_dir=<path to binarized data>
fairseq-train ${databin_dir} \
--user-dir examples/latent_depth/src \
--user-dir examples/latent_depth/latent_depth_src \
--lang-pairs "${lang_pairs_str}" \
--arch multilingual_transformer_iwslt_de_en \
--task multilingual_translation_latent_depth \

View File

@@ -6,7 +6,7 @@ This example contains code to train Linformer models as described in our paper
## Training a new Linformer RoBERTa model
You can mostly follow the [RoBERTa pretraining README](/examples/roberta/README.pretraining.md),
updating your training command with `--user-dir examples/linformer/src --arch linformer_roberta_base`.
updating your training command with `--user-dir examples/linformer/linformer_src --arch linformer_roberta_base`.
## Citation

View File

@@ -77,7 +77,7 @@ update_freq=4
pointer_layer=-2
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train bin \
--user-dir examples/pointer_generator/src \
--user-dir examples/pointer_generator/pointer_generator_src \
--max-tokens "$max_tokens" \
--task translation \
--source-lang src --target-lang tgt \
@@ -125,7 +125,7 @@ max_length=60
length_penalty=1.0
fairseq-interactive bin \
--user-dir examples/pointer_generator/src \
--user-dir examples/pointer_generator/pointer_generator_src \
--batch-size "$batch_size" \
--task translation \
--source-lang src --target-lang tgt \

View File

@@ -38,7 +38,7 @@ CUDA_VISIBLE_DEVICES=0 fairseq-train RTE-bin \
--find-unused-parameters \
--best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \
--noise-type uniform --r3f-lambda 0.7 \
--user-dir examples/rxf;
--user-dir examples/rxf/rxf_src
```
## Citation

View File

@@ -3,4 +3,4 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from . import src # noqa
from . import rxf_src # noqa

View File

@@ -18,7 +18,7 @@ The following command will train a `hMoElp` model with `3` experts:
fairseq-train --ddp-backend='no_c10d' \
data-bin/wmt17_en_de \
--max-update 100000 \
--task translation_moe --user-dir examples/translation_moe/src \
--task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--arch transformer_wmt_en_de --share-all-embeddings \
@@ -37,7 +37,7 @@ For example, to generate from expert 0:
fairseq-generate data-bin/wmt17_en_de \
--path checkpoints/checkpoint_best.pt \
--beam 1 --remove-bpe \
--task translation_moe --user-dir examples/translation_moe/src \
--task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--gen-expert 0
@@ -61,7 +61,7 @@ for EXPERT in $(seq 0 2); do \
--beam 1 \
--bpe subword_nmt --bpe-codes $BPE_CODE \
--buffer-size 500 --max-tokens 6000 \
--task translation_moe --user-dir examples/translation_moe/src \
--task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--gen-expert $EXPERT ; \

View File

@@ -450,18 +450,21 @@ def import_user_module(args):
else:
raise FileNotFoundError(module_path)
# We want to import the module under a unique name so that it doesn't
# collide with existing modules. At the same time we don't want to
# import the module multiple times. The solution is to create a
# temporary directory and symlink the user_dir under a new name, which is
# a deterministic hash of the original module_path.
with tempfile.TemporaryDirectory() as tmpdirname:
unique_mod_name = "fairseq_user_dir_{}".format(hash(module_path) % 100000)
os.symlink(module_path, os.path.join(tmpdirname, unique_mod_name))
# ensure that user modules are only imported once
import_user_module.memo = getattr(import_user_module, "memo", set())
if module_path not in import_user_module.memo:
import_user_module.memo.add(module_path)
sys.path.insert(0, tmpdirname)
importlib.import_module(unique_mod_name)
sys.path.remove(tmpdirname)
module_parent, module_name = os.path.split(module_path)
if module_name not in sys.modules:
sys.path.insert(0, module_parent)
importlib.import_module(module_name)
else:
raise ImportError(
"Failed to import --user-dir={} because the corresponding module name "
"({}) is not globally unique. Please rename the directory to "
"something unique and try again.".format(module_path, module_name)
)
def softmax(x, dim: int, onnx_trace: bool = False):

View File

@@ -320,7 +320,7 @@ class TestTranslation(unittest.TestCase):
task="multilingual_translation_latent_depth",
extra_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
"--encoder-layers",
"2",
"--decoder-layers",
@@ -340,7 +340,7 @@ class TestTranslation(unittest.TestCase):
run_validation=True,
extra_valid_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
]
+ enc_ll_flag
+ dec_ll_flag,
@@ -349,7 +349,7 @@ class TestTranslation(unittest.TestCase):
data_dir,
extra_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
"--task",
"multilingual_translation_latent_depth",
"--lang-pairs",
@@ -465,7 +465,7 @@ class TestTranslation(unittest.TestCase):
"transformer_pointer_generator",
extra_flags=[
"--user-dir",
"examples/pointer_generator/src",
"examples/pointer_generator/pointer_generator_src",
"--encoder-layers",
"2",
"--decoder-layers",
@@ -482,11 +482,11 @@ class TestTranslation(unittest.TestCase):
"0",
],
run_validation=True,
extra_valid_flags=["--user-dir", "examples/pointer_generator/src"],
extra_valid_flags=["--user-dir", "examples/pointer_generator/pointer_generator_src"],
)
generate_main(
data_dir,
extra_flags=["--user-dir", "examples/pointer_generator/src"],
extra_flags=["--user-dir", "examples/pointer_generator/pointer_generator_src"],
)
def test_lightconv(self):
@@ -700,7 +700,7 @@ class TestTranslation(unittest.TestCase):
"--task",
"translation_moe",
"--user-dir",
"examples/translation_moe/src",
"examples/translation_moe/translation_moe_src",
"--method",
"hMoElp",
"--mean-pool-gating-network",
@@ -722,7 +722,7 @@ class TestTranslation(unittest.TestCase):
"--task",
"translation_moe",
"--user-dir",
"examples/translation_moe/src",
"examples/translation_moe/translation_moe_src",
"--method",
"hMoElp",
"--mean-pool-gating-network",
@@ -1058,7 +1058,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
"linformer_roberta_base",
extra_flags=[
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
"--encoder-layers",
"2",
],
@@ -1075,7 +1075,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
data_dir,
"linformer_roberta_base",
num_classes=num_classes,
extra_flags=["--user-dir", "examples/linformer/src"],
extra_flags=["--user-dir", "examples/linformer/linformer_src"],
)
def test_linformer_roberta_regression_single(self):
@@ -1095,7 +1095,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
extra_flags=[
"--regression-target",
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
],
)
@@ -1116,7 +1116,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
extra_flags=[
"--regression-target",
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
],
)
@@ -1198,7 +1198,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
num_classes=num_classes,
extra_flags=[
"--user-dir",
"examples/rxf/src",
"examples/rxf/rxf_src",
"--criterion",
"sentence_prediction_r3f",
"--spectral-norm-classification-head",