Simplify --user-dir and require user-dir module name to be globally unique (#2815)

Summary:
This PR reverts recent changes that attempted to make `--user-dir` work with non-unique module names. But that new approach introduced other issues (e.g., poor compatibility with multiprocessing and Windows), so let's revert to the previous simpler implementation.

Pull Request resolved: https://github.com/pytorch/fairseq/pull/2815

Reviewed By: alexeib

Differential Revision: D24611571

Pulled By: myleott

fbshipit-source-id: cecfe28395585ca0401f844f10bd0d49d014c4d8
This commit is contained in:
Myle Ott 2020-10-29 17:07:12 -07:00 committed by Facebook GitHub Bot
parent 6debe29150
commit a4356b1da2
33 changed files with 36 additions and 33 deletions

View File

@@ -14,7 +14,7 @@ lang_pairs_str="eng-aze,eng-bel,eng-ces,eng-glg,eng-por,eng-rus,eng-slk,eng-tur"
databin_dir=<path to binarized data>
fairseq-train ${databin_dir} \
--user-dir examples/latent_depth/src \
--user-dir examples/latent_depth/latent_depth_src \
--lang-pairs "${lang_pairs_str}" \
--arch multilingual_transformer_iwslt_de_en \
--task multilingual_translation_latent_depth \

View File

@@ -6,7 +6,7 @@ This example contains code to train Linformer models as described in our paper
## Training a new Linformer RoBERTa model
You can mostly follow the [RoBERTa pretraining README](/examples/roberta/README.pretraining.md),
updating your training command with `--user-dir examples/linformer/src --arch linformer_roberta_base`.
updating your training command with `--user-dir examples/linformer/linformer_src --arch linformer_roberta_base`.
## Citation

View File

@@ -77,7 +77,7 @@ update_freq=4
pointer_layer=-2
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 fairseq-train bin \
--user-dir examples/pointer_generator/src \
--user-dir examples/pointer_generator/pointer_generator_src \
--max-tokens "$max_tokens" \
--task translation \
--source-lang src --target-lang tgt \
@@ -125,7 +125,7 @@ max_length=60
length_penalty=1.0
fairseq-interactive bin \
--user-dir examples/pointer_generator/src \
--user-dir examples/pointer_generator/pointer_generator_src \
--batch-size "$batch_size" \
--task translation \
--source-lang src --target-lang tgt \

View File

@@ -38,7 +38,7 @@ CUDA_VISIBLE_DEVICES=0 fairseq-train RTE-bin \
--find-unused-parameters \
--best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \
--noise-type uniform --r3f-lambda 0.7 \
--user-dir examples/rxf;
--user-dir examples/rxf/rxf_src
```
## Citation

View File

@@ -3,4 +3,4 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from . import src # noqa
from . import rxf_src # noqa

View File

@@ -18,7 +18,7 @@ The following command will train a `hMoElp` model with `3` experts:
fairseq-train --ddp-backend='no_c10d' \
data-bin/wmt17_en_de \
--max-update 100000 \
--task translation_moe --user-dir examples/translation_moe/src \
--task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--arch transformer_wmt_en_de --share-all-embeddings \
@@ -37,7 +37,7 @@ For example, to generate from expert 0:
fairseq-generate data-bin/wmt17_en_de \
--path checkpoints/checkpoint_best.pt \
--beam 1 --remove-bpe \
--task translation_moe --user-dir examples/translation_moe/src \
--task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--gen-expert 0
@@ -61,7 +61,7 @@ for EXPERT in $(seq 0 2); do \
--beam 1 \
--bpe subword_nmt --bpe-codes $BPE_CODE \
--buffer-size 500 --max-tokens 6000 \
--task translation_moe --user-dir examples/translation_moe/src \
--task translation_moe --user-dir examples/translation_moe/translation_moe_src \
--method hMoElp --mean-pool-gating-network \
--num-experts 3 \
--gen-expert $EXPERT ; \

View File

@@ -450,18 +450,21 @@ def import_user_module(args):
else:
raise FileNotFoundError(module_path)
# We want to import the module under a unique name so that it doesn't
# collide with existing modules. At the same time we don't want to
# import the module multiple times. The solution is to create a
# temporary directory and symlink the user_dir under a new name, which is
# a deterministic hash of the original module_path.
with tempfile.TemporaryDirectory() as tmpdirname:
unique_mod_name = "fairseq_user_dir_{}".format(hash(module_path) % 100000)
os.symlink(module_path, os.path.join(tmpdirname, unique_mod_name))
# ensure that user modules are only imported once
import_user_module.memo = getattr(import_user_module, "memo", set())
if module_path not in import_user_module.memo:
import_user_module.memo.add(module_path)
sys.path.insert(0, tmpdirname)
importlib.import_module(unique_mod_name)
sys.path.remove(tmpdirname)
module_parent, module_name = os.path.split(module_path)
if module_name not in sys.modules:
sys.path.insert(0, module_parent)
importlib.import_module(module_name)
else:
raise ImportError(
"Failed to import --user-dir={} because the corresponding module name "
"({}) is not globally unique. Please rename the directory to "
"something unique and try again.".format(module_path, module_name)
)
def softmax(x, dim: int, onnx_trace: bool = False):

View File

@@ -320,7 +320,7 @@ class TestTranslation(unittest.TestCase):
task="multilingual_translation_latent_depth",
extra_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
"--encoder-layers",
"2",
"--decoder-layers",
@@ -340,7 +340,7 @@ class TestTranslation(unittest.TestCase):
run_validation=True,
extra_valid_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
]
+ enc_ll_flag
+ dec_ll_flag,
@@ -349,7 +349,7 @@ class TestTranslation(unittest.TestCase):
data_dir,
extra_flags=[
"--user-dir",
"examples/latent_depth/src",
"examples/latent_depth/latent_depth_src",
"--task",
"multilingual_translation_latent_depth",
"--lang-pairs",
@@ -465,7 +465,7 @@ class TestTranslation(unittest.TestCase):
"transformer_pointer_generator",
extra_flags=[
"--user-dir",
"examples/pointer_generator/src",
"examples/pointer_generator/pointer_generator_src",
"--encoder-layers",
"2",
"--decoder-layers",
@@ -482,11 +482,11 @@ class TestTranslation(unittest.TestCase):
"0",
],
run_validation=True,
extra_valid_flags=["--user-dir", "examples/pointer_generator/src"],
extra_valid_flags=["--user-dir", "examples/pointer_generator/pointer_generator_src"],
)
generate_main(
data_dir,
extra_flags=["--user-dir", "examples/pointer_generator/src"],
extra_flags=["--user-dir", "examples/pointer_generator/pointer_generator_src"],
)
def test_lightconv(self):
@@ -700,7 +700,7 @@ class TestTranslation(unittest.TestCase):
"--task",
"translation_moe",
"--user-dir",
"examples/translation_moe/src",
"examples/translation_moe/translation_moe_src",
"--method",
"hMoElp",
"--mean-pool-gating-network",
@@ -722,7 +722,7 @@ class TestTranslation(unittest.TestCase):
"--task",
"translation_moe",
"--user-dir",
"examples/translation_moe/src",
"examples/translation_moe/translation_moe_src",
"--method",
"hMoElp",
"--mean-pool-gating-network",
@@ -1058,7 +1058,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
"linformer_roberta_base",
extra_flags=[
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
"--encoder-layers",
"2",
],
@@ -1075,7 +1075,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
data_dir,
"linformer_roberta_base",
num_classes=num_classes,
extra_flags=["--user-dir", "examples/linformer/src"],
extra_flags=["--user-dir", "examples/linformer/linformer_src"],
)
def test_linformer_roberta_regression_single(self):
@@ -1095,7 +1095,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
extra_flags=[
"--regression-target",
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
],
)
@@ -1116,7 +1116,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
extra_flags=[
"--regression-target",
"--user-dir",
"examples/linformer/src",
"examples/linformer/linformer_src",
],
)
@@ -1198,7 +1198,7 @@ class TestMaskedLanguageModel(unittest.TestCase):
num_classes=num_classes,
extra_flags=[
"--user-dir",
"examples/rxf/src",
"examples/rxf/rxf_src",
"--criterion",
"sentence_prediction_r3f",
"--spectral-norm-classification-head",