diff --git a/docs/hydra_integration.md b/docs/hydra_integration.md
index 8e4082cb2..04c797fe5 100644
--- a/docs/hydra_integration.md
+++ b/docs/hydra_integration.md
@@ -211,7 +211,7 @@ works for migrated tasks and models.

 ```shell script
 $ fairseq-hydra-train \
-    --config-path /path/to/external/configs \
+    --config-dir /path/to/external/configs \
     --config-name wiki103
 ```
diff --git a/examples/wav2vec/README.md b/examples/wav2vec/README.md
index fdbf844ec..10d231ed6 100644
--- a/examples/wav2vec/README.md
+++ b/examples/wav2vec/README.md
@@ -58,12 +58,12 @@ Note that the input is expected to be single channel, sampled at 16 kHz
 ```shell script
 $ fairseq-hydra-train \
     task.data=/path/to/data \
-    --config-path /path/to/fairseq-py/examples/wav2vec/config/pretraining \
+    --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \
     --config-name wav2vec2_base_librispeech
 ```

-Note: you can simulate 64 GPUs by using k GPUs and adding command line parameters (before --config-path)
-`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 64/k
+Note: you can simulate 64 GPUs by using k GPUs and adding command line parameters (before `--config-dir`)
+`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 64/k

 ### Train a wav2vec 2.0 large model:

@@ -72,12 +72,12 @@ This configuration was used for the large model trained on the Libri-light datas
 ```shell script
 $ fairseq-hydra-train \
     task.data=/path/to/data \
-    --config-path /path/to/fairseq-py/examples/wav2vec/config/pretraining \
+    --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \
     --config-name wav2vec2_large_librivox
 ```

-Note: you can simulate 128 GPUs by using k GPUs and adding command line parameters (before --config-path)
-`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 128/k
+Note: you can simulate 128 GPUs by using k GPUs and adding command line parameters (before `--config-dir`)
+`distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 128/k

 ### Fine-tune a pre-trained model with CTC:

@@ -96,14 +96,14 @@ $ fairseq-hydra-train \
     distributed_training.distributed_port=$PORT \
     task.data=/path/to/data \
     model.w2v_path=/path/to/model.pt \
-    --config-path /path/to/fairseq-py/examples/wav2vec/config/finetuning \
+    --config-dir /path/to/fairseq-py/examples/wav2vec/config/finetuning \
     --config-name base_100h
 ```

 There are other config files in the config/finetuning directory that can be used to fine-tune on other splits.
-You can specify the right config via the --config-name parameter.
+You can specify the right config via the `--config-name` parameter.

-Note: you can simulate 24 GPUs by using k GPUs and adding command line parameters (before --config-path)
+Note: you can simulate 24 GPUs by using k GPUs and adding command line parameters (before `--config-dir`)
 `distributed_training.distributed_world_size=k` `+optimization.update_freq='[x]'` where x = 24/k

 Decoding with a language model during training requires wav2letter [python bindings](https://github.com/facebookresearch/wav2letter/wiki/Building-Python-bindings).
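The three "simulate N GPUs" notes in the README hunks above all describe the same gradient-accumulation trick, so a single worked example may help. The command below is an illustrative aside and not part of the patch: it assumes one 8-GPU machine standing in for the 64-GPU base-model recipe, hence `update_freq = 64 / 8 = 8`, and reuses the placeholder paths from the README.

```shell script
# Hypothetical run: 8 local GPUs emulating the 64-GPU recipe via gradient
# accumulation (update_freq = 64 / 8 = 8); paths are placeholders.
$ fairseq-hydra-train \
    task.data=/path/to/data \
    distributed_training.distributed_world_size=8 \
    +optimization.update_freq='[8]' \
    --config-dir /path/to/fairseq-py/examples/wav2vec/config/pretraining \
    --config-name wav2vec2_base_librispeech
```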
diff --git a/examples/wav2vec/wav2vec_manifest.py b/examples/wav2vec/wav2vec_manifest.py
index 1d27f58af..541708455 100644
--- a/examples/wav2vec/wav2vec_manifest.py
+++ b/examples/wav2vec/wav2vec_manifest.py
@@ -47,6 +47,9 @@ def get_parser():
 def main(args):
     assert args.valid_percent >= 0 and args.valid_percent <= 1.0

+    if not os.path.exists(args.dest):
+        os.makedirs(args.dest)
+
     dir_path = os.path.realpath(args.root)
     search_path = os.path.join(dir_path, "**/*." + args.ext)
     rand = random.Random(args.seed)
diff --git a/fairseq/config/__init__.py b/fairseq/config/__init__.py
new file mode 100644
index 000000000..626423691
--- /dev/null
+++ b/fairseq/config/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/fairseq/config/config.yaml b/fairseq/config/config.yaml
index 9621baa5e..e20d914b9 100644
--- a/fairseq/config/config.yaml
+++ b/fairseq/config/config.yaml
@@ -1,4 +1,9 @@
 # @package _group_
+
+hydra:
+  run:
+    dir: .
+
 defaults:
   - task: null
   - model: null
diff --git a/fairseq/dataclass/utils.py b/fairseq/dataclass/utils.py
index 9bf4f7d09..beae592d1 100644
--- a/fairseq/dataclass/utils.py
+++ b/fairseq/dataclass/utils.py
@@ -218,8 +218,7 @@ def _override_attr(
             isinstance(val, str)
             and not val.startswith("${")  # not interpolation
             and field_type != str
-            and inspect.isclass(field_type)
-            and not issubclass(field_type, Enum)  # not choices enum
+            and (not inspect.isclass(field_type) or not issubclass(field_type, Enum))  # not choices enum
         ):
             # upgrade old models that stored complex parameters as string
             val = ast.literal_eval(val)
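For context on the `fairseq/dataclass/utils.py` hunk: the old guard only ran `ast.literal_eval` when the declared field type was a class other than an `Enum`, so string-serialized values whose field type is a non-class typing construct (e.g. a `List[int]` restored from an old checkpoint) were never parsed back. The relaxed guard parses everything except choices enums. The snippet below is a standalone illustrative sketch, not the actual fairseq function, and the helper name is made up:

```python
import ast
import inspect
from enum import Enum
from typing import Any, List


def _maybe_parse_legacy_value(val: Any, field_type: Any) -> Any:
    """Illustrative stand-in for the guard in fairseq/dataclass/utils.py."""
    if (
        isinstance(val, str)
        and not val.startswith("${")  # not interpolation
        and field_type != str
        # relaxed guard: parse unless the field is a choices Enum
        and (not inspect.isclass(field_type) or not issubclass(field_type, Enum))
    ):
        # upgrade old models that stored complex parameters as string
        return ast.literal_eval(val)
    return val


class Choices(Enum):
    A = "a"


# List[int] is not a class, so the old `inspect.isclass(...) and not issubclass(...)`
# guard never parsed it; the relaxed guard converts the stored string back to a list.
print(_maybe_parse_legacy_value("[1, 2]", List[int]))  # [1, 2]
# Enum-typed (choices) fields are still left untouched.
print(_maybe_parse_legacy_value("a", Choices))         # a
```

Note that short-circuit evaluation matters here: `issubclass` raises `TypeError` on non-classes such as `List[int]`, so `not inspect.isclass(...)` has to be checked first.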