diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index 84b6f7d8d..c76534163 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -158,14 +158,6 @@ Fairseq supports FP16 training with the ``--fp16`` flag:
 
   > fairseq-train --fp16 (...)
 
-Lazily loading large training datasets
---------------------------------------
-
-By default fairseq loads the entire training set into system memory. For large
-datasets, the ``--lazy-load`` option can be used to instead load batches on-demand.
-For optimal performance, use the ``--num-workers`` option to control the number
-of background processes that will load batches.
-
 Distributed training
 --------------------
 
diff --git a/examples/speech_recognition/infer.py b/examples/speech_recognition/infer.py
index 909400b58..df144ab3a 100644
--- a/examples/speech_recognition/infer.py
+++ b/examples/speech_recognition/infer.py
@@ -63,8 +63,8 @@ def check_args(args):
         not args.sampling or args.nbest == args.beam
     ), "--sampling requires --nbest to be equal to --beam"
     assert (
-        args.replace_unk is None or args.raw_text
-    ), "--replace-unk requires a raw text dataset (--raw-text)"
+        args.replace_unk is None or args.dataset_impl == "raw"
+    ), "--replace-unk requires a raw text dataset (--dataset-impl=raw)"
 
 
 def get_dataset_itr(args, task):
diff --git a/fairseq/checkpoint_utils.py b/fairseq/checkpoint_utils.py
index 2ac97d63d..51edb628f 100644
--- a/fairseq/checkpoint_utils.py
+++ b/fairseq/checkpoint_utils.py
@@ -326,6 +326,11 @@ def _upgrade_state_dict(state):
     # default to translation task
     if not hasattr(state["args"], "task"):
         state["args"].task = "translation"
+    # --raw-text and --lazy-load are deprecated
+    if getattr(state["args"], "raw_text", False):
+        state["args"].dataset_impl = "raw"
+    elif getattr(state["args"], "lazy_load", False):
+        state["args"].dataset_impl = "lazy"
 
     # set any missing default values in the task, model or other registries
     registry.set_defaults(state["args"], tasks.TASK_REGISTRY[state["args"].task])
diff --git a/fairseq/tasks/cross_lingual_lm.py b/fairseq/tasks/cross_lingual_lm.py
index 3cb92f8b1..5ef68ce69 100644
--- a/fairseq/tasks/cross_lingual_lm.py
+++ b/fairseq/tasks/cross_lingual_lm.py
@@ -46,10 +46,6 @@ class CrossLingualLMTask(FairseqTask):
         parser.add_argument('--monolingual-langs', default='en', type=str,
                             help='comma separated list of languages for which we'
                                  ' want to train XLM on')
-        parser.add_argument('--raw-text', default=False, action='store_true',
-                            help='load raw text dataset')
-        parser.add_argument('--lazy-load', action='store_true',
-                            help='load the dataset lazily')
         parser.add_argument('--shuffle', action='store_true',
                             help='shuffle each monolingual dataset while'
                                  ' training')
diff --git a/fairseq/tasks/denoising.py b/fairseq/tasks/denoising.py
index 10d0ba5cd..054f30353 100644
--- a/fairseq/tasks/denoising.py
+++ b/fairseq/tasks/denoising.py
@@ -32,8 +32,6 @@ class DenoisingTask(FairseqTask):
         parser.add_argument('--tokens-per-sample', default=512, type=int,
                             help='max number of total tokens over all segments'
                                  ' per sample for dataset')
-        parser.add_argument('--raw-text', default=False, action='store_true',
-                            help='load raw text dataset')
         parser.add_argument(
             '--sample-break-mode', default="complete_doc", type=str,
             help='mode for breaking sentence',
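Review note: with the per-task flags gone, the only backward-compatibility path is the shim added to `_upgrade_state_dict` above, which rewrites old checkpoint args once at load time. A minimal standalone sketch of that mapping, assuming a plain `argparse.Namespace` (the helper name `upgrade_dataset_args` is hypothetical):

```python
import argparse

def upgrade_dataset_args(args):
    # Hypothetical standalone version of the mapping _upgrade_state_dict()
    # now applies: old --raw-text / --lazy-load flags stored in a checkpoint
    # become the equivalent --dataset-impl values.
    if getattr(args, "raw_text", False):
        args.dataset_impl = "raw"
    elif getattr(args, "lazy_load", False):
        args.dataset_impl = "lazy"
    return args

old_args = argparse.Namespace(raw_text=False, lazy_load=True)
print(upgrade_dataset_args(old_args).dataset_impl)  # prints: lazy
```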
diff --git a/fairseq/tasks/language_modeling.py b/fairseq/tasks/language_modeling.py
index 1a1af352c..e69bf38fa 100644
--- a/fairseq/tasks/language_modeling.py
+++ b/fairseq/tasks/language_modeling.py
@@ -63,10 +63,6 @@ class LanguageModelingTask(FairseqTask):
                             'If set to "eos", includes only one sentence per sample.')
         parser.add_argument('--tokens-per-sample', default=1024, type=int,
                             help='max number of tokens per sample for LM dataset')
-        parser.add_argument('--lazy-load', action='store_true',
-                            help='load the dataset lazily')
-        parser.add_argument('--raw-text', default=False, action='store_true',
-                            help='load raw text dataset')
         parser.add_argument('--output-dictionary-size', default=-1, type=int,
                             help='limit the size of output dictionary')
         parser.add_argument('--self-target', action='store_true',
@@ -97,17 +93,6 @@ class LanguageModelingTask(FairseqTask):
         Args:
             args (argparse.Namespace): parsed command-line arguments
         """
-        if getattr(args, "raw_text", False):
-            utils.deprecation_warning(
-                "--raw-text is deprecated, please use --dataset-impl=raw"
-            )
-            args.dataset_impl = "raw"
-        elif getattr(args, "lazy_load", False):
-            utils.deprecation_warning(
-                "--lazy-load is deprecated, please use --dataset-impl=lazy"
-            )
-            args.dataset_impl = "lazy"
-
         dictionary = None
         output_dictionary = None
         if args.data:
diff --git a/fairseq/tasks/multilingual_translation.py b/fairseq/tasks/multilingual_translation.py
index f0086436e..420911f10 100644
--- a/fairseq/tasks/multilingual_translation.py
+++ b/fairseq/tasks/multilingual_translation.py
@@ -71,10 +71,6 @@ class MultilingualTranslationTask(FairseqTask):
                             help='source language (only needed for inference)')
         parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET',
                             help='target language (only needed for inference)')
-        parser.add_argument('--lazy-load', action='store_true',
-                            help='load the dataset lazily')
-        parser.add_argument('--raw-text', default=False, action='store_true',
-                            help='load raw text dataset')
         parser.add_argument('--left-pad-source', default='True', type=str, metavar='BOOL',
                             help='pad the source on the left (default: True)')
         parser.add_argument('--left-pad-target', default='False', type=str, metavar='BOOL',
@@ -123,12 +119,6 @@ class MultilingualTranslationTask(FairseqTask):
     def prepare(cls, args, **kargs):
         args.left_pad_source = options.eval_bool(args.left_pad_source)
         args.left_pad_target = options.eval_bool(args.left_pad_target)
-        if getattr(args, 'raw_text', False):
-            utils.deprecation_warning('--raw-text is deprecated, please use --dataset-impl=raw')
-            args.dataset_impl = 'raw'
-        elif getattr(args, 'lazy_load', False):
-            utils.deprecation_warning('--lazy-load is deprecated, please use --dataset-impl=lazy')
-            args.dataset_impl = 'lazy'
         if args.lang_pairs is None:
             raise ValueError('--lang-pairs is required. List all the language pairs in the training objective.')
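Review note: the blocks deleted from `setup_task`/`prepare` above all repeated the same deprecation shim. For reference, each one was roughly equivalent to the following sketch, with the stdlib `warnings` module standing in for fairseq's `utils.deprecation_warning`:

```python
import warnings

def map_deprecated_dataset_flags(args):
    # Roughly what each deleted per-task block did; after this change the
    # mapping happens once, in checkpoint_utils._upgrade_state_dict().
    if getattr(args, "raw_text", False):
        warnings.warn("--raw-text is deprecated, please use --dataset-impl=raw")
        args.dataset_impl = "raw"
    elif getattr(args, "lazy_load", False):
        warnings.warn("--lazy-load is deprecated, please use --dataset-impl=lazy")
        args.dataset_impl = "lazy"
```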
diff --git a/fairseq/tasks/semisupervised_translation.py b/fairseq/tasks/semisupervised_translation.py
index 0f0f1d587..e8bbe2b50 100644
--- a/fairseq/tasks/semisupervised_translation.py
+++ b/fairseq/tasks/semisupervised_translation.py
@@ -8,6 +8,8 @@ import os
 
 from fairseq.data import (
     BacktranslationDataset,
+    data_utils,
+    indexed_dataset,
     IndexedCachedDataset,
     IndexedDataset,
     IndexedRawTextDataset,
@@ -143,21 +145,10 @@ class SemisupervisedTranslationTask(MultilingualTranslationTask):
                 filename = os.path.join(data_path, '{}.{}-{}.{}'.format(split, src, tgt, lang))
             else:
                 filename = os.path.join(data_path, '{}.{}-None.{}'.format(split, src, tgt))
-            if self.args.raw_text and IndexedRawTextDataset.exists(filename):
-                return True
-            elif not self.args.raw_text and IndexedDataset.exists(filename):
-                return True
-            return False
+            return indexed_dataset.dataset_exists(filename, impl=self.args.dataset_impl)
 
-        def indexed_dataset(path, dictionary):
-            if self.args.raw_text:
-                return IndexedRawTextDataset(path, dictionary)
-            elif IndexedDataset.exists(path):
-                if self.args.lazy_load:
-                    return IndexedDataset(path, fix_lua_indexing=True)
-                else:
-                    return IndexedCachedDataset(path, fix_lua_indexing=True)
-            return None
+        def load_indexed_dataset(path, dictionary):
+            return data_utils.load_indexed_dataset(path, dictionary, self.args.dataset_impl)
 
         # load parallel datasets
         src_datasets, tgt_datasets = {}, {}
@@ -170,8 +161,8 @@ class SemisupervisedTranslationTask(MultilingualTranslationTask):
                     prefix = os.path.join(data_path, '{}.{}-{}.'.format(split, tgt, src))
                 else:
                     continue
-                src_datasets[lang_pair] = indexed_dataset(prefix + src, self.dicts[src])
-                tgt_datasets[lang_pair] = indexed_dataset(prefix + tgt, self.dicts[tgt])
+                src_datasets[lang_pair] = load_indexed_dataset(prefix + src, self.dicts[src])
+                tgt_datasets[lang_pair] = load_indexed_dataset(prefix + tgt, self.dicts[tgt])
                 print('| parallel-{} {} {} examples'.format(data_path, split, len(src_datasets[lang_pair])))
             if len(src_datasets) == 0:
                 raise FileNotFoundError('Dataset not found: {} ({})'.format(split, data_path))
@@ -184,7 +175,7 @@ class SemisupervisedTranslationTask(MultilingualTranslationTask):
                 if not split_exists(split, tgt, None, tgt):
                     raise FileNotFoundError('Dataset not found: backtranslation {} ({})'.format(split, data_path))
                 filename = os.path.join(data_path, '{}.{}-None.{}'.format(split, tgt, tgt))
-                dataset = indexed_dataset(filename, self.dicts[tgt])
+                dataset = load_indexed_dataset(filename, self.dicts[tgt])
                 lang_pair_dataset_tgt = LanguagePairDataset(
                     dataset,
                     dataset.sizes,
@@ -232,8 +223,8 @@ class SemisupervisedTranslationTask(MultilingualTranslationTask):
             if not split_exists(split, tgt, None, tgt):
                 continue
             filename = os.path.join(data_path, '{}.{}-None.{}'.format(split, tgt, tgt))
-            tgt_dataset1 = indexed_dataset(filename, self.dicts[tgt])
-            tgt_dataset2 = indexed_dataset(filename, self.dicts[tgt])
+            tgt_dataset1 = load_indexed_dataset(filename, self.dicts[tgt])
+            tgt_dataset2 = load_indexed_dataset(filename, self.dicts[tgt])
             noising_dataset = NoisingDataset(
                 tgt_dataset1,
                 self.dicts[tgt],
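Review note: `data_utils.load_indexed_dataset` replaces the local `indexed_dataset` closure deleted above, dispatching on `--dataset-impl` instead of the two removed flags. A rough sketch of that dispatch, covering only the `raw`, `lazy`, and `cached` cases relevant here (this is an assumption about the shape of the real function, which may also handle other implementations and infer the format when none is given):

```python
from fairseq.data import IndexedCachedDataset, IndexedDataset, IndexedRawTextDataset

def load_indexed_dataset_sketch(path, dictionary, dataset_impl):
    # Sketch only, not the actual data_utils.load_indexed_dataset:
    # pick the dataset class based on the requested implementation.
    if dataset_impl == "raw" and IndexedRawTextDataset.exists(path):
        return IndexedRawTextDataset(path, dictionary)
    elif dataset_impl == "lazy" and IndexedDataset.exists(path):
        return IndexedDataset(path, fix_lua_indexing=True)
    elif dataset_impl == "cached" and IndexedDataset.exists(path):
        return IndexedCachedDataset(path, fix_lua_indexing=True)
    return None
```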
diff --git a/fairseq/tasks/translation.py b/fairseq/tasks/translation.py
index 3b165232e..1171109ae 100644
--- a/fairseq/tasks/translation.py
+++ b/fairseq/tasks/translation.py
@@ -134,10 +134,6 @@ class TranslationTask(FairseqTask):
                             help='source language')
         parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET',
                             help='target language')
-        parser.add_argument('--lazy-load', action='store_true',
-                            help='load the dataset lazily')
-        parser.add_argument('--raw-text', action='store_true',
-                            help='load raw text dataset')
         parser.add_argument('--load-alignments', action='store_true',
                             help='load the binarized alignments')
         parser.add_argument('--left-pad-source', default='True', type=str, metavar='BOOL',
@@ -168,12 +164,6 @@ class TranslationTask(FairseqTask):
         """
         args.left_pad_source = options.eval_bool(args.left_pad_source)
         args.left_pad_target = options.eval_bool(args.left_pad_target)
-        if getattr(args, 'raw_text', False):
-            utils.deprecation_warning('--raw-text is deprecated, please use --dataset-impl=raw')
-            args.dataset_impl = 'raw'
-        elif getattr(args, 'lazy_load', False):
-            utils.deprecation_warning('--lazy-load is deprecated, please use --dataset-impl=lazy')
-            args.dataset_impl = 'lazy'
 
         paths = args.data.split(os.pathsep)
         assert len(paths) > 0
diff --git a/generate.py b/generate.py
index 069e2bd1f..201171fa9 100644
--- a/generate.py
+++ b/generate.py
@@ -17,8 +17,8 @@ def main(args):
     assert args.path is not None, '--path required for generation!'
     assert not args.sampling or args.nbest == args.beam, \
         '--sampling requires --nbest to be equal to --beam'
-    assert args.replace_unk is None or args.raw_text, \
-        '--replace-unk requires a raw text dataset (--raw-text)'
+    assert args.replace_unk is None or args.dataset_impl == 'raw', \
+        '--replace-unk requires a raw text dataset (--dataset-impl=raw)'
 
     utils.import_user_module(args)
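Migration note: scripts that still pass the removed flags will now fail argument parsing, so launch wrappers need `--dataset-impl=raw` or `--dataset-impl=lazy` instead. A hypothetical helper for updating old scripts (`migrate_argv` is not part of fairseq):

```python
def migrate_argv(argv):
    # Hypothetical helper: rewrite the removed flags into their
    # --dataset-impl equivalents before the args reach fairseq.
    replacements = {
        "--raw-text": "--dataset-impl=raw",
        "--lazy-load": "--dataset-impl=lazy",
    }
    return [replacements.get(arg, arg) for arg in argv]

assert migrate_argv(["--lazy-load", "--fp16"]) == ["--dataset-impl=lazy", "--fp16"]
```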