Various fixes (#2127)

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/2127

Reviewed By: ngoyal2707

Differential Revision: D21550962

Pulled By: myleott

fbshipit-source-id: ddbe3f287f170862378e0702fc378a4fe400793a
This commit is contained in:
Myle Ott 2020-05-14 10:21:26 -07:00 committed by Facebook GitHub Bot
parent 4455f67011
commit 9a718e2985
7 changed files with 31 additions and 29 deletions

View File

@@ -47,7 +47,7 @@ $ python train.py /manifest/path --save-dir /model/path --num-workers 6 --fp16 -
### Extract embeddings from the downstream task data:
```
$ PYTHONPATH /path/to/fairseq python examples/wav2vec/wav2vec_featurize.py --input /path/to/task/waves --output /path/to/output \
$ PYTHONPATH=/path/to/fairseq python examples/wav2vec/wav2vec_featurize.py --input /path/to/task/waves --output /path/to/output \
--model /model/path/checkpoint_best.pt --split train valid test
```
@@ -109,6 +109,6 @@ for k-means training, set vq-type with "kmeans" and add --loss-weights [1] argum
### Tokenize audio data (e.g. for BERT training):
```
$ PYTHONPATH /path/to/fairseq python examples/wav2vec/vq-wav2vec_featurize.py --data-dir /manifest/path --output-dir /path/to/output \
$ PYTHONPATH=/path/to/fairseq python examples/wav2vec/vq-wav2vec_featurize.py --data-dir /manifest/path --output-dir /path/to/output \
--checkpoint /model/path/checkpoint_best.pt --split train valid test --extension tsv
```
```

View File

@@ -8,6 +8,7 @@ from collections import Counter
from multiprocessing import Pool
import torch
from fairseq import utils
from fairseq.binarizer import safe_readline
from fairseq.data import data_utils
from fairseq.file_io import PathManager
@@ -74,7 +75,10 @@ class Dictionary(object):
Can optionally remove BPE symbols or escape <unk> words.
"""
if torch.is_tensor(tensor) and tensor.dim() == 2:
return "\n".join(self.string(t, bpe_symbol, escape_unk, extra_symbols_to_ignore) for t in tensor)
return "\n".join(
self.string(t, bpe_symbol, escape_unk, extra_symbols_to_ignore)
for t in tensor
)
extra_symbols_to_ignore = set(extra_symbols_to_ignore or [])
extra_symbols_to_ignore.add(self.eos())
@@ -91,7 +95,11 @@ class Dictionary(object):
if hasattr(self, "bos_index"):
extra_symbols_to_ignore.add(self.bos())
sent = " ".join(token_string(i) for i in tensor if i.item() not in extra_symbols_to_ignore)
sent = " ".join(
token_string(i)
for i in tensor
if utils.item(i) not in extra_symbols_to_ignore
)
return data_utils.process_bpe_symbol(sent, bpe_symbol)

View File

@@ -198,7 +198,8 @@ def log_stop_time(key: str, weight: float = 0., prehook=None):
make sure all gpu operations are done before timer is stopped.
"""
for agg in get_active_aggregators():
agg[key].stop(weight, prehook)
if key in agg:
agg[key].stop(weight, prehook)
def log_custom(

View File

@@ -575,6 +575,11 @@ class TransformerDecoder(FairseqIncrementalDecoder):
else None
)
if getattr(args, "layernorm_embedding", False):
self.layernorm_embedding = LayerNorm(embed_dim)
else:
self.layernorm_embedding = None
self.cross_self_attention = getattr(args, "cross_self_attention", False)
if self.decoder_layerdrop > 0.0:
@@ -587,7 +592,10 @@ class TransformerDecoder(FairseqIncrementalDecoder):
])
self.num_layers = len(self.layers)
self.adaptive_softmax = None
if args.decoder_normalize_before and not getattr(args, "no_decoder_final_norm", False):
self.layer_norm = LayerNorm(embed_dim)
else:
self.layer_norm = None
self.project_out_dim = (
Linear(embed_dim, self.output_embed_dim, bias=False)
@@ -595,6 +603,8 @@ class TransformerDecoder(FairseqIncrementalDecoder):
else None
)
self.adaptive_softmax = None
self.output_projection = None
if args.adaptive_softmax_cutoff is not None:
self.adaptive_softmax = AdaptiveSoftmax(
len(dictionary),
@@ -605,19 +615,7 @@ class TransformerDecoder(FairseqIncrementalDecoder):
factor=args.adaptive_softmax_factor,
tie_proj=args.tie_adaptive_proj,
)
if args.decoder_normalize_before and not getattr(
args, "no_decoder_final_norm", False
):
self.layer_norm = LayerNorm(embed_dim)
else:
self.layer_norm = None
if getattr(args, "layernorm_embedding", False):
self.layernorm_embedding = LayerNorm(embed_dim)
else:
self.layernorm_embedding = None
if self.share_input_output_embed:
elif self.share_input_output_embed:
self.output_projection = nn.Linear(
self.embed_tokens.weight.shape[1],
self.embed_tokens.weight.shape[0],

View File

@@ -96,8 +96,7 @@ class LengthConstrainedBeamSearch(Search):
min_lens = self.min_len_a * self.src_lengths + self.min_len_b
max_lens = self.max_len_a * self.src_lengths + self.max_len_b
lprobs[step < min_lens, :, self.eos] = -math.inf
lprobs[step == max_lens, :, self.eos] = 0
lprobs[step > max_lens, :, self.eos] = -math.inf
lprobs[step >= max_lens, :, self.eos] = 0
return self.beam.step(step, lprobs, scores)

View File

@@ -110,5 +110,5 @@ class TranslationFromPretrainedBARTTask(TranslationTask):
for s_t in src_tokens:
s_t = torch.cat([s_t, s_t.new(1).fill_(src_lang_id)])
source_tokens.append(s_t)
dataset = LanguagePairDataset(src_tokens, src_lengths, self.source_dictionary)
dataset = LanguagePairDataset(source_tokens, src_lengths, self.source_dictionary)
return dataset

View File

@@ -66,12 +66,11 @@ class TestJitLSTMModel(unittest.TestCase):
torch.jit.load(f.name)
def assertTensorEqual(self, t1, t2):
t1 = t1[~torch.isnan(t1)] # can cause size mismatch errors if there are NaNs
t2 = t2[~torch.isnan(t2)]
self.assertEqual(t1.size(), t2.size(), "size mismatch")
self.assertEqual(t1.ne(t2).long().sum(), 0)
@unittest.skipIf(
torch.__version__ < "1.6.0", "Targeting OSS scriptability for the 1.6 release"
)
def test_jit_and_export_lstm(self):
task, parser = get_dummy_task_and_parser()
LSTMModel.add_args(parser)
@@ -81,9 +80,6 @@ class TestJitLSTMModel(unittest.TestCase):
scripted_model = torch.jit.script(model)
self._test_save_and_load(scripted_model)
@unittest.skipIf(
torch.__version__ < "1.6.0", "Targeting OSS scriptability for the 1.6 release"
)
def test_assert_jit_vs_nonjit_(self):
task, parser = get_dummy_task_and_parser()
LSTMModel.add_args(parser)