mirror of
https://github.com/facebookresearch/fairseq.git
synced 2024-10-26 17:32:57 +03:00
a48f235636
Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/1357 Reviewed By: alexeib Differential Revision: D24377772 fbshipit-source-id: 51581af041d42d62166b33a35a1a4228b1a76f0c
49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
import unittest
|
|
|
|
import torch
|
|
from fairseq.data import Dictionary
|
|
from fairseq.modules import CharacterTokenEmbedder
|
|
|
|
|
|
class TestCharacterTokenEmbedder(unittest.TestCase):
    """Smoke test for ``CharacterTokenEmbedder`` on a tiny two-word vocabulary."""

    def test_character_token_embedder(self):
        """Embed a small padded batch and check shape, consistency and gradients.

        Each row of the batch is laid out as ``<eos> w_1 ... w_n <eos>`` and
        right-padded with ``vocab.pad()`` up to ``max_len + 2`` columns.
        The test then verifies that:

        * the output has shape ``(num_sentences, max_len + 2, 5)``;
        * identical tokens at different batch positions get (almost)
          identical embeddings;
        * a backward pass populates the gradient of the character
          embedding table.
        """
        vocab = Dictionary()
        vocab.add_symbol("hello")
        vocab.add_symbol("there")

        # Positional arguments are kept exactly as before. The ``5`` is the
        # per-token output width that the shape assertion below relies on;
        # the meaning of the remaining arguments is defined by
        # CharacterTokenEmbedder itself (not visible from this file).
        embedder = CharacterTokenEmbedder(
            vocab, [(2, 16), (4, 32), (8, 64), (16, 2)], 64, 5, 2
        )

        # "unk" is deliberately never added to the vocabulary above.
        test_sents = [["hello", "unk", "there"], ["there"], ["hello", "there"]]
        max_len = max(len(s) for s in test_sents)

        # Start from an all-padding matrix, then write <eos>, the words and
        # the closing <eos> of every sentence into its row.
        tokens = torch.LongTensor(len(test_sents), max_len + 2).fill_(vocab.pad())
        for row, sent in enumerate(test_sents):
            tokens[row][0] = vocab.eos()
            for col, word in enumerate(sent, start=1):
                tokens[row][col] = vocab.index(word)
            # Position of the closing <eos> is derived from the sentence
            # length rather than from a leaked inner-loop variable, so an
            # empty sentence would be handled correctly as well.
            tokens[row][len(sent) + 1] = vocab.eos()

        embs = embedder(tokens)

        # unittest assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report both values on failure.
        self.assertEqual(tuple(embs.size()), (len(test_sents), max_len + 2, 5))
        # Leading <eos> of sentence 0 vs. leading <eos> of sentence 1.
        self.assertAlmostEqual(embs[0][0], embs[1][0])
        # Leading <eos> vs. closing <eos> of sentence 0 (it fills the row).
        self.assertAlmostEqual(embs[0][0], embs[0][-1])
        # "hello" in sentence 0 vs. "hello" in sentence 2.
        self.assertAlmostEqual(embs[0][1], embs[2][1])
        # "there" in sentence 0 vs. "there" in sentence 1.
        self.assertAlmostEqual(embs[0][3], embs[1][1])

        embs.sum().backward()
        self.assertIsNotNone(embedder.char_embeddings.weight.grad)

    def assertAlmostEqual(self, t1, t2):
        """Assert that two tensors have equal size and differ by less than 1e-6.

        NOTE: this intentionally shadows ``unittest.TestCase.assertAlmostEqual``
        with a tensor-specific, two-argument version.

        Args:
            t1: first tensor.
            t2: second tensor, expected to have the same size as ``t1``.
        """
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        # ``.item()`` turns the 0-dim result into a plain Python float so the
        # comparison and any failure message are not tensor-valued.
        self.assertLess((t1 - t2).abs().max().item(), 1e-6)
|
|
|
|
|
|
# Script entry point: run every test case in this module when the file is
# executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|