File: test_transformers.py

package info (click to toggle)
pytorch-text 0.14.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 11,560 kB
  • sloc: python: 14,197; cpp: 2,404; sh: 214; makefile: 20
file content (51 lines) | stat: -rw-r--r-- 2,073 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import torch

from ..common.parameterized_utils import nested_params
from ..common.torchtext_test_case import TorchtextTestCase


class TestTransformers(TorchtextTestCase):
    @nested_params(
        [True, False],
        [True, False],
    )
    def test_padded_input_inference(self, with_no_grad, return_all_layers):
        """Test that TransformerEncoder inference is identical with and without padding.

        Runs a small (2-layer) RoBERTa model over the same token sequence twice --
        once as-is and once with trailing pad tokens appended -- and asserts the
        encoder outputs agree on the non-pad positions.

        Args:
            with_no_grad: whether inference runs under ``torch.no_grad()``.
            return_all_layers: whether the encoder returns outputs of all layers.
        """
        from torchtext.models import RobertaEncoderConf, RobertaModel

        def encoder_inference(encoder, input_lst, with_no_grad):
            # set_grad_enabled(not with_no_grad) collapses the duplicated
            # with/without-grad branches into a single code path: grad tracking
            # is on by default, so enabling it explicitly is a no-op when
            # with_no_grad is False.
            with torch.set_grad_enabled(not with_no_grad):
                return [encoder(eval_input) for eval_input in input_lst]

        # RoBERTa base config except with fewer layers (2 instead of 12)
        # to keep the test fast.
        pad_idx = 1
        encoder_conf = RobertaEncoderConf(
            vocab_size=250002,
            embedding_dim=768,
            ffn_dimension=3072,
            padding_idx=pad_idx,
            max_seq_len=514,
            num_attention_heads=12,
            num_encoder_layers=2,
            dropout=0.1,
            scaling=None,
            normalize_before=False,
        )
        model = RobertaModel(encoder_conf)
        model = model.eval()
        # TODO: make return_all_layers a property of RobertaEncoderConf so it can be passed as arg
        model.encoder.transformer.return_all_layers = return_all_layers

        # Result of converting the string "some text" to a tensor using
        # xlmr_base embeddings. torch.tensor(..., dtype=...) builds the int
        # tensor in one step (the legacy torch.Tensor constructor would first
        # allocate a float tensor and then convert).
        input_no_pad = torch.tensor([[0, 3060, 7986, 2]], dtype=torch.int)
        data_len = input_no_pad.shape[1]  # sequence length of non-pad data
        # Same sequence with two padding tokens appended.
        input_pad = torch.tensor([[0, 3060, 7986, 2, pad_idx, pad_idx]], dtype=torch.int)
        input_lst = [input_no_pad, input_pad]

        output_no_pad, output_pad = encoder_inference(model, input_lst, with_no_grad)
        # Padding must not change the representations of the real tokens.
        torch.testing.assert_close(output_no_pad, output_pad[:, :data_len, :])