File: test_text_preprocessing.py

package info (click to toggle)
pytorch-audio 2.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,696 kB
  • sloc: python: 61,274; cpp: 10,031; sh: 128; ansic: 70; makefile: 34
file content (137 lines) | stat: -rw-r--r-- 3,957 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from parameterized import parameterized
from torchaudio._internal.module_utils import is_module_available
from torchaudio_unittest.common_utils import skipIfNoModule, TorchaudioTestCase

if is_module_available("unidecode") and is_module_available("inflect"):
    from pipeline_tacotron2.text.numbers import (
        _expand_decimal_point,
        _expand_dollars,
        _expand_number,
        _expand_ordinal,
        _expand_pounds,
        _remove_commas,
    )
    from pipeline_tacotron2.text.text_preprocessing import text_to_sequence


@skipIfNoModule("unidecode")
@skipIfNoModule("inflect")
class TestTextPreprocessor(TorchaudioTestCase):
    @parameterized.expand(
        [
            ["dr.  Strange?", [15, 26, 14, 31, 26, 29, 11, 30, 31, 29, 12, 25, 18, 16, 10]],
            ["ML, is        fun.", [24, 23, 6, 11, 20, 30, 11, 17, 32, 25, 7]],
            ["I love torchaudio!", [20, 11, 23, 26, 33, 16, 11, 31, 26, 29, 14, 19, 12, 32, 15, 20, 26, 2]],
            # 'one thousand dollars, twenty cents'
            [
                "$1,000.20",
                [
                    26,
                    25,
                    16,
                    11,
                    31,
                    19,
                    26,
                    32,
                    30,
                    12,
                    25,
                    15,
                    11,
                    15,
                    26,
                    23,
                    23,
                    12,
                    29,
                    30,
                    6,
                    11,
                    31,
                    34,
                    16,
                    25,
                    31,
                    36,
                    11,
                    14,
                    16,
                    25,
                    31,
                    30,
                ],
            ],
        ]
    )
    def test_text_to_sequence(self, sent, seq):

        assert text_to_sequence(sent) == seq

    @parameterized.expand(
        [
            ["He, she, and I have $1,000", "He, she, and I have $1000"],
        ]
    )
    def test_remove_commas(self, sent, truth):

        assert _remove_commas(sent) == truth

    @parameterized.expand(
        [
            ["He, she, and I have £1000", "He, she, and I have 1000 pounds"],
        ]
    )
    def test_expand_pounds(self, sent, truth):

        assert _expand_pounds(sent) == truth

    @parameterized.expand(
        [
            ["He, she, and I have $1000", "He, she, and I have 1000 dollars"],
            ["He, she, and I have $3000.01", "He, she, and I have 3000 dollars, 1 cent"],
            [
                "He has $500.20 and she has $1000.50.",
                "He has 500 dollars, 20 cents and she has 1000 dollars, 50 cents.",
            ],
        ]
    )
    def test_expand_dollars(self, sent, truth):

        assert _expand_dollars(sent) == truth

    @parameterized.expand(
        [
            ["1000.20", "1000 point 20"],
            ["1000.1", "1000 point 1"],
        ]
    )
    def test_expand_decimal_point(self, sent, truth):

        assert _expand_decimal_point(sent) == truth

    @parameterized.expand(
        [
            ["21st centry", "twenty-first centry"],
            ["20th centry", "twentieth centry"],
            ["2nd place.", "second place."],
        ]
    )
    def test_expand_ordinal(self, sent, truth):

        assert _expand_ordinal(sent) == truth
        _expand_ordinal,

    @parameterized.expand(
        [
            ["100020 dollars.", "one hundred thousand twenty dollars."],
            [
                "1234567890!",
                "one billion, two hundred thirty-four million, "
                "five hundred sixty-seven thousand, eight hundred ninety!",
            ],
        ]
    )
    def test_expand_number(self, sent, truth):

        assert _expand_number(sent) == truth