# Unit tests for the text preprocessing helpers of the Tacotron2 pipeline example.
from parameterized import parameterized
from torchaudio._internal.module_utils import is_module_available
from torchaudio_unittest.common_utils import skipIfNoModule, TorchaudioTestCase
if is_module_available("unidecode") and is_module_available("inflect"):
from pipeline_tacotron2.text.numbers import (
_expand_decimal_point,
_expand_dollars,
_expand_number,
_expand_ordinal,
_expand_pounds,
_remove_commas,
)
from pipeline_tacotron2.text.text_preprocessing import text_to_sequence
@skipIfNoModule("unidecode")
@skipIfNoModule("inflect")
class TestTextPreprocessor(TorchaudioTestCase):
    """Tests for the ``pipeline_tacotron2.text`` preprocessing helpers.

    The whole class is skipped when either ``unidecode`` or ``inflect`` is not
    installed, matching the guard around the ``pipeline_tacotron2`` imports.
    Every test is table-driven through ``parameterized.expand``: each row is
    ``[input, expected]`` and is passed positionally to the test method.
    """

    @parameterized.expand(
        [
            ["dr. Strange?", [15, 26, 14, 31, 26, 29, 11, 30, 31, 29, 12, 25, 18, 16, 10]],
            ["ML, is fun.", [24, 23, 6, 11, 20, 30, 11, 17, 32, 25, 7]],
            ["I love torchaudio!", [20, 11, 23, 26, 33, 16, 11, 31, 26, 29, 14, 19, 12, 32, 15, 20, 26, 2]],
            # 'one thousand dollars, twenty cents'
            [
                "$1,000.20",
                # One row per word of the expanded text above; 11 is the
                # separator between words and 6 is the comma.
                # fmt: off
                [
                    26, 25, 16,                      # one
                    11,
                    31, 19, 26, 32, 30, 12, 25, 15,  # thousand
                    11,
                    15, 26, 23, 23, 12, 29, 30,      # dollars
                    6,
                    11,
                    31, 34, 16, 25, 31, 36,          # twenty
                    11,
                    14, 16, 25, 31, 30,              # cents
                ],
                # fmt: on
            ],
        ]
    )
    def test_text_to_sequence(self, sent, seq):
        """``text_to_sequence`` turns a sentence into the expected list of integer IDs."""
        assert text_to_sequence(sent) == seq

    @parameterized.expand(
        [
            ["He, she, and I have $1,000", "He, she, and I have $1000"],
        ]
    )
    def test_remove_commas(self, sent, truth):
        """``_remove_commas`` drops the comma inside a number but keeps the other commas."""
        assert _remove_commas(sent) == truth

    @parameterized.expand(
        [
            ["He, she, and I have £1000", "He, she, and I have 1000 pounds"],
        ]
    )
    def test_expand_pounds(self, sent, truth):
        """``_expand_pounds`` rewrites a ``£`` amount as ``<amount> pounds``."""
        assert _expand_pounds(sent) == truth

    @parameterized.expand(
        [
            ["He, she, and I have $1000", "He, she, and I have 1000 dollars"],
            ["He, she, and I have $3000.01", "He, she, and I have 3000 dollars, 1 cent"],
            [
                "He has $500.20 and she has $1000.50.",
                "He has 500 dollars, 20 cents and she has 1000 dollars, 50 cents.",
            ],
        ]
    )
    def test_expand_dollars(self, sent, truth):
        """``_expand_dollars`` spells out dollar and cent parts, for every amount in the text."""
        assert _expand_dollars(sent) == truth

    @parameterized.expand(
        [
            ["1000.20", "1000 point 20"],
            ["1000.1", "1000 point 1"],
        ]
    )
    def test_expand_decimal_point(self, sent, truth):
        """``_expand_decimal_point`` replaces the decimal point with the word ``point``."""
        assert _expand_decimal_point(sent) == truth

    @parameterized.expand(
        [
            ["21st centry", "twenty-first centry"],
            ["20th centry", "twentieth centry"],
            ["2nd place.", "second place."],
        ]
    )
    def test_expand_ordinal(self, sent, truth):
        """``_expand_ordinal`` spells out ordinals and leaves the surrounding text untouched."""
        # The stray no-op expression ``_expand_ordinal,`` that used to follow
        # this assertion was dead code and has been removed.
        assert _expand_ordinal(sent) == truth

    @parameterized.expand(
        [
            ["100020 dollars.", "one hundred thousand twenty dollars."],
            [
                "1234567890!",
                "one billion, two hundred thirty-four million, "
                "five hundred sixty-seven thousand, eight hundred ninety!",
            ],
        ]
    )
    def test_expand_number(self, sent, truth):
        """``_expand_number`` spells out cardinal numbers in words."""
        assert _expand_number(sent) == truth
|