File: librispeech_test.py

Package: pytorch-audio 0.7.2-1 (Debian bullseye)

import os

from torchaudio.datasets import librispeech

from torchaudio_unittest.common_utils import (
    TempDirMixin,
    TorchaudioTestCase,
    get_whitenoise,
    save_wav,
    normalize_wav,
)

# Used to generate a unique utterance for each dummy audio file
NUMBERS = [
    'ZERO',
    'ONE',
    'TWO',
    'THREE',
    'FOUR',
    'FIVE',
    'SIX',
    'SEVEN',
    'EIGHT',
    'NINE'
]


class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
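    """Test torchaudio.datasets.LIBRISPEECH against a locally generated mock dataset."""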
    backend = 'default'

    root_dir = None
    samples = []

    @classmethod
    def setUpClass(cls):
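        # Build a mock dataset tree matching what LIBRISPEECH expects:
        # <root>/<FOLDER_IN_ARCHIVE>/<URL>/<speaker_id>/<chapter_id>/ containing
        # per-utterance WAV files plus a <speaker>-<chapter>.trans.txt transcript.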
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(
            cls.root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL
        )
        os.makedirs(dataset_dir, exist_ok=True)
        sample_rate = 16000  # 16kHz
        seed = 0

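        # 5 speakers x 3 chapters x 10 utterances of short white noise each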
        for speaker_id in range(5):
            speaker_path = os.path.join(dataset_dir, str(speaker_id))
            os.makedirs(speaker_path, exist_ok=True)

            for chapter_id in range(3):
                chapter_path = os.path.join(speaker_path, str(chapter_id))
                os.makedirs(chapter_path, exist_ok=True)
                trans_content = []

                for utterance_id in range(10):
                    filename = f'{speaker_id}-{chapter_id}-{utterance_id:04d}.wav'
                    path = os.path.join(chapter_path, filename)

                    utterance = ' '.join(
                        [NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]]
                    )
                    trans_content.append(
                        f'{speaker_id}-{chapter_id}-{utterance_id:04d} {utterance}'
                    )

                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=0.01,
                        n_channels=1,
                        dtype='float32',
                        seed=seed
                    )
                    save_wav(path, data, sample_rate)
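                    # Expected output of LIBRISPEECH.__getitem__ for this file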
                    sample = (
                        normalize_wav(data),
                        sample_rate,
                        utterance,
                        speaker_id,
                        chapter_id,
                        utterance_id
                    )
                    cls.samples.append(sample)

                    seed += 1

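                # One transcript file per chapter, one line per utterance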
                trans_filename = f'{speaker_id}-{chapter_id}.trans.txt'
                trans_path = os.path.join(chapter_path, trans_filename)
                with open(trans_path, 'w') as f:
                    f.write('\n'.join(trans_content))

    @classmethod
    def tearDownClass(cls):
        # Restore the default extension in case the test failed before resetting it
        librispeech.LIBRISPEECH._ext_audio = '.flac'

    def test_librispeech(self):
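        # Point the loader at .wav so the mock files are picked up
        # (the real LibriSpeech release ships FLAC audio).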
        librispeech.LIBRISPEECH._ext_audio = '.wav'
        dataset = librispeech.LIBRISPEECH(self.root_dir)

        num_samples = 0
        for i, (
            data, sample_rate, utterance, speaker_id, chapter_id, utterance_id
        ) in enumerate(dataset):
            self.assertEqual(data, self.samples[i][0], atol=5e-5, rtol=1e-8)
            assert sample_rate == self.samples[i][1]
            assert utterance == self.samples[i][2]
            assert speaker_id == self.samples[i][3]
            assert chapter_id == self.samples[i][4]
            assert utterance_id == self.samples[i][5]
            num_samples += 1

        assert num_samples == len(self.samples)
        librispeech.LIBRISPEECH._ext_audio = '.flac'