File: sox_compatibility_test.py

package info (click to toggle)
pytorch-audio 2.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,696 kB
  • sloc: python: 61,274; cpp: 10,031; sh: 128; ansic: 70; makefile: 34
file content (93 lines) | stat: -rw-r--r-- 3,022 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import warnings

import torch
import torchaudio.transforms as T
from parameterized import parameterized
from torchaudio_unittest.common_utils import (
    get_asset_path,
    get_whitenoise,
    load_wav,
    save_wav,
    skipIfNoExec,
    skipIfNoSox,
    sox_utils,
    TempDirMixin,
    TorchaudioTestCase,
)


@skipIfNoSox
@skipIfNoExec("sox")
class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase):
    """Check that torchaudio transforms agree with the ``sox`` command.

    Each compatibility test applies a transform from ``torchaudio.transforms``
    to a waveform, runs the corresponding sox effect on the same audio file
    through ``sox_utils.run_sox_effect``, and compares the two results.
    """

    def run_sox_effect(self, input_file, effect):
        """Run a sox effect on ``input_file`` and load the produced file.

        Args:
            input_file: Path of the audio file handed to sox.
            effect: Iterable of effect tokens; every token is passed through
                ``str`` before being given to sox.

        Returns:
            The value of ``load_wav`` for the generated file. Callers in this
            class unpack it as a pair whose first item is the waveform.
        """
        output_file = self.get_temp_path("expected.wav")
        sox_utils.run_sox_effect(input_file, output_file, [str(e) for e in effect])
        return load_wav(output_file)

    def assert_sox_effect(self, result, input_path, effects, atol=1e-04, rtol=1e-5):
        """Assert that ``result`` matches sox's output for ``effects``.

        Args:
            result: Output of the torchaudio transform under test.
            input_path: Path of the audio file the transform input came from.
            effects: Effect tokens forwarded to :meth:`run_sox_effect`.
            atol: Absolute tolerance forwarded to ``assertEqual``.
            rtol: Relative tolerance forwarded to ``assertEqual``.
        """
        expected, _ = self.run_sox_effect(input_path, effects)
        self.assertEqual(result, expected, atol=atol, rtol=rtol)

    def get_whitenoise(self, sample_rate=8000):
        """Create a white-noise fixture and save it as a temporary wav file.

        The noise comes from the module-level ``get_whitenoise`` helper (the
        bare name inside a method resolves to the imported function, not to
        this method) with ``duration=3`` and ``scale_factor=0.9``.

        Args:
            sample_rate: Sample rate used to generate and to save the noise.

        Returns:
            A ``(noise, path)`` tuple: the generated waveform and the path of
            the wav file it was written to.
        """
        noise = get_whitenoise(
            sample_rate=sample_rate,
            duration=3,
            scale_factor=0.9,
        )
        path = self.get_temp_path("whitenoise.wav")
        save_wav(path, noise, sample_rate)
        return noise, path

    def _count_vad_warnings(self, data, sample_rate):
        """Return how many warnings ``T.Vad(sample_rate)(data)`` emits."""
        with warnings.catch_warnings(record=True) as caught:
            # "always" disables the once-per-location de-duplication so that
            # every call is counted independently of earlier calls.
            warnings.simplefilter("always")
            T.Vad(sample_rate)(data)
        return len(caught)

    @parameterized.expand(
        [
            ("q", "quarter_sine"),
            ("h", "half_sine"),
            ("t", "linear"),
        ]
    )
    def test_fade(self, fade_shape_sox, fade_shape):
        """``T.Fade`` matches sox ``fade`` for each supported shape."""
        sample_rate = 44100
        # One sample_rate worth of samples lines up with the "1" handed to
        # sox below for both the fade-in and the fade-out length.
        fade_in_len = fade_out_len = sample_rate
        data, path = self.get_whitenoise(sample_rate=sample_rate)
        result = T.Fade(fade_in_len, fade_out_len, fade_shape)(data)
        self.assert_sox_effect(result, path, ["fade", fade_shape_sox, "1", "0", "1"])

    @parameterized.expand(
        [
            ("amplitude", 1.1),
            ("db", 2),
            ("power", 2),
        ]
    )
    def test_vol(self, gain_type, gain):
        """``T.Vol`` matches sox ``vol`` for each gain type."""
        data, path = self.get_whitenoise()
        result = T.Vol(gain, gain_type)(data)
        self.assert_sox_effect(result, path, ["vol", f"{gain}", gain_type])

    @parameterized.expand(["vad-go-stereo-44100.wav", "vad-go-mono-32000.wav"])
    def test_vad(self, filename):
        """``T.Vad`` matches sox ``vad`` on the bundled asset files."""
        path = get_asset_path(filename)
        data, sample_rate = load_wav(path)
        result = T.Vad(sample_rate)(data)
        self.assert_sox_effect(result, path, ["vad"])

    def test_vad_warning(self):
        """vad should throw a warning if input dimension is greater than 2"""
        # NOTE(review): 41100 looks like a typo for 44100; kept unchanged
        # because the value is only used to size the random input here.
        sample_rate = 41100

        # unittest assertions are used instead of bare ``assert`` so the
        # checks survive ``python -O`` and report the observed count.
        # 3-D input: exactly one warning is expected.
        self.assertEqual(
            self._count_vad_warnings(torch.rand(5, 5, sample_rate), sample_rate), 1
        )
        # 2-D input: no warning.
        self.assertEqual(
            self._count_vad_warnings(torch.rand(5, sample_rate), sample_rate), 0
        )
        # 1-D input: no warning.
        self.assertEqual(
            self._count_vad_warnings(torch.rand(sample_rate), sample_rate), 0
        )