1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
import unittest
import torch
import torchaudio.transforms as T
from parameterized import param, parameterized
from torchaudio._internal.module_utils import is_module_available
from torchaudio_unittest.common_utils import get_sinusoid, get_spectrogram, get_whitenoise, nested_params, TestBaseMixin
# Record once, at import time, what is_module_available reports for librosa.
# The flag gates the optional import below and the skip decorator on the
# librosa-dependent test class in this module.
LIBROSA_AVAILABLE = is_module_available("librosa")
if LIBROSA_AVAILABLE:
    # Imported conditionally so this module can still be loaded when the
    # flag is false.
    import librosa
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class TransformsTestBase(TestBaseMixin):
    """Compare ``torchaudio.transforms`` outputs with librosa references.

    Each test moves a generated signal to ``self.device`` / ``self.dtype``,
    runs a torchaudio transform on it, and asserts that the output equals the
    value librosa computes from the same samples. The class is skipped when
    ``LIBROSA_AVAILABLE`` is false.
    """

    @parameterized.expand(
        [
            param(n_fft=400, hop_length=200, power=2.0),
            param(n_fft=600, hop_length=100, power=2.0),
            param(n_fft=400, hop_length=200, power=3.0),
            param(n_fft=200, hop_length=50, power=2.0),
        ]
    )
    def test_Spectrogram(self, n_fft, hop_length, power):
        """``T.Spectrogram`` agrees with librosa's ``_spectrogram`` for ``power``."""
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        noise = noise.to(self.device, self.dtype)

        # librosa returns a tuple; element 0 is what gets compared.
        reference = librosa.core.spectrum._spectrogram(
            y=noise[0].cpu().numpy(),
            n_fft=n_fft,
            hop_length=hop_length,
            power=power,
            pad_mode="reflect",
        )[0]

        transform = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=power)
        transform = transform.to(self.device, self.dtype)
        actual = transform(noise)[0]

        self.assertEqual(actual, torch.from_numpy(reference), atol=1e-4, rtol=1e-4)

    def test_Spectrogram_complex(self):
        """Magnitude of the ``power=None`` spectrogram equals librosa's ``power=1`` one."""
        sample_rate = 16000
        hop_length = 200
        n_fft = 400
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        noise = noise.to(self.device, self.dtype)

        reference = librosa.core.spectrum._spectrogram(
            y=noise[0].cpu().numpy(),
            n_fft=n_fft,
            hop_length=hop_length,
            power=1,
            pad_mode="reflect",
        )[0]

        transform = T.Spectrogram(
            n_fft=n_fft,
            hop_length=hop_length,
            power=None,
            return_complex=True,
        )
        transform = transform.to(self.device, self.dtype)
        complex_spec = transform(noise)[0]

        # Only the absolute value of the complex output is checked.
        self.assertEqual(complex_spec.abs(), torch.from_numpy(reference), atol=1e-4, rtol=1e-4)

    @nested_params(
        [
            param(n_fft=400, hop_length=200, n_mels=64),
            param(n_fft=600, hop_length=100, n_mels=128),
            param(n_fft=200, hop_length=50, n_mels=32),
        ],
        [param(norm=None), param(norm="slaney")],
        [param(mel_scale="htk"), param(mel_scale="slaney")],
    )
    def test_MelSpectrogram(self, n_fft, hop_length, n_mels, norm, mel_scale):
        """``T.MelSpectrogram`` agrees with ``librosa.feature.melspectrogram``."""
        sample_rate = 16000
        tone = get_sinusoid(sample_rate=sample_rate, n_channels=1)
        tone = tone.to(self.device, self.dtype)

        # librosa selects the mel scale through a boolean ``htk`` flag.
        use_htk = mel_scale == "htk"
        reference = librosa.feature.melspectrogram(
            y=tone[0].cpu().numpy(),
            sr=sample_rate,
            n_fft=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            norm=norm,
            htk=use_htk,
            pad_mode="reflect",
        )

        transform = T.MelSpectrogram(
            sample_rate=sample_rate,
            window_fn=torch.hann_window,
            hop_length=hop_length,
            n_mels=n_mels,
            n_fft=n_fft,
            norm=norm,
            mel_scale=mel_scale,
        )
        transform = transform.to(self.device, self.dtype)
        actual = transform(tone)[0]

        self.assertEqual(actual, torch.from_numpy(reference), atol=5e-4, rtol=1e-5)

    def test_magnitude_to_db(self):
        """``T.AmplitudeToDB("magnitude", 80.0)`` agrees with ``librosa.amplitude_to_db``."""
        spec = get_spectrogram(get_whitenoise(), n_fft=400, power=2)
        spec = spec.to(self.device, self.dtype)

        to_db = T.AmplitudeToDB("magnitude", 80.0).to(self.device, self.dtype)
        actual = to_db(spec)[0]

        reference = librosa.core.spectrum.amplitude_to_db(spec[0].cpu().numpy())
        self.assertEqual(actual, torch.from_numpy(reference))

    def test_power_to_db(self):
        """``T.AmplitudeToDB("power", 80.0)`` agrees with ``librosa.power_to_db``."""
        spec = get_spectrogram(get_whitenoise(), n_fft=400, power=2)
        spec = spec.to(self.device, self.dtype)

        to_db = T.AmplitudeToDB("power", 80.0).to(self.device, self.dtype)
        actual = to_db(spec)[0]

        reference = librosa.core.spectrum.power_to_db(spec[0].cpu().numpy())
        self.assertEqual(actual, torch.from_numpy(reference))

    @nested_params(
        [
            param(n_fft=400, hop_length=200, n_mels=64, n_mfcc=40),
            param(n_fft=600, hop_length=100, n_mels=128, n_mfcc=20),
            param(n_fft=200, hop_length=50, n_mels=32, n_mfcc=25),
        ]
    )
    def test_mfcc(self, n_fft, hop_length, n_mels, n_mfcc):
        """``T.MFCC`` agrees with ``librosa.feature.mfcc`` fed a dB mel spectrogram."""
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        noise = noise.to(self.device, self.dtype)

        mel_options = {"hop_length": hop_length, "n_fft": n_fft, "n_mels": n_mels}
        transform = T.MFCC(
            sample_rate=sample_rate,
            n_mfcc=n_mfcc,
            norm="ortho",
            melkwargs=mel_options,
        )
        transform = transform.to(self.device, self.dtype)
        actual = transform(noise)[0]

        # Build the librosa reference in two steps: mel spectrogram, then the
        # MFCC of its power-to-dB conversion.
        mel_reference = librosa.feature.melspectrogram(
            y=noise[0].cpu().numpy(),
            sr=sample_rate,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            htk=True,
            norm=None,
            pad_mode="reflect",
        )
        mel_reference_db = librosa.core.spectrum.power_to_db(mel_reference)
        reference = librosa.feature.mfcc(S=mel_reference_db, n_mfcc=n_mfcc, dct_type=2, norm="ortho")

        self.assertEqual(actual, torch.from_numpy(reference), atol=5e-4, rtol=1e-5)

    @parameterized.expand(
        [
            param(n_fft=400, hop_length=200),
            param(n_fft=600, hop_length=100),
            param(n_fft=200, hop_length=50),
        ]
    )
    def test_spectral_centroid(self, n_fft, hop_length):
        """``T.SpectralCentroid`` agrees with ``librosa.feature.spectral_centroid``."""
        sample_rate = 16000
        noise = get_whitenoise(sample_rate=sample_rate, n_channels=1)
        noise = noise.to(self.device, self.dtype)

        transform = T.SpectralCentroid(sample_rate=sample_rate, n_fft=n_fft, hop_length=hop_length)
        transform = transform.to(self.device, self.dtype)
        # The full transform output is compared here, without selecting index 0.
        actual = transform(noise)

        reference = librosa.feature.spectral_centroid(
            y=noise[0].cpu().numpy(),
            sr=sample_rate,
            n_fft=n_fft,
            hop_length=hop_length,
            pad_mode="reflect",
        )

        self.assertEqual(actual, torch.from_numpy(reference), atol=5e-4, rtol=1e-5)
|