1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
|
import unittest
from distutils.version import StrictVersion
import torch
import torchaudio.functional as F
from parameterized import param
from torchaudio._internal.module_utils import is_module_available
LIBROSA_AVAILABLE = is_module_available("librosa")
if LIBROSA_AVAILABLE:
import librosa
import numpy as np
from torchaudio_unittest.common_utils import get_spectrogram, get_whitenoise, nested_params, TestBaseMixin
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class Functional(TestBaseMixin):
    """Test suite for functions in `functional` module.

    Every test computes a value with ``torchaudio.functional`` and checks it
    against the output of the corresponding librosa routine. ``self.device``
    is read but not defined here; it is presumably supplied by the concrete
    subclass that mixes this suite in (confirm against the callers).
    """

    dtype = torch.float64

    @staticmethod
    def _release_tuple(version):
        """Return the leading numeric release fields of *version* as ints.

        Only the first three dot-separated fields are considered and parsing
        stops at the first field that does not start with a digit, so
        ``"0.10.0.post2"`` gives ``(0, 10, 0)`` and ``"0.8.1rc2"`` gives
        ``(0, 8, 1)``. This is used instead of ``StrictVersion``, which raises
        ``ValueError`` for such version strings.
        """
        release = []
        for field in version.split(".")[:3]:
            digits = ""
            for char in field:
                if not "0" <= char <= "9":
                    break
                digits += char
            if not digits:
                break
            release.append(int(digits))
        return tuple(release)

    @nested_params([0, 0.99])
    def test_griffinlim(self, momentum):
        """`F.griffinlim` matches `librosa.griffinlim` for the given momentum."""
        # FFT params
        n_fft = 400
        win_length = n_fft
        hop_length = n_fft // 4
        window = torch.hann_window(win_length, device=self.device)
        power = 1
        # GriffinLim params
        n_iter = 8

        waveform = get_whitenoise(device=self.device, dtype=self.dtype)
        specgram = get_spectrogram(
            waveform, n_fft=n_fft, hop_length=hop_length, power=power, win_length=win_length, window=window
        )

        # rand_init=False here and init=None below: both calls are asked not
        # to use a random initial phase, so the outputs can be compared.
        result = F.griffinlim(
            specgram,
            window=window,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            power=power,
            n_iter=n_iter,
            momentum=momentum,
            length=waveform.size(1),
            rand_init=False,
        )
        # librosa receives the first spectrogram only; the leading axis is
        # re-added afterwards so the shape lines up with `result`.
        expected = librosa.griffinlim(
            specgram[0].cpu().numpy(),
            n_iter=n_iter,
            hop_length=hop_length,
            momentum=momentum,
            init=None,
            length=waveform.size(1),
            pad_mode="reflect",
        )[None, ...]
        self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07)

    @nested_params(
        [
            param(),
            param(n_mels=128, sample_rate=44100),
            param(n_mels=128, fmin=2000.0, fmax=5000.0),
            param(n_mels=56, fmin=100.0, fmax=9000.0),
            param(n_mels=56, fmin=800.0, fmax=900.0),
            param(n_mels=56, fmin=1900.0, fmax=900.0),
            param(n_mels=10, fmin=1900.0, fmax=900.0),
        ],
        [param(norm=n) for n in [None, "slaney"]],
        [param(mel_scale=s) for s in ["htk", "slaney"]],
    )
    def test_create_mel_fb(
        self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0, norm=None, mel_scale="htk"
    ):
        """`F.melscale_fbanks` matches the transpose of `librosa.filters.mel`."""
        # Compare numeric release fields rather than using StrictVersion, which
        # raises ValueError on librosa versions such as "0.10.0.post2".
        if norm == "slaney" and self._release_tuple(librosa.__version__) < (0, 7, 2):
            self.skipTest("Test is known to fail with older versions of librosa.")
        if self.device != "cpu":
            self.skipTest("No need to run this test on CUDA")

        # The librosa filter bank is transposed before comparison with the
        # torchaudio one.
        expected = librosa.filters.mel(
            sr=sample_rate, n_fft=n_fft, n_mels=n_mels, fmax=fmax, fmin=fmin, htk=mel_scale == "htk", norm=norm
        ).T
        result = F.melscale_fbanks(
            sample_rate=sample_rate,
            n_mels=n_mels,
            f_max=fmax,
            f_min=fmin,
            n_freqs=(n_fft // 2 + 1),
            norm=norm,
            mel_scale=mel_scale,
        )
        self.assertEqual(result, torch.from_numpy(expected), atol=7e-5, rtol=1.3e-6)

    def test_amplitude_to_DB_power(self):
        """`F.amplitude_to_DB` on a power spectrogram matches `librosa.core.power_to_db`."""
        # librosa is called with its default arguments; these constants are
        # presumably chosen to mirror those defaults (confirm against librosa).
        amin = 1e-10
        db_multiplier = 0.0
        top_db = 80.0
        multiplier = 10.0

        spec = get_spectrogram(get_whitenoise(device=self.device, dtype=self.dtype), power=2)
        result = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
        expected = librosa.core.power_to_db(spec[0].cpu().numpy())[None, ...]
        self.assertEqual(result, torch.from_numpy(expected))

    def test_amplitude_to_DB(self):
        """`F.amplitude_to_DB` on a magnitude spectrogram matches `librosa.core.amplitude_to_db`."""
        # librosa is called with its default arguments; these constants are
        # presumably chosen to mirror those defaults (confirm against librosa).
        amin = 1e-10
        db_multiplier = 0.0
        top_db = 80.0
        multiplier = 20.0

        spec = get_spectrogram(get_whitenoise(device=self.device, dtype=self.dtype), power=1)
        result = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
        expected = librosa.core.amplitude_to_db(spec[0].cpu().numpy())[None, ...]
        self.assertEqual(result, torch.from_numpy(expected))
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class FunctionalComplex(TestBaseMixin):
    """Checks complex-valued `functional` ops against their librosa counterparts."""

    @nested_params([0.5, 1.01, 1.3])
    def test_phase_vocoder(self, rate):
        """`F.phase_vocoder` matches `librosa.phase_vocoder` for the given rate."""
        hop_length = 256
        n_freq, n_frames = 1025, 400

        # Double precision is used on purpose: because of the cumulative sum,
        # the numerical error of torch.float32 makes the bottom-right values of
        # the stretched spectrogram differ from librosa's.
        complex_spec = torch.randn(n_freq, n_frames, device=self.device, dtype=torch.complex128)

        # One value per frequency bin, with a trailing singleton axis added.
        advance_per_bin = torch.linspace(
            0, np.pi * hop_length, n_freq, device=self.device, dtype=torch.float64
        )
        phase_advance = advance_per_bin[..., None]

        actual = F.phase_vocoder(complex_spec, rate=rate, phase_advance=phase_advance)
        reference = librosa.phase_vocoder(complex_spec.cpu().numpy(), rate=rate, hop_length=hop_length)

        self.assertEqual(actual, torch.from_numpy(reference))
|