File: rhythm.py

package info (click to toggle)
python-librosa 0.11.0-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 166,732 kB
  • sloc: python: 21,731; makefile: 141; sh: 2
file content (655 lines) | stat: -rw-r--r-- 24,389 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Rhythmic feature extraction"""

import numpy as np
import scipy

from .. import util

from .._cache import cache
from ..core.audio import autocorrelate
from ..core.spectrum import stft
from ..core.convert import tempo_frequencies, time_to_frames
from ..core.harmonic import f0_harmonics
from ..util.exceptions import ParameterError
from ..filters import get_window
from typing import Optional, Callable, Any
from .._typing import _WindowSpec

__all__ = ["tempogram", "fourier_tempogram", "tempo", "tempogram_ratio"]


# -- Rhythmic features -- #
def tempogram(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    center: bool = True,
    window: _WindowSpec = "hann",
    norm: Optional[float] = np.inf,
) -> np.ndarray:
    """Compute the autocorrelation tempogram of an onset strength envelope. [#]_

    The onset envelope is sliced into overlapping windows (one window per
    frame, hop of a single frame).  Each window is multiplied by ``window``,
    autocorrelated, and finally normalized along the lag axis.

    .. [#] Grosche, Peter, Meinard Müller, and Frank Kurth.
        "Cyclic tempogram - A mid-level tempo representation for music signals."
        ICASSP, 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        Audio time series.  Multi-channel is supported.
        Only used when ``onset_envelope`` is not provided.

    sr : number > 0 [scalar]
        sampling rate of ``y``

    onset_envelope : np.ndarray [shape=(..., n) or (..., m, n)] or None
        Optional pre-computed onset strength envelope as provided by
        `librosa.onset.onset_strength`.

        If multi-dimensional, tempograms are computed independently for each
        band (first dimension).

    hop_length : int > 0
        number of audio samples between successive onset measurements

    win_length : int > 0
        length of the onset autocorrelation window (in frames/onset measurements)
        The default settings (384) corresponds to ``384 * hop_length / sr ~= 8.9s``.

    center : bool
        If `True`, onset autocorrelation windows are centered: the envelope
        is padded by ``win_length // 2`` frames on each side and the output
        is truncated to the original number of frames.
        If `False`, windows are left-aligned.

    window : string, function, number, tuple, or np.ndarray [shape=(win_length,)]
        A window specification as in `stft`.

    norm : {np.inf, -np.inf, 0, float > 0, None}
        Normalization mode.  Set to `None` to disable normalization.

    Returns
    -------
    tempogram : np.ndarray [shape=(..., win_length, n)]
        Localized autocorrelation of the onset strength envelope.

        If given multi-band input (``onset_envelope.shape==(m,n)``) then
        ``tempogram[i]`` is the tempogram of ``onset_envelope[i]``.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided

        if ``win_length < 1``

    See Also
    --------
    fourier_tempogram
    librosa.onset.onset_strength
    librosa.util.normalize
    librosa.stft

    Examples
    --------
    >>> # Compute local onset autocorrelation
    >>> y, sr = librosa.load(librosa.ex('nutcracker'), duration=30)
    >>> hop_length = 512
    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    >>> tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr,
    ...                                       hop_length=hop_length)
    >>> # Compute global onset autocorrelation
    >>> ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
    >>> ac_global = librosa.util.normalize(ac_global)
    >>> # Estimate the global tempo for display purposes
    >>> tempo = librosa.feature.tempo(onset_envelope=oenv, sr=sr,
    ...                               hop_length=hop_length)[0]

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=4, figsize=(10, 10))
    >>> times = librosa.times_like(oenv, sr=sr, hop_length=hop_length)
    >>> ax[0].plot(times, oenv, label='Onset strength')
    >>> ax[0].label_outer()
    >>> ax[0].legend(frameon=True)
    >>> librosa.display.specshow(tempogram, sr=sr, hop_length=hop_length,
    ...                          x_axis='time', y_axis='tempo', cmap='magma',
    ...                          ax=ax[1])
    >>> ax[1].axhline(tempo, color='w', linestyle='--', alpha=1,
    ...             label='Estimated tempo={:g}'.format(tempo))
    >>> ax[1].legend(loc='upper right')
    >>> ax[1].set(title='Tempogram')
    >>> x = np.linspace(0, tempogram.shape[0] * float(hop_length) / sr,
    ...                 num=tempogram.shape[0])
    >>> ax[2].plot(x, np.mean(tempogram, axis=1), label='Mean local autocorrelation')
    >>> ax[2].plot(x, ac_global, '--', alpha=0.75, label='Global autocorrelation')
    >>> ax[2].set(xlabel='Lag (seconds)')
    >>> ax[2].legend(frameon=True)
    >>> freqs = librosa.tempo_frequencies(tempogram.shape[0], hop_length=hop_length, sr=sr)
    >>> ax[3].semilogx(freqs[1:], np.mean(tempogram[1:], axis=1),
    ...              label='Mean local autocorrelation', base=2)
    >>> ax[3].semilogx(freqs[1:], ac_global[1:], '--', alpha=0.75,
    ...              label='Global autocorrelation', base=2)
    >>> ax[3].axvline(tempo, color='black', linestyle='--', alpha=.8,
    ...             label='Estimated tempo={:g}'.format(tempo))
    >>> ax[3].legend(frameon=True)
    >>> ax[3].set(xlabel='BPM')
    >>> ax[3].grid(True)
    """
    # Imported at call time rather than at module scope
    from ..onset import onset_strength

    if win_length < 1:
        raise ParameterError("win_length must be a positive integer")

    taper = get_window(window, win_length, fftbins=True)

    if onset_envelope is None:
        if y is None:
            raise ParameterError("Either y or onset_envelope must be provided")

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Remember the frame count before any padding is applied
    n_frames = onset_envelope.shape[-1]

    if center:
        # Pad only the trailing (time) axis, ramping down to zero on both ends
        half_window = int(win_length // 2)
        pad_widths = [(0, 0)] * (len(onset_envelope.shape) - 1)
        pad_widths.append((half_window, half_window))
        padded = np.pad(
            onset_envelope, pad_widths, mode="linear_ramp", end_values=[0, 0]
        )

        # Slice into windows, then drop the surplus frames introduced by padding
        frames = util.frame(padded, frame_length=win_length, hop_length=1)
        frames = frames[..., :n_frames]
    else:
        frames = util.frame(onset_envelope, frame_length=win_length, hop_length=1)

    # Align the window with the lag axis so the product broadcasts explicitly
    taper = util.expand_to(taper, ndim=frames.ndim, axes=-2)

    # Window and autocorrelate each frame, then normalize along the lag axis
    local_ac = autocorrelate(frames * taper, axis=-2)
    return util.normalize(local_ac, norm=norm, axis=-2)


def fourier_tempogram(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    center: bool = True,
    window: _WindowSpec = "hann",
) -> np.ndarray:
    """Compute the Fourier tempogram: the short-time Fourier transform of the
    onset strength envelope. [#]_

    The transform is taken over the onset envelope with an FFT size of
    ``win_length`` and a hop of one onset frame.

    .. [#] Grosche, Peter, Meinard Müller, and Frank Kurth.
        "Cyclic tempogram - A mid-level tempo representation for music signals."
        ICASSP, 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        Audio time series.  Multi-channel is supported.
        Only used when ``onset_envelope`` is not provided.
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(..., n)] or None
        Optional pre-computed onset strength envelope as provided by
        ``librosa.onset.onset_strength``.
        Multi-channel is supported.
    hop_length : int > 0
        number of audio samples between successive onset measurements
    win_length : int > 0
        length of the onset window (in frames/onset measurements)
        The default settings (384) corresponds to ``384 * hop_length / sr ~= 8.9s``.
    center : bool
        If `True`, onset windows are centered.
        If `False`, windows are left-aligned.
    window : string, function, number, tuple, or np.ndarray [shape=(win_length,)]
        A window specification as in `stft`.

    Returns
    -------
    tempogram : np.ndarray [shape=(..., win_length // 2 + 1, n)]
        Complex short-time Fourier transform of the onset envelope.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided

        if ``win_length < 1``

    See Also
    --------
    tempogram
    librosa.onset.onset_strength
    librosa.util.normalize
    librosa.stft

    Examples
    --------
    >>> # Compute the Fourier tempogram of the onset envelope
    >>> y, sr = librosa.load(librosa.ex('nutcracker'))
    >>> hop_length = 512
    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    >>> tempogram = librosa.feature.fourier_tempogram(onset_envelope=oenv, sr=sr,
    ...                                               hop_length=hop_length)
    >>> # Compute the auto-correlation tempogram, unnormalized to make comparison easier
    >>> ac_tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr,
    ...                                          hop_length=hop_length, norm=None)

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=3, sharex=True)
    >>> ax[0].plot(librosa.times_like(oenv), oenv, label='Onset strength')
    >>> ax[0].legend(frameon=True)
    >>> ax[0].label_outer()
    >>> librosa.display.specshow(np.abs(tempogram), sr=sr, hop_length=hop_length,
    ...                          x_axis='time', y_axis='fourier_tempo', cmap='magma',
    ...                          ax=ax[1])
    >>> ax[1].set(title='Fourier tempogram')
    >>> ax[1].label_outer()
    >>> librosa.display.specshow(ac_tempogram, sr=sr, hop_length=hop_length,
    ...                          x_axis='time', y_axis='tempo', cmap='magma',
    ...                          ax=ax[2])
    >>> ax[2].set(title='Autocorrelation tempogram')
    """
    # Imported at call time rather than at module scope
    from ..onset import onset_strength

    if win_length < 1:
        raise ParameterError("win_length must be a positive integer")

    if onset_envelope is None and y is None:
        raise ParameterError("Either y or onset_envelope must be provided")

    # Fall back to computing the envelope from audio when none was supplied
    if onset_envelope is None:
        envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)
    else:
        envelope = onset_envelope

    # One STFT column per onset frame
    return stft(
        envelope,
        n_fft=win_length,
        hop_length=1,
        center=center,
        window=window,
    )


@cache(level=30)
def tempo(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    tg: Optional[np.ndarray] = None,
    hop_length: int = 512,
    start_bpm: float = 120,
    std_bpm: float = 1.0,
    ac_size: float = 8.0,
    max_tempo: Optional[float] = 320.0,
    aggregate: Optional[Callable[..., Any]] = np.mean,
    prior: Optional[scipy.stats.rv_continuous] = None,
) -> np.ndarray:
    """Estimate the tempo (beats per minute)

    The estimate is the BPM whose tempogram bin maximizes the (log-scaled)
    autocorrelation plus a log-prior over tempo.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of the time series
    onset_envelope : np.ndarray [shape=(..., n)]
        pre-computed onset strength envelope
    tg : np.ndarray
        pre-computed tempogram.  If provided, then `y`,
        `onset_envelope` and `ac_size` are ignored, and the
        number of lag bins is taken from ``tg.shape[-2]``.
    hop_length : int > 0 [scalar]
        hop length of the time series
    start_bpm : float > 0 [scalar]
        initial guess of the BPM
    std_bpm : float > 0 [scalar]
        standard deviation of tempo distribution
    ac_size : float > 0 [scalar]
        length (in seconds) of the auto-correlation window
    max_tempo : float > 0 [scalar, optional]
        If provided, only estimate tempo below this threshold
    aggregate : callable [optional]
        Aggregation function for estimating global tempo.
        It is called as ``aggregate(tg, axis=-1, keepdims=True)``.
        If `None`, then tempo is estimated independently for each frame.
    prior : scipy.stats.rv_continuous [optional]
        A prior distribution over tempo (in beats per minute).
        By default, a pseudo-log-normal prior is used.
        If given, ``start_bpm`` and ``std_bpm`` will be ignored.

    Returns
    -------
    tempo : np.ndarray
        estimated tempo (beats per minute).
        If input is multi-channel, one tempo estimate per channel is provided.

    Raises
    ------
    ParameterError
        if ``start_bpm <= 0``

    See Also
    --------
    librosa.onset.onset_strength
    librosa.feature.tempogram

    Notes
    -----
    This function caches at level 30.

    Examples
    --------
    >>> # Estimate a static tempo
    >>> y, sr = librosa.load(librosa.ex('nutcracker'), duration=30)
    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> tempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr)
    >>> tempo
    array([143.555])

    >>> # Or a static tempo with a uniform prior instead
    >>> import scipy.stats
    >>> prior = scipy.stats.uniform(30, 300)  # uniform over 30-300 BPM
    >>> utempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr, prior=prior)
    >>> utempo
    array([161.499])

    >>> # Or a dynamic tempo
    >>> dtempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr,
    ...                                aggregate=None)
    >>> dtempo
    array([ 89.103,  89.103,  89.103, ..., 123.047, 123.047, 123.047])

    >>> # Dynamic tempo with a proper log-normal prior
    >>> prior_lognorm = scipy.stats.lognorm(loc=np.log(120), scale=120, s=1)
    >>> dtempo_lognorm = librosa.feature.tempo(onset_envelope=onset_env, sr=sr,
    ...                                        aggregate=None,
    ...                                        prior=prior_lognorm)
    >>> dtempo_lognorm
    array([ 89.103,  89.103,  89.103, ..., 123.047, 123.047, 123.047])

    Plot the estimated tempo against the onset autocorrelation

    >>> import matplotlib.pyplot as plt
    >>> # Convert to scalar
    >>> tempo = tempo.item()
    >>> utempo = utempo.item()
    >>> # Compute 2-second windowed autocorrelation
    >>> hop_length = 512
    >>> ac = librosa.autocorrelate(onset_env, max_size=2 * sr // hop_length)
    >>> freqs = librosa.tempo_frequencies(len(ac), sr=sr,
    ...                                   hop_length=hop_length)
    >>> # Plot on a BPM axis.  We skip the first (0-lag) bin.
    >>> fig, ax = plt.subplots()
    >>> ax.semilogx(freqs[1:], librosa.util.normalize(ac)[1:],
    ...              label='Onset autocorrelation', base=2)
    >>> ax.axvline(tempo, 0, 1, alpha=0.75, linestyle='--', color='r',
    ...             label='Tempo (default prior): {:.2f} BPM'.format(tempo))
    >>> ax.axvline(utempo, 0, 1, alpha=0.75, linestyle=':', color='g',
    ...             label='Tempo (uniform prior): {:.2f} BPM'.format(utempo))
    >>> ax.set(xlabel='Tempo (BPM)', title='Static tempo estimation')
    >>> ax.grid(True)
    >>> ax.legend()

    Plot dynamic tempo estimates over a tempogram

    >>> fig, ax = plt.subplots()
    >>> tg = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr,
    ...                                hop_length=hop_length)
    >>> librosa.display.specshow(tg, x_axis='time', y_axis='tempo', cmap='magma', ax=ax)
    >>> ax.plot(librosa.times_like(dtempo), dtempo,
    ...          color='c', linewidth=1.5, label='Tempo estimate (default prior)')
    >>> ax.plot(librosa.times_like(dtempo_lognorm), dtempo_lognorm,
    ...          color='c', linewidth=1.5, linestyle='--',
    ...          label='Tempo estimate (lognorm prior)')
    >>> ax.set(title='Dynamic tempo estimation')
    >>> ax.legend()
    """
    if start_bpm <= 0:
        raise ParameterError("start_bpm must be strictly positive")

    if tg is not None:
        # A supplied tempogram dictates the number of lag bins
        win_length = tg.shape[-2]
    else:
        # Convert the window duration (seconds) into a frame count
        win_length = time_to_frames(ac_size, sr=sr, hop_length=hop_length).item()

        tg = tempogram(
            y=y,
            sr=sr,
            onset_envelope=onset_envelope,
            hop_length=hop_length,
            win_length=win_length,
        )

    # Collapse the time axis for a global estimate; keepdims retains the
    # trailing axis so the per-frame code path below applies unchanged
    if aggregate is not None:
        tg = aggregate(tg, axis=-1, keepdims=True)

    assert tg is not None

    # BPM value associated with each lag bin of the tempogram
    bpms = tempo_frequencies(win_length, hop_length=hop_length, sr=sr)

    # Log-weight for each candidate tempo
    if prior is not None:
        log_weights = prior.logpdf(bpms)
    else:
        # Pseudo-log-normal: Gaussian penalty on the log2 distance from start_bpm
        octave_offset = np.log2(bpms) - np.log2(start_bpm)
        log_weights = -0.5 * (octave_offset / std_bpm) ** 2

    # Rule out every bin that precedes the first one below max_tempo
    if max_tempo is not None:
        first_allowed = int(np.argmax(bpms < max_tempo))
        log_weights[:first_allowed] = -np.inf

    # Align the weights with the lag axis of the tempogram
    log_weights = util.expand_to(log_weights, ndim=tg.ndim, axes=-2)

    # Pick the lag bin with the highest prior-weighted score.
    # log1p is used here for numerical stability.
    scores = np.log1p(1e6 * tg) + log_weights
    best_period = np.argmax(scores, axis=-2)

    tempo_est: np.ndarray = np.take(bpms, best_period)
    return tempo_est


@cache(level=40)
def tempogram_ratio(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    tg: Optional[np.ndarray] = None,
    bpm: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    start_bpm: float = 120,
    std_bpm: float = 1.0,
    max_tempo: Optional[float] = 320.0,
    freqs: Optional[np.ndarray] = None,
    factors: Optional[np.ndarray] = None,
    aggregate: Optional[Callable[..., Any]] = None,
    prior: Optional[scipy.stats.rv_continuous] = None,
    center: bool = True,
    window: _WindowSpec = "hann",
    kind: str = "linear",
    fill_value: float = 0,
    norm: Optional[float] = np.inf,
) -> np.ndarray:
    """Tempogram ratio features, also known as spectral rhythm patterns. [1]_

    This function summarizes the energy at metrically important multiples
    of the tempo.  For example, if the tempo corresponds to the quarter-note
    period, the tempogram ratio will measure the energy at the eighth note,
    sixteenth note, half note, whole note, etc. periods, as well as dotted
    and triplet ratios.

    By default, the multiplicative factors used here are as specified by
    [2]_.  If the estimated tempo corresponds to a quarter note, these factors
    will measure relative energy at the following metrical subdivisions:

    +-------+--------+------------------+
    | Index | Factor | Description      |
    +=======+========+==================+
    |     0 |    4   | Sixteenth note   |
    +-------+--------+------------------+
    |     1 |    8/3 | Dotted sixteenth |
    +-------+--------+------------------+
    |     2 |    3   | Eighth triplet   |
    +-------+--------+------------------+
    |     3 |    2   | Eighth note      |
    +-------+--------+------------------+
    |     4 |    4/3 | Dotted eighth    |
    +-------+--------+------------------+
    |     5 |    3/2 | Quarter triplet  |
    +-------+--------+------------------+
    |     6 |    1   | Quarter note     |
    +-------+--------+------------------+
    |     7 |    2/3 | Dotted quarter   |
    +-------+--------+------------------+
    |     8 |    3/4 | Half triplet     |
    +-------+--------+------------------+
    |     9 |    1/2 | Half note        |
    +-------+--------+------------------+
    |    10 |    1/3 | Dotted half note |
    +-------+--------+------------------+
    |    11 |    3/8 | Whole triplet    |
    +-------+--------+------------------+
    |    12 |    1/4 | Whole note       |
    +-------+--------+------------------+

    .. [1] Peeters, Geoffroy.
        "Rhythm Classification Using Spectral Rhythm Patterns."
        In ISMIR, pp. 644-647. 2005.

    .. [2] Prockup, Matthew, Andreas F. Ehmann, Fabien Gouyon, Erik M. Schmidt, and Youngmoo E. Kim.
        "Modeling musical rhythm at scale with the music genome project."
        In 2015 IEEE workshop on applications of signal processing to audio and acoustics (WASPAA), pp. 1-5. IEEE, 2015.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series
    sr : number > 0 [scalar]
        sampling rate of the time series
    onset_envelope : np.ndarray [shape=(..., n)]
        pre-computed onset strength envelope
    tg : np.ndarray [shape=(..., win_length, n)]
        pre-computed tempogram.  If provided, then `y`,
        `onset_envelope`, `win_length`, `center`, `window` and `norm`
        are ignored, and the number of lag bins is inferred from
        the second-to-last axis of the tempogram.
    bpm : np.ndarray
        pre-computed tempo estimate.  This must be a per-frame
        estimate, and have dimension compatible with `tg`.
    hop_length : int > 0 [scalar]
        hop length of the time series
    win_length : int > 0 [scalar]
        window length of the autocorrelation window for tempogram
        calculation
    start_bpm : float [scalar]
        initial guess of the BPM if `bpm` is not provided
    std_bpm : float > 0 [scalar]
        standard deviation of tempo distribution
    max_tempo : float > 0 [scalar, optional]
        If provided, only estimate tempo below this threshold
    freqs : np.ndarray
        Frequencies (in BPM) of the tempogram axis.
        If not provided, they are derived from ``tg.shape[-2]``,
        ``sr`` and ``hop_length`` via `librosa.tempo_frequencies`.
    factors : np.ndarray
        Multiples of the fundamental tempo (bpm) to estimate.
        If not provided, the factors are as specified above.
    prior : scipy.stats.rv_continuous [optional]
        A prior distribution over tempo (in beats per minute).
        By default, a pseudo-log-normal prior is used.
        If given, ``start_bpm`` and ``std_bpm`` will be ignored.
    center : bool
        If `True`, onset windows are centered.
        If `False`, windows are left-aligned.
    aggregate : callable [optional]
        Aggregation function for estimating global tempogram ratio.
        It is called as ``aggregate(tgr, axis=-1)``.
        If `None`, then ratios are estimated independently for each frame.
    window : string, function, number, tuple, or np.ndarray [shape=(win_length,)]
        A window specification as in `stft`.
    kind : str
        Interpolation mode for measuring tempogram ratios
    fill_value : float
        The value to fill when extrapolating beyond the observed
        frequency range.
    norm : {np.inf, -np.inf, 0, float > 0, None}
        Normalization mode.  Set to `None` to disable normalization.

    Returns
    -------
    tgr : np.ndarray
        The tempogram ratio for the specified factors.
        If `aggregate` is provided, the trailing time axis
        will be removed.
        If `aggregate` is not provided (default), ratios
        will be estimated for each frame.

    See Also
    --------
    tempogram
    tempo
    librosa.f0_harmonics
    librosa.tempo_frequencies

    Examples
    --------
    Compute tempogram ratio features using the default factors
    for a waltz (3/4 time)

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.ex('sweetwaltz'))
    >>> tempogram = librosa.feature.tempogram(y=y, sr=sr)
    >>> tgr = librosa.feature.tempogram_ratio(tg=tempogram, sr=sr)
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> librosa.display.specshow(tempogram, x_axis='time', y_axis='tempo',
    ...                          ax=ax[0])
    >>> librosa.display.specshow(tgr, x_axis='time', ax=ax[1])
    >>> ax[0].label_outer()
    >>> ax[0].set(title="Tempogram")
    >>> ax[1].set(title="Tempogram ratio")
    """
    # Get a tempogram and time-varying tempo estimate
    if tg is None:
        tg = tempogram(
            y=y,
            sr=sr,
            onset_envelope=onset_envelope,
            hop_length=hop_length,
            win_length=win_length,
            center=center,
            window=window,
            norm=norm,
        )

    if freqs is None:
        # The lag axis is the second-to-last axis of the tempogram.
        # Using len(tg) would pick up the leading (channel) axis for
        # multi-channel input and produce the wrong number of bins.
        freqs = tempo_frequencies(
            sr=sr, n_bins=tg.shape[-2], hop_length=hop_length
        )

    # Estimate tempo per-frame, no aggregation yet
    if bpm is None:
        bpm = tempo(
            sr=sr,
            tg=tg,
            hop_length=hop_length,
            start_bpm=start_bpm,
            std_bpm=std_bpm,
            max_tempo=max_tempo,
            aggregate=None,
            prior=prior,
        )

    if factors is None:
        # metric multiples from Prockup'15
        factors = np.array(
            [4, 8 / 3, 3, 2, 4 / 3, 3 / 2, 1, 2 / 3, 3 / 4, 1 / 2, 1 / 3, 3 / 8, 1 / 4]
        )

    # Sample the tempogram at each multiple of the per-frame tempo
    tgr = f0_harmonics(
        tg, freqs=freqs, f0=bpm, harmonics=factors, kind=kind, fill_value=fill_value
    )

    # Optionally summarize over the trailing time axis
    if aggregate is not None:
        return aggregate(tgr, axis=-1)  # type: ignore

    return tgr