File: harmonic.py

package info (click to toggle)
python-librosa 0.11.0-5
links: PTS, VCS
area: main
in suites: forky, sid
size: 166,732 kB
sloc: python: 21,731; makefile: 141; sh: 2
file content (450 lines) | stat: -rw-r--r-- 15,957 bytes
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Harmonic calculations for frequency representations"""

import warnings

import numpy as np
import scipy.interpolate
import scipy.signal
from ..util.exceptions import ParameterError
from ..util import is_unique
from numpy.typing import ArrayLike
from typing import Callable, Optional, Sequence

# Public names of this module: the three functions defined below.
__all__ = ["salience", "interp_harmonics", "f0_harmonics"]


def salience(
    S: np.ndarray,
    *,
    freqs: np.ndarray,
    harmonics: Sequence[float],
    weights: Optional[ArrayLike] = None,
    aggregate: Optional[Callable] = None,
    filter_peaks: bool = True,
    fill_value: float = np.nan,
    kind: str = "linear",
    axis: int = -2,
) -> np.ndarray:
    """Harmonic salience function.

    The input is interpolated at each requested harmonic of every frequency
    (see `interp_harmonics`), and the resulting harmonic dimension is then
    collapsed with ``aggregate``.

    Parameters
    ----------
    S : np.ndarray [shape=(..., d, n)]
        input time frequency magnitude representation (e.g. STFT or CQT magnitudes).
        Must be real-valued and non-negative.

    freqs : np.ndarray, shape=(S.shape[axis]) or shape=S.shape
        The frequency values corresponding to S's elements along the
        chosen axis.

        Frequencies can also be time-varying, e.g. as computed by
        `reassigned_spectrogram`, in which case the shape should
        match ``S``.

    harmonics : list-like, non-negative
        Harmonics to include in salience computation.  The first harmonic (1)
        corresponds to ``S`` itself. Values less than one (e.g., 1/2) correspond
        to sub-harmonics.

    weights : list-like
        The weight to apply to each harmonic in the summation. (default:
        uniform weights). Must be the same length as ``harmonics``.

    aggregate : function
        aggregation function (default: `np.average`)

        If ``aggregate=np.average``, then a weighted average is
        computed per-harmonic according to the specified weights.
        For all other aggregation functions, all harmonics
        are treated equally.

        The function is called as ``aggregate(S_harm, axis=...)`` and must
        remove the harmonic axis from its input.

    filter_peaks : bool
        If true, returns harmonic summation only on frequencies of peak
        magnitude. Otherwise returns harmonic summation over the full spectrum.
        Defaults to True.

    fill_value : float
        The value to fill non-peaks in the output representation. (default:
        `np.nan`) Only used if ``filter_peaks == True``.

    kind : str
        Interpolation type for harmonic estimation.
        See `scipy.interpolate.interp1d`.

    axis : int
        The axis along which to compute harmonics.  Both negative and
        non-negative indices are accepted.

    Returns
    -------
    S_sal : np.ndarray
        ``S_sal`` will have the same shape as ``S``, and measure
        the overall harmonic energy at each frequency.
        When ``filter_peaks`` is true, the result is a newly allocated
        floating point array holding ``fill_value`` everywhere except at
        the local maxima of ``S`` along ``axis``.

    See Also
    --------
    interp_harmonics

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
    >>> S = np.abs(librosa.stft(y))
    >>> freqs = librosa.fft_frequencies(sr=sr)
    >>> harms = [1, 2, 3, 4]
    >>> weights = [1.0, 0.5, 0.33, 0.25]
    >>> S_sal = librosa.salience(S, freqs=freqs, harmonics=harms, weights=weights, fill_value=0)
    >>> print(S_sal.shape)
    (1025, 115)
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          sr=sr, y_axis='log', x_axis='time', ax=ax[0])
    >>> ax[0].set(title='Magnitude spectrogram')
    >>> ax[0].label_outer()
    >>> img = librosa.display.specshow(librosa.amplitude_to_db(S_sal,
    ...                                                        ref=np.max),
    ...                                sr=sr, y_axis='log', x_axis='time', ax=ax[1])
    >>> ax[1].set(title='Salience spectrogram')
    >>> fig.colorbar(img, ax=ax, format="%+2.0f dB")
    """
    if aggregate is None:
        aggregate = np.average

    if weights is None:
        weights = np.ones((len(harmonics),))
    else:
        weights = np.array(weights, dtype=float)

    S_harm = interp_harmonics(S, freqs=freqs, harmonics=harmonics, kind=kind, axis=axis)

    # The harmonic dimension is inserted immediately before `axis`, so the
    # interpolated array has one more dimension than `S`.  Counted from the
    # front, the new dimension sits at index `axis`; counted from the back,
    # it sits at `axis - 1`.  Using `axis - 1` unconditionally would pick the
    # wrong dimension whenever `axis` is non-negative.
    harmonic_axis = axis if axis >= 0 else axis - 1

    S_sal: np.ndarray
    if aggregate is np.average:
        # Only np.average is known to accept per-harmonic weights
        S_sal = aggregate(S_harm, axis=harmonic_axis, weights=weights)
    else:
        S_sal = aggregate(S_harm, axis=harmonic_axis)

    if filter_peaks:
        # Keep salience only where the *input* has a local maximum along the
        # frequency axis; everything else is replaced by fill_value.
        S_peaks = scipy.signal.argrelmax(S, axis=axis)
        S_out = np.full(S.shape, fill_value, dtype=float)
        S_out[S_peaks] = S_sal[S_peaks]

        S_sal = S_out

    return S_sal


def interp_harmonics(
    x: np.ndarray,
    *,
    freqs: np.ndarray,
    harmonics: ArrayLike,
    kind: str = "linear",
    fill_value: float = 0,
    axis: int = -2,
) -> np.ndarray:
    """Compute the energy at harmonics of time-frequency representation.

    Given a frequency-based energy representation such as a spectrogram
    or tempogram, this function computes the energy at the chosen harmonics
    of the frequency axis.  (See examples below.)
    The resulting harmonic array can then be used as input to a salience
    computation.

    Parameters
    ----------
    x : np.ndarray
        The input energy
    freqs : np.ndarray, shape=(x.shape[axis]) or shape=x.shape
        The frequency values corresponding to x's elements along the
        chosen axis.
        Frequencies can also be time-varying, e.g. as computed by
        `reassigned_spectrogram`, in which case the shape should
        match ``x``.
    harmonics : list-like, non-negative
        Harmonics to compute as ``harmonics[i] * freqs``.
        The first harmonic (1) corresponds to ``freqs``.
        Values less than one (e.g., 1/2) correspond to sub-harmonics.
    kind : str
        Interpolation type.  See `scipy.interpolate.interp1d`.
    fill_value : float
        The value to fill when extrapolating beyond the observed
        frequency range.
    axis : int
        The axis along which to compute harmonics.
        Any valid index of ``x`` (negative or non-negative) may be used.

    Returns
    -------
    x_harm : np.ndarray
        ``x_harm[i]`` will have the same shape as ``x``, and measure
        the energy at the ``harmonics[i]`` harmonic of each frequency.
        A new dimension indexing harmonics will be inserted immediately
        before ``axis``.

    Raises
    ------
    ParameterError
        If ``freqs`` is neither one-dimensional with length
        ``x.shape[axis]`` nor of the same shape as ``x``.

    Warns
    -----
    UserWarning
        If the frequencies are not unique along the frequency axis.

    See Also
    --------
    scipy.interpolate.interp1d

    Examples
    --------
    Estimate the harmonics of a time-averaged tempogram

    >>> y, sr = librosa.load(librosa.ex('sweetwaltz'))
    >>> # Compute the time-varying tempogram and average over time
    >>> tempi = np.mean(librosa.feature.tempogram(y=y, sr=sr), axis=1)
    >>> # We'll measure the first five harmonics
    >>> harmonics = [1, 2, 3, 4, 5]
    >>> f_tempo = librosa.tempo_frequencies(len(tempi), sr=sr)
    >>> # Build the harmonic tensor; we only have one axis here (tempo)
    >>> t_harmonics = librosa.interp_harmonics(tempi, freqs=f_tempo, harmonics=harmonics, axis=0)
    >>> print(t_harmonics.shape)
    (5, 384)

    >>> # And plot the results
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> librosa.display.specshow(t_harmonics, x_axis='tempo', sr=sr, ax=ax)
    >>> ax.set(yticks=np.arange(len(harmonics)),
    ...        yticklabels=['{:.3g}'.format(_) for _ in harmonics],
    ...        ylabel='Harmonic', xlabel='Tempo (BPM)')

    We can also compute frequency harmonics for spectrograms.
    To calculate sub-harmonic energy, use values < 1.

    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
    >>> harmonics = [1./3, 1./2, 1, 2, 3, 4]
    >>> S = np.abs(librosa.stft(y))
    >>> fft_freqs = librosa.fft_frequencies(sr=sr)
    >>> S_harm = librosa.interp_harmonics(S, freqs=fft_freqs, harmonics=harmonics, axis=0)
    >>> print(S_harm.shape)
    (6, 1025, 646)

    >>> fig, ax = plt.subplots(nrows=3, ncols=2, sharex=True, sharey=True)
    >>> for i, _sh in enumerate(S_harm):
    ...     img = librosa.display.specshow(librosa.amplitude_to_db(_sh,
    ...                                                      ref=S.max()),
    ...                              sr=sr, y_axis='log', x_axis='time',
    ...                              ax=ax.flat[i])
    ...     ax.flat[i].set(title='h={:.3g}'.format(harmonics[i]))
    ...     ax.flat[i].label_outer()
    >>> fig.colorbar(img, ax=ax, format="%+2.f dB")
    """
    if freqs.ndim == 1 and len(freqs) == x.shape[axis]:
        # Build the 1-D interpolator.
        # All frames have a common domain, so we only need one interpolator here.

        # First, verify that the input frequencies are unique
        if not is_unique(freqs, axis=0):
            warnings.warn(
                "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
                stacklevel=2,
            )

        f_interp = scipy.interpolate.interp1d(
            freqs,
            x,
            axis=axis,
            bounds_error=False,
            copy=False,
            kind=kind,
            fill_value=fill_value,
        )

        # Set the interpolation points: one row of target frequencies per harmonic
        f_out = np.multiply.outer(harmonics, freqs)

        # Interpolating at a 2-D set of points replaces the frequency axis by
        # (harmonic, frequency), i.e. the harmonic axis lands just before `axis`.
        return f_interp(f_out)  # type: ignore

    elif freqs.shape == x.shape:
        # If we have time-varying frequencies, then it must match exactly the shape of the input
        if not np.all(is_unique(freqs, axis=axis)):
            warnings.warn(
                "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
                stacklevel=2,
            )

        # We'll define a frame-wise interpolator helper function that we will vectorize over
        # the entire input array
        def _f_interp(_a, _b):
            interp = scipy.interpolate.interp1d(
                _a, _b, bounds_error=False, copy=False, kind=kind, fill_value=fill_value
            )

            return interp(np.multiply.outer(_a, harmonics))

        # Signature is expanding frequency into a new dimension
        xfunc = np.vectorize(_f_interp, signature="(f),(f)->(f,h)")

        # Move the frequency axis to the tail so that we vectorize over frames.
        # moveaxis (rather than swapaxes) keeps all remaining axes in their
        # original relative order, which makes the inverse step below valid
        # for every choice of `axis`.
        x_harm = xfunc(np.moveaxis(freqs, axis, -1), np.moveaxis(x, axis, -1))

        # The result ends in (..., frequency, harmonic).  Put the harmonic axis
        # at the original position of the frequency axis, and the frequency
        # axis right behind it.  The position is counted from the front, so it
        # is the same in the input and in the (one dimension larger) output.
        target = axis % x.ndim
        return np.moveaxis(x_harm, (-1, -2), (target, target + 1))
    else:
        raise ParameterError(
            f"freqs.shape={freqs.shape} is incompatible with input shape={x.shape}"
        )


def f0_harmonics(
    x: np.ndarray,
    *,
    f0: np.ndarray,
    freqs: np.ndarray,
    harmonics: ArrayLike,
    kind: str = "linear",
    fill_value: float = 0,
    axis: int = -2,
) -> np.ndarray:
    """Measure the energy of ``x`` at harmonics of a per-frame fundamental.

    For every frame, the input is interpolated along its frequency axis at
    the frequencies ``harmonics[i] * f0``.  This turns a
    `frequency * time` representation into a `harmonic * time`
    representation in which the fundamental frequency has been normalized
    out.  When f0 tracks pitch the result can serve as a description of
    timbre; when f0 tracks tempo it can serve as a description of rhythm.

    Unlike `interp_harmonics`, which evaluates the harmonics of *all*
    frequencies, only the harmonics of ``f0`` are evaluated here.

    Parameters
    ----------
    x : np.ndarray [shape=(..., frequencies, n)]
        The input array (e.g., STFT magnitudes)
    f0 : np.ndarray [shape=(..., n)]
        The fundamental frequency (f0) of each frame in the input
        Shape should match ``x.shape[-1]``
    freqs : np.ndarray, shape=(x.shape[axis]) or shape=x.shape
        The frequency values corresponding to x's elements along the
        chosen axis.
        Frequencies can also be time-varying, e.g. as computed by
        `reassigned_spectrogram`, in which case the shape should
        match ``x``.
        Non-finite frequency values are excluded from the interpolation
        grid, together with the corresponding entries of ``x``.
    harmonics : list-like, non-negative
        Harmonics to compute as ``harmonics[i] * f0``
        Values less than one (e.g., 1/2) correspond to sub-harmonics.
    kind : str
        Interpolation type.  See `scipy.interpolate.interp1d`.
    fill_value : float
        The value to fill when extrapolating beyond the observed
        frequency range.  It also replaces any NaN in the result.
    axis : int
        The axis corresponding to frequency in ``x``

    Returns
    -------
    f0_harm : np.ndarray [shape=(..., len(harmonics), n)]
        Interpolated energy at each specified harmonic of the fundamental
        frequency for each time step.

    Raises
    ------
    ParameterError
        If ``freqs`` is neither one-dimensional with length
        ``x.shape[axis]`` nor of the same shape as ``x``.

    See Also
    --------
    interp_harmonics
    librosa.feature.tempogram_ratio

    Examples
    --------
    This example estimates the fundamental (f0), and then extracts the first
    12 harmonics

    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> f0, voicing, voicing_p = librosa.pyin(y=y, sr=sr, fmin=200, fmax=700)
    >>> S = np.abs(librosa.stft(y))
    >>> freqs = librosa.fft_frequencies(sr=sr)
    >>> harmonics = np.arange(1, 13)
    >>> f0_harm = librosa.f0_harmonics(S, freqs=freqs, f0=f0, harmonics=harmonics)

    >>> import matplotlib.pyplot as plt
    >>> fig, ax =plt.subplots(nrows=2, sharex=True)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          x_axis='time', y_axis='log', ax=ax[0])
    >>> times = librosa.times_like(f0)
    >>> for h in harmonics:
    ...     ax[0].plot(times, h * f0, label=f"{h}*f0")
    >>> ax[0].legend(ncols=4, loc='lower right')
    >>> ax[0].label_outer()
    >>> librosa.display.specshow(librosa.amplitude_to_db(f0_harm, ref=np.max),
    ...                          x_axis='time', ax=ax[1])
    >>> ax[1].set_yticks(harmonics-1)
    >>> ax[1].set_yticklabels(harmonics)
    >>> ax[1].set(ylabel='Harmonics')
    """
    # A static grid is a single frequency vector shared by all frames;
    # otherwise every frame must carry its own frequencies.
    static_grid = freqs.ndim == 1 and len(freqs) == x.shape[axis]

    if not static_grid and freqs.shape != x.shape:
        raise ParameterError(
            f"freqs.shape={freqs.shape} is incompatible with input shape={x.shape}"
        )

    # Repeated frequencies make the interpolation ill-defined; warn but carry on
    if static_grid:
        grid_is_unique = is_unique(freqs, axis=0)
    else:
        grid_is_unique = np.all(is_unique(freqs, axis=axis))

    if not grid_is_unique:
        warnings.warn(
            "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
            stacklevel=2,
        )

    # Interpolator settings common to both grid types
    interp_opts = dict(
        axis=0,
        bounds_error=False,
        copy=False,
        assume_sorted=False,
        kind=kind,
        fill_value=fill_value,
    )

    f0_harm: np.ndarray
    if static_grid:
        # The set of usable frequency bins is the same for every frame
        finite = np.isfinite(freqs)

        def _at_static(column, query):
            lookup = scipy.interpolate.interp1d(
                freqs[finite], column[finite], **interp_opts
            )
            return lookup(query)

        per_frame = np.vectorize(_at_static, signature="(f),(h)->(h)")

        # Frequency goes to the tail so that each frame is one core vector
        frames = x.swapaxes(axis, -1)
        targets = np.multiply.outer(f0, harmonics)
        f0_harm = per_frame(frames, targets)

    else:
        # Each frame brings its own grid, so the finite mask is per-frame too
        def _at_dynamic(column, grid, query):
            finite = np.isfinite(grid)
            lookup = scipy.interpolate.interp1d(
                grid[finite], column[finite], **interp_opts
            )
            return lookup(query)

        per_frame = np.vectorize(_at_dynamic, signature="(f),(f),(h)->(h)")

        frames = x.swapaxes(axis, -1)
        grids = freqs.swapaxes(axis, -1)
        targets = np.multiply.outer(f0, harmonics)
        f0_harm = per_frame(frames, grids, targets)

    # Put the harmonic axis where the frequency axis used to be
    f0_harm = f0_harm.swapaxes(axis, -1)

    # Any NaN left in the result is replaced, in place, by fill_value
    return np.nan_to_num(f0_harm, copy=False, nan=fill_value)