File: plot_patch_generation.py

package info (click to toggle)
python-librosa 0.11.0-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 166,732 kB
  • sloc: python: 21,731; makefile: 141; sh: 2
file content (130 lines) | stat: -rw-r--r-- 4,618 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
==========================
Efficient patch generation
==========================

This notebook demonstrates how to efficiently generate fixed-duration
excerpts of a signal using `librosa.util.frame`.
This can be helpful in machine learning applications where a model may
expect inputs of a certain size during training, but your data may be
of arbitrary and variable length.

Aside from being a convenient helper method for patch sampling, the
`librosa.util.frame` function can do this *efficiently* by avoiding
memory copies.
The patch array produced below is a *view* of the original data array,
not a copy.
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import librosa

######################
# Load an example clip
example_path = librosa.ex('libri1')
y, sr = librosa.load(example_path)  # y: audio samples, sr: sampling rate


######################################
# Compute a log-scaled Mel spectrogram
mel_power = librosa.feature.melspectrogram(y=y, sr=sr)
# Convert power to decibels, using the spectrogram's maximum as reference
melspec = librosa.power_to_db(mel_power, ref=np.max)

##################################################
# The resulting spectrogram has a number of frames
# that depends on the length of the input signal `y`:
print(f"Mel spectrogram shape: {melspec.shape}")

###################################################
#
fig, ax = plt.subplots(nrows=1, ncols=1)
# Draw the whole spectrogram: time on the x axis, Mel frequency on the y axis
librosa.display.specshow(melspec, ax=ax, x_axis='time', y_axis='mel')
ax.set(title='Full Mel spectrogram')


######################################
# We can use librosa.util.frame to carve
# `melspec` into patches of fixed duration.
#
# In this case, we'll make ~5-second patches
# separated by approximately 1/10 second each.

# Convert the desired durations (in seconds) into counts of spectrogram
# frames.  The sampling rate is passed explicitly so that the conversion
# follows the rate of the loaded signal instead of silently relying on the
# library default.  Neither this conversion nor the `melspectrogram` call
# above overrides the STFT hop, so both use librosa's default for it.
frame_length = librosa.time_to_frames(5.0, sr=sr)   # patch length, in frames
hop_length = librosa.time_to_frames(0.10, sr=sr)    # patch spacing, in frames
print(f"Frame length={frame_length}, hop length={hop_length}")

########################################
patches = librosa.util.frame(
    melspec,
    frame_length=frame_length,
    hop_length=hop_length,
    axis=-1,  # frame along the last axis of `melspec` (the library default)
)

###########################################################
# The resulting `patches` array is now three-dimensional,
# with axes corresponding to [frequency, time, patch index]

print(f"Patch array shape: {patches.shape}")


####################################################
# So ``patches[..., 0]`` is the first ~5-second patch,
# ``patches[..., 1]`` is the second ~5-second patch,
# and so on.  All patches will have the same shape.
#
# Unlike the framing operation used by spectrogram functions,
# these patches are **not** centered; they are left-aligned.
# This means that the first patch, ``patches[..., 0]``
# corresponds to the original data ``melspec[..., 0:frame_length]``.
# The second patch ``patches[..., 1]`` corresponds to data
# ``melspec[..., hop_length:hop_length+frame_length]``,
# the third patch ``patches[..., 2]`` corresponds to
# ``melspec[..., 2*hop_length:2*hop_length+frame_length]``, etc.
#
# The figure below illustrates the first three patches.
# Because the hop between patches (1/10 second) is small relative to the
# patch length (5 seconds), these patches have substantial overlap and
# contain mostly the same content but at different time offsets.

# Top row: one wide panel "A"; bottom row: one panel per patch ("0", "1", "2")
layout = [list("AAA"), list("012")]
fig, ax = plt.subplot_mosaic(layout)

full_panel = ax["A"]
librosa.display.specshow(melspec, ax=full_panel, x_axis='time', y_axis='mel')
full_panel.set(title='Full spectrogram', xlabel=None)

for index in range(3):
    # Panels are keyed by the string form of the patch index
    panel = ax[str(index)]
    librosa.display.specshow(patches[..., index], ax=panel,
                             x_axis='time', y_axis='mel')
    panel.set(title=f"Patch #{index}")
    panel.label_outer()

###########################################################
# The animation below illustrates each patch in approximate
# real time.

# We'll plot the first patch to create the display object,
# then animate the rest.

# sphinx_gallery_thumbnail_number = 2

fig, ax = plt.subplots(nrows=1, ncols=1)
first_patch = patches[..., 0]
# Keep the object returned by specshow so the animation can update it later
mesh = librosa.display.specshow(first_patch, ax=ax,
                                x_axis='time', y_axis='mel')


# This helper function is used to render each frame of the animation
# Updating the mesh object is much more efficient than rendering an
# entirely new spectrogram for each frame!
#
# Note that the "time" axis of this figure corresponds to the time
# within the patch; not the absolute time in the original signal.
#
def _update(num):
    """Load patch number ``num`` into the existing ``mesh`` object.

    Reads the module-level ``patches`` array and ``mesh`` display object,
    and returns the updated mesh wrapped in a one-element tuple.
    """
    current_patch = patches[..., num]
    mesh.set_array(current_patch)
    return (mesh,)


n_patches = patches.shape[-1]  # one animation frame per patch
ani = animation.FuncAnimation(
    fig,
    func=_update,
    frames=n_patches,
    interval=100,  # 100 milliseconds = 1/10 sec
    blit=True,
)