1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
|
"""
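#Visualization of the filters of VGG16, via gradient ascent in input space.

This script can run on CPU in a few minutes.

Results example: a grid of the generated filter images with the highest
loss, saved to disk as `vgg_<layer_name>_<n>x<n>.png`.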
"""
from __future__ import print_function
import time
import numpy as np
from PIL import Image as pil_image
from keras.preprocessing.image import save_img
from keras import layers
from keras.applications import vgg16
from keras import backend as K
def normalize(x):
    """Scale a tensor by its root-mean-square value.

    # Arguments
        x: An input tensor.

    # Returns
        The input tensor divided by (RMS(x) + epsilon); the backend epsilon
        keeps the division defined when the tensor is all zeros.
    """
    root_mean_square = K.sqrt(K.mean(K.square(x)))
    return x / (root_mean_square + K.epsilon())
def deprocess_image(x):
    """utility function to convert a float array into a valid uint8 image.

    NOTE: the normalization steps below use in-place operators, so the array
    passed in as `x` is modified as well (it ends up centered on 0.5 with a
    std of roughly 0.25). Pass a copy if the original values are still needed.

    # Arguments
        x: A numpy-array representing the generated image.

    # Returns
        A processed numpy-array, which could be used in e.g. imshow.
        For the 'channels_first' data format the channel axis is moved last.
    """
    # normalize tensor: center on 0., ensure std is 0.25
    # (epsilon avoids a division by zero for a constant image)
    x -= x.mean()
    x /= (x.std() + K.epsilon())
    x *= 0.25

    # shift the mean to 0.5 and clip to [0, 1]
    x += 0.5
    # np.clip returns a new array: from here on the caller's array
    # is no longer touched.
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.image_data_format() == 'channels_first':
        # (channels, rows, cols) -> (rows, cols, channels)
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x
def process_image(x, former):
    """Map a uint8 image back into the float range of a reference array.

    Intended as the inverse of `deprocess_image`: the pixel values are
    rescaled from [0, 255] to a zero-centered range and then stretched and
    shifted using the std and mean of `former`.

    # Arguments
        x: A numpy-array, which could be used in e.g. imshow.
        former: The former numpy-array.
                Only its mean and std are read.

    # Returns
        A processed numpy-array representing the generated image.
        For the 'channels_first' data format the channel axis is moved first.
    """
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        # (rows, cols, channels) -> (channels, rows, cols)
        x = x.transpose((2, 0, 1))

    # [0, 255] -> [-0.5, 0.5]; the factor 4 undoes the 0.25 std scaling.
    centered = x / 255 - 0.5
    return centered * 4 * former.std() + former.mean()
def visualize_layer(model,
                    layer_name,
                    step=1.,
                    epochs=15,
                    upscaling_steps=9,
                    upscaling_factor=1.2,
                    output_dim=(412, 412),
                    filter_range=(0, None)):
    """Visualizes the most relevant filters of one conv-layer in a certain model.

    For every filter in `filter_range` an input image is generated by
    gradient ascent on the mean activation of that filter. The images with
    the highest final loss are stitched into a square grid and saved to
    `vgg_<layer_name>_<n>x<n>.png`.

    # Arguments
        model: The model containing layer_name. Must have exactly one input.
        layer_name: The name of the layer to be visualized.
                    Has to be a Conv2D layer of model.
        step: step size for gradient ascent.
        epochs: Number of gradient ascent iterations per upscaling step.
        upscaling_steps: Number of upscaling steps.
                         The starting image is
                         output_dim / upscaling_factor ** upscaling_steps,
                         i.e. roughly (80, 80) with the defaults.
        upscaling_factor: Factor to which to slowly upgrade
                          the image towards output_dim.
        output_dim: [img_width, img_height] The output image dimensions.
        filter_range: Tuple[lower, upper]
                      Determines the to be computed filter numbers.
                      If the second value is `None`,
                      the last filter will be inferred as the upper boundary.

    # Raises
        KeyError: if `layer_name` is not a layer of `model`.
        AssertionError: if the model has more than one input, the layer is
            not a Conv2D layer, or `filter_range` is out of bounds.
    """

    def _generate_filter_image(input_img,
                               layer_output,
                               filter_index):
        """Generates image for one particular filter.

        # Arguments
            input_img: The input-image Tensor.
            layer_output: The output-image Tensor.
            filter_index: The to be processed filter number.
                          Assumed to be valid.

        # Returns
            Either None if no image could be generated,
            or a tuple of the image (array) itself and the last loss.
        """
        s_time = time.time()

        # we build a loss function that maximizes the activation
        # of the nth filter of the layer considered
        if K.image_data_format() == 'channels_first':
            loss = K.mean(layer_output[:, filter_index, :, :])
        else:
            loss = K.mean(layer_output[:, :, :, filter_index])

        # we compute the gradient of the input picture wrt this loss
        grads = K.gradients(loss, input_img)[0]

        # normalization trick: we normalize the gradient
        grads = normalize(grads)

        # this function returns the loss and grads given the input picture
        iterate = K.function([input_img], [loss, grads])

        # we start from a gray image with some random noise.
        # intermediate_dim is (width, height) like output_dim, while the
        # image array is laid out rows (height) first, then cols (width).
        intermediate_dim = tuple(
            int(x / (upscaling_factor ** upscaling_steps)) for x in output_dim)
        start_rows, start_cols = intermediate_dim[1], intermediate_dim[0]
        if K.image_data_format() == 'channels_first':
            input_img_data = np.random.random(
                (1, 3, start_rows, start_cols))
        else:
            input_img_data = np.random.random(
                (1, start_rows, start_cols, 3))
        input_img_data = (input_img_data - 0.5) * 20 + 128

        # Slowly upscaling towards the original size prevents
        # a dominating high-frequency of the to visualized structure
        # as it would occur if we directly compute the full-size image.
        # Behaves as a better starting point for each following dimension
        # and therefore avoids poor local minima
        for up in reversed(range(upscaling_steps)):
            # we run gradient ascent for `epochs` steps
            for _ in range(epochs):
                loss_value, grads_value = iterate([input_img_data])
                input_img_data += grads_value * step

                # some filters get stuck to 0, we can skip them
                if loss_value <= K.epsilon():
                    return None

            # Calculate upscaled dimension (width, height)
            intermediate_dim = tuple(
                int(x / (upscaling_factor ** up)) for x in output_dim)

            # Upscale by a round trip through a uint8 PIL image.
            # NOTE: deprocess_image normalizes the array it is given in
            # place (here a view of input_img_data), so the statistics
            # process_image reads from input_img_data[0] afterwards are
            # those of the already-normalized image.
            img = deprocess_image(input_img_data[0])
            img = np.array(pil_image.fromarray(img).resize(intermediate_dim,
                                                           pil_image.BICUBIC))
            input_img_data = np.expand_dims(
                process_image(img, input_img_data[0]), 0)

        # decode the resulting input image
        img = deprocess_image(input_img_data[0])
        e_time = time.time()
        print('Costs of filter {:3}: {:5.0f} ( {:4.2f}s )'.format(filter_index,
                                                                  loss_value,
                                                                  e_time - s_time))
        return img, loss_value

    def _draw_filters(filters, n=None):
        """Draw the best filters in a nxn grid.

        # Arguments
            filters: A List of generated images and their corresponding losses
                     for each processed filter. The list is not modified.
            n: dimension of the grid.
               If none, or larger than the available images allow,
               the largest possible square will be used.
        """
        # Nothing to stitch: without this guard n would be 0 and the grid
        # size below would become negative.
        if not filters:
            print('No filter image could be generated, nothing to draw.')
            return

        largest_n = int(np.floor(np.sqrt(len(filters))))
        n = largest_n if n is None else min(n, largest_n)

        # the filters that have the highest loss are assumed to be
        # better-looking. we will only keep the top n*n filters.
        best = sorted(filters, key=lambda x: x[1], reverse=True)[:n * n]

        # build a black picture with enough space for
        # e.g. our 8 x 8 filters of size 412 x 412, with a 5px margin in
        # between. output_dim is (width, height) whereas the image arrays
        # are indexed (row, col), i.e. height first.
        MARGIN = 5
        tile_width, tile_height = output_dim[0], output_dim[1]
        width = n * tile_width + (n - 1) * MARGIN
        height = n * tile_height + (n - 1) * MARGIN
        stitched_filters = np.zeros((height, width, 3), dtype='uint8')

        # fill the picture with our saved filters, row by row
        for i in range(n):
            for j in range(n):
                img, _ = best[i * n + j]
                top = (tile_height + MARGIN) * i
                left = (tile_width + MARGIN) * j
                stitched_filters[
                    top: top + tile_height,
                    left: left + tile_width, :] = img

        # save the result to disk
        save_img('vgg_{0:}_{1:}x{1:}.png'.format(layer_name, n), stitched_filters)

    # this is the placeholder for the input images
    assert len(model.inputs) == 1
    input_img = model.inputs[0]

    # get the symbolic outputs of each "key" layer (we gave them unique names).
    layer_dict = {layer.name: layer for layer in model.layers[1:]}

    output_layer = layer_dict[layer_name]
    assert isinstance(output_layer, layers.Conv2D)

    # Number of filters, read from the length of the second weight array
    # (presumably the bias vector -- assumes the layer has one).
    filter_count = len(output_layer.get_weights()[1])

    # Compute to be processed filter range
    filter_lower = filter_range[0]
    filter_upper = (filter_range[1]
                    if filter_range[1] is not None
                    else filter_count)
    assert(filter_lower >= 0
           and filter_upper <= filter_count
           and filter_upper > filter_lower)
    print('Compute filters {:} to {:}'.format(filter_lower, filter_upper))

    # iterate through each filter and generate its corresponding image
    processed_filters = []
    for f in range(filter_lower, filter_upper):
        img_loss = _generate_filter_image(input_img, output_layer.output, f)

        if img_loss is not None:
            processed_filters.append(img_loss)

    print('{} filter processed.'.format(len(processed_filters)))
    # Finally draw and store the best filters to disk
    _draw_filters(processed_filters)
if __name__ == '__main__':
    # Layer whose filters get visualized
    # (see model definition at keras/applications/vgg16.py)
    LAYER_NAME = 'block5_conv1'

    # VGG16 with ImageNet weights and without the top classifier layers
    network = vgg16.VGG16(include_top=False, weights='imagenet')
    print('Model loaded.')
    network.summary()

    # run the visualization with the default settings
    visualize_layer(model=network, layer_name=LAYER_NAME)
|