1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
"""
.. _rsa_noplot:
====================================
Representational Similarity Analysis
====================================
Representational Similarity Analysis is used to perform summary statistics
on supervised classifications where the number of classes is relatively high.
It consists in characterizing the structure of the confusion matrix to infer
the similarity between brain responses and serves as a proxy for characterizing
the space of mental representations [1]_ [2]_ [3]_.
In this example, we perform RSA on responses to 24 object images (among
a list of 92 images). Subjects were presented with images of human, animal
and inanimate objects [4]_. Here we use the 24 unique images of faces
and body parts.
.. note:: this example will download a very large (~6GB) file, so we will not
build the images below.
References
----------
.. [1] Shepard, R. "Multidimensional scaling, tree-fitting, and clustering."
Science 210.4468 (1980): 390-398.
.. [2] Laakso, A. & Cottrell, G.. "Content and cluster analysis:
assessing representational similarity in neural systems." Philosophical
psychology 13.1 (2000): 47-76.
.. [3] Kriegeskorte, N., Marieke, M., & Bandettini. P. "Representational
similarity analysis-connecting the branches of systems neuroscience."
Frontiers in systems neuroscience 2 (2008): 4.
.. [4] Cichy, R. M., Pantazis, D., & Oliva, A. "Resolving human object
recognition in space and time." Nature neuroscience (2014): 17(3),
455-462.
"""
# Authors: Jean-Remi King <jeanremi.king@gmail.com>
# Jaakko Leppakangas <jaeilepp@student.jyu.fi>
# Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
#
# License: BSD (3-clause)
import os.path as op
import numpy as np
from pandas import read_csv
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.manifold import MDS
import mne
from mne.io import read_raw_fif, concatenate_raws
from mne.datasets import visual_92_categories
print(__doc__)
data_path = visual_92_categories.data_path()
# Define stimulus - trigger mapping
fname = op.join(data_path, 'visual_stimuli.csv')
conds = read_csv(fname)
print(conds.head(5))
##############################################################################
# Let's restrict the number of conditions to speed up computation
max_trigger = 24
conds = conds[:max_trigger] # take only the first 24 rows
##############################################################################
# Define stimulus - trigger mapping
conditions = []
for c in conds.values:
cond_tags = list(c[:2])
cond_tags += [('not-' if i == 0 else '') + conds.columns[k]
for k, i in enumerate(c[2:], 2)]
conditions.append('/'.join(map(str, cond_tags)))
print(conditions[:10])
##############################################################################
# Let's make the event_id dictionary
event_id = dict(zip(conditions, conds.trigger + 1))
event_id['0/human bodypart/human/not-face/animal/natural']
##############################################################################
# Read MEG data
n_runs = 4 # 4 for full data (use less to speed up computations)
fname = op.join(data_path, 'sample_subject_%i_tsss_mc.fif')
raws = [read_raw_fif(fname % block) for block in range(n_runs)]
raw = concatenate_raws(raws)
events = mne.find_events(raw, min_duration=.002)
events = events[events[:, 2] <= max_trigger]
##############################################################################
# Epoch data
picks = mne.pick_types(raw.info, meg=True)
epochs = mne.Epochs(raw, events=events, event_id=event_id, baseline=None,
picks=picks, tmin=-.1, tmax=.500, preload=True)
##############################################################################
# Let's plot some conditions
epochs['face'].average().plot()
epochs['not-face'].average().plot()
##############################################################################
# Representational Similarity Analysis (RSA) is a neuroimaging-specific
# appelation to refer to statistics applied to the confusion matrix
# also referred to as the representational dissimilarity matrices (RDM).
#
# Compared to the approach from Cichy et al. we'll use a multiclass
# classifier (Multinomial Logistic Regression) while the paper uses
# all pairwise binary classification task to make the RDM.
# Also we use here the ROC-AUC as performance metric while the
# paper uses accuracy. Finally here for the sake of time we use
# RSA on a window of data while Cichy et al. did it for all time
# instants separately.
# Classify using the average signal in the window 50ms to 300ms
# to focus the classifier on the time interval with best SNR.
clf = make_pipeline(StandardScaler(),
LogisticRegression(C=1, solver='lbfgs'))
X = epochs.copy().crop(0.05, 0.3).get_data().mean(axis=2)
y = epochs.events[:, 2]
classes = set(y)
cv = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
# Compute confusion matrix for each cross-validation fold
y_pred = np.zeros((len(y), len(classes)))
for train, test in cv.split(X, y):
# Fit
clf.fit(X[train], y[train])
# Probabilistic prediction (necessary for ROC-AUC scoring metric)
y_pred[test] = clf.predict_proba(X[test])
##############################################################################
# Compute confusion matrix using ROC-AUC
confusion = np.zeros((len(classes), len(classes)))
for ii, train_class in enumerate(classes):
for jj in range(ii, len(classes)):
confusion[ii, jj] = roc_auc_score(y == train_class, y_pred[:, jj])
confusion[jj, ii] = confusion[ii, jj]
##############################################################################
# Plot
labels = [''] * 5 + ['face'] + [''] * 11 + ['bodypart'] + [''] * 6
fig, ax = plt.subplots(1)
im = ax.matshow(confusion, cmap='RdBu_r', clim=[0.3, 0.7])
ax.set_yticks(range(len(classes)))
ax.set_yticklabels(labels)
ax.set_xticks(range(len(classes)))
ax.set_xticklabels(labels, rotation=40, ha='left')
ax.axhline(11.5, color='k')
ax.axvline(11.5, color='k')
plt.colorbar(im)
plt.tight_layout()
plt.show()
##############################################################################
# Confusion matrix related to mental representations have been historically
# summarized with dimensionality reduction using multi-dimensional scaling [1].
# See how the face samples cluster together.
fig, ax = plt.subplots(1)
mds = MDS(2, random_state=0, dissimilarity='precomputed')
chance = 0.5
summary = mds.fit_transform(chance - confusion)
cmap = plt.get_cmap('rainbow')
colors = ['r', 'b']
names = list(conds['condition'].values)
for color, name in zip(colors, set(names)):
sel = np.where([this_name == name for this_name in names])[0]
size = 500 if name == 'human face' else 100
ax.scatter(summary[sel, 0], summary[sel, 1], s=size,
facecolors=color, label=name, edgecolors='k')
ax.axis('off')
ax.legend(loc='lower right', scatterpoints=1, ncol=2)
plt.tight_layout()
plt.show()
|