File: plot_colocalization_metrics.py

package info (click to toggle)
skimage 0.25.2-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 32,720 kB
  • sloc: python: 60,007; cpp: 2,592; ansic: 1,591; xml: 1,342; javascript: 1,267; makefile: 168; sh: 20
file content (166 lines) | stat: -rw-r--r-- 6,499 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
======================
Colocalization metrics
======================

In this example, we demonstrate the use of different metrics to assess the
colocalization of two different image channels.

Colocalization can be split into two different concepts:
1. Co-occurence: What proportion of a substance is localized to a particular
area?
2. Correlation: What is the relationship in intensity between two substances?

"""

#####################################################################
# Co-occurence: subcellular localization
# ======================================
#
# Imagine that we are trying to determine the subcellular localization of a
# protein - is it located more in the nucleus or cytoplasm compared to a
# control?
#
# We begin by segmenting the nucleus of a sample image as described in another
# `example <https://scikit-image.org/docs/stable/auto_examples/applications/plot_fluorescence_nuclear_envelope.html>`_
# and assume that whatever is not in the nucleus is in the cytoplasm.
# The protein, "protein A", will be simulated as blobs and segmented.

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy import ndimage as ndi
import skimage as ski

rng = np.random.default_rng()

# segment nucleus
nucleus = ski.data.protein_transport()[0, 0, :, :180]
smooth = ski.filters.gaussian(nucleus, sigma=1.5)
thresh = smooth > ski.filters.threshold_otsu(smooth)
fill = ndi.binary_fill_holes(thresh)
nucleus_seg = ski.segmentation.clear_border(fill)

# protein blobs of varying intensity
proteinA = np.zeros_like(nucleus, dtype="float64")
proteinA_seg = np.zeros_like(nucleus, dtype="float64")

for blob_seed in range(10):
    blobs = ski.data.binary_blobs(
        180, blob_size_fraction=0.5, volume_fraction=(50 / (180**2)), rng=blob_seed
    )
    blobs_image = ski.filters.gaussian(blobs, sigma=1.5) * rng.integers(50, 256)
    proteinA += blobs_image
    proteinA_seg += blobs

# plot data
fig, ax = plt.subplots(3, 2, figsize=(8, 12), sharey=True)
ax[0, 0].imshow(nucleus, cmap=plt.cm.gray)
ax[0, 0].set_title('Nucleus')

ax[0, 1].imshow(nucleus_seg, cmap=plt.cm.gray)
ax[0, 1].set_title('Nucleus segmentation')

black_magenta = LinearSegmentedColormap.from_list("", ["black", "magenta"])
ax[1, 0].imshow(proteinA, cmap=black_magenta)
ax[1, 0].set_title('Protein A')

ax[1, 1].imshow(proteinA_seg, cmap=black_magenta)
ax[1, 1].set_title('Protein A segmentation')

ax[2, 0].imshow(proteinA, cmap=black_magenta)
ax[2, 0].imshow(nucleus_seg, cmap=plt.cm.gray, alpha=0.2)
ax[2, 0].set_title('Protein A\nwith nucleus overlaid')

ax[2, 1].imshow(proteinA_seg, cmap=black_magenta)
ax[2, 1].imshow(nucleus_seg, cmap=plt.cm.gray, alpha=0.2)
ax[2, 1].set_title('Protein A segmentation\nwith nucleus overlaid')

for a in ax.ravel():
    a.set_axis_off()

#####################################################################
# Intersection coefficient
# ========================
#
# After segmenting both the nucleus and the protein of interest, we can
# determine what fraction of the protein A segmentation overlaps with the
# nucleus segmentation.

ski.measure.intersection_coeff(proteinA_seg, nucleus_seg)

#####################################################################
# Manders' Colocalization Coefficient (MCC)
# =========================================
#
# The overlap coefficient assumes that the area of protein segmentation
# corresponds to the concentration of that protein - with larger areas
# indicating more protein. As the resolution of images are usually too small to
# make out individual proteins, they can clump together within one pixel,
# making the intensity of that pixel brighter. So, to better capture the
# protein concentration, we may choose to determine what proportion of the
# *intensity* of the protein channel is inside the nucleus. This metric is
# known as Manders' Colocalization Coefficient.
#
# In this image, while there are a lot of protein A spots within the nucleus
# they are dim compared to some of the spots outside the nucleus, so the MCC is
# much lower than the overlap coefficient.

ski.measure.manders_coloc_coeff(proteinA, nucleus_seg)

#####################################################################
# After choosing a co-occurence metric, we can apply the same process to
# control images. If no control images are available, the Costes method could
# be used to compare the MCC value of the original image with that of the
# randomly scrambled image. Information about this method is given in [1]_.
#
# .. [1] J. S. Aaron, A. B. Taylor and T.-L. Chew, Image co-localization –
#        co-occurrence versus correlation. J Cell Sci 1 February 2018
#        131 (3): jcs211847. doi: https://doi.org/10.1242/jcs.211847

#####################################################################
# Correlation: association of two proteins
# ========================================
#
# Now, imagine that we want to know how closely related two proteins are.
#
# First, we will generate protein B and plot intensities of the two proteins in
# every pixel to see the relationship between them.

# generating protein B data that is correlated to protein A for demo
proteinB = proteinA + rng.normal(loc=100, scale=10, size=proteinA.shape)

# plot images
fig, ax = plt.subplots(1, 2, figsize=(8, 8), sharey=True)

ax[0].imshow(proteinA, cmap=black_magenta)
ax[0].set_title('Protein A')

black_cyan = LinearSegmentedColormap.from_list("", ["black", "cyan"])
ax[1].imshow(proteinB, cmap=black_cyan)
ax[1].set_title('Protein B')

for a in ax.ravel():
    a.set_axis_off()

# plot pixel intensity scatter
fig, ax = plt.subplots()
ax.scatter(proteinA, proteinB)
ax.set_title('Pixel intensity')
ax.set_xlabel('Protein A intensity')
ax.set_ylabel('Protein B intensity')

#####################################################################
# The intensities look linearly correlated so Pearson's Correlation Coefficient
# would give us a good measure of how strong the association is.

pcc, pval = ski.measure.pearson_corr_coeff(proteinA, proteinB)
print(f"PCC: {pcc:0.3g}, p-val: {pval:0.3g}")

#####################################################################
# Sometimes the intensities are correlated but not in a linear way. A rank-based
# correlation coefficient like `Spearman's
# <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html>`_
# might give a more accurate measure of the non-linear relationship in that
# case.
plt.show()