1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
# Copyright (c) 2008 Carnegie Mellon University
#
# You may copy and modify this freely under the same terms as
# Sphinx-III
"""
Divergence and distance measures for multivariate Gaussians and
multinomial distributions.
This module provides some functions for calculating divergence or
distance measures between distributions, or between one distribution
and a codebook of distributions.
"""
__author__ = "David Huggins-Daines <dhdaines@gmail.com>"
__version__ = "$Revision$"
import numpy
def gau_bh(pm, pv, qm, qv):
    """
    Classification-based Bhattacharyya distance between two Gaussians
    with diagonal covariance.  Also computes the Bhattacharyya distance
    between a single Gaussian pm,pv and a codebook of Gaussians qm,qv
    (qm, qv two-dimensional, one Gaussian per row).
    """
    # Reduce over the feature axis: axis 1 when qm is a codebook.
    axis = 1 if len(qm.shape) == 2 else 0
    # Interpolated (average) variance of the two Gaussians.
    avg_var = 0.5 * (pv + qv)
    # "Divergence" component: scaled Mahalanobis distance under avg_var,
    # 0.125 (\mu_q - \mu_p)^T \Sigma_{pq}^{-1} (\mu_q - \mu_p)
    delta = qm - pm
    mahal = 0.125 * (delta * (1. / avg_var) * delta).sum(axis)
    # "Shape" component, based on the covariances only:
    # 0.5 log(|\Sigma_{pq}| / sqrt(|\Sigma_p| |\Sigma_q|))
    ld_avg = numpy.log(avg_var).sum(axis)
    ld_p = numpy.log(pv).sum()
    ld_q = numpy.log(qv).sum(axis)
    shape = 0.5 * (ld_avg - 0.5 * (ld_p + ld_q))
    return mahal + shape
def gau_kl(pm, pv, qm, qv):
    """
    Kullback-Leibler divergence KL(p||q) from Gaussian pm,pv to
    Gaussian qm,qv.  Also computes KL divergence from a single Gaussian
    pm,pv to a set of Gaussians qm,qv (qm, qv two-dimensional, one
    Gaussian per row).
    Diagonal covariances are assumed.  Divergence is expressed in nats.
    """
    if (len(qm.shape) == 2):
        axis = 1
    else:
        axis = 0
    # Log-determinants of the diagonal covariances pv, qv.
    # Summing logs instead of log(prod(...)) avoids overflow/underflow:
    # the product of many variances easily leaves double range, which
    # turned the old prod()-based computation into inf/0 and then NaN.
    ldpv = numpy.log(pv).sum()
    ldqv = numpy.log(qv).sum(axis)
    # Inverse of diagonal covariance qv
    iqv = 1. / qv
    # Difference between means pm, qm
    diff = qm - pm
    return (0.5 * (
        ldqv - ldpv                      # log |\Sigma_q| / |\Sigma_p|
        + (iqv * pv).sum(axis)           # + tr(\Sigma_q^{-1} * \Sigma_p)
        + (diff * iqv * diff).sum(
            axis)  # + (\mu_q-\mu_p)^T\Sigma_q^{-1}(\mu_q-\mu_p)
        - len(pm)))                      # - N
def gau_js(pm, pv, qm, qv):
    """
    Jensen-Shannon divergence between two Gaussians.  Also computes JS
    divergence between a single Gaussian pm,pv and a set of Gaussians
    qm,qv (qm, qv two-dimensional, one Gaussian per row).
    Diagonal covariances are assumed.  Divergence is expressed in nats.

    NOTE: this is the symmetrized KL divergence
    (KL(p||q) + KL(q||p)) / 2, not the mixture-based JS divergence.
    """
    if (len(qm.shape) == 2):
        axis = 1
    else:
        axis = 0
    # Log-determinants of the diagonal covariances pv, qv.
    # Summing logs instead of log(prod(...)) avoids overflow/underflow:
    # the product of many variances easily leaves double range, which
    # turned the old prod()-based computation into inf/0 and then NaN.
    ldpv = numpy.log(pv).sum()
    ldqv = numpy.log(qv).sum(axis)
    # Inverses of diagonal covariances pv, qv
    iqv = 1. / qv
    ipv = 1. / pv
    # Difference between means pm, qm
    diff = qm - pm
    # KL(p||q)
    kl1 = (0.5 * (
        ldqv - ldpv                      # log |\Sigma_q| / |\Sigma_p|
        + (iqv * pv).sum(axis)           # + tr(\Sigma_q^{-1} * \Sigma_p)
        + (diff * iqv * diff).sum(
            axis)  # + (\mu_q-\mu_p)^T\Sigma_q^{-1}(\mu_q-\mu_p)
        - len(pm)))                      # - N
    # KL(q||p)
    kl2 = (0.5 * (
        ldpv - ldqv                      # log |\Sigma_p| / |\Sigma_q|
        + (ipv * qv).sum(axis)           # + tr(\Sigma_p^{-1} * \Sigma_q)
        + (diff * ipv * diff).sum(
            axis)  # + (\mu_q-\mu_p)^T\Sigma_p^{-1}(\mu_q-\mu_p)
        - len(pm)))                      # - N
    # JS(p,q)
    return 0.5 * (kl1 + kl2)
def multi_kl(p, q):
    """Kullback-Leibler divergence from multinomial p to multinomial q,
    expressed in nats.  When q is two-dimensional, computes one
    divergence per row of q."""
    # Reduce over the last axis when q is a codebook of multinomials.
    axis = 1 if len(q.shape) == 2 else 0
    # Clip probabilities away from zero before taking logarithms so
    # that zero-probability mixtures yield finite logs; multiplying by
    # p still zeroes out the contribution of p's own zero components.
    log_ratio = numpy.log(p.clip(1e-10, 1)) - numpy.log(q.clip(1e-10, 1))
    return (p * log_ratio).sum(axis)
def multi_js(p, q):
    """Jensen-Shannon divergence (symmetric) between two multinomials,
    expressed in nats.  When q is two-dimensional, computes one value
    per row of q.

    Computed as the symmetrized KL divergence:
    D_{JS}(P\\|Q) = (D_{KL}(P\\|Q) + D_{KL}(Q\\|P)) / 2
    """
    # Reduce over the last axis when q is a codebook of multinomials.
    axis = 1 if len(q.shape) == 2 else 0
    # Clip probabilities away from zero before taking logarithms to
    # avoid NaNs from zero-probability mixtures.
    lp = numpy.log(p.clip(1e-10, 1))
    lq = numpy.log(q.clip(1e-10, 1))
    kl_qp = (q * (lq - lp)).sum(axis)
    kl_pq = (p * (lp - lq)).sum(axis)
    return 0.5 * (kl_qp + kl_pq)
|