1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
# benchmark_distributions.py
# Contact: Jacob Schreiber ( jmschreiber91@gmail.com )
"""
Benchmark the distribution module, printing out the time it takes to do
log probability and training calculations.
"""
from pomegranate import *
import random
import numpy
import time
numpy.random.seed(0)
random.seed(0)
def print_benchmark( distribution, duration ):
    """Print a single benchmark result row.

    The row is the class name of `distribution`, padded to a 25-character
    column, followed by `duration` rendered with the ``.4`` format spec
    and an ``s`` suffix.
    """
    class_name = distribution.__class__.__name__
    row = "{:25}: {:.4}s".format( class_name, duration )
    print( row )
def bench_log_probability( distribution, n=10000000, symbol=5 ):
    """Time repeated log probability evaluations of one symbol.

    Parameters
    ----------
    distribution : object
        Any object exposing a ``log_probability(symbol)`` method.
    n : int, optional
        Number of calls to make. Default is 10,000,000.
    symbol : object, optional
        The value passed to every ``log_probability`` call. Default is 5.

    Returns
    -------
    float
        Elapsed time, in seconds, for the whole loop.
    """
    # perf_counter is monotonic and high resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    tic = time.perf_counter()
    for _ in range(n):
        distribution.log_probability( symbol )
    return time.perf_counter() - tic
def bench_from_sample( distribution, sample, n=1000 ):
    """Time repeated ``summarize`` calls on a fixed sample.

    Parameters
    ----------
    distribution : object
        Any object exposing a ``summarize(sample)`` method.
    sample : object
        The batch of observations handed to every ``summarize`` call.
    n : int, optional
        Number of ``summarize`` calls to make. Default is 1,000.

    Returns
    -------
    float
        Elapsed time, in seconds, for the whole loop.
    """
    # perf_counter is monotonic and high resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    tic = time.perf_counter()
    for _ in range(n):
        distribution.summarize( sample )
    return time.perf_counter() - tic
def benchmark_distribution_log_probabilities():
    """Run the log probability benchmarks and print one row per distribution.

    Univariate continuous distributions, the kernel densities and a mixture
    are timed with the default arguments of ``bench_log_probability``. The
    discrete, independent-components and multivariate Gaussian
    distributions are built and timed individually afterwards; the latter
    two need tuple-valued symbols, and the multivariate Gaussian uses a
    reduced iteration count of 100,000.
    """
    distributions = [
        UniformDistribution( 0, 17 ),
        NormalDistribution( 7, 1 ),
        LogNormalDistribution( 7, 1 ),
        ExponentialDistribution( 7 ),
        GammaDistribution( 7, 3 ),
        GaussianKernelDensity( [0, 1, 4, 3, 2, 0.5, 2, 1, 2] ),
        UniformKernelDensity( [0, 1, 4, 3, 2, 0.5, 2, 1, 2] ),
        TriangleKernelDensity( [0, 1, 4, 3, 2, 0.5, 2, 1, 2] ),
        MixtureDistribution( [
            UniformDistribution( 5, 2 ),
            NormalDistribution( 7, 1 ),
            NormalDistribution( 3, 0.5 ),
        ] ),
    ]

    for distribution in distributions:
        print_benchmark( distribution, bench_log_probability( distribution ) )

    distribution = DiscreteDistribution( {'A': 0.2, 'B': 0.27, 'C': 0.3, 'D': 0.23} )
    print_benchmark( distribution, bench_log_probability( distribution ) )

    distribution = IndependentComponentsDistribution( [
        NormalDistribution( 5, 1 ),
        NormalDistribution( 8, 0.5 ),
        NormalDistribution( 2, 0.1 ),
        NormalDistribution( 13, 0.1 ),
        NormalDistribution( 0.5, 0.01 ),
    ] )
    print_benchmark( distribution,
                     bench_log_probability( distribution, symbol=(5, 4, 3, 2, 1) ) )

    # The file imports the package as `numpy`, not `np`, so the full module
    # name is used here rather than relying on an alias leaking in through
    # the pomegranate star import.
    mu = numpy.random.randn( 4 )
    cov = numpy.random.randn( 4, 4 ) / 10
    # Symmetric matrix: product with its own transpose, element-wise
    # absolute value, plus the identity.
    cov = numpy.abs( cov.dot( cov.T ) ) + numpy.eye( 4 )
    distribution = MultivariateGaussianDistribution( mu, cov )
    print_benchmark( distribution,
                     bench_log_probability( distribution, n=100000, symbol=(1, 2, 3, 4) ) )
def benchmark_distribution_train():
    """Run the training benchmarks and print one row per distribution.

    Each distribution is passed to ``bench_from_sample`` with a sample of
    10,000 items: standard-normal draws for the univariate distributions,
    kernel densities and mixture; a list of category labels for the
    discrete distribution; and 5-column / 4-column standard-normal arrays
    for the independent-components and multivariate Gaussian distributions.
    """
    distributions = [
        UniformDistribution( 0, 17 ),
        NormalDistribution( 7, 1 ),
        LogNormalDistribution( 7, 1 ),
        ExponentialDistribution( 7 ),
        GammaDistribution( 7, 3 ),
        GaussianKernelDensity( [0, 1, 4, 3, 2, 0.5, 2, 1, 2] ),
        UniformKernelDensity( [0, 1, 4, 3, 2, 0.5, 2, 1, 2] ),
        TriangleKernelDensity( [0, 1, 4, 3, 2, 0.5, 2, 1, 2] ),
        MixtureDistribution( [
            UniformDistribution( 5, 2 ),
            NormalDistribution( 7, 1 ),
            NormalDistribution( 3, 0.5 ),
        ] ),
    ]

    # The file imports the package as `numpy`, not `np`, so the full module
    # name is used throughout rather than relying on an alias leaking in
    # through the pomegranate star import.
    sample = numpy.random.randn( 10000 )
    for distribution in distributions:
        print_benchmark( distribution, bench_from_sample( distribution, sample ) )

    sample = ['A']*2500 + ['B']*3000 + ['C']*3500 + ['D']*1000
    distribution = DiscreteDistribution( {'A': 0.2, 'B': 0.27, 'C': 0.3, 'D': 0.23} )
    print_benchmark( distribution, bench_from_sample( distribution, sample ) )

    sample = numpy.random.randn( 10000, 5 )
    distribution = IndependentComponentsDistribution( [
        NormalDistribution( 5, 1 ),
        NormalDistribution( 8, 0.5 ),
        NormalDistribution( 2, 0.1 ),
        NormalDistribution( 13, 0.1 ),
        NormalDistribution( 0.5, 0.01 ),
    ] )
    print_benchmark( distribution, bench_from_sample( distribution, sample ) )

    sample = numpy.random.randn( 10000, 4 )
    mu = numpy.random.randn( 4 )
    cov = numpy.random.randn( 4, 4 ) / 10
    # Symmetric matrix: product with its own transpose, element-wise
    # absolute value, plus the identity.
    cov = numpy.abs( cov.dot( cov.T ) ) + numpy.eye( 4 )
    distribution = MultivariateGaussianDistribution( mu, cov )
    print_benchmark( distribution, bench_from_sample( distribution, sample ) )
# Report banner, followed by a blank separator line.
print( "DISTRIBUTION BENCHMARKS", "-----------------------", "", sep="\n" )

# Section 1: time log probability evaluation for every distribution.
print( "LOG PROBABILITY (N=10,000,000 iterations, N=100,000 FOR MVG)" )
benchmark_distribution_log_probabilities()

# Section 2: blank separator line, then time repeated summarize calls.
print( "", "TRAINING (N=1,000 ITERATIONS, BATCHES=10,000 ITEMS)", sep="\n" )
benchmark_distribution_train()
|