1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
# montecarlo.py: Routines for Monte Carlo simulation
# Copyright: Ed Schofield, 2005-2006
# License: BSD-style (see LICENSE.txt at root of scipy tree)
__author__ = "Ed Schofield"
from __future__ import division
import numpy
import scipy
from scipy.sandbox.montecarlo._intsampler import _intsampler
class genericsampler(object):
"""A base class for other samplers.
"""
def __init__(self):
self.format = self.__class__.__name__[:3]
if self.__class__.__name__ == "genericsampler":
raise TypeError, "this class cannot be instantiated directly"
def seed(self, myseed):
"""Initializes the sampler with a given seed. If the seed is 0, uses a
seed from /dev/urandom or the system time.
"""
self.sampler.seed(myseed)
class intsampler(genericsampler):
"""A class that samples objects from a given discrete distribution.
The distribution is defined on an integer-valued sample space and
specified with a PMF as a list or array like this:
>>> table = [10, 15, 20]
representing this pmf:
x 0 1 2
p(x) 10/45 15/45 20/45
The output will be something like:
>>> sampler = intsampler(table)
>> sampler.sample(10)
array([c, b, b, b, b, b, c, b, b, b], dtype=object)
The class is a thin wrapper around the '_intsampler' class, which
uses the 5 table compressed lookup sampler of Marsaglia (2004). It
is very fast, and requires near-constant time in the size of the
sample space. It provides a friendlier interface than _intsampler,
allowing use of an integer-valued PMF (unnormalized) and able to
return pmf values of the sample points in addition to the sample
itself.
"""
def __init__(self, mytable):
self.probs = numpy.array(mytable, float)
s = self.probs.sum()
if s > 0:
self.probs /= s
else:
raise ValueError, "sum of table frequencies must be > 0"
self.sampler = _intsampler(self.probs)
def sample(self, size, return_probs=0):
"""Generates a sample of the given size from the specified
discrete distribution, optionally returning the probabilities
under the distribution.
The optional argument return_probs, 0 by default, has the
following meaning:
0: don't return pmf values at each sample point
1: return pmf values at each sample point
2: return log pmf values at each sample point
"""
sample = self.sampler.sample(size)
if return_probs == 0:
return sample
elif return_probs > 0:
# More fancy indexing:
sampleprobs = self.probs[sample]
if return_probs == 1:
return (sample, sampleprobs)
elif return_probs == 2:
return (sample, numpy.log(sampleprobs))
class dictsampler(genericsampler):
"""A class that samples objects from a given discrete distribution.
The distribution is specified as a dictionary representing a PMF
like this:
>>> table = {}
>>> table['a'] = 10
>>> table['b'] = 15
>>> table['c'] = 20
The output will be something like:
>>> sampler = dictsampler(table)
>> sampler.sample(10)
array([c, b, b, b, b, b, c, b, b, b], dtype=object)
giving a sample from this distribution:
x 'a' 'b' 'c'
p(x) 10/180 150/180 20/180
The function uses the constant-time 'intsampler' class, and should be
very fast.
"""
def __init__(self, mydict):
# We can't use this:
# self.labels = numpy.array(mydict.keys(), object)
# since numpy's construction of object arrays is dodgy. Instead,
# create an empty object array and fill it:
self.labels = numpy.empty(len(mydict), dtype=object)
for i, label in enumerate(mydict):
self.labels[i] = label
self.probs = numpy.array(mydict.values(), float)
s = self.probs.sum()
if s > 0:
self.probs /= s
else:
raise ValueError, "sum of table frequencies must be > 0"
self.sampler = intsampler(self.probs)
def sample(self, size, return_probs=0):
"""Generates a sample of the given size from the specified
discrete distribution, optionally returning the probabilities
under the distribution.
The optional argument return_probs, 0 by default, has the
following meaning:
0: don't return pmf values at each sample point
1: return pmf values at each sample point
2: return log pmf values at each sample point
"""
sampleindices = self.sampler.sample(size)
# Fancy indexing with the object array of labels
sample = self.labels[sampleindices]
if return_probs == 0:
return sample
elif return_probs > 0:
# More fancy indexing:
sampleprobs = self.probs[sampleindices]
if return_probs == 1:
return (sample, sampleprobs)
elif return_probs == 2:
return (sample, numpy.log(sampleprobs))
def _test():
import doctest
doctest.testmod()
if __name__ == "__main__":
_test()
|