File: PoissonDistribution.pyx

package info (click to toggle)
python-pomegranate 0.15.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 36,948 kB
  • sloc: python: 11,489; makefile: 259; sh: 28
file content (107 lines) | stat: -rw-r--r-- 2,759 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!python
#cython: boundscheck=False
#cython: cdivision=True
# PoissonDistribution.pyx
# Contact: Jacob Schreiber <jmschreiber91@gmail.com>

import numpy

from ..utils cimport _log
from ..utils cimport isnan
from ..utils cimport lgamma
from ..utils import check_random_state

from libc.math cimport sqrt as csqrt

from .distributions cimport Distribution

# Define some useful constants
DEF NEGINF = float("-inf")
DEF INF = float("inf")

cdef class PoissonDistribution(Distribution):
	"""The probability of a number of events occuring in a fixed time window.

	A probability distribution which expresses the probability of a
	number of events occurring in a fixed time window. It assumes these events
	occur with at a known rate, and independently of each other. It can
	operate over both integer and float values.
	"""

	property parameters:
		def __get__(self):
			return [self.l]
		def __set__(self, parameters):
			self.l = parameters[0]

	def __init__(self, l, frozen=False):
		self.l = l
		self.logl = _log(l)
		self.name = "PoissonDistribution"
		self.summaries = [0, 0]
		self.frozen = frozen

	def __reduce__(self):
		"""Serialize the distribution for pickle."""
		return self.__class__, (self.l, self.frozen)

	cdef void _log_probability(self, double* X, double* log_probability, int n) nogil:
		cdef int i

		for i in range(n):
			if isnan(X[i]):
				log_probability[i] = 0.
			elif X[i] < 0 or self.l == 0:
				log_probability[i] = NEGINF
			else:
				log_probability[i] = X[i] * self.logl - self.l - lgamma(X[i]+1)

	def sample(self, n=None, random_state=None):
		random_state = check_random_state(random_state)
		return random_state.poisson(self.l, n)

	cdef double _summarize(self, double* items, double* weights, int n,
		int column_idx, int d) nogil:
		"""Cython optimized function to calculate the summary statistics."""

		cdef int i
		cdef double x_sum = 0.0, w_sum = 0.0
		cdef double item

		for i in range(n):
			item = items[i*d + column_idx]
			if isnan(item):
				continue

			x_sum += item * weights[i]
			w_sum += weights[i]

		with gil:
			self.summaries[0] += x_sum
			self.summaries[1] += w_sum

	def from_summaries(self, inertia=0.0):
		"""
		Takes in a series of summaries, consisting of the minimum and maximum
		of a sample, and determine the global minimum and maximum.
		"""

		# If the distribution is frozen, don't bother with any calculation
		if self.frozen == True or self.summaries[0] < 1e-7:
			return

		x_sum, w_sum = self.summaries
		mu = x_sum / w_sum

		self.l = mu*(1-inertia) + self.l*inertia
		self.logl = _log(self.l)
		self.summaries = [0, 0]

	def clear_summaries(self):
		"""Clear the summary statistics stored in the object."""

		self.summaries = [0, 0]

	@classmethod
	def blank(cls):
		return cls(0)