1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
"""
Cython rewrite of the vector quantization module, originally written
in C at src/vq.c and the wrapper at src/vq_module.c. This should be
easier to maintain than old SWIG output.
Original C version by Damian Eads.
Translated to Cython by David Warde-Farley, October 2009.
"""
import numpy as np
cimport numpy as np
cdef extern from "math.h":
float sqrtf(float num)
double sqrt(double num)
cdef extern from "numpy/arrayobject.h":
object PyArray_EMPTY(int, np.npy_intp*, int, int)
cdef enum:
PyArray_INTP
cdef extern from "numpy/npy_math.h":
cdef enum:
NPY_INFINITY
# C types
ctypedef np.float64_t float64_t
ctypedef np.float32_t float32_t
ctypedef np.int32_t int32_t
# Initialize the NumPy C API
np.import_array()
cdef void float_tvq(float32_t *obs, float32_t *code_book,
int ncodes, int nfeat, int nobs,
np.npy_intp *codes, float32_t *low_dist):
"""
Quantize Nobs observations to their nearest codebook
entry (single-precision version).
"""
# Temporary variables
cdef float32_t dist, diff
cdef int32_t obs_index, code_index, feature
cdef int32_t offset = 0
# Index and pointer to keep track of the current position in
# both arrays so that we don't have to always do index * nfeat.
cdef int codebook_pos
cdef float32_t *current_obs
for obs_index in range(nobs):
codebook_pos = 0
low_dist[obs_index] = NPY_INFINITY
for code_index in range(ncodes):
dist = 0
# Distance between code_book[code_index] and obs[obs_index]
for feature in range(nfeat):
# Use current_obs pointer and codebook_pos to minimize
# pointer arithmetic necessary (i.e. no multiplications)
current_obs = &(obs[offset])
diff = code_book[codebook_pos] - current_obs[feature]
dist += diff * diff
codebook_pos += 1
dist = sqrtf(dist)
# Replace the code assignment and record distance if necessary
if dist < low_dist[obs_index]:
codes[obs_index] = code_index
low_dist[obs_index] = dist
# Update the offset of the current observation
offset += nfeat
def vq(np.ndarray obs, np.ndarray codes):
"""
vq(obs, codes)
Vector quantization ndarray wrapper.
"""
cdef np.npy_intp nobs, ncodes, nfeat, nfeat_codes
cdef np.ndarray obs_a, codes_a
cdef np.ndarray outcodes, outdists
cdef int flags = np.NPY_CONTIGUOUS | np.NPY_NOTSWAPPED | np.NPY_ALIGNED
# Ensure the arrays are contiguous - done in C to minimize overhead.
obs_a = np.PyArray_FROM_OF(obs, flags)
codes_a = np.PyArray_FROM_OF(codes, flags)
if obs_a.ndim == 2:
nobs = obs_a.shape[0]
nfeat = obs_a.shape[1]
elif obs_a.ndim == 1:
nfeat = obs_a.shape[0]
nobs = 1
else:
raise ValueError('obs must have 0 < obs.ndim <= 2')
if codes_a.ndim == 2:
ncodes = codes_a.shape[0]
nfeat_codes = codes_a.shape[1]
elif codes.ndim == 1:
# Treat one dimensional arrays as row vectors.
nfeat_codes = codes_a.shape[0]
ncodes = 1
else:
raise ValueError('codes must have 0 < codes.ndim <= 2')
# Ensure the two arrays have the same number of features (columns).
if nfeat_codes != nfeat:
raise ValueError('obs and codes must have same # of ' + \
'features (columns)')
# We create this with the C API so that we can be sure that
# the resulting array has elements big enough to store indices
# on that platform. Hence, PyArray_INTP.
outcodes = PyArray_EMPTY(1, &nobs, PyArray_INTP, 0)
# This we just want to match the dtype of the input, so np.empty is fine.
outdists = np.empty((nobs,), dtype=obs.dtype)
if obs.dtype == np.float32:
float_tvq(<float32_t *>obs_a.data, <float32_t *>codes_a.data,
ncodes, nfeat, nobs, <np.npy_intp *>outcodes.data,
<float32_t *>outdists.data)
return outcodes, outdists
|