File: _vq_rewrite.pyx

package info (click to toggle)
python-scipy 0.10.1%2Bdfsg2-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 42,232 kB
  • sloc: cpp: 224,773; ansic: 103,496; python: 85,210; fortran: 79,130; makefile: 272; sh: 43
file content (128 lines) | stat: -rw-r--r-- 4,236 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
Cython rewrite of the vector quantization module, originally written
in C at src/vq.c and the wrapper at src/vq_module.c. This should be
easier to maintain than old SWIG output.

Original C version by Damian Eads. 
Translated to Cython by David Warde-Farley, October 2009.
"""

import numpy as np
cimport numpy as np

cdef extern from "math.h":
    float sqrtf(float num)
    double sqrt(double num)

cdef extern from "numpy/arrayobject.h":
    object PyArray_EMPTY(int, np.npy_intp*, int, int)
    cdef enum:
        PyArray_INTP

cdef extern from "numpy/npy_math.h":
    cdef enum:
        NPY_INFINITY

# C types
ctypedef np.float64_t float64_t
ctypedef np.float32_t float32_t
ctypedef np.int32_t int32_t

# Initialize the NumPy C API
np.import_array()

cdef void float_tvq(float32_t *obs, float32_t *code_book,
                   int ncodes, int nfeat, int nobs,
                   np.npy_intp *codes, float32_t *low_dist):
    """
    Quantize Nobs observations to their nearest codebook 
    entry (single-precision version).
    """
    # Temporary variables
    cdef float32_t dist, diff
    cdef int32_t obs_index, code_index, feature
    cdef int32_t offset = 0
    
    # Index and pointer to keep track of the current position in 
    # both arrays so that we don't have to always do index * nfeat.
    cdef int codebook_pos
    cdef float32_t *current_obs 
    
    for obs_index in range(nobs):
        codebook_pos = 0
        low_dist[obs_index] = NPY_INFINITY
        for code_index in range(ncodes):
            dist = 0
            
            # Distance between code_book[code_index] and obs[obs_index]
            for feature in range(nfeat):
                
                # Use current_obs pointer and codebook_pos to minimize
                # pointer arithmetic necessary (i.e. no multiplications)
                current_obs = &(obs[offset])
                diff = code_book[codebook_pos] - current_obs[feature]
                dist += diff * diff
                codebook_pos += 1
            
            dist = sqrtf(dist)
            
            # Replace the code assignment and record distance if necessary
            if dist < low_dist[obs_index]:
                codes[obs_index] = code_index
                low_dist[obs_index] = dist
        
        # Update the offset of the current observation
        offset += nfeat

def vq(np.ndarray obs, np.ndarray codes):
    """
    Vector quantization ndarray wrapper.
    """
    cdef np.npy_intp nobs, ncodes, nfeat, nfeat_codes
    cdef np.ndarray obs_a, codes_a
    cdef np.ndarray outcodes, outdists
    cdef int flags = np.NPY_CONTIGUOUS | np.NPY_NOTSWAPPED | np.NPY_ALIGNED

    # Ensure the arrays are contiguous - done in C to minimize overhead.
    obs_a = np.PyArray_FROM_OF(obs, flags)
    codes_a = np.PyArray_FROM_OF(codes, flags)
     
    if obs_a.ndim == 2:
        nobs = obs_a.shape[0]
        nfeat = obs_a.shape[1]
    elif obs_a.ndim == 1:
        nfeat = obs_a.shape[0]
        nobs = 1
    else:
        raise ValueError('obs must have 0 < obs.ndim <= 2')
    
    if codes_a.ndim == 2:
        ncodes = codes_a.shape[0]
        nfeat_codes = codes_a.shape[1]
    elif codes.ndim == 1:
        # Treat one dimensional arrays as row vectors.
        nfeat_codes = codes_a.shape[0]
        ncodes = 1
    else:
        raise ValueError('codes must have 0 < codes.ndim <= 2')
    
    # Ensure the two arrays have the same number of features (columns).
    if nfeat_codes != nfeat:
        raise ValueError('obs and codes must have same # of ' + \
                         'features (columns)')
    
    # We create this with the C API so that we can be sure that
    # the resulting array has elements big enough to store indices
    # on that platform. Hence, PyArray_INTP.
    outcodes = PyArray_EMPTY(1, &nobs, PyArray_INTP, 0)
    
    # This we just want to match the dtype of the input, so np.empty is fine.
    outdists = np.empty((nobs,), dtype=obs.dtype)
    
    if obs.dtype == np.float32:
        float_tvq(<float32_t *>obs_a.data, <float32_t *>codes_a.data, 
                  ncodes, nfeat, nobs, <np.npy_intp *>outcodes.data, 
                  <float32_t *>outdists.data)
    
    return outcodes, outdists