1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
"""
Uses C++ map containers for fast dict-like behavior with keys being
integers, and values float.
"""
# Author: Gael Varoquaux
# License: BSD
cimport cython
# C++
from cython.operator cimport dereference as deref, preincrement as inc, \
predecrement as dec
from libcpp.utility cimport pair
from libcpp.map cimport map as cpp_map
import numpy as np
# Import the C-level symbols of numpy
cimport numpy as np
# Numpy must be initialized. When using numpy from C or Cython you must
# _always_ do that, or you will have segfaults
np.import_array()
#DTYPE = np.float64
#ctypedef np.float64_t DTYPE_t
#ITYPE = np.intp
#ctypedef np.intp_t ITYPE_t
###############################################################################
# An object to be used in Python
# Lookup is faster than dict (up to 10 times), and so is full traversal
# (up to 50 times), and assignment (up to 6 times), but creation is
# slower (up to 3 times). Also, a large benefit is that memory
# consumption is reduced a lot compared to a Python dict
cdef class IntFloatDict:
@cython.boundscheck(False)
@cython.wraparound(False)
def __init__(self, np.ndarray[ITYPE_t, ndim=1] keys,
np.ndarray[DTYPE_t, ndim=1] values):
cdef int i
cdef int size = values.size
# Should check that sizes for keys and values are equal, and
# after should boundcheck(False)
for i in range(size):
self.my_map[keys[i]] = values[i]
def __len__(self):
return self.my_map.size()
def __getitem__(self, int key):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.find(key)
if it == self.my_map.end():
# The key is not in the dict
raise KeyError('%i' % key)
return deref(it).second
def __setitem__(self, int key, float value):
self.my_map[key] = value
# Cython 0.20 generates buggy code below. Commenting this out for now
# and relying on the to_arrays method
#def __iter__(self):
# cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.begin()
# cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = self.my_map.end()
# while it != end:
# yield deref(it).first, deref(it).second
# inc(it)
def __iter__(self):
cdef int size = self.my_map.size()
cdef ITYPE_t [:] keys = np.empty(size, dtype=np.intp)
cdef DTYPE_t [:] values = np.empty(size, dtype=np.float64)
self._to_arrays(keys, values)
cdef int idx
cdef ITYPE_t key
cdef DTYPE_t value
for idx in range(size):
key = keys[idx]
value = values[idx]
yield key, value
def to_arrays(self):
"""Return the key, value representation of the IntFloatDict
object.
Returns
=======
keys : ndarray, shape (n_items, ), dtype=int
The indices of the data points
values : ndarray, shape (n_items, ), dtype=float
The values of the data points
"""
cdef int size = self.my_map.size()
cdef np.ndarray[ITYPE_t, ndim=1] keys = np.empty(size,
dtype=np.intp)
cdef np.ndarray[DTYPE_t, ndim=1] values = np.empty(size,
dtype=np.float64)
self._to_arrays(keys, values)
return keys, values
cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values):
# Internal version of to_arrays that takes already-initialized arrays
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.begin()
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = self.my_map.end()
cdef int index = 0
while it != end:
keys[index] = deref(it).first
values[index] = deref(it).second
inc(it)
index += 1
def update(self, IntFloatDict other):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = other.my_map.begin()
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = other.my_map.end()
while it != end:
self.my_map[deref(it).first] = deref(it).second
inc(it)
def copy(self):
cdef IntFloatDict out_obj = IntFloatDict.__new__(IntFloatDict)
# The '=' operator is a copy operator for C++ maps
out_obj.my_map = self.my_map
return out_obj
def append(self, ITYPE_t key, DTYPE_t value):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = self.my_map.end()
# Decrement the iterator
dec(end)
# Construct our arguments
cdef pair[ITYPE_t, DTYPE_t] args
args.first = key
args.second = value
self.my_map.insert(end, args)
###############################################################################
# operation on dict
def argmin(IntFloatDict d):
cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = d.my_map.begin()
cdef cpp_map[ITYPE_t, DTYPE_t].iterator end = d.my_map.end()
cdef ITYPE_t min_key
cdef DTYPE_t min_value = np.inf
while it != end:
if deref(it).second < min_value:
min_value = deref(it).second
min_key = deref(it).first
inc(it)
return min_key, min_value
|