1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
import numpy
from .dtypes import dtype_to_ctype, get_common_dtype
from . import gpuarray
from ._elemwise import GpuElemwise, arg
__all__ = ['GpuElemwise', 'arg', 'as_argument',
'elemwise1', 'elemwise2', 'ielemwise2', 'compare']
def _dtype(o):
if hasattr(o, 'dtype'):
return o.dtype
return numpy.asarray(o).dtype
def as_argument(o, name, read=False, write=False):
if (not read) and (not write):
raise ValueError('argument is neither read not write')
return arg(name, _dtype(o), scalar=not isinstance(o, gpuarray.GpuArray),
read=read, write=write)
def elemwise1(a, op, oper=None, op_tmpl="res = %(op)sa", out=None,
convert_f16=True):
args = (as_argument(a, 'res', write=True), as_argument(a, 'a', read=True))
if out is None:
res = a._empty_like_me()
else:
res = out
if oper is None:
oper = op_tmpl % {'op': op}
k = GpuElemwise(a.context, oper, args, convert_f16=convert_f16)
k(res, a)
return res
def elemwise2(a, op, b, ary, odtype=None, oper=None,
op_tmpl="res = (%(out_t)s)a %(op)s (%(out_t)s)b",
broadcast=False, convert_f16=True):
ndim_extend = True
if not isinstance(a, gpuarray.GpuArray):
a = numpy.asarray(a)
ndim_extend = False
if not isinstance(b, gpuarray.GpuArray):
b = numpy.asarray(b)
ndim_extend = False
if odtype is None:
odtype = get_common_dtype(a, b, True)
a_arg = as_argument(a, 'a', read=True)
b_arg = as_argument(b, 'b', read=True)
args = [arg('res', odtype, write=True), a_arg, b_arg]
if ndim_extend:
if a.ndim != b.ndim:
nd = max(a.ndim, b.ndim)
if a.ndim < nd:
a = a.reshape(((1,) * (nd - a.ndim)) + a.shape)
if b.ndim < nd:
b = b.reshape(((1,) * (nd - b.ndim)) + b.shape)
out_shape = tuple(max(sa, sb) for sa, sb in zip(a.shape, b.shape))
res = gpuarray.empty(out_shape, dtype=odtype, context=ary.context,
cls=ary.__class__)
else:
res = ary._empty_like_me(dtype=odtype)
if oper is None:
if convert_f16 and odtype == 'float16':
odtype = numpy.dtype('float32')
oper = op_tmpl % {'op': op, 'out_t': dtype_to_ctype(odtype)}
k = GpuElemwise(ary.context, oper, args, convert_f16=convert_f16)
k(res, a, b, broadcast=broadcast)
return res
def ielemwise2(a, op, b, oper=None, op_tmpl="a = a %(op)s b",
broadcast=False, convert_f16=True):
if not isinstance(b, gpuarray.GpuArray):
b = numpy.asarray(b)
a_arg = as_argument(a, 'a', read=True, write=True)
b_arg = as_argument(b, 'b', read=True)
args = [a_arg, b_arg]
if oper is None:
oper = op_tmpl % {'op': op}
k = GpuElemwise(a.context, oper, args, convert_f16=convert_f16)
k(a, b, broadcast=broadcast)
return a
def compare(a, op, b, broadcast=False, convert_f16=True):
return elemwise2(a, op, b, a, odtype=numpy.dtype('bool'),
op_tmpl="res = (a %(op)s b)",
broadcast=broadcast, convert_f16=convert_f16)
|