File: fp64_errors.patch

package info (click to toggle)
libgpuarray 0.7.6-13
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,176 kB
  • sloc: ansic: 19,235; python: 4,591; makefile: 208; javascript: 71; sh: 15
file content (216 lines) | stat: -rw-r--r-- 7,969 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
Description: More user-friendly "no double support" errors

Messages that actually state that missing double-precision support is the problem,
and the right error type for the tests to recognize as 'skip' rather than 'fail'.

Add 'skip if unsupported' guards to the double-using tests that
don't already have one.

Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Forwarded: https://github.com/Theano/libgpuarray/pull/585

--- a/pygpu/_elemwise.pyx
+++ b/pygpu/_elemwise.pyx
@@ -1,4 +1,4 @@
-from pygpu.gpuarray import GpuArrayException
+from pygpu.gpuarray import GpuArrayException, UnsupportedException
 from pygpu.gpuarray cimport (gpucontext, GA_NO_ERROR, get_typecode,
                              typecode_to_dtype, GpuContext, GpuArray,
                              get_exc, gpuarray_get_elsize)
@@ -15,6 +15,11 @@ cdef bytes to_bytes(s):
       return <bytes>(<unicode>s).encode('ascii')
   raise TypeError("Can't convert to bytes")
 
+cdef extern from "gpuarray/buffer.h":
+    ctypedef struct gpucontext:
+        pass
+    char *gpucontext_error(gpucontext *ctx, int err)
+
 cdef extern from "gpuarray/elemwise.h":
     ctypedef struct _GpuElemwise "GpuElemwise":
         pass
@@ -141,7 +146,11 @@ cdef class GpuElemwise:
         finally:
             free(_args)
         if self.ge is NULL:
-            raise GpuArrayException("Could not initialize C GpuElemwise instance")
+            error_message = gpucontext_error(ctx.ctx, 0).decode(encoding='latin-1')
+            # getting the error type this way is fragile, but the alternative is breaking ABI
+            raise (UnsupportedException if
+            "This device does not support double precision" in error_message else
+             GpuArrayException)("Could not initialize C GpuElemwise instance: " + error_message)
 
     def __dealloc__(self):
         cdef unsigned int i
--- a/src/gpuarray_buffer_opencl.c
+++ b/src/gpuarray_buffer_opencl.c
@@ -781,7 +781,9 @@ static int cl_check_extensions(const cha
     (*count)++;
   }
   if (flags & GA_USE_DOUBLE) {
-    GA_CHECK(check_ext(ctx, CL_DOUBLE));
+    if (check_ext(ctx, CL_DOUBLE) != GA_NO_ERROR) {
+      return error_set(ctx->err, GA_DEVSUP_ERROR, "This device does not support double precision (pygpu int/int, int32+float32, and floating point literals default to double precision)");
+    }
     preamble[*count] = PRAGMA CL_DOUBLE ENABLE;
     (*count)++;
   }
--- a/src/gpuarray_blas_opencl_clblas.c
+++ b/src/gpuarray_blas_opencl_clblas.c
@@ -35,8 +35,12 @@ static inline clblasTranspose convT(cb_t
 static unsigned int refcnt = 0;
 
 static const char *estr(clblasStatus err) {
-  if (err > -1024)
+  if (err > -1024) {
+    if (err == CL_INVALID_DEVICE) {
+      return "Invalid device, or double precision requested on a device that does not support double precision";
+    }
     return cl_error_string((cl_int)err);
+  }
   switch (err) {
   case clblasNotImplemented:
     return "Unimplemented feature";
--- a/pygpu/tests/test_elemwise.py
+++ b/pygpu/tests/test_elemwise.py
@@ -318,24 +318,29 @@ neg_infinity() {return -INFINITY;}
 
 def test_infinity():
     for dtype in ['float32', 'float64']:
-        ac, ag = gen_gpuarray((2,), dtype, ctx=context, cls=elemary)
-        out_g = ag._empty_like_me()
-        flt = 'ga_float' if dtype == 'float32' else 'ga_double'
-        out_arg = arg('out', out_g.dtype, scalar=False, read=False, write=True)
-        preamble = _inf_preamb_tpl.render(flt=flt)
+        infinity(dtype)
 
-        # +infinity
-        ac[:] = numpy.inf
-        expr_inf = 'out = infinity()'
-        kernel = GpuElemwise(context, expr_inf, [out_arg],
-                             preamble=preamble)
-        kernel(out_g)
-        assert numpy.array_equal(ac, numpy.asarray(out_g))
 
-        # -infinity
-        ac[:] = -numpy.inf
-        expr_neginf = 'out = neg_infinity()'
-        kernel = GpuElemwise(context, expr_neginf, [out_arg],
-                             preamble=preamble)
-        kernel(out_g)
-        assert numpy.array_equal(ac, numpy.asarray(out_g))
+@guard_devsup
+def infinity(dtype):
+    ac, ag = gen_gpuarray((2,), dtype, ctx=context, cls=elemary)
+    out_g = ag._empty_like_me()
+    flt = 'ga_float' if dtype == 'float32' else 'ga_double'
+    out_arg = arg('out', out_g.dtype, scalar=False, read=False, write=True)
+    preamble = _inf_preamb_tpl.render(flt=flt)
+
+    # +infinity
+    ac[:] = numpy.inf
+    expr_inf = 'out = infinity()'
+    kernel = GpuElemwise(context, expr_inf, [out_arg],
+                         preamble=preamble)
+    kernel(out_g)
+    assert numpy.array_equal(ac, numpy.asarray(out_g))
+
+    # -infinity
+    ac[:] = -numpy.inf
+    expr_neginf = 'out = neg_infinity()'
+    kernel = GpuElemwise(context, expr_neginf, [out_arg],
+                         preamble=preamble)
+    kernel(out_g)
+    assert numpy.array_equal(ac, numpy.asarray(out_g))
--- a/pygpu/tests/test_reduction.py
+++ b/pygpu/tests/test_reduction.py
@@ -52,8 +52,13 @@ def test_red_big_array():
                   [False, True, False]]:
         yield red_array_sum, 'float32', (2000, 30, 100), redux
 
-
+# this test needs a guard_devsup because Python 'float' is double,
+# and placing one directly on a test_* makes nose not know that it's a test
 def test_red_broadcast():
+    red_broadcast()
+
+@guard_devsup
+def red_broadcast():
     from pygpu.tools import as_argument
 
     dtype = float
@@ -88,6 +93,7 @@ def test_reduction_ops():
                 yield reduction_op, op, dtype, axis
 
 
+@guard_devsup
 def reduction_op(op, dtype, axis):
     c, g = gen_gpuarray((2, 3), dtype=dtype, ctx=context, cls=elemary)
 
--- a/pygpu/tests/test_blas.py
+++ b/pygpu/tests/test_blas.py
@@ -14,6 +14,19 @@ except ImportError as e:
     raise SkipTest("no scipy blas to compare against")
 
 import pygpu.blas as gblas
+from pygpu.gpuarray import (GpuArrayException, UnsupportedException)
+
+def guard_devsup_blasdouble(func):
+    def f(*args, **kwargs):
+        try:
+            func(*args, **kwargs)
+        except UnsupportedException as e:
+            raise SkipTest("operation not supported")
+        except GpuArrayException as e:
+            if 'float64' in args and "does not support double precision" in str(e):
+                raise SkipTest("double precision not supported")
+            raise
+    return f
 
 
 def test_dot():
@@ -25,7 +38,7 @@ def test_dot():
         yield dot, 666, 'float32', False, False, overwrite, init_z
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def dot(N, dtype, offseted_i, sliced, overwrite, init_z):
     cX, gX = gen_gpuarray((N,), dtype, offseted_inner=offseted_i,
                           sliced=sliced, ctx=context)
@@ -61,7 +74,7 @@ def test_gemv():
                overwrite, True, alpha, beta)
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def gemv(shp, dtype, order, trans, offseted_i, sliced,
          overwrite, init_y, alpha=1.0, beta=0.0):
     cA, gA = gen_gpuarray(shp, dtype, order=order, offseted_inner=offseted_i,
@@ -109,7 +122,7 @@ def test_gemm():
                (False, False), False, 1, overwrite, True, alpha, beta)
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def gemm(m, n, k, dtype, order, trans, offseted_o, sliced, overwrite,
          init_res, alpha=1.0, beta=0.0):
     if trans[0]:
@@ -153,7 +166,7 @@ def test_ger():
     for init_res, overwrite in product(bools, bools):
         yield ger, 4, 5, 'float32', 'f', 1, 1, init_res, overwrite
 
-
+@guard_devsup_blasdouble
 def ger(m, n, dtype, order, sliced_x, sliced_y, init_res, overwrite=False):
     cX, gX = gen_gpuarray((m,), dtype, order, sliced=sliced_x, ctx=context)
     cY, gY = gen_gpuarray((n,), dtype, order, sliced=sliced_y, ctx=context)
@@ -192,7 +205,7 @@ def test_rgemmBatch_3d():
                (False, False), False, 1, overwrite, True, alpha, beta)
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def rgemmBatch_3d(b, m, n, k, dtype, order, trans, offseted_o, sliced,
                   overwrite, init_res, alpha=1.0, beta=0.0):
     if trans[0]: