Description: More user-friendly "no double support" errors

Messages that actually state what the problem is, and
the right error type for the tests to recognize it as 'skip' rather than 'fail'.

Add 'skip if unsupported' guards to the double-precision-using tests
that don't already have them.

Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Forwarded: https://github.com/Theano/libgpuarray/pull/585

--- a/pygpu/_elemwise.pyx
+++ b/pygpu/_elemwise.pyx
@@ -1,4 +1,4 @@
-from pygpu.gpuarray import GpuArrayException
+from pygpu.gpuarray import GpuArrayException, UnsupportedException
 from pygpu.gpuarray cimport (gpucontext, GA_NO_ERROR, get_typecode,
                              typecode_to_dtype, GpuContext, GpuArray,
                              get_exc, gpuarray_get_elsize)
@@ -15,6 +15,11 @@ cdef bytes to_bytes(s):
       return <bytes>(<unicode>s).encode('ascii')
   raise TypeError("Can't convert to bytes")
 
+cdef extern from "gpuarray/buffer.h":
+    ctypedef struct gpucontext:
+        pass
+    char *gpucontext_error(gpucontext *ctx, int err)
+
 cdef extern from "gpuarray/elemwise.h":
     ctypedef struct _GpuElemwise "GpuElemwise":
         pass
@@ -141,7 +146,11 @@ cdef class GpuElemwise:
         finally:
             free(_args)
         if self.ge is NULL:
-            raise GpuArrayException("Could not initialize C GpuElemwise instance")
+            error_message = gpucontext_error(ctx.ctx, 0).decode(encoding='latin-1')
+            # getting the error type this way is fragile, but the alternative is breaking ABI
+            raise (UnsupportedException if
+            "This device does not support double precision" in error_message else
+             GpuArrayException)("Could not initialize C GpuElemwise instance: " + error_message)
 
     def __dealloc__(self):
         cdef unsigned int i
--- a/src/gpuarray_buffer_opencl.c
+++ b/src/gpuarray_buffer_opencl.c
@@ -781,7 +781,9 @@ static int cl_check_extensions(const cha
     (*count)++;
   }
   if (flags & GA_USE_DOUBLE) {
-    GA_CHECK(check_ext(ctx, CL_DOUBLE));
+    if (check_ext(ctx, CL_DOUBLE) != GA_NO_ERROR) {
+      return error_set(ctx->err, GA_DEVSUP_ERROR, "This device does not support double precision (pygpu int/int, int32+float32, and floating point literals default to double precision)");
+    }
     preamble[*count] = PRAGMA CL_DOUBLE ENABLE;
     (*count)++;
   }
--- a/src/gpuarray_blas_opencl_clblas.c
+++ b/src/gpuarray_blas_opencl_clblas.c
@@ -35,8 +35,12 @@ static inline clblasTranspose convT(cb_t
 static unsigned int refcnt = 0;
 
 static const char *estr(clblasStatus err) {
-  if (err > -1024)
+  if (err > -1024) {
+    if (err == CL_INVALID_DEVICE) {
+      return "Invalid device, or double precision requested on a device that does not support double precision";
+    }
     return cl_error_string((cl_int)err);
+  }
   switch (err) {
   case clblasNotImplemented:
     return "Unimplemented feature";
--- a/pygpu/tests/test_elemwise.py
+++ b/pygpu/tests/test_elemwise.py
@@ -318,24 +318,29 @@ neg_infinity() {return -INFINITY;}
 
 def test_infinity():
     for dtype in ['float32', 'float64']:
-        ac, ag = gen_gpuarray((2,), dtype, ctx=context, cls=elemary)
-        out_g = ag._empty_like_me()
-        flt = 'ga_float' if dtype == 'float32' else 'ga_double'
-        out_arg = arg('out', out_g.dtype, scalar=False, read=False, write=True)
-        preamble = _inf_preamb_tpl.render(flt=flt)
+        infinity(dtype)
 
-        # +infinity
-        ac[:] = numpy.inf
-        expr_inf = 'out = infinity()'
-        kernel = GpuElemwise(context, expr_inf, [out_arg],
-                             preamble=preamble)
-        kernel(out_g)
-        assert numpy.array_equal(ac, numpy.asarray(out_g))
 
-        # -infinity
-        ac[:] = -numpy.inf
-        expr_neginf = 'out = neg_infinity()'
-        kernel = GpuElemwise(context, expr_neginf, [out_arg],
-                             preamble=preamble)
-        kernel(out_g)
-        assert numpy.array_equal(ac, numpy.asarray(out_g))
+@guard_devsup
+def infinity(dtype):
+    ac, ag = gen_gpuarray((2,), dtype, ctx=context, cls=elemary)
+    out_g = ag._empty_like_me()
+    flt = 'ga_float' if dtype == 'float32' else 'ga_double'
+    out_arg = arg('out', out_g.dtype, scalar=False, read=False, write=True)
+    preamble = _inf_preamb_tpl.render(flt=flt)
+
+    # +infinity
+    ac[:] = numpy.inf
+    expr_inf = 'out = infinity()'
+    kernel = GpuElemwise(context, expr_inf, [out_arg],
+                         preamble=preamble)
+    kernel(out_g)
+    assert numpy.array_equal(ac, numpy.asarray(out_g))
+
+    # -infinity
+    ac[:] = -numpy.inf
+    expr_neginf = 'out = neg_infinity()'
+    kernel = GpuElemwise(context, expr_neginf, [out_arg],
+                         preamble=preamble)
+    kernel(out_g)
+    assert numpy.array_equal(ac, numpy.asarray(out_g))
--- a/pygpu/tests/test_reduction.py
+++ b/pygpu/tests/test_reduction.py
@@ -52,8 +52,13 @@ def test_red_big_array():
                   [False, True, False]]:
         yield red_array_sum, 'float32', (2000, 30, 100), redux
 
-
+# this test needs a guard_devsup because Python 'float' is double,
+# and placing one directly on a test_* makes nose not know that it's a test
 def test_red_broadcast():
+    red_broadcast()
+
+@guard_devsup
+def red_broadcast():
     from pygpu.tools import as_argument
 
     dtype = float
@@ -88,6 +93,7 @@ def test_reduction_ops():
                 yield reduction_op, op, dtype, axis
 
 
+@guard_devsup
 def reduction_op(op, dtype, axis):
     c, g = gen_gpuarray((2, 3), dtype=dtype, ctx=context, cls=elemary)
 
--- a/pygpu/tests/test_blas.py
+++ b/pygpu/tests/test_blas.py
@@ -14,6 +14,19 @@ except ImportError as e:
     raise SkipTest("no scipy blas to compare against")
 
 import pygpu.blas as gblas
+from pygpu.gpuarray import (GpuArrayException, UnsupportedException)
+
+def guard_devsup_blasdouble(func):
+    def f(*args, **kwargs):
+        try:
+            func(*args, **kwargs)
+        except UnsupportedException as e:
+            pytest.skip("operation not supported")
+        except GpuArrayException as e:
+            if 'float64' in args and "does not support double precision" in str(e):
+                pytest.skip("double precision not supported")
+            raise
+    return f
 
 
 def test_dot():
@@ -25,7 +38,7 @@ def test_dot():
         yield dot, 666, 'float32', False, False, overwrite, init_z
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def dot(N, dtype, offseted_i, sliced, overwrite, init_z):
     cX, gX = gen_gpuarray((N,), dtype, offseted_inner=offseted_i,
                           sliced=sliced, ctx=context)
@@ -61,7 +74,7 @@ def test_gemv():
                overwrite, True, alpha, beta)
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def gemv(shp, dtype, order, trans, offseted_i, sliced,
          overwrite, init_y, alpha=1.0, beta=0.0):
     cA, gA = gen_gpuarray(shp, dtype, order=order, offseted_inner=offseted_i,
@@ -109,7 +122,7 @@ def test_gemm():
                (False, False), False, 1, overwrite, True, alpha, beta)
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def gemm(m, n, k, dtype, order, trans, offseted_o, sliced, overwrite,
          init_res, alpha=1.0, beta=0.0):
     if trans[0]:
@@ -153,7 +166,7 @@ def test_ger():
     for init_res, overwrite in product(bools, bools):
         yield ger, 4, 5, 'float32', 'f', 1, 1, init_res, overwrite
 
-
+@guard_devsup_blasdouble
 def ger(m, n, dtype, order, sliced_x, sliced_y, init_res, overwrite=False):
     cX, gX = gen_gpuarray((m,), dtype, order, sliced=sliced_x, ctx=context)
     cY, gY = gen_gpuarray((n,), dtype, order, sliced=sliced_y, ctx=context)
@@ -192,7 +205,7 @@ def test_rgemmBatch_3d():
                (False, False), False, 1, overwrite, True, alpha, beta)
 
 
-@guard_devsup
+@guard_devsup_blasdouble
 def rgemmBatch_3d(b, m, n, k, dtype, order, trans, offseted_o, sliced,
                   overwrite, init_res, alpha=1.0, beta=0.0):
     if trans[0]:
