File: dot_demo.py

package info (click to toggle)
python-scikit-cuda 0.5.3-1
  • links: PTS
  • area: contrib
  • in suites: forky, trixie
  • size: 1,516 kB
  • sloc: python: 18,940; ansic: 459; makefile: 95; sh: 9
file content (57 lines) | stat: -rw-r--r-- 1,884 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python

"""
Demonstrates matrix and vector multiplication on the GPU.
"""
from __future__ import print_function

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import skcuda.linalg as culinalg
import skcuda.misc as cumisc
# Initialize the skcuda.linalg module.
culinalg.init()

import string

# Single precision types are always demonstrated. Double precision is only
# supported by devices with compute capability >= 1.3, so the double
# precision types are appended only when the current device qualifies.
_supports_double = cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3
demo_types = [np.float32, np.complex64]
if _supports_double:
    demo_types += [np.float64, np.complex128]

def _random_array(shape, dtype):
    """Return a random host array with the given shape and dtype.

    Values are drawn with np.random.rand. For complex dtypes a second,
    independent draw supplies the imaginary part; for real dtypes only
    one draw is made.
    """
    values = np.random.rand(*shape)
    if np.iscomplexobj(dtype()):
        values = values + 1j * np.random.rand(*shape)
    return np.asarray(values, dtype)


for dtype in demo_types:
    type_name = str(np.dtype(dtype))

    # --- Chained matrix product: (a . b) . c -------------------------------
    print('Testing matrix multiplication for type ' + type_name)
    mat_a = _random_array((10, 5), dtype)
    mat_b = _random_array((5, 5), dtype)
    mat_c = _random_array((5, 5), dtype)

    # Copy the host matrices to the device.
    mat_a_gpu, mat_b_gpu, mat_c_gpu = (
        gpuarray.to_gpu(mat_a),
        gpuarray.to_gpu(mat_b),
        gpuarray.to_gpu(mat_c),
    )

    partial_gpu = culinalg.dot(mat_a_gpu, mat_b_gpu)
    product_gpu = culinalg.dot(partial_gpu, mat_c_gpu)

    # Explicitly free the intermediate product's GPU buffer before dropping
    # the reference to it.
    partial_gpu.gpudata.free()
    del partial_gpu

    # Compare the GPU result against the same product computed with NumPy.
    expected_product = np.dot(np.dot(mat_a, mat_b), mat_c)
    print('Success status: ', np.allclose(expected_product, product_gpu.get()))

    # --- Vector product ----------------------------------------------------
    print('Testing vector multiplication for type ' + type_name)
    vec_x = _random_array((5,), dtype)
    vec_y = _random_array((5,), dtype)

    vec_x_gpu = gpuarray.to_gpu(vec_x)
    vec_y_gpu = gpuarray.to_gpu(vec_y)

    # The value returned by culinalg.dot for the two vectors is compared
    # directly (without .get()) against NumPy's result.
    gpu_dot = culinalg.dot(vec_x_gpu, vec_y_gpu)
    print('Success status: ', np.allclose(np.dot(vec_x, vec_y), gpu_dot))