File: benchmark-all.py

Package: pyopencl 0.92.dfsg-1 (Debian squeeze, area: contrib)
# example provided by Roger Pau Monné

import pyopencl as cl
import numpy
import numpy.linalg as la
import datetime
from time import time

a = numpy.random.rand(1000).astype(numpy.float32)
b = numpy.random.rand(1000).astype(numpy.float32)
c_result = numpy.empty_like(a)
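# a and b are 1000-element float32 inputs; c_result holds the CPU reference output.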

# Speed in normal CPU usage
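# (the inner j loop repeats the arithmetic 1000 times, mirroring the kernel's loop below)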
time1 = time()
for i in range(1000):
        for j in range(1000):
                c_result[i] = a[i] + b[i]
                c_result[i] = c_result[i] * (a[i] + b[i])
                c_result[i] = c_result[i] * (a[i] / 2.0)
time2 = time()
print "Execution time of test without OpenCL: ", time2 - time1, "s"


for platform in cl.get_platforms():
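    # Run the same benchmark on every device of every available OpenCL platform.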
    for device in platform.get_devices():
        print "==============================================================="
        print "Platform name:", platform.name
        print "Platform profile:", platform.profile
        print "Platform vendor:", platform.vendor
        print "Platform version:", platform.version
        print "---------------------------------------------------------------"
        print "Device name:", device.name
        print "Device type:", cl.device_type.to_string(device.type)
        print "Device memory: ", device.global_mem_size//1024//1024, 'MB'
        print "Device max clock speed:", device.max_clock_frequency, 'MHz'
        print "Device compute units:", device.max_compute_units

        # Simple speed test
        ctx = cl.Context([device])
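        # PROFILING_ENABLE makes the kernel event record start/end timestamps.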
        queue = cl.CommandQueue(ctx, 
                properties=cl.command_queue_properties.PROFILING_ENABLE)

        mf = cl.mem_flags
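        # COPY_HOST_PTR initializes the read-only device buffers from the host arrays a and b.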
        a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
        b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
        dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
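
        # The OpenCL kernel below repeats the same three operations 1000 times
        # per work-item, mirroring the CPU reference loop above.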

        prg = cl.Program(ctx, """
            __kernel void sum(__global const float *a,
                              __global const float *b,
                              __global float *c)
            {
                int loop;
                int gid = get_global_id(0);
                for (loop = 0; loop < 1000; loop++)
                {
                    c[gid] = a[gid] + b[gid];
                    c[gid] = c[gid] * (a[gid] + b[gid]);
                    c[gid] = c[gid] * (a[gid] / 2.0);
                }
            }
            """).build()

        exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
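        # a.shape as the global size launches one work-item per element; passing
        # None for the local size lets the implementation pick a work-group size.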
        exec_evt.wait()
        elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)
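        # profile.start/.end are device timestamps in nanoseconds, hence the 1e-9 factor.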

        print "Execution time of test: %g s" % elapsed

        c = numpy.empty_like(a)
        cl.enqueue_read_buffer(queue, dest_buf, c).wait()
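        # Compare the device result against the CPU reference element by element.
        # (Exact float equality; a tolerance-based check such as numpy.allclose
        # would be more forgiving if the device rounds differently.)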
        error = 0
        for i in range(1000):
                if c[i] != c_result[i]:
                        error = 1
        if error:
                print "Results doesn't match!!"
        else:
                print "Results OK"