File: deviceprops.py

package info (click to toggle)
brian 1.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 23,416 kB
  • ctags: 18,571
  • sloc: python: 68,406; cpp: 29,040; ansic: 5,182; sh: 111; makefile: 58
file content (57 lines) | stat: -rw-r--r-- 1,513 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from pylab import *
import pycuda
import pycuda.autoinit
import pycuda.compiler

# Report the properties of the CUDA device selected by pycuda.autoinit, then
# compile a small test kernel and report how much local memory, shared memory
# and how many registers the compiled function uses.
#
# All output goes through single-argument print(...) calls with %-formatting
# so the script emits the same text on Python 2 and also runs on Python 3.
dev = pycuda.autoinit.device
devattr = dev.get_attributes()

# Dump every attribute: the key, its value, and the class of the key.
for k, v in devattr.items():
    print('%s %s %s' % (k, v, k.__class__))

# Per-block limits of the device, compared by eye with the kernel usage below.
max_threads_per_block = dev.max_threads_per_block
max_shared_memory_per_block = dev.max_shared_memory_per_block
max_registers_per_block = dev.max_registers_per_block

# Kernel template.  SCALAR, BLOCKDIM and LOOP are placeholders substituted
# below.  Each thread copies X[i] into its slot of a BLOCKDIM-element shared
# array, runs the LOOP statements, and writes the slot back to X[i].
code = '''
__global__ void func(SCALAR *X)
{
    const int i = blockIdx.x*blockDim.x+threadIdx.x;
    __shared__ SCALAR sh[BLOCKDIM];
    SCALAR x = X[i];
    sh[threadIdx.x] = x;
    LOOP
    x = sh[threadIdx.x];
    X[i] = x;
}
'''

# The loop body is the update x = x*x+0.1 applied to the thread's shared slot,
# repeated nloop times back to back on one source line.
nloop = 10
loopline = 'x = x*x+0.1;'.replace('x', 'sh[threadIdx.x]')
code = code.replace('LOOP', loopline * nloop)

# Size the shared array for the largest block the device allows.
blocksize = max_threads_per_block
code = code.replace('BLOCKDIM', str(blocksize))

# Element type of the kernel; set to 'float' to inspect the single-precision
# variant instead.
scalar_type = 'double'
code = code.replace('SCALAR', scalar_type)

print('')
print(code)
print('')
print('max_threads_per_block %s' % (max_threads_per_block,))
print('max_shared_memory_per_block %s' % (max_shared_memory_per_block,))
print('max_registers_per_block %s' % (max_registers_per_block,))

# Compile the kernel; it is never launched, only its resource usage is read.
module = pycuda.compiler.SourceModule(code)
f = module.get_function('func')
print('')
print('func local %s' % (f.local_size_bytes,))
print('func shared %s' % (f.shared_size_bytes,))
print('func regs %s' % (f.num_regs,))
print('')
# Local bytes and registers are scaled by the block size to give a per-block
# figure.  The shared size is printed unscaled: the kernel already declares
# the shared array with BLOCKDIM elements, i.e. for the whole block.
print('func local per block %s' % (f.local_size_bytes * blocksize,))
print('func shared per block %s' % (f.shared_size_bytes,))
print('func regs per block %s' % (f.num_regs * blocksize,))

#x = linspace(0, 2, 100000, dtype=float64)