File: sparseNN_tn.sh

package info (click to toggle)
rocblas 6.4.4-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,082,776 kB
  • sloc: cpp: 244,923; f90: 50,012; python: 50,003; sh: 24,630; asm: 8,917; makefile: 150; ansic: 107; xml: 36; awk: 14
file content (50 lines) | stat: -rwxr-xr-x 1,574 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/bin/bash
# Runs ./rocblas-bench over several grids of GEMM problem sizes, always with
# "-r s", --transposeA T and --transposeB N.  The benchmark binary is looked
# up in the current working directory.
bench=./rocblas-bench
if [ ! -f "${bench}" ]; then
        # Diagnostics go to stderr so they never mix with benchmark output.
        echo "${bench} not found, exit..." >&2
        exit 1
fi

# Report which rocblas shared object(s) the binary resolves to, one ">>" line
# per library.  Only ldd lines whose third field is an absolute path are used:
# that skips "=> not found" entries and guarantees realpath is never invoked
# without an operand when ldd lists no rocblas dependency at all.
mapfile -t rocblas_libs < <(ldd "${bench}" | awk '/rocblas/ && $3 ~ /^\// {print $3}')
if [ "${#rocblas_libs[@]}" -eq 0 ]; then
        echo "warning: ldd reports no resolved rocblas library for ${bench}" >&2
else
        for lib in "${rocblas_libs[@]}"; do
                echo ">>" "$(realpath "${lib}")"
        done
fi

# Sweep 1: m doubles from 512 to 2048; n takes 200, 256, 512, 1024.
# For every (m, n) pair k walks 512, 768, ..., 3072 in steps of 256 and then
# 3200, which is not on the 256-step grid and is therefore listed explicitly.
# lda and ldb are passed as k, ldc as m.
for ((m = 512; m <= 2048; m *= 2)); do
  for n in 200 256 512 1024; do
    for k in {512..3200..256} 3200; do
      "${bench}" -f gemm -r s --transposeA T --transposeB N \
        -m "$m" -n "$n" -k "$k" --alpha 1 --lda "$k" --ldb "$k" --beta 1 --ldc "$m"
    done
  done
done

# Sweep 2: m doubles from 32 to 256; n takes 200, 256, 512, 1024;
# k doubles from 512 to 2048.  lda and ldb are passed as k, ldc as m.
for ((m = 32; m <= 256; m *= 2)); do
  for n in 200 256 512 1024; do
    for ((k = 512; k <= 2048; k *= 2)); do
      "${bench}" -f gemm -r s --transposeA T --transposeB N \
        -m "$m" -n "$n" -k "$k" --alpha 1 --lda "$k" --ldb "$k" --beta 1 --ldc "$m"
    done
  done
done

# Sweep 3: m is fixed at 1 (and so is ldc); n takes 200, 256, 512, 1024;
# k doubles from 1024 to 4096.  lda and ldb are passed as k.
for n in 200 256 512 1024; do
  for ((k = 1024; k <= 4096; k *= 2)); do
    "${bench}" -f gemm -r s --transposeA T --transposeB N \
      -m 1 -n "$n" -k "$k" --alpha 1 --lda "$k" --ldb "$k" --beta 1 --ldc 1
  done
done


# Sweep 4: m doubles from 1024 to 4096; n takes 200, 256, 512, 1024.
# k is built as multiplier * base, with base in 32, 64, 128, 256.
# The multiplier list keeps the run order: 4, 8, ..., 64 first, then the two
# end points 1 and 65.  lda and ldb are passed as k, ldc as m.
for ((m = 1024; m <= 4096; m *= 2)); do
  for n in 200 256 512 1024; do
    for k_base in 32 64 128 256; do
      for mult in {4..64..4} 1 65; do
        k=$((mult * k_base))
        "${bench}" -f gemm -r s --transposeA T --transposeB N \
          -m "$m" -n "$n" -k "$k" --alpha 1 --lda "$k" --ldb "$k" --beta 1 --ldc "$m"
      done
    done
  done
done