File: qcdf.f

package info (click to toggle)
blitz++ 1:0.10-3.2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 13,276 kB
  • ctags: 12,037
  • sloc: cpp: 70,465; sh: 11,116; fortran: 1,510; python: 1,246; f90: 852; makefile: 701
file content (77 lines) | stat: -rw-r--r-- 2,199 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
C  Initial Fortran 77 version of the Lattice QCD benchmark.

       subroutine qcdf(M, res, src, V, iters)
C
C  Loop-based kernel of the Lattice QCD benchmark.
C
C  For every site, the 3x3 complex matrix M(:,:,site) is multiplied
C  by each of the two 3-element columns src(:,spin,site); the
C  products are stored in res(:,spin,site).  The sweep over all
C  sites is repeated iters times.  The loop body does not depend on
C  the repetition counter, so each repetition stores the same values
C  as long as res does not overlap M or src.
C
C  Arguments:
C    M      (read)     3 x 3 x V array, one matrix per site
C    res    (written)  3 x 2 x V array receiving the products
C    src    (read)     3 x 2 x V array of input columns
C    V      (read)     number of sites (extent of the last dimension)
C    iters  (read)     number of repetitions of the sweep; when it is
C                      less than 1 no element of res is assigned
C
C  implicit none makes the compiler reject any undeclared name
C  instead of typing it silently from its first letter.
C
       implicit none
       integer V, iters, i, site, spin, col
       complex*16 M(3,3,V), res(3,2,V), src(3,2,V)

       DO i=1,iters
         DO site=1,V
           DO spin=1,2
C             Row col of M(:,:,site) times column spin of src.
             DO col=1,3
                res(col,spin,site) = M(col,1,site) * src(1,spin,site)
     .            + M(col,2,site) * src(2,spin,site)
     .            + M(col,3,site) * src(3,spin,site)
             ENDDO
           ENDDO
         ENDDO
       ENDDO

       return
       end

C  Hand-tuned version
C  Changes:
C    o Ordering of array altered to improve layout of data in memory
C    o col and spin loops unwound; it was found that unwinding the
C      col loop inside the spin loop was marginally faster (by 1.1%)
C    o Unwinding both loops was faster than unwinding just one.

       subroutine qcdf2(M, res, src, V, iters)
C
C  Unrolled kernel of the Lattice QCD benchmark.
C
C  For every site, each of the six elements res(col,spin,site),
C  col = 1..3 and spin = 1..2, is assigned by its own statement as
C      M(col,1,site)*src(1,spin,site)
C    + M(col,2,site)*src(2,spin,site)
C    + M(col,3,site)*src(3,spin,site)
C  with no col or spin loop.  The statements run through col = 1,2,3
C  for spin = 1 and then again for spin = 2.  The sweep over all
C  sites is repeated iters times.
C
C  Arguments:
C    M      (read)     3 x 3 x V array, one matrix per site
C    res    (written)  3 x 2 x V array receiving the products
C    src    (read)     3 x 2 x V array of input columns
C    V      (read)     number of sites (extent of the last dimension)
C    iters  (read)     number of repetitions of the sweep; when it is
C                      less than 1 no element of res is assigned
C
C  implicit none makes the compiler reject any undeclared name
C  instead of typing it silently from its first letter.
C
       implicit none
       integer V, iters, i, site
       complex*16 M(3,3,V), res(3,2,V), src(3,2,V)

       DO i=1,iters
         DO site=1,V

C             col=1, spin=1

              res(1,1,site) = M(1,1,site) * src(1,1,site)
     .            + M(1,2,site) * src(2,1,site)
     .            + M(1,3,site) * src(3,1,site)

C             col=2, spin=1

              res(2,1,site) = M(2,1,site) * src(1,1,site)
     .            + M(2,2,site) * src(2,1,site)
     .            + M(2,3,site) * src(3,1,site)

C             col=3, spin=1

              res(3,1,site) = M(3,1,site) * src(1,1,site)
     .            + M(3,2,site) * src(2,1,site)
     .            + M(3,3,site) * src(3,1,site)

C             col=1, spin=2

              res(1,2,site) = M(1,1,site) * src(1,2,site)
     .            + M(1,2,site) * src(2,2,site)
     .            + M(1,3,site) * src(3,2,site)

C             col=2, spin=2

              res(2,2,site) = M(2,1,site) * src(1,2,site)
     .            + M(2,2,site) * src(2,2,site)
     .            + M(2,3,site) * src(3,2,site)

C             col=3, spin=2

              res(3,2,site) = M(3,1,site) * src(1,2,site)
     .            + M(3,2,site) * src(2,2,site)
     .            + M(3,3,site) * src(3,2,site)

         ENDDO
       ENDDO

       return
       end