File: strassen.f90

package info (click to toggle)
gcc-avr 1%3A5.4.0%2BAtmel3.6.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 589,872 kB
  • sloc: ansic: 2,775,581; ada: 756,757; cpp: 723,977; f90: 117,673; asm: 66,898; makefile: 62,755; xml: 44,466; sh: 29,549; exp: 23,315; objc: 15,216; fortran: 10,901; pascal: 4,185; python: 4,093; perl: 2,969; awk: 2,811; ml: 2,385; cs: 879; yacc: 316; lex: 198; haskell: 112; lisp: 8
file content (76 lines) | stat: -rw-r--r-- 2,362 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
! { dg-options "-O2" }
! { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} }

program strassen_matmul
  use omp_lib
  integer, parameter :: N = 1024
  double precision, save :: A(N,N), B(N,N), C(N,N), D(N,N)
  double precision :: start, end

  call random_seed
  call random_number (A)
  call random_number (B)
  start = omp_get_wtime ()
  C = matmul (A, B)
  end = omp_get_wtime ()
  write(*,'(a, f10.6)') ' Time for matmul      = ', end - start
  D = 0
  start = omp_get_wtime ()
  call strassen (A, B, D, N)
  end = omp_get_wtime ()
  write(*,'(a, f10.6)') ' Time for Strassen    = ', end - start
  if (sqrt (sum ((C - D) ** 2)) / N .gt. 0.1) call abort
  D = 0
  start = omp_get_wtime ()
!$omp parallel
!$omp single
  call strassen (A, B, D, N)
!$omp end single nowait
!$omp end parallel
  end = omp_get_wtime ()
  write(*,'(a, f10.6)') ' Time for Strassen MP = ', end - start
  if (sqrt (sum ((C - D) ** 2)) / N .gt. 0.1) call abort

contains

  recursive subroutine strassen (A, B, C, N)
    integer, intent(in) :: N
    double precision, intent(in) :: A(N,N), B(N,N)
    double precision, intent(out) :: C(N,N)
    double precision :: T(N/2,N/2,7)
    integer :: K, L

    if (iand (N,1) .ne. 0 .or. N < 64) then
      C = matmul (A, B)
      return
    end if
    K = N / 2
    L = N / 2 + 1
!$omp task shared (A, B, T)
    call strassen (A(:K,:K) + A(L:,L:), B(:K,:K) + B(L:,L:), T(:,:,1), K)
!$omp end task
!$omp task shared (A, B, T)
    call strassen (A(L:,:K) + A(L:,L:), B(:K,:K), T(:,:,2), K)
!$omp end task
!$omp task shared (A, B, T)
    call strassen (A(:K,:K), B(:K,L:) - B(L:,L:), T(:,:,3), K)
!$omp end task
!$omp task shared (A, B, T)
    call strassen (A(L:,L:), B(L:,:K) - B(:K,:K), T(:,:,4), K)
!$omp end task
!$omp task shared (A, B, T)
    call strassen (A(:K,:K) + A(:K,L:), B(L:,L:), T(:,:,5), K)
!$omp end task
!$omp task shared (A, B, T)
    call strassen (A(L:,:K) - A(:K,:K), B(:K,:K) + B(:K,L:), T(:,:,6), K)
!$omp end task
!$omp task shared (A, B, T)
    call strassen (A(:K,L:) - A(L:,L:), B(L:,:K) + B(L:,L:), T(:,:,7), K)
!$omp end task
!$omp taskwait
    C(:K,:K) = T(:,:,1) + T(:,:,4) - T(:,:,5) + T(:,:,7)
    C(L:,:K) = T(:,:,2) + T(:,:,4)
    C(:K,L:) = T(:,:,3) + T(:,:,5)
    C(L:,L:) = T(:,:,1) - T(:,:,2) + T(:,:,3) + T(:,:,6)
  end subroutine strassen
end