File: test_mp_root_sum_gpu.tmpl

package info (click to toggle)
espresso 6.7-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 311,068 kB
  • sloc: f90: 447,429; ansic: 52,566; sh: 40,631; xml: 37,561; tcl: 20,077; lisp: 5,923; makefile: 4,503; python: 4,379; perl: 1,219; cpp: 761; fortran: 618; java: 568; awk: 128
file content (77 lines) | stat: -rw-r--r-- 2,193 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
! Implemented: rm, cm

#if defined(__CUDA)
PROGRAM test_mp_root_sum_{vname}_gpu
!
! Unit test for mp_root_sum called with device (GPU) arrays, {vname} variant.
!
! Part 1: every rank contributes data filled with its own rank index and
!         the result delivered to the root rank is compared with a
!         closed-form value.
! Part 2: each rank in turn is the destination of a sum of random data;
!         the result obtained from device arrays must match the one
!         obtained from host arrays.
!
    USE cudafor
    USE parallel_include
    USE util_param, ONLY : DP
    USE mp, ONLY : mp_root_sum
    USE mp_world, ONLY : mp_world_start, mp_world_end, mpime, &
                          root, nproc, world_comm
    USE tester
    IMPLICIT NONE
    !
    ! Kept ahead of the array declarations in case the size placeholder
    ! refers to it.
    INTEGER, PARAMETER :: datasize = {datasize}
    !
    TYPE(tester_t) :: test
    INTEGER :: world_group
    INTEGER :: expected_sum, n_dims, iroot
    ! random numbers used to fill the host input in part 2
    REAL(DP) :: noise{size}
    ! device-side input and output of mp_root_sum
    {type}, DEVICE :: {vname}_d{size}
    {type}, DEVICE :: root_{vname}_d{size}
    ! host-side input and output of mp_root_sum
    {type} :: {vname}_h{size}
    {type} :: root_{vname}_h{size}
    ! host copy of the device result, used for the comparison in part 2
    {type} :: from_gpu_h{size}
    !
    CALL test%init()
    !
    ! Without MPI the group handle stays at zero.
    world_group = 0
#if defined(__MPI)
    world_group = MPI_COMM_WORLD
#endif
    CALL mp_world_start(world_group)
    !
    ! Part 1: analytic check on the default root
    !
    {vname}_h = mpime
    {vname}_d = {vname}_h
    CALL mp_root_sum({vname}_d, root_{vname}_d, root, world_comm)
    root_{vname}_h = root_{vname}_d
    !
    ! Ranks are zero based, so summing the rank index over nproc ranks gives
    !   (nproc-1)*nproc/2
    ! per element. The total over the whole variable multiplies this by the
    ! number of elements, taken here as datasize**n_dims with n_dims the
    ! array rank (assumes every dimension has extent datasize).
    !
    n_dims = SIZE(SHAPE({vname}_h))
    expected_sum = ({datasize}**n_dims) * (nproc-1)*nproc/2
    !
    IF (mpime == root) THEN
        CALL test%assert_equal(INT({sumf}(root_{vname}_h)), expected_sum)
    END IF
    !
    ! Part 2: device result versus host result, each rank as destination
    !
    ! NOTE(review): save_random_seed is defined outside this file; presumably
    ! it records the seed so that a failing run can be reproduced.
    CALL save_random_seed("test_mp_root_sum_{vname}_gpu", mpime)
    !
    DO iroot = 0, nproc-1
      from_gpu_h = 0
      CALL RANDOM_NUMBER(noise)
      {vname}_h = {typeconv} ( 10.0 * noise )
      {vname}_d = {vname}_h
      ! same call twice: first on the device buffers, then on the host ones
      CALL mp_root_sum({vname}_d, root_{vname}_d, iroot, world_comm)
      CALL mp_root_sum({vname}_h, root_{vname}_h, iroot, world_comm)
      ! only the destination rank inspects the outcome
      IF (mpime /= iroot) CYCLE
      from_gpu_h = root_{vname}_d
      CALL test%assert_equal({sumf}(root_{vname}_h), {sumf}(from_gpu_h))
    END DO
    !
    CALL collect_results(test)
    !
    CALL mp_world_end()
    !
    ! the report is printed by rank 0 only
    IF (mpime == 0) CALL test%print()
    !
END PROGRAM test_mp_root_sum_{vname}_gpu
#else
PROGRAM test_mp_root_sum_{vname}_gpu
!
! Fallback used when __CUDA is not defined: there is no device code to
! exercise, so the program only calls no_test (defined outside this file).
!
    IMPLICIT NONE
    !
    CALL no_test()
    !
END PROGRAM test_mp_root_sum_{vname}_gpu
#endif