File: devxlib_malloc_alloc.jf90

package info (click to toggle)
devicexlib 0.8.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 10,364 kB
  • sloc: f90: 77,678; sh: 3,701; fortran: 773; makefile: 268; python: 246; ansic: 69; awk: 36
file content (172 lines) | stat: -rw-r--r-- 7,692 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
!
! Copyright (C) 2022, MaX CoE
! Distributed under the MIT License 
! (license terms are at http://opensource.org/licenses/MIT).
!
!--
!
! Utility functions to perform memory allocation on the device
! using CUDA-Fortran, OpenACC or OpenMP Offload
!
!==================================================================
!==================================================================
! *DO NOT EDIT*: automatically generated from devxlib_malloc_alloc.jf90
!==================================================================
!==================================================================
!
#include<devxlib_macros.h>
#include<devxlib_defs.h>
!
!=======================================
!
submodule (devxlib_malloc) devxlib_malloc_alloc

   implicit none

   contains
{%- for t in types %}
{%- for p in kinds[t] %}
{%- for d in range(1,dimensions+1) %}
      module subroutine {{p.name}}_devxlib_malloc_alloc_{{t[0]|lower}}{{d}}d(dev_ptr, dimensions, ierr, device_id, lbounds)
          implicit none
          !
          {{t}}({{p.val}}), pointer, contiguous, intent(out) DEV_ATTR :: dev_ptr({% for dd in range(d) %}:{% if not loop.last %}, {%- endif %}{% endfor %})
          {%- if d == 1 %}
          integer(int64), intent(in)           :: dimensions
          {%- else %}
          integer(int64), intent(in)           :: dimensions({{d}})
          {%- endif %}
          integer(int32), intent(out)          :: ierr
          integer(int32), intent(in), optional :: device_id
{%- if d == 1 %}
          integer(int64), intent(in), optional :: lbounds
{%- else %}
          integer(int64), intent(in), optional :: lbounds({{d}})
{%- endif %}
#if defined __DXL_CUDAF
          integer(kind=c_int) :: info
#endif
          {{t}}({{p.val}}) :: dummy
          type(c_ptr) :: c_dev_ptr
          {{t}}({{p.val}}), pointer, contiguous :: fptr({% for dd in range(d) %}:{% if not loop.last %}, {%- endif %}{% endfor %})
#if defined __DXL_OPENMP_GPU
          integer(int32) :: device_id_local
#endif
          !
#if defined __DXL_CUDAF
          if (present(device_id)) ierr = cudaSetDevice(device_id)
#elif defined __DXL_OPENMP_GPU
          if (present(device_id)) then
             device_id_local = device_id
          else
             device_id_local = omp_get_default_device()
          endif
#endif
          !
          if (present(lbounds)) then
#if defined __DXL_CUDAF || defined __DXL_OPENACC || defined __DXL_OPENMP_GPU
             if (.not. associated(dev_ptr)) then
#  if defined __DXL_CUDAF
{%- if d == 1 %}
                info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
                info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
#  elif defined __DXL_OPENACC
{%- if d == 1 %}
                c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
                c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
#  elif defined __DXL_OPENMP_GPU
{%- if d == 1 %}
                c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * dimensions / 8, c_size_t), device_id_local)
{%- else %}
                c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * product(dimensions) / 8, c_size_t), device_id_local)
{%- endif %}
#  endif
                if (c_associated(c_dev_ptr)) then
{%- if d == 1 %}
                   call c_f_pointer(c_dev_ptr, fptr, [dimensions])
                   dev_ptr(lbounds:lbounds+dimensions-1_int64) => fptr
{%- else %}
            {%- if d > 4 %}
                   call c_f_pointer(c_dev_ptr, fptr, [{% for dd in range(0,4) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{% else %}, &{%- endif %}{% endfor %}
                                                     {% for dd in range(4,d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
                   dev_ptr({% for dd in range(0,d) %}lbounds({{dd+1}}):lbounds({{dd+1}})+dimensions({{dd+1}})-1_int64{% if not loop.last %},&
                          {% print(' ') %}{%- endif %}{% endfor %}) => fptr
            {%- else %}
                   call c_f_pointer(c_dev_ptr, fptr, [{% for dd in range(d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
                   dev_ptr({% for dd in range(0,d) %}lbounds({{dd+1}}):lbounds({{dd+1}})+dimensions({{dd+1}})-1_int64{% if not loop.last %},&
                          {% print(' ') %}{%- endif %}{% endfor %}) => fptr
            {%- endif %}
{%- endif %}
                   ierr = 0
                else
                   ierr = 1000
                endif
             endif
#else
{%- if d == 1 %}
             if (.not.associated(dev_ptr)) allocate( dev_ptr(lbounds:lbounds+dimensions-1_int64), stat = ierr )
{%- else %}
             if (.not.associated(dev_ptr)) allocate( dev_ptr({% for dd in range(0,d) %}lbounds({{dd+1}}):lbounds({{dd+1}})+dimensions({{dd+1}})-1_int64{% if not loop.last %},&
                                                            {% print(' ') %}{%- endif %}{% endfor %}), stat = ierr )
{%- endif %}
#endif
          else
             !
#if defined __DXL_CUDAF || defined __DXL_OPENACC || defined __DXL_OPENMP_GPU
             if (.not. associated(dev_ptr)) then
#  if defined __DXL_CUDAF
{%- if d == 1 %}
                info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
                info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
#  elif defined __DXL_OPENACC
{%- if d == 1 %}
                c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
                c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
#  elif defined __DXL_OPENMP_GPU
{%- if d == 1 %}
                c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * dimensions / 8, c_size_t), device_id_local)
{%- else %}
                c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * product(dimensions) / 8, c_size_t), device_id_local)
{%- endif %}
#  endif
                if (c_associated(c_dev_ptr)) then
{%- if d == 1 %}
                   call c_f_pointer(c_dev_ptr, dev_ptr, [dimensions])
{%- else %}
            {%- if d > 4 %}
                   call c_f_pointer(c_dev_ptr, dev_ptr, [{% for dd in range(0,4) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{% else %}, &{%- endif %}{% endfor %}
                                                         {% for dd in range(4,d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
            {%- else %}
                   call c_f_pointer(c_dev_ptr, dev_ptr, [{% for dd in range(d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
            {%- endif %}
{%- endif %}
                   ierr = 0
                else
                   ierr = 1000
                endif
             endif
#else
             {%- if d == 1 %}
             if (.not.associated(dev_ptr)) allocate( dev_ptr(dimensions), stat = ierr )
             {%- else %}
             if (.not.associated(dev_ptr)) allocate( dev_ptr({% for dd in range(d) %}dimensions({{dd+1}}){% if not loop.last %},&
                                                    {% print(' ') %}{%- endif %}{% endfor %}), stat = ierr )
             {%- endif %}
#endif
          endif
          !
      end subroutine {{p.name}}_devxlib_malloc_alloc_{{t[0]|lower}}{{d}}d
      !
{%- endfor %}
{%- endfor %}
{%- endfor %}

endsubmodule devxlib_malloc_alloc