1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
!
! Copyright (C) 2022, MaX CoE
! Distributed under the MIT License
! (license terms are at http://opensource.org/licenses/MIT).
!
!--
!
! Utility functions to perform memory allocation on the device
! using CUDA-Fortran, OpenACC or OpenMP Offload
!
!==================================================================
!==================================================================
! *DO NOT EDIT*: automatically generated from devxlib_malloc_alloc.jf90
!==================================================================
!==================================================================
!
#include<devxlib_macros.h>
#include<devxlib_defs.h>
!
!=======================================
!
submodule (devxlib_malloc) devxlib_malloc_alloc
implicit none
contains
{%- for t in types %}
{%- for p in kinds[t] %}
{%- for d in range(1,dimensions+1) %}
module subroutine {{p.name}}_devxlib_malloc_alloc_{{t[0]|lower}}{{d}}d(dev_ptr, dimensions, ierr, device_id, lbounds)
implicit none
!
{{t}}({{p.val}}), pointer, contiguous, intent(out) DEV_ATTR :: dev_ptr({% for dd in range(d) %}:{% if not loop.last %}, {%- endif %}{% endfor %})
{%- if d == 1 %}
integer(int64), intent(in) :: dimensions
{%- else %}
integer(int64), intent(in) :: dimensions({{d}})
{%- endif %}
integer(int32), intent(out) :: ierr
integer(int32), intent(in), optional :: device_id
{%- if d == 1 %}
integer(int64), intent(in), optional :: lbounds
{%- else %}
integer(int64), intent(in), optional :: lbounds({{d}})
{%- endif %}
#if defined __DXL_CUDAF
integer(kind=c_int) :: info
#endif
{{t}}({{p.val}}) :: dummy
type(c_ptr) :: c_dev_ptr
{{t}}({{p.val}}), pointer, contiguous :: fptr({% for dd in range(d) %}:{% if not loop.last %}, {%- endif %}{% endfor %})
#if defined __DXL_OPENMP_GPU
integer(int32) :: device_id_local
#endif
!
#if defined __DXL_CUDAF
if (present(device_id)) ierr = cudaSetDevice(device_id)
#elif defined __DXL_OPENMP_GPU
if (present(device_id)) then
device_id_local = device_id
else
device_id_local = omp_get_default_device()
endif
#endif
!
if (present(lbounds)) then
#if defined __DXL_CUDAF || defined __DXL_OPENACC || defined __DXL_OPENMP_GPU
if (.not. associated(dev_ptr)) then
# if defined __DXL_CUDAF
{%- if d == 1 %}
info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
# elif defined __DXL_OPENACC
{%- if d == 1 %}
c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
# elif defined __DXL_OPENMP_GPU
{%- if d == 1 %}
c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * dimensions / 8, c_size_t), device_id_local)
{%- else %}
c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * product(dimensions) / 8, c_size_t), device_id_local)
{%- endif %}
# endif
if (c_associated(c_dev_ptr)) then
{%- if d == 1 %}
call c_f_pointer(c_dev_ptr, fptr, [dimensions])
dev_ptr(lbounds:lbounds+dimensions-1_int64) => fptr
{%- else %}
{%- if d > 4 %}
call c_f_pointer(c_dev_ptr, fptr, [{% for dd in range(0,4) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{% else %}, &{%- endif %}{% endfor %}
{% for dd in range(4,d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
dev_ptr({% for dd in range(0,d) %}lbounds({{dd+1}}):lbounds({{dd+1}})+dimensions({{dd+1}})-1_int64{% if not loop.last %},&
{% print(' ') %}{%- endif %}{% endfor %}) => fptr
{%- else %}
call c_f_pointer(c_dev_ptr, fptr, [{% for dd in range(d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
dev_ptr({% for dd in range(0,d) %}lbounds({{dd+1}}):lbounds({{dd+1}})+dimensions({{dd+1}})-1_int64{% if not loop.last %},&
{% print(' ') %}{%- endif %}{% endfor %}) => fptr
{%- endif %}
{%- endif %}
ierr = 0
else
ierr = 1000
endif
endif
#else
{%- if d == 1 %}
if (.not.associated(dev_ptr)) allocate( dev_ptr(lbounds:lbounds+dimensions-1_int64), stat = ierr )
{%- else %}
if (.not.associated(dev_ptr)) allocate( dev_ptr({% for dd in range(0,d) %}lbounds({{dd+1}}):lbounds({{dd+1}})+dimensions({{dd+1}})-1_int64{% if not loop.last %},&
{% print(' ') %}{%- endif %}{% endfor %}), stat = ierr )
{%- endif %}
#endif
else
!
#if defined __DXL_CUDAF || defined __DXL_OPENACC || defined __DXL_OPENMP_GPU
if (.not. associated(dev_ptr)) then
# if defined __DXL_CUDAF
{%- if d == 1 %}
info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
info = cudaMalloc_f(c_dev_ptr,int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
# elif defined __DXL_OPENACC
{%- if d == 1 %}
c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * dimensions / 8, c_size_t))
{%- else %}
c_dev_ptr = acc_malloc_f(int(storage_size(dummy) * product(dimensions) / 8, c_size_t))
{%- endif %}
# elif defined __DXL_OPENMP_GPU
{%- if d == 1 %}
c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * dimensions / 8, c_size_t), device_id_local)
{%- else %}
c_dev_ptr = omp_target_alloc(int(storage_size(dummy) * product(dimensions) / 8, c_size_t), device_id_local)
{%- endif %}
# endif
if (c_associated(c_dev_ptr)) then
{%- if d == 1 %}
call c_f_pointer(c_dev_ptr, dev_ptr, [dimensions])
{%- else %}
{%- if d > 4 %}
call c_f_pointer(c_dev_ptr, dev_ptr, [{% for dd in range(0,4) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{% else %}, &{%- endif %}{% endfor %}
{% for dd in range(4,d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
{%- else %}
call c_f_pointer(c_dev_ptr, dev_ptr, [{% for dd in range(d) %}dimensions({{dd+1}}){% if not loop.last %},{% print(' ') %}{%- endif %}{% endfor %}])
{%- endif %}
{%- endif %}
ierr = 0
else
ierr = 1000
endif
endif
#else
{%- if d == 1 %}
if (.not.associated(dev_ptr)) allocate( dev_ptr(dimensions), stat = ierr )
{%- else %}
if (.not.associated(dev_ptr)) allocate( dev_ptr({% for dd in range(d) %}dimensions({{dd+1}}){% if not loop.last %},&
{% print(' ') %}{%- endif %}{% endfor %}), stat = ierr )
{%- endif %}
#endif
endif
!
end subroutine {{p.name}}_devxlib_malloc_alloc_{{t[0]|lower}}{{d}}d
!
{%- endfor %}
{%- endfor %}
{%- endfor %}
endsubmodule devxlib_malloc_alloc
|