File: test_shared_allocator.cu

package info (click to toggle)
gridtools 2.3.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,480 kB
  • sloc: cpp: 228,792; python: 17,561; javascript: 9,164; ansic: 4,101; sh: 850; makefile: 231; f90: 201
file content (101 lines) | stat: -rw-r--r-- 4,323 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * GridTools
 *
 * Copyright (c) 2014-2023, ETH Zurich
 * All rights reserved.
 *
 * Please, refer to the LICENSE file in the root directory.
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <gridtools/stencil/gpu/shared_allocator.hpp>

#include <gtest/gtest.h>

#include <gridtools/meta.hpp>

#include <cuda_test_helper.hpp>

namespace gridtools {
    namespace stencil {
        namespace gpu_backend {
            namespace {
                template <typename PtrHolder>
                __device__ uint64_t get_ptr(PtrHolder ptr_holder) {
                    return reinterpret_cast<uint64_t>(ptr_holder());
                }

                TEST(shared_allocator, alignment) {
                    shared_allocator allocator;
                    EXPECT_EQ(0, allocator.size());

                    using alloc1_t = char[14];
                    using alloc2_t = double;
                    using alloc3_t = double;

                    auto alloc1 = allocate(allocator, meta::lazy::id<alloc1_t>{}, 7);
                    auto alloc2 = allocate(allocator, meta::lazy::id<alloc2_t>{}, 4);
                    auto alloc3 = allocate(allocator, meta::lazy::id<alloc3_t>{}, 1);

                    auto ptr1 = on_device::exec_with_shared_memory(
                        allocator.size(), GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&get_ptr<decltype(alloc1)>), alloc1);
                    auto ptr2 = on_device::exec_with_shared_memory(
                        allocator.size(), GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&get_ptr<decltype(alloc2)>), alloc2);
                    auto ptr3 = on_device::exec_with_shared_memory(
                        allocator.size(), GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&get_ptr<decltype(alloc3)>), alloc3);

                    // check alignment for all allocations
                    EXPECT_EQ(ptr1 % alignof(alloc1_t), 0);
                    EXPECT_EQ(ptr2 % alignof(alloc2_t), 0);
                    EXPECT_EQ(ptr3 % alignof(alloc3_t), 0);
                }

                template <class PtrHolderFloat, class PtrHolderInt>
                __global__ void fill_and_check_test(
                    PtrHolderFloat holder1, PtrHolderFloat holder1_shifted, PtrHolderInt holder2, bool *result) {
                    static_assert(std::is_same_v<decltype(holder1()), float *>);
                    static_assert(std::is_same_v<decltype(holder1_shifted()), float *>);
                    static_assert(std::is_same_v<decltype(holder2()), int16_t *>);

                    auto ptr1 = holder1();
                    auto ptr1_shifted = holder1_shifted();
                    auto ptr2 = holder2();

                    ptr1[threadIdx.x] = 100 * blockIdx.x + threadIdx.x;
                    ptr1_shifted[threadIdx.x] = 10000 + 100 * blockIdx.x + threadIdx.x;
                    ptr2[threadIdx.x] = 20000 + 100 * blockIdx.x + threadIdx.x;
                    __syncthreads();

                    if (threadIdx.x == 0) {
                        bool local_result = true;
                        for (int i = 0; i < 32; ++i) {
                            local_result &=
                                (ptr1[i] == 100 * blockIdx.x + i && ptr1[i + 32] == 10000 + 100 * blockIdx.x + i &&
                                    ptr2[i] == 20000 + 100 * blockIdx.x + i);
                        }

                        result[blockIdx.x] = local_result;
                    }
                }

                TEST(shared_allocator, fill_and_check) {
                    shared_allocator allocator;
                    auto float_ptr = allocate(allocator, meta::lazy::id<float>{}, 64);
                    auto int_ptr = allocate(allocator, meta::lazy::id<int16_t>{}, 32);

                    bool *result;
                    GT_CUDA_CHECK(cudaMallocManaged(&result, 2 * sizeof(bool)));

                    fill_and_check_test<<<2, 32, allocator.size()>>>(
                        float_ptr, (float_ptr + 48) + (-16), int_ptr, result);
                    GT_CUDA_CHECK(cudaDeviceSynchronize());

                    EXPECT_TRUE(result[0]);
                    EXPECT_TRUE(result[1]);

                    GT_CUDA_CHECK(cudaFree(result));
                }
            } // namespace
        }     // namespace gpu_backend
    }         // namespace stencil
} // namespace gridtools