File: test_launch_kernel.cu

package info (click to toggle)
gridtools 2.3.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,480 kB
  • sloc: cpp: 228,792; python: 17,561; javascript: 9,164; ansic: 4,101; sh: 850; makefile: 231; f90: 201
file content (117 lines) | stat: -rw-r--r-- 4,971 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*
 * GridTools
 *
 * Copyright (c) 2014-2023, ETH Zurich
 * All rights reserved.
 *
 * Please, refer to the LICENSE file in the root directory.
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <gridtools/stencil/gpu/launch_kernel.hpp>

#include <gtest/gtest.h>

#include <gridtools/common/cuda_util.hpp>
#include <gridtools/common/defs.hpp>
#include <gridtools/common/host_device.hpp>
#include <gridtools/meta.hpp>
#include <gridtools/stencil/common/extent.hpp>

namespace gridtools {
    namespace stencil {
        namespace gpu_backend {
            template <class Extent, int_t IBlockSize, int_t JBlockSize>
            struct validation_kernel_f {
                int *m_failures;
                int_t m_i_size;
                int_t m_j_size;

                template <class Validator>
                GT_FUNCTION_DEVICE void operator()(int_t iblock, int_t jblock, Validator is_valid) const {
                    int_t i_block_size =
                        (blockIdx.x + 1) * IBlockSize < m_i_size ? IBlockSize : m_i_size - blockIdx.x * IBlockSize;
                    int_t j_block_size =
                        (blockIdx.y + 1) * JBlockSize < m_j_size ? JBlockSize : m_j_size - blockIdx.y * JBlockSize;
                    bool expected = Extent::iminus::value <= iblock && Extent::iplus::value + i_block_size > iblock &&
                                    Extent::jminus::value <= jblock && Extent::jplus::value + j_block_size > jblock;
                    bool actual = is_valid(Extent());
                    if (actual == expected)
                        return;
                    atomicAdd(m_failures, 1);
                    int block_idx_x = blockIdx.x;
                    int block_idx_y = blockIdx.y;
                    printf("false %s at {%d,%d} of block {%d,%d}\n",
                        actual ? "positive" : "negative",
                        iblock,
                        jblock,
                        block_idx_x,
                        block_idx_y);
                }
            };

            template <class MaxExtent, class Extent, int_t IBlockSize, int_t JBlockSize>
            void do_validation_test(int_t i_size, int_t j_size) {
                auto failures = cuda_util::make_clone(0);
                validation_kernel_f<Extent, IBlockSize, JBlockSize> kernel = {failures.get(), i_size, j_size};
                launch_kernel<MaxExtent, IBlockSize, JBlockSize>(i_size, j_size, 1, kernel, 0);
                EXPECT_EQ(0, cuda_util::from_clone(failures));
            }

            TEST(validation, simplest) { do_validation_test<extent<>, extent<>, 32, 8>(128, 128); }

            TEST(validation, rounded_sizes) {
                do_validation_test<extent<-2, 2, -1, 3>, extent<-1, 1, 0, 2>, 32, 8>(128, 128);
            }

            TEST(validation, hori_diff) {
                do_validation_test<extent<-1, 1, -1, 1>, extent<-1, 1, -1, 1>, 32, 8>(128, 128);
            }

            TEST(validation, hori_diff_small_size) {
                do_validation_test<extent<-1, 1, -1, 1>, extent<-1, 1, -1, 1>, 32, 8>(5, 5);
            }

            TEST(validation, max_extent) {
                do_validation_test<extent<-2, 2, -1, 3>, extent<-2, 2, -1, 3>, 32, 8>(123, 50);
            }

            TEST(validation, zero_extent) { do_validation_test<extent<-2, 2, -1, 3>, extent<>, 32, 8>(123, 50); }

            TEST(validation, reduced_extent) {
                do_validation_test<extent<-2, 2, -1, 3>, extent<-1, 1, 0, 2>, 32, 8>(123, 50);
            }

            struct syncthreads_kernel_f {
                int *m_failures;
                int *m_count;

                template <class Validator>
                GT_FUNCTION_DEVICE void operator()(int_t iblock, int_t jblock, Validator is_valid) const {
                    if (is_valid(extent<-1, 1>())) {
                        assert(jblock == 0);
                        assert(iblock >= -1 && iblock <= 1);
                        atomicAdd(m_count, 1);
                    }
                    __syncthreads();
                    if (is_valid(extent<-1, 1>())) {
                        assert(jblock == 0);
                        assert(iblock >= -1 && iblock <= 1);
                        auto count = atomicAdd(m_count, 0);
                        if (count == 3)
                            return;
                        atomicAdd(m_failures, 1);
                        printf("failure: i = %d, count == %d\n", iblock, count);
                    }
                }
            };

            TEST(syncthreads, smoke) {
                auto failures = cuda_util::make_clone(0);
                auto count = cuda_util::make_clone(0);
                launch_kernel<extent<-1, 1>, 32, 8>(1, 1, 1, syncthreads_kernel_f{failures.get(), count.get()}, 0);
                EXPECT_EQ(0, cuda_util::from_clone(failures));
            }
        } // namespace gpu_backend
    }     // namespace stencil
} // namespace gridtools