File: test_ij_cache.cu

package info (click to toggle)
gridtools 2.3.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,480 kB
  • sloc: cpp: 228,792; python: 17,561; javascript: 9,164; ansic: 4,101; sh: 850; makefile: 231; f90: 201
file content (132 lines) | stat: -rw-r--r-- 5,962 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
 * GridTools
 *
 * Copyright (c) 2014-2023, ETH Zurich
 * All rights reserved.
 *
 * Please, refer to the LICENSE file in the root directory.
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <gridtools/stencil/gpu/ij_cache.hpp>

#include <gtest/gtest.h>

#include <gridtools/common/integral_constant.hpp>
#include <gridtools/sid/concept.hpp>
#include <gridtools/stencil/common/extent.hpp>

#include <cuda_test_helper.hpp>

namespace gridtools {
    namespace stencil {
        namespace gpu_backend {
            using namespace literals;

            constexpr auto i_size = 3_c;
            constexpr auto j_size = 8_c;
            using extent_t = extent<-5, 1, -3, 1>;

            TEST(sid_ij_cache, smoke) {

                shared_allocator allocator;
                auto testee = make_ij_cache<double>(1_c, i_size, j_size, extent_t{}, allocator);

                using ij_cache_t = decltype(testee);

                static_assert(is_sid<ij_cache_t>());
                static_assert(std::is_same_v<sid::ptr_type<ij_cache_t>, double *>);
                static_assert(std::is_same_v<sid::ptr_diff_type<ij_cache_t>, int_t>);

                using expected_kind = hymap::keys<dim::c, dim::i, dim::j>::
                    values<integral_constant<int_t, 1>, integral_constant<int_t, 1>, integral_constant<int_t, 9>>;
                static_assert(std::is_same_v<sid::strides_kind<ij_cache_t>, expected_kind>);
                auto strides = sid::get_strides(testee);
                EXPECT_EQ(1, at_key<dim::i>(strides));
                EXPECT_EQ(9, at_key<dim::j>(strides));
                EXPECT_EQ(0, sid::get_stride<dim::k>(strides));
            }

            template <class IJCache1, class IJCache2, class Strides1, class Strides2>
            __device__ bool ij_cache_test(IJCache1 cache1, IJCache2 cache2, Strides1 strides1, Strides2 strides2) {

                auto ptr1 = cache1();
                auto ptr2 = cache2();

                // fill some data into the caches
                sid::shift(ptr1, sid::get_stride<dim::i>(strides1), extent_t::iminus());
                sid::shift(ptr1, sid::get_stride<dim::j>(strides1), extent_t::jminus());

                sid::shift(ptr2, sid::get_stride<dim::i>(strides2), extent_t::iminus());
                sid::shift(ptr2, sid::get_stride<dim::j>(strides2), extent_t::jminus());

                constexpr auto j_total_size = j_size - extent_t::jminus() + extent_t::jplus();
                constexpr auto i_total_size = i_size - extent_t::iminus() + extent_t::iplus();

                for (int j = 0; j < j_total_size; ++j) {
                    for (int i = 0; i < i_total_size; ++i) {
                        *ptr1 = 100 * j + i;
                        *ptr2 = 100 * j + i;

                        sid::shift(ptr1, sid::get_stride<dim::i>(strides1), 1_c);
                        sid::shift(ptr2, sid::get_stride<dim::i>(strides2), 1_c);
                    }
                    sid::shift(ptr1, sid::get_stride<dim::i>(strides1), -i_total_size);
                    sid::shift(ptr2, sid::get_stride<dim::i>(strides2), -i_total_size);

                    sid::shift(ptr1, sid::get_stride<dim::j>(strides1), 1_c);
                    sid::shift(ptr2, sid::get_stride<dim::j>(strides2), 1_c);
                }

                sid::shift(ptr1, sid::get_stride<dim::j>(strides1), -j_total_size);
                sid::shift(ptr2, sid::get_stride<dim::j>(strides2), -j_total_size);

                // shifting in k has no effect
                sid::shift(ptr1, sid::get_stride<dim::k>(strides1), 123);
                sid::shift(ptr2, sid::get_stride<dim::k>(strides2), 456);

                // verify that the data is still in there (no overwrites from the two caches)
                for (int j = 0; j < j_total_size; ++j) {
                    for (int i = 0; i < i_total_size; ++i) {
                        if (*ptr1 != 100. * j + i) {
                            printf(
                                "Float-Cache: Incorrect result at (i=%i, j=%i): %f != %f\n", i, j, *ptr1, 100. * j + i);
                            return false;
                        }
                        if (*ptr2 != 100 * j + i) {
                            printf("Int-Cache: Incorrect result at (i=%i, j=%i): %i != %i\n", i, j, *ptr2, 100 * j + i);
                            return false;
                        }
                        sid::shift(ptr1, sid::get_stride<dim::i>(strides1), 1_c);
                        sid::shift(ptr2, sid::get_stride<dim::i>(strides2), 1_c);
                    }
                    sid::shift(ptr1, sid::get_stride<dim::i>(strides1), -i_total_size);
                    sid::shift(ptr2, sid::get_stride<dim::i>(strides2), -i_total_size);

                    sid::shift(ptr1, sid::get_stride<dim::j>(strides1), 1_c);
                    sid::shift(ptr2, sid::get_stride<dim::j>(strides2), 1_c);
                }
                return true;
            }

            TEST(sid_ij_cache, sid) {
                shared_allocator allocator;
                auto cache1 = make_ij_cache<double>(1_c, i_size, j_size, extent_t{}, allocator);
                auto strides1 = sid::get_strides(cache1);
                auto ptr1 = sid::get_origin(cache1);

                auto cache2 = make_ij_cache<int16_t>(1_c, i_size, j_size, extent_t{}, allocator);
                auto strides2 = sid::get_strides(cache2);
                auto ptr2 = sid::get_origin(cache2);

                EXPECT_TRUE(gridtools::on_device::exec_with_shared_memory(allocator.size(),
                    GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(
                        (&ij_cache_test<decltype(ptr1), decltype(ptr2), decltype(strides1), decltype(strides2)>)),
                    ptr1,
                    ptr2,
                    strides1,
                    strides2));
            }
        } // namespace gpu_backend
    }     // namespace stencil
} // namespace gridtools