File: test_tmp_storage_sid.cpp

package info (click to toggle)
gridtools 2.3.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,480 kB
  • sloc: cpp: 228,792; python: 17,561; javascript: 9,164; ansic: 4,101; sh: 850; makefile: 231; f90: 201
file content (126 lines) | stat: -rw-r--r-- 7,013 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
 * GridTools
 *
 * Copyright (c) 2014-2023, ETH Zurich
 * All rights reserved.
 *
 * Please, refer to the LICENSE file in the root directory.
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <gridtools/stencil/gpu/tmp_storage_sid.hpp>

#include <memory>

#include <gtest/gtest.h>

#include <gridtools/sid/allocator.hpp>
#include <gridtools/sid/concept.hpp>

#include <multiplet.hpp>

namespace gridtools {
    namespace stencil {
        namespace gpu_backend {
            namespace {
                using namespace literals;

                // Extent handed to make_tmp_storage; its iminus()/iplus()/jminus()/jplus() values also bound the
                // in-block i/j loops of the tests below.
                using extent_t = extent<-1, 2, -3, 4>;
                // Compile-time block sizes in i and j, passed to make_tmp_storage and used as in-block loop bounds.
                constexpr auto blocksize_i = 32_c;
                constexpr auto blocksize_j = 8_c;

                // Run-time sizes passed to make_tmp_storage: number of blocks in i and j, and the size in k.
                // The tests below use the same values as upper bounds of the bi, bj and k loops.
                int_t n_blocks_i = 11;
                int_t n_blocks_j = 12;
                int_t k_size = 13;

                // Writes a unique index tuple to every addressable element of a single-color temporary storage and
                // then reads all of them back, so that any two index tuples mapped to the same memory are detected.
                TEST(tmp_cuda_storage_sid, write_in_blocks) {
                    using index_info = multiplet<5>;

                    auto alloc = sid::allocator(&std::make_unique<char[]>);

                    auto testee = make_tmp_storage<index_info>(
                        1_c, blocksize_i, blocksize_j, extent_t{}, n_blocks_i, n_blocks_j, k_size, alloc);

                    auto strides = sid::get_strides(testee);
                    auto origin = sid::get_origin(testee);

                    // Returns the origin pointer shifted by the in-block offsets (i, j), the block indices (bi, bj)
                    // and the level k, applying the shifts in that order.
                    auto element_at = [&](int i, int j, int bi, int bj, int k) {
                        auto ptr = origin();
                        sid::shift(ptr, host::at_key<dim::i>(strides), i);
                        sid::shift(ptr, host::at_key<dim::j>(strides), j);
                        sid::shift(ptr, host::at_key<sid::blocked_dim<dim::i>>(strides), bi);
                        sid::shift(ptr, host::at_key<sid::blocked_dim<dim::j>>(strides), bj);
                        sid::shift(ptr, host::at_key<dim::k>(strides), k);
                        return ptr;
                    };

                    // Calls visit(i, j, bi, bj, k) for every index tuple covered by the test; i and j include the
                    // extent_t offsets around the block, k is the innermost loop.
                    auto for_each_index = [](auto &&visit) {
                        for (int i = extent_t::iminus(); i < blocksize_i + extent_t::iplus(); ++i)
                            for (int j = extent_t::jminus(); j < blocksize_j + extent_t::jplus(); ++j)
                                for (int bi = 0; bi < n_blocks_i; ++bi)
                                    for (int bj = 0; bj < n_blocks_j; ++bj)
                                        for (int k = 0; k < k_size; ++k)
                                            visit(i, j, bi, bj, k);
                    };

                    // first pass: store the index tuple itself at every location
                    for_each_index([&](int i, int j, int bi, int bj, int k) {
                        auto ptr = element_at(i, j, bi, bj, k);
                        *ptr = {i, j, bi, bj, k};
                    });

                    // second pass: every location must still hold its own index tuple, i.e. there were no
                    // overlapping memory accesses in the write
                    for_each_index([&](int i, int j, int bi, int bj, int k) {
                        auto ptr = element_at(i, j, bi, bj, k);
                        EXPECT_EQ((index_info{i, j, bi, bj, k}), *ptr);
                    });
                }

                // Number of colors of the storage under test. It is used both to create the storage and as the
                // bound of the color loop, so the two cannot get out of sync.
                constexpr auto ncolors = 2_c;

                // Writes a unique index tuple to every addressable element of a multi-color temporary storage and
                // then reads all of them back, so that any two index tuples mapped to the same memory are detected.
                TEST(tmp_cuda_storage_sid_block, write_in_blocks) {
                    using index_info = multiplet<6>;

                    auto alloc = sid::allocator(&std::make_unique<char[]>);

                    // Pass `ncolors` rather than a repeated literal: the loops below iterate over `ncolors`, so the
                    // storage has to be created with exactly that many colors.
                    auto testee = make_tmp_storage<index_info>(
                        ncolors, blocksize_i, blocksize_j, extent_t{}, n_blocks_i, n_blocks_j, k_size, alloc);

                    auto strides = sid::get_strides(testee);
                    auto origin = sid::get_origin(testee);

                    // Returns the origin pointer shifted by the in-block offsets (i, j), the color c, the block
                    // indices (bi, bj) and the level k, applying the shifts in that order.
                    auto element_at = [&](int i, int j, int c, int bi, int bj, int k) {
                        auto ptr = origin();
                        sid::shift(ptr, host::at_key<dim::i>(strides), i);
                        sid::shift(ptr, host::at_key<dim::j>(strides), j);
                        sid::shift(ptr, host::at_key<dim::c>(strides), c);
                        sid::shift(ptr, host::at_key<sid::blocked_dim<dim::i>>(strides), bi);
                        sid::shift(ptr, host::at_key<sid::blocked_dim<dim::j>>(strides), bj);
                        sid::shift(ptr, host::at_key<dim::k>(strides), k);
                        return ptr;
                    };

                    // Calls visit(i, j, c, bi, bj, k) for every index tuple covered by the test; i and j include the
                    // extent_t offsets around the block, k is the innermost loop.
                    auto for_each_index = [](auto &&visit) {
                        for (int i = extent_t::iminus(); i < blocksize_i + extent_t::iplus(); ++i)
                            for (int j = extent_t::jminus(); j < blocksize_j + extent_t::jplus(); ++j)
                                for (int c = 0; c < ncolors; ++c)
                                    for (int bi = 0; bi < n_blocks_i; ++bi)
                                        for (int bj = 0; bj < n_blocks_j; ++bj)
                                            for (int k = 0; k < k_size; ++k)
                                                visit(i, j, c, bi, bj, k);
                    };

                    // first pass: store the index tuple itself at every location
                    for_each_index([&](int i, int j, int c, int bi, int bj, int k) {
                        auto ptr = element_at(i, j, c, bi, bj, k);
                        *ptr = {i, j, c, bi, bj, k};
                    });

                    // second pass: every location must still hold its own index tuple, i.e. there were no
                    // overlapping memory accesses in the write
                    for_each_index([&](int i, int j, int c, int bi, int bj, int k) {
                        auto ptr = element_at(i, j, c, bi, bj, k);
                        EXPECT_EQ((index_info{i, j, c, bi, bj, k}), *ptr);
                    });
                }
            } // namespace
        }     // namespace gpu_backend
    }         // namespace stencil
} // namespace gridtools