File: using_vs_adl.cu

package info (click to toggle)
libthrust 1.17.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 10,900 kB
  • sloc: ansic: 29,519; cpp: 23,989; python: 1,421; sh: 811; perl: 460; makefile: 112
file content (171 lines) | stat: -rw-r--r-- 6,333 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#include <thrust/detail/config.h>

#if THRUST_CPP_DIALECT >= 2014

#include <async/test_policy_overloads.h>

#include <async/exclusive_scan/mixin.h>

// Verify what happens when calling the algorithm without any namespace
// qualifiers:
// - If the async entry point is made visible to unqualified lookup by a
//   using-directive (`using namespace thrust::async;`) or a using-declaration
//   (`using thrust::async::exclusive_scan;`), the async algorithm should be
//   called.
// - Otherwise, ADL should resolve the call to the synchronous algorithm in
//   the thrust:: namespace.

namespace invoke_reference
{

template <typename input_value_type,
          typename output_value_type = input_value_type>
struct adl_host_synchronous
{
  template <typename InputType,
            typename OutputType,
            typename PostfixArgTuple,
            std::size_t... PostfixArgIndices>
  static void invoke_reference(InputType const& input,
                               OutputType& output,
                               PostfixArgTuple&& postfix_tuple,
                               std::index_sequence<PostfixArgIndices...>)
  {
    // Create host versions of the input/output:
    thrust::host_vector<input_value_type> host_input(input.cbegin(),
                                                     input.cend());
    thrust::host_vector<output_value_type> host_output(host_input.size());

    using OutIter = thrust::remove_cvref_t<decltype(host_output.begin())>;

    // ADL should resolve this to the synchronous `thrust::` algorithm.
    // This is checked by ensuring that the call returns an output iterator.
    OutIter result =
      exclusive_scan(host_input.cbegin(),
                     host_input.cend(),
                     host_output.begin(),
                     std::get<PostfixArgIndices>(THRUST_FWD(postfix_tuple))...);
    (void)result;

    // Copy back to device.
    output = host_output;
  }
};

} // namespace invoke_reference

namespace invoke_async
{

struct using_namespace
{
  template <typename PrefixArgTuple,
            std::size_t... PrefixArgIndices,
            typename InputType,
            typename OutputType,
            typename PostfixArgTuple,
            std::size_t... PostfixArgIndices>
  static auto invoke_async(PrefixArgTuple&& prefix_tuple,
                           std::index_sequence<PrefixArgIndices...>,
                           InputType const& input,
                           OutputType& output,
                           PostfixArgTuple&& postfix_tuple,
                           std::index_sequence<PostfixArgIndices...>)
  {
    // Importing the CPO into the current namespace should unambiguously resolve
    // this call to the CPO, as opposed to resolving to the thrust:: algorithm
    // via ADL. This is verified by checking that an event is returned.
    using namespace thrust::async;
    thrust::device_event e =
      exclusive_scan(std::get<PrefixArgIndices>(THRUST_FWD(prefix_tuple))...,
                     input.cbegin(),
                     input.cend(),
                     output.begin(),
                     std::get<PostfixArgIndices>(THRUST_FWD(postfix_tuple))...);
    return e;
  }
};

struct using_cpo
{
  template <typename PrefixArgTuple,
            std::size_t... PrefixArgIndices,
            typename InputType,
            typename OutputType,
            typename PostfixArgTuple,
            std::size_t... PostfixArgIndices>
  static auto invoke_async(PrefixArgTuple&& prefix_tuple,
                           std::index_sequence<PrefixArgIndices...>,
                           InputType const& input,
                           OutputType& output,
                           PostfixArgTuple&& postfix_tuple,
                           std::index_sequence<PostfixArgIndices...>)
  {
    // Importing the CPO into the current namespace should unambiguously resolve
    // this call to the CPO, as opposed to resolving to the thrust:: algorithm
    // via ADL. This is verified by checking that an event is returned.
    using thrust::async::exclusive_scan;
    thrust::device_event e =
      exclusive_scan(std::get<PrefixArgIndices>(THRUST_FWD(prefix_tuple))...,
                     input.cbegin(),
                     input.cend(),
                     output.begin(),
                     std::get<PostfixArgIndices>(THRUST_FWD(postfix_tuple))...);
    return e;
  }
};

} // namespace invoke_async

// Invoker for the using-directive scenario, assembled from mixins:
//   - device_vector input and output,
//   - every postfix-argument overload of exclusive_scan,
//   - reference result from the ADL-resolved synchronous call,
//   - async call made after `using namespace thrust::async`,
//   - output comparison via assert_almost_equal_if_fp_quiet.
template <typename input_value_type,
          typename output_value_type   = input_value_type,
          typename initial_value_type  = input_value_type,
          typename alternate_binary_op = thrust::maximum<>>
struct using_namespace_invoker
    : testing::async::mixin::input::device_vector<input_value_type>
    , testing::async::mixin::output::device_vector<output_value_type>
    , testing::async::exclusive_scan::mixin::postfix_args::
        all_overloads<initial_value_type, alternate_binary_op>
    , invoke_reference::adl_host_synchronous<input_value_type,
                                             output_value_type>
    , invoke_async::using_namespace
    , testing::async::mixin::compare_outputs::assert_almost_equal_if_fp_quiet
{
  // Human-readable label for this test configuration.
  static std::string description()
  {
    return "importing async CPO with `using namespace thrust::async`";
  }
};

void test_using_namespace()
{
  using invoker = using_namespace_invoker<int>;
  testing::async::test_policy_overloads<invoker>::run(128);
}
DECLARE_UNITTEST(test_using_namespace);

// Invoker for the using-declaration scenario, assembled from mixins:
//   - device_vector input and output,
//   - every postfix-argument overload of exclusive_scan,
//   - reference result from the ADL-resolved synchronous call,
//   - async call made after `using thrust::async::exclusive_scan`,
//   - output comparison via assert_almost_equal_if_fp_quiet.
template <typename input_value_type,
          typename output_value_type   = input_value_type,
          typename initial_value_type  = input_value_type,
          typename alternate_binary_op = thrust::maximum<>>
struct using_cpo_invoker
    : testing::async::mixin::input::device_vector<input_value_type>
    , testing::async::mixin::output::device_vector<output_value_type>
    , testing::async::exclusive_scan::mixin::postfix_args::
        all_overloads<initial_value_type, alternate_binary_op>
    , invoke_reference::adl_host_synchronous<input_value_type, output_value_type>
    , invoke_async::using_cpo
    , testing::async::mixin::compare_outputs::assert_almost_equal_if_fp_quiet
{
  // Human-readable label for this test configuration.
  //
  // The label names the using-declaration that invoke_async::using_cpo
  // actually performs. `exclusive_scan` is not a namespace, so describing it
  // as a `using namespace ...` directive would be misleading in test output.
  static std::string description()
  {
    return "importing async CPO with "
           "`using thrust::async::exclusive_scan`";
  }
};

void test_using_cpo()
{
  using invoker = using_cpo_invoker<int>;
  testing::async::test_policy_overloads<invoker>::run(128);
}
DECLARE_UNITTEST(test_using_cpo);

#endif // C++14