File: sycl_reduce.cpp

package info (click to toggle)
taskflow 3.9.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 45,948 kB
  • sloc: cpp: 39,058; xml: 35,572; python: 12,935; javascript: 1,732; makefile: 59; sh: 16
file content (62 lines) | stat: -rw-r--r-- 1,402 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// This program demonstrates how to perform a parallel reduction
// using syclFlow.

#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <taskflow/sycl/syclflow.hpp>
#include <taskflow/sycl/algorithm/reduce.hpp>

int main(int argc, char* argv[]) {

  if(argc != 2) {
    std::cerr << "usage: ./sycl_reduce num_items\n";
    std::exit(EXIT_FAILURE);
  }

  size_t N = std::atoi(argv[1]);

  sycl::queue queue;

  auto data = sycl::malloc_shared<int>(N, queue);
  auto res1 = sycl::malloc_shared<int>(1, queue);
  auto res2 = sycl::malloc_shared<int>(1, queue);
  auto hres = 0;

  // initialize the data
  for(size_t i=0; i<N; i++) {
    data[i] = ::rand()%100;
    hres += data[i];
  }
  *res1 = 10;
  *res2 = 10;

  tf::syclDefaultExecutionPolicy policy(queue);

  tf::sycl_reduce(policy, data, data+N, res1, [](int a, int b){ return a+b; });

  //// perform reduction
  //tf::syclFlow syclflow(queue);
  //
  //// res1 = res1 + data[0] + data[1] + ...
  //syclflow.reduce(
  //  data, data+N, res1, [](int a, int b){ return a+b; }
  //);
  //
  //// res2 = data[0] + data[1] + data[2] + ...
  //syclflow.uninitialized_reduce(
  //  data, data+N, res2, [](int a, int b){ return a+b; }
  //);

  //syclflow.offload();
  //
  //// inspect
  //if(hres + 10 != *res1 || hres != *res2) {
  //  throw std::runtime_error("incorrect result");
  //}
  //
  printf("hres=%d res1=%d\n", hres, *res1);

  std::cout << "correct result\n";

  return 0;
}