File: sycl_saxpy.cpp

package info (click to toggle)
taskflow 3.9.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 45,948 kB
  • sloc: cpp: 39,058; xml: 35,572; python: 12,935; javascript: 1,732; makefile: 59; sh: 16
file content (63 lines) | stat: -rw-r--r-- 1,436 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// This program demonstrates how to create a simple SAXPY
// ("single-precision AX+Y") task graph using syclFlow.

#include <cmath>
#include <cstddef>
#include <iostream>
#include <stdexcept>

#include <taskflow/taskflow.hpp>
#include <taskflow/sycl/syclflow.hpp>

// Number of float elements in each of the X and Y vectors.
constexpr size_t N = 1'000'000;

// Builds and runs a one-task taskflow whose task is a syclFlow performing
// X = 3*X + Y over N floats, then verifies on the host that every element
// of X equals 5.0f (X is filled with 1.0f and Y with 2.0f beforehand).
//
// Returns 0 on success. Throws std::runtime_error if shared memory cannot
// be allocated or if the computed result is wrong; in both cases any
// memory that was allocated is released before throwing.
int main() {

  tf::Executor executor;
  tf::Taskflow taskflow("saxpy example");

  sycl::queue queue;

  // allocate shared memory
  auto X = sycl::malloc_shared<float>(N, queue);
  auto Y = sycl::malloc_shared<float>(N, queue);

  // a failed allocation yields a null pointer; bail out before any kernel
  // or host code dereferences it, releasing whichever allocation succeeded
  if(X == nullptr || Y == nullptr) {
    if(X != nullptr) {
      sycl::free(X, queue);
    }
    if(Y != nullptr) {
      sycl::free(Y, queue);
    }
    throw std::runtime_error("failed to allocate shared memory for saxpy");
  }

  // create a syclFlow to perform the saxpy operation
  // (capturing by reference is fine here: the run below is waited on
  //  before X, Y and queue go out of scope)
  taskflow.emplace_on([&](tf::syclFlow& sf){

    tf::syclTask fillX = sf.fill(X, 1.0f, N).name("fillX");
    tf::syclTask fillY = sf.fill(Y, 2.0f, N).name("fillY");

    // the kernel copies the X/Y pointer values ([=]) and updates X in place
    tf::syclTask saxpy = sf.parallel_for(sycl::range<1>(N),
      [=] (sycl::id<1> id) {
        X[id] = 3.0f * X[id] + Y[id];
      }
    ).name("saxpy");

    // both fills must complete before the saxpy kernel reads X and Y
    saxpy.succeed(fillX, fillY);

  }, queue).name("syclFlow");

  // dump the graph without detailed syclFlow connections
  taskflow.dump(std::cout);
  
  // run the taskflow
  executor.run(taskflow).wait();

  // dump the graph with all syclFlow details (after executed)
  taskflow.dump(std::cout);

  // verify the result; the negated "<" form makes a NaN element count as
  // incorrect (a ">=" comparison against NaN would be false and slip through)
  bool correct = true;
  for(size_t i=0; i<N; i++) {
    if(!(std::fabs(X[i]-5.0f) < 1e-4)) {
      correct = false;
      break;
    }
  }

  // free the memory before reporting, so the failure path does not leak
  sycl::free(X, queue);
  sycl::free(Y, queue);

  if(!correct) {
    throw std::runtime_error("incorrect saxpy result (expected 5.0f)");
  }

  std::cout << "correct saxpy result\n";

  return 0;
}