File: barrier.cpp

package info (click to toggle)
intel-graphics-compiler2 2.22.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 107,676 kB
  • sloc: cpp: 809,645; lisp: 288,070; ansic: 16,397; python: 4,010; yacc: 2,588; lex: 1,666; pascal: 314; sh: 186; makefile: 38
file content (133 lines) | stat: -rw-r--r-- 4,017 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*========================== begin_copyright_notice ============================

Copyright (C) 2021-2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include <cm-cl/exec.h>

extern "C" {
#include "spirv_atomics_common.h"
}

using namespace cm;
using namespace exec;
using namespace detail;

template <memory_scope OCLScope>
static CM_NODEBUG CM_INLINE void spirvFenceHelperWithKnownScope(int semantics) {
  switch (semantics) {
  default:
  case SequentiallyConsistent:
    return fence<memory_order_seq_cst, OCLScope>();
  case Relaxed:
    return;
  case Acquire:
    return fence<memory_order_acquire, OCLScope>();
  case Release:
    return fence<memory_order_release, OCLScope>();
  case AcquireRelease:
    return fence<memory_order_acq_rel, OCLScope>();
  }
}

static CM_NODEBUG CM_INLINE void spirvFenceHelper(int scope, int semantics) {
  switch (scope) {
  default:
  case CrossDevice:
    return spirvFenceHelperWithKnownScope<memory_scope_all_devices>(semantics);
  case Device:
    return spirvFenceHelperWithKnownScope<memory_scope_device>(semantics);
  case Workgroup:
    return spirvFenceHelperWithKnownScope<memory_scope_work_group>(semantics);
  case Subgroup:
    return spirvFenceHelperWithKnownScope<memory_scope_sub_group>(semantics);
  case Invocation:
    return spirvFenceHelperWithKnownScope<memory_scope_work_item>(semantics);
  }
}

CM_NODEBUG CM_INLINE void __spirv_MemoryBarrier(int scope, int semantics) {
  spirvFenceHelper(scope, semantics);
}

static CM_NODEBUG CM_INLINE void local_barrier() { __cm_cl_barrier(); }

static CM_NODEBUG CM_INLINE void global_barrier() {
  fence<memory_order_acq_rel, memory_scope_device>();
  local_barrier();

  __global uint8_t *sync_buff = sync_buffer();

  bool is_first_item =
      (get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0;

  uint32_t group_id = get_group_linear_id();

  // Signal that a group hit the global barrier.
  if (is_first_item) {
    sync_buff[group_id] = 1;
    fence<memory_order_release, memory_scope_device>();
  }

  uint32_t num_groups = get_group_linear_count();

  // The last group controls that the others hit
  // the global barrier.
  if (group_id == (num_groups - 1)) {
    uint32_t local_size = get_local_linear_size();
    uint8_t Value;
    do {
      fence<memory_order_acquire, memory_scope_device>();
      Value = 1;
      for (uint32_t local_id = get_local_linear_id(); local_id < num_groups;
           local_id += local_size)
        Value = Value & sync_buff[local_id];
    } while (Value == 0);

    fence<memory_order_acq_rel, memory_scope_device>();
    local_barrier();

    // Global barrier is complete.
    for (uint32_t local_id = get_local_linear_id(); local_id < num_groups;
         local_id += local_size)
      sync_buff[local_id] = 0;
    fence<memory_order_release, memory_scope_device>();
  }

  // The first items wait for the last group.
  if (is_first_item)
    while (sync_buff[group_id] != 0)
      fence<memory_order_acquire, memory_scope_device>();

  // Other items wait for the first ones.
  fence<memory_order_acq_rel, memory_scope_device>();
  local_barrier();
}

CM_NODEBUG CM_INLINE void __spirv_ControlBarrier(int scope, int memory_scope,
                                                 int memory_semantics) {
  if (scope == Workgroup) {
    spirvFenceHelper(memory_scope, memory_semantics);
    local_barrier();
  } else if (scope == Device)
    global_barrier();
}

CM_NODEBUG CM_INLINE void
__spirv_ControlBarrierArriveINTEL(int scope, int memory_scope,
                                  int memory_semantics) {
  spirvFenceHelper(memory_scope, memory_semantics);
  if (scope == Workgroup)
    __cm_cl_sbarrier(1);
}

CM_NODEBUG CM_INLINE void
__spirv_ControlBarrierWaitINTEL(int scope, int memory_scope,
                                int memory_semantics) {
  spirvFenceHelper(memory_scope, memory_semantics);
  if (scope == Workgroup)
    __cm_cl_sbarrier(0);
}