1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
//===- Synchronization.h - OpenMP synchronization utilities ------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
#define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
#include "Types.h"
namespace ompx {
namespace atomic {
enum OrderingTy {
relaxed = __ATOMIC_RELAXED,
aquire = __ATOMIC_ACQUIRE,
release = __ATOMIC_RELEASE,
acq_rel = __ATOMIC_ACQ_REL,
seq_cst = __ATOMIC_SEQ_CST,
};
enum MemScopeTy {
all, // All threads on all devices
device, // All threads on the device
cgroup // All threads in the contention group, e.g. the team
};
/// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
MemScopeTy MemScope = MemScopeTy::all);
/// Atomically perform <op> on \p V and \p *Addr with \p Ordering semantics. The
/// result is stored in \p *Addr;
/// {
#define ATOMIC_COMMON_OP(TY) \
TY add(TY *Addr, TY V, OrderingTy Ordering); \
TY mul(TY *Addr, TY V, OrderingTy Ordering); \
TY load(TY *Addr, OrderingTy Ordering); \
void store(TY *Addr, TY V, OrderingTy Ordering); \
bool cas(TY *Addr, TY ExpectedV, TY DesiredV, OrderingTy OrderingSucc, \
OrderingTy OrderingFail);
#define ATOMIC_FP_ONLY_OP(TY) \
TY min(TY *Addr, TY V, OrderingTy Ordering); \
TY max(TY *Addr, TY V, OrderingTy Ordering);
#define ATOMIC_INT_ONLY_OP(TY) \
TY min(TY *Addr, TY V, OrderingTy Ordering); \
TY max(TY *Addr, TY V, OrderingTy Ordering); \
TY bit_or(TY *Addr, TY V, OrderingTy Ordering); \
TY bit_and(TY *Addr, TY V, OrderingTy Ordering); \
TY bit_xor(TY *Addr, TY V, OrderingTy Ordering);
#define ATOMIC_FP_OP(TY) \
ATOMIC_FP_ONLY_OP(TY) \
ATOMIC_COMMON_OP(TY)
#define ATOMIC_INT_OP(TY) \
ATOMIC_INT_ONLY_OP(TY) \
ATOMIC_COMMON_OP(TY)
// This needs to be kept in sync with the header. Also the reason we don't use
// templates here.
ATOMIC_INT_OP(int8_t)
ATOMIC_INT_OP(int16_t)
ATOMIC_INT_OP(int32_t)
ATOMIC_INT_OP(int64_t)
ATOMIC_INT_OP(uint8_t)
ATOMIC_INT_OP(uint16_t)
ATOMIC_INT_OP(uint32_t)
ATOMIC_INT_OP(uint64_t)
ATOMIC_FP_OP(float)
ATOMIC_FP_OP(double)
#undef ATOMIC_INT_ONLY_OP
#undef ATOMIC_FP_ONLY_OP
#undef ATOMIC_COMMON_OP
#undef ATOMIC_INT_OP
#undef ATOMIC_FP_OP
///}
} // namespace atomic
namespace synchronize {
/// Initialize the synchronization machinery. Must be called by all threads.
void init(bool IsSPMD);
/// Synchronize all threads in a warp identified by \p Mask.
void warp(LaneMaskTy Mask);
/// Synchronize all threads in a block and perform a fence before and after the
/// barrier according to \p Ordering. Note that the fence might be part of the
/// barrier.
void threads(atomic::OrderingTy Ordering);
/// Synchronizing threads is allowed even if they all hit different instances of
/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
/// restrictive in that it requires all threads to hit the same instance. The
/// noinline is removed by the openmp-opt pass and helps to preserve the
/// information till then.
///{
#pragma omp begin assumes ext_aligned_barrier
/// Synchronize all threads in a block, they are reaching the same instruction
/// (hence all threads in the block are "aligned"). Also perform a fence before
/// and after the barrier according to \p Ordering. Note that the
/// fence might be part of the barrier if the target offers this.
__attribute__((noinline)) void threadsAligned(atomic::OrderingTy Ordering);
#pragma omp end assumes
///}
} // namespace synchronize
namespace fence {
/// Memory fence with \p Ordering semantics for the team.
void team(atomic::OrderingTy Ordering);
/// Memory fence with \p Ordering semantics for the contention group.
void kernel(atomic::OrderingTy Ordering);
/// Memory fence with \p Ordering semantics for the system.
void system(atomic::OrderingTy Ordering);
} // namespace fence
} // namespace ompx
#endif
|