File: printf_not_cm_common.h

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (217 lines) | stat: -rw-r--r-- 7,755 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
/*========================== begin_copyright_notice ============================

Copyright (C) 2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef VC_BIF_PRINTF_NOT_CM_COMMON_H
#define VC_BIF_PRINTF_NOT_CM_COMMON_H

#include <cm-cl/atomic.h>
#include <cm-cl/vector.h>
#include <opencl_def.h>

#include "vc/BiF/PrintfIface.h"

using namespace vc::bif::printf;
using namespace cm;

// Upper bound on the format string length that the runtime currently
// supports.
static inline constexpr int MaxFormatStrSize{16 * 1024};
// Width of the vector that stores the current address. The address is always
// kept as a 64-bit value split into 2 elements (32-bit pointers are
// zero-extended).
static inline constexpr int AddressVectorWidth{2};

// Indices of the elements of the transfer data vector that is passed between
// the printf implementation routines.
namespace TransferDataLayout {
enum Enum {
  // Lower part of the current 64-bit buffer address.
  CurAddressLow = 0,
  // Higher part of the current 64-bit buffer address.
  CurAddressHigh = 1,
  // The value that printf is going to return.
  ReturnValue = 2,
};
} // namespace TransferDataLayout

// Type of a single printf buffer element.
using BufferElementTy = unsigned int;
// Size in bytes of the header that is accounted for every argument: one
// buffer element.
static inline constexpr int ArgHeaderSize =
    static_cast<int>(sizeof(BufferElementTy));

// Computes how many bytes of the printf buffer a single printf call requires.
// StringAnnotationSize is the number of bytes required to write a string to
// the printf buffer: for ocl it is the size of a string index, for ze - the
// size of a string pointer.
// \p ArgsInfo holds the argument counters indexed by ArgsInfoVector.
template <int StringAnnotationSize>
inline int calcRequiredBufferSize(vector<int, ArgsInfoVector::Size> ArgsInfo) {
  const int NumArgs = ArgsInfo[ArgsInfoVector::NumTotal];
  const int NumWideArgs = ArgsInfo[ArgsInfoVector::Num64Bit];
  const int NumStrArgs = ArgsInfo[ArgsInfoVector::NumStr];
  const int NumPtrArgs = ArgsInfo[ArgsInfoVector::NumPtr];
  // Whatever is not a 64-bit value, a string or a pointer is counted as a
  // 32-bit argument.
  const int NumDWordArgs = NumArgs - NumWideArgs - NumStrArgs - NumPtrArgs;
  // Pointers are always passed as 64-bit values (32-bit ones are zext).
  const int NumQWordArgs = NumWideArgs + NumPtrArgs;
  // One standalone string annotation (presumably for the format string) plus
  // one annotation per string argument.
  const int AnnotationBytes =
      StringAnnotationSize + NumStrArgs * StringAnnotationSize;
  // Every argument is accounted with a header.
  const int HeaderBytes = NumArgs * ArgHeaderSize;
  const int PayloadBytes =
      NumDWordArgs * sizeof(int32_t) + NumQWordArgs * sizeof(int64_t);
  return AnnotationBytes + HeaderBytes + PayloadBytes;
}

// Return initial buffer offset in BufferElementTy elements (not in bytes).
// Atomically adds \p RequiredSize (in bytes) to the counter that \p BufferPtr
// points to and converts the byte offset produced by the atomic operation
// (presumably the counter value before the addition - TODO confirm against
// cm-cl atomic::execute) to a number of BufferElementTy elements.
static inline BufferElementTy
getInitialBufferOffset(__global BufferElementTy *BufferPtr,
                       BufferElementTy RequiredSize) {
  // The byte offset is kept unsigned on purpose: a signed int would be
  // sign-extended when converted to size_t for the division below, which
  // yields a wrong element offset as soon as the byte offset reaches 2 GiB
  // on targets with a 64-bit size_t.
#if __clang_major__ > 9
  BufferElementTy ByteOffset =
      atomic::execute<atomic::operation::add, memory_order_relaxed,
                      memory_scope_all_devices>(BufferPtr, RequiredSize);
#else  // __clang_major__ > 9
  // Helping clang-9 correctly deduce the argument type.
  BufferElementTy ByteOffset =
      atomic::execute<atomic::operation::add, memory_order_relaxed,
                      memory_scope_all_devices, __global BufferElementTy>(
          BufferPtr, RequiredSize);
#endif // __clang_major__ > 9
  return ByteOffset / sizeof(BufferElementTy);
}

// Converts \p Ptr to a vector of AddressVectorWidth buffer elements: the
// pointer value is widened to 64 bits (32-bit pointers are zext) and the
// resulting 1-element vector is then viewed as BufferElementTy elements.
template <typename T>
static vector<BufferElementTy, AddressVectorWidth> castPointerToVector(T *Ptr) {
  const uintptr_t RawAddress = reinterpret_cast<uintptr_t>(Ptr);
  vector<uint64_t, 1> WideAddress = RawAddress;
  return WideAddress.format<BufferElementTy>();
}

// Stores \p Ptr into \p TransferData: the pointer is split into
// AddressVectorWidth elements that are written starting from the
// CurAddressLow index, so both CurAddressLow and CurAddressHigh get updated.
static inline void
setCurAddress(vector<BufferElementTy, TransferDataSize> &TransferData,
              __global BufferElementTy *Ptr) {
  vector<BufferElementTy, AddressVectorWidth> AddressParts =
      castPointerToVector(Ptr);
  TransferData.select<AddressVectorWidth, 1>(
      TransferDataLayout::CurAddressLow) = AddressParts;
}

// Restores the current buffer address from \p TransferData: reads
// AddressVectorWidth elements starting from the CurAddressLow index and
// turns them back into a pointer.
static inline __global BufferElementTy *
getCurAddress(vector<BufferElementTy, TransferDataSize> TransferData) {
  vector<BufferElementTy, AddressVectorWidth> AddressParts =
      TransferData.select<AddressVectorWidth, 1>(
          TransferDataLayout::CurAddressLow);
  // The parts are bit-cast to a 64-bit integer first, the integer is then
  // truncated to the pointer width if necessary.
  const auto RawAddress =
      static_cast<uintptr_t>(AddressParts.format<uint64_t>());
  return reinterpret_cast<__global BufferElementTy *>(RawAddress);
}

// Builds a transfer data vector whose current address is \p InitPtr and
// whose return value element is \p ReturnValue.
static inline vector<BufferElementTy, TransferDataSize>
generateTransferData(__global BufferElementTy *InitPtr,
                     BufferElementTy ReturnValue) {
  vector<BufferElementTy, TransferDataSize> Packed;
  // The two writes touch different elements, so their order is irrelevant.
  Packed[TransferDataLayout::ReturnValue] = ReturnValue;
  setCurAddress(Packed, InitPtr);
  return Packed;
}

// Printf initialization routine. Takes the printf buffer and allocates space
// in it; \p ArgsInfo describes the arguments so that enough space gets
// allocated. Returns transfer data that holds the address of the allocated
// space and a zero return value. When the format string is longer than
// MaxFormatStrSize nothing is allocated: the returned transfer data holds a
// null address and -1 as the return value.
template <int StringAnnotationSize>
vector<BufferElementTy, TransferDataSize>
printf_init_impl(vector<int, ArgsInfoVector::Size> ArgsInfo) {
  if (ArgsInfo[ArgsInfoVector::FormatStrSize] > MaxFormatStrSize)
    return generateTransferData(/* InitPtr */ nullptr, /* ReturnValue */ -1);
  auto RequiredBytes = calcRequiredBufferSize<StringAnnotationSize>(ArgsInfo);
  // The pointer type is spelled out instead of being deduced with auto:
  // clang-9 cannot handle auto here, while the explicit type is the very same
  // type that newer versions would deduce.
  __global BufferElementTy *Buffer =
      static_cast<__global BufferElementTy *>(cm::detail::printf_buffer());
  auto StartOffset = getInitialBufferOffset(Buffer, RequiredBytes);
  return generateTransferData(Buffer + StartOffset, /* ReturnValue */ 0);
}

// Stores \p Data to the printf buffer element that \p CurAddress points to.
// Returns the pointer to the next element.
static inline __global BufferElementTy *
writeElementToBuffer(__global BufferElementTy *CurAddress,
                     BufferElementTy Data) {
  CurAddress[0] = Data;
  return CurAddress + 1;
}

// The code that is written into the printf buffer before every argument.
// The values are spelled out explicitly as they end up in the buffer.
namespace ArgCode {
enum Enum {
  Invalid = 0,
  Byte = 1,
  Short = 2,
  Int = 3,
  Float = 4,
  String = 5,
  Long = 6,
  Pointer = 7,
  Double = 8,
  VectorByte = 9,
  VectorShort = 10,
  VectorInt = 11,
  VectorLong = 12,
  VectorFloat = 13,
  VectorDouble = 14,
  // Number of codes, must stay the last enumerator.
  Size
};
} // namespace ArgCode

// Indices of the elements of the argument info vector.
namespace ArgInfo {
enum Enum {
  // ArgCode of the argument.
  Code = 0,
  // Number of DWords that the argument value takes.
  NumDWords = 1,
  // Number of elements in the argument info vector.
  Size
};
} // namespace ArgInfo

// Maps \p Kind to the argument info vector: the ArgCode to write to the
// buffer and the number of DWords the argument value takes.
// StringArgSize is in DWords.
template <int StringArgSize>
inline vector<BufferElementTy, ArgInfo::Size> getArgInfo(ArgKind::Enum Kind) {
  // Kinds that are not listed below get the invalid code and no DWords.
  BufferElementTy Code = ArgCode::Invalid;
  BufferElementTy NumDWords = 0;
  switch (Kind) {
  case ArgKind::Char:
  case ArgKind::Short:
  case ArgKind::Int:
    // Char and short share the int code and the single DWord.
    Code = ArgCode::Int;
    NumDWords = 1;
    break;
  case ArgKind::Long:
    Code = ArgCode::Long;
    NumDWords = 2;
    break;
  case ArgKind::Float:
    Code = ArgCode::Float;
    NumDWords = 1;
    break;
  case ArgKind::Double:
    Code = ArgCode::Double;
    NumDWords = 2;
    break;
  case ArgKind::Pointer:
    Code = ArgCode::Pointer;
    NumDWords = 2;
    break;
  case ArgKind::String:
    Code = ArgCode::String;
    NumDWords = StringArgSize;
    break;
  default:
    break;
  }
  return cl_vector<BufferElementTy, ArgInfo::Size>{Code, NumDWords};
}

// Handles a single printf argument (one of those that follow the format
// string): writes the argument code and then the argument DWords taken from
// \p Arg to the printf buffer at the current address, and returns
// \p TransferData with the current address moved past the written data.
// StringArgSize is in DWords.
template <int StringArgSize>
inline vector<BufferElementTy, TransferDataSize>
printf_arg_impl(vector<BufferElementTy, TransferDataSize> TransferData,
                ArgKind::Enum Kind,
                vector<BufferElementTy, ArgData::Size> Arg) {
  // A non-zero return value means that nothing should be written: the
  // transfer data is passed through untouched.
  if (TransferData[TransferDataLayout::ReturnValue] != 0)
    return TransferData;
  vector<BufferElementTy, ArgInfo::Size> Header =
      getArgInfo<StringArgSize>(Kind);
  const BufferElementTy PayloadDWords = Header[ArgInfo::NumDWords];
  // The code goes first, the argument value follows it.
  __global BufferElementTy *WritePtr =
      writeElementToBuffer(getCurAddress(TransferData), Header[ArgInfo::Code]);
  for (int Idx = 0; Idx != PayloadDWords; ++Idx)
    WritePtr = writeElementToBuffer(WritePtr, Arg[Idx]);
  setCurAddress(TransferData, WritePtr);
  return TransferData;
}

// Extracts the printf return value from \p TransferData.
static inline int
printf_ret_impl(vector<BufferElementTy, TransferDataSize> TransferData) {
  const BufferElementTy Status = TransferData[TransferDataLayout::ReturnValue];
  // The element is stored unsigned, the result is reported as int.
  return static_cast<int>(Status);
}

#endif // VC_BIF_PRINTF_NOT_CM_COMMON_H