1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
// assumption is local work size = pattern size
__kernel void FillBufferBytes(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern )
{
ALIGNED4(pDst);
ALIGNED4(pPattern);
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
uint srcIndex = get_local_id(0);
pDst[dstIndex] = pPattern[srcIndex];
}
__kernel void FillBufferLeftLeftover(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern,
const uint patternSizeInEls )
{
ALIGNED4(pDst);
ALIGNED4(pPattern);
uint gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferMiddle(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uint* pPattern,
const uint patternSizeInEls )
{
ALIGNED4(pDst);
ALIGNED4(pPattern);
uint gid = get_global_id(0);
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferRightLeftover(
__global uchar* pDst,
uint dstOffsetInBytes,
const __global uchar* pPattern,
const uint patternSizeInEls )
{
ALIGNED4(pDst);
ALIGNED4(pPattern);
uint gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferImmediate(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
ALIGNED4(ptr);
uint gid = get_global_id(0);
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
dstPtr[gid] = value;
}
__kernel void FillBufferImmediateLeftOver(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
ALIGNED4(ptr);
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferSSHOffset(
__global uchar* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const __global uchar* pPattern,
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
)
{
ALIGNED4(ptr);
ALIGNED4(pPattern);
uint dstIndex = get_global_id(0);
uint srcIndex = get_local_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
pDst[dstIndex] = pSrc[srcIndex];
}
)==="
|