File: copy_buffer_to_buffer.builtin_kernel

package info (click to toggle)
intel-compute-runtime 26.05.37020.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 83,596 kB
  • sloc: cpp: 976,037; lisp: 2,096; sh: 704; makefile: 162
file content (132 lines) | stat: -rw-r--r-- 3,934 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
 * Copyright (C) 2020-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

R"===(

#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)

__kernel void CopyBufferToBufferBytes(
    const __global uchar* pSrc,
    __global uchar* pDst,
    offset_t srcOffsetInBytes,
    offset_t dstOffsetInBytes,
    offset_t bytesToRead )
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    pSrc += ( srcOffsetInBytes + get_global_id(0) );
    pDst += ( dstOffsetInBytes + get_global_id(0) );
    pDst[ 0 ] = pSrc[ 0 ];
}

__kernel void CopyBufferToBufferLeftLeftover(
    const __global uchar* pSrc,
    __global uchar* pDst,
    offset_t srcOffsetInBytes,
    offset_t dstOffsetInBytes)
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    idx_t gid = get_global_id(0);
    pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}

__kernel void CopyBufferToBufferMiddle(
    const __global uint* pSrc,
    __global uint* pDst,
    offset_t srcOffsetInBytes,
    offset_t dstOffsetInBytes)
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    idx_t gid = get_global_id(0);
    pDst += dstOffsetInBytes >> 2;
    pSrc += srcOffsetInBytes >> 2;
    uint4 loaded = vload4(gid, pSrc);
    vstore4(loaded, gid, pDst);
}

__kernel void CopyBufferToBufferMiddleMisaligned(
    __global const uint* pSrc,
     __global uint* pDst,
     offset_t srcOffsetInBytes,
     offset_t dstOffsetInBytes,
     uint misalignmentInBits)
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    idx_t gid = get_global_id(0);
    pDst += dstOffsetInBytes >> 2;
    pSrc += srcOffsetInBytes >> 2;
    const uint4 src0 = vload4(gid, pSrc);
    const uint4 src1 = vload4((gid + 1), pSrc);

    uint4 result;
    result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
    result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
    result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
    result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
    vstore4(result, gid, pDst);
}

__kernel void CopyBufferToBufferRightLeftover(
    const __global uchar* pSrc,
    __global uchar* pDst,
    offset_t srcOffsetInBytes,
    offset_t dstOffsetInBytes)
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    idx_t gid = get_global_id(0);
    pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
}

__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
    ALIGNED4(dst);
    ALIGNED4(src);
    idx_t gid = get_global_id(0);
    dst[gid] = (uchar)(src[gid]);
}

__kernel void CopyBufferToBufferSideRegion(
    __global uchar* pDst,
    const __global uchar* pSrc,
    idx_t len,
    offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
    )
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    idx_t gid = get_global_id(0);
    __global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
    __global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
    if (gid < len) {
        pDstWithOffset[ gid ] = pSrcWithOffset[ gid ];
    }
}

__kernel void CopyBufferToBufferMiddleRegion(
    __global uint* pDst,
    const __global uint* pSrc,
    idx_t elems,
    offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
    offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
    )
{
    ALIGNED4(pSrc);
    ALIGNED4(pDst);
    idx_t gid = get_global_id(0);
    __global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
    __global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
    if (gid < elems) {
        uint4 loaded = vload4(gid, pSrcWithOffset);
        vstore4(loaded, gid, pDstWithOffset);
    }
}

)==="