1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
|
#ifndef __OPENCL_VERSION__
#include "header.cl"
#endif
/**
 * Element-wise product: y[offy + i] = a[offa + i] * b[offb + i], 0 <= i < n.
 *
 * n     number of elements to process
 * a     first input buffer (read only), indexed from offa
 * b     second input buffer (read only), indexed from offb
 * y     output buffer, indexed from offy
 *
 * Each work-item starts at its global id in dimension 0 and advances by the
 * global size of dimension 0 until it reaches n.
 *
 * b is declared const like a because it is never written here; this matches
 * the add/sub kernels. No pointer is restrict-qualified, so the const
 * qualifier makes no aliasing promise.
 */
__kernel void TEMPLATE(mul,Dtype)(const int_tp n,
                                  __global const Dtype* a, const int_tp offa,
                                  __global const Dtype* b, const int_tp offb,
                                  __global Dtype* y, const int_tp offy) {
  for (int_tp index = get_global_id(0); index < n;
       index += get_global_size(0)) {
    y[index + offy] = a[index + offa] * b[index + offb];
  }
}
/**
 * Element-wise quotient: y[offy + i] = a[offa + i] / b[offb + i], 0 <= i < n.
 *
 * n     number of elements to process
 * a     numerator buffer (read only), indexed from offa
 * b     denominator buffer (read only), indexed from offb
 * y     output buffer, indexed from offy
 *
 * Each work-item starts at its global id in dimension 0 and advances by the
 * global size of dimension 0 until it reaches n. Denominators are used as
 * stored; no zero check is performed.
 *
 * b is declared const like a because it is never written here; this matches
 * the add/sub kernels. No pointer is restrict-qualified, so the const
 * qualifier makes no aliasing promise.
 */
__kernel void TEMPLATE(div,Dtype)(const int_tp n,
                                  __global const Dtype* a, const int_tp offa,
                                  __global const Dtype* b, const int_tp offb,
                                  __global Dtype* y, const int_tp offy) {
  for (int_tp index = get_global_id(0); index < n;
       index += get_global_size(0)) {
    y[index + offy] = a[index + offa] / b[index + offb];
  }
}
/**
 * In-place scalar add: Y[offY + i] += alpha for 0 <= i < N.
 *
 * N      number of elements to update
 * alpha  value added to every element
 * Y      buffer updated in place, indexed from offY
 */
__kernel void TEMPLATE(add_scalar,Dtype)(const int_tp N, const Dtype alpha,
                                         __global Dtype* Y,
                                         const int_tp offY) {
  // The stride is the same for the whole launch, so read it once.
  const size_t stride = get_global_size(0);
  int_tp i = get_global_id(0);
  while (i < N) {
    Y[offY + i] = Y[offY + i] + alpha;
    i += stride;
  }
}
/**
 * Element-wise sum: y[offy + i] = a[offa + i] + b[offb + i], 0 <= i < n.
 *
 * n     number of elements to process
 * a, b  input buffers (read only), indexed from offa / offb
 * y     output buffer, indexed from offy
 */
__kernel void TEMPLATE(add,Dtype)(const int_tp n, __global const Dtype* a,
                                  const int_tp offa, __global const Dtype* b,
                                  const int_tp offb, __global Dtype* y,
                                  const int_tp offy) {
  const size_t stride = get_global_size(0);
  int_tp i = get_global_id(0);
  while (i < n) {
    const Dtype lhs = a[offa + i];
    const Dtype rhs = b[offb + i];
    y[offy + i] = lhs + rhs;
    i += stride;
  }
}
/**
 * Element-wise difference: y[offy + i] = a[offa + i] - b[offb + i],
 * 0 <= i < n.
 *
 * n     number of elements to process
 * a     minuend buffer (read only), indexed from offa
 * b     subtrahend buffer (read only), indexed from offb
 * y     output buffer, indexed from offy
 */
__kernel void TEMPLATE(sub,Dtype)(const int_tp n, __global const Dtype* a,
                                  const int_tp offa, __global const Dtype* b,
                                  const int_tp offb, __global Dtype* y,
                                  const int_tp offy) {
  const size_t stride = get_global_size(0);
  int_tp pos = get_global_id(0);
  while (pos < n) {
    const Dtype minuend = a[offa + pos];
    const Dtype subtrahend = b[offb + pos];
    y[offy + pos] = minuend - subtrahend;
    pos += stride;
  }
}
/**
 * Element-wise absolute value: y[offy + i] = fabs(a[offa + i]), 0 <= i < n.
 *
 * n  number of elements to process
 * a  input buffer (read only), indexed from offa
 * y  output buffer, indexed from offy
 */
__kernel void TEMPLATE(abs,Dtype)(const int_tp n, __global const Dtype* a,
                                  const int_tp offa, __global Dtype* y,
                                  const int_tp offy) {
  const size_t stride = get_global_size(0);
  int_tp i = get_global_id(0);
  while (i < n) {
    // Typed local so fabs() is called with a Dtype argument.
    const Dtype value = a[offa + i];
    y[offy + i] = fabs(value);
    i += stride;
  }
}
/**
 * Element-wise exponential: y[offy + i] = exp(a[offa + i]), 0 <= i < n.
 *
 * n  number of elements to process
 * a  input buffer (read only), indexed from offa
 * y  output buffer, indexed from offy
 */
__kernel void TEMPLATE(exp,Dtype)(const int_tp n, __global const Dtype* a,
                                  const int_tp offa, __global Dtype* y,
                                  const int_tp offy) {
  const size_t stride = get_global_size(0);
  int_tp i = get_global_id(0);
  while (i < n) {
    y[offy + i] = exp(a[offa + i]);
    i += stride;
  }
}
/**
 * Element-wise natural logarithm: y[offy + i] = log(a[offa + i]),
 * 0 <= i < n.
 *
 * n  number of elements to process
 * a  input buffer (read only), indexed from offa
 * y  output buffer, indexed from offy
 */
__kernel void TEMPLATE(log,Dtype)(const int_tp n, __global const Dtype* a,
                                  const int_tp offa, __global Dtype* y,
                                  const int_tp offy) {
  const size_t stride = get_global_size(0);
  int_tp i = get_global_id(0);
  while (i < n) {
    // Typed local so log() is called with a Dtype argument.
    const Dtype value = a[offa + i];
    y[offy + i] = log(value);
    i += stride;
  }
}
/**
 * Element-wise power: y[offy + i] = pow(a[offa + i], alpha), 0 <= i < n.
 *
 * n      number of elements to process
 * a      input buffer (read only), indexed from offa
 * alpha  exponent applied to every element
 * y      output buffer, indexed from offy
 *
 * When alpha equals 2 the base is replaced by its absolute value before
 * calling pow(). Squaring is sign-independent, so the mathematical result is
 * unchanged.
 * NOTE(review): presumably this guards against pow() implementations that
 * mishandle negative bases; confirm against the target devices.
 */
__kernel void TEMPLATE(powx,Dtype)(const int_tp n, __global const Dtype* a,
                                   const int_tp offa, Dtype alpha,
                                   __global Dtype* y,
                                   const int_tp offy) {
  // Compare in Dtype precision: an unsuffixed 2.0 literal would otherwise
  // pull the comparison into a different floating-point type. The test does
  // not depend on the element, so evaluate it once outside the loop.
  const int is_square = (alpha == (Dtype)2.0);

  for (int_tp index = get_global_id(0); index < n;
       index += get_global_size(0)) {
    const Dtype value = a[offa + index];
    const Dtype base = is_square ? (Dtype)fabs(value) : value;
    y[offy + index] = pow(base, alpha);
  }
}
/**
 * Element-wise sign: y[offy + i] is 1 when x[offx + i] > 0, -1 when it is
 * < 0, and 0 otherwise (both comparisons false), for 0 <= i < n.
 *
 * n  number of elements to process
 * x  input buffer (read only), indexed from offx
 * y  output buffer, indexed from offy
 */
__kernel void TEMPLATE(sign,Dtype)(const int_tp n, __global const Dtype* x,
                                   const int_tp offx, __global Dtype* y,
                                   const int_tp offy) {
  // Typed zero keeps both comparisons in Dtype precision instead of
  // promoting them through an unsuffixed 0.0 literal.
  const Dtype zero = (Dtype)0;

  for (int_tp index = get_global_id(0); index < n;
       index += get_global_size(0)) {
    // Load the element once; it is needed by both comparisons.
    const Dtype value = x[index + offx];
    // Each comparison yields 0 or 1, so the difference is -1, 0 or 1.
    y[index + offy] = (zero < value) - (value < zero);
  }
}
/**
 * Element-wise sign-bit test: y[offy + i] = signbit(x[offx + i]),
 * 0 <= i < n. The value returned by signbit() is stored converted to Dtype.
 *
 * n  number of elements to process
 * x  input buffer (read only), indexed from offx
 * y  output buffer, indexed from offy
 */
__kernel void TEMPLATE(sgnbit,Dtype)(const int_tp n, __global const Dtype* x,
                                     const int_tp offx, __global Dtype* y,
                                     const int_tp offy) {
  const size_t stride = get_global_size(0);
  int_tp i = get_global_id(0);
  while (i < n) {
    y[i + offy] = signbit(x[i + offx]);
    i += stride;
  }
}
|