1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
|
#ifndef __OPENCL_VERSION__
#include "header.cl"
#endif
// Leaky ReLU forward pass over n elements:
//   out[i] = in[i]                   if in[i] > 0
//   out[i] = in[i] * negative_slope  otherwise
// Each work-item starts at its global id and advances by the global size,
// so any launch size covers all n elements.
__kernel void TEMPLATE(relu_forward,Dtype)(const int_tp n,
                                           __global const Dtype* in,
                                           __global Dtype* out,
                                           Dtype negative_slope) {
  int_tp i = get_global_id(0);
  while (i < n) {
    // Read the input once; the value is used by both branches.
    const Dtype x = in[i];
    if (x > 0) {
      out[i] = x;
    } else {
      out[i] = x * negative_slope;
    }
    i += get_global_size(0);
  }
}
// Leaky ReLU backward pass over n elements:
//   out_diff[i] = in_diff[i]                   if in_data[i] > 0
//   out_diff[i] = in_diff[i] * negative_slope  if in_data[i] <= 0
// If neither comparison holds (in_data[i] is NaN) the scale factor is 0.
// Each work-item starts at its global id and advances by the global size.
__kernel void TEMPLATE(relu_backward,Dtype)(const int_tp n,
                                            __global const Dtype* in_diff,
                                            __global const Dtype* in_data,
                                            __global Dtype* out_diff,
                                            Dtype negative_slope) {
  for (int_tp index = get_global_id(0); index < n; index += get_global_size(0)) {
    const Dtype x = in_data[index];
    // Build the 0/1 masks in Dtype. Unsuffixed literals such as 1.0 are
    // double constants and would promote the whole expression to double.
    const Dtype positive = (x > 0) ? (Dtype)1 : (Dtype)0;
    const Dtype non_positive = (x <= 0) ? (Dtype)1 : (Dtype)0;
    out_diff[index] = in_diff[index]
        * (positive + non_positive * negative_slope);
  }
}
// Element-wise hyperbolic tangent: out[i] = tanh(in[i]) for i in [0, n).
// Each work-item starts at its global id and advances by the global size.
__kernel void TEMPLATE(tanh_forward,Dtype)(const int_tp n,
                                           __global const Dtype* in,
                                           __global Dtype* out) {
  int_tp i = get_global_id(0);
  while (i < n) {
    const Dtype x = in[i];
    out[i] = tanh(x);
    i += get_global_size(0);
  }
}
// Backward pass for tanh: out_diff[i] = in_diff[i] * (1 - t * t), where
// t = out_data[i].
// NOTE(review): out_data presumably holds the forward tanh output -- confirm
// against the caller.
__kernel void TEMPLATE(tanh_backward,Dtype)(const int_tp n,
                                            __global const Dtype* in_diff,
                                            __global const Dtype* out_data,
                                            __global Dtype* out_diff) {
  int_tp i = get_global_id(0);
  while (i < n) {
    const Dtype t = out_data[i];
    // d/dx tanh(x) = 1 - tanh(x)^2
    const Dtype derivative = 1 - t * t;
    out_diff[i] = in_diff[i] * derivative;
    i += get_global_size(0);
  }
}
// Element-wise logistic sigmoid: out[i] = 1 / (1 + exp(-in[i])) for
// i in [0, n). Each work-item starts at its global id and advances by the
// global size.
__kernel void TEMPLATE(sigmoid_forward,Dtype)(const int_tp n,
                                              __global const Dtype* in,
                                              __global Dtype* out) {
  // Keep the constant in Dtype: an unsuffixed 1.0 is a double constant and
  // would promote the addition and division to double precision.
  const Dtype one = (Dtype)1;
  for (int_tp index = get_global_id(0); index < n; index += get_global_size(0)) {
    out[index] = one / (one + exp(-in[index]));
  }
}
// Backward pass for sigmoid: out_diff[i] = in_diff[i] * s * (1 - s), where
// s = out_data[i].
// NOTE(review): out_data presumably holds the forward sigmoid output --
// confirm against the caller.
__kernel void TEMPLATE(sigmoid_backward,Dtype)(const int_tp n,
                                               __global const Dtype* in_diff,
                                               __global const Dtype* out_data,
                                               __global Dtype* out_diff) {
  int_tp i = get_global_id(0);
  while (i < n) {
    const Dtype s = out_data[i];
    // Same left-to-right evaluation order as (in_diff * s) * (1 - s).
    const Dtype scaled = in_diff[i] * s;
    out_diff[i] = scaled * (1 - s);
    i += get_global_size(0);
  }
}
// Binary threshold: out[i] = 1 if in[i] > threshold, otherwise 0, for
// i in [0, n). Each work-item starts at its global id and advances by the
// global size.
__kernel void TEMPLATE(threshold,Dtype)(const int_tp n, const Dtype threshold,
                                        __global const Dtype* in,
                                        __global Dtype* out) {
  for (int_tp index = get_global_id(0); index < n; index += get_global_size(0)) {
    // Select Dtype constants directly rather than double literals that
    // would then be implicitly narrowed on assignment.
    out[index] = (in[index] > threshold) ? (Dtype)1 : (Dtype)0;
  }
}
// PReLU forward pass over n elements:
//   out[i] = in[i]                  if in[i] > 0
//   out[i] = in[i] * slope_data[c]  otherwise
// with c = ((i / dim) % channels) / div_factor.
// NOTE(review): div_factor presumably collapses all channels onto a single
// shared slope -- confirm against the caller.
__kernel void TEMPLATE(prelu_forward,Dtype)(const int_tp n, const int_tp channels,
                                            const int_tp dim,
                                            __global const Dtype* in,
                                            __global Dtype* out,
                                            __global const Dtype* slope_data,
                                            const int_tp div_factor) {
  int_tp i = get_global_id(0);
  while (i < n) {
    const int_tp channel = (i / dim) % channels;
    const int_tp c = channel / div_factor;
    const Dtype x = in[i];
    if (x > 0) {
      out[i] = x;
    } else {
      // The slope is only read for non-positive inputs.
      out[i] = x * slope_data[c];
    }
    i += get_global_size(0);
  }
}
// PReLU backward pass with respect to the input, over n elements:
//   out_diff[i] = in_diff[i]                  if in_data[i] > 0
//   out_diff[i] = in_diff[i] * slope_data[c]  if in_data[i] <= 0
// with c = ((i / dim) % channels) / div_factor.
// If neither comparison holds (in_data[i] is NaN) the scale factor is 0.
__kernel void TEMPLATE(prelu_backward,Dtype)(const int_tp n, const int_tp channels,
                                             const int_tp dim,
                                             __global const Dtype* in_diff,
                                             __global const Dtype* in_data,
                                             __global Dtype* out_diff,
                                             __global const Dtype* slope_data,
                                             const int_tp div_factor) {
  for (int_tp index = get_global_id(0); index < n; index += get_global_size(0)) {
    int_tp c = (index / dim) % channels / div_factor;
    const Dtype x = in_data[index];
    // Build the 0/1 masks in Dtype. Unsuffixed literals such as 1.0 are
    // double constants and would promote the whole expression to double.
    const Dtype positive = (x > 0) ? (Dtype)1 : (Dtype)0;
    const Dtype non_positive = (x <= 0) ? (Dtype)1 : (Dtype)0;
    out_diff[index] = in_diff[index]
        * (positive + non_positive * slope_data[c]);
  }
}
// PReLU backward pass with respect to the slope parameter.
// For each i in [0, n), sums over rows k = 0 .. rows-1 the term
//   in_diff[i + k * rowPitch] * in_data[i + k * rowPitch]
// for elements whose in_data value is <= 0 (other elements are multiplied
// by 0), and stores the sum in out_diff[i].
// Row 0 is always read, even when rows < 1.
// NOTE(review): assumes out_diff does not overlap in_diff / in_data --
// confirm against the caller.
__kernel void TEMPLATE(prelu_param_backward,Dtype)(const int_tp n, const int_tp rows,
                                                   const int_tp rowPitch,
                                                   __global const Dtype* in_diff,
                                                   __global const Dtype* in_data,
                                                   __global Dtype* out_diff) {
  for (int_tp index = get_global_id(0); index < n; index += get_global_size(0)) {
    // Accumulate in a private variable and store once, instead of a
    // read-modify-write on global memory for every row. The 0/1 mask is
    // kept in Dtype so the products are not promoted to double.
    Dtype acc = in_diff[index] * in_data[index]
        * ((in_data[index] <= 0) ? (Dtype)1 : (Dtype)0);
    // The counter uses int_tp so it has the same type as rows and rowPitch.
    for (int_tp k = 1; k < rows; ++k) {
      const int_tp offset = index + k * rowPitch;
      const Dtype x = in_data[offset];
      acc += in_diff[offset] * x * ((x <= 0) ? (Dtype)1 : (Dtype)0);
    }
    out_diff[index] = acc;
  }
}
|