1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#include "caffe2/utils/math/broadcast.h"
#include "caffe2/core/context.h"
#include "caffe2/utils/eigen_utils.h"
namespace caffe2 {
namespace math {
// Reports whether the shape `dims[0..ndim)` has all of its singleton
// (extent == 1) dimensions placed before all of its non-singleton dimensions.
//
// Returns true when the index of the last singleton dimension is smaller than
// the index of the first non-singleton dimension. A missing singleton counts
// as index -1 and a missing non-singleton counts as index `ndim`, so shapes
// that are all-singleton, all-non-singleton, or empty (ndim == 0) yield true.
bool can_use_broadcast_fastpath(int ndim, const int* dims) {
  // Forward scan: position of the first extent that is not 1.
  int first_non_unit = ndim;
  for (int d = 0; d < ndim; ++d) {
    if (dims[d] != 1) {
      first_non_unit = d;
      break;
    }
  }

  // Backward scan: position of the last extent that is exactly 1.
  int last_unit = -1;
  for (int d = ndim; d > 0; --d) {
    if (dims[d - 1] == 1) {
      last_unit = d - 1;
      break;
    }
  }

  return last_unit < first_non_unit;
}
// Generates the CPUContext specializations of AffineChannel for element type
// T, one for each of StorageOrder::NCHW and StorageOrder::NHWC. Both apply
// Y = X * scale + bias, where `scale` and `bias` are read as vectors of C
// entries and are broadcast along the dimension of extent C. The context
// argument is unused.
#define CAFFE2_SPECIALIZED_AFFINE_CHANNEL(T) \
  template <> \
  C10_EXPORT void AffineChannel<T, CPUContext, StorageOrder::NCHW>( \
      const int N, \
      const int C, \
      const int HxW, \
      const T* X, \
      const T* scale, \
      const T* bias, \
      T* Y, \
      CPUContext* /* context */) { \
    ConstEigenVectorArrayMap<T> scale_vec(scale, C); \
    ConstEigenVectorArrayMap<T> bias_vec(bias, C); \
    /* Number of elements consumed per outer iteration. */ \
    const int image_size = C * HxW; \
    const T* src = X; \
    T* dst = Y; \
    for (int n = 0; n < N; ++n, src += image_size, dst += image_size) { \
      /* Map the current slice as an HxW-by-C array so that each row */ \
      /* can be multiplied and offset by the transposed C-vectors. */ \
      ConstEigenArrayMap<T> src_arr(src, HxW, C); \
      EigenArrayMap<T> dst_arr(dst, HxW, C); \
      dst_arr = (src_arr.rowwise() * scale_vec.transpose()).rowwise() + \
          bias_vec.transpose(); \
    } \
  } \
  template <> \
  C10_EXPORT void AffineChannel<T, CPUContext, StorageOrder::NHWC>( \
      const int N, \
      const int C, \
      const int HxW, \
      const T* X, \
      const T* scale, \
      const T* bias, \
      T* Y, \
      CPUContext* /* context */) { \
    /* The whole input is mapped at once as a C-by-(N * HxW) array, */ \
    /* so scale and bias apply to every column in one expression. */ \
    const int num_columns = N * HxW; \
    ConstEigenVectorArrayMap<T> scale_vec(scale, C); \
    ConstEigenVectorArrayMap<T> bias_vec(bias, C); \
    ConstEigenArrayMap<T> src_arr(X, C, num_columns); \
    EigenArrayMap<T> dst_arr(Y, C, num_columns); \
    dst_arr = (src_arr.colwise() * scale_vec).colwise() + bias_vec; \
  }
CAFFE2_SPECIALIZED_AFFINE_CHANNEL(float)
#undef CAFFE2_SPECIALIZED_AFFINE_CHANNEL
} // namespace math
} // namespace caffe2
|