1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
|
#ifndef SAS_KERNEL_HDR
#define SAS_KERNEL_HDR
#define FLOAT_SIZE 8
#ifdef __OPENCL_VERSION__
# define USE_OPENCL
#elif defined(__CUDACC__)
# define USE_CUDA
#elif defined(_OPENMP)
# define USE_OPENMP
#endif
// Use SAS_DOUBLE to force the use of double even for float kernels
#define SAS_DOUBLE double
// If opencl is not available, then we are compiling a C function
// Note: if using a C++ compiler, then define kernel as extern "C"
#ifdef USE_OPENCL
#define USE_GPU
#define pglobal global
#define pconstant constant
typedef int int32_t;
#if defined(USE_SINCOS)
# define SINCOS(angle,svar,cvar) svar=sincos(angle,&cvar)
#else
# define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
#endif
// Intel CPU on Mac gives strange values for erf(); on the verified
// platforms (intel, nvidia, amd), the cephes erf() is significantly
// faster than that available in the native OpenCL.
#define NEED_ERF
// OpenCL only has type generic math
#define expf exp
#ifndef NEED_ERF
# define erff erf
# define erfcf erfc
#endif
#elif defined(USE_CUDA)
#define USE_GPU
#define local __shared__
#define pglobal
#define constant __constant__
#define pconstant const
#define kernel extern "C" __global__
// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
// OpenCL pown(a,b) = C99 pow(a,b), b integer
#define powr(a,b) pow(a,b)
#define pown(a,b) pow(a,b)
//typedef int int32_t;
#if defined(USE_SINCOS)
# define SINCOS(angle,svar,cvar) sincos(angle,&svar,&cvar)
#else
# define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
#endif
#else // !USE_OPENCL && !USE_CUDA
#define local
#define pglobal
#define constant const
#define pconstant const
#ifdef __cplusplus
#include <cstdio>
#include <cmath>
using namespace std;
#if defined(_MSC_VER)
#include <limits>
#include <float.h>
#define kernel extern "C" __declspec( dllexport )
inline double trunc(double x) { return x>=0?floor(x):-floor(-x); }
inline double fmin(double x, double y) { return x>y ? y : x; }
inline double fmax(double x, double y) { return x<y ? y : x; }
#define isnan(x) _isnan(x)
#define isinf(x) (!_finite(x))
#define isfinite(x) _finite(x)
#define NAN (std::numeric_limits<double>::quiet_NaN()) // non-signalling NaN
#define INFINITY (std::numeric_limits<double>::infinity())
#define NEED_ERF
#define NEED_EXPM1
#define NEED_TGAMMA
#else
#define kernel extern "C"
#include <cstdint>
#endif
inline void SINCOS(double angle, double &svar, double &cvar) { svar=sin(angle); cvar=cos(angle); }
#else // !__cplusplus
#include <inttypes.h> // C99 guarantees that int32_t types is here
#include <stdio.h>
#if defined(__TINYC__)
typedef int int32_t;
#include <math.h>
// TODO: check isnan is correct
inline double _isnan(double x) { return x != x; } // hope this doesn't optimize away!
#undef isnan
#define isnan(x) _isnan(x)
// Defeat the double->float conversion since we don't have tgmath
inline SAS_DOUBLE trunc(SAS_DOUBLE x) { return x>=0?floor(x):-floor(-x); }
inline SAS_DOUBLE fmin(SAS_DOUBLE x, SAS_DOUBLE y) { return x>y ? y : x; }
inline SAS_DOUBLE fmax(SAS_DOUBLE x, SAS_DOUBLE y) { return x<y ? y : x; }
#define NEED_ERF
#define NEED_EXPM1
#define NEED_TGAMMA
#define NEED_CBRT
// expf missing from windows?
#define expf exp
#else
#include <tgmath.h> // C99 type-generic math, so sin(float) => sinf
#endif
// MSVC doesn't support C99, so no need for dllexport on C99 branch
#define kernel
#define SINCOS(angle,svar,cvar) do {const double _t_=angle; svar=sin(_t_);cvar=cos(_t_);} while (0)
#endif // !__cplusplus
// OpenCL powr(a,b) = C99 pow(a,b), b >= 0
// OpenCL pown(a,b) = C99 pow(a,b), b integer
#define powr(a,b) pow(a,b)
#define pown(a,b) pow(a,b)
#endif // !USE_OPENCL
#if defined(NEED_CBRT)
#define cbrt(_x) pow(_x, 0.33333333333333333333333)
#endif
#if defined(NEED_EXPM1)
// TODO: precision is a half digit lower than numpy on mac in [1e-7, 0.5]
// Run "explore/precision.py sas_expm1" to see this (may have to fiddle
// the xrange for log to see the complete range).
static SAS_DOUBLE expm1(SAS_DOUBLE x_in) {
double x = (double)x_in; // go back to float for single precision kernels
// Adapted from the cephes math library.
// Copyright 1984 - 1992 by Stephen L. Moshier
if (x != x || x == 0.0) {
return x; // NaN and +/- 0
} else if (x < -0.5 || x > 0.5) {
return exp(x) - 1.0;
} else {
const double xsq = x*x;
const double p = (((
+1.2617719307481059087798E-4)*xsq
+3.0299440770744196129956E-2)*xsq
+9.9999999999999999991025E-1);
const double q = ((((
+3.0019850513866445504159E-6)*xsq
+2.5244834034968410419224E-3)*xsq
+2.2726554820815502876593E-1)*xsq
+2.0000000000000000000897E0);
double r = x * p;
r = r / (q - r);
return r+r;
}
}
#endif
// Standard mathematical constants:
// M_E, M_LOG2E, M_LOG10E, M_LN2, M_LN10, M_PI, M_PI_2=pi/2, M_PI_4=pi/4,
// M_1_PI=1/pi, M_2_PI=2/pi, M_2_SQRTPI=2/sqrt(pi), SQRT2, SQRT1_2=sqrt(1/2)
// OpenCL defines M_constant_F for float constants, and nothing if double
// is not enabled on the card, which is why these constants may be missing
#ifndef M_PI
# define M_PI 3.141592653589793
#endif
#ifndef M_PI_2
# define M_PI_2 1.570796326794897
#endif
#ifndef M_PI_4
# define M_PI_4 0.7853981633974483
#endif
#ifndef M_E
# define M_E 2.718281828459045091
#endif
#ifndef M_SQRT1_2
# define M_SQRT1_2 0.70710678118654746
#endif
// Non-standard function library
// pi/180, used for converting between degrees and radians
// 4/3 pi for computing sphere volumes
// square and cube for computing squares and cubes
#ifndef M_PI_180
# define M_PI_180 0.017453292519943295
#endif
#ifndef M_4PI_3
# define M_4PI_3 4.18879020478639
#endif
double square(double x) { return x*x; }
double cube(double x) { return x*x*x; }
double sas_sinx_x(double x) { return x==0 ? 1.0 : sin(x)/x; }
// CRUFT: support old style models with orientation received qx, qy and angles
// To rotate from the canonical position to theta, phi, psi, first rotate by
// psi about the major axis, oriented along z, which is a rotation in the
// detector plane xy. Next rotate by theta about the y axis, aligning the major
// axis in the xz plane. Finally, rotate by phi in the detector plane xy.
// To compute the scattering, undo these rotations in reverse order:
// rotate in xy by -phi, rotate in xz by -theta, rotate in xy by -psi
// The returned q is the length of the q vector and (xhat, yhat, zhat) is a unit
// vector in the q direction.
// To change between counterclockwise and clockwise rotation, change the
// sign of phi and psi.
#if 1
//think cos(theta) should be sin(theta) in new coords, RKH 11Jan2017
#define ORIENT_SYMMETRIC(qx, qy, theta, phi, q, sn, cn) do { \
SINCOS(phi*M_PI_180, sn, cn); \
q = sqrt(qx*qx + qy*qy); \
cn = (q==0. ? 1.0 : (cn*qx + sn*qy)/q * sin(theta*M_PI_180)); \
sn = sqrt(1 - cn*cn); \
} while (0)
#else
// SasView 3.x definition of orientation
#define ORIENT_SYMMETRIC(qx, qy, theta, phi, q, sn, cn) do { \
SINCOS(theta*M_PI_180, sn, cn); \
q = sqrt(qx*qx + qy*qy);\
cn = (q==0. ? 1.0 : (cn*cos(phi*M_PI_180)*qx + sn*qy)/q); \
sn = sqrt(1 - cn*cn); \
} while (0)
#endif
#if 1
#define ORIENT_ASYMMETRIC(qx, qy, theta, phi, psi, q, xhat, yhat, zhat) do { \
q = sqrt(qx*qx + qy*qy); \
const double qxhat = qx/q; \
const double qyhat = qy/q; \
double sin_theta, cos_theta; \
double sin_phi, cos_phi; \
double sin_psi, cos_psi; \
SINCOS(theta*M_PI_180, sin_theta, cos_theta); \
SINCOS(phi*M_PI_180, sin_phi, cos_phi); \
SINCOS(psi*M_PI_180, sin_psi, cos_psi); \
xhat = qxhat*(-sin_phi*sin_psi + cos_theta*cos_phi*cos_psi) \
+ qyhat*( cos_phi*sin_psi + cos_theta*sin_phi*cos_psi); \
yhat = qxhat*(-sin_phi*cos_psi - cos_theta*cos_phi*sin_psi) \
+ qyhat*( cos_phi*cos_psi - cos_theta*sin_phi*sin_psi); \
zhat = qxhat*(-sin_theta*cos_phi) \
+ qyhat*(-sin_theta*sin_phi); \
} while (0)
#else
// SasView 3.x definition of orientation
#define ORIENT_ASYMMETRIC(qx, qy, theta, phi, psi, q, cos_alpha, cos_mu, cos_nu) do { \
q = sqrt(qx*qx + qy*qy); \
const double qxhat = qx/q; \
const double qyhat = qy/q; \
double sin_theta, cos_theta; \
double sin_phi, cos_phi; \
double sin_psi, cos_psi; \
SINCOS(theta*M_PI_180, sin_theta, cos_theta); \
SINCOS(phi*M_PI_180, sin_phi, cos_phi); \
SINCOS(psi*M_PI_180, sin_psi, cos_psi); \
cos_alpha = cos_theta*cos_phi*qxhat + sin_theta*qyhat; \
cos_mu = (-sin_theta*cos_psi*cos_phi - sin_psi*sin_phi)*qxhat + cos_theta*cos_psi*qyhat; \
cos_nu = (-cos_phi*sin_psi*sin_theta + sin_phi*cos_psi)*qxhat + sin_psi*cos_theta*qyhat; \
} while (0)
#endif
//# Beginning of rotational operation definitions
typedef struct {
double R31, R32;
} QACRotation;
typedef struct {
double R11, R12;
double R21, R22;
double R31, R32;
} QABCRotation;
// Fill in the rotation matrix R from the view angles (theta, phi) and the
// jitter angles (dtheta, dphi). This matrix can be applied to all of the
// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qc]'
static void
qac_rotation(
QACRotation *rotation,
double theta, double phi,
double dtheta, double dphi)
{
double sin_theta, cos_theta;
double sin_phi, cos_phi;
// reverse view matrix
SINCOS(theta*M_PI_180, sin_theta, cos_theta);
SINCOS(phi*M_PI_180, sin_phi, cos_phi);
const double V11 = cos_phi*cos_theta;
const double V12 = sin_phi*cos_theta;
const double V21 = -sin_phi;
const double V22 = cos_phi;
const double V31 = sin_theta*cos_phi;
const double V32 = sin_phi*sin_theta;
// reverse jitter matrix
SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
const double J31 = sin_theta;
const double J32 = -sin_phi*cos_theta;
const double J33 = cos_phi*cos_theta;
// reverse matrix
rotation->R31 = J31*V11 + J32*V21 + J33*V31;
rotation->R32 = J31*V12 + J32*V22 + J33*V32;
}
// Apply the rotation matrix returned from qac_rotation to the point (qx,qy),
// returning R*[qx,qy]' = [qa,qc]'
static void
qac_apply(
QACRotation *rotation,
double qx, double qy,
double *qab_out, double *qc_out)
{
// Indirect calculation of qab, from qab^2 = |q|^2 - qc^2
const double dqc = rotation->R31*qx + rotation->R32*qy;
const double dqab_sq = -dqc*dqc + qx*qx + qy*qy;
//*qab_out = sqrt(fabs(dqab_sq));
*qab_out = dqab_sq > 0.0 ? sqrt(dqab_sq) : 0.0;
*qc_out = dqc;
}
// Fill in the rotation matrix R from the view angles (theta, phi, psi) and the
// jitter angles (dtheta, dphi, dpsi). This matrix can be applied to all of the
// (qx, qy) points in the image to produce R*[qx,qy]' = [qa,qb,qc]'
static void
qabc_rotation(
QABCRotation *rotation,
double theta, double phi, double psi,
double dtheta, double dphi, double dpsi)
{
double sin_theta, cos_theta;
double sin_phi, cos_phi;
double sin_psi, cos_psi;
// reverse view matrix
SINCOS(theta*M_PI_180, sin_theta, cos_theta);
SINCOS(phi*M_PI_180, sin_phi, cos_phi);
SINCOS(psi*M_PI_180, sin_psi, cos_psi);
const double V11 = -sin_phi*sin_psi + cos_phi*cos_psi*cos_theta;
const double V12 = sin_phi*cos_psi*cos_theta + sin_psi*cos_phi;
const double V21 = -sin_phi*cos_psi - sin_psi*cos_phi*cos_theta;
const double V22 = -sin_phi*sin_psi*cos_theta + cos_phi*cos_psi;
const double V31 = sin_theta*cos_phi;
const double V32 = sin_phi*sin_theta;
// reverse jitter matrix
SINCOS(dtheta*M_PI_180, sin_theta, cos_theta);
SINCOS(dphi*M_PI_180, sin_phi, cos_phi);
SINCOS(dpsi*M_PI_180, sin_psi, cos_psi);
const double J11 = cos_psi*cos_theta;
const double J12 = sin_phi*sin_theta*cos_psi + sin_psi*cos_phi;
const double J13 = sin_phi*sin_psi - sin_theta*cos_phi*cos_psi;
const double J21 = -sin_psi*cos_theta;
const double J22 = -sin_phi*sin_psi*sin_theta + cos_phi*cos_psi;
const double J23 = sin_phi*cos_psi + sin_psi*sin_theta*cos_phi;
const double J31 = sin_theta;
const double J32 = -sin_phi*cos_theta;
const double J33 = cos_phi*cos_theta;
// reverse matrix
rotation->R11 = J11*V11 + J12*V21 + J13*V31;
rotation->R12 = J11*V12 + J12*V22 + J13*V32;
rotation->R21 = J21*V11 + J22*V21 + J23*V31;
rotation->R22 = J21*V12 + J22*V22 + J23*V32;
rotation->R31 = J31*V11 + J32*V21 + J33*V31;
rotation->R32 = J31*V12 + J32*V22 + J33*V32;
}
// Apply the rotation matrix returned from qabc_rotation to the point (qx,qy),
// returning R*[qx,qy]' = [qa,qb,qc]'
static void
qabc_apply(
QABCRotation *rotation,
double qx, double qy,
double *qa_out, double *qb_out, double *qc_out)
{
*qa_out = rotation->R11*qx + rotation->R12*qy;
*qb_out = rotation->R21*qx + rotation->R22*qy;
*qc_out = rotation->R31*qx + rotation->R32*qy;
}
// ##### End of rotation operation definitions ######
#endif // SAS_KERNEL_HDR
|