1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
/*
This file is part of darktable,
Copyright (C) 2012-2025 darktable developers.
darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
darktable is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with darktable. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Lower bound for norms: 1.52587890625e-05 is exactly 2^(-16)
#define NORM_MIN 1.52587890625e-05f // norm can't be less than 2^(-16)
// Image samplers. All of them address the image with unnormalized coordinates.
// sampleri: clamp-to-edge addressing, nearest filtering
constant sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
// samplerf: clamp-to-edge addressing, linear filtering
constant sampler_t samplerf = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// samplerc: clamp addressing (CLK_ADDRESS_CLAMP, not clamp-to-edge), nearest filtering
constant sampler_t samplerc = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
// samplerA: no addressing mode at all, nearest filtering;
// for use when the bound checks are already done manually
constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
// Fallback definition of pi as a float constant, only used when the compiler does not provide M_PI_F
#ifndef M_PI_F
#define M_PI_F 3.14159265358979323846f // should be defined by the OpenCL compiler according to the standard
#endif
#define LUT_ELEM 512 // number of elements of the gamut LUT
// NOTE(review): presumably the component indices of an RGBA pixel -- confirm against the callers
#define RED 0
#define GREEN 1
#define BLUE 2
#define ALPHA 3
// DT_OPENCL_PERFORMANCE is defined unconditionally right here, so the first branch below
// (native_* built-ins, FP_CONTRACT ON) is the one in effect; the #else branch is only
// reached if this define is removed.
#define DT_OPENCL_PERFORMANCE
#ifdef DT_OPENCL_PERFORMANCE
// dtcl_* math wrappers mapped to the native_* built-ins.
// NOTE(review): dtcl_pow maps to native_powr in this branch but to pow in the #else branch;
// the OpenCL powr family is only specified for bases >= 0 -- confirm callers never pass a
// negative base.
#define dtcl_sin(A) native_sin(A)
#define dtcl_cos(A) native_cos(A)
#define dtcl_sqrt(A) native_sqrt(A)
#define dtcl_pow(A,B) native_powr(A,B)
#define dtcl_exp(A) native_exp(A)
#define dtcl_log(A) native_log(A)
// Allow the compiler to convert a * b + c to fused multiply-add to use hardware acceleration
// on compatible platforms
#pragma OPENCL FP_CONTRACT ON
#else
// dtcl_* math wrappers mapped to the regular math built-ins, with contraction to
// fused multiply-add switched off
#define dtcl_sin(A) sin(A)
#define dtcl_cos(A) cos(A)
#define dtcl_sqrt(A) sqrt(A)
#define dtcl_pow(A,B) pow(A,B)
#define dtcl_exp(A) exp(A)
#define dtcl_log(A) log(A)
#pragma OPENCL FP_CONTRACT OFF
#endif
/* Return the 2-bit code stored in `filters` for the mosaic position (row, col).
 *
 * `filters` is a packed descriptor holding one 2-bit entry per position of a
 * pattern that is 8 rows by 2 columns: only the three low bits of `row` and
 * the lowest bit of `col` take part in the lookup.
 * The result is always in the range 0..3.
 */
static inline int
FC(const int row, const int col, const unsigned int filters)
{
  // (row mod 8) * 2, i.e. the entry index of the first column of that row
  const int row_part = (row << 1) & 14;
  // 0 for even columns, 1 for odd columns
  const int col_part = col & 1;
  // every entry is two bits wide, so the bit offset is twice the entry index
  const int shift = (row_part + col_part) << 1;
  return (filters >> shift) & 3;
}
/* Look up the entry of the 6-column `xtrans` table that belongs to (row, col).
 *
 * Both coordinates are reduced modulo 6 before indexing the table.
 */
static inline int
FCxtrans(const int row, const int col, global const unsigned char (*const xtrans)[6])
{
  // There used to be a few cases in the xtrans demosaicers where row or col was negative.
  // Adding 600 (a multiple of 6) before the modulo keeps the index non-negative for
  // coordinates down to -600, matching the CPU code.
  const int table_row = (row + 600) % 6;
  const int table_col = (col + 600) % 6;
  return xtrans[table_row][table_col];
}
/* Return the mosaic code at (row, col) for either kind of sensor description.
 *
 * When `filters` equals 9 the value is taken from the `xtrans` table via
 * FCxtrans(); for every other value `filters` itself is decoded by FC() and
 * `xtrans` is not accessed.
 */
int
fcol(const int row, const int col, const unsigned int filters, global const unsigned char (*const xtrans)[6])
{
  return (filters == 9) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
}
/* Length of the vector (x, y): square root of x*x + y*y, evaluated with dtcl_sqrt.
 *
 * The squares are formed directly, without any rescaling of the inputs.
 */
static inline float
dt_fast_hypot(const float x, const float y)
{
  const float sum_of_squares = x * x + y * y;
  return dtcl_sqrt(sum_of_squares);
}
/* Fast approximation of e^x.
 * We use this exp approximation to maintain full identity with the cpu path.
 *
 * The float result is built by linearly interpolating between integer bit
 * patterns and reinterpreting the resulting integer as a float.
 *
 * Meant for the range [-100.0f, 0.0f]; the original authors' note quotes a
 * largest error of about -0.06 there and warns that it gets a lot worse for
 * x > 0.0f (about 9000 at 10.0f).
 */
static inline float
dt_fast_expf(const float x)
{
  // bit pattern of 1.0f
  const int bits_one = 0x3f800000u;
  // bit pattern of e as a float, which yields e^x; 0x40000000u here would yield 2^x instead
  const int bits_e = 0x402DF854u;
  // Only the lower bound is clamped below. Clamping the upper bound to 0x7fffffff as well
  // would be needed for large x, but leaving it out is faster.
  const int raw = bits_one + x * (bits_e - bits_one);
  union {
    int bits;
    float value;
  } pun;
  if(raw > 0)
    pun.bits = raw;
  else
    pun.bits = 0; // non-positive patterns are flushed to 0.0f
  return pun.value;
}
/* Return the square of `a`. */
static inline float fsquare(const float a)
{
  const float squared = a * a;
  return squared;
}
/* Restrict `a` to the interval [0.0f, 1.0f] using the clamp() built-in. */
static inline float clipf(const float a)
{
  const float lower = 0.0f;
  const float upper = 1.0f;
  return clamp(a, lower, upper);
}
|