File: common.h

package info (click to toggle)
darktable 5.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 65,660 kB
  • sloc: ansic: 367,579; cpp: 102,778; xml: 20,091; lisp: 15,099; sh: 3,771; javascript: 3,264; perl: 1,925; python: 1,551; ruby: 975; makefile: 543; asm: 46; sql: 38; awk: 21
file content (125 lines) | stat: -rw-r--r-- 3,796 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
    This file is part of darktable,
    Copyright (C) 2012-2025 darktable developers.

    darktable is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    darktable is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once
#define NORM_MIN 1.52587890625e-05f // lower bound for norms: a norm must not be smaller than 2^(-16)


// Image samplers shared by the kernels. All of them use unnormalized coordinates.

// nearest-neighbour filtering, CLK_ADDRESS_CLAMP_TO_EDGE addressing
constant sampler_t sampleri =  CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

// linear filtering, CLK_ADDRESS_CLAMP_TO_EDGE addressing
constant sampler_t samplerf =  CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// nearest-neighbour filtering, CLK_ADDRESS_CLAMP addressing
// NOTE(review): per the OpenCL specification out-of-range reads then yield the border color
// instead of the edge pixel -- confirm that users of this sampler rely on that.
constant sampler_t samplerc =  CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP         | CLK_FILTER_NEAREST;

// sampler for when the bound checks are already done manually
// (CLK_ADDRESS_NONE: no address handling is requested from the sampler, nearest-neighbour filtering)
constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE            | CLK_FILTER_NEAREST;


// Single-precision pi; only defined here if the OpenCL compiler did not provide it already.
#ifndef M_PI_F
#define M_PI_F           3.14159265358979323846f  // should be defined by the OpenCL compiler according to the standard
#endif

#define LUT_ELEM 512 // number of elements in the gamut LUT

// Channel indices.
// NOTE(review): presumably used to index the components of 4-channel pixels -- confirm against callers.
#define RED 0
#define GREEN 1
#define BLUE 2
#define ALPHA 3

// Selects which math built-ins the dtcl_* wrappers expand to.
// The macro is defined unconditionally right here, so the #else branch below is only
// compiled if this define is removed by hand.
#define DT_OPENCL_PERFORMANCE

#ifdef DT_OPENCL_PERFORMANCE
  // performance variant: map the wrappers to the native_* built-ins
  #define dtcl_sin(A) native_sin(A)
  #define dtcl_cos(A) native_cos(A)
  #define dtcl_sqrt(A) native_sqrt(A)
  // NOTE(review): this maps to native_powr while the #else branch uses pow. The powr family
  // is specified for a base >= 0 only -- confirm that no caller passes a negative A.
  #define dtcl_pow(A,B) native_powr(A,B)
  #define dtcl_exp(A) native_exp(A)
  #define dtcl_log(A) native_log(A)
  // Allow the compiler to convert a * b + c to fused multiply-add to use hardware acceleration
  // on compatible platforms
  #pragma OPENCL FP_CONTRACT ON
#else
  // non-performance variant: map the wrappers to the regular built-ins and disable contraction
  #define dtcl_sin(A) sin(A)
  #define dtcl_cos(A) cos(A)
  #define dtcl_sqrt(A) sqrt(A)
  #define dtcl_pow(A,B) pow(A,B)
  #define dtcl_exp(A) exp(A)
  #define dtcl_log(A) log(A)
  #pragma OPENCL FP_CONTRACT OFF
#endif

// Extract the 2-bit entry for position (row, col) from the packed `filters` word.
// The word holds 8 rows x 2 columns of 2-bit entries: the three low bits of `row`
// and the lowest bit of `col` select the entry.
static inline int
FC(const int row, const int col, const unsigned int filters)
{
  const int row_part = (row & 7) << 1;       // same value as (row << 1) & 14
  const int col_part = col & 1;
  const int bit_pos = (row_part + col_part) << 1; // two bits per entry
  return (filters >> bit_pos) & 3;
}


// Return the entry of the 6x6 `xtrans` table for position (row, col);
// both coordinates are wrapped into the table modulo 6.
static inline int
FCxtrans(const int row, const int col, global const unsigned char (*const xtrans)[6])
{
  // A few xtrans demosaicers used to call this with a negative row or col.
  // Adding 600 (a multiple of 6) before the modulo keeps the array index
  // non-negative for offsets down to -600, the same way the CPU code does.
  const int wrapped_row = (row + 600) % 6;
  const int wrapped_col = (col + 600) % 6;
  return xtrans[wrapped_row][wrapped_col];
}

// Dispatch helper: filters == 9 selects the xtrans table lookup (FCxtrans),
// every other value is handed to FC() as a packed pattern word.
int
fcol(const int row, const int col, const unsigned int filters, global const unsigned char (*const xtrans)[6])
{
  return (filters == 9) ? FCxtrans(row, col, xtrans)
                        : FC(row, col, filters);
}


// Euclidean length of (x, y) computed as dtcl_sqrt(x^2 + y^2).
// The squares are formed directly; no rescaling is applied to them.
static inline float
dt_fast_hypot(const float x, const float y)
{
  // kept as one expression so the compiler sees the same a * b + c pattern as before
  const float sum_of_squares = x * x + y * y;
  return dtcl_sqrt(sum_of_squares);
}

/* Fast exp(x) approximation. It is used instead of a built-in exp so that the
   result stays fully identical to the cpu path. */
static inline float
dt_fast_expf(const float x)
{
  // Intended for x in [-100.0f, 0.0f]. According to the original author's note the
  // error peaks at roughly 0.06 near 0.0f and becomes _a_lot_ worse for
  // x > 0.0f (about 9000 at 10.0f).
  const int one_bits = 0x3f800000u; // bit pattern of 1.0f
  // bit pattern of e as a float, which yields e^x; 0x40000000u (2.0f) would yield 2^x
  const int e_bits = 0x402DF854u;
  const int slope = e_bits - one_bits;
  // Interpolate linearly on the bit pattern. Only the lower bound is clamped;
  // the fully clamped form would be
  //   CLAMPS(one_bits + x * slope, 0x0u, 0x7fffffffu)
  // and skipping the upper clamp is faster but doesn't work for large x.
  const int raw_bits = one_bits + x * slope;
  union {
      float f;
      int k;
  } pun;
  pun.k = 0;
  if(raw_bits > 0) pun.k = raw_bits;
  return pun.f;
}

// Return a squared (a * a).
static inline float fsquare(const float a)
{
  const float squared = a * a;
  return squared;
}

// Limit a to the closed interval [0.0f, 1.0f] via the clamp() built-in.
static inline float clipf(const float a)
{
  const float lower = 0.0f;
  const float upper = 1.0f;
  return clamp(a, lower, upper);
}