/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#include "../include/BiF_Definitions.cl"
#include "spirv.h"
INLINE
float OVERLOADABLE fast_fmod( float xx, float yy )
{
    float result = xx - yy * trunc( xx / yy );
    return result;
}

INLINE
float2 OVERLOADABLE fast_fmod( float2 xx, float2 yy )
{
    float2 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    return temp;
}

INLINE
float3 OVERLOADABLE fast_fmod( float3 xx, float3 yy )
{
    float3 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    temp.s2 = fast_fmod(xx.s2, yy.s2);
    return temp;
}

INLINE
float4 OVERLOADABLE fast_fmod( float4 xx, float4 yy )
{
    float4 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    temp.s2 = fast_fmod(xx.s2, yy.s2);
    temp.s3 = fast_fmod(xx.s3, yy.s3);
    return temp;
}
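
// Half-precision overloads (cl_khr_fp16): the scalar form promotes to float,
// computes in single precision, and converts back; vector forms operate
// component-wise on the scalar overload.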
#if defined(cl_khr_fp16)

INLINE
half OVERLOADABLE fast_fmod( half xx, half yy )
{
    return (half)fast_fmod((float)xx, (float)yy);
}

INLINE
half2 OVERLOADABLE fast_fmod( half2 xx, half2 yy )
{
    half2 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    return temp;
}

INLINE
half3 OVERLOADABLE fast_fmod( half3 xx, half3 yy )
{
    half3 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    temp.s2 = fast_fmod(xx.s2, yy.s2);
    return temp;
}

INLINE
half4 OVERLOADABLE fast_fmod( half4 xx, half4 yy )
{
    half4 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    temp.s2 = fast_fmod(xx.s2, yy.s2);
    temp.s3 = fast_fmod(xx.s3, yy.s3);
    return temp;
}

#endif // cl_khr_fp16
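
// Double-precision overloads, available when cl_fp64_basic_ops is defined;
// the scalar form uses the same trunc-based formula as the float overload.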
#if defined(cl_fp64_basic_ops)

INLINE
double OVERLOADABLE fast_fmod( double xx, double yy )
{
    double result = xx - yy * trunc( xx / yy );
    return result;
}

INLINE
double2 OVERLOADABLE fast_fmod( double2 xx, double2 yy )
{
    double2 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    return temp;
}

INLINE
double3 OVERLOADABLE fast_fmod( double3 xx, double3 yy )
{
    double3 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    temp.s2 = fast_fmod(xx.s2, yy.s2);
    return temp;
}

INLINE
double4 OVERLOADABLE fast_fmod( double4 xx, double4 yy )
{
    double4 temp;
    temp.s0 = fast_fmod(xx.s0, yy.s0);
    temp.s1 = fast_fmod(xx.s1, yy.s1);
    temp.s2 = fast_fmod(xx.s2, yy.s2);
    temp.s3 = fast_fmod(xx.s3, yy.s3);
    return temp;
}

#endif // cl_fp64_basic_ops
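
// Standard fmod: each scalar overload forwards to the corresponding SPIR-V OCL
// builtin, and GENERATE_VECTOR_FUNCTIONS_2ARGS_VV_LOOP expands the vector overloads.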
float OVERLOADABLE fmod( float xx, float yy )
{
    return SPIRV_OCL_BUILTIN(fmod, _f32_f32, )( xx, yy );
}

GENERATE_VECTOR_FUNCTIONS_2ARGS_VV_LOOP( fmod, float, float, float )

#if defined(cl_khr_fp64)

double OVERLOADABLE fmod( double xx, double yy )
{
    return SPIRV_OCL_BUILTIN(fmod, _f64_f64, )( xx, yy );
}

GENERATE_VECTOR_FUNCTIONS_2ARGS_VV_LOOP( fmod, double, double, double )

#endif // defined(cl_khr_fp64)

#if defined(cl_khr_fp16)

INLINE half OVERLOADABLE fmod( half x, half y )
{
    return SPIRV_OCL_BUILTIN(fmod, _f16_f16, )( x, y );
}

GENERATE_VECTOR_FUNCTIONS_2ARGS_VV_LOOP( fmod, half, half, half )

#endif // defined(cl_khr_fp16)