File: inv.h

package info (click to toggle)
vdt 0.4.4-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 620 kB
  • sloc: cpp: 2,382; ansic: 1,589; python: 1,126; csh: 16; makefile: 8
file content (99 lines) | stat: -rw-r--r-- 3,773 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
 * inv.h
 * An experiment: implement division with the square fo the approximate 
 * inverse square root.
 * In other words one transforms a shift, multiplications and sums into a 
 * sqrt.
 * 
 *  Created on: Jun 24, 2012
 *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
 * 
 * VDT is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef INV_H_
#define INV_H_

#include "vdtcore_common.h"
#include "sqrt.h"
#include <cmath>
#include <limits>

namespace vdt{

//------------------------------------------------------------------------------

/// General implementation of the inversion
inline double fast_inv_general(double x, const uint32_t isqrt_iterations) {  
  const uint64_t sign_mask = details::getSignMask(x);
  const double sqrt_one_over_x = fast_isqrt_general(std::fabs(x),
                                                   isqrt_iterations);
  return sqrt_one_over_x*(details::dpORuint64(sqrt_one_over_x , sign_mask ));
}

//------------------------------------------------------------------------------

/// Four iterations inversion
inline double fast_inv(double x) {return fast_inv_general(x,4);}

//------------------------------------------------------------------------------

/// Three iterations
inline double fast_approx_inv(double x) {return fast_inv_general(x,3);}

//------------------------------------------------------------------------------

/// For comparisons
inline double inv (double x) {return 1./x;}

//------------------------------------------------------------------------------
// Single precision          



/// General implementation of the inversion
inline float fast_invf_general(float x, const uint32_t isqrt_iterations) { 
  const uint32_t sign_mask = details::getSignMask(x);
  const float sqrt_one_over_x = fast_isqrtf_general(std::fabs(x),
                                                   isqrt_iterations);
  return sqrt_one_over_x*(details::spORuint32(sqrt_one_over_x , sign_mask ));
}

//------------------------------------------------------------------------------

/// Two iterations
inline float fast_invf(float x) {return fast_invf_general(x,2);}

//------------------------------------------------------------------------------

/// One iterations
inline float fast_approx_invf(float x) {return fast_invf_general(x,1);}

//------------------------------------------------------------------------------

/// For comparisons
inline float invf (float x) {return 1.f/x;}

//------------------------------------------------------------------------------

void invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void fast_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void fast_approx_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
void fast_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
void fast_approx_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);

} // end namespace vdt

#endif /* INV_H_ */