1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
// Copyright (c) 2014
// INRIA Saclay-Ile de France (France)
//
// This file is part of CGAL (www.cgal.org)
//
// $URL: https://github.com/CGAL/cgal/blob/v6.1.1/NewKernel_d/include/CGAL/NewKernel_d/Vector/avx4.h $
// $Id: include/CGAL/NewKernel_d/Vector/avx4.h 08b27d3db14 $
// SPDX-License-Identifier: LGPL-3.0-or-later OR LicenseRef-Commercial
//
// Author(s) : Marc Glisse
#ifndef CGAL_VECTOR_AVX4_H
#define CGAL_VECTOR_AVX4_H
#if !defined __AVX__
#error Requires AVX and gcc 4.8+
#endif
#include <x86intrin.h>
#include <CGAL/NewKernel_d/functor_tags.h>
#include <CGAL/Dimension.h>
#include <CGAL/enum.h> // CGAL::Sign
#include <CGAL/number_utils.h> // CGAL::sign
namespace CGAL {
struct Avx_vector_4 {
typedef double NT;
typedef Dimension_tag<4> Dimension;
typedef Dimension_tag<4> Max_dimension;
// No Rebind_dimension, this is a building block
template<class,bool=true> struct Property : std::false_type {};
template<bool b> struct Property<Has_vector_plus_minus_tag,b>
: std::true_type {};
/* MAYBE?
template<bool b> struct Property<Has_vector_scalar_ops_tag,b>
: std::true_type {};
*/
template<bool b> struct Property<Has_determinant_of_vectors_tag,b>
: std::true_type {};
template<bool b> struct Property<Has_dot_product_tag,b>
: std::true_type {};
template<bool b> struct Property<Has_determinant_of_vectors_omit_last_tag,b>
: std::true_type {};
typedef __m256d Vector;
struct Construct_vector {
struct Dimension {
// Initialize with NaN?
Vector operator()(unsigned d) const {
CGAL_assertion(d==4);
return Vector();
}
};
struct Iterator {
template<typename Iter>
Vector operator()(unsigned d,Iter const& f,Iter const& e) const {
CGAL_assertion(d==4);
double x0 = *f;
double x1 = *++f;
double x2 = *++f;
double x3 = *++f;
CGAL_assertion(++f==e);
Vector a = { x0, x1, x2, x3 };
return a;
}
};
struct Iterator_and_last {
template<typename Iter,typename T>
Vector operator()(unsigned d,Iter const& f,Iter const& e,double t) const {
CGAL_assertion(d==4);
double x0 = *f;
double x1 = *++f;
double x2 = *++f;
CGAL_assertion(++f==e);
Vector a = { x0, x1, x2, t };
return a;
}
};
struct Values {
Vector operator()(double a,double b,double c,double d) const {
Vector r = { a, b, c, d };
return r;
}
};
struct Values_divide {
Vector operator()(double h,double a,double b,double c,double d) const {
// {a,b,c,d}/{h,h,h,h} should be roughly the same
Vector r = { a/h, b/h, c/h, d/h };
return r;
}
};
};
public:
typedef double const* Vector_const_iterator;
static inline Vector_const_iterator vector_begin(Vector const&a){
return (Vector_const_iterator)(&a);
}
static inline Vector_const_iterator vector_end(Vector const&a){
return (Vector_const_iterator)(&a)+4;
}
static inline unsigned size_of_vector(Vector){
return 4;
}
static inline double dot_product(__m256d x, __m256d y){
__m256d p=x*y;
__m256d z=_mm256_hadd_pd(p,p);
return z[0]+z[2];
}
private:
static inline __m256d avx_sym(__m256d x){
#if 0
return __builtin_shuffle(x,(__m256i){2,3,0,1});
#else
return _mm256_permute2f128_pd(x,x,1);
#endif
}
static inline __m256d avx_left(__m256d x){
#if 0
return __builtin_shuffle(x,(__m256i){1,2,3,0});
#else
#ifdef __AVX2__
return _mm256_permute4x64_pd(x,1+2*4+3*16+0*64);
#else
__m256d s = _mm256_permute2f128_pd(x,x,1);
return _mm256_shuffle_pd(x,s,5);
#endif
#endif
}
static inline __m256d avx_right(__m256d x){
#if 0
return __builtin_shuffle(x,(__m256i){3,0,1,2});
#else
#ifdef __AVX2__
return _mm256_permute4x64_pd(x,3+0*4+1*16+2*64);
#else
__m256d s = _mm256_permute2f128_pd(x,x,1);
return _mm256_shuffle_pd(s,x,5);
#endif
#endif
}
static inline double avx_altprod(__m256d x, __m256d y){
__m256d p=x*y;
__m256d z=_mm256_hsub_pd(p,p);
return z[0]+z[2];
}
public:
static double
determinant_of_vectors(Vector a, Vector b, Vector c, Vector d) {
__m256d x=a*avx_left(b)-avx_left(a)*b;
__m256d yy=a*avx_sym(b);
__m256d y=yy-avx_sym(yy);
__m256d z0=x*avx_sym(c);
__m256d z1=avx_left(x)*c;
__m256d z2=y*avx_left(c);
__m256d z=z0+z1-z2;
return avx_altprod(z,avx_right(d));
}
static CGAL::Sign
sign_of_determinant_of_vectors(Vector a, Vector b, Vector c, Vector d) {
return CGAL::sign(determinant_of_vectors(a,b,c,d));
}
private:
static inline __m256d avx3_right(__m256d x){
#if 0
return __builtin_shuffle(x,(__m256i){2,0,1,3}); // can replace 3 with anything
#else
#ifdef __AVX2__
return _mm256_permute4x64_pd(x,2+0*4+1*16+3*64);
#else
__m256d s = _mm256_permute2f128_pd(x,x,1);
return _mm256_shuffle_pd(s,x,12);
#endif
#endif
}
public:
static inline double dot_product_omit_last(__m256d x, __m256d y){
__m256d p=x*y;
__m128d q=_mm256_extractf128_pd(p,0);
double z=_mm_hadd_pd(q,q)[0];
return z+p[2];
}
// Note: without AVX2, is it faster than the scalar computation?
static double
determinant_of_vectors_omit_last(Vector a, Vector b, Vector c) {
__m256d x=a*avx3_right(b)-avx3_right(a)*b;
return dot_product_omit_last(c,avx3_right(x));
}
static CGAL::Sign
sign_of_determinant_of_vectors_omit_last(Vector a, Vector b, Vector c) {
return CGAL::sign(determinant_of_vectors_omit_last(a,b,c));
}
};
}
#endif
|