/* Ergo, version 3.8, a program for linear scaling electronic structure
* calculations.
* Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
* and Anastasia Kruchinina.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Primary academic reference:
* Ergo: An open-source program for linear-scaling electronic structure
* calculations,
* Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
* Kruchinina,
* SoftwareX 7, 107 (2018),
* <http://dx.doi.org/10.1016/j.softx.2018.03.005>
*
* For further information about Ergo, see <http://www.ergoscf.org>.
*/
/** @file g_intrin.h
*
* @brief Templates for convenient access to intrinsic instructions.
*
* @author Emanuel H. Rubensson
* @date 2009
*
*/
#ifndef G_INTRIN
#define G_INTRIN
#include <emmintrin.h>
#ifdef __SSE3__
#include <pmmintrin.h>
#endif
/* Interface to load functions. */
/* load_p */
/** Generic form of the packed load.
 *
 * Declaration only; no definition is visible next to it. Treg cannot be
 * deduced from the argument, so a plain call _mm_load_p(ptr) resolves to
 * one of the non-template overloads that follow.
 */
template<class Treal, class Treg>
static inline Treg _mm_load_p(Treal const * ptr);

/** Single-precision packed load, forwarded to _mm_load_ps.
 *
 * @param ptr Address to read from (_mm_load_ps expects 16-byte alignment).
 * @return The loaded __m128 register.
 */
static inline __m128 _mm_load_p(float const * ptr) {
  __m128 const packed = _mm_load_ps(ptr);
  return packed;
}

/** Double-precision packed load, forwarded to _mm_load_pd.
 *
 * @param ptr Address to read from (_mm_load_pd expects 16-byte alignment).
 * @return The loaded __m128d register.
 */
static inline __m128d _mm_load_p(double const * ptr) {
  __m128d const packed = _mm_load_pd(ptr);
  return packed;
}
/* load1_p */
/** Generic form of the broadcasting load.
 *
 * Declaration only; no definition is visible next to it. Treg cannot be
 * deduced from the argument, so a plain call _mm_load1_p(ptr) resolves to
 * one of the non-template overloads that follow.
 */
template<class Treal, class Treg>
static inline Treg _mm_load1_p(Treal const * ptr);

/** Read one float and replicate it across the register via _mm_load1_ps.
 *
 * @param ptr Address of the scalar to read.
 * @return __m128 register produced by _mm_load1_ps.
 */
static inline __m128 _mm_load1_p(float const * ptr) {
  __m128 const splat = _mm_load1_ps(ptr);
  return splat;
}

/** Read one double and replicate it across the register via _mm_load1_pd.
 *
 * @param ptr Address of the scalar to read.
 * @return __m128d register produced by _mm_load1_pd.
 */
static inline __m128d _mm_load1_p(double const * ptr) {
  __m128d const splat = _mm_load1_pd(ptr);
  return splat;
}
/* set1_p */
/** Generic form of the scalar broadcast.
 *
 * Declaration only; no definition is visible next to it. Treg cannot be
 * deduced from the argument, so a plain call _mm_set1_p(val) resolves to
 * one of the non-template overloads that follow.
 */
template<class Treal, class Treg>
static inline Treg _mm_set1_p(Treal const val);

/** Fill every float lane with val, forwarded to _mm_set1_ps.
 *
 * @param val Scalar to replicate.
 * @return __m128 register produced by _mm_set1_ps.
 */
static inline __m128 _mm_set1_p(float const val) {
  __m128 const filled = _mm_set1_ps(val);
  return filled;
}

/** Fill every double lane with val, forwarded to _mm_set1_pd.
 *
 * @param val Scalar to replicate.
 * @return __m128d register produced by _mm_set1_pd.
 */
static inline __m128d _mm_set1_p(double const val) {
  __m128d const filled = _mm_set1_pd(val);
  return filled;
}
/* Interface to store functions. */
/** Generic form of the packed store.
 *
 * Declaration only; no definition is visible next to it. For
 * (float *, __m128) and (double *, __m128d) arguments the non-template
 * overloads that follow are the preferred match.
 */
template<class Treal, class Treg>
static inline void _mm_store_p(Treal * ptr, Treg A);

/** Write a single-precision register to memory via _mm_store_ps.
 *
 * @param ptr Destination address (_mm_store_ps expects 16-byte alignment).
 * @param A   Register whose contents are written.
 */
static inline void _mm_store_p(float * ptr, __m128 A) {
  _mm_store_ps(ptr, A);
}

/** Write a double-precision register to memory via _mm_store_pd.
 *
 * @param ptr Destination address (_mm_store_pd expects 16-byte alignment).
 * @param A   Register whose contents are written.
 */
static inline void _mm_store_p(double * ptr, __m128d A) {
  _mm_store_pd(ptr, A);
}
/* Interface to add functions. */
/** Generic form of the packed addition.
 *
 * Declaration only; no definition is visible next to it. For __m128 and
 * __m128d arguments the non-template overloads that follow are the
 * preferred match.
 */
template<class Treg>
static inline Treg _mm_add_p(Treg A, Treg B);

/** Lane-wise single-precision sum, forwarded to _mm_add_ps.
 *
 * @param A First operand.
 * @param B Second operand.
 * @return Result of _mm_add_ps(A, B).
 */
static inline __m128 _mm_add_p(__m128 A, __m128 B) {
  __m128 const sum = _mm_add_ps(A, B);
  return sum;
}

/** Lane-wise double-precision sum, forwarded to _mm_add_pd.
 *
 * @param A First operand.
 * @param B Second operand.
 * @return Result of _mm_add_pd(A, B).
 */
static inline __m128d _mm_add_p(__m128d A, __m128d B) {
  __m128d const sum = _mm_add_pd(A, B);
  return sum;
}
/* Interface to mul functions. */
/** Generic form of the packed multiplication.
 *
 * Declaration only; no definition is visible next to it. For __m128 and
 * __m128d arguments the non-template overloads that follow are the
 * preferred match.
 */
template<class Treg>
static inline Treg _mm_mul_p(Treg A, Treg B);

/** Lane-wise single-precision product, forwarded to _mm_mul_ps.
 *
 * @param A First factor.
 * @param B Second factor.
 * @return Result of _mm_mul_ps(A, B).
 */
static inline __m128 _mm_mul_p(__m128 A, __m128 B) {
  __m128 const product = _mm_mul_ps(A, B);
  return product;
}

/** Lane-wise double-precision product, forwarded to _mm_mul_pd.
 *
 * @param A First factor.
 * @param B Second factor.
 * @return Result of _mm_mul_pd(A, B).
 */
static inline __m128d _mm_mul_p(__m128d A, __m128d B) {
  __m128d const product = _mm_mul_pd(A, B);
  return product;
}
/* Interface to xor functions. */
/** Generic form of the bitwise XOR on packed registers.
 *
 * Declaration only; no definition is visible next to it. For __m128 and
 * __m128d arguments the non-template overloads that follow are the
 * preferred match.
 */
template<class Treg>
static inline Treg _mm_xor_p(Treg A, Treg B);

/** Bitwise XOR of two single-precision registers, forwarded to _mm_xor_ps.
 *
 * @param A First operand.
 * @param B Second operand.
 * @return Result of _mm_xor_ps(A, B).
 */
static inline __m128 _mm_xor_p(__m128 A, __m128 B) {
  __m128 const bits = _mm_xor_ps(A, B);
  return bits;
}

/** Bitwise XOR of two double-precision registers, forwarded to _mm_xor_pd.
 *
 * @param A First operand.
 * @param B Second operand.
 * @return Result of _mm_xor_pd(A, B).
 */
static inline __m128d _mm_xor_p(__m128d A, __m128d B) {
  __m128d const bits = _mm_xor_pd(A, B);
  return bits;
}
#endif