File: g_intrin.h

package info (click to toggle)
ergo 3.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 17,396 kB
  • sloc: cpp: 94,740; ansic: 17,015; sh: 7,559; makefile: 1,402; yacc: 127; lex: 110; awk: 23
file content (140 lines) | stat: -rw-r--r-- 3,462 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/* Ergo, version 3.8, a program for linear scaling electronic structure
 * calculations.
 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
 * and Anastasia Kruchinina.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * Primary academic reference:
 * Ergo: An open-source program for linear-scaling electronic structure
 * calculations,
 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
 * Kruchinina,
 * SoftwareX 7, 107 (2018),
 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
 * 
 * For further information about Ergo, see <http://www.ergoscf.org>.
 */

/** @file g_intrin.h
 *
 * @brief Templates for convenient access to intrinsic instructions.
 *
 * @author Emanuel H. Rubensson
 * @date 2009
 *
 */

#ifndef G_INTRIN
#define G_INTRIN
#include <emmintrin.h>
#ifdef __SSE3__
#include <pmmintrin.h>
#endif

/* Interface to load functions. */

/* load_p */
/** Generic declaration of _mm_load_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The float and double functions that follow are ordinary
 * (non-template) overloads, not specializations of this template.
 * Treg occurs only in the return type, so it cannot be deduced from the
 * argument and would have to be supplied explicitly.
 */
template<typename Treal, typename Treg>
  inline static Treg _mm_load_p (Treal const * ptr);

/** Single-precision overload of _mm_load_p.
 *
 * @param ptr  Address passed unchanged to _mm_load_ps; whatever that
 *             intrinsic requires of its pointer argument applies here.
 * @return     The __m128 value produced by _mm_load_ps.
 */
static inline __m128 _mm_load_p(const float *ptr)
{
  __m128 const packed = _mm_load_ps(ptr);
  return packed;
}

/** Double-precision overload of _mm_load_p.
 *
 * @param ptr  Address passed unchanged to _mm_load_pd; whatever that
 *             intrinsic requires of its pointer argument applies here.
 * @return     The __m128d value produced by _mm_load_pd.
 */
static inline __m128d _mm_load_p(const double *ptr)
{
  __m128d const packed = _mm_load_pd(ptr);
  return packed;
}

/* load1_p */
/** Generic declaration of _mm_load1_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The float and double functions that follow are ordinary
 * (non-template) overloads, not specializations of this template.
 * Treg occurs only in the return type, so it cannot be deduced from the
 * argument and would have to be supplied explicitly.
 */
template<typename Treal, typename Treg>
  inline static Treg _mm_load1_p (Treal const * ptr);

/** Single-precision overload of _mm_load1_p.
 *
 * @param ptr  Address of the float handed to _mm_load1_ps.
 * @return     The __m128 value produced by _mm_load1_ps.
 */
static inline __m128 _mm_load1_p(const float *ptr)
{
  __m128 const splat = _mm_load1_ps(ptr);
  return splat;
}


/** Double-precision overload of _mm_load1_p.
 *
 * @param ptr  Address of the double handed to _mm_load1_pd.
 * @return     The __m128d value produced by _mm_load1_pd.
 */
static inline __m128d _mm_load1_p(const double *ptr)
{
  __m128d const splat = _mm_load1_pd(ptr);
  return splat;
}

/* set1_p */
/** Generic declaration of _mm_set1_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The float and double functions that follow are ordinary
 * (non-template) overloads, not specializations of this template.
 * Treg occurs only in the return type, so it cannot be deduced from the
 * argument and would have to be supplied explicitly.
 */
template<typename Treal, typename Treg>
  inline static Treg _mm_set1_p (Treal const val);

/** Single-precision overload of _mm_set1_p.
 *
 * @param val  Scalar passed by value to _mm_set1_ps.
 * @return     The __m128 value produced by _mm_set1_ps.
 */
static inline __m128 _mm_set1_p(const float val)
{
  __m128 const splat = _mm_set1_ps(val);
  return splat;
}


/** Double-precision overload of _mm_set1_p.
 *
 * @param val  Scalar passed by value to _mm_set1_pd.
 * @return     The __m128d value produced by _mm_set1_pd.
 */
static inline __m128d _mm_set1_p(const double val)
{
  __m128d const splat = _mm_set1_pd(val);
  return splat;
}


/* Interface to store functions. */
/** Generic declaration of _mm_store_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The (float*, __m128) and (double*, __m128d) functions that
 * follow are ordinary (non-template) overloads, not specializations of
 * this template. A call whose argument types match neither overload
 * would select this template, for which no definition is visible here.
 */
template<typename Treal, typename Treg>
  inline static void _mm_store_p (Treal * ptr, Treg A);

/** Single-precision overload of _mm_store_p.
 *
 * @param ptr  Destination address passed unchanged to _mm_store_ps;
 *             whatever that intrinsic requires of its pointer applies here.
 * @param A    Register value to be written.
 */
static inline void _mm_store_p(float *ptr, __m128 A)
{
  _mm_store_ps(ptr, A);
}

/** Double-precision overload of _mm_store_p.
 *
 * @param ptr  Destination address passed unchanged to _mm_store_pd;
 *             whatever that intrinsic requires of its pointer applies here.
 * @param A    Register value to be written.
 */
static inline void _mm_store_p(double *ptr, __m128d A)
{
  _mm_store_pd(ptr, A);
}


/* Interface to add functions. */

/** Generic declaration of _mm_add_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The __m128 and __m128d functions that follow are ordinary
 * (non-template) overloads, not specializations of this template.
 * A call with two arguments of some other common type would select this
 * template, for which no definition is visible here.
 */
template<typename Treg>
inline static Treg _mm_add_p (Treg A, Treg B);

/** Single-precision overload of _mm_add_p.
 *
 * @param A  First operand.
 * @param B  Second operand.
 * @return   The result of _mm_add_ps(A, B).
 */
static inline __m128 _mm_add_p(__m128 A, __m128 B)
{
  __m128 const sum = _mm_add_ps(A, B);
  return sum;
}

/** Double-precision overload of _mm_add_p.
 *
 * @param A  First operand.
 * @param B  Second operand.
 * @return   The result of _mm_add_pd(A, B).
 */
static inline __m128d _mm_add_p(__m128d A, __m128d B)
{
  __m128d const sum = _mm_add_pd(A, B);
  return sum;
}


/* Interface to mul functions. */

/** Generic declaration of _mm_mul_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The __m128 and __m128d functions that follow are ordinary
 * (non-template) overloads, not specializations of this template.
 * A call with two arguments of some other common type would select this
 * template, for which no definition is visible here.
 */
template<typename Treg>
inline static Treg _mm_mul_p (Treg A, Treg B);

/** Single-precision overload of _mm_mul_p.
 *
 * @param A  First factor.
 * @param B  Second factor.
 * @return   The result of _mm_mul_ps(A, B).
 */
static inline __m128 _mm_mul_p(__m128 A, __m128 B)
{
  __m128 const product = _mm_mul_ps(A, B);
  return product;
}

/** Double-precision overload of _mm_mul_p.
 *
 * @param A  First factor.
 * @param B  Second factor.
 * @return   The result of _mm_mul_pd(A, B).
 */
static inline __m128d _mm_mul_p(__m128d A, __m128d B)
{
  __m128d const product = _mm_mul_pd(A, B);
  return product;
}

/* Interface to xor functions. */

/** Generic declaration of _mm_xor_p.
 *
 * Declaration only: no definition of this template is visible in this
 * header. The __m128 and __m128d functions that follow are ordinary
 * (non-template) overloads, not specializations of this template.
 * A call with two arguments of some other common type would select this
 * template, for which no definition is visible here.
 */
template<typename Treg>
inline static Treg _mm_xor_p (Treg A, Treg B);

/** Single-precision overload of _mm_xor_p.
 *
 * @param A  First operand.
 * @param B  Second operand.
 * @return   The result of _mm_xor_ps(A, B).
 */
static inline __m128 _mm_xor_p(__m128 A, __m128 B)
{
  __m128 const mixed = _mm_xor_ps(A, B);
  return mixed;
}

/** Double-precision overload of _mm_xor_p.
 *
 * @param A  First operand.
 * @param B  Second operand.
 * @return   The result of _mm_xor_pd(A, B).
 */
static inline __m128d _mm_xor_p(__m128d A, __m128d B)
{
  __m128d const mixed = _mm_xor_pd(A, B);
  return mixed;
}

#endif