/**************************** instrset.h **********************************
* Author: Agner Fog
* Date created: 2012-05-30
* Last modified: 2016-11-25
* Version: 1.25
* Project: vector classes
* Description:
* Header file for various compiler-specific tasks and other tasks common to
* the vector class library:
* > selects the supported instruction set
* > defines integer types
* > defines compiler version macros
* > undefines certain macros that prevent function overloading
* > defines template class to represent compile-time integer constant
* > defines template for compile-time error messages
*
* (c) Copyright 2012-2016 GNU General Public License www.gnu.org/licenses
******************************************************************************/
#ifndef INSTRSET_H
#define INSTRSET_H 125
// 64-bit mode detection.
// Several different macros are in use for the x86-64 target (_M_AMD64,
// _M_X64, __amd64). Map all of them onto the single name __x86_64__, which
// is the one tested later in this header.
#if !defined(__x86_64__)
#if defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)
#define __x86_64__ 1
#endif
#endif
// Choose the instruction set level (INSTRSET) from the compiler's predefined
// macros. If INSTRSET is already defined, that value is kept unchanged.
// The first matching entry wins, highest level first:
//   9: AVX512F   8: AVX2   7: AVX    6: SSE4.2   5: SSE4.1
//   4: SSSE3     3: SSE3   2: SSE2 (or 64-bit mode)   1: SSE   0: none detected
// Note: Most of these macros are not defined in Microsoft compilers
#if !defined(INSTRSET)
#  if defined(__AVX512F__) || defined(__AVX512__)
#    define INSTRSET 9
#  elif defined(__AVX2__)
#    define INSTRSET 8
#  elif defined(__AVX__)
#    define INSTRSET 7
#  elif defined(__SSE4_2__)
#    define INSTRSET 6
#  elif defined(__SSE4_1__)
#    define INSTRSET 5
#  elif defined(__SSSE3__)
#    define INSTRSET 4
#  elif defined(__SSE3__)
#    define INSTRSET 3
#  elif defined(__SSE2__) || defined(__x86_64__)
#    define INSTRSET 2
#  elif defined(__SSE__)
#    define INSTRSET 1
#  elif defined(_M_IX86_FP)
     // Defined in MS compiler. 1: SSE, 2: SSE2
#    define INSTRSET _M_IX86_FP
#  else
#    define INSTRSET 0
#  endif // instruction set defines
#endif // INSTRSET
// Include the appropriate header file for intrinsic functions.
// Exactly one branch of the chain below is taken, keyed on the INSTRSET
// level; for INSTRSET == 0 no intrinsics header is included at all.
// NOTE(review): each header presumably also provides the intrinsics of the
// lower levels - confirm against the compiler's headers.
#if INSTRSET > 7 // AVX2 and later
// Gnu-compatible compilers other than Intel use the umbrella header
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
#include <x86intrin.h> // x86intrin.h includes header files for whatever instruction
// sets are specified on the compiler command line, such as:
// xopintrin.h, fma4intrin.h
#else
#include <immintrin.h> // MS version of immintrin.h covers AVX, AVX2 and FMA3
#endif // __GNUC__
#elif INSTRSET == 7
#include <immintrin.h> // AVX
#elif INSTRSET == 6
#include <nmmintrin.h> // SSE4.2
#elif INSTRSET == 5
#include <smmintrin.h> // SSE4.1
#elif INSTRSET == 4
#include <tmmintrin.h> // SSSE3
#elif INSTRSET == 3
#include <pmmintrin.h> // SSE3
#elif INSTRSET == 2
#include <emmintrin.h> // SSE2
#elif INSTRSET == 1
#include <xmmintrin.h> // SSE
#endif // INSTRSET
// FMA3 support when AVX2 or higher is selected but __FMA__ is not defined.
// It is assumed that all processors that have AVX2 also have FMA3.
//  - g++ and Clang (both define __GNUC__; Clang is also checked by name for
//    configurations where it does not): the FMA intrinsics are only usable
//    when FMA code generation is enabled with -mfma, in which case the
//    compiler defines __FMA__ itself. Defining __FMA__ by hand does not
//    enable the instructions and only leads to errors where the intrinsics
//    are used, so just recommend the option.
//  - All other compilers (e.g. Microsoft, Intel): define __FMA__ here so that
//    code testing this macro takes the FMA3 path.
#if INSTRSET >= 8 && !defined(__FMA__)
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__INTEL_COMPILER)
// Prevent error message in g++ and Clang when using FMA intrinsics with avx2:
#pragma message "It is recommended to specify also option -mfma when using -mavx2 or higher"
#else
#define __FMA__ 1
#endif
#endif
// AMD instruction sets
// XOP or FMA4 enabled: Gnu-compatible compilers get the umbrella header
// x86intrin.h, all other compilers get ammintrin.h.
// SSE4A only (neither __XOP__ nor __FMA4__ defined): ammintrin.h for every compiler.
#if defined (__XOP__) || defined (__FMA4__)
#ifdef __GNUC__
#include <x86intrin.h> // AMD XOP (Gnu)
#else
#include <ammintrin.h> // AMD XOP (Microsoft)
#endif // __GNUC__
#elif defined (__SSE4A__) // AMD SSE4A
#include <ammintrin.h>
#endif // __XOP__
// FMA3 instruction set
// The FMA3 intrinsics header is included explicitly for g++ and Clang (but
// not for the Intel compiler) whenever __FMA__ is defined at this point.
// No explicit include is made for other compilers.
#if defined (__FMA__) && (defined(__GNUC__) || defined(__clang__)) && ! defined (__INTEL_COMPILER)
#include <fmaintrin.h>
#endif // __FMA__
// FMA4 instruction set
// For g++ and Clang the FMA4 header is included explicitly, in addition to
// the x86intrin.h include made earlier in this file when __FMA4__ is defined.
// The original author notes that both includes are required for an unknown reason.
#if defined (__FMA4__) && (defined(__GNUC__) || defined(__clang__))
#include <fma4intrin.h> // must have both x86intrin.h and fma4intrin.h, don't know why
#endif // __FMA4__
// Integer types with known size: int8_t ... uint64_t, plus intptr_t.
// Three cases, tested in this order:
//   1. Gnu, Clang and Microsoft compilers from _MSC_VER 1600: use stdint.h
//   2. older Microsoft compilers: build the types from the __intN keywords
//   3. anything else: plain typedefs of the built-in types
#if defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
   // Compilers supporting C99 or C++0x have stdint.h defining these integer types
#  include <stdint.h>
#elif defined(_MSC_VER)
   // Older Microsoft compilers have their own definitions
   typedef signed   __int8  int8_t;
   typedef unsigned __int8  uint8_t;
   typedef signed   __int16 int16_t;
   typedef unsigned __int16 uint16_t;
   typedef signed   __int32 int32_t;
   typedef unsigned __int32 uint32_t;
   typedef signed   __int64 int64_t;
   typedef unsigned __int64 uint64_t;
   // intptr_t only if not already defined; its size follows the 64-bit mode macro
#  if !defined(_INTPTR_T_DEFINED)
#    define _INTPTR_T_DEFINED
#    if defined(__x86_64__)
       typedef int64_t intptr_t;
#    else
       typedef int32_t intptr_t;
#    endif
#  endif
#else
   // This works with most compilers
   typedef signed char        int8_t;
   typedef unsigned char      uint8_t;
   typedef short              int16_t;
   typedef unsigned short     uint16_t;
   typedef int                int32_t;
   typedef unsigned int       uint32_t;
   typedef long long          int64_t;
   typedef unsigned long long uint64_t;
   // intptr_t size follows the 64-bit mode macro
#  if defined(__x86_64__)
     typedef int64_t intptr_t;
#  else
     typedef int32_t intptr_t;
#  endif
#endif
#include <stdlib.h> // define abs(int)
#ifdef _MSC_VER // Microsoft compiler or compatible Intel compiler
#include <intrin.h> // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
#endif // _MSC_VER
// Declarations of the functions in instrset_detect.cpp.
// They are placed in namespace VCL_NAMESPACE when that macro is defined,
// otherwise in the global namespace.
#if defined(VCL_NAMESPACE)
namespace VCL_NAMESPACE {
#endif

int  instrset_detect();  // tells which instruction sets are supported
bool hasFMA3();          // true if FMA3 instructions supported
bool hasFMA4();          // true if FMA4 instructions supported
bool hasXOP();           // true if XOP instructions supported
bool hasAVX512ER();      // true if AVX512ER instructions supported

#if defined(VCL_NAMESPACE)
}
#endif
// GCC version as one number: major * 10000 + minor * 100 + patchlevel.
// Not defined for Clang, and an already existing GCC_VERSION is left alone.
#if defined(__GNUC__) && !defined(__clang__)
#if !defined(GCC_VERSION)
#define GCC_VERSION ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__))
#endif
#endif
// Clang version as one number: major * 10000 + minor * 100 + patchlevel.
// Problem: The version number is not consistent across platforms, see
// http://llvm.org/bugs/show_bug.cgi?id=12643
// Apple bug 18746972
#ifdef __clang__
#define CLANG_VERSION ((__clang_major__) * 10000 + (__clang_minor__) * 100 + (__clang_patchlevel__))
#endif
// Microsoft compilers only: work around the macros named min and max in
// WinDef.h, which cannot be overloaded.
#if defined(_MSC_VER)
   // WinDef.h already seen and both macros present: remove them
#  if defined(_WINDEF_)
#    if defined(min) && defined(max)
#      undef min
#      undef max
#    endif
#  endif
   // Define NOMINMAX unless already defined
   // (presumably so that Windows headers included later do not define min and max)
#  if !defined(NOMINMAX)
#    define NOMINMAX
#  endif
#endif
// Compile-time helper templates. They are placed in namespace VCL_NAMESPACE
// when that macro is defined, otherwise in the global namespace.
#if defined(VCL_NAMESPACE)
namespace VCL_NAMESPACE {
#endif

// Empty classes whose type represents a compile-time integer constant
template <int32_t value>
class Const_int_t {
};  // signed integer constant

template <uint32_t value>
class Const_uint_t {
};  // unsigned integer constant

// Make a temporary object of the matching constant type
#define const_int(n) (Const_int_t <n>()) // n must be compile-time integer constant
#define const_uint(n) (Const_uint_t<n>()) // n must be compile-time unsigned integer constant

// Template for compile-time error messages:
// Static_error_check<true> can be constructed by anyone, while the
// specialization for false has a private constructor, so that constructing
// Static_error_check<false> from outside the class is a compile-time error.
template <bool condition>
class Static_error_check {
public:
    Static_error_check() {}
};

template <>
class Static_error_check<false> {
private:
    Static_error_check() {}
};

#if defined(VCL_NAMESPACE)
}
#endif
#endif // INSTRSET_H