File: instrset_detect.h

package info (click to toggle)
scummvm 2.9.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 450,268 kB
  • sloc: cpp: 4,297,604; asm: 28,322; python: 12,901; sh: 11,219; java: 8,477; xml: 7,843; perl: 2,633; ansic: 2,465; yacc: 1,670; javascript: 1,020; makefile: 933; lex: 578; awk: 275; objc: 82; sed: 11; php: 1
file content (236 lines) | stat: -rw-r--r-- 10,984 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

// This file is derived from instrset_detect.cpp and instrset.h with minor style changes
/*****************************************************************************
* Author:        Agner Fog
* Date created:  2012-05-30
* Last modified: 2022-07-20
* Version:       2.02.00
* Project:       vector class library
* Description:
* Functions for checking which instruction sets are supported.
*
* (c) Copyright 2012-2022 Agner Fog.
* Apache License version 2.0 or later.
******************************************************************************/

#ifndef INSTRSET_DETECT_H
#define INSTRSET_DETECT_H

#if defined(__x86_64__) || defined(__amd64) || defined(_M_X64)  || defined(_M_AMD64) || \
	defined(__i386__)   || defined(__i386)  || defined(_M_IX86)

#include <stdint.h>                    // Define integer types with known size
#include <limits.h>                    // Define INT_MAX

// Header files for non-vector intrinsic functions including _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
#ifdef _MSC_VER                        // Microsoft compiler or compatible Intel compiler
#include <intrin.h>
#pragma warning(disable: 6323 4514 4710 4711) // Disable annoying warnings
#else
#include <x86intrin.h>                 // Gcc or Clang compiler
#endif

// Thin wrapper around the CPUID instruction.
// input:  functionnumber is placed in eax (leaf), ecxleaf in ecx (subleaf, 0 when omitted)
// output: output[0] = eax, output[1] = ebx, output[2] = ecx, output[3] = edx
static inline void cpuid(int output[4], int functionnumber, int ecxleaf = 0) {
#if defined(__GNUC__) || defined(__clang__)
    // Gnu/Clang: extended inline assembly, AT&T syntax
    int regEax, regEbx, regEcx, regEdx;
    __asm("cpuid"
          : "=a"(regEax), "=b"(regEbx), "=c"(regEcx), "=d"(regEdx)
          : "a"(functionnumber), "c"(ecxleaf)
          : );
    output[0] = regEax;
    output[1] = regEbx;
    output[2] = regEcx;
    output[3] = regEdx;

#elif defined (_MSC_VER)
    // Microsoft compiler: intrinsic from intrin.h fills output[] directly
    __cpuidex(output, functionnumber, ecxleaf);

#else
    // Unknown compiler: fall back to masm/intel style inline assembly
    __asm {
        mov eax, functionnumber
        mov ecx, ecxleaf
        cpuid;
        mov esi, output
        mov[esi], eax
        mov[esi + 4], ebx
        mov[esi + 8], ecx
        mov[esi + 12], edx
    }
#endif
}


// Thin wrapper around the XGETBV instruction.
// ctr is passed to the instruction in ecx; the two 32-bit result halves
// (eax = low, edx = high) are combined into a single 64-bit return value.
static inline uint64_t xgetbv (int ctr) {
#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
    // Microsoft or Intel compiler recent enough to provide the _xgetbv intrinsic
    return uint64_t(_xgetbv(ctr));

#elif defined(__GNUC__) ||  defined (__clang__)
    // Gnu/Clang: extended inline assembly, AT&T syntax
    uint32_t lowHalf, highHalf;
    __asm("xgetbv" : "=a"(lowHalf), "=d"(highHalf) : "c"(ctr) : );
    const uint64_t upperBits = uint64_t(highHalf) << 32;
    return upperBits | lowHalf;

#else
    // Other compiler: masm/intel/MS style inline assembly with the opcode emitted byte by byte
    uint32_t lowHalf, highHalf;
    __asm {
        mov ecx, ctr
        _emit 0x0f
        _emit 0x01
        _emit 0xd0 ; // xgetbv
        mov lowHalf, eax
        mov highHalf, edx
    }
    const uint64_t upperBits = uint64_t(highHalf) << 32;
    return upperBits | lowHalf;

#endif
}

/* find supported instruction set
    return value:
    0           = 80386 instruction set
    1  or above = SSE (XMM) supported by CPU (not testing for OS support)
    2  or above = SSE2
    3  or above = SSE3
    4  or above = Supplementary SSE3 (SSSE3)
    5  or above = SSE4.1
    6  or above = SSE4.2
    7  or above = AVX supported by CPU and operating system
    8  or above = AVX2
    9  or above = AVX512F supported by CPU and operating system
   10  or above = AVX512VL, AVX512BW, AVX512DQ

    The levels are cumulative: detection stops at the first missing feature
    and returns the highest level reached so far.
    The result is computed on the first call and kept in a function-local
    static, so later calls return the remembered value without running
    cpuid again.
*/
static int instrset_detect(void) {

    static int iset = -1;                                  // remember value for next call
    if (iset >= 0) {
        return iset;                                       // called before
    }
    iset = 0;                                              // default value
    int abcd[4] = {0,0,0,0};                               // cpuid results
    cpuid(abcd, 0);                                        // call cpuid function 0
    const uint32_t maxLeaf = uint32_t(abcd[0]);            // highest basic cpuid function supported
    if (maxLeaf == 0) return iset;                         // no further cpuid function supported
    cpuid(abcd, 1);                                        // call cpuid function 1 for feature flags
    // Feature words are tested as unsigned so that bit 31 can be masked
    // without shifting into the sign bit of a signed int.
    const uint32_t ecx1 = uint32_t(abcd[2]);
    const uint32_t edx1 = uint32_t(abcd[3]);
    if ((edx1 & (1u <<  0)) == 0) return iset;             // no floating point
    if ((edx1 & (1u << 23)) == 0) return iset;             // no MMX
    if ((edx1 & (1u << 15)) == 0) return iset;             // no conditional move
    if ((edx1 & (1u << 24)) == 0) return iset;             // no FXSAVE
    if ((edx1 & (1u << 25)) == 0) return iset;             // no SSE
    iset = 1;                                              // 1: SSE supported
    if ((edx1 & (1u << 26)) == 0) return iset;             // no SSE2
    iset = 2;                                              // 2: SSE2 supported
    if ((ecx1 & (1u <<  0)) == 0) return iset;             // no SSE3
    iset = 3;                                              // 3: SSE3 supported
    if ((ecx1 & (1u <<  9)) == 0) return iset;             // no SSSE3
    iset = 4;                                              // 4: SSSE3 supported
    if ((ecx1 & (1u << 19)) == 0) return iset;             // no SSE4.1
    iset = 5;                                              // 5: SSE4.1 supported
    if ((ecx1 & (1u << 23)) == 0) return iset;             // no POPCNT
    if ((ecx1 & (1u << 20)) == 0) return iset;             // no SSE4.2
    iset = 6;                                              // 6: SSE4.2 supported
    if ((ecx1 & (1u << 27)) == 0) return iset;             // no OSXSAVE (xgetbv must not be executed)
    const uint64_t xcr0 = xgetbv(0);                       // state components enabled by the O.S.
    if ((xcr0 & 0x06) != 0x06)      return iset;           // AVX not enabled in O.S.
    if ((ecx1 & (1u << 28)) == 0) return iset;             // no AVX
    iset = 7;                                              // 7: AVX supported
    if (maxLeaf < 7) return iset;                          // cpuid leaf 7 not available
    cpuid(abcd, 7);                                        // call cpuid leaf 7 for feature flags
    const uint32_t ebx7 = uint32_t(abcd[1]);               // keep leaf 7 ebx; abcd may be reused below
    if ((ebx7 & (1u <<  5)) == 0) return iset;             // no AVX2
    iset = 8;                                              // 8: AVX2 supported
    if ((ebx7 & (1u << 16)) == 0) return iset;             // no AVX512F
#if defined(__APPLE__)
    // NOTE(review): Darwin is reported to enable AVX-512 register state lazily
    // (on first use per thread), so XCR0 may not show it up front. The
    // CPUID-based check is kept here for that reason - confirm against XNU.
    cpuid(abcd, 0xD);                                      // call cpuid leaf 0xD for supported XCR0 bits
    if ((abcd[0] & 0x60) != 0x60)   return iset;           // no AVX512 state support
#else
    // XCR0 bits 5..7 = opmask, upper halves of ZMM0-15, ZMM16-31.
    // All three must be enabled by the O.S. before AVX512 may be used.
    if ((xcr0 & 0xE0) != 0xE0)      return iset;           // AVX512 not enabled in O.S.
#endif
    iset = 9;                                              // 9: AVX512F supported
    if ((ebx7 & (1u << 31)) == 0) return iset;             // no AVX512VL
    if ((ebx7 & 0x40020000u) != 0x40020000u) return iset;  // no AVX512BW, AVX512DQ
    iset = 10;                                             // 10: AVX512VL, AVX512BW, AVX512DQ supported
    return iset;
}

// Report whether the FMA3 instruction set is available.
// Returns false unless instrset_detect() reports level 7 (AVX) or higher;
// otherwise the answer is ecx bit 12 of cpuid function 1.
static inline bool hasFMA3(void) {
    if (instrset_detect() >= 7) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 1);
        const int fma3Flag = regs[2] & (1 << 12);
        return fma3Flag != 0;
    }
    return false;                                          // AVX is a prerequisite
}

// Report whether the FMA4 instruction set is available.
// Returns false unless instrset_detect() reports level 7 (AVX) or higher;
// otherwise the answer is ecx bit 16 of cpuid function 0x80000001.
static inline bool hasFMA4(void) {
    if (instrset_detect() >= 7) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 0x80000001);
        const int fma4Flag = regs[2] & (1 << 16);
        return fma4Flag != 0;
    }
    return false;                                          // AVX is a prerequisite
}

// Report whether the XOP instruction set is available.
// Returns false unless instrset_detect() reports level 7 (AVX) or higher;
// otherwise the answer is ecx bit 11 of cpuid function 0x80000001.
static inline bool hasXOP(void) {
    if (instrset_detect() >= 7) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 0x80000001);
        const int xopFlag = regs[2] & (1 << 11);
        return xopFlag != 0;
    }
    return false;                                          // AVX is a prerequisite
}

// Report whether the AVX512ER instruction set is available.
// Returns false unless instrset_detect() reports level 9 (AVX512F) or higher;
// otherwise the answer is ebx bit 27 of cpuid function 7.
static inline bool hasAVX512ER(void) {
    if (instrset_detect() >= 9) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 7);
        const int erFlag = regs[1] & (1 << 27);
        return erFlag != 0;
    }
    return false;                                          // AVX512F is a prerequisite
}

// Report whether the AVX512VBMI instruction set is available.
// Returns false unless instrset_detect() reports level 10 (includes AVX512BW);
// otherwise the answer is ecx bit 1 of cpuid function 7.
static inline bool hasAVX512VBMI(void) {
    if (instrset_detect() >= 10) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 7);
        const int vbmiFlag = regs[2] & (1 << 1);
        return vbmiFlag != 0;
    }
    return false;                                          // AVX512BW is a prerequisite
}

// Report whether the AVX512VBMI2 instruction set is available.
// Returns false unless instrset_detect() reports level 10 (includes AVX512BW);
// otherwise the answer is ecx bit 6 of cpuid function 7.
static inline bool hasAVX512VBMI2(void) {
    if (instrset_detect() >= 10) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 7);
        const int vbmi2Flag = regs[2] & (1 << 6);
        return vbmi2Flag != 0;
    }
    return false;                                          // AVX512BW is a prerequisite
}

// Report whether the F16C instruction set is available.
// Returns false unless instrset_detect() reports level 7 (AVX) or higher;
// otherwise the answer is ecx bit 29 of cpuid function 1.
static inline bool hasF16C(void) {
    if (instrset_detect() >= 7) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 1);
        const int f16cFlag = regs[2] & (1 << 29);
        return f16cFlag != 0;
    }
    return false;                                          // AVX is a prerequisite
}

// Report whether the AVX512_FP16 instruction set is available.
// Returns false unless instrset_detect() reports level 10 (AVX512VL/BW/DQ);
// otherwise the answer is edx bit 23 of cpuid function 7.
static inline bool hasAVX512FP16(void) {
    if (instrset_detect() >= 10) {
        int regs[4];                                       // eax, ebx, ecx, edx
        cpuid(regs, 7);
        const int fp16Flag = regs[3] & (1 << 23);
        return fp16Flag != 0;
    }
    return false;                                          // AVX512 is a prerequisite
}

#endif
#endif