File: AwsSIMD.cmake

package info (click to toggle)
aws-crt-python 0.24.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 75,932 kB
  • sloc: ansic: 418,984; python: 23,626; makefile: 6,035; sh: 4,075; ruby: 208; java: 82; perl: 73; cpp: 25; xml: 11
file content (141 lines) | stat: -rw-r--r-- 4,594 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0.

include(CheckCCompilerFlag)
include(CheckIncludeFile)

# Per-compiler spellings of the CPU-feature switches used by the probes and
# helper functions in this file. An empty string means no dedicated switch is
# configured for that feature with this compiler.
if (MSVC)
    set(AWS_AVX2_FLAG "/arch:AVX2")
    set(AWS_AVX512_FLAG "/arch:AVX512")
    set(AWS_AVX512vL_FLAG "")
    set(AWS_CLMUL_FLAG "")
    set(AWS_SSE4_2_FLAG "")
    # MSVC names its ARM64 architecture levels "armv8.x"; the previous
    # spelling "/arch:arm8.1" is not one of the documented values.
    set(AWS_ARMv8_1_FLAG "/arch:armv8.1")
    # No warnings-as-errors switch is added to the probes on MSVC.
    set(WERROR_FLAG "")
else()
    set(AWS_AVX2_FLAG "-mavx -mavx2")
    set(AWS_AVX512_FLAG "-mavx512f -mvpclmulqdq")
    set(AWS_AVX512vL_FLAG "-mavx512vl")
    set(AWS_CLMUL_FLAG "-mpclmul")
    set(AWS_SSE4_2_FLAG "-msse4.2")

    # Baseline ARM flags; the tuning switch is appended only when the compiler
    # accepts it.
    set(AWS_ARMv8_1_FLAG "-march=armv8-a+crc+crypto")

    # AWS Graviton3 processors use neoverse-v1
    check_c_compiler_flag("-mtune=neoverse-v1" HAVE_MTUNE_NEOVERSE_V1)
    if (HAVE_MTUNE_NEOVERSE_V1)
        set(AWS_ARMv8_1_FLAG "${AWS_ARMv8_1_FLAG} -mtune=neoverse-v1")
    endif()

    # Added to CMAKE_REQUIRED_FLAGS by the source-compile probes below.
    set(WERROR_FLAG "-Werror")
endif()

# Configure-time probes for SIMD / CPU-extension support. Runs only when
# USE_CPU_EXTENSIONS is true. Collects the accepted x86 flags in AVX_CFLAGS
# and records each probe's outcome in an AWS_HAVE_* result variable.
if (USE_CPU_EXTENSIONS)
    # check_c_source_compiles() is provided by its own module; include it
    # explicitly rather than relying on another module pulling it in as a
    # side effect.
    include(CheckCSourceCompiles)

    # Quoted so that an empty SSE4.2 flag (the MSVC case) yields an empty
    # string instead of turning this into a variable unset.
    set(AVX_CFLAGS "${AWS_SSE4_2_FLAG}")

    # Prepend the AVX2 switch(es) if the compiler accepts them.
    check_c_compiler_flag("${AWS_AVX2_FLAG}" HAVE_M_AVX2_FLAG)
    if (HAVE_M_AVX2_FLAG)
        set(AVX_CFLAGS "${AWS_AVX2_FLAG} ${AVX_CFLAGS}")
    endif()

    # Prepend the AVX512 and CLMUL switches if the compiler accepts them
    # together.
    check_c_compiler_flag("${AWS_AVX512_FLAG} ${AWS_CLMUL_FLAG}" HAVE_M_AVX512_FLAG)
    if (HAVE_M_AVX512_FLAG)
        set(AVX_CFLAGS "${AWS_AVX512_FLAG} ${AWS_CLMUL_FLAG} ${AVX_CFLAGS}")
    endif()

    # Remember the caller's CMAKE_REQUIRED_FLAGS so it can be restored once
    # all probes have run.
    set(old_flags "${CMAKE_REQUIRED_FLAGS}")

    # The x86 probes compile with every accepted AVX flag plus WERROR_FLAG.
    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX_CFLAGS} ${WERROR_FLAG}")

    # AVX2 integer intrinsics.
    check_c_source_compiles("
        #include <immintrin.h>
        #include <emmintrin.h>
        #include <string.h>

        int main() {
            __m256i vec;
            memset(&vec, 0, sizeof(vec));

            _mm256_shuffle_epi8(vec, vec);
            _mm256_set_epi32(1,2,3,4,5,6,7,8);
            _mm256_permutevar8x32_epi32(vec, vec);

            return 0;
        }"  AWS_HAVE_AVX2_INTRINSICS)

    # AVX512 intrinsics (a single 512-bit zeroing intrinsic).
    check_c_source_compiles("
        #include <immintrin.h>

        int main() {
            __m512 a = _mm512_setzero_ps();
            return 0;
        }" AWS_HAVE_AVX512_INTRINSICS)

    # Availability of _mm256_extract_epi64 specifically.
    check_c_source_compiles("
        #include <immintrin.h>
        #include <string.h>

        int main() {
            __m256i vec;
            memset(&vec, 0, sizeof(vec));
            return (int)_mm256_extract_epi64(vec, 2);
        }" AWS_HAVE_MM256_EXTRACT_EPI64)

    # Carry-less multiply intrinsic.
    check_c_source_compiles("
        #include <wmmintrin.h>
        #include <emmintrin.h>
        int main() {
            __m128i a = _mm_setzero_si128();
            __m128i b = _mm_setzero_si128();
            __m128i result = _mm_clmulepi64_si128(a, b, 0x00);
            (void)result;
            return 0;
        }" AWS_HAVE_CLMUL)

    # The ARM probes start again from the caller's flags and add the ARM
    # switches instead of the x86 ones.
    set(CMAKE_REQUIRED_FLAGS "${old_flags} ${AWS_ARMv8_1_FLAG} ${WERROR_FLAG}")

    # CRC32 intrinsic from <arm_acle.h>.
    check_c_source_compiles("
            #include <arm_acle.h>
            int main() {
                int crc = __crc32d(0, 1);
                return 0;
            }" AWS_HAVE_ARM32_CRC)

    # C11 atomic fetch-add compiled under the ARM flags.
    check_c_source_compiles("
        #include <stdatomic.h>
        int main() {
            _Atomic int var = 0;
            atomic_fetch_add_explicit(&var, 1, memory_order_relaxed);
            return 0;
    }" AWS_HAVE_ARMv8_1)

    # Put the caller's CMAKE_REQUIRED_FLAGS back.
    set(CMAKE_REQUIRED_FLAGS "${old_flags}")

endif() # USE_CPU_EXTENSIONS

# The part where the definition is added to the compiler flags has been moved to config.h.in
# see git history for more details.

# Registers every listed source file as a PRIVATE source of `target` and sets
# that file's COMPILE_FLAGS property to the contents of AVX_CFLAGS (preceded
# by a single space). AVX_CFLAGS is read at call time and may be empty.
#
# Arguments:
#   target - name of the target that receives the sources
#   ARGN   - the source files to add
#
# Usage: simd_add_source_avx(target file1.c file2.c ...)
function(simd_add_source_avx target)
    foreach(avx_source ${ARGN})
        target_sources(${target} PRIVATE "${avx_source}")
        set_source_files_properties("${avx_source}"
            PROPERTIES
                COMPILE_FLAGS " ${AVX_CFLAGS}")
    endforeach()
endfunction()

# The part where the definition is added to the compiler flags has been moved to config.h.in
# see git history for more details.

# Registers `file` as a PRIVATE source of `target` and sets the file's
# COMPILE_FLAGS property to the space-separated concatenation of every extra
# argument. Flags are passed as plain strings; the predefined AWS_*_FLAG
# variables are set near the top of this file.
#
# Arguments:
#   target - name of the target that receives the source
#   file   - the source file to add
#   ARGN   - compiler flags to apply to `file`
#
# Usage: simd_append_source_and_features(target file1.c ${AWS_AVX512_FLAG} ${AWS_AVX2_FLAG} ...)
function(simd_append_source_and_features target file)
    target_sources(${target} PRIVATE ${file})

    # Each flag is appended with its own leading space.
    set(feature_flags "")
    foreach(feature ${ARGN})
        string(APPEND feature_flags " ${feature}")
    endforeach()

    set_source_files_properties(${file}
        PROPERTIES
            COMPILE_FLAGS " ${feature_flags}")
endfunction()