1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
From 6935b19dad5f28a9720e297a2f63ae349b77781b Mon Sep 17 00:00:00 2001
From: "A. Maitland Bottoms" <bottoms@debian.org>
Date: Sat, 15 Jul 2023 14:03:08 -0400
Subject: [PATCH] lpcnet check c compiler flag
Improve portability by having CMake check compiler flags before
using them.
---
CMakeLists.txt | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
--- a/LPCNet/CMakeLists.txt
+++ b/LPCNet/CMakeLists.txt
@@ -145,23 +145,35 @@
set(NEON_PRESENT TRUE)
endif()
+# Let CMake check for compiler flag support
+include(CheckCCompilerFlag)
+check_c_compiler_flag("-mavx2" HAS_MAVX2_FLAG)
+check_c_compiler_flag("-mfma" HAS_MFMA_FLAG)
+check_c_compiler_flag("-mavx" HAS_MAVX_FLAG)
+check_c_compiler_flag("-msse4.1" HAS_MSSE_FLAG)
+check_c_compiler_flag("-mfpu=neon" HAS_FPU_NEON_FLAG)
+check_c_compiler_flag("-march=armv8-a" HAS_MARCH_ARMV8_FLAG)
+check_c_compiler_flag("-mtune=cortex-a53" HAS_MTUNE_A53_FLAG)
+
set(LPCNET_C_PROC_FLAGS "")
-if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0))
+if(${AVX2} AND (${HAS_MAVX2_FLAG}) AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0))
message(STATUS "avx2 processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -mavx2 -mfma)
-elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0))
+elseif(${AVX} AND (${HAS_MAVX_FLAG}) AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0))
# AVX2 machines will also match on AVX
message(STATUS "avx processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -mavx)
-elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0))
+elseif(${SSE} AND (${HAS_MSSE_FLAG}) AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0))
# AVX and AVX2 machines will also match on SSE
message(STATUS "sse processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -msse4.1)
elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0))
# RPi / ARM 32bit
- message(STATUS "neon processor flags found or enabled.")
- set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53)
+ if((${HAS_FPU_NEON_FLAG}) AND (${HAS_MARCH_ARMV8_FLAG}) AND (${HAS_MTUNE_A53_FLAG}))
+ message(STATUS "neon processor flags found or enabled.")
+ set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53)
+ endif()
endif()
if(LPCNET_191005_DIR)
--- a/LPCNet/src/test_vec.c
+++ b/LPCNet/src/test_vec.c
@@ -58,6 +58,8 @@
#define COLS 2
#define ENTRIES 2
+#if defined(__AVX__) || defined(__SSE__) || defined (__ARM_NEON__) || defined(__aarch64__)
+// compare fast and unoptimized results
int test_sgemv_accum16() {
float weights[ROWS*COLS];
float x[COLS];
@@ -135,5 +137,9 @@
int test2 = test_sparse_sgemv_accum16();
return test1 || test2;
}
-
-
+#else
+int main() {
+ printf("no fast vector routines to test.\n");
+ return 0;
+}
+#endif
|