cpuinfo: Added SDL_HasAVX512F(). This checks for the "foundation" AVX-512 instructions (that all AVX-512 compatible CPUs support).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h
index 270ee52..acdb1a8 100644
--- a/include/SDL_cpuinfo.h
+++ b/include/SDL_cpuinfo.h
@@ -160,6 +160,11 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void);
extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void);
/**
+ * This function returns true if the CPU has AVX-512F (foundation) features.
+ */
+extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX512F(void);
+
+/**
* This function returns true if the CPU has NEON (ARM SIMD) features.
*/
extern DECLSPEC SDL_bool SDLCALL SDL_HasNEON(void);
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index 4e0a7e3..1decb2d 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -89,6 +89,7 @@
#define CPU_HAS_AVX (1 << 9)
#define CPU_HAS_AVX2 (1 << 10)
#define CPU_HAS_NEON (1 << 11)
+#define CPU_HAS_AVX512F (1 << 12)
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
/* This is the brute force way of detecting instruction sets...
@@ -247,6 +248,7 @@ done:
static int CPU_CPUIDFeatures[4];
static int CPU_CPUIDMaxFunction = 0;
static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
+static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
static void
CPU_calcCPUIDFeatures(void)
@@ -267,7 +269,7 @@ CPU_calcCPUIDFeatures(void)
/* Check to make sure we can call xgetbv */
if (c & 0x08000000) {
- /* Call xgetbv to see if YMM register state is saved */
+ /* Call xgetbv to see if YMM (etc) register state is saved */
#if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
__asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
@@ -281,6 +283,7 @@ CPU_calcCPUIDFeatures(void)
}
#endif
CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
+ CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
}
}
}
@@ -401,6 +404,18 @@ CPU_haveAVX2(void)
return 0;
}
+static int
+CPU_haveAVX512F(void)
+{
+ if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
+ int a, b, c, d;
+ (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
+ cpuid(7, a, b, c, d);
+ return (b & 0x00010000);
+ }
+ return 0;
+}
+
static int SDL_CPUCount = 0;
int
@@ -624,6 +639,10 @@ SDL_GetCPUFeatures(void)
SDL_CPUFeatures |= CPU_HAS_AVX2;
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
}
+ if (CPU_haveAVX512F()) {
+ SDL_CPUFeatures |= CPU_HAS_AVX512F;
+ SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
+ }
if (CPU_haveNEON()) {
SDL_CPUFeatures |= CPU_HAS_NEON;
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
@@ -700,6 +719,12 @@ SDL_HasAVX2(void)
}
SDL_bool
+SDL_HasAVX512F(void)
+{
+ return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
+}
+
+SDL_bool
SDL_HasNEON(void)
{
return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
@@ -819,6 +844,7 @@ main()
printf("SSE4.2: %d\n", SDL_HasSSE42());
printf("AVX: %d\n", SDL_HasAVX());
printf("AVX2: %d\n", SDL_HasAVX2());
+ printf("AVX-512F: %d\n", SDL_HasAVX512F());
printf("NEON: %d\n", SDL_HasNEON());
printf("RAM: %d MB\n", SDL_GetSystemRAM());
return 0;
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index b454e5a..62de268 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -671,3 +671,4 @@
#define SDL_log10f SDL_log10f_REAL
#define SDL_GameControllerMappingForDeviceIndex SDL_GameControllerMappingForDeviceIndex_REAL
#define SDL_LinuxSetThreadPriority SDL_LinuxSetThreadPriority_REAL
+#define SDL_HasAVX512F SDL_HasAVX512F_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index e4e6907..895b768 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -711,3 +711,4 @@ SDL_DYNAPI_PROC(char*,SDL_GameControllerMappingForDeviceIndex,(int a),(a),return
#ifdef __LINUX__
SDL_DYNAPI_PROC(int,SDL_LinuxSetThreadPriority,(Sint64 a, int b),(a,b),return)
#endif
+SDL_DYNAPI_PROC(SDL_bool,SDL_HasAVX512F,(void),(),return)
diff --git a/test/testplatform.c b/test/testplatform.c
index 84efab3..1c1d2dc 100644
--- a/test/testplatform.c
+++ b/test/testplatform.c
@@ -380,6 +380,7 @@ TestCPUInfo(SDL_bool verbose)
SDL_Log("SSE4.2 %s\n", SDL_HasSSE42()? "detected" : "not detected");
SDL_Log("AVX %s\n", SDL_HasAVX()? "detected" : "not detected");
SDL_Log("AVX2 %s\n", SDL_HasAVX2()? "detected" : "not detected");
+ SDL_Log("AVX-512F %s\n", SDL_HasAVX512F()? "detected" : "not detected");
SDL_Log("NEON %s\n", SDL_HasNEON()? "detected" : "not detected");
SDL_Log("System RAM %d MB\n", SDL_GetSystemRAM());
}