Commit 4df859c58609f7814027e8ccd52d40eb20b4d565

Ryan C. Gordon 2018-05-21T11:35:42

cpuinfo: Added SDL_HasAVX512F(). This checks for the "foundation" AVX-512 instructions (that all AVX-512 compatible CPUs support).

diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h
index 270ee52..acdb1a8 100644
--- a/include/SDL_cpuinfo.h
+++ b/include/SDL_cpuinfo.h
@@ -160,6 +160,11 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void);
 extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void);
 
 /**
+ *  This function returns true if the CPU has AVX-512F (foundation) features.
+ */
+extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX512F(void);
+
+/**
  *  This function returns true if the CPU has NEON (ARM SIMD) features.
  */
 extern DECLSPEC SDL_bool SDLCALL SDL_HasNEON(void);
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index 4e0a7e3..1decb2d 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -89,6 +89,7 @@
 #define CPU_HAS_AVX     (1 << 9)
 #define CPU_HAS_AVX2    (1 << 10)
 #define CPU_HAS_NEON    (1 << 11)
+#define CPU_HAS_AVX512F (1 << 12)
 
 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
 /* This is the brute force way of detecting instruction sets...
@@ -247,6 +248,7 @@ done:
 static int CPU_CPUIDFeatures[4];
 static int CPU_CPUIDMaxFunction = 0;
 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
+static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
 
 static void
 CPU_calcCPUIDFeatures(void)
@@ -267,7 +269,7 @@ CPU_calcCPUIDFeatures(void)
 
                 /* Check to make sure we can call xgetbv */
                 if (c & 0x08000000) {
-                    /* Call xgetbv to see if YMM register state is saved */
+                    /* Call xgetbv to see if YMM (etc) register state is saved */
 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
@@ -281,6 +283,7 @@ CPU_calcCPUIDFeatures(void)
                     }
 #endif
                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
+                    CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
                 }
             }
         }
@@ -401,6 +404,18 @@ CPU_haveAVX2(void)
     return 0;
 }
 
+static int
+CPU_haveAVX512F(void)
+{
+    if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
+        int a, b, c, d;
+        (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
+        cpuid(7, a, b, c, d);
+        return (b & 0x00010000);
+    }
+    return 0;
+}
+
 static int SDL_CPUCount = 0;
 
 int
@@ -624,6 +639,10 @@ SDL_GetCPUFeatures(void)
             SDL_CPUFeatures |= CPU_HAS_AVX2;
             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
         }
+        if (CPU_haveAVX512F()) {
+            SDL_CPUFeatures |= CPU_HAS_AVX512F;
+            SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
+        }
         if (CPU_haveNEON()) {
             SDL_CPUFeatures |= CPU_HAS_NEON;
             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
@@ -700,6 +719,12 @@ SDL_HasAVX2(void)
 }
 
 SDL_bool
+SDL_HasAVX512F(void)
+{
+    return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
+}
+
+SDL_bool
 SDL_HasNEON(void)
 {
     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
@@ -819,6 +844,7 @@ main()
     printf("SSE4.2: %d\n", SDL_HasSSE42());
     printf("AVX: %d\n", SDL_HasAVX());
     printf("AVX2: %d\n", SDL_HasAVX2());
+    printf("AVX-512F: %d\n", SDL_HasAVX512F());
     printf("NEON: %d\n", SDL_HasNEON());
     printf("RAM: %d MB\n", SDL_GetSystemRAM());
     return 0;
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index b454e5a..62de268 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -671,3 +671,4 @@
 #define SDL_log10f SDL_log10f_REAL
 #define SDL_GameControllerMappingForDeviceIndex SDL_GameControllerMappingForDeviceIndex_REAL
 #define SDL_LinuxSetThreadPriority SDL_LinuxSetThreadPriority_REAL
+#define SDL_HasAVX512F SDL_HasAVX512F_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index e4e6907..895b768 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -711,3 +711,4 @@ SDL_DYNAPI_PROC(char*,SDL_GameControllerMappingForDeviceIndex,(int a),(a),return
 #ifdef __LINUX__
 SDL_DYNAPI_PROC(int,SDL_LinuxSetThreadPriority,(Sint64 a, int b),(a,b),return)
 #endif
+SDL_DYNAPI_PROC(SDL_bool,SDL_HasAVX512F,(void),(),return)
diff --git a/test/testplatform.c b/test/testplatform.c
index 84efab3..1c1d2dc 100644
--- a/test/testplatform.c
+++ b/test/testplatform.c
@@ -380,6 +380,7 @@ TestCPUInfo(SDL_bool verbose)
         SDL_Log("SSE4.2 %s\n", SDL_HasSSE42()? "detected" : "not detected");
         SDL_Log("AVX %s\n", SDL_HasAVX()? "detected" : "not detected");
         SDL_Log("AVX2 %s\n", SDL_HasAVX2()? "detected" : "not detected");
+        SDL_Log("AVX-512F %s\n", SDL_HasAVX512F()? "detected" : "not detected");
         SDL_Log("NEON %s\n", SDL_HasNEON()? "detected" : "not detected");
         SDL_Log("System RAM %d MB\n", SDL_GetSystemRAM());
     }