Commit eb1c6044b2d5b657f9fbffff720c218db35f439f

Sam Lantinga 2014-07-11T22:02:50

Fixed bug in AVX detection and added AVX2 detection

diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h
index 1f6efd3..5b2c7a4 100644
--- a/include/SDL_cpuinfo.h
+++ b/include/SDL_cpuinfo.h
@@ -140,6 +140,11 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE42(void);
 extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX(void);
 
 /**
+ *  This function returns true if the CPU has AVX2 features.
+ */
+extern DECLSPEC SDL_bool SDLCALL SDL_HasAVX2(void);
+
+/**
  *  This function returns the amount of RAM configured in the system, in MB.
  */
 extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index ae93a2f..94a6826 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -60,6 +60,7 @@
 #define CPU_HAS_SSE41   0x00000100
 #define CPU_HAS_SSE42   0x00000200
 #define CPU_HAS_AVX     0x00000400
+#define CPU_HAS_AVX2    0x00000800
 
 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
 /* This is the brute force way of detecting instruction sets...
@@ -73,7 +74,7 @@ illegal_instruction(int sig)
 }
 #endif /* HAVE_SETJMP */
 
-static SDL_INLINE int
+static int
 CPU_haveCPUID(void)
 {
     int has_CPUID = 0;
@@ -172,6 +173,7 @@ done:
 #define cpuid(func, a, b, c, d) \
     __asm__ __volatile__ ( \
 "        pushl %%ebx        \n" \
+"        xorl %%ecx,%%ecx   \n" \
 "        cpuid              \n" \
 "        movl %%ebx, %%esi  \n" \
 "        popl %%ebx         \n" : \
@@ -180,6 +182,7 @@ done:
 #define cpuid(func, a, b, c, d) \
     __asm__ __volatile__ ( \
 "        pushq %%rbx        \n" \
+"        xorq %%rcx,%%rcx   \n" \
 "        cpuid              \n" \
 "        movq %%rbx, %%rsi  \n" \
 "        popq %%rbx         \n" : \
@@ -188,6 +191,7 @@ done:
 #define cpuid(func, a, b, c, d) \
     __asm { \
         __asm mov eax, func \
+        __asm xor ecx, ecx \
         __asm cpuid \
         __asm mov a, eax \
         __asm mov b, ebx \
@@ -209,7 +213,7 @@ done:
     a = b = c = d = 0
 #endif
 
-static SDL_INLINE int
+static int
 CPU_getCPUIDFeatures(void)
 {
     int features = 0;
@@ -223,7 +227,41 @@ CPU_getCPUIDFeatures(void)
     return features;
 }
 
-static SDL_INLINE int
+/* Return SDL_TRUE if xgetbv reports that the OS saves YMM register state. */
+static SDL_bool
+CPU_OSSavesYMM(void)
+{
+    int a, b, c, d;
+
+    /* xgetbv may only be called when CPUID leaf 1 sets ECX bit 27 */
+    cpuid(0, a, b, c, d);
+    if (a < 1) {
+        return SDL_FALSE;
+    }
+    cpuid(1, a, b, c, d);
+    if (!(c & 0x08000000)) {
+        return SDL_FALSE;
+    }
+
+    /* xgetbv(0) returns XCR0 in edx:eax; a stays 0 without an implementation */
+    a = 0;
+#if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
+    __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
+    a = (int)_xgetbv(0);
+#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
+    __asm
+    {
+        xor ecx, ecx
+        _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
+        mov a, eax
+    }
+#endif
+    /* Both bits of mask 6 must be set for YMM state to count as saved */
+    return ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
+}
+
+static int
 CPU_haveRDTSC(void)
 {
     if (CPU_haveCPUID()) {
@@ -232,7 +270,7 @@ CPU_haveRDTSC(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveAltiVec(void)
 {
     volatile int altivec = 0;
@@ -259,7 +297,7 @@ CPU_haveAltiVec(void)
     return altivec;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveMMX(void)
 {
     if (CPU_haveCPUID()) {
@@ -268,7 +306,7 @@ CPU_haveMMX(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_have3DNow(void)
 {
     if (CPU_haveCPUID()) {
@@ -283,7 +321,7 @@ CPU_have3DNow(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveSSE(void)
 {
     if (CPU_haveCPUID()) {
@@ -292,7 +330,7 @@ CPU_haveSSE(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveSSE2(void)
 {
     if (CPU_haveCPUID()) {
@@ -301,7 +339,7 @@ CPU_haveSSE2(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveSSE3(void)
 {
     if (CPU_haveCPUID()) {
@@ -316,13 +354,13 @@ CPU_haveSSE3(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveSSE41(void)
 {
     if (CPU_haveCPUID()) {
         int a, b, c, d;
 
-        cpuid(1, a, b, c, d);
+        cpuid(0, a, b, c, d);
         if (a >= 1) {
             cpuid(1, a, b, c, d);
             return (c & 0x00080000);
@@ -331,13 +369,13 @@ CPU_haveSSE41(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveSSE42(void)
 {
     if (CPU_haveCPUID()) {
         int a, b, c, d;
 
-        cpuid(1, a, b, c, d);
+        cpuid(0, a, b, c, d);
         if (a >= 1) {
             cpuid(1, a, b, c, d);
             return (c & 0x00100000);
@@ -346,13 +384,13 @@ CPU_haveSSE42(void)
     return 0;
 }
 
-static SDL_INLINE int
+static int
 CPU_haveAVX(void)
 {
-    if (CPU_haveCPUID()) {
+    if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
         int a, b, c, d;
 
-        cpuid(1, a, b, c, d);
+        cpuid(0, a, b, c, d);
         if (a >= 1) {
             cpuid(1, a, b, c, d);
             return (c & 0x10000000);
@@ -361,6 +399,21 @@ CPU_haveAVX(void)
     return 0;
 }
 
+static int
+CPU_haveAVX2(void)
+{
+    int a, b, c, d, avx2 = 0;
+    /* Requires CPUID, OS-saved YMM state, and CPUID leaf 7 EBX bit 5. */
+    if (CPU_haveCPUID() && CPU_OSSavesYMM()) {
+        cpuid(0, a, b, c, d);
+        if (a >= 7) { /* leaf 7 is only queried when leaf 0 reports it */
+            cpuid(7, a, b, c, d);
+            avx2 = (b & 0x00000020);
+        }
+    }
+    return avx2;
+}
+
 static int SDL_CPUCount = 0;
 
 int
@@ -560,6 +613,9 @@ SDL_GetCPUFeatures(void)
         if (CPU_haveAVX()) {
             SDL_CPUFeatures |= CPU_HAS_AVX;
         }
+        if (CPU_haveAVX2()) {
+            SDL_CPUFeatures |= CPU_HAS_AVX2;
+        }
     }
     return SDL_CPUFeatures;
 }
@@ -654,6 +710,15 @@ SDL_HasAVX(void)
     return SDL_FALSE;
 }
 
+SDL_bool
+SDL_HasAVX2(void)
+{
+    /*
+     * Report whether CPU_HAS_AVX2 is set in the SDL_GetCPUFeatures() mask.
+     */
+    return (SDL_GetCPUFeatures() & CPU_HAS_AVX2) ? SDL_TRUE : SDL_FALSE;
+}
+
 static int SDL_SystemRAM = 0;
 
 int
@@ -720,6 +785,7 @@ main()
     printf("SSE4.1: %d\n", SDL_HasSSE41());
     printf("SSE4.2: %d\n", SDL_HasSSE42());
     printf("AVX: %d\n", SDL_HasAVX());
+    printf("AVX2: %d\n", SDL_HasAVX2());
     printf("RAM: %d MB\n", SDL_GetSystemRAM());
     return 0;
 }
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 9aac2c9..79d6e8b 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -587,3 +587,4 @@
 #define SDL_CaptureMouse SDL_CaptureMouse_REAL
 #define SDL_SetWindowHitTest SDL_SetWindowHitTest_REAL
 #define SDL_GetGlobalMouseState SDL_GetGlobalMouseState_REAL
+#define SDL_HasAVX2 SDL_HasAVX2_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index bdb3cc4..0112769 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -619,3 +619,4 @@ SDL_DYNAPI_PROC(float,SDL_tanf,(float a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_CaptureMouse,(SDL_bool a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_SetWindowHitTest,(SDL_Window *a, SDL_HitTest b, void *c),(a,b,c),return)
 SDL_DYNAPI_PROC(Uint32,SDL_GetGlobalMouseState,(int *a, int *b),(a,b),return)
+SDL_DYNAPI_PROC(SDL_bool,SDL_HasAVX2,(void),(),return)