Commit 42153342ab9c457ae38277542e44c43317232e66

Sylvain Becker 2019-10-29T16:13:41

Use fill_function for FillRect SIMD/NEON (bug 4365)

diff --git a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c
index 5bd43e4..73b4ce2 100644
--- a/src/video/SDL_fillrect.c
+++ b/src/video/SDL_fillrect.c
@@ -253,6 +253,48 @@ SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
     return SDL_FillRects(dst, rect, 1, color);
 }
 
+#if SDL_ARM_NEON_BLITTERS
+void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
+void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
+void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
+
+static void fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
+    FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch >> 0, color);
+    return;
+}
+
+static void fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
+    FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, pitch >> 1, color);
+    return;
+}
+
+static void fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
+    FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, pitch >> 2, color);
+    return;
+}
+#endif
+
+#if SDL_ARM_SIMD_BLITTERS
+void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
+void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
+void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
+
+static void fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
+    FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch >> 0, color);
+    return;
+}
+
+static void fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
+    FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, pitch >> 1, color);
+    return;
+}
+
+static void fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
+    FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, pitch >> 2, color);
+    return;
+}
+#endif
+
 int
 SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
               Uint32 color)
@@ -282,100 +324,89 @@ SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
     }
 
 #if SDL_ARM_NEON_BLITTERS
-    if (SDL_HasNEON() && dst->format->BytesPerPixel != 3) {
-        void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
-        void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
-        void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
+    if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
         switch (dst->format->BytesPerPixel) {
         case 1:
-            FillRect8ARMNEONAsm(rect->w, rect->h, (uint8_t *) pixels, dst->pitch >> 0, color);
+            fill_function = fill_8_neon;
             break;
         case 2:
-            FillRect16ARMNEONAsm(rect->w, rect->h, (uint16_t *) pixels, dst->pitch >> 1, color);
+            fill_function = fill_16_neon;
             break;
         case 4:
-            FillRect32ARMNEONAsm(rect->w, rect->h, (uint32_t *) pixels, dst->pitch >> 2, color);
+            fill_function = fill_32_neon;
             break;
         }
-
-        SDL_UnlockSurface(dst);
-        return(0);
     }
 #endif
 #if SDL_ARM_SIMD_BLITTERS
-    if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3) {
-        void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
-        void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
-        void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
+    if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
         switch (dst->format->BytesPerPixel) {
         case 1:
-            FillRect8ARMSIMDAsm(rect->w, rect->h, (uint8_t *) pixels, dst->pitch >> 0, color);
+            fill_function = fill_8_simd;
             break;
         case 2:
-            FillRect16ARMSIMDAsm(rect->w, rect->h, (uint16_t *) pixels, dst->pitch >> 1, color);
+            fill_function = fill_16_simd;
             break;
         case 4:
-            FillRect32ARMSIMDAsm(rect->w, rect->h, (uint32_t *) pixels, dst->pitch >> 2, color);
+            fill_function = fill_32_simd;
             break;
         }
-
-        SDL_UnlockSurface(dst);
-        return(0);
     }
 #endif
 
-    switch (dst->format->BytesPerPixel) {
-    case 1:
-        {
-            color |= (color << 8);
-            color |= (color << 16);
+    if (fill_function == NULL) {
+        switch (dst->format->BytesPerPixel) {
+        case 1:
+            {
+                color |= (color << 8);
+                color |= (color << 16);
 #ifdef __SSE__
-            if (SDL_HasSSE()) {
-                fill_function = SDL_FillRect1SSE;
+                if (SDL_HasSSE()) {
+                    fill_function = SDL_FillRect1SSE;
+                    break;
+                }
+#endif
+                fill_function = SDL_FillRect1;
                 break;
             }
-#endif
-            fill_function = SDL_FillRect1;
-            break;
-        }
 
-    case 2:
-        {
-            color |= (color << 16);
+        case 2:
+            {
+                color |= (color << 16);
 #ifdef __SSE__
-            if (SDL_HasSSE()) {
-                fill_function = SDL_FillRect2SSE;
+                if (SDL_HasSSE()) {
+                    fill_function = SDL_FillRect2SSE;
+                    break;
+                }
+#endif
+                fill_function = SDL_FillRect2;
                 break;
             }
-#endif
-            fill_function = SDL_FillRect2;
-            break;
-        }
 
-    case 3:
-        /* 24-bit RGB is a slow path, at least for now. */
-        {
-            fill_function = SDL_FillRect3;
-            break;
-        }
+        case 3:
+            /* 24-bit RGB is a slow path, at least for now. */
+            {
+                fill_function = SDL_FillRect3;
+                break;
+            }
 
-    case 4:
-        {
+        case 4:
+            {
 #ifdef __SSE__
-            if (SDL_HasSSE()) {
-                fill_function = SDL_FillRect4SSE;
+                if (SDL_HasSSE()) {
+                    fill_function = SDL_FillRect4SSE;
+                    break;
+                }
+#endif
+                fill_function = SDL_FillRect4;
                 break;
             }
-#endif
-            fill_function = SDL_FillRect4;
-            break;
-        }
 
-    default:
-        return SDL_SetError("Unsupported pixel format");
+        default:
+            return SDL_SetError("Unsupported pixel format");
+        }
     }
 
-
     for (i = 0; i < count; ++i) {
         rect = &rects[i];
         /* Perform clipping */