Faster blit when using CopyAlpha + ColorKey. Applied to the following formats: ABGR8888 -> ARGB8888 : faster x7 (3959672 -> 537227); ABGR8888 -> BGRA8888 : faster x7 (4008716 -> 532064); ABGR8888 -> RGBA8888 : faster x7 (3998576 -> 530964); ARGB8888 -> ABGR8888 : faster x7 (3942420 -> 532503); ARGB8888 -> BGRA8888 : faster x7 (3995382 -> 527722); ARGB8888 -> RGBA8888 : faster x7 (4259330 -> 543033); BGRA8888 -> ABGR8888 : faster x7 (4110411 -> 529402); BGRA8888 -> ARGB8888 : faster x7 (4071906 -> 538393); BGRA8888 -> RGBA8888 : faster x6 (4038320 -> 585141); RGBA8888 -> ABGR8888 : faster x7 (3937018 -> 534127); RGBA8888 -> ARGB8888 : faster x7 (3979577 -> 537810); RGBA8888 -> BGRA8888 : faster x7 (3975656 -> 528355).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index 55b83e2..7df4676 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -2339,7 +2339,7 @@ BlitNtoNKey(SDL_BlitInfo * info)
if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
Uint32 *src32 = (Uint32*)src;
Uint32 *dst32 = (Uint32*)dst;
-
+
if (dstfmt->Amask) {
/* RGB->RGBA, SET_ALPHA */
Uint32 mask = info->a << dstfmt->Ashift;
@@ -2441,8 +2441,8 @@ BlitNtoNKey(SDL_BlitInfo * info)
dst += dstskip;
}
return;
- }
-
+ }
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
@@ -2519,6 +2519,46 @@ BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
return;
}
+ /* Any src/dst 8888 for CopyAlpha, no ARGB2101010 */
+ if (srcbpp == 4 && dstbpp == 4 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ Uint32 *src32 = (Uint32*)src;
+ Uint32 *dst32 = (Uint32*)dst;
+
+ /* Find the appropriate permutation */
+ int r, g, b, a;
+ Pixel = 0x03020100;
+ RGBA_FROM_PIXEL(Pixel, srcfmt, r, g, b, a);
+ PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, a);
+ r = Pixel & 0xFF;
+ g = (Pixel >> 8) & 0xFF;
+ b = (Pixel >> 16) & 0xFF;
+ a = (Pixel >> 24) & 0xFF;
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ if ((*src32 & rgbmask) != ckey) {
+ Uint8 *s8 = src32;
+ Uint8 *d8 = dst32;
+ d8[0] = s8[r];
+ d8[1] = s8[g];
+ d8[2] = s8[b];
+ d8[3] = s8[a];
+ }
+ ++src32;
+ ++dst32;
+ }, width);
+ /* *INDENT-ON* */
+ src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+ dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ }
+ return;
+ }
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(