Faster blit with CopyAlpha, no ColorKey

Applied to the following formats:
ABGR8888 -> BGRA8888 : faster x3 (2727179 -> 704761)
ABGR8888 -> RGBA8888 : faster x3 (2707808 -> 705309)
ARGB8888 -> BGRA8888 : faster x3 (2745371 -> 712437)
ARGB8888 -> RGBA8888 : faster x3 (2746230 -> 705236)
BGRA8888 -> ABGR8888 : faster x3 (2745026 -> 707045)
BGRA8888 -> ARGB8888 : faster x3 (2752760 -> 727373)
BGRA8888 -> RGBA8888 : faster x3 (2769544 -> 704607)
RGBA8888 -> ABGR8888 : faster x3 (2725058 -> 706669)
RGBA8888 -> ARGB8888 : faster x3 (2704866 -> 707132)
RGBA8888 -> BGRA8888 : faster x3 (2710351 -> 704615)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index 4b2d876..5232125 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -2151,8 +2151,8 @@ Blit4to4CopyAlpha(SDL_BlitInfo * info)
/* permutation for mapping srcfmt to dstfmt, overloading or not the alpha channel */
static void
-get_permutation(SDL_PixelFormat *srcfmt, SDL_PixelFormat *dstfmt,
- int *_r , int *_g, int *_b, int *_a, int *_missing)
+get_permutation(SDL_PixelFormat *srcfmt, SDL_PixelFormat *dstfmt,
+ int *_r , int *_g, int *_b, int *_a, int *_missing)
{
int missing = 0, r, g, b, a = 0;
int Pixel = 0x04030201; /* identity permutation */
@@ -2162,7 +2162,7 @@ get_permutation(SDL_PixelFormat *srcfmt, SDL_PixelFormat *dstfmt,
} else {
RGB_FROM_PIXEL(Pixel, srcfmt, r, g, b);
}
-
+
if (dstfmt->Amask) {
if (srcfmt->Amask) {
PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, a);
@@ -2228,7 +2228,7 @@ BlitNtoN(SDL_BlitInfo * info)
SDL_PixelFormat *dstfmt = info->dst_fmt;
int dstbpp = dstfmt->BytesPerPixel;
unsigned alpha = dstfmt->Amask ? info->a : 0;
-
+
/* Any src/dst 8888, no ARGB2101010 */
if (srcbpp == 4 && dstbpp == 4 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
@@ -2240,7 +2240,7 @@ BlitNtoN(SDL_BlitInfo * info)
/* Find the appropriate permutation */
int missing = 0, r, g, b, a;
get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing);
-
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
@@ -2261,7 +2261,7 @@ BlitNtoN(SDL_BlitInfo * info)
}
return;
}
-
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
@@ -2297,6 +2297,38 @@ BlitNtoNCopyAlpha(SDL_BlitInfo * info)
int dstbpp = dstfmt->BytesPerPixel;
int c;
+ /* Any src/dst 8888, no ARGB2101010 */
+ if (srcbpp == 4 && dstbpp == 4 &&
+ srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+ dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+ Uint32 *src32 = (Uint32*)src;
+ Uint32 *dst32 = (Uint32*)dst;
+
+ /* Find the appropriate permutation */
+ int r, g, b, a;
+ get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, NULL);
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint8 *s8 = (Uint8 *)src32;
+ Uint8 *d8 = (Uint8 *)dst32;
+ d8[0] = s8[r];
+ d8[1] = s8[g];
+ d8[2] = s8[b];
+ d8[3] = s8[a];
+ ++src32;
+ ++dst32;
+ }, width);
+ /* *INDENT-ON* */
+ src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+ dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ }
+ return;
+ }
+
while (height--) {
for (c = width; c; --c) {
Uint32 Pixel;
@@ -2484,11 +2516,11 @@ BlitNtoNKey(SDL_BlitInfo * info)
Uint32 *src32 = (Uint32*)src;
Uint32 *dst32 = (Uint32*)dst;
-
+
/* Find the appropriate permutation */
int missing = 0, r, g, b, a;
get_permutation(srcfmt, dstfmt, &r, &g, &b, &a, &missing);
-
+
if (dstfmt->Amask) {
while (height--) {
/* *INDENT-OFF* */
@@ -2510,7 +2542,7 @@ BlitNtoNKey(SDL_BlitInfo * info)
src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
}
-
+
return;
} else {
while (height--) {