Add a fast path for ARGB888->(A)BGR888 blending with pixel alpha
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index bce6957..fbd5ead 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -575,6 +575,61 @@ BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
}
}
+/* fast ARGB888->(A)BGR888 blending with pixel alpha: the two 0xff00ff bytes are swapped, green stays in place */
+static void
+BlitRGBtoBGRPixelAlpha(SDL_BlitInfo * info)
+{
+ int width = info->dst_w;
+ int height = info->dst_h;
+ Uint32 *srcp = (Uint32 *) info->src;
+ int srcskip = info->src_skip >> 2; /* presumably a byte count; >> 2 gives Uint32 units - TODO confirm */
+ Uint32 *dstp = (Uint32 *) info->dst;
+ int dstskip = info->dst_skip >> 2;
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP4({
+ Uint32 dalpha;
+ Uint32 d;
+ Uint32 s1;
+ Uint32 d1;
+ Uint32 s = *srcp;
+ Uint32 alpha = s >> 24; /* source alpha lives in the top byte */
+ /* FIXME: Opaque alpha is special-cased below since the
+ compositing used (>>8 instead of /255) doesn't handle
+ it correctly. alpha=0 is skipped entirely; is that
+ actually a win for speed? Benchmark this! */
+ if (alpha) { /* alpha == 0 leaves the destination pixel untouched */
+ /*
+ * take out the middle component (green), and process the other
+ * two in parallel (one multiply less), swapped into BGR order.
+ */
+ s1 = s & 0xff00ff;
+ s1 = (s1 >> 16) | (s1 << 16); /* exchange the two outer color bytes */
+ s &= 0xff00;
+
+ if (alpha == SDL_ALPHA_OPAQUE) {
+ *dstp = 0xff000000 | s | s1; /* plain swizzled copy, alpha forced to 0xff */
+ } else {
+ d = *dstp;
+ dalpha = d >> 24;
+ d1 = d & 0xff00ff;
+ d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; /* mask drops bits that spilled between the two bytes */
+ d &= 0xff00;
+ d = (d + ((s - d) * alpha >> 8)) & 0xff00;
+ dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8); /* alpha ^ 0xFF is 255 - alpha */
+ *dstp = d1 | d | (dalpha << 24);
+ }
+ }
+ ++srcp;
+ ++dstp;
+ }, width);
+ /* *INDENT-ON* */
+ srcp += srcskip;
+ dstp += dstskip;
+ }
+}
+
#ifdef __3dNOW__
/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
static void
@@ -1407,6 +1462,12 @@ SDL_CalculateBlitA(SDL_Surface * surface)
#endif
return BlitRGBtoRGBPixelAlpha;
}
+ } else if (sf->Rmask == df->Bmask
+ && sf->Gmask == df->Gmask
+ && sf->Bmask == df->Rmask && sf->BytesPerPixel == 4) {
+ if (sf->Amask == 0xff000000) {
+ return BlitRGBtoBGRPixelAlpha;
+ }
}
return BlitNtoNPixelAlpha;