Fixed bug 4290 - add fastpaths for format conversion in BlitNtoN. All of the following conversions are faster (no colorkey, no blending; the ratios are only approximate): ABGR8888 -> ARGB8888 : faster x6 (2655837 -> 416607) ABGR8888 -> BGR24 : faster x7 (2470117 -> 325693) ABGR8888 -> RGB24 : faster x7 (2478107 -> 335445) ABGR8888 -> RGB888 : faster x9 (3178524 -> 333859) ARGB8888 -> ABGR8888 : faster x6 (2648366 -> 406977) ARGB8888 -> BGR24 : faster x7 (2474978 -> 327819) ARGB8888 -> BGR888 : faster x9 (3189072 -> 326710) ARGB8888 -> RGB24 : faster x7 (2473689 -> 324729) BGR24 -> ABGR8888 : faster x6 (2268763 -> 359946) BGR24 -> ARGB8888 : faster x6 (2306393 -> 359213) BGR24 -> BGR888 : faster x6 (2231141 -> 324195) BGR24 -> RGB24 : faster x4 (1557835 -> 322033) BGR24 -> RGB888 : faster x6 (2229854 -> 323849) BGR888 -> ARGB8888 : faster x8 (3215202 -> 363137) BGR888 -> BGR24 : faster x7 (2474775 -> 347916) BGR888 -> RGB24 : faster x7 (2532783 -> 327354) BGR888 -> RGB888 : faster x9 (3134634 -> 344987) RGB24 -> ABGR8888 : faster x6 (2229486 -> 358919) RGB24 -> ARGB8888 : faster x6 (2271587 -> 358521) RGB24 -> BGR24 : faster x4 (1530913 -> 321149) RGB24 -> BGR888 : faster x6 (2227284 -> 327453) RGB24 -> RGB888 : faster x6 (2227125 -> 329061) RGB888 -> ABGR8888 : faster x8 (3163292 -> 362445) RGB888 -> BGR24 : faster x7 (2469489 -> 327127) RGB888 -> BGR888 : faster x9 (3190526 -> 326022) RGB888 -> RGB24 : faster x7 (2479084 -> 324982)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index daa2779..1fe6a6b 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -2515,6 +2515,146 @@ BlitNto2101010(SDL_BlitInfo * info)
}
}
+/*
+ * Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet.
+ *
+ * Copies the three colour bytes of each pixel, in the same order, from a
+ * source with 3 or 4 bytes per pixel to a destination with 3 or 4 bytes
+ * per pixel.  For a 4-byte destination the remaining byte receives
+ * info->a when the destination format has an alpha mask (SET_ALPHA) and
+ * 0 otherwise (NO_ALPHA).
+ *
+ * info->dst_w / info->dst_h give the area size in pixels; info->src_skip
+ * and info->dst_skip are added to the row pointers after every row.
+ *
+ * Pixels are written byte by byte.  A 32-bit store into a 3-byte pixel
+ * would touch one byte beyond it, corrupting the neighbouring pixel at
+ * the edge of a sub-rectangle and overrunning the buffer on the last
+ * pixel; it could also be misaligned.
+ */
+static void
+Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
+{
+    int width = info->dst_w;
+    int height = info->dst_h;
+    Uint8 *src = info->src;
+    int srcskip = info->src_skip;
+    Uint8 *dst = info->dst;
+    int dstskip = info->dst_skip;
+    SDL_PixelFormat *srcfmt = info->src_fmt;
+    int srcbpp = srcfmt->BytesPerPixel;
+    SDL_PixelFormat *dstfmt = info->dst_fmt;
+    int dstbpp = dstfmt->BytesPerPixel;
+    /* Byte index of the component stored at bits 0-7, 8-15, 16-23 (and
+       24-31 for the destination) of the packed pixel value.
+       NOTE(review): assumes packed pixels are laid out in native byte
+       order, including the 3-byte formats - confirm for big-endian. */
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+    const int si0 = 0, si1 = 1, si2 = 2;
+    const int di0 = 0, di1 = 1, di2 = 2, di3 = 3;
+#else
+    const int si0 = srcbpp - 1, si1 = srcbpp - 2, si2 = srcbpp - 3;
+    const int di0 = dstbpp - 1, di1 = dstbpp - 2, di2 = dstbpp - 3, di3 = dstbpp - 4;
+#endif
+
+    if (dstbpp == 4) {
+        /* SET_ALPHA stores the constant alpha; NO_ALPHA clears the unused byte */
+        const Uint8 top = dstfmt->Amask ? (Uint8) info->a : 0;
+        while (height--) {
+            /* *INDENT-OFF* */
+            DUFFS_LOOP(
+            {
+                /* read the whole pixel before writing any of it */
+                const Uint8 c0 = src[si0];
+                const Uint8 c1 = src[si1];
+                const Uint8 c2 = src[si2];
+                dst[di0] = c0;
+                dst[di1] = c1;
+                dst[di2] = c2;
+                dst[di3] = top;
+                dst += dstbpp;
+                src += srcbpp;
+            }, width);
+            /* *INDENT-ON* */
+            src += srcskip;
+            dst += dstskip;
+        }
+    } else {
+        /* NO_ALPHA, 3-byte destination: write exactly three bytes per pixel */
+        while (height--) {
+            /* *INDENT-OFF* */
+            DUFFS_LOOP(
+            {
+                const Uint8 c0 = src[si0];
+                const Uint8 c1 = src[si1];
+                const Uint8 c2 = src[si2];
+                dst[di0] = c0;
+                dst[di1] = c1;
+                dst[di2] = c2;
+                dst += dstbpp;
+                src += srcbpp;
+            }, width);
+            /* *INDENT-ON* */
+            src += srcskip;
+            dst += dstskip;
+        }
+    }
+}
+
+/*
+ * Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet.
+ *
+ * Copies each pixel from a source with 3 or 4 bytes per pixel to a
+ * destination with 3 or 4 bytes per pixel while exchanging the first and
+ * third colour components (the middle one keeps its place).  For a
+ * 4-byte destination the remaining byte receives:
+ *   - the source pixel's fourth byte when both formats have an alpha
+ *     mask (COPY_ALPHA, i.e. ABGR8888 <-> ARGB8888),
+ *   - info->a when only the destination has an alpha mask (SET_ALPHA),
+ *   - 0 when the destination has no alpha mask (NO_ALPHA).
+ *
+ * info->dst_w / info->dst_h give the area size in pixels; info->src_skip
+ * and info->dst_skip are added to the row pointers after every row.
+ *
+ * Pixels are written byte by byte.  A 32-bit store into a 3-byte pixel
+ * would touch one byte beyond it, corrupting the neighbouring pixel at
+ * the edge of a sub-rectangle and overrunning the buffer on the last
+ * pixel.  Byte stores also avoid shifting a promoted Uint8 into the sign
+ * bit, which the former "src[3] << 24" did for alpha values >= 128.
+ */
+static void
+Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
+{
+    int width = info->dst_w;
+    int height = info->dst_h;
+    Uint8 *src = info->src;
+    int srcskip = info->src_skip;
+    Uint8 *dst = info->dst;
+    int dstskip = info->dst_skip;
+    SDL_PixelFormat *srcfmt = info->src_fmt;
+    int srcbpp = srcfmt->BytesPerPixel;
+    SDL_PixelFormat *dstfmt = info->dst_fmt;
+    int dstbpp = dstfmt->BytesPerPixel;
+    /* Byte index of the component stored at bits 0-7, 8-15, 16-23 and
+       24-31 of the packed pixel value.  The 24-31 indices are only used
+       on 4-byte pixels.
+       NOTE(review): assumes packed pixels are laid out in native byte
+       order, including the 3-byte formats - confirm for big-endian. */
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+    const int si0 = 0, si1 = 1, si2 = 2, si3 = 3;
+    const int di0 = 0, di1 = 1, di2 = 2, di3 = 3;
+#else
+    const int si0 = srcbpp - 1, si1 = srcbpp - 2, si2 = srcbpp - 3, si3 = srcbpp - 4;
+    const int di0 = dstbpp - 1, di1 = dstbpp - 2, di2 = dstbpp - 3, di3 = dstbpp - 4;
+#endif
+
+    if (dstbpp == 4) {
+        if (dstfmt->Amask && srcfmt->Amask) {
+            /* COPY_ALPHA */
+            /* Only to switch ABGR8888 <-> ARGB8888 */
+            while (height--) {
+                /* *INDENT-OFF* */
+                DUFFS_LOOP(
+                {
+                    /* read the whole pixel before writing any of it */
+                    const Uint8 c0 = src[si0];
+                    const Uint8 c1 = src[si1];
+                    const Uint8 c2 = src[si2];
+                    const Uint8 c3 = src[si3];
+                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+                    dst[di0] = c2;
+                    dst[di1] = c1;
+                    dst[di2] = c0;
+                    dst[di3] = c3;
+                    dst += dstbpp;
+                    src += srcbpp;
+                }, width);
+                /* *INDENT-ON* */
+                src += srcskip;
+                dst += dstskip;
+            }
+        } else {
+            /* SET_ALPHA stores the constant alpha; NO_ALPHA clears the unused byte */
+            const Uint8 top = dstfmt->Amask ? (Uint8) info->a : 0;
+            while (height--) {
+                /* *INDENT-OFF* */
+                DUFFS_LOOP(
+                {
+                    const Uint8 c0 = src[si0];
+                    const Uint8 c1 = src[si1];
+                    const Uint8 c2 = src[si2];
+                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+                    dst[di0] = c2;
+                    dst[di1] = c1;
+                    dst[di2] = c0;
+                    dst[di3] = top;
+                    dst += dstbpp;
+                    src += srcbpp;
+                }, width);
+                /* *INDENT-ON* */
+                src += srcskip;
+                dst += dstskip;
+            }
+        }
+    } else {
+        /* NO_ALPHA, 3-byte destination: write exactly three bytes per pixel */
+        while (height--) {
+            /* *INDENT-OFF* */
+            DUFFS_LOOP(
+            {
+                const Uint8 c0 = src[si0];
+                const Uint8 c1 = src[si1];
+                const Uint8 c2 = src[si2];
+                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+                dst[di0] = c2;
+                dst[di1] = c1;
+                dst[di2] = c0;
+                dst += dstbpp;
+                src += srcbpp;
+            }, width);
+            /* *INDENT-ON* */
+            src += srcskip;
+            dst += dstskip;
+        }
+    }
+}
+
/* Normal N to N optimized blitters */
#define NO_ALPHA 1
#define SET_ALPHA 2
@@ -2555,6 +2695,23 @@ static const struct blit_table normal_blit_2[] = {
};
+/*
+ * Candidate blitters for 24-bit RGB sources; the last row is the generic
+ * BlitNtoN fallback.
+ * NOTE(review): struct blit_table is declared outside this view.  Going by
+ * the row comments, each row appears to read: source R/G/B masks,
+ * destination bytes per pixel, destination R/G/B masks, a flags field,
+ * the blit function, and the alpha modes (NO_ALPHA / SET_ALPHA /
+ * COPY_ALPHA) the function supports - confirm against the declaration.
+ */
static const struct blit_table normal_blit_3[] = {
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ /* These rows are compiled on little-endian builds only; presumably
+    because the two blitters address colour components by byte index -
+    TODO confirm. */
+ /* 3->4 with same rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+ /* 3->4 with inversed rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+ /* 3->3 to switch RGB 24 <-> BGR 24 */
+ /* Only NO_ALPHA is listed here: the destination is 3 bytes per pixel. */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
+#endif
/* Default for 24-bit RGB source, never optimized */
{0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
@@ -2571,6 +2728,24 @@ static const struct blit_table normal_blit_4[] = {
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ /* 4->3 with same rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+ /* 4->3 with inversed rgb triplet */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+#endif
+ /* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
+ {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
+ {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+ 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
+ /* RGB 888 and RGB 565 */
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
0, Blit_RGB888_RGB565, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
@@ -2623,7 +2798,7 @@ SDL_CalculateBlitN(SDL_Surface * surface)
}
} else {
/* Now the meat, choose the blitter we want */
- int a_need = NO_ALPHA;
+ Uint32 a_need = NO_ALPHA;
if (dstfmt->Amask)
a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
table = normal_blit[srcfmt->BytesPerPixel - 1];