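Optimize Emscripten_UpdateWindowFramebuffer: cache the canvas 2D context and the ImageData object instead of recreating them on every update, and copy the pixels with a single typed-array set() call followed by a loop that forces each alpha byte to 0xff.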
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
diff --git a/src/video/emscripten/SDL_emscriptenframebuffer.c b/src/video/emscripten/SDL_emscriptenframebuffer.c
index a26e23a..da03497 100644
--- a/src/video/emscripten/SDL_emscriptenframebuffer.c
+++ b/src/video/emscripten/SDL_emscriptenframebuffer.c
@@ -69,15 +69,25 @@ int Emscripten_UpdateWindowFramebuffer(_THIS, SDL_Window * window, const SDL_Rec
/* Send the data to the display */
EM_ASM_INT({
- //TODO: don't create context every update
- var ctx = Module['canvas'].getContext('2d');
-
- //library_sdl.js SDL_UnlockSurface
- var image = ctx.createImageData($0, $1);
- var data = image.data;
- var src = $2 >> 2;
+ var w = $0;
+ var h = $1;
+ var pixels = $2;
+
+ if (!Module['SDL2']) Module['SDL2'] = {};
+ var SDL2 = Module['SDL2'];
+ if (SDL2.ctxCanvas !== Module['canvas']) {
+ SDL2.ctx = Module['canvas'].getContext('2d');
+ SDL2.ctxCanvas = Module['canvas'];
+ }
+ if (SDL2.w !== w || SDL2.h !== h || SDL2.imageCtx !== SDL2.ctx) {
+ SDL2.image = SDL2.ctx.createImageData(w, h);
+ SDL2.w = w;
+ SDL2.h = h;
+ SDL2.imageCtx = SDL2.ctx;
+ }
+ var data = SDL2.image.data;
+ var src = pixels >> 2;
var dst = 0;
- var isScreen = true;
var num;
if (typeof CanvasPixelArray !== 'undefined' && data instanceof CanvasPixelArray) {
// IE10/IE11: ImageData objects are backed by the deprecated CanvasPixelArray,
@@ -90,26 +100,58 @@ int Emscripten_UpdateWindowFramebuffer(_THIS, SDL_Window * window, const SDL_Rec
data[dst ] = val & 0xff;
data[dst+1] = (val >> 8) & 0xff;
data[dst+2] = (val >> 16) & 0xff;
- data[dst+3] = isScreen ? 0xff : ((val >> 24) & 0xff);
+ data[dst+3] = 0xff;
src++;
dst += 4;
}
} else {
- var data32 = new Uint32Array(data.buffer);
+ if (SDL2.data32Data !== data) {
+ SDL2.data32 = new Int32Array(data.buffer);
+ SDL2.data8 = new Uint8Array(data.buffer);
+ }
+ var data32 = SDL2.data32;
num = data32.length;
- if (isScreen) {
- while (dst < num) {
- // HEAP32[src++] is an optimization. Instead, we could do {{{ makeGetValue('buffer', 'dst', 'i32') }}};
- data32[dst++] = HEAP32[src++] | 0xff000000;
+ // logically we need to do
+ // while (dst < num) {
+ // data32[dst++] = HEAP32[src++] | 0xff000000
+ // }
+ // the following code is faster though, because
+ // .set() is almost free - easily 10x faster due to
+ // native memcpy efficiencies, and the remaining loop
+ // just stores, not load + store, so it is faster
+ data32.set(HEAP32.subarray(src, src + num));
+ var data8 = SDL2.data8;
+ var i = 3;
+ var j = i + 4*num;
+ if (num % 8 == 0) {
+ // unrolling gives big speedups
+ while (i < j) {
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
+ data8[i] = 0xff;
+ i = i + 4 | 0;
}
- } else {
- while (dst < num) {
- data32[dst++] = HEAP32[src++];
+ } else {
+ while (i < j) {
+ data8[i] = 0xff;
+ i = i + 4 | 0;
}
}
}
- ctx.putImageData(image, 0, 0);
+ SDL2.ctx.putImageData(SDL2.image, 0, 0);
return 0;
}, surface->w, surface->h, surface->pixels);