metal: avoid an unnecessary extra buffer allocation and GPU data copy in METAL_RunCommandQueue. This improves overall performance.
diff --git a/src/render/metal/SDL_render_metal.m b/src/render/metal/SDL_render_metal.m
index 15ba1d2..58c7bb6 100644
--- a/src/render/metal/SDL_render_metal.m
+++ b/src/render/metal/SDL_render_metal.m
@@ -1073,24 +1073,19 @@ METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *ver
// !!! FIXME: have a ring of pre-made MTLBuffers we cycle through? How expensive is creation?
if (vertsize > 0) {
- id<MTLBuffer> mtlbufvertexstaging = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
- #if !__has_feature(objc_arc)
- [mtlbufvertexstaging autorelease];
- #endif
- mtlbufvertexstaging.label = @"SDL vertex staging data";
- SDL_memcpy([mtlbufvertexstaging contents], vertices, vertsize);
-
- // Move our new vertex buffer from system RAM to GPU memory so any draw calls can use it.
- mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModePrivate];
+ /* We can memcpy to a shared buffer from the CPU and read it from the GPU
+ * without any extra copying. It's a bit slower on macOS to read shared
+ * data from the GPU than to read managed/private data, but we avoid the
+ * cost of copying the data and the code's simpler. Apple's best
+ * practices guide recommends this approach for streamed vertex data.
+ * TODO: this buffer is also used for constants. Is performance still
+ * good for those, or should we have a managed buffer for them? */
+ mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
#if !__has_feature(objc_arc)
[mtlbufvertex autorelease];
#endif
mtlbufvertex.label = @"SDL vertex data";
- id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
- id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
- [blitcmd copyFromBuffer:mtlbufvertexstaging sourceOffset:0 toBuffer:mtlbufvertex destinationOffset:0 size:vertsize];
- [blitcmd endEncoding];
- [cmdbuffer commit];
+ SDL_memcpy([mtlbufvertex contents], vertices, vertsize);
}
// If there's a command buffer here unexpectedly (app requested one?). Commit it so we can start fresh.