metal: SDL_RenderFillRects now draws up to 16k rectangles per draw call (within a single FillRects call) by using a shared quad index buffer, instead of issuing one draw call per rectangle. This reduces CPU usage when drawing many rectangles.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
diff --git a/src/render/metal/SDL_render_metal.m b/src/render/metal/SDL_render_metal.m
index 06e4ef6..1e9a2e8 100644
--- a/src/render/metal/SDL_render_metal.m
+++ b/src/render/metal/SDL_render_metal.m
@@ -117,6 +117,7 @@ typedef struct METAL_ShaderPipelines
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
+ @property (nonatomic, retain) id<MTLBuffer> mtlbufquadindices;
@property (nonatomic, retain) CAMetalLayer *mtllayer;
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
@property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
@@ -137,6 +138,7 @@ typedef struct METAL_ShaderPipelines
[_mtlsamplernearest release];
[_mtlsamplerlinear release];
[_mtlbufconstants release];
+ [_mtlbufquadindices release];
[_mtllayer release];
[_mtlpassdesc release];
[super dealloc];
@@ -794,7 +796,6 @@ METAL_QueueDrawPoints(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL
static int
METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
{
- // !!! FIXME: use an index buffer
const size_t vertlen = (sizeof (float) * 8) * count;
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
if (!verts) {
@@ -803,6 +804,11 @@ METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_
cmd->data.draw.count = count;
+ /* Quads in the following vertex order (matches the quad index buffer):
+ * 1---3
+ * | \ |
+ * 0---2
+ */
for (int i = 0; i < count; i++, rects++) {
if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
cmd->data.draw.count--;
@@ -829,9 +835,8 @@ static int
METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
{
- METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
- const float texw = (float) texturedata.mtltexture.width;
- const float texh = (float) texturedata.mtltexture.height;
+ const float texw = (float) texture->w;
+ const float texh = (float) texture->h;
// !!! FIXME: use an index buffer
const size_t vertlen = (sizeof (float) * 16);
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
@@ -867,9 +872,8 @@ METAL_QueueCopyEx(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture *
const SDL_Rect * srcquad, const SDL_FRect * dstrect,
const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
{
- METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
- const float texw = (float) texturedata.mtltexture.width;
- const float texh = (float) texturedata.mtltexture.height;
+ const float texw = (float) texture->w;
+ const float texh = (float) texture->h;
const float rads = (float)(M_PI * (float) angle / 180.0f);
const float c = cosf(rads), s = sinf(rads);
float minu, maxu, minv, maxv;
@@ -1159,10 +1163,19 @@ METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *ver
case SDL_RENDERCMD_FILL_RECTS: {
const size_t count = cmd->data.draw.count;
- size_t start = 0;
+ const size_t maxcount = UINT16_MAX / 4;
SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
- for (size_t i = 0; i < count; i++, start += 4) { // !!! FIXME: can we do all of these this with a single draw call, using MTLPrimitiveTypeTriangle and an index buffer?
- [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:start vertexCount:4];
+ /* Our index buffer has 16 bit indices, so we can only draw 65k
+ * vertices (16k rects, i.e. UINT16_MAX / 4 quads) at a time. */
+ for (size_t i = 0; i < count; i += maxcount) {
+ /* Set the vertex buffer offset for our current positions.
+ * The vertex buffer itself was bound in SetDrawState. */
+ [data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first + i*sizeof(float)*8 atIndex:0];
+ [data.mtlcmdencoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+ indexCount:SDL_min(maxcount, count - i) * 6
+ indexType:MTLIndexTypeUInt16
+ indexBuffer:data.mtlbufquadindices
+ indexBufferOffset:0];
}
break;
}
@@ -1424,11 +1437,6 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
#if !__has_feature(objc_arc)
[mtlbufconstantstaging autorelease];
#endif
- mtlbufconstantstaging.label = @"SDL constant staging data";
-
- id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
- data.mtlbufconstants = mtlbufconstants;
- data.mtlbufconstants.label = @"SDL constant data";
char *constantdata = [mtlbufconstantstaging contents];
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
@@ -1437,10 +1445,42 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
+ int quadcount = UINT16_MAX / 4;
+ size_t indicessize = sizeof(UInt16) * quadcount * 6;
+ id<MTLBuffer> mtlbufquadindicesstaging = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModeShared];
+#if !__has_feature(objc_arc)
+ [mtlbufquadindicesstaging autorelease];
+#endif
+
+ /* Quads in the following vertex order (matches the FillRects vertices):
+ * 1---3
+ * | \ |
+ * 0---2
+ */
+ UInt16 *indexdata = [mtlbufquadindicesstaging contents];
+ for (int i = 0; i < quadcount; i++) {
+ indexdata[i * 6 + 0] = i * 4 + 0;
+ indexdata[i * 6 + 1] = i * 4 + 1;
+ indexdata[i * 6 + 2] = i * 4 + 2;
+
+ indexdata[i * 6 + 3] = i * 4 + 2;
+ indexdata[i * 6 + 4] = i * 4 + 1;
+ indexdata[i * 6 + 5] = i * 4 + 3;
+ }
+
+ id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
+ data.mtlbufconstants = mtlbufconstants;
+ data.mtlbufconstants.label = @"SDL constant data";
+
+ id<MTLBuffer> mtlbufquadindices = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModePrivate];
+ data.mtlbufquadindices = mtlbufquadindices;
+ data.mtlbufquadindices.label = @"SDL quad index buffer";
+
id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
- [blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:data.mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
+ [blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
+ [blitcmd copyFromBuffer:mtlbufquadindicesstaging sourceOffset:0 toBuffer:mtlbufquadindices destinationOffset:0 size:indicessize];
[blitcmd endEncoding];
[cmdbuffer commit];
@@ -1503,8 +1543,10 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
#endif
#else
#ifdef __IPHONE_11_0
- if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
- maxtexsize = 16384;
+ if (@available(iOS 11.0, *)) {
+ if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
+ maxtexsize = 16384;
+ }
} else
#endif
#ifdef __IPHONE_10_0
@@ -1529,6 +1571,7 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
[mtlsamplernearest release];
[mtlsamplerlinear release];
[mtlbufconstants release];
+ [mtlbufquadindices release];
[view release];
[data release];
[mtldevice release];