Hash :
aea88562
Author :
Date :
2023-05-19T16:52:43
Reland "Metal: Optimized BufferSubData per device"
This reverts commit ee64836f702332adaca58d9f452063a04b2da955 ,
relanding the patch stack described there.
Between patchsets 1 and 5:
- The shadow buffer allocation has been replaced with a multimap of
precisely-sized buffers, rather than rounding up buffer sizes.
- Garbage collection of shadow buffers is triggered in three situations:
- A certain number of context switches have occurred; this number
was hand-tuned to avoid GC every frame.
- A certain number of command buffer submissions have occurred; this
number was hand-tuned to GC no more often than every few seconds
on representative workloads.
- The total size of the allocated shadow buffers is more than 1 MB,
and either more than twice the size at the last garbage
collection, or 64 MB more than at the last garbage collection. In
this case, aggressive GC is performed in order to reclaim shadow
buffers more quickly.
Performance before and after these changes appears identical on
microbenchmarks. On one Figma test case, comparing GPU memory
allocated inside the BufferManager, peak consumption is decreased by
over 75%, and steady-state consumption decreases by over 88%.
Patchset 6 adds a needed workaround for a bug in the
AMDMTLBronzeDriver affecting uploads of client-side data, and
therefore some dEQP tests. It also streamlines the aggressive GC.
Bug: angleproject:7544
Change-Id: I81b061f0b33c27fa403527fa12d626f4e9c88ebe
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4497413
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferMtl.h:
// Defines the class interface for BufferMtl, implementing BufferImpl.
//
#ifndef LIBANGLE_RENDERER_METAL_BUFFERMTL_H_
#define LIBANGLE_RENDERER_METAL_BUFFERMTL_H_
#import <Metal/Metal.h>
#include <optional>
#include <utility>
#include "libANGLE/Buffer.h"
#include "libANGLE/Observer.h"
#include "libANGLE/angletypes.h"
#include "libANGLE/renderer/BufferImpl.h"
#include "libANGLE/renderer/Format.h"
#include "libANGLE/renderer/metal/mtl_buffer_pool.h"
#include "libANGLE/renderer/metal/mtl_resources.h"
namespace rx
{
// Plain aggregate pairing a 32-bit count with an offset.
// NOTE(review): presumably describes one contiguous span of a draw command; this header does
// not show its users — confirm against the implementation files.
struct DrawCommandRange
{
// Count for this range (unit not visible in this header — TODO confirm).
uint32_t count;
// Offset at which this range starts (bytes vs. elements not visible here — TODO confirm).
size_t offset;
};
// A run of consecutive primitive restart value indexes; both endpoints are inclusive.
struct IndexRange
{
    // Builds the range [begin, end].
    IndexRange(size_t begin, size_t end)
        : restartBegin{begin},
          restartEnd{end}
    {}

    // First position of the run.
    size_t restartBegin;
    // Last position of the run (inclusive).
    size_t restartEnd;
};
// Conversion buffers hold translated index and vertex data.
// Common base of VertexConversionBufferMtl, IndexConversionBufferMtl and
// UniformConversionBufferMtl.
struct ConversionBufferMtl
{
// Constructor and destructor are defined out of line.
// NOTE(review): initialSize/alignment presumably configure `data` — confirm in the
// implementation file.
ConversionBufferMtl(ContextMtl *context, size_t initialSize, size_t alignment);
~ConversionBufferMtl();
// One state value determines if we need to re-stream vertex data.
bool dirty;
// The conversion is stored in a dynamic buffer.
mtl::BufferPool data;
// These properties are to be filled in by the user of this conversion buffer:
// the buffer holding the converted data, and the offset associated with it.
mtl::BufferRef convertedBuffer;
size_t convertedOffset;
};
// Conversion buffer specialised for vertex data.
struct VertexConversionBufferMtl : public ConversionBufferMtl
{
// Defined out of line.
// NOTE(review): the *In parameters presumably initialise the identically named members
// below — confirm in the implementation file.
VertexConversionBufferMtl(ContextMtl *context,
angle::FormatID formatIDIn,
GLuint strideIn,
size_t offsetIn);
// The conversion is identified by the triple of {format, stride, offset}.
angle::FormatID formatID;
GLuint stride;
size_t offset;
};
// Conversion buffer specialised for index data.
struct IndexConversionBufferMtl : public ConversionBufferMtl
{
// Defined out of line.
// NOTE(review): the parameters presumably initialise the matching members below — confirm in
// the implementation file.
IndexConversionBufferMtl(ContextMtl *context,
gl::DrawElementsType elemType,
bool primitiveRestartEnabled,
size_t offsetIn);
// Index element type and offset are immutable after construction.
const gl::DrawElementsType elemType;
const size_t offset;
// Unlike the two members above, this flag is not const and may be changed after construction.
bool primitiveRestartEnabled;
// Defined out of line; returns an IndexRange for the given count.
// NOTE(review): exact meaning of the returned range is not visible in this header — confirm.
IndexRange getRangeForConvertedBuffer(size_t count);
};
// Conversion buffer specialised for uniform buffer data.
// NOTE(review): the meaning of the two values in `offset` is not visible in this header;
// confirm against the implementation file.
struct UniformConversionBufferMtl : public ConversionBufferMtl
{
    // Defined out of line.
    UniformConversionBufferMtl(ContextMtl *context,
                               std::pair<size_t, size_t> offsetIn,
                               size_t blockSize);

    // Returns the second element of `offset`. Const-qualified because it only reads immutable
    // state, which also allows calls through const references and pointers.
    size_t initialSrcOffset() const { return offset.second; }

    // Both members are immutable after construction.
    const size_t uniformBufferBlockSize;
    const std::pair<size_t, size_t> offset;
};
// Base class for objects that hand out a mtl::Buffer which is held either strongly or weakly.
class BufferHolderMtl
{
  public:
    virtual ~BufferHolderMtl() = default;

    // Synchronizing CPU and GPU access to a single mtl::Buffer is costly: if the CPU modifies
    // (through map()) a buffer that the GPU is still using, the GPU may stall. It is more
    // efficient to maintain a queue of mtl::Buffer objects and let the CPU modify only a free
    // one. Consequently, to make the GPU see the most recently modified content, callers must
    // invoke this method right before a draw call to retrieve the most up-to-date mtl::Buffer.
    //
    // Returns the locked weak reference when the holder is weak, the strong reference otherwise.
    mtl::BufferRef getCurrentBuffer() const
    {
        if (mIsWeak)
        {
            return mBufferWeakRef.lock();
        }
        return mBuffer;
    }

  protected:
    // Strong reference, returned while mIsWeak is false.
    mtl::BufferRef mBuffer;
    // Weak reference, locked and returned while mIsWeak is true.
    mtl::BufferWeakRef mBufferWeakRef;
    // Selects which of the two references getCurrentBuffer() reads.
    bool mIsWeak = false;
};
class BufferMtl : public BufferImpl, public BufferHolderMtl
{
public:
BufferMtl(const gl::BufferState &state);
~BufferMtl() override;
void destroy(const gl::Context *context) override;
angle::Result setData(const gl::Context *context,
gl::BufferBinding target,
const void *data,
size_t size,
gl::BufferUsage usage) override;
angle::Result setSubData(const gl::Context *context,
gl::BufferBinding target,
const void *data,
size_t size,
size_t offset) override;
angle::Result copySubData(const gl::Context *context,
BufferImpl *source,
GLintptr sourceOffset,
GLintptr destOffset,
GLsizeiptr size) override;
angle::Result map(const gl::Context *context, GLenum access, void **mapPtr) override;
angle::Result mapRange(const gl::Context *context,
size_t offset,
size_t length,
GLbitfield access,
void **mapPtr) override;
angle::Result unmap(const gl::Context *context, GLboolean *result) override;
angle::Result getIndexRange(const gl::Context *context,
gl::DrawElementsType type,
size_t offset,
size_t count,
bool primitiveRestartEnabled,
gl::IndexRange *outRange) override;
void onDataChanged() override;
angle::Result getFirstLastIndices(ContextMtl *contextMtl,
gl::DrawElementsType type,
size_t offset,
size_t count,
std::pair<uint32_t, uint32_t> *outIndices);
const uint8_t *getBufferDataReadOnly(ContextMtl *contextMtl);
bool isSafeToReadFromBufferViaBlit(ContextMtl *contextMtl);
ConversionBufferMtl *getVertexConversionBuffer(ContextMtl *context,
angle::FormatID formatID,
GLuint stride,
size_t offset);
IndexConversionBufferMtl *getIndexConversionBuffer(ContextMtl *context,
gl::DrawElementsType elemType,
bool primitiveRestartEnabled,
size_t offset);
ConversionBufferMtl *getUniformConversionBuffer(ContextMtl *context,
std::pair<size_t, size_t> offset,
size_t blockSize);
size_t size() const { return static_cast<size_t>(mState.getSize()); }
const std::vector<IndexRange> &getRestartIndices(ContextMtl *ctx,
gl::DrawElementsType indexType);
static const std::vector<IndexRange> getRestartIndicesFromClientData(
ContextMtl *ctx,
gl::DrawElementsType indexType,
const mtl::BufferRef clientBuffer);
private:
angle::Result setDataImpl(const gl::Context *context,
gl::BufferBinding target,
const void *data,
size_t size,
gl::BufferUsage usage);
angle::Result setSubDataImpl(const gl::Context *context,
const void *data,
size_t size,
size_t offset);
angle::Result commitShadowCopy(ContextMtl *contextMtl);
angle::Result commitShadowCopy(ContextMtl *contextMtl, size_t size);
void markConversionBuffersDirty();
void clearConversionBuffers();
angle::Result putDataInNewBufferAndStartUsingNewBuffer(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
angle::Result updateExistingBufferViaBlitFromStagingBuffer(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
angle::Result copyDataToExistingBufferViaCPU(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
angle::Result updateShadowCopyThenCopyShadowToNewBuffer(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
bool clientShadowCopyDataNeedSync(ContextMtl *contextMtl);
void ensureShadowCopySyncedFromGPU(ContextMtl *contextMtl);
uint8_t *syncAndObtainShadowCopy(ContextMtl *contextMtl);
// Optional client side shadow buffer
angle::MemoryBuffer mShadowCopy;
// A cache of converted vertex data.
std::vector<VertexConversionBufferMtl> mVertexConversionBuffers;
std::vector<IndexConversionBufferMtl> mIndexConversionBuffers;
std::vector<UniformConversionBufferMtl> mUniformConversionBuffers;
struct RestartRangeCache
{
RestartRangeCache(std::vector<IndexRange> &&ranges_, gl::DrawElementsType indexType_)
: ranges(ranges_), indexType(indexType_)
{}
const std::vector<IndexRange> ranges;
const gl::DrawElementsType indexType;
};
std::optional<RestartRangeCache> mRestartRangeCache;
std::vector<IndexRange> mRestartIndices;
size_t mGLSize = 0; // size GL asked for (vs size we actually allocated)
size_t mRevisionCount = 0; // for generating labels only
gl::BufferUsage mUsage;
};
// BufferHolderMtl whose buffer is supplied through set(), which stores it in the weak
// reference member only.
// NOTE(review): the out-of-line constructor presumably sets mIsWeak so that getCurrentBuffer()
// reads the weak reference — confirm in the implementation file.
class SimpleWeakBufferHolderMtl : public BufferHolderMtl
{
public:
SimpleWeakBufferHolderMtl();
// Stores `buffer` in mBufferWeakRef; mBuffer is not modified.
void set(const mtl::BufferRef &buffer) { mBufferWeakRef = buffer; }
};
} // namespace rx
#endif /* LIBANGLE_RENDERER_METAL_BUFFERMTL_H_ */