Hash :
aea88562
Author :
Date :
2023-05-19T16:52:43
Reland "Metal: Optimized BufferSubData per device"
This reverts commit ee64836f702332adaca58d9f452063a04b2da955 ,
relanding the patch stack described there.
Between patchsets 1 and 5:
- The shadow buffer allocation has been replaced with a multimap of
precisely-sized buffers, rather than rounding up buffer sizes.
- Garbage collection of shadow buffers is triggered in three situations:
- A certain number of context switches have occurred; this number
was hand-tuned to avoid GC every frame.
- A certain number of command buffer submissions has occurred; this
number was hand-tuned to GC no more often than every few seconds
on representative workloads.
- The total size of the allocated shadow buffers is more than 1 MB,
and either more than twice the size at the last garbage
collection, or 64 MB more than at the last garbage collection. In
this case, aggressive GC is performed in order to reclaim shadow
buffers more quickly.
Performance before and after these changes appears identical on
microbenchmarks. On one Figma test case, comparing GPU memory
allocated inside the BufferManager, peak consumption is decreased by
over 75%, and steady-state consumption decreases by over 88%.
Patchset 6 adds a needed workaround for a bug in the
AMDMTLBronzeDriver affecting uploads of client-side data, and
therefore some dEQP tests. It also streamlines the aggressive GC.
Bug: angleproject:7544
Change-Id: I81b061f0b33c27fa403527fa12d626f4e9c88ebe
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4497413
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
//
// Copyright 2022 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// mtl_buffer_manager.h:
// BufferManager manages buffers across all contexts for a single
// device.
//
#ifndef LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_
#define LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_
#include "common/FixedVector.h"
#include "libANGLE/renderer/metal/mtl_resources.h"
#include <map>
#include <vector>
namespace rx
{
class ContextMtl;
namespace mtl
{
// GL buffers are backed by Metal buffers. Which Metal
// buffer backs a particular GL buffer is fluid.
// The case being optimized is a loop of something like
//
// for 1..4
// glBufferSubData
// glDrawXXX
//
// You can't update a buffer in the middle of a render pass
// in Metal, so instead we'd end up using multiple buffers.
//
// In the simple case, the call to `glBufferSubData` updates the
// entire buffer. Each call to `glBufferSubData` then gets a new
// buffer from this BufferManager and copies the new data to it.
// We'd end up submitting this render pass:
//
// draw with buf1
// draw with buf2
// draw with buf3
// draw with buf4
//
// The GL buffer now references buf4, and buf1, buf2, buf3 and
// buf0 (the buffer that was previously referenced by the GL buffer)
// are all added to the in-use list.
//
// This macro enables showing the running totals of the various
// buckets of unused buffers.
// #define ANGLE_MTL_TRACK_BUFFER_MEM
class BufferManager
{
public:
BufferManager();
static constexpr size_t kMaxStagingBufferSize = 1024 * 1024;
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
static constexpr int kNumCachedStorageModes = 2;
#else
static constexpr int kNumCachedStorageModes = 1;
#endif
static constexpr size_t kContextSwitchesBetweenGC = 120;
static constexpr size_t kCommandBufferCommitsBetweenGC = 5000;
static constexpr size_t kMinMemBasedGC = 1024 * 1024;
static constexpr size_t kMemAllocedBetweenGC = 64 * 1024 * 1024;
angle::Result queueBlitCopyDataToBuffer(ContextMtl *contextMtl,
const void *srcPtr,
size_t sizeToCopy,
size_t offset,
mtl::BufferRef &dstMetalBuffer);
angle::Result getBuffer(ContextMtl *contextMtl,
MTLStorageMode storageMode,
size_t size,
mtl::BufferRef &bufferRef);
void returnBuffer(ContextMtl *contextMtl, mtl::BufferRef &bufferRef);
void incrementNumContextSwitches();
void incrementNumCommandBufferCommits();
private:
typedef std::vector<mtl::BufferRef> BufferList;
typedef std::multimap<size_t, mtl::BufferRef> BufferMap;
enum class GCReason
{
ContextSwitches,
CommandBufferCommits,
TotalMem
};
void freeUnusedBuffers(ContextMtl *contextMtl);
void addBufferRefToFreeLists(mtl::BufferRef &bufferRef);
void collectGarbage(GCReason reason);
BufferList mInUseBuffers;
BufferMap mFreeBuffers[kNumCachedStorageModes];
// For garbage collecting expired buffer shadow copies
size_t mContextSwitches = 0;
size_t mContextSwitchesAtLastGC = 0;
size_t mCommandBufferCommits = 0;
size_t mCommandBufferCommitsAtLastGC = 0;
size_t mTotalMem = 0;
size_t mTotalMemAtLastGC = 0;
#ifdef ANGLE_MTL_TRACK_BUFFER_MEM
std::map<size_t, size_t> mAllocatedSizes;
#endif
};
} // namespace mtl
} // namespace rx
#endif /* LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_ */