Hash:   aea88562
Author:
Date:   2023-05-19T16:52:43
Reland "Metal: Optimized BufferSubData per device"
This reverts commit ee64836f702332adaca58d9f452063a04b2da955,
relanding the patch stack described there.
Between patchsets 1 and 5:
- The shadow buffer allocation has been replaced with a multimap of
  precisely-sized buffers, rather than rounding up buffer sizes.
- Garbage collection of shadow buffers is triggered in three
  situations (sketched below):
  - A certain number of context switches have occurred; this number
    was hand-tuned to avoid GC every frame.
  - A certain number of command buffer submissions have occurred;
    this number was hand-tuned so that GC runs no more often than
    every few seconds on representative workloads.
  - The total size of the allocated shadow buffers is more than 1 MB,
    and either more than twice the size at the last garbage
    collection, or 64 MB more than at the last garbage collection. In
    this case, aggressive GC is performed in order to reclaim shadow
    buffers more quickly.
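A minimal sketch of these triggers, for illustration only (the class
and threshold names below are hypothetical, not the actual
BufferManager code; the real threshold values were hand-tuned):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical policy object deciding when to GC shadow buffers.
    class ShadowBufferGCPolicy
    {
      public:
        // Called on each context switch; true when a periodic GC is due.
        bool onContextSwitch() { return ++mContextSwitches >= kContextSwitchThreshold; }

        // Called on each command buffer submission; true when a periodic GC is due.
        bool onCommandBufferSubmit() { return ++mSubmissions >= kSubmissionThreshold; }

        // True when growth since the last GC warrants an aggressive GC: the total
        // size exceeds 1 MB and has either doubled or grown by more than 64 MB.
        bool shouldCollectAggressively(size_t totalSize) const
        {
            return totalSize > kMinSizeForAggressiveGC &&
                   (totalSize > 2 * mSizeAtLastGC ||
                    totalSize > mSizeAtLastGC + kAbsoluteGrowthLimit);
        }

        // Reset the counters and remember the size after a collection.
        void onGarbageCollected(size_t totalSize)
        {
            mContextSwitches = 0;
            mSubmissions     = 0;
            mSizeAtLastGC    = totalSize;
        }

      private:
        // Placeholder values; the real numbers were hand-tuned.
        static constexpr uint32_t kContextSwitchThreshold = 64;
        static constexpr uint32_t kSubmissionThreshold    = 256;
        static constexpr size_t kMinSizeForAggressiveGC   = 1u << 20;   // 1 MB
        static constexpr size_t kAbsoluteGrowthLimit      = 64u << 20;  // 64 MB

        uint32_t mContextSwitches = 0;
        uint32_t mSubmissions     = 0;
        size_t mSizeAtLastGC      = 0;
    };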
Performance before and after these changes appears identical on
microbenchmarks. On one Figma test case, comparing GPU memory
allocated inside the BufferManager, peak consumption decreases by
over 75% and steady-state consumption by over 88%.
Patchset 6 adds a necessary workaround for a bug in the
AMDMTLBronzeDriver that affects uploads of client-side data, and
therefore some dEQP tests. It also streamlines the aggressive GC.
Bug: angleproject:7544
Change-Id: I81b061f0b33c27fa403527fa12d626f4e9c88ebe
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4497413
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
//
// Copyright (c) 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// mtl_occlusion_query_pool: A visibility pool for allocating visibility query offsets within
// one render pass.
//
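// A typical lifetime within a single render pass looks roughly like this (hypothetical caller
// code for illustration only; the real call sites live elsewhere in the Metal backend):
//
//     OcclusionQueryPool pool;
//     // When an occlusion query begins or resumes inside the render pass:
//     ANGLE_TRY(pool.allocateQueryOffset(contextMtl, query, /*clearOldValue=*/true));
//     // ... draw calls write visibility results at the allocated offsets ...
//     // When the render pass ends, accumulate the results into each query's buffer:
//     pool.resolveVisibilityResults(contextMtl);
//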
#include "libANGLE/renderer/metal/mtl_occlusion_query_pool.h"
#include "libANGLE/renderer/metal/ContextMtl.h"
#include "libANGLE/renderer/metal/DisplayMtl.h"
#include "libANGLE/renderer/metal/QueryMtl.h"
namespace rx
{
namespace mtl
{

// OcclusionQueryPool implementation
OcclusionQueryPool::OcclusionQueryPool() {}
OcclusionQueryPool::~OcclusionQueryPool() {}

void OcclusionQueryPool::destroy(ContextMtl *contextMtl)
{
    mRenderPassResultsPool = nullptr;
    for (QueryMtl *allocatedQuery : mAllocatedQueries)
    {
        if (!allocatedQuery)
        {
            continue;
        }
        allocatedQuery->clearAllocatedVisibilityOffsets();
    }
    mAllocatedQueries.clear();
}

angle::Result OcclusionQueryPool::allocateQueryOffset(ContextMtl *contextMtl,
                                                      QueryMtl *query,
                                                      bool clearOldValue)
{
    // Only a query that already has an allocated offset, or the first query of the render pass,
    // is allowed to keep its old value. All other queries must be reset to zero before sample
    // visibility is counted in draw calls.
    ASSERT(clearOldValue || mAllocatedQueries.empty() ||
           !query->getAllocatedVisibilityOffsets().empty());

    uint32_t currentOffset =
        static_cast<uint32_t>(mAllocatedQueries.size()) * kOcclusionQueryResultSize;
    if (!mRenderPassResultsPool)
    {
        // First allocation
        ANGLE_TRY(Buffer::MakeBufferWithStorageMode(contextMtl, MTLStorageModePrivate,
                                                    kOcclusionQueryResultSize, nullptr,
                                                    &mRenderPassResultsPool));
        mRenderPassResultsPool->get().label = @"OcclusionQueryPool";
    }
    else if (currentOffset + kOcclusionQueryResultSize > mRenderPassResultsPool->size())
    {
        // Double the capacity
        ANGLE_TRY(Buffer::MakeBufferWithStorageMode(contextMtl, MTLStorageModePrivate,
                                                    mRenderPassResultsPool->size() * 2, nullptr,
                                                    &mRenderPassResultsPool));
        mRenderPassResultsPool->get().label = @"OcclusionQueryPool";
    }

    if (clearOldValue)
    {
        // If the old value is not needed, deallocate any offset previously allocated for this
        // query.
        deallocateQueryOffset(contextMtl, query);
    }
    if (query->getAllocatedVisibilityOffsets().empty())
    {
        mAllocatedQueries.push_back(query);
        query->setFirstAllocatedVisibilityOffset(currentOffset);
    }
    else
    {
        // An additional offset may be allocated for a query only if it extends a contiguous
        // region.
        ASSERT(currentOffset ==
               query->getAllocatedVisibilityOffsets().back() + kOcclusionQueryResultSize);
        // Just reserve an empty slot in the allocated query array
        mAllocatedQueries.push_back(nullptr);
        query->addAllocatedVisibilityOffset();
    }

    if (currentOffset == 0)
    {
        mResetFirstQuery = clearOldValue;
        if (!clearOldValue && !contextMtl->getDisplay()->getFeatures().allowBufferReadWrite.enabled)
        {
            // If the old value of the first query needs to be retained and the device doesn't
            // support buffer read-write, we need an additional offset to store the old value of
            // the query.
            return allocateQueryOffset(contextMtl, query, false);
        }
    }

    return angle::Result::Continue;
}

void OcclusionQueryPool::deallocateQueryOffset(ContextMtl *contextMtl, QueryMtl *query)
{
    if (query->getAllocatedVisibilityOffsets().empty())
    {
        return;
    }
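    // Only the slot corresponding to the query's first offset holds the query pointer; any
    // additional offsets were reserved as nullptr placeholders in allocateQueryOffset, so
    // clearing this single entry is sufficient.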
    mAllocatedQueries[query->getAllocatedVisibilityOffsets().front() / kOcclusionQueryResultSize] =
        nullptr;
    query->clearAllocatedVisibilityOffsets();
}

void OcclusionQueryPool::resolveVisibilityResults(ContextMtl *contextMtl)
{
    if (mAllocatedQueries.empty())
    {
        return;
    }

    RenderUtils &utils = contextMtl->getDisplay()->getUtils();
    BlitCommandEncoder *blitEncoder = nullptr;
    // Combine the values stored in the offsets allocated for the first query
    if (mAllocatedQueries[0])
    {
        const BufferRef &dstBuf = mAllocatedQueries[0]->getVisibilityResultBuffer();
        const VisibilityBufferOffsetsMtl &allocatedOffsets =
            mAllocatedQueries[0]->getAllocatedVisibilityOffsets();
        if (!mResetFirstQuery &&
            !contextMtl->getDisplay()->getFeatures().allowBufferReadWrite.enabled)
        {
            // If we cannot read and write to the same buffer in a shader, we need to copy the
            // old value of the first query to the first offset allocated for it.
            blitEncoder = contextMtl->getBlitCommandEncoder();
            blitEncoder->copyBuffer(dstBuf, 0, mRenderPassResultsPool, allocatedOffsets.front(),
                                    kOcclusionQueryResultSize);
            utils.combineVisibilityResult(contextMtl, false, allocatedOffsets,
                                          mRenderPassResultsPool, dstBuf);
        }
        else
        {
            utils.combineVisibilityResult(contextMtl, !mResetFirstQuery, allocatedOffsets,
                                          mRenderPassResultsPool, dstBuf);
        }
    }

    // Combine the values stored in the offsets allocated for each of the remaining queries
    for (size_t i = 1; i < mAllocatedQueries.size(); ++i)
    {
        QueryMtl *query = mAllocatedQueries[i];
        if (!query)
        {
            continue;
        }
        const BufferRef &dstBuf = query->getVisibilityResultBuffer();
        const VisibilityBufferOffsetsMtl &allocatedOffsets =
            query->getAllocatedVisibilityOffsets();
        utils.combineVisibilityResult(contextMtl, false, allocatedOffsets, mRenderPassResultsPool,
                                      dstBuf);
    }

    // Request synchronization and cleanup
    blitEncoder = contextMtl->getBlitCommandEncoder();
    for (size_t i = 0; i < mAllocatedQueries.size(); ++i)
    {
        QueryMtl *query = mAllocatedQueries[i];
        if (!query)
        {
            continue;
        }
        const BufferRef &dstBuf = query->getVisibilityResultBuffer();
        dstBuf->syncContent(contextMtl, blitEncoder);
        query->clearAllocatedVisibilityOffsets();
    }
    mAllocatedQueries.clear();
}

} // namespace mtl
} // namespace rx