Hash :
2df17a12
Author :
Date :
2022-05-25T16:18:37
Vulkan: Pack driver uniforms Previously 5 vec4s were used for driver uniforms + 2 vec4s if specialization constants couldn't be supported. The driver uniforms are rearranged and packed such that only 2 vec4s are normally used, which include fallback for specialization constants as well. In the future, most of the specialization constants may turn into uniforms, and this change prepares for that. Additional uniforms are used (3 vec4s) only if common extensions are missing; transform feedback and bresenham lines. This change makes it more practical for driver uniforms to be turned into push constants. Additionally, these uniforms could potentially be loaded and cached at the beginning of the shader for more efficient memory access. On Pixel6, with this change, the traces show no difference in wall time. On most traces, CPU time shows up to ~7% improvement. Bug: angleproject:7366 Change-Id: I0f47f863955af06a19c69d1f1d7c45b97d95476e Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3668151 Reviewed-by: Jamie Madill <jmadill@chromium.org> Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Charlie Lao <cclao@google.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Implementation of the dFdx/dFdy viewport transformation (y-flip and pre-rotation).
// See header for more info.
#include "compiler/translator/tree_ops/RewriteDfdy.h"
#include "common/angleutils.h"
#include "compiler/translator/SymbolTable.h"
#include "compiler/translator/TranslatorVulkan.h"
#include "compiler/translator/tree_util/DriverUniform.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
#include "compiler/translator/tree_util/IntermTraverse.h"
#include "compiler/translator/tree_util/SpecializationConstant.h"
namespace sh
{
namespace
{
// AST traverser that replaces dFdx()/dFdy() calls with an expression corrected for pre-rotation
// and y-flip.  The rewrite itself lives in visitAggregate().
class Traverser : public TIntermTraverser
{
public:
// symbolTable is forwarded to the TIntermTraverser base; specConst and driverUniforms are stored
// (not owned, as far as this file shows) and queried while rewriting.
Traverser(TSymbolTable *symbolTable, SpecConst *specConst, const DriverUniform *driverUniforms);
private:
// Queues a replacement for every dFdx/dFdy aggregate; any other aggregate is left untouched.
// Always returns true.
bool visitAggregate(Visit visit, TIntermAggregate *node) override;
// Queried first for the swap-XY value; a nullptr result makes visitAggregate() fall back to
// mDriverUniforms.
SpecConst *mSpecConst = nullptr;
// Fallback provider of the swap-XY value, and the provider of the flip-XY value.
const DriverUniform *mDriverUniforms = nullptr;
};
// Stores the spec-const and driver-uniform providers and hands the symbol table to the base
// traverser.
// NOTE(review): the three boolean arguments presumably enable pre-visit only (pre/in/post order)
// -- confirm against the TIntermTraverser declaration.
Traverser::Traverser(TSymbolTable *symbolTable,
SpecConst *specConst,
const DriverUniform *driverUniforms)
: TIntermTraverser(true, false, false, symbolTable),
mSpecConst(specConst),
mDriverUniforms(driverUniforms)
{}
// Replaces a dFdx(operand) / dFdy(operand) call with
//
//     dFdx(operand) * xWeight + dFdy(operand) * yWeight
//
// where each weight is the product of a rotation (swap) multiplier and a flip multiplier.
//
// - Rotation: with pre-rotation of 90 or 270 degrees, dFdx and dFdy trade places.  This is
//   expressed as a vec2 that is either (0, 1) or (1, 0), i.e. the call becomes
//   dFdx * Rotation.x + dFdy * Rotation.y.
// - Scale: a vec2 whose components are -1 or 1, accounting for viewport y-flip and pre-rotation.
//   The rotated result is multiplied by it.
//
// Combined, these cover the 4 possible rotations with and without y-flip.
//
// Aggregates other than dFdx/dFdy are ignored.  Always returns true so traversal continues into
// the children.
bool Traverser::visitAggregate(Visit visit, TIntermAggregate *node)
{
    // Nothing to do unless this is a derivative call.
    const TOperator op = node->getOp();
    const bool isDFdx  = op == EOpDFdx;
    if (!isDFdx && op != EOpDFdy)
    {
        return true;
    }

    // Build both derivatives of the operand.  The original operand node is reused for dFdx and a
    // deep copy is used for dFdy.
    TIntermTyped *operand = node->getChildNode(0)->getAsTyped();
    TIntermTyped *derivX =
        CreateBuiltInUnaryFunctionCallNode("dFdx", operand, *mSymbolTable, 300);
    TIntermTyped *derivY =
        CreateBuiltInUnaryFunctionCallNode("dFdy", operand->deepCopy(), *mSymbolTable, 300);

    // Rotation multipliers: ask the specialization constants first and fall back to the driver
    // uniforms when they do not provide a value.
    TIntermTyped *swapXY = mSpecConst->getSwapXY();
    if (swapXY == nullptr)
    {
        swapXY = mDriverUniforms->getSwapXY();
    }
    TIntermTyped *swapXMultiplier = MakeSwapXMultiplier(swapXY);
    TIntermTyped *swapYMultiplier = MakeSwapYMultiplier(swapXY->deepCopy());

    // Flip multipliers: the x and y components of the fragment flip vector.
    TIntermTyped *flipXY = mDriverUniforms->getFlipXY(mSymbolTable, DriverUniformFlip::Fragment);
    TIntermTyped *flipX  = (new TIntermSwizzle(flipXY->deepCopy(), {0}))->fold(nullptr);
    TIntermTyped *flipY  = (new TIntermSwizzle(flipXY->deepCopy(), {1}))->fold(nullptr);

    // Select which rotation multiplier scales which derivative.  For dFdx the X multiplier goes
    // with the x derivative; for dFdy the roles are exchanged.
    TIntermTyped *rotationForDerivX = nullptr;
    TIntermTyped *rotationForDerivY = nullptr;
    if (isDFdx)
    {
        rotationForDerivX = swapXMultiplier;
        rotationForDerivY = swapYMultiplier;
    }
    else
    {
        rotationForDerivX = swapYMultiplier;
        rotationForDerivY = swapXMultiplier;
    }

    // Combine rotation and flip into a single weight per derivative.
    TIntermTyped *weightX = new TIntermBinary(EOpMul, rotationForDerivX, flipX);
    TIntermTyped *weightY = new TIntermBinary(EOpMul, rotationForDerivY, flipY);

    // The weights are multiplied into the derivatives, which may be vectors.
    TOperator scaleOp = EOpMul;
    if (derivX->getType().isVector())
    {
        scaleOp = EOpVectorTimesScalar;
    }
    TIntermTyped *weightedDerivX = new TIntermBinary(scaleOp, derivX, weightX);
    TIntermTyped *weightedDerivY = new TIntermBinary(scaleOp, derivY, weightY);

    // The sum of the weighted derivatives is the corrected value.
    TIntermBinary *corrected = new TIntermBinary(EOpAdd, weightedDerivX, weightedDerivY);

    // Swap the original call for the corrected expression.
    //
    // Known issues (anglebug.com/7346):
    //
    // - Any side effects in the operand are duplicated, as the operand now appears twice.
    // - If the direct child of this node is itself dFdx/dFdy, the replacement queued for that
    //   child is not effective because its parent is replaced as well.
    queueReplacement(corrected, OriginalNode::IS_DROPPED);
    return true;
}
} // anonymous namespace
// Entry point: rewrites every dFdx()/dFdy() call under |root| so that the result accounts for
// pre-rotation and y-flip.
//
// Shaders with a version below 300 are left untouched and reported as success.  Otherwise the
// tree is traversed and the return value is that of updateTree() on the traverser.
bool RewriteDfdy(TCompiler *compiler,
                 TIntermBlock *root,
                 TSymbolTable *symbolTable,
                 int shaderVersion,
                 SpecConst *specConst,
                 const DriverUniform *driverUniforms)
{
    // dFdx/dFdy is only valid in GLSL 3.0 and later, so older shaders need no rewriting.
    if (shaderVersion >= 300)
    {
        Traverser derivativeRewriter(symbolTable, specConst, driverUniforms);
        root->traverse(&derivativeRewriter);
        return derivativeRewriter.updateTree(compiler, root);
    }

    return true;
}
} // namespace sh