Edit

kc3-lang/angle/src/libANGLE/renderer/vulkan/ProgramVk.cpp

Branch :

  • Show log

    Commit

  • Author : Jamie Madill
    Date : 2021-03-10 15:58:00
    Hash : 0c77f3ad
    Message : Vulkan: Implement shader buffers descriptor cache. Implements a descriptor set cache for UBOs, SSBOs, and atomic counter buffers. Storage Images and framebuffer fetch input attachments are not yet included. Requires moving the buffer barrier handling into ContextVk, similarly to how we handle the barriers for Textures. The packed description key for the descriptors uses a "fast" vector with a basic minimum size. For most cases of a few buffers this will fit easily in stack memory, but for larger programs with many buffers we fit this into heap memory. The key has a large upper bound due to the high ES 3.2 requirements and the need to index several values such as the offset and binding size. We use dynamic offsets for uniform buffers when possible. This ensures applications like Manhattan 3.1 that use sets of common buffers with changing offsets hit the cache most of the time. Because of resource limits we pick at compilation time whether to use dynamic or static descriptor sets. Mostly this applies to tests that use a large number of uniform buffers. A future implementation could be smart and would recompile the program with heuristics to use a minimal number of dynamic indices. Reduces the number of descriptor set updates from ~300 -> ~30 per frame in Manhattan 3.1 and in Asphalt 9 from 900+ to as low as 0 per frame. Bug: angleproject:5736 Change-Id: I5c2a3881bec90d301dab15cc86c8a70e60674ad7 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2757515 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Tim Van Patten <timvp@google.com> Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>

  • src/libANGLE/renderer/vulkan/ProgramVk.cpp
  • //
    // Copyright 2016 The ANGLE Project Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style license that can be
    // found in the LICENSE file.
    //
    // ProgramVk.cpp:
    //    Implements the class methods for ProgramVk.
    //
    
    #include "libANGLE/renderer/vulkan/ProgramVk.h"
    
    #include "common/debug.h"
    #include "common/utilities.h"
    #include "libANGLE/Context.h"
    #include "libANGLE/ProgramLinkedResources.h"
    #include "libANGLE/renderer/glslang_wrapper_utils.h"
    #include "libANGLE/renderer/renderer_utils.h"
    #include "libANGLE/renderer/vulkan/BufferVk.h"
    #include "libANGLE/renderer/vulkan/GlslangWrapperVk.h"
    #include "libANGLE/renderer/vulkan/TextureVk.h"
    
    namespace rx
    {
    
    namespace
    {
    // Std140 encoder variant used for the default uniform block. It behaves exactly like
    // sh::Std140BlockEncoder except that opaque uniform types never advance the offset.
    class VulkanDefaultBlockEncoder : public sh::Std140BlockEncoder
    {
      public:
        void advanceOffset(GLenum type,
                           const std::vector<unsigned int> &arraySizes,
                           bool isRowMajorMatrix,
                           int arrayStride,
                           int matrixStride) override
        {
            // Only non-opaque types are forwarded to the Std140 encoder; opaque ones are skipped.
            const bool advancesOffset = !gl::IsOpaqueType(type);
            if (advancesOffset)
            {
                sh::Std140BlockEncoder::advanceOffset(type, arraySizes, isRowMajorMatrix, arrayStride,
                                                      matrixStride);
            }
        }
    };
    
    // Computes the layout of the default uniform block for one shader stage.
    //
    // |uniforms| are the stage's uniforms, |blockLayoutMapOut| receives the per-uniform layout
    // info and |blockSizeOut| receives the encoder's final offset (0 when there is nothing to
    // lay out).
    void InitDefaultUniformBlock(const std::vector<sh::ShaderVariable> &uniforms,
                                 sh::BlockLayoutMap *blockLayoutMapOut,
                                 size_t *blockSizeOut)
    {
        // No uniforms at all: report an empty block without running the encoder.
        if (uniforms.empty())
        {
            *blockSizeOut = 0;
            return;
        }

        VulkanDefaultBlockEncoder encoder;
        sh::GetActiveUniformBlockInfo(uniforms, "", &encoder, blockLayoutMapOut);

        // TODO(jmadill): I think we still need a valid block for the pipeline even if zero sized.
        // The encoder offset is reported as-is, so a block made only of opaque uniforms yields 0.
        *blockSizeOut = encoder.getCurrentOffset();
    }
    
    // Writes |count| elements, each made of |componentCount| values of type T, from |v| into the
    // member of |uniformData| described by |layoutInfo|, starting at array element |arrayIndex|.
    template <typename T>
    void UpdateDefaultUniformBlock(GLsizei count,
                                   uint32_t arrayIndex,
                                   int componentCount,
                                   const T *v,
                                   const sh::BlockMemberInfo &layoutInfo,
                                   angle::MemoryBuffer *uniformData)
    {
        const int elementSize = sizeof(T) * componentCount;
        uint8_t *memberStart  = uniformData->data() + layoutInfo.offset;

        const bool needsPerElementCopy =
            layoutInfo.arrayStride != 0 && layoutInfo.arrayStride != elementSize;

        if (needsPerElementCopy)
        {
            // Have to respect the arrayStride between each element of the array.
            const int endIndex = arrayIndex + count;
            int srcIndex       = 0;
            for (int dstIndex = arrayIndex; dstIndex < endIndex; dstIndex++, srcIndex++)
            {
                const int byteOffset = dstIndex * layoutInfo.arrayStride;
                uint8_t *writePtr    = memberStart + byteOffset;
                const T *readPtr     = v + (srcIndex * componentCount);
                ASSERT(writePtr + elementSize <= uniformData->data() + uniformData->size());
                memcpy(writePtr, readPtr, elementSize);
            }
            return;
        }

        // Source and destination have the same element pitch (or the member is not an array), so
        // everything can be copied in one go.
        uint32_t byteOffset = arrayIndex * layoutInfo.arrayStride;
        uint8_t *writePtr   = memberStart + byteOffset;
        ASSERT(writePtr + (elementSize * count) <= uniformData->data() + uniformData->size());
        memcpy(writePtr, v, elementSize * count);
    }
    
    // Reads a single element (|componentCount| values of type T) of the member described by
    // |layoutInfo| from |uniformData| into |dst|. |arrayIndex| selects the array element; for a
    // member with an arrayStride of 0 the member's own offset is read.
    template <typename T>
    void ReadFromDefaultUniformBlock(int componentCount,
                                     uint32_t arrayIndex,
                                     T *dst,
                                     const sh::BlockMemberInfo &layoutInfo,
                                     const angle::MemoryBuffer *uniformData)
    {
        ASSERT(layoutInfo.offset != -1);

        const int elementSize = sizeof(T) * componentCount;

        // Only one element is read, so its address is always offset + index * stride, whether or
        // not the array is tightly packed. A single code path replaces the two duplicated branches.
        const uint8_t *readPtr =
            uniformData->data() + layoutInfo.offset + arrayIndex * layoutInfo.arrayStride;

        // Same bounds check the write path (UpdateDefaultUniformBlock) performs.
        ASSERT(readPtr + elementSize <= uniformData->data() + uniformData->size());
        memcpy(dst, readPtr, elementSize);
    }
    
    // Encoder factory handed to gl::ProgramLinkedResourcesLinker (see ProgramVk::linkResources).
    // Every call to makeEncoder() returns a freshly heap-allocated sh::Std140BlockEncoder.
    // NOTE(review): the returned pointer is raw; presumably the caller takes ownership - confirm
    // against gl::CustomBlockLayoutEncoderFactory.
    class Std140BlockLayoutEncoderFactory : public gl::CustomBlockLayoutEncoderFactory
    {
      public:
        sh::BlockLayoutEncoder *makeEncoder() override { return new sh::Std140BlockEncoder(); }
    };
    }  // anonymous namespace
    
    // ProgramVk implementation.
    // Constructs the Vulkan program implementation for |state|: resets the glslang program
    // interface info and gives the executable a pointer back to this program.
    ProgramVk::ProgramVk(const gl::ProgramState &state) : ProgramImpl(state)
    {
        GlslangWrapperVk::ResetGlslangProgramInterfaceInfo(&mGlslangProgramInterfaceInfo);
        mExecutable.setProgram(this);
    }
    
    // Defaulted destructor; explicit teardown is done through destroy(), which calls reset().
    ProgramVk::~ProgramVk() = default;
    
    // Tears the program down by resetting it against the Vulkan context behind |context|.
    void ProgramVk::destroy(const gl::Context *context)
    {
        reset(vk::GetImpl(context));
    }
    
    // Returns the program to its unlinked state: releases the original shader info, resets the
    // glslang program interface info and resets the executable.
    // NOTE(review): mDefaultUniformBlocks is not touched here.
    void ProgramVk::reset(ContextVk *contextVk)
    {
        mOriginalShaderInfo.release(contextVk);
    
        GlslangWrapperVk::ResetGlslangProgramInterfaceInfo(&mGlslangProgramInterfaceInfo);
    
        mExecutable.reset(contextVk);
    }
    
    // Restores the program from a binary |stream| written by save().
    //
    // The stream is consumed in the same order save() wrote it: original shader info, executable,
    // per-stage default uniform layouts, then per-stage default uniform buffer sizes. Afterwards
    // the uniform memory is (re)allocated and the pipeline layout is created. The outcome is
    // returned wrapped in a LinkEventDone. |infoLog| is not used.
    std::unique_ptr<rx::LinkEvent> ProgramVk::load(const gl::Context *context,
                                                   gl::BinaryInputStream *stream,
                                                   gl::InfoLog &infoLog)
    {
        ContextVk *contextVk = vk::GetImpl(context);
        gl::ShaderMap<size_t> requiredBufferSize;
        requiredBufferSize.fill(0);

        reset(contextVk);

        mOriginalShaderInfo.load(stream);
        mExecutable.load(stream);

        // Deserializes the uniformLayout data of mDefaultUniformBlocks
        for (gl::ShaderType shaderType : gl::AllShaderTypes())
        {
            auto &uniformLayout = mDefaultUniformBlocks[shaderType].uniformLayout;

            // reset() does not clear the layouts, so drop whatever an earlier link()/load() left
            // behind. Otherwise the entries read below would be appended after stale ones and the
            // by-location indexing used by the uniform setters/getters would be off.
            uniformLayout.clear();

            const size_t uniformCount = stream->readInt<size_t>();
            for (size_t uniformIndex = 0; uniformIndex < uniformCount; ++uniformIndex)
            {
                sh::BlockMemberInfo blockInfo;
                gl::LoadBlockMemberInfo(stream, &blockInfo);
                uniformLayout.push_back(blockInfo);
            }
        }

        // Deserializes required uniform block memory sizes
        for (gl::ShaderType shaderType : gl::AllShaderTypes())
        {
            requiredBufferSize[shaderType] = stream->readInt<size_t>();
        }

        // Initialize and resize the mDefaultUniformBlocks' memory
        angle::Result status = resizeUniformBlockMemory(contextVk, requiredBufferSize);
        if (status != angle::Result::Continue)
        {
            return std::make_unique<LinkEventDone>(status);
        }

        status = mExecutable.createPipelineLayout(context, nullptr);
        return std::make_unique<LinkEventDone>(status);
    }
    
    // Writes the program to a binary |stream| in the order load() expects: original shader info,
    // executable, per-stage default uniform layouts, then per-stage default uniform data sizes.
    void ProgramVk::save(const gl::Context *context, gl::BinaryOutputStream *stream)
    {
        mOriginalShaderInfo.save(stream);
        mExecutable.save(stream);

        // Per stage: the number of layout entries followed by the entries themselves.
        for (gl::ShaderType shaderType : gl::AllShaderTypes())
        {
            auto &layout            = mDefaultUniformBlocks[shaderType].uniformLayout;
            const size_t entryCount = layout.size();
            stream->writeInt(entryCount);

            for (unsigned int entry = 0; entry < entryCount; ++entry)
            {
                gl::WriteBlockMemberInfo(stream, layout[entry]);
            }
        }

        // Per stage: the byte size of the CPU-side default uniform data.
        for (gl::ShaderType shaderType : gl::AllShaderTypes())
        {
            stream->writeInt(mDefaultUniformBlocks[shaderType].uniformData.size());
        }
    }
    
    // Intentionally empty: |retrievable| is ignored by this backend.
    void ProgramVk::setBinaryRetrievableHint(bool retrievable)
    {
        // Nothing to do here yet.
    }
    
    // Intentionally empty: |separable| is ignored by this backend.
    void ProgramVk::setSeparable(bool separable)
    {
        // Nothing to do here yet.
    }
    
    // TODO: http://anglebug.com/3570: Move/Copy all of the necessary information into
    // the ProgramExecutable, so this function can be removed.
    //
    // Fills |programStatesOut| for every shader type: stages linked into this program's executable
    // point at this program's state, all others are set to nullptr.
    void ProgramVk::fillProgramStateMap(gl::ShaderMap<const gl::ProgramState *> *programStatesOut)
    {
        for (gl::ShaderType shaderType : gl::AllShaderTypes())
        {
            const bool stageIsLinked = mState.getExecutable().hasLinkedShaderStage(shaderType);
            (*programStatesOut)[shaderType] = stageIsLinked ? &mState : nullptr;
        }
    }
    
    // Links the program for the Vulkan backend.
    //
    // Steps, in order: link resources, reset previous state, gather SPIR-V and variable info,
    // initialize the shaders, set up the default uniform blocks, optionally resolve precision
    // mismatches, and create the pipeline layout. The first failing step's status is returned
    // wrapped in a LinkEventDone; otherwise the status of the pipeline layout creation is returned.
    // |infoLog| is not used here.
    std::unique_ptr<LinkEvent> ProgramVk::link(const gl::Context *context,
                                               const gl::ProgramLinkedResources &resources,
                                               gl::InfoLog &infoLog,
                                               const gl::ProgramMergedVaryings &mergedVaryings)
    {
        ANGLE_TRACE_EVENT0("gpu.angle", "ProgramVk::link");
    
        ContextVk *contextVk = vk::GetImpl(context);
        // Link resources before calling GetShaderSource to make sure they are ready for the set/binding
        // assignment done in that function.
        linkResources(resources);
    
        // Drop everything left over from a previous link/load before rebuilding.
        reset(contextVk);
        mExecutable.clearVariableInfoMap();
    
        // Gather variable info and compiled SPIR-V binaries.
        gl::ShaderMap<const angle::spirv::Blob *> spirvBlobs;
        GlslangWrapperVk::GetShaderCode(contextVk->getFeatures(), mState, resources,
                                        &mGlslangProgramInterfaceInfo, &spirvBlobs,
                                        &mExecutable.mVariableInfoMap);
    
        // Compile the shaders.
        angle::Result status = mOriginalShaderInfo.initShaders(
            mState.getExecutable().getLinkedShaderStages(), spirvBlobs, mExecutable.mVariableInfoMap);
        if (status != angle::Result::Continue)
        {
            return std::make_unique<LinkEventDone>(status);
        }
    
        // Lay out and allocate the CPU-side default uniform blocks.
        status = initDefaultUniformBlocks(context);
        if (status != angle::Result::Continue)
        {
            return std::make_unique<LinkEventDone>(status);
        }
    
        // Only done when the enablePrecisionQualifiers feature is enabled.
        if (contextVk->getFeatures().enablePrecisionQualifiers.enabled)
        {
            mExecutable.resolvePrecisionMismatch(mergedVaryings);
        }
    
        // TODO(jie.a.chen@intel.com): Parallelize linking.
        // http://crbug.com/849576
        status = mExecutable.createPipelineLayout(context, nullptr);
        return std::make_unique<LinkEventDone>(status);
    }
    
    // Links |resources| into the program state using a linker configured with the Std140 encoder
    // factory defined at the top of this file.
    void ProgramVk::linkResources(const gl::ProgramLinkedResources &resources)
    {
        // The factory is a local; the linker only holds a pointer to it for the duration of this call.
        Std140BlockLayoutEncoderFactory std140EncoderFactory;
        gl::ProgramLinkedResourcesLinker linker(&std140EncoderFactory);
    
        linker.linkResources(mState, resources);
    }
    
    // Builds the default uniform block layout for every linked shader stage and allocates the
    // CPU-side memory backing those blocks. Returns the result of the memory allocation.
    angle::Result ProgramVk::initDefaultUniformBlocks(const gl::Context *glContext)
    {
        // Byte size needed per stage; stages that are never visited stay at 0.
        gl::ShaderMap<size_t> bufferSizes;
        bufferSizes.fill(0);

        // Per-stage layout maps produced by the block encoder.
        gl::ShaderMap<sh::BlockLayoutMap> layouts;

        generateUniformLayoutMapping(layouts, bufferSizes);
        initDefaultUniformLayoutMapping(layouts);

        // Layout is final at this point; size the buffers to match.
        return resizeUniformBlockMemory(vk::GetImpl(glContext), bufferSizes);
    }
    
    // For every linked shader stage that has an attached shader, computes the default uniform
    // block layout into |layoutMap| and its required byte size into |requiredBufferSize|.
    void ProgramVk::generateUniformLayoutMapping(gl::ShaderMap<sh::BlockLayoutMap> &layoutMap,
                                                 gl::ShaderMap<size_t> &requiredBufferSize)
    {
        const gl::ProgramExecutable &executable = mState.getExecutable();

        for (const gl::ShaderType shaderType : executable.getLinkedShaderStages())
        {
            gl::Shader *attachedShader = mState.getAttachedShader(shaderType);
            if (!attachedShader)
            {
                // Nothing attached for this stage: its outputs are left untouched.
                continue;
            }

            InitDefaultUniformBlock(attachedShader->getUniforms(), &layoutMap[shaderType],
                                    &requiredBufferSize[shaderType]);
        }
    }
    
    // Builds, for every linked shader stage, the table that maps a uniform location to its layout
    // inside that stage's default uniform block.
    //
    // One entry is appended per uniform location. Locations that are unused, ignored, or refer to
    // uniforms that do not live in the default block (samplers, images, fragment in/out) get a
    // default-constructed sh::BlockMemberInfo; the setters/getters treat an offset of -1 as
    // "not present in this stage".
    void ProgramVk::initDefaultUniformLayoutMapping(gl::ShaderMap<sh::BlockLayoutMap> &layoutMap)
    {
        // Init the default block layout info.
        const auto &uniforms                      = mState.getUniforms();
        const gl::ProgramExecutable &glExecutable = mState.getExecutable();

        // The tables are indexed by uniform location, so they must start empty. reset() does not
        // clear them, and without this a relink would append after the previous link's entries.
        for (const gl::ShaderType shaderType : gl::AllShaderTypes())
        {
            mDefaultUniformBlocks[shaderType].uniformLayout.clear();
        }

        for (const gl::VariableLocation &location : mState.getUniformLocations())
        {
            gl::ShaderMap<sh::BlockMemberInfo> layoutInfo;

            if (location.used() && !location.ignored)
            {
                const auto &uniform = uniforms[location.index];
                if (uniform.isInDefaultBlock() && !uniform.isSampler() && !uniform.isImage() &&
                    !uniform.isFragmentInOut)
                {
                    std::string uniformName = uniform.name;
                    if (uniform.isArray())
                    {
                        // Gets the uniform name without the [0] at the end.
                        uniformName = gl::StripLastArrayIndex(uniformName);
                        ASSERT(uniformName.size() != uniform.name.size());
                    }

                    bool found = false;

                    // Pick up the layout from every stage whose block contains this uniform.
                    for (const gl::ShaderType shaderType : glExecutable.getLinkedShaderStages())
                    {
                        auto it = layoutMap[shaderType].find(uniformName);
                        if (it != layoutMap[shaderType].end())
                        {
                            found                  = true;
                            layoutInfo[shaderType] = it->second;
                        }
                    }

                    ASSERT(found);
                }
            }

            // Always append, even for skipped locations, so that index == location.
            for (const gl::ShaderType shaderType : glExecutable.getLinkedShaderStages())
            {
                mDefaultUniformBlocks[shaderType].uniformLayout.push_back(layoutInfo[shaderType]);
            }
        }
    }
    
    // Allocates and zero-fills the CPU-side default uniform data for every linked stage that
    // needs a non-zero number of bytes, and marks those stages dirty. Reports
    // VK_ERROR_OUT_OF_HOST_MEMORY through |contextVk| if a resize fails.
    angle::Result ProgramVk::resizeUniformBlockMemory(ContextVk *contextVk,
                                                      gl::ShaderMap<size_t> &requiredBufferSize)
    {
        const gl::ProgramExecutable &glExecutable = mState.getExecutable();

        for (const gl::ShaderType shaderType : glExecutable.getLinkedShaderStages())
        {
            const size_t byteCount = requiredBufferSize[shaderType];
            if (byteCount == 0)
            {
                continue;
            }

            DefaultUniformBlock &block = mDefaultUniformBlocks[shaderType];
            if (!block.uniformData.resize(byteCount))
            {
                ANGLE_VK_CHECK(contextVk, false, VK_ERROR_OUT_OF_HOST_MEMORY);
            }

            // Initialize uniform buffer memory to zero by default.
            block.uniformData.fill(0);
            mDefaultUniformBlocksDirty.set(shaderType);
        }

        return angle::Result::Continue;
    }
    
    // Always reports the program as valid; |caps| and |infoLog| are not consulted.
    GLboolean ProgramVk::validate(const gl::Caps &caps, gl::InfoLog *infoLog)
    {
        // No-op. The spec is very vague about the behavior of validation.
        return GL_TRUE;
    }
    
    // Shared implementation of the non-matrix glUniform* setters.
    //
    // Writes |count| elements from |v| into the default uniform data of every linked stage that
    // contains the uniform at |location|, and marks those stages dirty. When the uniform's type
    // equals |entryPointType| the data is copied as-is; otherwise the uniform is expected to be the
    // matching bool type and each component is converted to GL_FALSE/GL_TRUE.
    template <typename T>
    void ProgramVk::setUniformImpl(GLint location, GLsizei count, const T *v, GLenum entryPointType)
    {
        const gl::VariableLocation &locationInfo  = mState.getUniformLocations()[location];
        const gl::LinkedUniform &linkedUniform    = mState.getUniforms()[locationInfo.index];
        const gl::ProgramExecutable &glExecutable = mState.getExecutable();

        ASSERT(!linkedUniform.isSampler());

        const bool typeMatchesEntryPoint = linkedUniform.typeInfo->type == entryPointType;
        const GLint componentCount       = linkedUniform.typeInfo->componentCount;

        for (const gl::ShaderType shaderType : glExecutable.getLinkedShaderStages())
        {
            DefaultUniformBlock &uniformBlock     = mDefaultUniformBlocks[shaderType];
            const sh::BlockMemberInfo &layoutInfo = uniformBlock.uniformLayout[location];

            // Assume an offset of -1 means the block is unused.
            if (layoutInfo.offset == -1)
            {
                continue;
            }

            if (typeMatchesEntryPoint)
            {
                UpdateDefaultUniformBlock(count, locationInfo.arrayIndex, componentCount, v, layoutInfo,
                                          &uniformBlock.uniformData);
            }
            else
            {
                // Bool uniforms set through a float/int/uint entry point: store 0/1 per component.
                ASSERT(linkedUniform.typeInfo->type == gl::VariableBoolVectorType(entryPointType));

                const GLint firstElementOffset =
                    locationInfo.arrayIndex * layoutInfo.arrayStride + layoutInfo.offset;
                for (GLint element = 0; element < count; element++)
                {
                    const GLint elementOffset = element * layoutInfo.arrayStride + firstElementOffset;
                    GLint *dest =
                        reinterpret_cast<GLint *>(uniformBlock.uniformData.data() + elementOffset);
                    const T *source = v + element * componentCount;

                    for (int component = 0; component < componentCount; component++)
                    {
                        dest[component] =
                            (source[component] == static_cast<T>(0)) ? GL_FALSE : GL_TRUE;
                    }
                }
            }

            mDefaultUniformBlocksDirty.set(shaderType);
        }
    }
    
    // Shared implementation of the glGetUniform* getters. Reads the uniform at |location| from the
    // default uniform data of the first shader stage in which it is active and stores it in |v|.
    template <typename T>
    void ProgramVk::getUniformImpl(GLint location, T *v, GLenum entryPointType) const
    {
        const gl::VariableLocation &locationInfo = mState.getUniformLocations()[location];
        const gl::LinkedUniform &linkedUniform   = mState.getUniforms()[locationInfo.index];

        ASSERT(!linkedUniform.isSampler() && !linkedUniform.isImage());

        const gl::ShaderType shaderType = linkedUniform.getFirstShaderTypeWhereActive();
        ASSERT(shaderType != gl::ShaderType::InvalidEnum);

        const DefaultUniformBlock &uniformBlock = mDefaultUniformBlocks[shaderType];
        const sh::BlockMemberInfo &layoutInfo   = uniformBlock.uniformLayout[location];

        ASSERT(linkedUniform.typeInfo->componentType == entryPointType ||
               linkedUniform.typeInfo->componentType == gl::VariableBoolVectorType(entryPointType));

        // Scalars and vectors are a plain copy of one element.
        if (!gl::IsMatrixType(linkedUniform.type))
        {
            ReadFromDefaultUniformBlock(linkedUniform.typeInfo->componentCount, locationInfo.arrayIndex,
                                        v, layoutInfo, &uniformBlock.uniformData);
            return;
        }

        // Matrices go through GetMatrixUniform, which is handed the start of the selected element.
        const uint8_t *matrixStart = uniformBlock.uniformData.data() + layoutInfo.offset +
                                     (locationInfo.arrayIndex * layoutInfo.arrayStride);
        GetMatrixUniform(linkedUniform.type, v, reinterpret_cast<const T *>(matrixStart), false);
    }
    
    // Float setters. Each forwards to setUniformImpl with the GL type enum matching its entry
    // point; |count| is the number of elements in |v|.

    // Sets float uniform(s).
    void ProgramVk::setUniform1fv(GLint location, GLsizei count, const GLfloat *v)
    {
        setUniformImpl(location, count, v, GL_FLOAT);
    }
    
    // Sets vec2 uniform(s).
    void ProgramVk::setUniform2fv(GLint location, GLsizei count, const GLfloat *v)
    {
        setUniformImpl(location, count, v, GL_FLOAT_VEC2);
    }
    
    // Sets vec3 uniform(s).
    void ProgramVk::setUniform3fv(GLint location, GLsizei count, const GLfloat *v)
    {
        setUniformImpl(location, count, v, GL_FLOAT_VEC3);
    }
    
    // Sets vec4 uniform(s).
    void ProgramVk::setUniform4fv(GLint location, GLsizei count, const GLfloat *v)
    {
        setUniformImpl(location, count, v, GL_FLOAT_VEC4);
    }
    
    // Sets int uniform(s). Sampler uniforms also arrive through this entry point and are ignored
    // here.
    void ProgramVk::setUniform1iv(GLint location, GLsizei count, const GLint *v)
    {
        const gl::VariableLocation &locationInfo = mState.getUniformLocations()[location];
        const bool targetsSampler = mState.getUniforms()[locationInfo.index].isSampler();

        // We could potentially cache some indexing here. For now samplers are a no-op since the
        // mapping is handled entirely in ContextVk.
        if (!targetsSampler)
        {
            setUniformImpl(location, count, v, GL_INT);
        }
    }
    
    // Integer vector setters. Each forwards to setUniformImpl with the GL type enum matching its
    // entry point.

    // Sets ivec2 uniform(s).
    void ProgramVk::setUniform2iv(GLint location, GLsizei count, const GLint *v)
    {
        setUniformImpl(location, count, v, GL_INT_VEC2);
    }
    
    // Sets ivec3 uniform(s).
    void ProgramVk::setUniform3iv(GLint location, GLsizei count, const GLint *v)
    {
        setUniformImpl(location, count, v, GL_INT_VEC3);
    }
    
    // Sets ivec4 uniform(s).
    void ProgramVk::setUniform4iv(GLint location, GLsizei count, const GLint *v)
    {
        setUniformImpl(location, count, v, GL_INT_VEC4);
    }
    
    // Unsigned integer setters. Each forwards to setUniformImpl with the GL type enum matching its
    // entry point.

    // Sets uint uniform(s).
    void ProgramVk::setUniform1uiv(GLint location, GLsizei count, const GLuint *v)
    {
        setUniformImpl(location, count, v, GL_UNSIGNED_INT);
    }
    
    // Sets uvec2 uniform(s).
    void ProgramVk::setUniform2uiv(GLint location, GLsizei count, const GLuint *v)
    {
        setUniformImpl(location, count, v, GL_UNSIGNED_INT_VEC2);
    }
    
    // Sets uvec3 uniform(s).
    void ProgramVk::setUniform3uiv(GLint location, GLsizei count, const GLuint *v)
    {
        setUniformImpl(location, count, v, GL_UNSIGNED_INT_VEC3);
    }
    
    // Sets uvec4 uniform(s).
    void ProgramVk::setUniform4uiv(GLint location, GLsizei count, const GLuint *v)
    {
        setUniformImpl(location, count, v, GL_UNSIGNED_INT_VEC4);
    }
    
    // Shared implementation of the glUniformMatrix* setters for a cols x rows float matrix.
    // Writes |count| matrices from |value| into the default uniform data of every linked stage
    // that contains the uniform at |location| and marks those stages dirty.
    template <int cols, int rows>
    void ProgramVk::setUniformMatrixfv(GLint location,
                                       GLsizei count,
                                       GLboolean transpose,
                                       const GLfloat *value)
    {
        const gl::VariableLocation &locationInfo  = mState.getUniformLocations()[location];
        const gl::LinkedUniform &linkedUniform    = mState.getUniforms()[locationInfo.index];
        const gl::ProgramExecutable &glExecutable = mState.getExecutable();

        for (const gl::ShaderType shaderType : glExecutable.getLinkedShaderStages())
        {
            DefaultUniformBlock &block              = mDefaultUniformBlocks[shaderType];
            const sh::BlockMemberInfo &memberLayout = block.uniformLayout[location];

            // Assume an offset of -1 means the block is unused.
            if (memberLayout.offset != -1)
            {
                uint8_t *memberStart = block.uniformData.data() + memberLayout.offset;
                SetFloatUniformMatrixGLSL<cols, rows>::Run(locationInfo.arrayIndex,
                                                           linkedUniform.getArraySizeProduct(), count,
                                                           transpose, value, memberStart);

                mDefaultUniformBlocksDirty.set(shaderType);
            }
        }
    }
    
    // Matrix setters. Each forwards to setUniformMatrixfv<cols, rows> with the template arguments
    // shown in its body; the arguments are passed through unchanged.

    // Square 2x2 matrix.
    void ProgramVk::setUniformMatrix2fv(GLint location,
                                        GLsizei count,
                                        GLboolean transpose,
                                        const GLfloat *value)
    {
        setUniformMatrixfv<2, 2>(location, count, transpose, value);
    }
    
    // Square 3x3 matrix.
    void ProgramVk::setUniformMatrix3fv(GLint location,
                                        GLsizei count,
                                        GLboolean transpose,
                                        const GLfloat *value)
    {
        setUniformMatrixfv<3, 3>(location, count, transpose, value);
    }
    
    // Square 4x4 matrix.
    void ProgramVk::setUniformMatrix4fv(GLint location,
                                        GLsizei count,
                                        GLboolean transpose,
                                        const GLfloat *value)
    {
        setUniformMatrixfv<4, 4>(location, count, transpose, value);
    }
    
    // Non-square: cols = 2, rows = 3.
    void ProgramVk::setUniformMatrix2x3fv(GLint location,
                                          GLsizei count,
                                          GLboolean transpose,
                                          const GLfloat *value)
    {
        setUniformMatrixfv<2, 3>(location, count, transpose, value);
    }
    
    // Non-square: cols = 3, rows = 2.
    void ProgramVk::setUniformMatrix3x2fv(GLint location,
                                          GLsizei count,
                                          GLboolean transpose,
                                          const GLfloat *value)
    {
        setUniformMatrixfv<3, 2>(location, count, transpose, value);
    }
    
    // Non-square: cols = 2, rows = 4.
    void ProgramVk::setUniformMatrix2x4fv(GLint location,
                                          GLsizei count,
                                          GLboolean transpose,
                                          const GLfloat *value)
    {
        setUniformMatrixfv<2, 4>(location, count, transpose, value);
    }
    
    // Non-square: cols = 4, rows = 2.
    void ProgramVk::setUniformMatrix4x2fv(GLint location,
                                          GLsizei count,
                                          GLboolean transpose,
                                          const GLfloat *value)
    {
        setUniformMatrixfv<4, 2>(location, count, transpose, value);
    }
    
    // Non-square: cols = 3, rows = 4.
    void ProgramVk::setUniformMatrix3x4fv(GLint location,
                                          GLsizei count,
                                          GLboolean transpose,
                                          const GLfloat *value)
    {
        setUniformMatrixfv<3, 4>(location, count, transpose, value);
    }
    
    // Non-square: cols = 4, rows = 3.
    void ProgramVk::setUniformMatrix4x3fv(GLint location,
                                          GLsizei count,
                                          GLboolean transpose,
                                          const GLfloat *value)
    {
        setUniformMatrixfv<4, 3>(location, count, transpose, value);
    }
    
    // Uniform getters. Each forwards to getUniformImpl with the component type of its entry
    // point; |context| is not used.

    // Reads the uniform at |location| as floats into |params|.
    void ProgramVk::getUniformfv(const gl::Context *context, GLint location, GLfloat *params) const
    {
        getUniformImpl(location, params, GL_FLOAT);
    }
    
    // Reads the uniform at |location| as signed integers into |params|.
    void ProgramVk::getUniformiv(const gl::Context *context, GLint location, GLint *params) const
    {
        getUniformImpl(location, params, GL_INT);
    }
    
    // Reads the uniform at |location| as unsigned integers into |params|.
    void ProgramVk::getUniformuiv(const gl::Context *context, GLint location, GLuint *params) const
    {
        getUniformImpl(location, params, GL_UNSIGNED_INT);
    }
    
    // Computes how many bytes are needed to upload the default uniform data of all dirty linked
    // stages. For each dirty stage, |uniformOffsets| receives the running total before that stage
    // is added, i.e. the stage's offset inside the combined allocation. Entries for stages that
    // are not dirty are left untouched.
    size_t ProgramVk::calcUniformUpdateRequiredSpace(ContextVk *contextVk,
                                                     const gl::ProgramExecutable &glExecutable,
                                                     gl::ShaderMap<VkDeviceSize> &uniformOffsets) const
    {
        size_t totalBytes = 0;

        for (const gl::ShaderType shaderType : glExecutable.getLinkedShaderStages())
        {
            if (!mDefaultUniformBlocksDirty[shaderType])
            {
                continue;
            }

            uniformOffsets[shaderType] = totalBytes;
            totalBytes += getDefaultUniformAlignedSize(contextVk, shaderType);
        }

        return totalBytes;
    }
    
    // Copies the dirty default-uniform blocks of this program into the context's default uniform
    // storage, records the resulting per-stage dynamic offsets on the executable, and (re)allocates
    // the uniforms/transform-feedback descriptor set when the backing buffer's serial differs from
    // the one the executable currently reports.
    //
    // Must only be called while at least one stage has dirty uniforms. Returns
    // angle::Result::Continue on success; failures from the storage or descriptor set allocation are
    // propagated through ANGLE_TRY.
    angle::Result ProgramVk::updateUniforms(ContextVk *contextVk)
    {
        ASSERT(dirtyUniforms());

        const gl::ProgramExecutable &executable = mState.getExecutable();

        // Per-stage byte offsets, relative to the start of the range allocated below.
        gl::ShaderMap<VkDeviceSize> stageOffsets;

        // Normally only the stages that are actually dirty get re-uploaded. All stages share a single
        // buffer though, so once that buffer is switched every stage has to be written again to keep
        // all of the program's uniform data together in the same buffer.
        size_t bytesNeeded = calcUniformUpdateRequiredSpace(contextVk, executable, stageOffsets);
        ASSERT(bytesNeeded > 0);

        // First try to carve the range out of the storage's current buffer; only when that is not
        // possible do we handle the fallout and go through the general allocation path.
        vk::DynamicBuffer *uniformStorage = contextVk->getDefaultUniformStorage();
        uint8_t *mappedData               = nullptr;
        VkDeviceSize baseOffset           = 0;
        if (!uniformStorage->allocateFromCurrentBuffer(bytesNeeded, &mappedData, &baseOffset))
        {
            // Every stage that has default uniform data is flagged dirty, and the space requirement
            // is recomputed with those flags in place.
            for (const gl::ShaderType stage : executable.getLinkedShaderStages())
            {
                if (mDefaultUniformBlocks[stage].uniformData.empty())
                {
                    continue;
                }
                mDefaultUniformBlocksDirty.set(stage);
            }

            bytesNeeded = calcUniformUpdateRequiredSpace(contextVk, executable, stageOffsets);

            // The flag is only an out-parameter here; whether descriptor sets need to be rebuilt is
            // decided further down by comparing buffer serials.
            bool newBufferAllocated = false;
            ANGLE_TRY(uniformStorage->allocate(contextVk, bytesNeeded, &mappedData, nullptr,
                                               &baseOffset, &newBufferAllocated));
        }

        // Write the data straight through the mapped pointer. This immediate update only works once.
        // The slot counter advances for every linked stage, dirty or not, so each stage keeps a fixed
        // position in the dynamic offset array.
        uint32_t nextOffsetSlot = 0;
        for (const gl::ShaderType stage : executable.getLinkedShaderStages())
        {
            const uint32_t offsetSlot = nextOffsetSlot++;
            if (!mDefaultUniformBlocksDirty[stage])
            {
                continue;
            }

            const VkDeviceSize stageOffset       = stageOffsets[stage];
            const angle::MemoryBuffer &stageData = mDefaultUniformBlocks[stage].uniformData;
            memcpy(mappedData + stageOffset, stageData.data(), stageData.size());

            mExecutable.mDynamicUniformDescriptorOffsets[offsetSlot] =
                static_cast<uint32_t>(baseOffset + stageOffset);
            mDefaultUniformBlocksDirty.reset(stage);
        }
        ANGLE_TRY(uniformStorage->flush(contextVk));

        vk::BufferHelper *uniformBuffer = uniformStorage->getCurrentBuffer();
        if (mExecutable.getCurrentDefaultUniformBufferSerial() != uniformBuffer->getBufferSerial())
        {
            // The uniforms now live in a different buffer. Descriptor sets can't be modified once
            // initialized, so one matching the new buffer has to be obtained.
            vk::UniformsAndXfbDescriptorDesc uniformsOnlyDesc;
            vk::UniformsAndXfbDescriptorDesc *descriptorDesc = &uniformsOnlyDesc;
            if (executable.hasTransformFeedbackOutput())
            {
                // With transform feedback output the description owned by the current transform
                // feedback object is used (and updated) instead of a local one.
                TransformFeedbackVk *currentXfbVk =
                    vk::GetImpl(contextVk->getState().getCurrentTransformFeedback());
                descriptorDesc = &currentXfbVk->getTransformFeedbackDesc();
            }
            descriptorDesc->updateDefaultUniformBuffer(uniformBuffer->getBufferSerial());

            bool descriptorSetIsNew;
            ANGLE_TRY(mExecutable.allocUniformAndXfbDescriptorSet(contextVk, *descriptorDesc,
                                                                  &descriptorSetIsNew));
            if (descriptorSetIsNew)
            {
                // A newly allocated set still has to be filled in: one default uniform binding per
                // linked stage, followed by the transform feedback bindings.
                for (const gl::ShaderType stage : executable.getLinkedShaderStages())
                {
                    mExecutable.updateDefaultUniformsDescriptorSet(
                        stage, mDefaultUniformBlocks[stage], uniformBuffer, contextVk);
                }
                mExecutable.updateTransformFeedbackDescriptorSetImpl(mState, contextVk);
            }
        }

        return angle::Result::Continue;
    }
    
    // Walks every shader stage linked into this program's executable and hands it to
    // setShaderUniformDirtyBit().
    void ProgramVk::setAllDefaultUniformsDirty()
    {
        const gl::ProgramExecutable &executable = mState.getExecutable();
        for (const gl::ShaderType linkedStage : executable.getLinkedShaderStages())
        {
            setShaderUniformDirtyBit(linkedStage);
        }
    }
    
    // Program-bind hook (per its name; the call site is outside this chunk). Requests the uniform
    // dirty bit for every linked shader stage through setAllDefaultUniformsDirty().
    void ProgramVk::onProgramBind()
    {
        // Because all programs share default uniform buffers, when we switch programs, we have to
        // re-update all uniform data. We could do more tracking to avoid the update if the context's
        // current uniform buffer is still the same buffer we used last time and the buffer has not
        // been recycled. But statistics gathered on gfxbench show that apps always update uniform
        // data on program bind anyway, so it is not really worth adding more tracking logic here.
        setAllDefaultUniformsDirty();
    }
    }  // namespace rx