Edit

kc3-lang/angle/src/tests/perf_tests/VulkanBarriersPerf.cpp

Branch :

  • Show log

    Commit

  • Author : Brian Sheedy
    Date : 2019-08-16 14:09:13
    Hash : 2f4a7518
    Message : Refactor perf tests to fix metric/story swapping Refactors the perf tests to fix the issue of metric and story being swapped, which causes issues when trying to convert to histograms. Specifically, does the following: 1. Rolls the version of src/tests/perf_tests/third_party/perf/ to Chromium 476dae823269c8d05b544271af97ad1adb0db8ee 2. Switch to using PerfResultReporter instead of PrintResult directly. 3. Split RenderTestParams::suffix into backend and story; backend is used as part of the metric, while story is used as the story. 4. Remove the "average" metric that was being automatically reported by ANGLEPerfTest, as reported results are automatically averaged. 5. Update the reported metric to more clearly distinguish between test, backend, and metric. It is now name_backend.metric. e.g. DrawCallPerf_vulkan.wall_time. Bug: chromium:923564,chromium:924618 Change-Id: I00cc191407052f23df57dbfa53b6fb088fc26960 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1762360 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Jonah Ryan-Davis <jonahr@google.com>

  • src/tests/perf_tests/VulkanBarriersPerf.cpp
  • //
    // Copyright 2019 The ANGLE Project Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style license that can be
    // found in the LICENSE file.
    //
    // VulkanBarriersPerf:
    //   Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.
    //
    
    #include <sstream>
    
    #include "ANGLEPerfTest.h"
    #include "test_utils/gl_raii.h"
    #include "util/shader_utils.h"
    
    using namespace angle;
    
    namespace
    {
    // Number of loop iterations performed per benchmark step.  VulkanBarriersPerfParams stores this
    // value in its iterationsPerStep field, which is the loop bound used by drawBenchmark().
    constexpr unsigned int kIterationsPerStep = 10;
    
    // Parameters describing one configuration of the Vulkan barriers benchmark.  In addition to the
    // inherited render test parameters, a configuration records whether the large transfer textures
    // are created and whether the slow fragment shader is used.
    struct VulkanBarriersPerfParams final : public RenderTestParams
    {
        VulkanBarriersPerfParams(bool largeTransfers, bool slowFS)
            : doLargeTransfers(largeTransfers), doSlowFragmentShaders(slowFS)
        {
            // Backend and requested context version (2.0), shared by every configuration.
            eglParameters = egl_platform::VULKAN();
            majorVersion  = 2;
            minorVersion  = 0;

            // Window dimensions.
            windowWidth  = 256;
            windowHeight = 256;

            // Measurement settings.
            iterationsPerStep = kIterationsPerStep;
            trackGpuTime      = true;
        }

        // Returns the base story with one suffix per enabled option appended.
        std::string story() const override;

        // Side lengths of the square textures created by the benchmark, selected by a size index.
        static constexpr int kImageSizes[3] = {256, 512, 4096};

        // When true, the two transfer textures are created in addition to the framebuffers.
        bool doLargeTransfers;
        // When true, the slow fragment shader is compiled instead of the short one.
        bool doSlowFragmentShaders;
    };
    
    // Out-of-class definition of the static constexpr array declared in the struct.
    // NOTE(review): presumably required because the array is indexed at runtime (an odr-use) and the
    // file may be built in a language mode older than C++17, where static constexpr data members
    // are not implicitly inline -- confirm against the build configuration.
    constexpr int VulkanBarriersPerfParams::kImageSizes[];
    
    // Writes the printable name of a parameter set: the combined backend-and-story string with its
    // first character removed.
    std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams &params)
    {
        return os << params.backendAndStory().substr(1);
    }
    
    // Parameterized render benchmark that repeatedly draws a textured quad between two
    // framebuffers of different sizes.  The configuration comes from VulkanBarriersPerfParams.
    class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
                                        public ::testing::WithParamInterface<VulkanBarriersPerfParams>
    {
      public:
        VulkanBarriersPerfBenchmark();
    
        // Creates all GL resources and sets the clear color.
        void initializeBenchmark() override;
        // Intentionally empty; see the definition.
        void destroyBenchmark() override;
        // Issues the timed sequence of clears and draws.
        void drawBenchmark() override;
    
      private:
        // Allocates an RGBA texture of kImageSizes[sizeIndex] x kImageSizes[sizeIndex] for
        // mTextures[textureIndex].  The |compressed| argument is currently not used.
        void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
        // Creates the texture as above and attaches it as color attachment 0 of mFbos[fboIndex].
        void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
        // Builds the program, the vertex/index buffers, the framebuffers and (optionally) the
        // transfer textures.
        void createResources();
    
        // Handle to the program object
        GLProgram mProgram;
    
        // Attribute locations
        GLint mPositionLoc;
        GLint mTexCoordLoc;
    
        // Sampler location
        GLint mSamplerLoc;
    
        // Texture handles, indexed by the k*TextureIndex constants below
        GLTexture mTextures[4];
    
        // Framebuffer handles, indexed by the k*FboIndex constants below
        GLFramebuffer mFbos[2];
    
        // Buffer handles
        GLBuffer mVertexBuffer;
        GLBuffer mIndexBuffer;
    
        // Indices into mFbos
        static constexpr size_t kSmallFboIndex = 0;
        static constexpr size_t kLargeFboIndex = 1;
    
        // Indices into mTextures
        static constexpr size_t kSmallTextureIndex     = 0;
        static constexpr size_t kLargeTextureIndex     = 1;
        static constexpr size_t kTransferTexture1Index = 2;
        static constexpr size_t kTransferTexture2Index = 3;
    
        // Indices into VulkanBarriersPerfParams::kImageSizes
        static constexpr size_t kSmallSizeIndex = 0;
        static constexpr size_t kLargeSizeIndex = 1;
        static constexpr size_t kHugeSizeIndex  = 2;
    };
    
    // Builds the story name: the base class story followed by "_transfer" when large transfers are
    // enabled and "_slowfs" when the slow fragment shader is enabled, in that order.
    std::string VulkanBarriersPerfParams::story() const
    {
        std::string name = RenderTestParams::story();

        if (doLargeTransfers)
        {
            name += "_transfer";
        }

        if (doSlowFragmentShaders)
        {
            name += "_slowfs";
        }

        return name;
    }
    
    // Passes the test name "VulkanBarriersPerf" and the current test parameter to the
    // ANGLERenderTest base, and initializes every shader location to -1 until createResources()
    // queries the real values from the linked program.
    VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
        : ANGLERenderTest("VulkanBarriersPerf", GetParam()),
          mPositionLoc(-1),
          mTexCoordLoc(-1),
          mSamplerLoc(-1)
    {}
    
    // Vertex shader source: forwards a_position to gl_Position unchanged and passes a_texCoord
    // through to the fragment stage as v_texCoord.
    constexpr char kVS[] = R"(attribute vec4 a_position;
    attribute vec2 a_texCoord;
    varying vec2 v_texCoord;
    void main()
    {
        gl_Position = a_position;
        v_texCoord  = a_texCoord;
    })";
    
    // Fast fragment shader source: a single lookup of s_texture at v_texCoord, written directly
    // to gl_FragColor.
    constexpr char kShortFS[] = R"(precision mediump float;
    varying vec2 v_texCoord;
    uniform sampler2D s_texture;
    void main()
    {
        gl_FragColor = texture2D(s_texture, v_texCoord);
    })";
    
    // Slow fragment shader source: fragments with v_texCoord.x < 0.2 accumulate 100 lookups of
    // s_texture at v_texCoord; all other fragments output vec4(0).
    constexpr char kSlowFS[] = R"(precision mediump float;
    varying vec2 v_texCoord;
    uniform sampler2D s_texture;
    void main()
    {
        vec4 outColor = vec4(0);
        if (v_texCoord.x < 0.2)
        {
            for (int i = 0; i < 100; ++i)
            {
                outColor += texture2D(s_texture, v_texCoord);
            }
        }
        gl_FragColor = outColor;
    })";
    
    void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
                                                    uint32_t sizeIndex,
                                                    bool compressed)
    {
        const auto &params = GetParam();
    
        // TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
        // backend. http://anglebug.com/2999
    
        glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
                     params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
    
        // Disable mipmapping
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    }
    
    // Creates the (uncompressed) texture mTextures[textureIndex] with the size selected by
    // sizeIndex, then binds mFbos[fboIndex] and attaches level 0 of that texture as its
    // GL_COLOR_ATTACHMENT0.  The framebuffer is left bound on return.
    void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
                                                        uint32_t textureIndex,
                                                        uint32_t sizeIndex)
    {
        // The color attachment must have storage before it is attached.
        constexpr bool kCompressed = false;
        createTexture(textureIndex, sizeIndex, kCompressed);

        glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
        glFramebufferTexture2D(
            GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTextures[textureIndex], 0);
    }
    
    void VulkanBarriersPerfBenchmark::createResources()
    {
        const auto &params = GetParam();
    
        mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
        ASSERT_TRUE(mProgram.valid());
    
        // Get the attribute locations
        mPositionLoc = glGetAttribLocation(mProgram, "a_position");
        mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");
    
        // Get the sampler location
        mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");
    
        // Build the vertex buffer
        GLfloat vertices[] = {
            -0.5f, 0.5f,  0.0f,  // Position 0
            0.0f,  0.0f,         // TexCoord 0
            -0.5f, -0.5f, 0.0f,  // Position 1
            0.0f,  1.0f,         // TexCoord 1
            0.5f,  -0.5f, 0.0f,  // Position 2
            1.0f,  1.0f,         // TexCoord 2
            0.5f,  0.5f,  0.0f,  // Position 3
            1.0f,  0.0f          // TexCoord 3
        };
    
        glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
        glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
    
        GLushort indices[] = {0, 1, 2, 0, 2, 3};
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
        glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
    
        // Use tightly packed data
        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    
        // Create four textures.  Two of them are going to be framebuffers, and two are used for large
        // transfers.
        createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
        createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
    
        if (params.doLargeTransfers)
        {
            createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
            createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
        }
    }
    
    // One-time setup: creates all GL resources, sets the clear color to transparent black, and
    // asserts that no GL error has been generated up to this point.
    void VulkanBarriersPerfBenchmark::initializeBenchmark()
    {
        createResources();
    
        glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
    
        ASSERT_GL_NO_ERROR();
    }
    
    // Intentionally empty: no explicit teardown is performed here.
    // NOTE(review): the GL objects are held in GLProgram/GLTexture/GLFramebuffer/GLBuffer members,
    // presumably RAII wrappers from gl_raii.h that release themselves -- confirm.
    void VulkanBarriersPerfBenchmark::destroyBenchmark() {}
    
    // Runs one benchmark step.  Sets up the program, vertex layout and sampler, then -- bracketed by
    // startGpuTimer()/stopGpuTimer() -- performs params.iterationsPerStep iterations, each of which
    // clears, binds a destination framebuffer and draws the quad while sampling from the other
    // framebuffer's texture.  Even iterations draw from the small texture into the large
    // framebuffer; odd iterations draw from the large texture into the small framebuffer.
    void VulkanBarriersPerfBenchmark::drawBenchmark()
    {
        const auto &params = GetParam();

        glUseProgram(mProgram);

        // Bind the buffers
        glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);

        // Load the vertex position: three floats at the start of each five-float vertex
        glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), nullptr);
        // Load the texture coordinate: two floats following the position
        glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
                              reinterpret_cast<void *>(3 * sizeof(GLfloat)));

        glEnableVertexAttribArray(mPositionLoc);
        glEnableVertexAttribArray(mTexCoordLoc);

        // Point the texture sampler at texture unit 0
        glUniform1i(mSamplerLoc, 0);

        /*
         * The perf benchmark does the following:
         *
         * - Alternately clear and draw from fbo 1 into fbo 2 and back.  This would use the color
         * attachment and shader read-only layouts in the fragment shader and color attachment stages.
         *
         * Once compressed texture copies are supported, alternately transfer large chunks of data from
         * texture 1 into texture 2 and back.  This would use the transfer layouts in the transfer
         * stage.
         *
         * Once compute shader support is added, another independent set of operations could be a few
         * dispatches.  This would use the general and shader read-only layouts in the compute stage.
         *
         * The idea is to create independent pipelines of operations that would run in parallel on the
         * GPU.  Regressions or inefficiencies in the barrier implementation could result in
         * serialization of these jobs, resulting in a hit in performance.
         *
         * The above operations for example should ideally run on the GPU threads in parallel:
         *
         * + |---draw---||---draw---||---draw---||---draw---||---draw---|
         * + |-----------transfer------------||-----------transfer------------|
         * + |-----dispatch------||------dispatch------||------dispatch------|
         *
         * If barriers are too restrictive, situations like this could happen (draw is blocking
         * transfer):
         *
         * + |---draw---||---draw---||---draw---||---draw---||---draw---|
         * +             |-----------transfer------------||-----------transfer------------|
         *
         * Or like this (transfer is blocking draw):
         *
         * + |---draw---|                     |---draw---|                     |---draw---|
         * + |-----------transfer------------||-----------transfer------------|
         *
         * Or like this (draw and transfer blocking each other):
         *
         * + |---draw---|                                 |---draw---|
         * +             |-----------transfer------------|            |-----------transfer------------|
         *
         * The idea of doing slow FS calls is to make the second case above slower (by making the draw
         * slower than the transfer):
         *
         * + |------------------draw------------------|                                 |-...draw...-|
         * + |-----------transfer------------|         |-----------transfer------------|
         */

        startGpuTimer();
        for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
        {
            const bool altEven = iteration % 2 == 0;

            const size_t fboDestIndex     = altEven ? kLargeFboIndex : kSmallFboIndex;
            const size_t fboTexSrcIndex   = altEven ? kSmallTextureIndex : kLargeTextureIndex;
            const size_t fboDestSizeIndex = altEven ? kLargeSizeIndex : kSmallSizeIndex;

            // Set the viewport to cover the whole destination image.  The size index only selects
            // an entry of kImageSizes; using the index itself as the dimension would produce a 0x0
            // or 1x1 viewport and the draw would rasterize (almost) nothing.
            const int fboDestSize = params.kImageSizes[fboDestSizeIndex];
            glViewport(0, 0, fboDestSize, fboDestSize);

            // Clear the color buffer
            // NOTE(review): the clear is issued before the destination framebuffer is bound, so it
            // applies to whichever framebuffer is bound at this point (the previous iteration's
            // destination, or the one left bound by setup) -- confirm this ordering is intended.
            glClear(GL_COLOR_BUFFER_BIT);

            // Bind the destination framebuffer
            glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);

            // Bind the source texture to unit 0, which the sampler reads from
            glActiveTexture(GL_TEXTURE0);
            glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);

            ASSERT_GL_NO_ERROR();

            // Draw the quad: six indices starting at offset 0 of the bound index buffer
            glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, nullptr);
        }
        stopGpuTimer();

        ASSERT_GL_NO_ERROR();
    }
    
    }  // namespace
    
    // Parameterized test entry point: calls run() once for each instantiated parameter set.
    TEST_P(VulkanBarriersPerfBenchmark, Run)
    {
        run();
    }
    
    // Instantiated configurations, as (largeTransfers, slowFS):
    //   (false, false): no transfer textures, short fragment shader
    //   (true,  false): transfer textures created, short fragment shader
    //   (true,  true):  transfer textures created, slow fragment shader
    ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
                           VulkanBarriersPerfParams(false, false),
                           VulkanBarriersPerfParams(true, false),
                           VulkanBarriersPerfParams(true, true));