Edit

kc3-lang/angle/src/libGLESv2/DynamicHLSL.cpp

Branch :

  • Show log

    Commit

  • Author : Jamie Madill
    Date : 2014-06-25 16:04:54
    Hash : f2575989
    Message : Use the sh namespace for shader variables. Since these types originate from the translator, use an appropriate namespace. Also rename some of the gl helper functions to be more specific to their functionality. BUG=angle:466 Change-Id: Idc29987b2053b3c40748dd46b581f3dbd8a6fd61 Reviewed-on: https://chromium-review.googlesource.com/204680 Tested-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Zhenyao Mo <zmo@chromium.org>

  • src/libGLESv2/DynamicHLSL.cpp
  • //
    // Copyright (c) 2014 The ANGLE Project Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style license that can be
    // found in the LICENSE file.
    //
    // DynamicHLSL.cpp: Implementation for link and run-time HLSL generation
    //
    
    #include "precompiled.h"
    
    #include "libGLESv2/DynamicHLSL.h"
    #include "libGLESv2/Shader.h"
    #include "libGLESv2/Program.h"
    #include "libGLESv2/renderer/Renderer.h"
    #include "common/utilities.h"
    #include "libGLESv2/ProgramBinary.h"
    #include "libGLESv2/formatutils.h"
    #include "common/blocklayout.h"
    
    // Formats a signed integer as its decimal text representation.
    static std::string Str(int i)
    {
        // Sized for the longest 32-bit value ("-2147483648") plus the terminator.
        char digits[20];
        snprintf(digits, sizeof(digits), "%d", i);

        return std::string(digits);
    }
    
    namespace gl_d3d
    {

    // Returns the HLSL scalar type name for a GL component type.
    // Normalized and floating point component types all map to "float".
    std::string HLSLComponentTypeString(GLenum componentType)
    {
        switch (componentType)
        {
          case GL_FLOAT:
          case GL_SIGNED_NORMALIZED:
          case GL_UNSIGNED_NORMALIZED:
            return "float";

          case GL_INT:
            return "int";

          case GL_UNSIGNED_INT:
            return "uint";

          default:
            UNREACHABLE();
            return "not-component-type";
        }
    }

    // Returns the HLSL scalar or vector type name: the scalar name, followed by
    // the component count when there is more than one component (e.g. "float3").
    std::string HLSLComponentTypeString(GLenum componentType, int componentCount)
    {
        std::string typeName = HLSLComponentTypeString(componentType);

        if (componentCount > 1)
        {
            typeName += Str(componentCount);
        }

        return typeName;
    }

    // Returns the HLSL matrix type name for a GL float matrix type.
    std::string HLSLMatrixTypeString(GLenum type)
    {
        static const struct
        {
            GLenum      glType;
            const char *hlslName;
        }
        matrixTypeNames[] =
        {
            { GL_FLOAT_MAT2,   "float2x2" },
            { GL_FLOAT_MAT3,   "float3x3" },
            { GL_FLOAT_MAT4,   "float4x4" },
            { GL_FLOAT_MAT2x3, "float2x3" },
            { GL_FLOAT_MAT3x2, "float3x2" },
            { GL_FLOAT_MAT2x4, "float2x4" },
            { GL_FLOAT_MAT4x2, "float4x2" },
            { GL_FLOAT_MAT3x4, "float3x4" },
            { GL_FLOAT_MAT4x3, "float4x3" },
        };

        const size_t entryCount = sizeof(matrixTypeNames) / sizeof(matrixTypeNames[0]);

        for (size_t entry = 0; entry < entryCount; entry++)
        {
            if (matrixTypeNames[entry].glType == type)
            {
                return matrixTypeNames[entry].hlslName;
            }
        }

        // Not one of the float matrix types listed above.
        UNREACHABLE();
        return "not-matrix-type";
    }

    // Returns the HLSL type name for any GL variable type, dispatching to the
    // matrix or the scalar/vector helper as appropriate.
    std::string HLSLTypeString(GLenum type)
    {
        if (!gl::IsMatrixType(type))
        {
            return HLSLComponentTypeString(gl::VariableComponentType(type), gl::VariableComponentCount(type));
        }

        return HLSLMatrixTypeString(type);
    }

    }
    
    namespace gl
    {
    
    // Builds an array subscript suffix such as "[3]" for the given index.
    // GL_INVALID_INDEX yields an empty string (no subscript).
    std::string ArrayString(unsigned int i)
    {
        if (i == GL_INVALID_INDEX)
        {
            return "";
        }

        return "[" + Str(i) + "]";
    }
    
    // Placeholder markers embedded in generated HLSL. They are located with
    // std::string::find and swapped for the layout-specific input/output code.
    const std::string VERTEX_ATTRIBUTE_STUB_STRING("@@ VERTEX ATTRIBUTES @@");
    const std::string PIXEL_OUTPUT_STUB_STRING("@@ PIXEL OUTPUT @@");
    
    // Keeps a pointer to the renderer; the generation methods query it for the
    // shader model, varying limits and vertex format information.
    DynamicHLSL::DynamicHLSL(rx::Renderer *const renderer) : mRenderer(renderer) {}
    
    // Tries to reserve space for a single varying in the packing table.
    //
    //   varying           - the varying to place; on success its registerIndex is set
    //                       to the first row it occupies.
    //   maxVaryingVectors - number of usable rows in the packing table.
    //   packing           - table indexed as [row][component] with four components per
    //                       row; a non-null entry marks a slot that is already taken.
    //
    // Returns true when the varying was placed, false when no suitable space is left.
    static bool packVarying(PackedVarying *varying, const int maxVaryingVectors, VaryingPacking packing)
    {
        GLenum transposedType = TransposeMatrixType(varying->type);

        // matrices within varying structs are not transposed
        int registers = (varying->isStruct() ? HLSLVariableRegisterCount(*varying) : VariableRowCount(transposedType)) * varying->elementCount();
        int elements = (varying->isStruct() ? 4 : VariableColumnCount(transposedType));

        if (elements >= 2 && elements <= 4)
        {
            // First fit from the top: find the lowest block of 'registers' rows whose
            // leading 'elements' components are all free.
            for (int r = 0; r <= maxVaryingVectors - registers; r++)
            {
                bool available = true;

                for (int y = 0; y < registers && available; y++)
                {
                    for (int x = 0; x < elements && available; x++)
                    {
                        if (packing[r + y][x])
                        {
                            available = false;
                        }
                    }
                }

                if (available)
                {
                    varying->registerIndex = r;

                    for (int y = 0; y < registers; y++)
                    {
                        for (int x = 0; x < elements; x++)
                        {
                            packing[r + y][x] = varying;
                        }
                    }

                    return true;
                }
            }

            // Two-component varyings get a second chance in the last two components,
            // searching from the bottom of the table upwards.
            if (elements == 2)
            {
                for (int r = maxVaryingVectors - registers; r >= 0; r--)
                {
                    bool available = true;

                    for (int y = 0; y < registers && available; y++)
                    {
                        for (int x = 2; x < 4 && available; x++)
                        {
                            if (packing[r + y][x])
                            {
                                available = false;
                            }
                        }
                    }

                    if (available)
                    {
                        varying->registerIndex = r;

                        for (int y = 0; y < registers; y++)
                        {
                            for (int x = 2; x < 4; x++)
                            {
                                packing[r + y][x] = varying;
                            }
                        }

                        return true;
                    }
                }
            }
        }
        else if (elements == 1)
        {
            // Count the free slots in each of the four columns.
            int space[4] = { 0 };

            for (int y = 0; y < maxVaryingVectors; y++)
            {
                for (int x = 0; x < 4; x++)
                {
                    space[x] += packing[y][x] ? 0 : 1;
                }
            }

            // Choose the column that leaves the least free space while still holding
            // the varying. Ties keep the lowest-numbered column. A column only
            // qualifies if it has enough free slots AND they include a run of
            // 'registers' consecutive free rows, so that placement can never spill
            // into slots owned by another varying. No column is preferred up front:
            // a full column 0 must not prevent the use of the other columns.
            int column = -1;
            int firstRow = -1;

            for (int x = 0; x < 4; x++)
            {
                if (space[x] < registers || (column >= 0 && space[x] >= space[column]))
                {
                    continue;
                }

                // Lowest row that starts a run of 'registers' free rows in column x.
                for (int r = 0; r < maxVaryingVectors && r + registers <= maxVaryingVectors; r++)
                {
                    bool available = !packing[r][x];

                    for (int y = r + 1; y < r + registers && available; y++)
                    {
                        if (packing[y][x])
                        {
                            available = false;
                        }
                    }

                    if (available)
                    {
                        column = x;
                        firstRow = r;
                        break;
                    }
                }
            }

            if (column >= 0)
            {
                varying->registerIndex = firstRow;

                for (int y = firstRow; y < firstRow + registers; y++)
                {
                    packing[y][column] = varying;
                }

                return true;
            }
        }
        else UNREACHABLE();

        return false;
    }
    
    // Packs varyings into generic varying registers, using the algorithm from [OpenGL ES Shading Language 1.00 rev. 17] appendix A section 7 page 111
    // Fragment shader varyings are packed first, followed by any transform feedback
    // varyings that only exist in the vertex shader.
    // Returns the number of used varying registers, or -1 if unsuccessful
    int DynamicHLSL::packVaryings(InfoLog &infoLog, VaryingPacking packing, FragmentShader *fragmentShader,
                                  VertexShader *vertexShader, const std::vector<std::string>& transformFeedbackVaryings)
    {
        const int maxVaryingVectors = mRenderer->getMaxVaryingVectors();

        vertexShader->resetVaryingsRegisterAssignment();
        fragmentShader->resetVaryingsRegisterAssignment();

        // Names of the varyings that already own space in the packing table.
        std::set<std::string> packedVaryings;

        for (size_t fragmentIndex = 0; fragmentIndex < fragmentShader->mVaryings.size(); fragmentIndex++)
        {
            PackedVarying *fragmentVarying = &fragmentShader->mVaryings[fragmentIndex];

            if (!packVarying(fragmentVarying, maxVaryingVectors, packing))
            {
                infoLog.append("Could not pack varying %s", fragmentVarying->name.c_str());
                return -1;
            }

            packedVaryings.insert(fragmentVarying->name);
        }

        for (size_t feedbackIndex = 0; feedbackIndex < transformFeedbackVaryings.size(); feedbackIndex++)
        {
            const std::string &feedbackName = transformFeedbackVaryings[feedbackIndex];

            // Already placed on behalf of the fragment shader.
            if (packedVaryings.count(feedbackName) > 0)
            {
                continue;
            }

            // Locate the first vertex shader varying with a matching name.
            const size_t vertexVaryingCount = vertexShader->mVaryings.size();
            size_t matchIndex = 0;

            while (matchIndex < vertexVaryingCount && !(feedbackName == vertexShader->mVaryings[matchIndex].name))
            {
                matchIndex++;
            }

            if (matchIndex < vertexVaryingCount)
            {
                PackedVarying *vertexVarying = &vertexShader->mVaryings[matchIndex];

                if (!packVarying(vertexVarying, maxVaryingVectors, packing))
                {
                    infoLog.append("Could not pack varying %s", vertexVarying->name.c_str());
                    return -1;
                }
            }
            else if (feedbackName != "gl_Position" && feedbackName != "gl_PointSize")
            {
                // The two built-ins above are allowed even though they are not in the varying list.
                infoLog.append("Transform feedback varying %s does not exist in the vertex shader.", feedbackName.c_str());
                return -1;
            }
        }

        // Return the number of used registers: rows with at least one occupied component
        int usedRegisters = 0;

        for (int row = 0; row < maxVaryingVectors; row++)
        {
            bool rowInUse = false;

            for (int component = 0; component < 4 && !rowInUse; component++)
            {
                if (packing[row][component])
                {
                    rowInUse = true;
                }
            }

            if (rowInUse)
            {
                usedRegisters++;
            }
        }

        return usedRegisters;
    }
    
    // Produces the HLSL member declarations ("<type> v<N> : <semantic><N>;") for every
    // varying of the shader that has a register assigned, one line per occupied row.
    std::string DynamicHLSL::generateVaryingHLSL(VertexShader *shader) const
    {
        const std::string semanticName = getVaryingSemantic(shader->mUsesPointSize);
        std::string declarations;

        for (size_t index = 0; index < shader->mVaryings.size(); index++)
        {
            const PackedVarying &varying = shader->mVaryings[index];

            if (!varying.registerAssigned())
            {
                continue;
            }

            GLenum transposedType = TransposeMatrixType(varying.type);

            // A struct varying is declared once per array element.
            int variableRows = (varying.isStruct() ? 1 : VariableRowCount(transposedType));

            for (unsigned int elementIndex = 0; elementIndex < varying.elementCount(); elementIndex++)
            {
                for (int row = 0; row < variableRows; row++)
                {
                    // Indentation plus the optional interpolation modifier.
                    const char *qualifier = "";

                    switch (varying.interpolation)
                    {
                      case sh::INTERPOLATION_SMOOTH:
                        qualifier = "    ";
                        break;
                      case sh::INTERPOLATION_FLAT:
                        qualifier = "    nointerpolation ";
                        break;
                      case sh::INTERPOLATION_CENTROID:
                        qualifier = "    centroid ";
                        break;
                      default:
                        UNREACHABLE();
                    }

                    declarations += qualifier;

                    unsigned int semanticIndex = elementIndex * variableRows + varying.registerIndex + row;
                    const std::string indexString = Str(semanticIndex);

                    std::string typeString;

                    if (!varying.isStruct())
                    {
                        GLenum componentType = VariableComponentType(transposedType);
                        int columnCount = VariableColumnCount(transposedType);
                        typeString = gl_d3d::HLSLComponentTypeString(componentType, columnCount);
                    }
                    else
                    {
                        // matrices within structs are not transposed, so
                        // do not use the special struct prefix "rm"
                        typeString = decorateVariable(varying.structName);
                    }

                    declarations += typeString + " v" + indexString + " : " + semanticName + indexString + ";\n";
                }
            }
        }

        return declarations;
    }
    
    // Specializes a vertex shader for a concrete input layout.
    //
    //   sourceShader     - HLSL containing the VERTEX_ATTRIBUTE_STUB_STRING placeholder.
    //   inputLayout      - vertex formats, consumed one entry per attribute row.
    //   shaderAttributes - MAX_VERTEX_ATTRIBS attribute entries; entries with an empty
    //                      name are skipped.
    //
    // Returns a copy of sourceShader in which the placeholder has been replaced by the
    // VS_INPUT struct and the initAttributes() function. If the placeholder is missing
    // the copy is returned unmodified.
    std::string DynamicHLSL::generateVertexShaderForInputLayout(const std::string &sourceShader,
                                                                const VertexFormat inputLayout[],
                                                                const sh::Attribute shaderAttributes[]) const
    {
        std::string structHLSL, initHLSL;

        int semanticIndex = 0;
        unsigned int inputIndex = 0;

        for (unsigned int attributeIndex = 0; attributeIndex < MAX_VERTEX_ATTRIBS; attributeIndex++)
        {
            const sh::Attribute &shaderAttribute = shaderAttributes[attributeIndex];

            if (shaderAttribute.name.empty())
            {
                continue;
            }

            // Only touch the input layout for active attributes. inputIndex advances by
            // the row count of each active attribute, so once matrix attributes have
            // been consumed it may already equal MAX_VERTEX_ATTRIBS while empty
            // attribute entries are still being visited.
            ASSERT(inputIndex < MAX_VERTEX_ATTRIBS);
            const VertexFormat &vertexFormat = inputLayout[inputIndex];

            // HLSL code for input structure
            if (IsMatrixType(shaderAttribute.type))
            {
                // Matrix types are always transposed
                structHLSL += "    " + gl_d3d::HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type));
            }
            else
            {
                GLenum componentType = mRenderer->getVertexComponentType(vertexFormat);
                structHLSL += "    " + gl_d3d::HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type));
            }

            structHLSL += " " + decorateVariable(shaderAttribute.name) + " : TEXCOORD" + Str(semanticIndex) + ";\n";
            semanticIndex += VariableRegisterCount(shaderAttribute.type);

            // HLSL code for initialization
            initHLSL += "    " + decorateVariable(shaderAttribute.name) + " = ";

            // Mismatched vertex attribute to vertex input may result in an undefined
            // data reinterpretation (eg for pure integer->float, float->pure integer)
            // TODO: issue warning with gl debug info extension, when supported
            if (IsMatrixType(shaderAttribute.type) ||
                (mRenderer->getVertexConversionType(vertexFormat) & rx::VERTEX_CONVERT_GPU) != 0)
            {
                initHLSL += generateAttributeConversionHLSL(vertexFormat, shaderAttribute);
            }
            else
            {
                initHLSL += "input." + decorateVariable(shaderAttribute.name);
            }

            initHLSL += ";\n";

            inputIndex += VariableRowCount(TransposeMatrixType(shaderAttribute.type));
        }

        std::string replacementHLSL = "struct VS_INPUT\n"
                                      "{\n" +
                                      structHLSL +
                                      "};\n"
                                      "\n"
                                      "void initAttributes(VS_INPUT input)\n"
                                      "{\n" +
                                      initHLSL +
                                      "}\n";

        std::string vertexHLSL(sourceShader);

        // std::string::replace must not be handed npos as a position, so only
        // substitute when the placeholder is actually present.
        size_t copyInsertionPos = vertexHLSL.find(VERTEX_ATTRIBUTE_STUB_STRING);
        ASSERT(copyInsertionPos != std::string::npos);

        if (copyInsertionPos != std::string::npos)
        {
            vertexHLSL.replace(copyInsertionPos, VERTEX_ATTRIBUTE_STUB_STRING.length(), replacementHLSL);
        }

        return vertexHLSL;
    }
    
    // Specializes a pixel shader for a concrete set of outputs.
    //
    //   sourceShader    - HLSL containing the PIXEL_OUTPUT_STUB_STRING placeholder.
    //   outputVariables - one entry per shader output (type, name, source expression
    //                     and render target index).
    //   usesFragDepth   - whether a depth output member must be emitted as well.
    //   outputLayout    - currently only consulted by the bounds assertion below.
    //
    // Returns a copy of sourceShader in which the placeholder has been replaced by the
    // PS_OUTPUT struct and the generateOutput() function. If the placeholder is missing
    // the copy is returned unmodified.
    std::string DynamicHLSL::generatePixelShaderForOutputSignature(const std::string &sourceShader, const std::vector<PixelShaderOuputVariable> &outputVariables,
                                                                   bool usesFragDepth, const std::vector<GLenum> &outputLayout) const
    {
        // Shader model 4 and above use the system-value semantic names.
        const int shaderModel = mRenderer->getMajorShaderModel();
        std::string targetSemantic = (shaderModel >= 4) ? "SV_TARGET" : "COLOR";
        std::string depthSemantic = (shaderModel >= 4) ? "SV_Depth" : "DEPTH";

        std::string declarationHLSL;
        std::string copyHLSL;
        for (size_t i = 0; i < outputVariables.size(); i++)
        {
            const PixelShaderOuputVariable& outputVariable = outputVariables[i];
            ASSERT(outputLayout.size() > outputVariable.outputIndex);

            // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
            bool outputIndexEnabled = true; // outputLayout[outputVariable.outputIndex] != GL_NONE
            if (outputIndexEnabled)
            {
                declarationHLSL += "    " + gl_d3d::HLSLTypeString(outputVariable.type) + " " + outputVariable.name +
                                   " : " + targetSemantic + Str(outputVariable.outputIndex) + ";\n";

                copyHLSL += "    output." + outputVariable.name + " = " + outputVariable.source + ";\n";
            }
        }

        if (usesFragDepth)
        {
            declarationHLSL += "    float gl_Depth : " + depthSemantic + ";\n";
            copyHLSL += "    output.gl_Depth = gl_Depth; \n";
        }

        std::string replacementHLSL = "struct PS_OUTPUT\n"
                                      "{\n" +
                                      declarationHLSL +
                                      "};\n"
                                      "\n"
                                      "PS_OUTPUT generateOutput()\n"
                                      "{\n"
                                      "    PS_OUTPUT output;\n" +
                                      copyHLSL +
                                      "    return output;\n"
                                      "}\n";

        std::string pixelHLSL(sourceShader);

        // std::string::replace must not be handed npos as a position, so only
        // substitute when the placeholder is actually present.
        size_t outputInsertionPos = pixelHLSL.find(PIXEL_OUTPUT_STUB_STRING);
        ASSERT(outputInsertionPos != std::string::npos);

        if (outputInsertionPos != std::string::npos)
        {
            pixelHLSL.replace(outputInsertionPos, PIXEL_OUTPUT_STUB_STRING.length(), replacementHLSL);
        }

        return pixelHLSL;
    }
    
    // Returns the semantic name used for generic varyings: "COLOR" when point size is
    // used below shader model 4, "TEXCOORD" otherwise.
    std::string DynamicHLSL::getVaryingSemantic(bool pointSize) const
    {
        // SM3 reserves the TEXCOORD semantic for point sprite texcoords (gl_PointCoord)
        // In D3D11 we manually compute gl_PointCoord in the GS.
        const bool belowShaderModel4 = (mRenderer->getMajorShaderModel() < 4);

        if (pointSize && belowShaderModel4)
        {
            return "COLOR";
        }

        return "TEXCOORD";
    }
    
    // Semantic assignments for the built-in values that travel between shader stages.
    struct DynamicHLSL::SemanticInfo
    {
        // Semantic description of a single built-in; disabled until one of the
        // enable methods is called.
        struct BuiltinInfo
        {
            BuiltinInfo() : enabled(false), index(0), systemValue(false) {}

            bool enabled;          // set by enable() / enableSystem()
            std::string semantic;  // semantic name, without any index suffix
            unsigned int index;    // index appended by str() for non-system values
            bool systemValue;      // when true, str() returns the bare semantic name

            // Full semantic string as written into the HLSL declaration.
            std::string str() const
            {
                if (systemValue)
                {
                    return semantic;
                }

                return semantic + Str(index);
            }

            // Enables the built-in with a system value semantic (no index suffix).
            void enableSystem(const std::string &systemValueSemantic)
            {
                semantic = systemValueSemantic;
                systemValue = true;
                enabled = true;
            }

            // Enables the built-in with an indexed semantic.
            void enable(const std::string &semanticVal, unsigned int indexVal)
            {
                semantic = semanticVal;
                index = indexVal;
                enabled = true;
            }
        };

        BuiltinInfo dxPosition;
        BuiltinInfo glPosition;
        BuiltinInfo glFragCoord;
        BuiltinInfo glPointCoord;
        BuiltinInfo glPointSize;
    };
    
    // Decides which built-ins are present in a shader stage interface and which
    // semantics they use.
    //
    //   startRegisters - first register index available for the built-ins.
    //   fragCoord      - reserve a register for gl_FragCoord.
    //   pointCoord     - reserve a slot for gl_PointCoord.
    //   pointSize      - the vertex shader writes gl_PointSize.
    //   pixelShader    - true for the pixel shader input, false for the vertex shader output.
    DynamicHLSL::SemanticInfo DynamicHLSL::getSemanticInfo(int startRegisters, bool fragCoord, bool pointCoord,
                                                           bool pointSize, bool pixelShader) const
    {
        SemanticInfo info;

        const bool hlsl4 = (mRenderer->getMajorShaderModel() >= 4);
        const std::string varyingSemantic = getVaryingSemantic(pointSize);

        // Built-ins take consecutive registers starting at startRegisters.
        int nextRegister = startRegisters;

        // The position system value is named differently per shader model and stage.
        const char *positionSemantic = "POSITION";

        if (hlsl4)
        {
            positionSemantic = "SV_Position";
        }
        else if (pixelShader)
        {
            positionSemantic = "VPOS";
        }

        info.dxPosition.enableSystem(positionSemantic);

        info.glPosition.enable(varyingSemantic, nextRegister++);

        if (fragCoord)
        {
            info.glFragCoord.enable(varyingSemantic, nextRegister++);
        }

        if (pointCoord)
        {
            // SM3 reserves the TEXCOORD semantic for point sprite texcoords (gl_PointCoord)
            // In D3D11 we manually compute gl_PointCoord in the GS.
            if (!hlsl4)
            {
                info.glPointCoord.enable("TEXCOORD", 0);
            }
            else
            {
                info.glPointCoord.enable(varyingSemantic, nextRegister++);
            }
        }

        // Special case: do not include PSIZE semantic in HLSL 3 pixel shaders
        const bool stageCarriesPointSize = (hlsl4 || !pixelShader);

        if (pointSize && stageCarriesPointSize)
        {
            info.glPointSize.enableSystem("PSIZE");
        }

        return info;
    }
    
    // Builds the brace-enclosed body of a stage interface struct: the enabled built-ins,
    // then the user varying declarations passed in, then the point size member if enabled.
    std::string DynamicHLSL::generateVaryingLinkHLSL(const SemanticInfo &info, const std::string &varyingHLSL) const
    {
        // Both position members are always emitted.
        ASSERT(info.dxPosition.enabled && info.glPosition.enabled);

        std::string members;

        members += "    float4 dx_Position : " + info.dxPosition.str() + ";\n";
        members += "    float4 gl_Position : " + info.glPosition.str() + ";\n";

        if (info.glFragCoord.enabled)
        {
            members += "    float4 gl_FragCoord : " + info.glFragCoord.str() + ";\n";
        }

        if (info.glPointCoord.enabled)
        {
            members += "    float2 gl_PointCoord : " + info.glPointCoord.str() + ";\n";
        }

        members += varyingHLSL;

        if (info.glPointSize.enabled)
        {
            members += "    float gl_PointSize : " + info.glPointSize.str() + ";\n";
        }

        return "{\n" + members + "};\n";
    }
    
    void DynamicHLSL::storeBuiltinLinkedVaryings(const SemanticInfo &info,
                                                 std::vector<LinkedVarying> *linkedVaryings) const
    {
        ASSERT(info.glPosition.enabled);
    
        linkedVaryings->push_back(LinkedVarying("gl_Position", GL_FLOAT_VEC4, 1, info.glPosition.semantic,
                                                info.glPosition.index, 1));
    
        if (info.glFragCoord.enabled)
        {
            linkedVaryings->push_back(LinkedVarying("gl_FragCoord", GL_FLOAT_VEC4, 1, info.glFragCoord.semantic,
                                                    info.glFragCoord.index, 1));
        }
    
        if (info.glPointSize.enabled)
        {
            linkedVaryings->push_back(LinkedVarying("gl_PointSize", GL_FLOAT, 1, "PSIZE", 0, 1));
        }
    }
    
    void DynamicHLSL::storeUserLinkedVaryings(const VertexShader *vertexShader,
                                              std::vector<LinkedVarying> *linkedVaryings) const
    {
        const std::string &varyingSemantic = getVaryingSemantic(vertexShader->mUsesPointSize);
        const std::vector<PackedVarying> &varyings = vertexShader->mVaryings;
    
        for (unsigned int varyingIndex = 0; varyingIndex < varyings.size(); varyingIndex++)
        {
            const PackedVarying &varying = varyings[varyingIndex];
            if (varying.registerAssigned())
            {
                GLenum transposedType = TransposeMatrixType(varying.type);
                int variableRows = (varying.isStruct() ? 1 : VariableRowCount(transposedType));
    
                linkedVaryings->push_back(LinkedVarying(varying.name, varying.type, varying.elementCount(),
                                                        varyingSemantic, varying.registerIndex,
                                                        variableRows * varying.elementCount()));
            }
        }
    }
    
    bool DynamicHLSL::generateShaderLinkHLSL(InfoLog &infoLog, int registers, const VaryingPacking packing,
                                             std::string& pixelHLSL, std::string& vertexHLSL,
                                             FragmentShader *fragmentShader, VertexShader *vertexShader,
                                             const std::vector<std::string>& transformFeedbackVaryings,
                                             std::vector<LinkedVarying> *linkedVaryings,
                                             std::map<int, VariableLocation> *programOutputVars,
                                             std::vector<PixelShaderOuputVariable> *outPixelShaderKey,
                                             bool *outUsesFragDepth) const
    {
        if (pixelHLSL.empty() || vertexHLSL.empty())
        {
            return false;
        }
    
        bool usesMRT = fragmentShader->mUsesMultipleRenderTargets;
        bool usesFragColor = fragmentShader->mUsesFragColor;
        bool usesFragData = fragmentShader->mUsesFragData;
        bool usesFragCoord = fragmentShader->mUsesFragCoord;
        bool usesPointCoord = fragmentShader->mUsesPointCoord;
        bool usesPointSize = vertexShader->mUsesPointSize;
    
        if (usesFragColor && usesFragData)
        {
            infoLog.append("Cannot use both gl_FragColor and gl_FragData in the same fragment shader.");
            return false;
        }
    
        // Write the HLSL input/output declarations
        const int shaderModel = mRenderer->getMajorShaderModel();
        const int maxVaryingVectors = mRenderer->getMaxVaryingVectors();
    
        const int registersNeeded = registers + (usesFragCoord ? 1 : 0) + (usesPointCoord ? 1 : 0);
    
        // Two cases when writing to gl_FragColor and using ESSL 1.0:
        // - with a 3.0 context, the output color is copied to channel 0
        // - with a 2.0 context, the output color is broadcast to all channels
        const bool broadcast = (usesFragColor && mRenderer->getCurrentClientVersion() < 3);
        const unsigned int numRenderTargets = (broadcast || usesMRT ? mRenderer->getMaxRenderTargets() : 1);
    
        int shaderVersion = vertexShader->getShaderVersion();
    
        if (registersNeeded > maxVaryingVectors)
        {
            infoLog.append("No varying registers left to support gl_FragCoord/gl_PointCoord");
            return false;
        }
    
        const std::string &varyingHLSL = generateVaryingHLSL(vertexShader);
        const SemanticInfo &vertexSemantics = getSemanticInfo(registers, usesFragCoord,
                                                              false, usesPointSize, false);
    
        storeUserLinkedVaryings(vertexShader, linkedVaryings);
        storeBuiltinLinkedVaryings(vertexSemantics, linkedVaryings);
    
        // Add stub string to be replaced when shader is dynamically defined by its layout
        vertexHLSL += "\n" + VERTEX_ATTRIBUTE_STUB_STRING + "\n"
                      "struct VS_OUTPUT\n" + generateVaryingLinkHLSL(vertexSemantics, varyingHLSL) + "\n"
                      "VS_OUTPUT main(VS_INPUT input)\n"
                      "{\n"
                      "    initAttributes(input);\n";
    
        if (shaderModel >= 4)
        {
            vertexHLSL += "\n"
                          "    gl_main();\n"
                          "\n"
                          "    VS_OUTPUT output;\n"
                          "    output.gl_Position = gl_Position;\n"
                          "    output.dx_Position.x = gl_Position.x;\n"
                          "    output.dx_Position.y = -gl_Position.y;\n"
                          "    output.dx_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n"
                          "    output.dx_Position.w = gl_Position.w;\n";
        }
        else
        {
            vertexHLSL += "\n"
                          "    gl_main();\n"
                          "\n"
                          "    VS_OUTPUT output;\n"
                          "    output.gl_Position = gl_Position;\n"
                          "    output.dx_Position.x = gl_Position.x * dx_ViewAdjust.z + dx_ViewAdjust.x * gl_Position.w;\n"
                          "    output.dx_Position.y = -(gl_Position.y * dx_ViewAdjust.w + dx_ViewAdjust.y * gl_Position.w);\n"
                          "    output.dx_Position.z = (gl_Position.z + gl_Position.w) * 0.5;\n"
                          "    output.dx_Position.w = gl_Position.w;\n";
        }
    
        if (usesPointSize && shaderModel >= 3)
        {
            vertexHLSL += "    output.gl_PointSize = gl_PointSize;\n";
        }
    
        if (usesFragCoord)
        {
            vertexHLSL += "    output.gl_FragCoord = gl_Position;\n";
        }
    
        for (unsigned int vertVaryingIndex = 0; vertVaryingIndex < vertexShader->mVaryings.size(); vertVaryingIndex++)
        {
            const PackedVarying &varying = vertexShader->mVaryings[vertVaryingIndex];
            if (varying.registerAssigned())
            {
                for (unsigned int elementIndex = 0; elementIndex < varying.elementCount(); elementIndex++)
                {
                    int variableRows = (varying.isStruct() ? 1 : VariableRowCount(TransposeMatrixType(varying.type)));
    
                    for (int row = 0; row < variableRows; row++)
                    {
                        int r = varying.registerIndex + elementIndex * variableRows + row;
                        vertexHLSL += "    output.v" + Str(r);
    
                        bool sharedRegister = false;   // Register used by multiple varyings
    
                        for (int x = 0; x < 4; x++)
                        {
                            if (packing[r][x] && packing[r][x] != packing[r][0])
                            {
                                sharedRegister = true;
                                break;
                            }
                        }
    
                        if(sharedRegister)
                        {
                            vertexHLSL += ".";
    
                            for (int x = 0; x < 4; x++)
                            {
                                if (packing[r][x] == &varying)
                                {
                                    switch(x)
                                    {
                                      case 0: vertexHLSL += "x"; break;
                                      case 1: vertexHLSL += "y"; break;
                                      case 2: vertexHLSL += "z"; break;
                                      case 3: vertexHLSL += "w"; break;
                                    }
                                }
                            }
                        }
    
                        vertexHLSL += " = _" + varying.name;
    
                        if (varying.isArray())
                        {
                            vertexHLSL += ArrayString(elementIndex);
                        }
    
                        if (variableRows > 1)
                        {
                            vertexHLSL += ArrayString(row);
                        }
    
                        vertexHLSL += ";\n";
                    }
                }
            }
        }
    
        vertexHLSL += "\n"
                      "    return output;\n"
                      "}\n";
    
        const SemanticInfo &pixelSemantics = getSemanticInfo(registers, usesFragCoord, usesPointCoord,
                                                             usesPointSize, true);
    
        pixelHLSL += "struct PS_INPUT\n" + generateVaryingLinkHLSL(pixelSemantics, varyingHLSL) + "\n";
    
        if (shaderVersion < 300)
        {
            for (unsigned int renderTargetIndex = 0; renderTargetIndex < numRenderTargets; renderTargetIndex++)
            {
                PixelShaderOuputVariable outputKeyVariable;
                outputKeyVariable.type = GL_FLOAT_VEC4;
                outputKeyVariable.name = "gl_Color" + Str(renderTargetIndex);
                outputKeyVariable.source = broadcast ? "gl_Color[0]" : "gl_Color[" + Str(renderTargetIndex) + "]";
                outputKeyVariable.outputIndex = renderTargetIndex;
    
                outPixelShaderKey->push_back(outputKeyVariable);
            }
    
            *outUsesFragDepth = fragmentShader->mUsesFragDepth;
        }
        else
        {
            defineOutputVariables(fragmentShader, programOutputVars);
    
            const std::vector<sh::Attribute> &shaderOutputVars = fragmentShader->getOutputVariables();
            for (auto locationIt = programOutputVars->begin(); locationIt != programOutputVars->end(); locationIt++)
            {
                const VariableLocation &outputLocation = locationIt->second;
                const sh::ShaderVariable &outputVariable = shaderOutputVars[outputLocation.index];
                const std::string &variableName = "out_" + outputLocation.name;
                const std::string &elementString = (outputLocation.element == GL_INVALID_INDEX ? "" : Str(outputLocation.element));
    
                PixelShaderOuputVariable outputKeyVariable;
                outputKeyVariable.type = outputVariable.type;
                outputKeyVariable.name = variableName + elementString;
                outputKeyVariable.source = variableName + ArrayString(outputLocation.element);
                outputKeyVariable.outputIndex = locationIt->first;
    
                outPixelShaderKey->push_back(outputKeyVariable);
            }
    
            *outUsesFragDepth = false;
        }
    
        pixelHLSL += PIXEL_OUTPUT_STUB_STRING + "\n";
    
        if (fragmentShader->mUsesFrontFacing)
        {
            if (shaderModel >= 4)
            {
                pixelHLSL += "PS_OUTPUT main(PS_INPUT input, bool isFrontFace : SV_IsFrontFace)\n"
                             "{\n";
            }
            else
            {
                pixelHLSL += "PS_OUTPUT main(PS_INPUT input, float vFace : VFACE)\n"
                             "{\n";
            }
        }
        else
        {
            pixelHLSL += "PS_OUTPUT main(PS_INPUT input)\n"
                         "{\n";
        }
    
        if (usesFragCoord)
        {
            pixelHLSL += "    float rhw = 1.0 / input.gl_FragCoord.w;\n";
    
            if (shaderModel >= 4)
            {
                pixelHLSL += "    gl_FragCoord.x = input.dx_Position.x;\n"
                             "    gl_FragCoord.y = input.dx_Position.y;\n";
            }
            else if (shaderModel >= 3)
            {
                pixelHLSL += "    gl_FragCoord.x = input.dx_Position.x + 0.5;\n"
                             "    gl_FragCoord.y = input.dx_Position.y + 0.5;\n";
            }
            else
            {
                // dx_ViewCoords contains the viewport width/2, height/2, center.x and center.y. See Renderer::setViewport()
                pixelHLSL += "    gl_FragCoord.x = (input.gl_FragCoord.x * rhw) * dx_ViewCoords.x + dx_ViewCoords.z;\n"
                             "    gl_FragCoord.y = (input.gl_FragCoord.y * rhw) * dx_ViewCoords.y + dx_ViewCoords.w;\n";
            }
    
            pixelHLSL += "    gl_FragCoord.z = (input.gl_FragCoord.z * rhw) * dx_DepthFront.x + dx_DepthFront.y;\n"
                         "    gl_FragCoord.w = rhw;\n";
        }
    
        if (usesPointCoord && shaderModel >= 3)
        {
            pixelHLSL += "    gl_PointCoord.x = input.gl_PointCoord.x;\n";
            pixelHLSL += "    gl_PointCoord.y = 1.0 - input.gl_PointCoord.y;\n";
        }
    
        if (fragmentShader->mUsesFrontFacing)
        {
            if (shaderModel <= 3)
            {
                pixelHLSL += "    gl_FrontFacing = (vFace * dx_DepthFront.z >= 0.0);\n";
            }
            else
            {
                pixelHLSL += "    gl_FrontFacing = isFrontFace;\n";
            }
        }
    
        for (unsigned int varyingIndex = 0; varyingIndex < fragmentShader->mVaryings.size(); varyingIndex++)
        {
            const PackedVarying &varying = fragmentShader->mVaryings[varyingIndex];
            if (varying.registerAssigned())
            {
                for (unsigned int elementIndex = 0; elementIndex < varying.elementCount(); elementIndex++)
                {
                    GLenum transposedType = TransposeMatrixType(varying.type);
                    int variableRows = (varying.isStruct() ? 1 : VariableRowCount(transposedType));
                    for (int row = 0; row < variableRows; row++)
                    {
                        std::string n = Str(varying.registerIndex + elementIndex * variableRows + row);
                        pixelHLSL += "    _" + varying.name;
    
                        if (varying.isArray())
                        {
                            pixelHLSL += ArrayString(elementIndex);
                        }
    
                        if (variableRows > 1)
                        {
                            pixelHLSL += ArrayString(row);
                        }
    
                        if (varying.isStruct())
                        {
                            pixelHLSL += " = input.v" + n + ";\n";   break;
                        }
                        else
                        {
                            switch (VariableColumnCount(transposedType))
                            {
                              case 1: pixelHLSL += " = input.v" + n + ".x;\n";   break;
                              case 2: pixelHLSL += " = input.v" + n + ".xy;\n";  break;
                              case 3: pixelHLSL += " = input.v" + n + ".xyz;\n"; break;
                              case 4: pixelHLSL += " = input.v" + n + ";\n";     break;
                              default: UNREACHABLE();
                            }
                        }
                    }
                }
            }
            else UNREACHABLE();
        }
    
        pixelHLSL += "\n"
                     "    gl_main();\n"
                     "\n"
                     "    return generateOutput();\n"
                     "}\n";
    
        return true;
    }
    
    // Fills programOutputVars with one entry per output location used by the fragment
    // shader's output variables. Array outputs occupy one consecutive location per
    // element; non-array outputs occupy a single location and are recorded with
    // GL_INVALID_INDEX as their element.
    void DynamicHLSL::defineOutputVariables(FragmentShader *fragmentShader, std::map<int, VariableLocation> *programOutputVars) const
    {
        const std::vector<sh::Attribute> &outputs = fragmentShader->getOutputVariables();

        for (unsigned int varIndex = 0; varIndex < outputs.size(); ++varIndex)
        {
            const sh::Attribute &output = outputs[varIndex];

            // A location of -1 is treated as location 0.
            const int firstLocation = (output.location == -1) ? 0 : output.location;

            const bool isArray = (output.arraySize > 0);
            if (!isArray)
            {
                // Each location may be claimed only once.
                ASSERT(programOutputVars->count(firstLocation) == 0);
                (*programOutputVars)[firstLocation] = VariableLocation(output.name, GL_INVALID_INDEX, varIndex);
                continue;
            }

            for (unsigned int element = 0; element < output.arraySize; ++element)
            {
                const int location = firstLocation + element;

                // Each location may be claimed only once.
                ASSERT(programOutputVars->count(location) == 0);
                (*programOutputVars)[location] = VariableLocation(output.name, element, varIndex);
            }
        }
    }
    
    // Entry point for geometry shader generation. Point sprite emulation is the only
    // case handled so far, so the work is forwarded to generatePointSpriteHLSL.
    std::string DynamicHLSL::generateGeometryShaderHLSL(int registers, FragmentShader *fragmentShader, VertexShader *vertexShader) const
    {
        // Point sprite emulation expects a vertex shader that uses point size and
        // a renderer reporting shader model 4 or above.
        ASSERT(vertexShader->mUsesPointSize && mRenderer->getMajorShaderModel() >= 4);

        std::string pointSpriteHLSL = generatePointSpriteHLSL(registers, fragmentShader, vertexShader);
        return pointSpriteHLSL;
    }
    
    // Generates the HLSL source of a geometry shader that emulates point sprites.
    // The generated main() takes a single point and emits a four-vertex triangle
    // strip: for each of the four corners in pointSpriteCorners it offsets
    // dx_Position by the corner scaled by the clamped point size, optionally writes
    // the matching gl_PointCoord, and appends the vertex to the output stream.
    //
    //   registers      - number of varying registers (v0 .. v<registers-1>) copied
    //                    unchanged from the input point to every output vertex.
    //   fragmentShader - queried for mUsesFragCoord / mUsesPointCoord to decide which
    //                    built-ins are declared and forwarded.
    //   vertexShader   - must have mUsesPointSize set (asserted); its varyings are
    //                    used to build the GS_INPUT / GS_OUTPUT structures.
    //
    // Returns the complete geometry shader source as a string.
    std::string DynamicHLSL::generatePointSpriteHLSL(int registers, FragmentShader *fragmentShader, VertexShader *vertexShader) const
    {
        ASSERT(registers >= 0);
        ASSERT(vertexShader->mUsesPointSize);
        ASSERT(mRenderer->getMajorShaderModel() >= 4);

        std::string geomHLSL;

        // The input and output structures differ only in gl_PointCoord: it is never
        // part of the input and is part of the output only when the fragment shader uses it.
        const SemanticInfo &inSemantics = getSemanticInfo(registers, fragmentShader->mUsesFragCoord,
                                                          false, true, false);
        const SemanticInfo &outSemantics = getSemanticInfo(registers, fragmentShader->mUsesFragCoord,
                                                           fragmentShader->mUsesPointCoord, true, false);

        std::string varyingHLSL = generateVaryingHLSL(vertexShader);
        std::string inLinkHLSL = generateVaryingLinkHLSL(inSemantics, varyingHLSL);
        std::string outLinkHLSL = generateVaryingLinkHLSL(outSemantics, varyingHLSL);

        // Shader preamble: constants, input/output structures, corner tables, point size
        // limits and the start of main(). Every literal below belongs to ONE statement;
        // only the final line carries the terminating semicolon. (A stray ';' one line
        // earlier used to cut the statement short, silently dropping the gl_PointSize
        // copy from the generated shader.)
        geomHLSL += "uniform float4 dx_ViewCoords : register(c1);\n"
                    "\n"
                    "struct GS_INPUT\n" + inLinkHLSL + "\n" +
                    "struct GS_OUTPUT\n" + outLinkHLSL + "\n" +
                    "\n"
                    "static float2 pointSpriteCorners[] = \n"
                    "{\n"
                    "    float2( 0.5f, -0.5f),\n"
                    "    float2( 0.5f,  0.5f),\n"
                    "    float2(-0.5f, -0.5f),\n"
                    "    float2(-0.5f,  0.5f)\n"
                    "};\n"
                    "\n"
                    "static float2 pointSpriteTexcoords[] = \n"
                    "{\n"
                    "    float2(1.0f, 1.0f),\n"
                    "    float2(1.0f, 0.0f),\n"
                    "    float2(0.0f, 1.0f),\n"
                    "    float2(0.0f, 0.0f)\n"
                    "};\n"
                    "\n"
                    "static float minPointSize = " + Str(ALIASED_POINT_SIZE_RANGE_MIN) + ".0f;\n"
                    "static float maxPointSize = " + Str(mRenderer->getMaxPointSize()) + ".0f;\n"
                    "\n"
                    "[maxvertexcount(4)]\n"
                    "void main(point GS_INPUT input[1], inout TriangleStream<GS_OUTPUT> outStream)\n"
                    "{\n"
                    "    GS_OUTPUT output = (GS_OUTPUT)0;\n"
                    "    output.gl_Position = input[0].gl_Position;\n"
                    "    output.gl_PointSize = input[0].gl_PointSize;\n";

        // Forward every varying register unchanged.
        for (int r = 0; r < registers; r++)
        {
            geomHLSL += "    output.v" + Str(r) + " = input[0].v" + Str(r) + ";\n";
        }

        if (fragmentShader->mUsesFragCoord)
        {
            geomHLSL += "    output.gl_FragCoord = input[0].gl_FragCoord;\n";
        }

        // Clamp the point size and compute the scale applied to the corner offsets.
        geomHLSL += "    \n"
                    "    float gl_PointSize = clamp(input[0].gl_PointSize, minPointSize, maxPointSize);\n"
                    "    float4 dx_Position = input[0].dx_Position;\n"
                    "    float2 viewportScale = float2(1.0f / dx_ViewCoords.x, 1.0f / dx_ViewCoords.y) * dx_Position.w;\n";

        // Emit one vertex per corner.
        for (int corner = 0; corner < 4; corner++)
        {
            geomHLSL += "    \n"
                        "    output.dx_Position = dx_Position + float4(pointSpriteCorners[" + Str(corner) + "] * viewportScale * gl_PointSize, 0.0f, 0.0f);\n";

            if (fragmentShader->mUsesPointCoord)
            {
                geomHLSL += "    output.gl_PointCoord = pointSpriteTexcoords[" + Str(corner) + "];\n";
            }

            geomHLSL += "    outStream.Append(output);\n";
        }

        geomHLSL += "    \n"
                    "    outStream.RestartStrip();\n"
                    "}\n";

        return geomHLSL;
    }
    
    // Returns the HLSL identifier for a shader variable name: names beginning with
    // "gl_" are returned unchanged, every other name gets a leading underscore.
    // This method needs to match OutputHLSL::decorate.
    std::string DynamicHLSL::decorateVariable(const std::string &name)
    {
        static const char builtInPrefix[] = "gl_";

        // compare() yields 0 only when the first three characters equal the prefix.
        const bool hasBuiltInPrefix = (name.compare(0, sizeof(builtInPrefix) - 1, builtInPrefix) == 0);
        if (hasBuiltInPrefix)
        {
            return name;
        }

        return "_" + name;
    }
    
    // Builds the HLSL expression that reads a vertex attribute from the "input"
    // structure, wrapped in whatever conversion the shader-side type requires:
    //   - matrix attributes are wrapped in transpose(...)
    //   - float-typed shader attributes fed by a non-float vertex format are
    //     wrapped in a floatN(...) constructor
    //   - everything else is read directly.
    std::string DynamicHLSL::generateAttributeConversionHLSL(const VertexFormat &vertexFormat, const sh::ShaderVariable &shaderAttrib) const
    {
        std::string inputExpr = "input.";
        inputExpr += decorateVariable(shaderAttrib.name);

        if (IsMatrixType(shaderAttrib.type))
        {
            return "transpose(" + inputExpr + ")";
        }

        const GLenum componentType = VariableComponentType(shaderAttrib.type);
        const int componentCount = VariableComponentCount(shaderAttrib.type);

        // A float shader attribute backed by non-float vertex data needs an explicit cast.
        const bool needsFloatCast = (componentType == GL_FLOAT && vertexFormat.mType != GL_FLOAT);
        if (!needsFloatCast)
        {
            // The attribute can be consumed as-is.
            return inputExpr;
        }

        // TODO: normalization for 32-bit integer formats
        ASSERT(!vertexFormat.mNormalized && !vertexFormat.mPureInteger);
        return "float" + Str(componentCount) + "(" + inputExpr + ")";
    }
    
    void DynamicHLSL::getInputLayoutSignature(const VertexFormat inputLayout[], GLenum signature[]) const
    {
        for (size_t inputIndex = 0; inputIndex < MAX_VERTEX_ATTRIBS; inputIndex++)
        {
            const VertexFormat &vertexFormat = inputLayout[inputIndex];
    
            if (vertexFormat.mType == GL_NONE)
            {
                signature[inputIndex] = GL_NONE;
            }
            else
            {
                bool gpuConverted = ((mRenderer->getVertexConversionType(vertexFormat) & rx::VERTEX_CONVERT_GPU) != 0);
                signature[inputIndex] = (gpuConverted ? GL_TRUE : GL_FALSE);
            }
        }
    }
    
    }