Edit

IABSD.fr/xenocara/lib/mesa/src/intel/compiler/brw_opt_virtual_grfs.cpp

Branch :

  • Show log

    Commit

  • Author : jsg
    Date : 2025-06-05 11:23:11
    Hash : 67d6f117
    Message : Import Mesa 25.0.7

  • lib/mesa/src/intel/compiler/brw_opt_virtual_grfs.cpp
  • /*
     * Copyright © 2010 Intel Corporation
     * SPDX-License-Identifier: MIT
     */
    
    #include "brw_fs.h"
    #include "brw_builder.h"
    
    using namespace brw;
    
    /**
     * Split large virtual GRFs into separate components if we can.
     *
     * This pass aggressively splits VGRFs into as small a chunks as possible,
     * down to single registers if it can.  If no VGRFs can be split, we return
     * false so this pass can safely be used inside an optimization loop.  We
     * want to split, because virtual GRFs are what we register allocate and
     * spill (due to contiguousness requirements for some instructions), and
     * they're what we naturally generate in the codegen process, but most
     * virtual GRFs don't actually need to be contiguous sets of GRFs.  If we
     * split, we'll end up with reduced live intervals and better dead code
     * elimination and coalescing.
     */
    bool
    brw_opt_split_virtual_grfs(fs_visitor &s)
    {
       /* Compact the register file so we eliminate dead vgrfs.  This
        * only defines split points for live registers, so if we have
        * too large dead registers they will hit assertions later.
        */
       brw_opt_compact_virtual_grfs(s);
    
       unsigned num_vars = s.alloc.count;
    
       /* Count the total number of registers */
       unsigned reg_count = 0;
       unsigned *vgrf_to_reg = new unsigned[num_vars];
       for (unsigned i = 0; i < num_vars; i++) {
          vgrf_to_reg[i] = reg_count;
          reg_count += s.alloc.sizes[i];
       }
    
       /* An array of "split points".  For each register slot, this indicates
        * if this slot can be separated from the previous slot.  Every time an
        * instruction uses multiple elements of a register (as a source or
        * destination), we mark the used slots as inseparable.  Then we go
        * through and split the registers into the smallest pieces we can.
        */
       bool *split_points = new bool[reg_count];
       memset(split_points, 0, reg_count * sizeof(*split_points));
    
       /* Mark all used registers as fully splittable following the physical
        * register size.
        */
       const unsigned reg_inc = reg_unit(s.devinfo);
       foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
          if (inst->dst.file == VGRF) {
             unsigned reg = vgrf_to_reg[inst->dst.nr];
             for (unsigned j = reg_inc; j < s.alloc.sizes[inst->dst.nr]; j += reg_inc)
                split_points[reg + j] = true;
          }
    
          for (unsigned i = 0; i < inst->sources; i++) {
             if (inst->src[i].file == VGRF) {
                unsigned reg = vgrf_to_reg[inst->src[i].nr];
                for (unsigned j = reg_inc; j < s.alloc.sizes[inst->src[i].nr]; j += reg_inc)
                   split_points[reg + j] = true;
             }
          }
       }
    
       foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
          /* We fix up undef instructions later */
          if (inst->opcode == SHADER_OPCODE_UNDEF) {
             assert(inst->dst.file == VGRF);
             continue;
          }
    
          if (inst->dst.file == VGRF) {
             unsigned reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
             for (unsigned j = 1; j < regs_written(inst); j++)
                split_points[reg + j] = false;
          }
          for (unsigned i = 0; i < inst->sources; i++) {
             if (inst->src[i].file == VGRF) {
                unsigned reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
                for (unsigned j = 1; j < regs_read(s.devinfo, inst, i); j++)
                   split_points[reg + j] = false;
             }
          }
       }
    
       /* Bitset of which registers have been split */
       bool *vgrf_has_split = new bool[num_vars];
       memset(vgrf_has_split, 0, num_vars * sizeof(*vgrf_has_split));
    
       unsigned *new_virtual_grf = new unsigned[reg_count];
       unsigned *new_reg_offset = new unsigned[reg_count];
    
       unsigned reg = 0;
       bool has_splits = false;
       for (unsigned i = 0; i < num_vars; i++) {
          /* The first one should always be 0 as a quick sanity check. */
          assert(split_points[reg] == false);
    
          /* j = 0 case */
          new_reg_offset[reg] = 0;
          reg++;
          unsigned offset = 1;
    
          /* j > 0 case */
          for (unsigned j = 1; j < s.alloc.sizes[i]; j++) {
             /* If this is a split point, reset the offset to 0 and allocate a
              * new virtual GRF for the previous offset many registers
              */
             if (split_points[reg]) {
                has_splits = true;
                vgrf_has_split[i] = true;
                unsigned grf = s.alloc.allocate(offset);
                for (unsigned k = reg - offset; k < reg; k++)
                   new_virtual_grf[k] = grf;
                offset = 0;
             }
             new_reg_offset[reg] = offset;
             offset++;
             reg++;
          }
    
          /* The last one gets the original register number */
          s.alloc.sizes[i] = offset;
          for (unsigned k = reg - offset; k < reg; k++)
             new_virtual_grf[k] = i;
       }
       assert(reg == reg_count);
    
       bool progress;
       if (!has_splits) {
          progress = false;
          goto cleanup;
       }
    
       foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
          if (inst->opcode == SHADER_OPCODE_UNDEF) {
             assert(inst->dst.file == VGRF);
             if (vgrf_has_split[inst->dst.nr]) {
                const brw_builder ibld(&s, block, inst);
                assert(inst->size_written % REG_SIZE == 0);
                unsigned reg_offset = inst->dst.offset / REG_SIZE;
                unsigned size_written = 0;
                while (size_written < inst->size_written) {
                   reg = vgrf_to_reg[inst->dst.nr] + reg_offset + size_written / REG_SIZE;
                   fs_inst *undef =
                      ibld.UNDEF(
                         byte_offset(brw_vgrf(new_virtual_grf[reg], inst->dst.type),
                                     new_reg_offset[reg] * REG_SIZE));
                   undef->size_written =
                      MIN2(inst->size_written - size_written, undef->size_written);
                   assert(undef->size_written % REG_SIZE == 0);
                   size_written += undef->size_written;
                }
                inst->remove(block);
             } else {
                reg = vgrf_to_reg[inst->dst.nr];
                assert(new_reg_offset[reg] == 0);
                assert(new_virtual_grf[reg] == inst->dst.nr);
             }
             continue;
          }
    
          if (inst->dst.file == VGRF) {
             reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
             if (vgrf_has_split[inst->dst.nr]) {
                inst->dst.nr = new_virtual_grf[reg];
                inst->dst.offset = new_reg_offset[reg] * REG_SIZE +
                                   inst->dst.offset % REG_SIZE;
                assert(new_reg_offset[reg] < s.alloc.sizes[new_virtual_grf[reg]]);
             } else {
                assert(new_reg_offset[reg] == inst->dst.offset / REG_SIZE);
                assert(new_virtual_grf[reg] == inst->dst.nr);
             }
          }
          for (unsigned i = 0; i < inst->sources; i++) {
    	 if (inst->src[i].file != VGRF)
                continue;
    
             reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
             if (vgrf_has_split[inst->src[i].nr]) {
                inst->src[i].nr = new_virtual_grf[reg];
                inst->src[i].offset = new_reg_offset[reg] * REG_SIZE +
                                      inst->src[i].offset % REG_SIZE;
                assert(new_reg_offset[reg] < s.alloc.sizes[new_virtual_grf[reg]]);
             } else {
                assert(new_reg_offset[reg] == inst->src[i].offset / REG_SIZE);
                assert(new_virtual_grf[reg] == inst->src[i].nr);
             }
          }
       }
       s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES);
    
       progress = true;
    
    cleanup:
       delete[] split_points;
       delete[] vgrf_has_split;
       delete[] new_virtual_grf;
       delete[] new_reg_offset;
       delete[] vgrf_to_reg;
    
       return progress;
    }
    
    /**
     * Remove unused virtual GRFs and compact the vgrf_* arrays.
     *
     * During code generation, we create tons of temporary variables, many of
     * which get immediately killed and are never used again.  Yet, in later
     * optimization and analysis passes, such as compute_live_intervals, we need
     * to loop over all the virtual GRFs.  Compacting them can save a lot of
     * overhead.
     */
    bool
    brw_opt_compact_virtual_grfs(fs_visitor &s)
    {
       bool progress = false;
       int *remap_table = new int[s.alloc.count];
       memset(remap_table, -1, s.alloc.count * sizeof(int));
    
       /* Mark which virtual GRFs are used. */
       foreach_block_and_inst(block, const fs_inst, inst, s.cfg) {
          if (inst->dst.file == VGRF)
             remap_table[inst->dst.nr] = 0;
    
          for (int i = 0; i < inst->sources; i++) {
             if (inst->src[i].file == VGRF)
                remap_table[inst->src[i].nr] = 0;
          }
       }
    
       /* Compact the GRF arrays. */
       int new_index = 0;
       for (unsigned i = 0; i < s.alloc.count; i++) {
          if (remap_table[i] == -1) {
             /* We just found an unused register.  This means that we are
              * actually going to compact something.
              */
             progress = true;
          } else {
             remap_table[i] = new_index;
             s.alloc.sizes[new_index] = s.alloc.sizes[i];
             s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES);
             ++new_index;
          }
       }
    
       s.alloc.count = new_index;
    
       /* Patch all the instructions to use the newly renumbered registers */
       foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
          if (inst->dst.file == VGRF)
             inst->dst.nr = remap_table[inst->dst.nr];
    
          for (int i = 0; i < inst->sources; i++) {
             if (inst->src[i].file == VGRF)
                inst->src[i].nr = remap_table[inst->src[i].nr];
          }
       }
    
       /* Patch all the references to delta_xy, since they're used in register
        * allocation.  If they're unused, switch them to BAD_FILE so we don't
        * think some random VGRF is delta_xy.
        */
       for (unsigned i = 0; i < ARRAY_SIZE(s.delta_xy); i++) {
          if (s.delta_xy[i].file == VGRF) {
             if (remap_table[s.delta_xy[i].nr] != -1) {
                s.delta_xy[i].nr = remap_table[s.delta_xy[i].nr];
             } else {
                s.delta_xy[i].file = BAD_FILE;
             }
          }
       }
    
       delete[] remap_table;
    
       return progress;
    }