Edit

IABSD.fr/xenocara/lib/mesa/src/imagination/vulkan/pvr_query_compute.c

Branch :

  • Show log

    Commit

  • Author : jsg
    Date : 2025-06-05 11:23:11
    Hash : 67d6f117
    Message : Import Mesa 25.0.7

  • lib/mesa/src/imagination/vulkan/pvr_query_compute.c
  • /*
     * Copyright © 2022 Imagination Technologies Ltd.
     *
     * Permission is hereby granted, free of charge, to any person obtaining a copy
     * of this software and associated documentation files (the "Software"), to deal
     * in the Software without restriction, including without limitation the rights
     * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     * copies of the Software, and to permit persons to whom the Software is
     * furnished to do so, subject to the following conditions:
     *
     * The above copyright notice and this permission notice (including the next
     * paragraph) shall be included in all copies or substantial portions of the
     * Software.
     *
     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     * SOFTWARE.
     */
    
    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <vulkan/vulkan.h>
    
    #include "hwdef/rogue_hw_utils.h"
    #include "pvr_bo.h"
    #include "pvr_formats.h"
    #include "pvr_pds.h"
    #include "pvr_private.h"
    #include "usc/programs/pvr_shader_factory.h"
    #include "usc/programs/pvr_static_shaders.h"
    #include "pvr_tex_state.h"
    #include "pvr_types.h"
    #include "vk_alloc.h"
    #include "vk_command_pool.h"
    #include "vk_util.h"
    
    static inline void pvr_init_primary_compute_pds_program(
       struct pvr_pds_compute_shader_program *program)
    {
       pvr_pds_compute_shader_program_init(program);
       program->local_input_regs[0] = 0;
       /* Workgroup id is in reg0. */
       program->work_group_input_regs[0] = 0;
       program->flattened_work_groups = true;
       program->kick_usc = true;
    }
    
    static VkResult pvr_create_compute_secondary_prog(
       struct pvr_device *device,
       const struct pvr_shader_factory_info *shader_factory_info,
       struct pvr_compute_query_shader *query_prog)
    {
       const size_t size =
          pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
       struct pvr_pds_descriptor_program_input sec_pds_program;
       struct pvr_pds_info *info = &query_prog->info;
       uint32_t staging_buffer_size;
       uint32_t *staging_buffer;
       VkResult result;
    
       info->entries =
          vk_alloc(&device->vk.alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!info->entries)
          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
    
       info->entries_size_in_bytes = size;
    
       sec_pds_program = (struct pvr_pds_descriptor_program_input){
          .buffer_count = 1,
          .buffers = {
             [0] = {
                .buffer_id = 0,
                .source_offset = 0,
                .type = PVR_BUFFER_TYPE_COMPILE_TIME,
                .size_in_dwords = shader_factory_info->const_shared_regs,
                .destination = shader_factory_info->explicit_const_start_offset,
             }
          },
       };
    
       pvr_pds_generate_descriptor_upload_program(&sec_pds_program, NULL, info);
    
       staging_buffer_size = info->code_size_in_dwords;
    
       staging_buffer = vk_alloc(&device->vk.alloc,
                                 PVR_DW_TO_BYTES(staging_buffer_size),
                                 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
       if (!staging_buffer) {
          vk_free(&device->vk.alloc, info->entries);
          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
       }
    
       pvr_pds_generate_descriptor_upload_program(&sec_pds_program,
                                                  staging_buffer,
                                                  info);
    
       assert(info->code_size_in_dwords <= staging_buffer_size);
    
       /* FIXME: Figure out the define for alignment of 16. */
       result = pvr_gpu_upload_pds(device,
                                   NULL,
                                   0,
                                   0,
                                   staging_buffer,
                                   info->code_size_in_dwords,
                                   16,
                                   16,
                                   &query_prog->pds_sec_code);
       if (result != VK_SUCCESS) {
          vk_free(&device->vk.alloc, staging_buffer);
          vk_free(&device->vk.alloc, info->entries);
          return result;
       }
    
       vk_free(&device->vk.alloc, staging_buffer);
    
       return VK_SUCCESS;
    }
    
    static void
    pvr_destroy_compute_secondary_prog(struct pvr_device *device,
                                       struct pvr_compute_query_shader *program)
    {
       pvr_bo_suballoc_free(program->pds_sec_code.pvr_bo);
       vk_free(&device->vk.alloc, program->info.entries);
    }
    
    static VkResult pvr_create_compute_query_program(
       struct pvr_device *device,
       const struct pvr_shader_factory_info *shader_factory_info,
       struct pvr_compute_query_shader *query_prog)
    {
       const uint32_t cache_line_size =
          rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
       struct pvr_pds_compute_shader_program pds_primary_prog;
       VkResult result;
    
       /* No support for query constant calc program. */
       assert(shader_factory_info->const_calc_prog_inst_bytes == 0);
       /* No support for query coefficient update program. */
       assert(shader_factory_info->coeff_update_prog_start == PVR_INVALID_INST);
    
       result = pvr_gpu_upload_usc(device,
                                   shader_factory_info->shader_code,
                                   shader_factory_info->code_size,
                                   cache_line_size,
                                   &query_prog->usc_bo);
       if (result != VK_SUCCESS)
          return result;
    
       pvr_init_primary_compute_pds_program(&pds_primary_prog);
    
       pvr_pds_setup_doutu(&pds_primary_prog.usc_task_control,
                           query_prog->usc_bo->dev_addr.addr,
                           shader_factory_info->temps_required,
                           ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                           false);
    
       result =
          pvr_pds_compute_shader_create_and_upload(device,
                                                   &pds_primary_prog,
                                                   &query_prog->pds_prim_code);
       if (result != VK_SUCCESS)
          goto err_free_usc_bo;
    
       query_prog->primary_data_size_dw = pds_primary_prog.data_size;
       query_prog->primary_num_temps = pds_primary_prog.temps_used;
    
       result = pvr_create_compute_secondary_prog(device,
                                                  shader_factory_info,
                                                  query_prog);
       if (result != VK_SUCCESS)
          goto err_free_pds_prim_code_bo;
    
       return VK_SUCCESS;
    
    err_free_pds_prim_code_bo:
       pvr_bo_suballoc_free(query_prog->pds_prim_code.pvr_bo);
    
    err_free_usc_bo:
       pvr_bo_suballoc_free(query_prog->usc_bo);
    
       return result;
    }
    
    /* TODO: See if we can dedup this with pvr_setup_descriptor_mappings() or
     * pvr_setup_descriptor_mappings().
     */
    static VkResult pvr_write_compute_query_pds_data_section(
       struct pvr_cmd_buffer *cmd_buffer,
       const struct pvr_compute_query_shader *query_prog,
       struct pvr_private_compute_pipeline *pipeline)
    {
       const struct pvr_pds_info *const info = &query_prog->info;
       struct pvr_suballoc_bo *pvr_bo;
       const uint8_t *entries;
       uint32_t *dword_buffer;
       uint64_t *qword_buffer;
       VkResult result;
    
       result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                         cmd_buffer->device->heaps.pds_heap,
                                         PVR_DW_TO_BYTES(info->data_size_in_dwords),
                                         &pvr_bo);
       if (result != VK_SUCCESS)
          return result;
    
       dword_buffer = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
       qword_buffer = (uint64_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
    
       entries = (uint8_t *)info->entries;
    
       /* TODO: Remove this when we can test this path and make sure that this is
        * not needed. If it's needed we should probably be using LITERAL entries for
        * this instead.
        */
       memset(dword_buffer, 0xFE, PVR_DW_TO_BYTES(info->data_size_in_dwords));
    
       pipeline->pds_shared_update_data_size_dw = info->data_size_in_dwords;
    
       for (uint32_t i = 0; i < info->entry_count; i++) {
          const struct pvr_const_map_entry *const entry_header =
             (struct pvr_const_map_entry *)entries;
    
          switch (entry_header->type) {
          case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
             const struct pvr_const_map_entry_literal32 *const literal =
                (struct pvr_const_map_entry_literal32 *)entries;
    
             PVR_WRITE(dword_buffer,
                       literal->literal_value,
                       literal->const_offset,
                       info->data_size_in_dwords);
    
             entries += sizeof(*literal);
             break;
          }
          case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64: {
             const struct pvr_const_map_entry_literal64 *const literal =
                (struct pvr_const_map_entry_literal64 *)entries;
    
             PVR_WRITE(qword_buffer,
                       literal->literal_value,
                       literal->const_offset,
                       info->data_size_in_dwords);
    
             entries += sizeof(*literal);
             break;
          }
          case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
             const struct pvr_const_map_entry_doutu_address *const doutu_addr =
                (struct pvr_const_map_entry_doutu_address *)entries;
             const pvr_dev_addr_t exec_addr =
                PVR_DEV_ADDR_OFFSET(query_prog->pds_sec_code.pvr_bo->dev_addr,
                                    query_prog->pds_sec_code.code_offset);
             uint64_t addr = 0ULL;
    
             pvr_set_usc_execution_address64(&addr, exec_addr.addr);
    
             PVR_WRITE(qword_buffer,
                       addr | doutu_addr->doutu_control,
                       doutu_addr->const_offset,
                       info->data_size_in_dwords);
    
             entries += sizeof(*doutu_addr);
             break;
          }
          case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: {
             const struct pvr_const_map_entry_special_buffer *special_buff_entry =
                (struct pvr_const_map_entry_special_buffer *)entries;
    
             switch (special_buff_entry->buffer_type) {
             case PVR_BUFFER_TYPE_COMPILE_TIME: {
                uint64_t addr = pipeline->const_buffer_addr.addr;
    
                PVR_WRITE(qword_buffer,
                          addr,
                          special_buff_entry->const_offset,
                          info->data_size_in_dwords);
                break;
             }
    
             default:
                unreachable("Unsupported special buffer type.");
             }
    
             entries += sizeof(*special_buff_entry);
             break;
          }
          default:
             unreachable("Unsupported data section map");
          }
       }
    
       pipeline->pds_shared_update_data_offset =
          pvr_bo->dev_addr.addr -
          cmd_buffer->device->heaps.pds_heap->base_addr.addr;
    
       return VK_SUCCESS;
    }
    
    static void pvr_write_private_compute_dispatch(
       struct pvr_cmd_buffer *cmd_buffer,
       struct pvr_private_compute_pipeline *pipeline,
       uint32_t num_query_indices)
    {
       struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
       const uint32_t workgroup_size[PVR_WORKGROUP_DIMENSIONS] = {
          DIV_ROUND_UP(num_query_indices, 32),
          1,
          1,
       };
    
       assert(sub_cmd->type == PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
    
       pvr_compute_update_shared_private(cmd_buffer, &sub_cmd->compute, pipeline);
       pvr_compute_update_kernel_private(cmd_buffer,
                                         &sub_cmd->compute,
                                         pipeline,
                                         workgroup_size);
       pvr_compute_generate_fence(cmd_buffer, &sub_cmd->compute, false);
    }
    
    static void
    pvr_destroy_compute_query_program(struct pvr_device *device,
                                      struct pvr_compute_query_shader *program)
    {
       pvr_destroy_compute_secondary_prog(device, program);
       pvr_bo_suballoc_free(program->pds_prim_code.pvr_bo);
       pvr_bo_suballoc_free(program->usc_bo);
    }
    
    static VkResult pvr_create_multibuffer_compute_query_program(
       struct pvr_device *device,
       const struct pvr_shader_factory_info *const *shader_factory_info,
       struct pvr_compute_query_shader *query_programs)
    {
       const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
       VkResult result;
       uint32_t i;
    
       for (i = 0; i < core_count; i++) {
          result = pvr_create_compute_query_program(device,
                                                    shader_factory_info[i],
                                                    &query_programs[i]);
          if (result != VK_SUCCESS)
             goto err_destroy_compute_query_program;
       }
    
       return VK_SUCCESS;
    
    err_destroy_compute_query_program:
       for (uint32_t j = 0; j < i; j++)
          pvr_destroy_compute_query_program(device, &query_programs[j]);
    
       return result;
    }
    
    VkResult pvr_device_create_compute_query_programs(struct pvr_device *device)
    {
       const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
       VkResult result;
    
       result = pvr_create_compute_query_program(device,
                                                 &availability_query_write_info,
                                                 &device->availability_shader);
       if (result != VK_SUCCESS)
          return result;
    
       device->copy_results_shaders =
          vk_alloc(&device->vk.alloc,
                   sizeof(*device->copy_results_shaders) * core_count,
                   8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!device->copy_results_shaders) {
          result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
          goto err_destroy_availability_query_program;
       }
    
       result = pvr_create_multibuffer_compute_query_program(
          device,
          copy_query_results_collection,
          device->copy_results_shaders);
       if (result != VK_SUCCESS)
          goto err_vk_free_copy_results_shaders;
    
       device->reset_queries_shaders =
          vk_alloc(&device->vk.alloc,
                   sizeof(*device->reset_queries_shaders) * core_count,
                   8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!device->reset_queries_shaders) {
          result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
          goto err_destroy_copy_results_query_programs;
       }
    
       result = pvr_create_multibuffer_compute_query_program(
          device,
          reset_query_collection,
          device->reset_queries_shaders);
       if (result != VK_SUCCESS)
          goto err_vk_free_reset_queries_shaders;
    
       return VK_SUCCESS;
    
    err_vk_free_reset_queries_shaders:
       vk_free(&device->vk.alloc, device->reset_queries_shaders);
    
    err_destroy_copy_results_query_programs:
       for (uint32_t i = 0; i < core_count; i++) {
          pvr_destroy_compute_query_program(device,
                                            &device->copy_results_shaders[i]);
       }
    
    err_vk_free_copy_results_shaders:
       vk_free(&device->vk.alloc, device->copy_results_shaders);
    
    err_destroy_availability_query_program:
       pvr_destroy_compute_query_program(device, &device->availability_shader);
    
       return result;
    }
    
    void pvr_device_destroy_compute_query_programs(struct pvr_device *device)
    {
       const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
    
       pvr_destroy_compute_query_program(device, &device->availability_shader);
    
       for (uint32_t i = 0; i < core_count; i++) {
          pvr_destroy_compute_query_program(device,
                                            &device->copy_results_shaders[i]);
          pvr_destroy_compute_query_program(device,
                                            &device->reset_queries_shaders[i]);
       }
    
       vk_free(&device->vk.alloc, device->copy_results_shaders);
       vk_free(&device->vk.alloc, device->reset_queries_shaders);
    }
    
    static void pvr_init_tex_info(const struct pvr_device_info *dev_info,
                                  struct pvr_texture_state_info *tex_info,
                                  uint32_t width,
                                  pvr_dev_addr_t addr)
    {
       const VkFormat vk_format = VK_FORMAT_R32_UINT;
       const uint8_t *swizzle_arr = pvr_get_format_swizzle(vk_format);
       bool is_view_1d = !PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
                         !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2);
    
       *tex_info = (struct pvr_texture_state_info){
          .format = vk_format,
          .mem_layout = PVR_MEMLAYOUT_LINEAR,
          .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
          .type = is_view_1d ? VK_IMAGE_VIEW_TYPE_1D : VK_IMAGE_VIEW_TYPE_2D,
          .is_cube = false,
          .tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
          .extent = { .width = width, .height = 1, .depth = 0 },
          .array_size = 1,
          .base_level = 0,
          .mip_levels = 1,
          .mipmaps_present = false,
          .sample_count = 1,
          .stride = width,
          .offset = 0,
          .swizzle = { [0] = swizzle_arr[0],
                       [1] = swizzle_arr[1],
                       [2] = swizzle_arr[2],
                       [3] = swizzle_arr[3] },
          .addr = addr,
    
       };
    }
    
    /* TODO: Split this function into per program type functions. */
    VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                                   const struct pvr_query_info *query_info)
    {
       struct pvr_device *device = cmd_buffer->device;
       const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
       const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
       const struct pvr_shader_factory_info *shader_factory_info;
       uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
       const struct pvr_compute_query_shader *query_prog;
       struct pvr_private_compute_pipeline pipeline;
       const uint32_t buffer_count = core_count;
       struct pvr_texture_state_info tex_info;
       uint32_t num_query_indices;
       uint32_t *const_buffer;
       struct pvr_suballoc_bo *pvr_bo;
       VkResult result;
    
       pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
          reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
          reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
          reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
          reg.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
          reg.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
          reg.non_normalized_coords = true;
          reg.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
       }
    
       /* clang-format off */
       pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
       /* clang-format on */
    
       switch (query_info->type) {
       case PVR_QUERY_TYPE_AVAILABILITY_WRITE:
          /* Adds a compute shader (fenced on the last 3D) that writes a non-zero
           * value in availability_bo at every index in index_bo.
           */
          query_prog = &device->availability_shader;
          shader_factory_info = &availability_query_write_info;
          num_query_indices = query_info->availability_write.num_query_indices;
          break;
    
       case PVR_QUERY_TYPE_COPY_QUERY_RESULTS:
          /* Adds a compute shader to copy availability and query value data. */
          query_prog = &device->copy_results_shaders[buffer_count - 1];
          shader_factory_info = copy_query_results_collection[buffer_count - 1];
          num_query_indices = query_info->copy_query_results.query_count;
          break;
    
       case PVR_QUERY_TYPE_RESET_QUERY_POOL:
          /* Adds a compute shader to reset availability and query value data. */
          query_prog = &device->reset_queries_shaders[buffer_count - 1];
          shader_factory_info = reset_query_collection[buffer_count - 1];
          num_query_indices = query_info->reset_query_pool.query_count;
          break;
    
       default:
          unreachable("Invalid query type");
       }
    
       result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer,
                                             PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
       if (result != VK_SUCCESS)
          return result;
    
       pipeline.pds_code_offset = query_prog->pds_prim_code.code_offset;
       pipeline.pds_data_offset = query_prog->pds_prim_code.data_offset;
    
       pipeline.pds_shared_update_code_offset =
          query_prog->pds_sec_code.code_offset;
       pipeline.pds_data_size_dw = query_prog->primary_data_size_dw;
       pipeline.pds_temps_used = query_prog->primary_num_temps;
    
       pipeline.coeff_regs_count = shader_factory_info->coeff_regs;
       pipeline.unified_store_regs_count = shader_factory_info->input_regs;
       pipeline.const_shared_regs_count = shader_factory_info->const_shared_regs;
    
       const_buffer =
          vk_alloc(&cmd_buffer->vk.pool->alloc,
                   PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs),
                   8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
       if (!const_buffer) {
          return vk_command_buffer_set_error(&cmd_buffer->vk,
                                             VK_ERROR_OUT_OF_HOST_MEMORY);
       }
    
       /* clang-format off */
    #define DRIVER_CONST(index)                                            \
       assert(shader_factory_info->driver_const_location_map[index] <      \
              shader_factory_info->const_shared_regs);                     \
       const_buffer[shader_factory_info->driver_const_location_map[index]]
       /* clang-format on */
    
       switch (query_info->type) {
       case PVR_QUERY_TYPE_AVAILABILITY_WRITE: {
          uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
          uint32_t image_sampler_idx = 0;
    
          memcpy(&image_sampler_state[image_sampler_idx][0],
                 &sampler_state[0],
                 sizeof(sampler_state));
          image_sampler_idx++;
    
          pvr_init_tex_info(dev_info,
                            &tex_info,
                            num_query_indices,
                            query_info->availability_write.index_bo->dev_addr);
    
          result = pvr_pack_tex_state(device,
                                      &tex_info,
                                      &image_sampler_state[image_sampler_idx][0]);
          if (result != VK_SUCCESS) {
             vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
             return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
          }
    
          image_sampler_idx++;
    
          pvr_init_tex_info(
             dev_info,
             &tex_info,
             query_info->availability_write.num_queries,
             query_info->availability_write.availability_bo->dev_addr);
    
          result = pvr_pack_tex_state(device,
                                      &tex_info,
                                      &image_sampler_state[image_sampler_idx][0]);
          if (result != VK_SUCCESS) {
             vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
             return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
          }
    
          image_sampler_idx++;
    
          memcpy(&const_buffer[0],
                 &image_sampler_state[0][0],
                 sizeof(image_sampler_state));
    
          /* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. */
          assert(shader_factory_info->num_driver_consts ==
                 PVR_QUERY_AVAILABILITY_WRITE_COUNT);
    
          DRIVER_CONST(PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT) =
             num_query_indices;
          break;
       }
    
       case PVR_QUERY_TYPE_COPY_QUERY_RESULTS: {
          PVR_FROM_HANDLE(pvr_query_pool,
                          pool,
                          query_info->copy_query_results.query_pool);
          PVR_FROM_HANDLE(pvr_buffer,
                          buffer,
                          query_info->copy_query_results.dst_buffer);
          const uint32_t image_sampler_state_arr_size =
             (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
          uint32_t image_sampler_idx = 0;
          pvr_dev_addr_t addr;
          uint64_t offset;
    
          STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
          if (!image_sampler_state) {
             vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
    
             return vk_command_buffer_set_error(&cmd_buffer->vk,
                                                VK_ERROR_OUT_OF_HOST_MEMORY);
          }
    
    #define SAMPLER_ARR_2D(_arr, _i, _j) \
       _arr[_i * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + _j]
    
          memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
                 &sampler_state[0],
                 sizeof(sampler_state));
          image_sampler_idx++;
    
          offset = query_info->copy_query_results.first_query * sizeof(uint32_t);
    
          addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);
    
          pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
    
          result = pvr_pack_tex_state(
             device,
             &tex_info,
             &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
          if (result != VK_SUCCESS) {
             vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
             return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
          }
    
          image_sampler_idx++;
    
          for (uint32_t i = 0; i < buffer_count; i++) {
             addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr,
                                        offset + i * pool->result_stride);
    
             pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
    
             result = pvr_pack_tex_state(
                device,
                &tex_info,
                &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
             if (result != VK_SUCCESS) {
                vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
                return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
             }
    
             image_sampler_idx++;
          }
    
          memcpy(&const_buffer[0],
                 &SAMPLER_ARR_2D(image_sampler_state, 0, 0),
                 image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
    
          STACK_ARRAY_FINISH(image_sampler_state);
    
          /* Only PVR_COPY_QUERY_POOL_RESULTS_COUNT driver consts allowed. */
          assert(shader_factory_info->num_driver_consts ==
                 PVR_COPY_QUERY_POOL_RESULTS_COUNT);
    
          /* Assert if no memory is bound to destination buffer. */
          assert(buffer->dev_addr.addr);
    
          addr = buffer->dev_addr;
          addr.addr += query_info->copy_query_results.dst_offset;
          addr.addr += query_info->copy_query_results.first_query *
                       query_info->copy_query_results.stride;
    
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices;
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr &
                                                                       0xFFFFFFFF;
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH) = addr.addr >>
                                                                        32;
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE) =
             query_info->copy_query_results.stride;
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG) =
             query_info->copy_query_results.flags & VK_QUERY_RESULT_PARTIAL_BIT;
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG) =
             query_info->copy_query_results.flags & VK_QUERY_RESULT_64_BIT;
          DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG) =
             query_info->copy_query_results.flags &
             VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
          break;
       }
    
       case PVR_QUERY_TYPE_RESET_QUERY_POOL: {
          PVR_FROM_HANDLE(pvr_query_pool,
                          pool,
                          query_info->reset_query_pool.query_pool);
          const uint32_t image_sampler_state_arr_size =
             (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
          uint32_t image_sampler_idx = 0;
          pvr_dev_addr_t addr;
          uint64_t offset;
    
          STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
          if (!image_sampler_state) {
             vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
    
             return vk_command_buffer_set_error(&cmd_buffer->vk,
                                                VK_ERROR_OUT_OF_HOST_MEMORY);
          }
    
          memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
                 &sampler_state[0],
                 sizeof(sampler_state));
          image_sampler_idx++;
    
          offset = query_info->reset_query_pool.first_query * sizeof(uint32_t);
    
          for (uint32_t i = 0; i < buffer_count; i++) {
             addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr,
                                        offset + i * pool->result_stride);
    
             pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
    
             result = pvr_pack_tex_state(
                device,
                &tex_info,
                &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
             if (result != VK_SUCCESS) {
                vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
                return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
             }
    
             image_sampler_idx++;
          }
    
          addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);
    
          pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
    
          result = pvr_pack_tex_state(
             device,
             &tex_info,
             &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
          if (result != VK_SUCCESS) {
             vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
             return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
          }
    
          image_sampler_idx++;
    
    #undef SAMPLER_ARR_2D
    
          memcpy(&const_buffer[0],
                 &image_sampler_state[0],
                 image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
    
          STACK_ARRAY_FINISH(image_sampler_state);
    
          /* Only PVR_RESET_QUERY_POOL_COUNT driver consts allowed. */
          assert(shader_factory_info->num_driver_consts ==
                 PVR_RESET_QUERY_POOL_COUNT);
    
          DRIVER_CONST(PVR_RESET_QUERY_POOL_INDEX_COUNT) = num_query_indices;
          break;
       }
    
       default:
          unreachable("Invalid query type");
       }
    
    #undef DRIVER_CONST
    
       for (uint32_t i = 0; i < shader_factory_info->num_static_const; i++) {
          const struct pvr_static_buffer *load =
             &shader_factory_info->static_const_buffer[i];
    
          /* Assert if static const is out of range. */
          assert(load->dst_idx < shader_factory_info->const_shared_regs);
          const_buffer[load->dst_idx] = load->value;
       }
    
       result = pvr_cmd_buffer_upload_general(
          cmd_buffer,
          const_buffer,
          PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs),
          &pvr_bo);
       if (result != VK_SUCCESS) {
          vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
    
          return result;
       }
    
       pipeline.const_buffer_addr = pvr_bo->dev_addr;
    
       vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
    
       /* PDS data section for the secondary/constant upload. */
       result = pvr_write_compute_query_pds_data_section(cmd_buffer,
                                                         query_prog,
                                                         &pipeline);
       if (result != VK_SUCCESS)
          return result;
    
       pipeline.workgroup_size.width = ROGUE_MAX_INSTANCES_PER_TASK;
       pipeline.workgroup_size.height = 1;
       pipeline.workgroup_size.depth = 1;
    
       pvr_write_private_compute_dispatch(cmd_buffer, &pipeline, num_query_indices);
    
       return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
    }