Edit

IABSD.fr/xenocara/lib/mesa/src/broadcom/vulkan/v3dv_query.c

Branch :

  • Show log

    Commit

  • Author : jsg
    Date : 2025-06-05 11:23:11
    Hash : 67d6f117
    Message : Import Mesa 25.0.7

  • lib/mesa/src/broadcom/vulkan/v3dv_query.c
  • /*
     * Copyright © 2020 Raspberry Pi Ltd
     *
     * Permission is hereby granted, free of charge, to any person obtaining a
     * copy of this software and associated documentation files (the "Software"),
     * to deal in the Software without restriction, including without limitation
     * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     * and/or sell copies of the Software, and to permit persons to whom the
     * Software is furnished to do so, subject to the following conditions:
     *
     * The above copyright notice and this permission notice (including the next
     * paragraph) shall be included in all copies or substantial portions of the
     * Software.
     *
     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     * IN THE SOFTWARE.
     */
    
    #include "v3dv_private.h"
    
    #include "util/timespec.h"
    #include "compiler/nir/nir_builder.h"
    
    /**
     * Creates the kernel perfmons that back one performance query.
     *
     * The pool's counter list is consumed in slices of at most
     * DRM_V3D_MAX_PERF_COUNTERS entries, one create request per slice, and the
     * id found in each request afterwards is recorded in the query. A failing
     * ioctl is only logged; the loop carries on with the next slice.
     */
    static void
    kperfmon_create(struct v3dv_device *device,
                    struct v3dv_query_pool *pool,
                    uint32_t query)
    {
       for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
          /* Index of the first pool counter handled by this perfmon */
          const uint32_t first = i * DRM_V3D_MAX_PERF_COUNTERS;
          assert(first < pool->perfmon.ncounters);

          struct drm_v3d_perfmon_create req = {
             .ncounters = MIN2(pool->perfmon.ncounters - first,
                               DRM_V3D_MAX_PERF_COUNTERS),
          };
          memcpy(req.counters, &pool->perfmon.counters[first], req.ncounters);

          if (v3d_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_PERFMON_CREATE, &req) != 0) {
             mesa_loge("Failed to create perfmon for query %d: %s\n", query,
                       strerror(errno));
          }

          pool->queries[query].perf.kperfmon_ids[i] = req.id;
       }
    }
    
    static void
    kperfmon_destroy(struct v3dv_device *device,
                     struct v3dv_query_pool *pool,
                     uint32_t query)
    {
       /* Skip destroying if never created */
       if (!pool->queries[query].perf.kperfmon_ids[0])
          return;
    
       for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
          struct drm_v3d_perfmon_destroy req = {
             .id = pool->queries[query].perf.kperfmon_ids[i]
          };
    
          int ret = v3d_ioctl(device->pdevice->render_fd,
                              DRM_IOCTL_V3D_PERFMON_DESTROY,
                              &req);
    
          if (ret) {
             mesa_loge("Failed to destroy perfmon %u: %s\n",
                       req.id, strerror(errno));
          }
       }
    }
    
    /**
     * Wraps a BO in a storage VkBuffer plus a VkDeviceMemory object bound to it.
     *
     * The buffer is created with the BO's size; the memory object is a
     * zero-allocated wrapper that points at the BO and uses memory type 0 of the
     * physical device.
     *
     * Returns the buffer creation error, or VK_ERROR_OUT_OF_HOST_MEMORY when the
     * memory wrapper cannot be allocated (in which case the buffer handle
     * already written to *vk_buf is not released here).
     */
    static VkResult
    create_vk_storage_buffer(struct v3dv_device *device,
                             struct v3dv_bo *bo,
                             VkBuffer *vk_buf,
                             VkDeviceMemory *vk_mem)
    {
       VkDevice dev_handle = v3dv_device_to_handle(device);

       VkBufferCreateInfo create_info = {
          .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
          .size = bo->size,
          .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
       };
       const VkResult status =
          v3dv_CreateBuffer(dev_handle, &create_info, NULL, vk_buf);
       if (status != VK_SUCCESS)
          return status;

       struct v3dv_device_memory *wrapper =
          vk_object_zalloc(&device->vk, NULL, sizeof(*wrapper),
                           VK_OBJECT_TYPE_DEVICE_MEMORY);
       if (wrapper == NULL)
          return VK_ERROR_OUT_OF_HOST_MEMORY;

       wrapper->bo = bo;
       wrapper->type = &device->pdevice->memory.memoryTypes[0];
       *vk_mem = v3dv_device_memory_to_handle(wrapper);

       /* Attach the wrapper memory to the buffer at offset 0 */
       VkBindBufferMemoryInfo bind = {
          .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
          .buffer = *vk_buf,
          .memory = *vk_mem,
          .memoryOffset = 0,
       };
       v3dv_BindBufferMemory2(dev_handle, 1, &bind);

       return VK_SUCCESS;
    }
    
    /**
     * Releases the memory wrapper (when present) and the buffer, and resets both
     * handles to VK_NULL_HANDLE.
     */
    static void
    destroy_vk_storage_buffer(struct v3dv_device *device,
                              VkBuffer *vk_buf,
                              VkDeviceMemory *vk_mem)
    {
       VkDevice dev_handle = v3dv_device_to_handle(device);

       /* Only the wrapper object is freed here, not anything it points to */
       if (*vk_mem != VK_NULL_HANDLE) {
          vk_object_free(&device->vk, NULL,
                         v3dv_device_memory_from_handle(*vk_mem));
          *vk_mem = VK_NULL_HANDLE;
       }

       v3dv_DestroyBuffer(dev_handle, *vk_buf, NULL);
       *vk_buf = VK_NULL_HANDLE;
    }
    
    /**
     * Creates the descriptor pool and the single storage-buffer descriptor set
     * through which Vulkan pipelines access the pool's meta buffer
     * (pool->meta.buf).
     *
     * Only valid for occlusion query pools. Returns the first error reported by
     * descriptor pool creation or descriptor set allocation.
     */
    static VkResult
    create_pool_descriptors(struct v3dv_device *device,
                            struct v3dv_query_pool *pool)
    {
       assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION);
       VkDevice dev_handle = v3dv_device_to_handle(device);

       /* A pool holding exactly one set with one storage buffer descriptor */
       VkDescriptorPoolSize sizes = {
          .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .descriptorCount = 1,
       };
       VkDescriptorPoolCreateInfo dpool_info = {
          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
          .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
          .maxSets = 1,
          .poolSizeCount = 1,
          .pPoolSizes = &sizes,
       };
       VkResult status = v3dv_CreateDescriptorPool(dev_handle, &dpool_info, NULL,
                                                   &pool->meta.descriptor_pool);
       if (status != VK_SUCCESS)
          return status;

       VkDescriptorSetAllocateInfo set_info = {
          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
          .descriptorPool = pool->meta.descriptor_pool,
          .descriptorSetCount = 1,
          .pSetLayouts = &device->queries.buf_descriptor_set_layout,
       };
       status = v3dv_AllocateDescriptorSets(dev_handle, &set_info,
                                            &pool->meta.descriptor_set);
       if (status != VK_SUCCESS)
          return status;

       /* Point binding 0 of the new set at the whole meta buffer */
       VkDescriptorBufferInfo buffer_ref = {
          .buffer = pool->meta.buf,
          .offset = 0,
          .range = VK_WHOLE_SIZE,
       };
       VkWriteDescriptorSet set_write = {
          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstSet = pool->meta.descriptor_set,
          .dstBinding = 0,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .pBufferInfo = &buffer_ref,
       };
       v3dv_UpdateDescriptorSets(dev_handle, 1, &set_write, 0, NULL);

       return VK_SUCCESS;
    }
    
    static void
    destroy_pool_descriptors(struct v3dv_device *device,
                             struct v3dv_query_pool *pool)
    {
       assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION);
    
       v3dv_FreeDescriptorSets(v3dv_device_to_handle(device),
                               pool->meta.descriptor_pool,
                               1, &pool->meta.descriptor_set);
       pool->meta.descriptor_set = VK_NULL_HANDLE;
    
       v3dv_DestroyDescriptorPool(v3dv_device_to_handle(device),
                                  pool->meta.descriptor_pool, NULL);
       pool->meta.descriptor_pool = VK_NULL_HANDLE;
    }
    
    /**
     * Creates the Vulkan-side resources of a query pool: the storage buffer over
     * the occlusion BO and the descriptors that reference it.
     *
     * Pools of any other query type need none and succeed immediately.
     */
    static VkResult
    pool_create_meta_resources(struct v3dv_device *device,
                               struct v3dv_query_pool *pool)
    {
       if (pool->query_type != VK_QUERY_TYPE_OCCLUSION)
          return VK_SUCCESS;

       const VkResult buf_status =
          create_vk_storage_buffer(device, pool->occlusion.bo,
                                   &pool->meta.buf, &pool->meta.mem);
       if (buf_status != VK_SUCCESS)
          return buf_status;

       return create_pool_descriptors(device, pool);
    }
    
    /**
     * Releases the meta resources of an occlusion query pool (descriptors first,
     * then the storage buffer). A no-op for every other query type.
     */
    static void
    pool_destroy_meta_resources(struct v3dv_device *device,
                                struct v3dv_query_pool *pool)
    {
       if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
          destroy_pool_descriptors(device, pool);
          destroy_vk_storage_buffer(device, &pool->meta.buf, &pool->meta.mem);
       }
    }
    
    /**
     * Implements vkCreateQueryPool for occlusion, timestamp and performance
     * query pools.
     *
     * Allocates the pool object and its per-query array, sets up the
     * type-specific storage (a mapped BO for occlusion and timestamp pools, the
     * counter list for performance pools), initializes every query and finally
     * creates the meta resources.
     *
     * On failure everything created so far is released, including the syncobjs
     * and kernel perfmons of the queries that were already initialized, and the
     * error code is returned; *pQueryPool is not written in that case.
     */
    VKAPI_ATTR VkResult VKAPI_CALL
    v3dv_CreateQueryPool(VkDevice _device,
                         const VkQueryPoolCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkQueryPool *pQueryPool)
    {
       V3DV_FROM_HANDLE(v3dv_device, device, _device);

       assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
              pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP ||
              pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
       assert(pCreateInfo->queryCount > 0);

       struct v3dv_query_pool *pool =
          vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
                           VK_OBJECT_TYPE_QUERY_POOL);
       if (pool == NULL)
          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

       pool->query_type = pCreateInfo->queryType;
       pool->query_count = pCreateInfo->queryCount;

       /* Number of queries fully initialized so far; the failure path relies on
        * it to know which per-query resources exist.
        */
       uint32_t query_idx = 0;
       VkResult result;

       /* Keep the byte count in size_t so that it is not truncated to 32 bits
        * before being handed to the allocator.
        */
       const size_t pool_bytes = sizeof(struct v3dv_query) * pool->query_count;
       pool->queries = vk_alloc2(&device->vk.alloc, pAllocator, pool_bytes, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
       if (pool->queries == NULL) {
          result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
          goto fail;
       }

       switch (pool->query_type) {
       case VK_QUERY_TYPE_OCCLUSION: {
          /* The hardware allows us to setup groups of 16 queries in consecutive
           * 4-byte addresses, requiring only that each group of 16 queries is
           * aligned to a 1024 byte boundary.
           */
          const uint32_t query_groups = DIV_ROUND_UP(pool->query_count, 16);
          uint32_t bo_size = query_groups * 1024;
          /* After the counters we store availability data, 1 byte/query */
          pool->occlusion.avail_offset = bo_size;
          bo_size += pool->query_count;
          pool->occlusion.bo = v3dv_bo_alloc(device, bo_size, "query:o", true);
          if (!pool->occlusion.bo) {
             result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
             goto fail;
          }
          if (!v3dv_bo_map(device, pool->occlusion.bo, bo_size)) {
             result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
             goto fail;
          }
          break;
       }
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
          const VkQueryPoolPerformanceCreateInfoKHR *pq_info =
             vk_find_struct_const(pCreateInfo->pNext,
                                  QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);

          assert(pq_info);

          pool->perfmon.ncounters = pq_info->counterIndexCount;
          for (uint32_t i = 0; i < pq_info->counterIndexCount; i++)
             pool->perfmon.counters[i] = pq_info->pCounterIndices[i];

          /* One kernel perfmon per DRM_V3D_MAX_PERF_COUNTERS counters */
          pool->perfmon.nperfmons = DIV_ROUND_UP(pool->perfmon.ncounters,
                                                 DRM_V3D_MAX_PERF_COUNTERS);

          assert(pool->perfmon.nperfmons <= V3DV_MAX_PERFMONS);
          break;
       }
       case VK_QUERY_TYPE_TIMESTAMP: {
          /* 8 bytes per query used for the timestamp value. We have all
           * timestamps tightly packed first in the buffer.
           */
          const uint32_t bo_size = pool->query_count * 8;
          pool->timestamp.bo = v3dv_bo_alloc(device, bo_size, "query:t", true);
          if (!pool->timestamp.bo) {
             result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
             goto fail;
          }
          if (!v3dv_bo_map(device, pool->timestamp.bo, bo_size)) {
             result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
             goto fail;
          }
          break;
       }
       default:
          unreachable("Unsupported query type");
       }

       /* Initialize queries in the pool */
       for (; query_idx < pool->query_count; query_idx++) {
          pool->queries[query_idx].maybe_available = false;
          switch (pool->query_type) {
          case VK_QUERY_TYPE_OCCLUSION: {
             const uint32_t query_group = query_idx / 16;
             const uint32_t query_offset = query_group * 1024 + (query_idx % 16) * 4;
             pool->queries[query_idx].occlusion.offset = query_offset;
             break;
             }
          case VK_QUERY_TYPE_TIMESTAMP:
             pool->queries[query_idx].timestamp.offset = query_idx * 8;
             result = vk_sync_create(&device->vk,
                                     &device->pdevice->drm_syncobj_type, 0, 0,
                                     &pool->queries[query_idx].timestamp.sync);
             if (result != VK_SUCCESS)
                goto fail;
             break;
          case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
             result = vk_sync_create(&device->vk,
                                     &device->pdevice->drm_syncobj_type, 0, 0,
                                     &pool->queries[query_idx].perf.last_job_sync);
             if (result != VK_SUCCESS)
                goto fail;

             kperfmon_create(device, pool, query_idx);
             break;
             }
          default:
             unreachable("Unsupported query type");
          }
       }

       /* Create meta resources */
       result = pool_create_meta_resources(device, pool);
       if (result != VK_SUCCESS)
          goto fail;

       *pQueryPool = v3dv_query_pool_to_handle(pool);

       return VK_SUCCESS;

    fail:
       /* Only queries below query_idx own per-query resources */
       if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
          for (uint32_t j = 0; j < query_idx; j++)
             vk_sync_destroy(&device->vk, pool->queries[j].timestamp.sync);
       }

       if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
          for (uint32_t j = 0; j < query_idx; j++) {
             /* These queries also had their kernel perfmons created; release
              * them here like v3dv_DestroyQueryPool does, otherwise they leak.
              */
             kperfmon_destroy(device, pool, j);
             vk_sync_destroy(&device->vk, pool->queries[j].perf.last_job_sync);
          }
       }

       if (pool->occlusion.bo)
          v3dv_bo_free(device, pool->occlusion.bo);
       if (pool->timestamp.bo)
          v3dv_bo_free(device, pool->timestamp.bo);
       if (pool->queries)
          vk_free2(&device->vk.alloc, pAllocator, pool->queries);
       pool_destroy_meta_resources(device, pool);
       vk_object_free(&device->vk, pAllocator, pool);

       return result;
    }
    
    /**
     * Implements vkDestroyQueryPool.
     *
     * Frees the pool's BOs, the per-query syncobjs (and kernel perfmons for
     * performance pools), the query array, the meta resources and finally the
     * pool object itself. A null pool handle is ignored.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_DestroyQueryPool(VkDevice _device,
                          VkQueryPool queryPool,
                          const VkAllocationCallbacks *pAllocator)
    {
       V3DV_FROM_HANDLE(v3dv_device, device, _device);
       V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);

       if (pool == NULL)
          return;

       if (pool->occlusion.bo != NULL)
          v3dv_bo_free(device, pool->occlusion.bo);

       if (pool->timestamp.bo != NULL)
          v3dv_bo_free(device, pool->timestamp.bo);

       /* Per-query resources only exist for timestamp and performance pools */
       switch (pool->query_type) {
       case VK_QUERY_TYPE_TIMESTAMP:
          for (uint32_t q = 0; q < pool->query_count; q++)
             vk_sync_destroy(&device->vk, pool->queries[q].timestamp.sync);
          break;
       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
          for (uint32_t q = 0; q < pool->query_count; q++) {
             kperfmon_destroy(device, pool, q);
             vk_sync_destroy(&device->vk, pool->queries[q].perf.last_job_sync);
          }
          break;
       default:
          break;
       }

       if (pool->queries != NULL)
          vk_free2(&device->vk.alloc, pAllocator, pool->queries);

       pool_destroy_meta_resources(device, pool);

       vk_object_free(&device->vk, pAllocator, pool);
    }
    
    /**
     * Stores a value in slot idx of dst, viewing dst as an array of 64-bit
     * elements when do_64bit is set and of 32-bit elements otherwise. In the
     * 32-bit case the value is truncated to its low 32 bits.
     */
    static void
    write_to_buffer(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
    {
       if (!do_64bit) {
          ((uint32_t *) dst)[idx] = (uint32_t) value;
          return;
       }

       ((uint64_t *) dst)[idx] = value;
    }
    
    /**
     * Blocks until the given query is available.
     *
     * - Occlusion: polls the query's availability byte in the mapped BO.
     * - Timestamp: waits on the query's syncobj.
     * - Performance: waits (up to 2 seconds) on the device's query_ended
     *   condition until the query is flagged maybe_available, then waits on the
     *   query's last_job_sync.
     *
     * Returns VK_SUCCESS once the query is available, VK_ERROR_DEVICE_LOST if
     * the device is found lost while waiting, or the result of
     * vk_device_set_lost() when a wait fails.
     */
    static VkResult
    query_wait_available(struct v3dv_device *device,
                         struct v3dv_query_pool *pool,
                         struct v3dv_query *q,
                         uint32_t query_idx)
    {
       /* For occlusion queries we prefer to poll the availability BO in a loop
        * to waiting on the query results BO, because the latter would
        * make us wait for any job running queries from the pool, even if those
        * queries do not involve the one we want to wait on.
        */
       if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
          uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
                            pool->occlusion.avail_offset + query_idx;
          while (*q_addr == 0)
             usleep(250);
          return VK_SUCCESS;
       }

       if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
          if (vk_sync_wait(&device->vk, q->timestamp.sync,
                           0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) {
             return vk_device_set_lost(&device->vk, "Query job wait failed");
          }
          return VK_SUCCESS;
       }

       assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);

       /* For performance queries we need to wait for the queue to signal that
        * the query has been submitted for execution before anything else.
        */
       VkResult result = VK_SUCCESS;
       if (!q->maybe_available) {
          struct timespec timeout;
          timespec_get(&timeout, TIME_UTC);
          timespec_add_msec(&timeout, &timeout, 2000);

          bool wait_failed = false;

          mtx_lock(&device->query_mutex);
          while (!q->maybe_available) {
             if (vk_device_is_lost(&device->vk)) {
                result = VK_ERROR_DEVICE_LOST;
                break;
             }

             int ret = cnd_timedwait(&device->query_ended,
                                     &device->query_mutex,
                                     &timeout);
             if (ret != thrd_success) {
                /* Only record the failure here. The mutex must be released
                 * exactly once, which the unlock after the loop takes care of;
                 * unlocking here as well would unlock it twice.
                 */
                wait_failed = true;
                break;
             }
          }
          mtx_unlock(&device->query_mutex);

          /* Flag the device as lost only after the mutex has been released */
          if (wait_failed)
             return vk_device_set_lost(&device->vk, "Query wait failed");

          if (result != VK_SUCCESS)
             return result;

          /* For performance queries, we also need to wait for the relevant syncobj
           * to be signaled to ensure completion of the GPU work.
           *
           * NOTE(review): this wait only runs when maybe_available was false on
           * entry, whereas query_check_available() checks the syncobj whenever
           * maybe_available is set - confirm whether it should also run when
           * the flag was already set.
           */
          if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR &&
              vk_sync_wait(&device->vk, q->perf.last_job_sync,
                           0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) {
            return vk_device_set_lost(&device->vk, "Query job wait failed");
          }
       }

       return result;
    }
    
    /**
     * Non-blocking availability check for a query.
     *
     * Returns VK_SUCCESS when the query is available and VK_NOT_READY otherwise.
     */
    static VkResult
    query_check_available(struct v3dv_device *device,
                          struct v3dv_query_pool *pool,
                          struct v3dv_query *q,
                          uint32_t query_idx)
    {
       switch (pool->query_type) {
       case VK_QUERY_TYPE_OCCLUSION: {
          /* Occlusion availability is one byte per query in the mapped BO */
          const uint8_t *avail = ((uint8_t *) pool->occlusion.bo->map) +
                                 pool->occlusion.avail_offset + query_idx;
          if (*avail != 0)
             return VK_SUCCESS;
          return VK_NOT_READY;
       }
       case VK_QUERY_TYPE_TIMESTAMP: {
          /* Zero-timeout wait: only tells whether the job has completed */
          const VkResult wait_status =
             vk_sync_wait(&device->vk, q->timestamp.sync,
                          0, VK_SYNC_WAIT_COMPLETE, 0);
          if (wait_status != VK_SUCCESS)
             return VK_NOT_READY;
          return VK_SUCCESS;
       }
       default:
          break;
       }

       /* Remaining queries must first have been submitted for execution */
       assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
       if (!q->maybe_available)
          return VK_NOT_READY;

       /* Performance queries additionally need their last GPU job completed */
       if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
          const VkResult wait_status =
             vk_sync_wait(&device->vk, q->perf.last_job_sync,
                          0, VK_SYNC_WAIT_COMPLETE, 0);
          if (wait_status != VK_SUCCESS)
             return VK_NOT_READY;
       }

       return VK_SUCCESS;
    }
    
    /**
     * Reports through *available whether a query's result can be read.
     *
     * With do_wait set, the call blocks until the query is available; a failed
     * wait clears *available and its error is returned. Without do_wait, the
     * current state is sampled and VK_SUCCESS is always returned.
     */
    static VkResult
    query_is_available(struct v3dv_device *device,
                       struct v3dv_query_pool *pool,
                       uint32_t query,
                       bool do_wait,
                       bool *available)
    {
       struct v3dv_query *q = &pool->queries[query];

       if (!do_wait) {
          const VkResult check = query_check_available(device, pool, q, query);
          assert(check == VK_SUCCESS || check == VK_NOT_READY);
          *available = (check == VK_SUCCESS);
          return VK_SUCCESS;
       }

       const VkResult wait = query_wait_available(device, pool, q, query);
       *available = (wait == VK_SUCCESS);
       return wait;
    }
    
    static VkResult
    write_occlusion_query_result(struct v3dv_device *device,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 bool do_64bit,
                                 void *data,
                                 uint32_t slot)
    {
       assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
    
       if (vk_device_is_lost(&device->vk))
          return VK_ERROR_DEVICE_LOST;
    
       struct v3dv_query *q = &pool->queries[query];
       assert(pool->occlusion.bo && pool->occlusion.bo->map);
    
       const uint8_t *query_addr =
          ((uint8_t *) pool->occlusion.bo->map) + q->occlusion.offset;
       write_to_buffer(data, slot, do_64bit, (uint64_t) *((uint32_t *)query_addr));
       return VK_SUCCESS;
    }
    
    static VkResult
    write_timestamp_query_result(struct v3dv_device *device,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 bool do_64bit,
                                 void *data,
                                 uint32_t slot)
    {
       assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
    
       struct v3dv_query *q = &pool->queries[query];
    
       const uint8_t *query_addr =
          ((uint8_t *) pool->timestamp.bo->map) + q->timestamp.offset;
    
       write_to_buffer(data, slot, do_64bit, *((uint64_t *)query_addr));
       return VK_SUCCESS;
    }
    
    static VkResult
    write_performance_query_result(struct v3dv_device *device,
                                   struct v3dv_query_pool *pool,
                                   uint32_t query,
                                   bool do_64bit,
                                   void *data,
                                   uint32_t slot)
    {
       assert(pool && pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
    
       struct v3dv_query *q = &pool->queries[query];
       uint64_t counter_values[V3D_MAX_PERFCNT];
    
       assert(pool->perfmon.nperfmons);
       assert(pool->perfmon.ncounters);
    
       for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
          struct drm_v3d_perfmon_get_values req = {
             .id = q->perf.kperfmon_ids[i],
             .values_ptr = (uintptr_t)(&counter_values[i *
                                       DRM_V3D_MAX_PERF_COUNTERS])
          };
    
          int ret = v3d_ioctl(device->pdevice->render_fd,
                              DRM_IOCTL_V3D_PERFMON_GET_VALUES,
                              &req);
    
          if (ret) {
             mesa_loge("failed to get perfmon values: %s\n", strerror(errno));
             return vk_error(device, VK_ERROR_DEVICE_LOST);
          }
       }
    
       for (uint32_t i = 0; i < pool->perfmon.ncounters; i++)
          write_to_buffer(data, slot + i, do_64bit, counter_values[i]);
    
       return VK_SUCCESS;
    }
    
    /**
     * Dispatches to the result writer that matches the pool's query type and
     * returns its result.
     */
    static VkResult
    write_query_result(struct v3dv_device *device,
                       struct v3dv_query_pool *pool,
                       uint32_t query,
                       bool do_64bit,
                       void *data,
                       uint32_t slot)
    {
       if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
          return write_occlusion_query_result(device, pool, query, do_64bit,
                                              data, slot);
       }

       if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
          return write_timestamp_query_result(device, pool, query, do_64bit,
                                              data, slot);
       }

       if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
          return write_performance_query_result(device, pool, query, do_64bit,
                                                data, slot);
       }

       unreachable("Unsupported query type");
    }
    
    /**
     * Returns how many result values one query of this pool produces: one for
     * occlusion and timestamp pools, one per counter for performance pools.
     */
    static uint32_t
    get_query_result_count(struct v3dv_query_pool *pool)
    {
       if (pool->query_type == VK_QUERY_TYPE_OCCLUSION ||
           pool->query_type == VK_QUERY_TYPE_TIMESTAMP)
          return 1;

       if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR)
          return pool->perfmon.ncounters;

       unreachable("Unsupported query type");
    }
    
    /**
     * Writes the results of queries [first, first + count) to data from the CPU.
     *
     * Each query occupies `stride` bytes in data. Result values come first
     * (64-bit when VK_QUERY_RESULT_64_BIT is set or the pool holds performance
     * queries, 32-bit otherwise), followed by an availability value when
     * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set.
     *
     * Returns VK_SUCCESS when every query had its result written, VK_NOT_READY
     * when at least one result was skipped because the query was unavailable,
     * and VK_ERROR_DEVICE_LOST when waiting for a query reported it. A failure
     * writing a query result is returned as well; the writers visible in this
     * file only fail with VK_ERROR_DEVICE_LOST.
     */
    VkResult
    v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                    struct v3dv_query_pool *pool,
                                    uint32_t first,
                                    uint32_t count,
                                    void *data,
                                    VkDeviceSize stride,
                                    VkQueryResultFlags flags)
    {
       assert(first < pool->query_count);
       assert(first + count <= pool->query_count);
       assert(data);

       const bool do_64bit = flags & VK_QUERY_RESULT_64_BIT ||
          pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR;
       const bool do_wait = flags & VK_QUERY_RESULT_WAIT_BIT;
       const bool do_partial = flags & VK_QUERY_RESULT_PARTIAL_BIT;

       uint32_t result_count = get_query_result_count(pool);

       /* Advance through the output with a byte pointer: arithmetic on void *
        * is a compiler extension, not standard C.
        */
       uint8_t *dst = data;

       VkResult result = VK_SUCCESS;
       for (uint32_t i = first; i < first + count; i++) {
          bool available = false;
          VkResult query_result =
             query_is_available(device, pool, i, do_wait, &available);
          if (query_result == VK_ERROR_DEVICE_LOST)
             result = VK_ERROR_DEVICE_LOST;

          /**
           * From the Vulkan 1.0 spec:
           *
           *    "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
           *     both not set then no result values are written to pData for queries
           *     that are in the unavailable state at the time of the call, and
           *     vkGetQueryPoolResults returns VK_NOT_READY. However, availability
           *     state is still written to pData for those queries if
           *     VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
           */
          uint32_t slot = 0;

          const bool write_result = available || do_partial;
          if (write_result) {
             /* Do not drop a failure reported by the result writer */
             VkResult write_status =
                write_query_result(device, pool, i, do_64bit, dst, slot);
             if (write_status != VK_SUCCESS)
                result = write_status;
          }
          slot += result_count;

          if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
             write_to_buffer(dst, slot++, do_64bit, available ? 1u : 0u);

          if (!write_result && result != VK_ERROR_DEVICE_LOST)
             result = VK_NOT_READY;

          dst += stride;
       }

       return result;
    }
    
    /**
     * Implements vkGetQueryPoolResults.
     *
     * Fails right away with VK_ERROR_DEVICE_LOST on a lost device; otherwise the
     * results are produced by v3dv_get_query_pool_results_cpu(). dataSize is not
     * consulted.
     */
    VKAPI_ATTR VkResult VKAPI_CALL
    v3dv_GetQueryPoolResults(VkDevice _device,
                             VkQueryPool queryPool,
                             uint32_t firstQuery,
                             uint32_t queryCount,
                             size_t dataSize,
                             void *pData,
                             VkDeviceSize stride,
                             VkQueryResultFlags flags)
    {
       V3DV_FROM_HANDLE(v3dv_device, device, _device);
       V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);

       const bool device_lost = vk_device_is_lost(&device->vk);
       if (device_lost)
          return VK_ERROR_DEVICE_LOST;

       VkResult status =
          v3dv_get_query_pool_results_cpu(device, pool, firstQuery, queryCount,
                                          pData, stride, flags);
       return status;
    }
    
    /* Covers query_count workgroups along X with as many vkCmdDispatchBase
     * calls as needed, each one limited to 65535 groups and starting at the
     * base group where the previous call stopped.
     */
    static void
    cmd_buffer_emit_dispatch_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint32_t query_count)
    {
       VkCommandBuffer cmd_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
       const uint32_t group_limit = 65535;

       for (uint32_t base = 0; base < query_count; ) {
          const uint32_t pending = query_count - base;
          const uint32_t groups = pending < group_limit ? pending : group_limit;

          v3dv_CmdDispatchBase(cmd_handle, base, 0, 0, groups, 1, 1);
          base += groups;
       }
    }
    
    /**
     * Records a compute job that sets the availability of `count` queries
     * starting at `query` to the given value.
     *
     * A barrier from color attachment output to compute is emitted first, then
     * the device's availability pipeline is bound together with the pool's meta
     * descriptor set, the parameters are passed as push constants and one
     * workgroup per query is dispatched. The command buffer's meta state is
     * pushed before and popped after the dispatch.
     */
    void
    v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
                                                struct v3dv_query_pool *pool,
                                                uint32_t query, uint32_t count,
                                                uint8_t availability)
    {
       assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION ||
              pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);

       struct v3dv_device *device = cmd_buffer->device;
       VkCommandBuffer cmd_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);

       /* The compute job emitted below must only run once the graphics work
        * that uses the queries has completed.
        */
       VkMemoryBarrier2 gfx_to_compute = {
          .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
          .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
          .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
       };
       VkDependencyInfo dependency = {
          .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
          .memoryBarrierCount = 1,
          .pMemoryBarriers = &gfx_to_compute,
       };
       v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &dependency);

       /* Bind the availability pipeline and its resources as meta state */
       v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

       v3dv_CmdBindPipeline(cmd_handle,
                            VK_PIPELINE_BIND_POINT_COMPUTE,
                            device->queries.avail_pipeline);

       v3dv_CmdBindDescriptorSets(cmd_handle,
                                  VK_PIPELINE_BIND_POINT_COMPUTE,
                                  device->queries.avail_pipeline_layout,
                                  0, 1, &pool->meta.descriptor_set,
                                  0, NULL);

       /* Parameters handed to the pipeline through push constants */
       struct {
          uint32_t offset;
          uint32_t query;
          uint8_t availability;
       } push_data = {
          .offset = pool->occlusion.avail_offset,
          .query = query,
          .availability = availability,
       };
       v3dv_CmdPushConstants(cmd_handle,
                             device->queries.avail_pipeline_layout,
                             VK_SHADER_STAGE_COMPUTE_BIT,
                             0, sizeof(push_data), &push_data);

       /* One workgroup per query */
       cmd_buffer_emit_dispatch_queries(cmd_buffer, count);

       v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
    }
    
    /**
     * Records a compute dispatch that resets 'count' occlusion queries starting
     * at 'query', bracketed by barriers against the graphics work that precedes
     * and follows it.
     */
    static void
    cmd_buffer_emit_reset_occlusion_query_pool(struct v3dv_cmd_buffer *cmd_buffer,
                                               struct v3dv_query_pool *pool,
                                               uint32_t query, uint32_t count)
    {
       struct v3dv_device *dev = cmd_buffer->device;
       VkCommandBuffer cmd_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);

       /* The reset runs in the compute queue: wait for the GPU to finish with
        * the queries in the graphics queue first.
        */
       VkMemoryBarrier2 pre_reset = {
          .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
          .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
          .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
       };
       VkDependencyInfo pre_reset_info = {
          .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
          .memoryBarrierCount = 1,
          .pMemoryBarriers = &pre_reset,
       };
       v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &pre_reset_info);

       /* Record the compute reset between a meta state push/pop pair */
       v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

       v3dv_CmdBindPipeline(cmd_handle,
                            VK_PIPELINE_BIND_POINT_COMPUTE,
                            dev->queries.reset_occlusion_pipeline);

       v3dv_CmdBindDescriptorSets(cmd_handle,
                                  VK_PIPELINE_BIND_POINT_COMPUTE,
                                  dev->queries.reset_occlusion_pipeline_layout,
                                  0, 1, &pool->meta.descriptor_set,
                                  0, NULL);

       /* Push constants: availability offset in the buffer and base query */
       struct {
          uint32_t offset;
          uint32_t query;
       } constants = {
          .offset = pool->occlusion.avail_offset,
          .query = query,
       };
       v3dv_CmdPushConstants(cmd_handle,
                             dev->queries.reset_occlusion_pipeline_layout,
                             VK_SHADER_STAGE_COMPUTE_BIT,
                             0, sizeof(constants), &constants);

       cmd_buffer_emit_dispatch_queries(cmd_buffer, count);

       v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);

       /* Later graphics work that uses the queries must not start until the
        * reset has completed.
        */
       VkMemoryBarrier2 post_reset = {
          .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
          .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
          .dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT,
       };
       VkDependencyInfo post_reset_info = {
          .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
          .memoryBarrierCount = 1,
          .pMemoryBarriers = &post_reset,
       };
       v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &post_reset_info);
    }
    
    /* GPU reset path: forwards to the occlusion reset. The pool must be an
     * occlusion query pool (asserted).
     */
    static void
    cmd_buffer_emit_reset_query_pool(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_query_pool *pool,
                                     uint32_t first, uint32_t count)
    {
       assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION);
       cmd_buffer_emit_reset_occlusion_query_pool(cmd_buffer, pool, first, count);
    }
    
    /* CPU reset path for non-occlusion pools (asserted): appends a
     * V3DV_JOB_TYPE_CPU_RESET_QUERIES job describing the range to reset to the
     * command buffer's job list.
     */
    static void
    cmd_buffer_emit_reset_query_pool_cpu(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first, uint32_t count)
    {
       assert(pool->query_type != VK_QUERY_TYPE_OCCLUSION);

       struct v3dv_job *reset_job =
          v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                         V3DV_JOB_TYPE_CPU_RESET_QUERIES,
                                         cmd_buffer, -1);
       v3dv_return_if_oom(cmd_buffer, NULL);

       /* Describe the query range the job has to reset */
       reset_job->cpu.query_reset.pool = pool;
       reset_job->cpu.query_reset.first = first;
       reset_job->cpu.query_reset.count = count;

       list_addtail(&reset_job->list_link, &cmd_buffer->jobs);
    }
    
    /* vkCmdResetQueryPool entry point: occlusion pools are reset with a GPU
     * compute job, every other query type with a CPU job.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                           VkQueryPool queryPool,
                           uint32_t firstQuery,
                           uint32_t queryCount)
    {
       V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd, commandBuffer);
       V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);

       /* Resets are only legal outside a render pass instance, so no job can
        * be in the middle of recording.
        */
       assert(cmd->state.pass == NULL);
       assert(cmd->state.job == NULL);

       assert(firstQuery < query_pool->query_count);
       assert(firstQuery + queryCount <= query_pool->query_count);

       /* Non-occlusion queries are reset by a CPU job that will call
        * v3dv_reset_query_pool_cpu when executed in the queue.
        */
       if (query_pool->query_type != VK_QUERY_TYPE_OCCLUSION) {
          cmd_buffer_emit_reset_query_pool_cpu(cmd, query_pool,
                                               firstQuery, queryCount);
          return;
       }

       /* Occlusion queries can be reset in the GPU */
       cmd_buffer_emit_reset_query_pool(cmd, query_pool, firstQuery, queryCount);
    }
    
    /**
     * Creates a descriptor pool so we can create a descriptors for the destination
     * buffers of vkCmdCopyQueryResults for queries where this is implemented in
     * the GPU.
     */
    static VkResult
    create_storage_buffer_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
    {
       /* If this is not the first pool we create one for this command buffer
        * size it based on the size of the currently exhausted pool.
        */
       uint32_t descriptor_count = 32;
       if (cmd_buffer->meta.query.dspool != VK_NULL_HANDLE) {
          struct v3dv_descriptor_pool *exhausted_pool =
             v3dv_descriptor_pool_from_handle(cmd_buffer->meta.query.dspool);
          descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
       }
    
       /* Create the descriptor pool */
       cmd_buffer->meta.query.dspool = VK_NULL_HANDLE;
       VkDescriptorPoolSize pool_size = {
          .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
          .descriptorCount = descriptor_count,
       };
       VkDescriptorPoolCreateInfo info = {
          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
          .maxSets = descriptor_count,
          .poolSizeCount = 1,
          .pPoolSizes = &pool_size,
          .flags = 0,
       };
       VkResult result =
          v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
                                    &info,
                                    &cmd_buffer->device->vk.alloc,
                                    &cmd_buffer->meta.query.dspool);
    
       if (result == VK_SUCCESS) {
          assert(cmd_buffer->meta.query.dspool != VK_NULL_HANDLE);
          const VkDescriptorPool vk_pool = cmd_buffer->meta.query.dspool;
    
          v3dv_cmd_buffer_add_private_obj(
             cmd_buffer, (uintptr_t) vk_pool,
             (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
    
          struct v3dv_descriptor_pool *pool =
             v3dv_descriptor_pool_from_handle(vk_pool);
          pool->is_driver_internal = true;
       }
    
       return result;
    }
    
    /**
     * Allocates one descriptor set with the single-storage-buffer layout from
     * the command buffer's query descriptor pool, creating the pool on first
     * use and replacing it with a new one if the current pool runs out of
     * space.
     *
     * Returns VK_SUCCESS with the set stored in 'set', or the error reported
     * by pool creation or set allocation.
     */
    static VkResult
    allocate_storage_buffer_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
                                           VkDescriptorSet *set)
    {
       /* A descriptor pool is required before anything can be allocated */
       if (cmd_buffer->meta.query.dspool == VK_NULL_HANDLE) {
          VkResult pool_result = create_storage_buffer_descriptor_pool(cmd_buffer);
          if (pool_result != VK_SUCCESS)
             return pool_result;
       }
       assert(cmd_buffer->meta.query.dspool != VK_NULL_HANDLE);

       /* First allocation attempt, from the current pool */
       struct v3dv_device *dev = cmd_buffer->device;
       VkDevice dev_handle = v3dv_device_to_handle(dev);
       VkDescriptorSetAllocateInfo alloc_info = {
          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
          .descriptorPool = cmd_buffer->meta.query.dspool,
          .descriptorSetCount = 1,
          .pSetLayouts = &dev->queries.buf_descriptor_set_layout,
       };
       VkResult result = v3dv_AllocateDescriptorSets(dev_handle, &alloc_info, set);
       if (result != VK_ERROR_OUT_OF_POOL_MEMORY)
          return result;

       /* The pool is exhausted: create a new one and retry once */
       result = create_storage_buffer_descriptor_pool(cmd_buffer);
       if (result != VK_SUCCESS)
          return result;

       alloc_info.descriptorPool = cmd_buffer->meta.query.dspool;
       return v3dv_AllocateDescriptorSets(dev_handle, &alloc_info, set);
    }
    
    /* Maps the query result flags to an index in the range [0, 7]:
     * bit 0 = 64-bit results, bit 1 = with availability, bit 2 = partial.
     */
    static uint32_t
    copy_pipeline_index_from_flags(VkQueryResultFlags flags)
    {
       const uint32_t bit_64 = (flags & VK_QUERY_RESULT_64_BIT) ? 1u : 0u;
       const uint32_t bit_avail =
          (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 2u : 0u;
       const uint32_t bit_partial =
          (flags & VK_QUERY_RESULT_PARTIAL_BIT) ? 4u : 0u;

       const uint32_t index = bit_64 | bit_avail | bit_partial;
       assert(index < 8);
       return index;
    }
    
    /* Forward declaration: the copy shader builder is defined further down in
     * this file but is needed by cmd_buffer_emit_copy_query_pool_results.
     */
    static nir_shader *
    get_copy_query_results_cs(const nir_shader_compiler_options *compiler_options,
                              VkQueryResultFlags flags);
    
    /**
     * Records a compute dispatch that copies the results of 'count' queries,
     * starting at 'first', from the pool into 'buf'.
     *
     * The copy pipeline variant is selected from 'flags' and created on first
     * use. If pipeline creation or descriptor set allocation fails, an error is
     * logged and the copy is not recorded.
     */
    static void
    cmd_buffer_emit_copy_query_pool_results(struct v3dv_cmd_buffer *cmd_buffer,
                                            struct v3dv_query_pool *pool,
                                            uint32_t first, uint32_t count,
                                            struct v3dv_buffer *buf,
                                            uint32_t offset, uint32_t stride,
                                            VkQueryResultFlags flags)
    {
       struct v3dv_device *dev = cmd_buffer->device;
       VkDevice dev_handle = v3dv_device_to_handle(dev);
       VkCommandBuffer cmd_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);

       /* Build the pipeline variant for these flags if it doesn't exist yet */
       const uint32_t variant = copy_pipeline_index_from_flags(flags);
       if (!dev->queries.copy_pipeline[variant]) {
          const nir_shader_compiler_options *nir_options =
             v3dv_pipeline_get_nir_options(&dev->devinfo);
          nir_shader *copy_cs = get_copy_query_results_cs(nir_options, flags);
          VkResult create_result =
             v3dv_create_compute_pipeline_from_nir(
                   dev, copy_cs,
                   dev->queries.copy_pipeline_layout,
                   &dev->queries.copy_pipeline[variant]);
          ralloc_free(copy_cs);
          if (create_result != VK_SUCCESS) {
             mesa_loge("Failed to create copy query results pipeline\n");
             return;
          }
       }

       /* FIXME: do we need this barrier? Since vkCmdEndQuery should've been called
        * and that already waits maybe we don't (since this is serialized
        * in the compute queue with EndQuery anyway).
        */
       if (flags & VK_QUERY_RESULT_WAIT_BIT) {
          VkMemoryBarrier2 gfx_to_compute = {
             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
             .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
             .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
          };
          VkDependencyInfo dep_info = {
             .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
             .memoryBarrierCount = 1,
             .pMemoryBarriers = &gfx_to_compute,
          };
          v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &dep_info);
       }

       /* Get a descriptor set for the destination buffer... */
       VkDescriptorSet dst_set;
       VkResult alloc_result =
          allocate_storage_buffer_descriptor_set(cmd_buffer, &dst_set);
       if (alloc_result != VK_SUCCESS) {
          mesa_loge("vkCmdCopyQueryPoolResults failed: "
                    "could not allocate descriptor.\n");
          return;
       }

       /* ...and point it at the whole buffer; the destination offset is
        * provided to the shader through push constants instead.
        */
       VkDescriptorBufferInfo dst_buf_info = {
          .buffer = v3dv_buffer_to_handle(buf),
          .offset = 0,
          .range = VK_WHOLE_SIZE,
       };
       VkWriteDescriptorSet dst_write = {
          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstSet = dst_set,
          .dstBinding = 0,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .pBufferInfo = &dst_buf_info,
       };
       v3dv_UpdateDescriptorSets(dev_handle, 1, &dst_write, 0, NULL);

       /* Record the copy between a meta state push/pop pair */
       v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

       assert(dev->queries.copy_pipeline[variant]);
       v3dv_CmdBindPipeline(cmd_handle,
                            VK_PIPELINE_BIND_POINT_COMPUTE,
                            dev->queries.copy_pipeline[variant]);

       /* Set 0: query pool buffer, set 1: destination buffer */
       VkDescriptorSet bound_sets[2] = {
          pool->meta.descriptor_set,
          dst_set,
       };
       v3dv_CmdBindDescriptorSets(cmd_handle,
                                  VK_PIPELINE_BIND_POINT_COMPUTE,
                                  dev->queries.copy_pipeline_layout,
                                  0, 2, bound_sets, 0, NULL);

       struct {
          uint32_t avail_offset, first, offset, stride, flags;
       } constants = {
          .avail_offset = pool->occlusion.avail_offset,
          .first = first,
          .offset = offset,
          .stride = stride,
          .flags = flags,
       };
       v3dv_CmdPushConstants(cmd_handle,
                             dev->queries.copy_pipeline_layout,
                             VK_SHADER_STAGE_COMPUTE_BIT,
                             0, sizeof(constants), &constants);

       cmd_buffer_emit_dispatch_queries(cmd_buffer, count);

       v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
    }
    
    /* CPU copy path: appends a V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS job that
     * carries all the copy parameters to the command buffer's job list.
     */
    static void
    cmd_buffer_emit_copy_query_pool_results_cpu(struct v3dv_cmd_buffer *cmd_buffer,
                                                struct v3dv_query_pool *pool,
                                                uint32_t first,
                                                uint32_t count,
                                                struct v3dv_buffer *dst,
                                                uint32_t offset,
                                                uint32_t stride,
                                                VkQueryResultFlags flags)
    {
       struct v3dv_job *copy_job =
          v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                         V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
                                         cmd_buffer, -1);
       v3dv_return_if_oom(cmd_buffer, NULL);

       /* Source query range */
       copy_job->cpu.query_copy_results.pool = pool;
       copy_job->cpu.query_copy_results.first = first;
       copy_job->cpu.query_copy_results.count = count;

       /* Destination and result layout */
       copy_job->cpu.query_copy_results.dst = dst;
       copy_job->cpu.query_copy_results.offset = offset;
       copy_job->cpu.query_copy_results.stride = stride;
       copy_job->cpu.query_copy_results.flags = flags;

       list_addtail(&copy_job->list_link, &cmd_buffer->jobs);
    }
    
    /* vkCmdCopyQueryPoolResults entry point: occlusion results are copied with
     * a GPU compute job, every other query type with a CPU job.
     *
     * Note that dstOffset and stride are narrowed to 32 bits before being
     * handed to either path.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 VkBuffer dstBuffer,
                                 VkDeviceSize dstOffset,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
    {
       V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd, commandBuffer);
       V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
       V3DV_FROM_HANDLE(v3dv_buffer, dst_buf, dstBuffer);

       /* Copies are only legal outside a render pass instance, so no job can
        * be in the middle of recording.
        */
       assert(cmd->state.pass == NULL);
       assert(cmd->state.job == NULL);

       assert(firstQuery < query_pool->query_count);
       assert(firstQuery + queryCount <= query_pool->query_count);

       const uint32_t dst_offset32 = (uint32_t) dstOffset;
       const uint32_t stride32 = (uint32_t) stride;

       /* Non-occlusion queries are copied by a CPU job that will call
        * v3dv_get_query_pool_results_cpu when executed in the queue.
        */
       if (query_pool->query_type != VK_QUERY_TYPE_OCCLUSION) {
          cmd_buffer_emit_copy_query_pool_results_cpu(cmd, query_pool,
                                                      firstQuery, queryCount,
                                                      dst_buf, dst_offset32,
                                                      stride32, flags);
          return;
       }

       /* Occlusion queries are copied in the GPU */
       cmd_buffer_emit_copy_query_pool_results(cmd, query_pool,
                                               firstQuery, queryCount,
                                               dst_buf, dst_offset32,
                                               stride32, flags);
    }
    
    /* vkCmdBeginQuery entry point: unwraps the handles and forwards to
     * v3dv_cmd_buffer_begin_query.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_CmdBeginQuery(VkCommandBuffer commandBuffer,
                       VkQueryPool queryPool,
                       uint32_t query,
                       VkQueryControlFlags flags)
    {
       V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd, commandBuffer);
       V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);

       v3dv_cmd_buffer_begin_query(cmd, query_pool, query, flags);
    }
    
    /* vkCmdEndQuery entry point: unwraps the handles and forwards to
     * v3dv_cmd_buffer_end_query.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_CmdEndQuery(VkCommandBuffer commandBuffer,
                     VkQueryPool queryPool,
                     uint32_t query)
    {
       V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd, commandBuffer);
       V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);

       v3dv_cmd_buffer_end_query(cmd, query_pool, query);
    }
    
    /**
     * Resets 'count' queries of 'pool', starting at 'first', from the CPU.
     * The whole operation runs with device->query_mutex held.
     *
     *  - Timestamp queries: the stored values are zeroed and the sync object
     *    of each query is reset.
     *  - Occlusion queries: the availability byte and the 32-bit counter of
     *    each query are zeroed.
     *  - Performance queries: the kernel perfmons of each query are destroyed
     *    and re-created and the query's last job sync is reset.
     *
     * A failure to reset a sync object is logged but otherwise ignored.
     */
    void
    v3dv_reset_query_pool_cpu(struct v3dv_device *device,
                              struct v3dv_query_pool *pool,
                              uint32_t first,
                              uint32_t count)
    {
       mtx_lock(&device->query_mutex);

       if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
          assert(first + count <= pool->query_count);

          /* Reset timestamp: 8 bytes per query, cleared as one contiguous
           * range starting at the offset of the first query.
           */
          uint8_t *base_addr;
          base_addr  = ((uint8_t *) pool->timestamp.bo->map) +
                        pool->queries[first].timestamp.offset;
          memset(base_addr, 0, 8 * count);

          for (uint32_t i = first; i < first + count; i++) {
             if (vk_sync_reset(&device->vk, pool->queries[i].timestamp.sync) != VK_SUCCESS)
                mesa_loge("Failed to reset sync");
          }

          mtx_unlock(&device->query_mutex);
          return;
       }

       for (uint32_t i = first; i < first + count; i++) {
          assert(i < pool->query_count);
          struct v3dv_query *q = &pool->queries[i];
          q->maybe_available = false;
          switch (pool->query_type) {
          case VK_QUERY_TYPE_OCCLUSION: {
             uint8_t *bo_map = (uint8_t *) pool->occlusion.bo->map;

             /* Reset availability: 1 byte per query. Only this query's byte
              * is cleared here so the whole range is written exactly once
              * over the loop, rather than once per iteration.
              */
             bo_map[pool->occlusion.avail_offset + i] = 0;

             /* Reset occlusion counter */
             uint32_t *counter = (uint32_t *) (bo_map + q->occlusion.offset);
             *counter = 0;
             break;
          }
          case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
             kperfmon_destroy(device, pool, i);
             kperfmon_create(device, pool, i);
             if (vk_sync_reset(&device->vk, q->perf.last_job_sync) != VK_SUCCESS)
                mesa_loge("Failed to reset sync");
             break;
          default:
             unreachable("Unsupported query type");
          }
       }

       mtx_unlock(&device->query_mutex);
    }
    
    /* vkResetQueryPool entry point: unwraps the handles and resets the query
     * range through v3dv_reset_query_pool_cpu.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_ResetQueryPool(VkDevice _device,
                        VkQueryPool queryPool,
                        uint32_t firstQuery,
                        uint32_t queryCount)
    {
       V3DV_FROM_HANDLE(v3dv_device, dev, _device);
       V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);

       v3dv_reset_query_pool_cpu(dev, query_pool, firstQuery, queryCount);
    }
    
    /* Enumerates the performance counters of the physical device, filling the
     * counter and description output arrays in lockstep.
     *
     * Every counter is reported with generic unit, command scope and uint64
     * storage; its UUID is taken from the SHA1 of the counter name. The
     * returned status is that of the counters array.
     */
    VKAPI_ATTR VkResult VKAPI_CALL
    v3dv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
       VkPhysicalDevice physicalDevice,
       uint32_t queueFamilyIndex,
       uint32_t *pCounterCount,
       VkPerformanceCounterKHR *pCounters,
       VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
    {
       V3DV_FROM_HANDLE(v3dv_physical_device, pdev, physicalDevice);

       /* The descriptions array works on a private copy of the count so that
        * only the counters array updates *pCounterCount.
        */
       uint32_t description_count = *pCounterCount;
       uint8_t counter_total = pdev->perfcntr->max_perfcnt;

       VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR,
                              counters_out, pCounters, pCounterCount);
       VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR,
                              descriptions_out, pCounterDescriptions,
                              &description_count);

       for (int idx = 0; idx < counter_total; idx++) {
          const struct v3d_perfcntr_desc *info =
             v3d_perfcntrs_get_by_index(pdev->perfcntr, idx);

          vk_outarray_append_typed(VkPerformanceCounterKHR, &counters_out, entry) {
             entry->unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR;
             entry->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
             entry->storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR;

             /* The UUID is the leading bytes of the name's SHA1 digest */
             unsigned char name_sha1[20];
             _mesa_sha1_compute(info->name, strlen(info->name), name_sha1);

             memcpy(entry->uuid, name_sha1, sizeof(entry->uuid));
          }

          vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR,
                                   &descriptions_out, text) {
             text->flags = 0;
             snprintf(text->name, sizeof(text->name), "%s", info->name);
             snprintf(text->category, sizeof(text->category), "%s", info->category);
             snprintf(text->description, sizeof(text->description), "%s", info->description);
          }
       }

       return vk_outarray_status(&counters_out);
    }
    
    /* Reports the number of passes needed for the requested counters: one pass
     * per group of up to DRM_V3D_MAX_PERF_COUNTERS counters.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
       VkPhysicalDevice physicalDevice,
       const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo,
       uint32_t *pNumPasses)
    {
       const uint32_t requested_counters =
          pPerformanceQueryCreateInfo->counterIndexCount;

       *pNumPasses = DIV_ROUND_UP(requested_counters, DRM_V3D_MAX_PERF_COUNTERS);
    }
    
    /* vkAcquireProfilingLockKHR entry point: no work is done here, the call
     * always reports VK_SUCCESS and both parameters are unused.
     */
    VKAPI_ATTR VkResult VKAPI_CALL
    v3dv_AcquireProfilingLockKHR(
       VkDevice _device,
       const VkAcquireProfilingLockInfoKHR *pInfo)
    {
       return VK_SUCCESS;
    }
    
    /* vkReleaseProfilingLockKHR entry point: intentionally empty, matching the
     * acquire entry point which does not take any lock.
     */
    VKAPI_ATTR void VKAPI_CALL
    v3dv_ReleaseProfilingLockKHR(VkDevice device)
    {
    }
    
    /* Emits a 1-component store of 'avail' to the availability entry of query
     * 'query_idx'. 'offset' is where the availability data starts in 'buf'.
     */
    static inline void
    nir_set_query_availability(nir_builder *b,
                               nir_def *buf,
                               nir_def *offset,
                               nir_def *query_idx,
                               nir_def *avail)
    {
       /* Availability takes 1B per query, so the query index is the byte
        * offset into the availability area.
        */
       nir_def *avail_addr = nir_iadd(b, offset, query_idx);
       nir_store_ssbo(b, avail, buf, avail_addr, .write_mask = 0x1, .align_mul = 1);
    }
    
    /* Emits an 8-bit load of the availability entry of query 'query_idx' and
     * returns it converted to a 32-bit integer. 'offset' is where the
     * availability data starts in 'buf'.
     */
    static inline nir_def *
    nir_get_query_availability(nir_builder *b,
                               nir_def *buf,
                               nir_def *offset,
                               nir_def *query_idx)
    {
       /* Availability takes 1B per query, so the query index is the byte
        * offset into the availability area.
        */
       nir_def *avail_addr = nir_iadd(b, offset, query_idx);
       nir_def *avail8 = nir_load_ssbo(b, 1, 8, buf, avail_addr, .align_mul = 1);
       return nir_i2i32(b, avail8);
    }
    
    /* Builds the compute shader that sets query availability.
     *
     * Inputs: the storage buffer at set 0, binding 0 and three push constants:
     * availability offset (4 bytes at 0), base query index (4 bytes at 4) and
     * the availability value to write (1 byte at 8). Each workgroup handles
     * the query at base index + workgroup id.
     */
    static nir_shader *
    get_set_query_availability_cs(const nir_shader_compiler_options *options)
    {
       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                      "set query availability cs");

       nir_def *pool_buf =
          nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                    .desc_set = 0,
                                    .binding = 0,
                                    .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

       /* The query index is derived from the X workgroup id alone, which is
        * only valid with a local size of 1 and a horizontal-only dispatch.
        * Changing either requires updating this computation.
        */
       nir_def *group_x = nir_channel(&b, nir_load_workgroup_id(&b), 0);

       nir_def *avail_base =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);

       nir_def *first_query =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);

       nir_def *avail_value =
          nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 8, .range = 1);

       nir_def *this_query = nir_iadd(&b, first_query, group_x);
       nir_set_query_availability(&b, pool_buf, avail_base, this_query, avail_value);

       return b.shader;
    }
    
    /* Emits the computation of the byte offset of an occlusion counter:
     *
     *    offset = (query_idx / 16) * 1024 + (query_idx % 16) * 4
     *
     * i.e. queries are laid out in groups of 16, with groups 1024 bytes apart
     * and 4 bytes per counter inside a group.
     */
    static inline nir_def *
    nir_get_occlusion_counter_offset(nir_builder *b, nir_def *query_idx)
    {
       nir_def *group = nir_udiv_imm(b, query_idx, 16);
       nir_def *slot_in_group = nir_umod_imm(b, query_idx, 16);
       return nir_iadd(b, nir_imul_imm(b, group, 1024),
                          nir_imul_imm(b, slot_in_group, 4));
    }
    
    /* Emits a 32-bit store of zero to the occlusion counter of 'query_idx' */
    static inline void
    nir_reset_occlusion_counter(nir_builder *b,
                                nir_def *buf,
                                nir_def *query_idx)
    {
       nir_def *counter_addr = nir_get_occlusion_counter_offset(b, query_idx);
       nir_def *cleared = nir_imm_int(b, 0);
       nir_store_ssbo(b, cleared, buf, counter_addr, .write_mask = 0x1, .align_mul = 4);
    }
    
    /* Emits a 32-bit load of the occlusion counter of 'query_idx' and returns
     * the loaded value.
     */
    static inline nir_def *
    nir_read_occlusion_counter(nir_builder *b,
                               nir_def *buf,
                               nir_def *query_idx)
    {
       nir_def *counter_addr = nir_get_occlusion_counter_offset(b, query_idx);
       nir_def *counter =
          nir_load_ssbo(b, 1, 32, buf, counter_addr, .access = 0, .align_mul = 4);
       return counter;
    }
    
    /* Builds the compute shader that resets occlusion queries.
     *
     * Inputs: the storage buffer at set 0, binding 0 and two push constants:
     * availability offset (4 bytes at 0) and base query index (4 bytes at 4).
     * Each workgroup clears the availability and the counter of the query at
     * base index + workgroup id.
     */
    static nir_shader *
    get_reset_occlusion_query_cs(const nir_shader_compiler_options *options)
    {
       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                      "reset occlusion query cs");

       nir_def *pool_buf =
          nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                    .desc_set = 0,
                                    .binding = 0,
                                    .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

       /* The query index is derived from the X workgroup id alone, which is
        * only valid with a local size of 1 and a horizontal-only dispatch.
        * Changing either requires updating this computation.
        */
       nir_def *group_x = nir_channel(&b, nir_load_workgroup_id(&b), 0);

       nir_def *avail_base =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);

       nir_def *first_query =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);

       nir_def *this_query = nir_iadd(&b, first_query, group_x);

       /* Mark the query as unavailable, then zero its counter */
       nir_def *unavailable = nir_imm_intN_t(&b, 0, 8);
       nir_set_query_availability(&b, pool_buf, avail_base, this_query,
                                  unavailable);
       nir_reset_occlusion_counter(&b, pool_buf, this_query);

       return b.shader;
    }
    
    /* Emits the store of a 32-bit 'value' at 'offset' in 'buf', either as a
     * 32-bit write or, when 'flag_64bit' is set, as a 64-bit write whose upper
     * half is zero.
     */
    static void
    write_query_buffer(nir_builder *b,
                       nir_def *buf,
                       nir_def *offset,
                       nir_def *value,
                       bool flag_64bit)
    {
       if (!flag_64bit) {
          nir_store_ssbo(b, value, buf, offset, .write_mask = 0x1, .align_mul = 4);
          return;
       }

       /* A vec2 with a zero .Y component lets us emit the 64-bit value with a
        * single store.
        */
       nir_def *wide_value = nir_vec2(b, value, nir_imm_int(b, 0));
       nir_store_ssbo(b, wide_value, buf, offset, .write_mask = 0x3, .align_mul = 8);
    }
    
    /* Builds the compute shader variant that copies occlusion query results
     * for the given result flags.
     *
     * Inputs: the query pool buffer at set 0, the destination buffer at set 1
     * and four 32-bit push constants (availability offset, base query index,
     * base destination offset and stride). Each workgroup copies the query at
     * base index + workgroup id to base offset + workgroup id * stride.
     */
    static nir_shader *
    get_copy_query_results_cs(const nir_shader_compiler_options *options,
                              VkQueryResultFlags flags)
    {
       const bool want_64bit = (flags & VK_QUERY_RESULT_64_BIT) != 0;
       const bool want_avail = (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) != 0;
       const bool want_partial = (flags & VK_QUERY_RESULT_PARTIAL_BIT) != 0;

       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                      "copy query results cs");

       nir_def *pool_buf =
          nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                    .desc_set = 0,
                                    .binding = 0,
                                    .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

       nir_def *dst_buf =
          nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                    .desc_set = 1,
                                    .binding = 0,
                                    .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

       /* Push constants */
       nir_def *avail_base =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);

       nir_def *first_query =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);

       nir_def *dst_base =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 8, .range = 4);

       nir_def *dst_stride =
          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 12, .range = 4);

       /* The query index is derived from the X workgroup id alone, which is
        * only valid with a local size of 1 and a horizontal-only dispatch.
        * Changing either requires updating this computation.
        */
       nir_def *group_x = nir_channel(&b, nir_load_workgroup_id(&b), 0);
       nir_def *this_query = nir_iadd(&b, first_query, group_x);

       /* Availability is needed either to report it or to decide whether
        * the result can be written at all.
        */
       nir_def *avail = NULL;
       if (want_avail || !want_partial)
          avail = nir_get_query_availability(&b, pool_buf, avail_base, this_query);

       /* Destination of this query's result */
       nir_def *dst_offset =
          nir_iadd(&b, dst_base, nir_imul(&b, group_x, dst_stride));

       /* Write the occlusion query result: always when partial results are
        * requested, otherwise only if the query is available.
        */
       nir_if *avail_guard = NULL;
       if (!want_partial)
          avail_guard = nir_push_if(&b, nir_ine_imm(&b, avail, 0));

       nir_def *counter = nir_read_occlusion_counter(&b, pool_buf, this_query);
       write_query_buffer(&b, dst_buf, dst_offset, counter, want_64bit);

       if (!want_partial)
          nir_pop_if(&b, avail_guard);

       /* Availability goes right after the result, using the same width */
       if (want_avail) {
          nir_def *avail_dst = nir_iadd_imm(&b, dst_offset, want_64bit ? 8 : 4);
          write_query_buffer(&b, dst_buf, avail_dst, avail, want_64bit);
       }

       return b.shader;
    }
    
    static bool
    create_query_pipelines(struct v3dv_device *device)
    {
       VkResult result;
       VkPipeline pipeline;
    
       /* Set layout: single storage buffer */
       if (!device->queries.buf_descriptor_set_layout) {
          VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
             .binding = 0,
             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .descriptorCount = 1,
             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          };
          VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
             .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
             .bindingCount = 1,
             .pBindings = &descriptor_set_layout_binding,
          };
          result =
             v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                            &descriptor_set_layout_info,
                                            &device->vk.alloc,
                                            &device->queries.buf_descriptor_set_layout);
          if (result != VK_SUCCESS)
             return false;
       }
    
       /* Set availability pipeline.
        *
        * Pipeline layout:
        *  - 1 storage buffer for the BO with the query availability.
        *  - 2 push constants:
        *    0B: offset of the availability info in the buffer (4 bytes)
        *    4B: base query index (4 bytes).
        *    8B: availability (1 byte).
        */
       if (!device->queries.avail_pipeline_layout) {
          VkPipelineLayoutCreateInfo pipeline_layout_info = {
             .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
             .setLayoutCount = 1,
             .pSetLayouts = &device->queries.buf_descriptor_set_layout,
             .pushConstantRangeCount = 1,
             .pPushConstantRanges =
                 &(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 9 },
          };
    
          result =
             v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                       &pipeline_layout_info,
                                       &device->vk.alloc,
                                       &device->queries.avail_pipeline_layout);
    
          if (result != VK_SUCCESS)
             return false;
       }
    
       const nir_shader_compiler_options *compiler_options =
          v3dv_pipeline_get_nir_options(&device->devinfo);
    
       if (!device->queries.avail_pipeline) {
          nir_shader *set_query_availability_cs_nir =
             get_set_query_availability_cs(compiler_options);
          result = v3dv_create_compute_pipeline_from_nir(device,
                                                         set_query_availability_cs_nir,
                                                         device->queries.avail_pipeline_layout,
                                                         &pipeline);
          ralloc_free(set_query_availability_cs_nir);
          if (result != VK_SUCCESS)
             return false;
    
          device->queries.avail_pipeline = pipeline;
       }
    
       /* Reset occlusion query pipeline.
        *
        * Pipeline layout:
        *  - 1 storage buffer for the BO with the occlusion and availability data.
        *  - Push constants:
        *    0B: offset of the availability info in the buffer (4B)
        *    4B: base query index (4B)
        */
       if (!device->queries.reset_occlusion_pipeline_layout) {
          VkPipelineLayoutCreateInfo pipeline_layout_info = {
             .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
             .setLayoutCount = 1,
             .pSetLayouts = &device->queries.buf_descriptor_set_layout,
             .pushConstantRangeCount = 1,
             .pPushConstantRanges =
                 &(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 8 },
          };
    
          result =
             v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                       &pipeline_layout_info,
                                       &device->vk.alloc,
                                       &device->queries.reset_occlusion_pipeline_layout);
    
          if (result != VK_SUCCESS)
             return false;
       }
    
       if (!device->queries.reset_occlusion_pipeline) {
          nir_shader *reset_occlusion_query_cs_nir =
             get_reset_occlusion_query_cs(compiler_options);
          result = v3dv_create_compute_pipeline_from_nir(
                      device,
                      reset_occlusion_query_cs_nir,
                      device->queries.reset_occlusion_pipeline_layout,
                      &pipeline);
          ralloc_free(reset_occlusion_query_cs_nir);
          if (result != VK_SUCCESS)
             return false;
    
          device->queries.reset_occlusion_pipeline = pipeline;
       }
    
       /* Copy query results pipelines.
        *
        * Pipeline layout:
        *  - 1 storage buffer for the BO with the query availability and occlusion.
        *  - 1 storage buffer for the output.
        *  - Push constants:
        *    0B: offset of the availability info in the buffer (4B)
        *    4B: base query index (4B)
        *    8B: offset into output buffer (4B)
        *    12B: stride (4B)
        *
        * We create multiple specialized pipelines depending on the copy flags
        * to remove conditionals from the copy shader and get more optimized
        * pipelines.
        */
       if (!device->queries.copy_pipeline_layout) {
          VkDescriptorSetLayout set_layouts[2] = {
             device->queries.buf_descriptor_set_layout,
             device->queries.buf_descriptor_set_layout
          };
          VkPipelineLayoutCreateInfo pipeline_layout_info = {
             .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
             .setLayoutCount = 2,
             .pSetLayouts = set_layouts,
             .pushConstantRangeCount = 1,
             .pPushConstantRanges =
                 &(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 16 },
          };
    
          result =
             v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                       &pipeline_layout_info,
                                       &device->vk.alloc,
                                       &device->queries.copy_pipeline_layout);
    
          if (result != VK_SUCCESS)
             return false;
       }
    
       /* Actual copy pipelines are created lazily on demand since there can be up
        * to 8 depending on the flags used, however it is likely that applications
        * will use the same flags every time and only one pipeline is required.
        */
    
       return true;
    }
    
    static void
    destroy_query_pipelines(struct v3dv_device *device)
    {
       VkDevice _device = v3dv_device_to_handle(device);
    
       /* Availability pipeline */
       v3dv_DestroyPipeline(_device, device->queries.avail_pipeline,
                             &device->vk.alloc);
       device->queries.avail_pipeline = VK_NULL_HANDLE;
       v3dv_DestroyPipelineLayout(_device, device->queries.avail_pipeline_layout,
                                  &device->vk.alloc);
       device->queries.avail_pipeline_layout = VK_NULL_HANDLE;
    
       /* Reset occlusion pipeline */
       v3dv_DestroyPipeline(_device, device->queries.reset_occlusion_pipeline,
                             &device->vk.alloc);
       device->queries.reset_occlusion_pipeline = VK_NULL_HANDLE;
       v3dv_DestroyPipelineLayout(_device,
                                  device->queries.reset_occlusion_pipeline_layout,
                                  &device->vk.alloc);
       device->queries.reset_occlusion_pipeline_layout = VK_NULL_HANDLE;
    
       /* Copy pipelines */
       for (int i = 0; i < 8; i++) {
          v3dv_DestroyPipeline(_device, device->queries.copy_pipeline[i],
                                &device->vk.alloc);
          device->queries.copy_pipeline[i] = VK_NULL_HANDLE;
       }
       v3dv_DestroyPipelineLayout(_device, device->queries.copy_pipeline_layout,
                                  &device->vk.alloc);
       device->queries.copy_pipeline_layout = VK_NULL_HANDLE;
    
       v3dv_DestroyDescriptorSetLayout(_device,
                                       device->queries.buf_descriptor_set_layout,
                                       &device->vk.alloc);
       device->queries.buf_descriptor_set_layout = VK_NULL_HANDLE;
    }
    
    /**
     * Allocates the device resources used to implement certain types of queries
     * by creating the query pipelines and their layouts.
     *
     * Returns VK_SUCCESS when everything was created. Any failure is reported
     * as VK_ERROR_OUT_OF_HOST_MEMORY, whatever the underlying error was.
     */
    VkResult
    v3dv_query_allocate_resources(struct v3dv_device *device)
    {
       if (create_query_pipelines(device))
          return VK_SUCCESS;
    
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
    }
    
    /**
     * Releases the device resources allocated by v3dv_query_allocate_resources()
     * by destroying the query pipelines, their layouts and the descriptor set
     * layout they share.
     */
    void
    v3dv_query_free_resources(struct v3dv_device *device)
    {
       destroy_query_pipelines(device);
    }