Edit

IABSD.fr/xenocara/lib/mesa/src/imagination/vulkan/pvr_queue.c

Branch :

  • Show log

    Commit

  • Author : jsg
    Date : 2025-06-05 11:23:11
    Hash : 67d6f117
    Message : Import Mesa 25.0.7

  • lib/mesa/src/imagination/vulkan/pvr_queue.c
  • /*
     * Copyright © 2022 Imagination Technologies Ltd.
     *
     * based in part on radv driver which is:
     * Copyright © 2016 Red Hat.
     * Copyright © 2016 Bas Nieuwenhuizen
     *
     * Permission is hereby granted, free of charge, to any person obtaining a copy
     * of this software and associated documentation files (the "Software"), to deal
     * in the Software without restriction, including without limitation the rights
     * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     * copies of the Software, and to permit persons to whom the Software is
     * furnished to do so, subject to the following conditions:
     *
     * The above copyright notice and this permission notice (including the next
     * paragraph) shall be included in all copies or substantial portions of the
     * Software.
     *
     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     * SOFTWARE.
     */
    
    /**
     * This file implements VkQueue, VkFence, and VkSemaphore
     */
    
    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <vulkan/vulkan.h>
    
    #include "pvr_job_compute.h"
    #include "pvr_job_context.h"
    #include "pvr_job_render.h"
    #include "pvr_job_transfer.h"
    #include "pvr_limits.h"
    #include "pvr_private.h"
    #include "util/macros.h"
    #include "util/u_atomic.h"
    #include "vk_alloc.h"
    #include "vk_fence.h"
    #include "vk_log.h"
    #include "vk_object.h"
    #include "vk_queue.h"
    #include "vk_semaphore.h"
    #include "vk_sync.h"
    #include "vk_sync_dummy.h"
    #include "vk_util.h"
    
    /* Forward declaration so that pvr_queue_init() can install this function as
     * the queue's vk_queue driver_submit callback.
     */
    static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                            struct vk_queue_submit *submit);
    
    /**
     * Bring up one queue of \p device.
     *
     * Zeroes \p queue, initialises the common vk_queue (enabling the submit
     * thread when the winsys reports support for threaded submission) and then
     * creates the transfer, compute, query and render contexts, all with medium
     * priority. The contexts are only stored in \p queue once every one of them
     * has been created successfully.
     *
     * \param[in]  device           Device that owns the queue.
     * \param[out] queue            Queue storage to initialise.
     * \param[in]  pCreateInfo      Queue create info forwarded to vk_queue_init().
     * \param[in]  index_in_family  Index of the queue within its family.
     * \return VK_SUCCESS, or the error of the first failing step after everything
     *         set up before it has been torn down again.
     */
    static VkResult pvr_queue_init(struct pvr_device *device,
                                   struct pvr_queue *queue,
                                   const VkDeviceQueueCreateInfo *pCreateInfo,
                                   uint32_t index_in_family)
    {
       struct pvr_transfer_ctx *xfer;
       struct pvr_compute_ctx *compute;
       struct pvr_compute_ctx *query;
       struct pvr_render_ctx *render;
       VkResult status;
    
       *queue = (struct pvr_queue){ 0 };
    
       status =
          vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
       if (status != VK_SUCCESS)
          return status;
    
       if (device->ws->features.supports_threaded_submit) {
          status = vk_queue_enable_submit_thread(&queue->vk);
          if (status != VK_SUCCESS)
             goto fail_vk_queue;
       }
    
       status =
          pvr_transfer_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &xfer);
       if (status != VK_SUCCESS)
          goto fail_vk_queue;
    
       status =
          pvr_compute_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &compute);
       if (status != VK_SUCCESS)
          goto fail_transfer;
    
       status =
          pvr_compute_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &query);
       if (status != VK_SUCCESS)
          goto fail_compute;
    
       status =
          pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &render);
       if (status != VK_SUCCESS)
          goto fail_query;
    
       /* Everything exists: publish the contexts and hook up the submit entry
        * point.
        */
       queue->device = device;
       queue->transfer_ctx = xfer;
       queue->compute_ctx = compute;
       queue->query_ctx = query;
       queue->gfx_ctx = render;
       queue->vk.driver_submit = pvr_driver_queue_submit;
    
       return VK_SUCCESS;
    
       /* Unwind in reverse order of creation. */
    fail_query:
       pvr_compute_ctx_destroy(query);
    
    fail_compute:
       pvr_compute_ctx_destroy(compute);
    
    fail_transfer:
       pvr_transfer_ctx_destroy(xfer);
    
    fail_vk_queue:
       vk_queue_finish(&queue->vk);
    
       return status;
    }
    
    VkResult pvr_queues_create(struct pvr_device *device,
                               const VkDeviceCreateInfo *pCreateInfo)
    {
       VkResult result;
    
       /* Check requested queue families and queues */
       assert(pCreateInfo->queueCreateInfoCount == 1);
       assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
       assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
    
       const VkDeviceQueueCreateInfo *queue_create =
          &pCreateInfo->pQueueCreateInfos[0];
    
       device->queues = vk_alloc(&device->vk.alloc,
                                 queue_create->queueCount * sizeof(*device->queues),
                                 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!device->queues)
          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
    
       device->queue_count = 0;
    
       for (uint32_t i = 0; i < queue_create->queueCount; i++) {
          result = pvr_queue_init(device, &device->queues[i], queue_create, i);
          if (result != VK_SUCCESS)
             goto err_queues_finish;
    
          device->queue_count++;
       }
    
       return VK_SUCCESS;
    
    err_queues_finish:
       pvr_queues_destroy(device);
       return result;
    }
    
    /**
     * Tear down a queue previously set up by pvr_queue_init().
     *
     * Destroys any sync objects still tracked by the queue, then the render,
     * query, compute and transfer contexts, and finally the common vk_queue.
     */
    static void pvr_queue_finish(struct pvr_queue *queue)
    {
       uint32_t slot;
    
       /* Pending "next job must wait on this" syncs. */
       for (slot = 0; slot < ARRAY_SIZE(queue->next_job_wait_sync); slot++) {
          struct vk_sync *const pending = queue->next_job_wait_sync[slot];
    
          if (pending != NULL)
             vk_sync_destroy(&queue->device->vk, pending);
       }
    
       /* Completion syncs of the most recently submitted jobs. */
       for (slot = 0; slot < ARRAY_SIZE(queue->last_job_signal_sync); slot++) {
          struct vk_sync *const completed = queue->last_job_signal_sync[slot];
    
          if (completed != NULL)
             vk_sync_destroy(&queue->device->vk, completed);
       }
    
       pvr_render_ctx_destroy(queue->gfx_ctx);
       pvr_compute_ctx_destroy(queue->query_ctx);
       pvr_compute_ctx_destroy(queue->compute_ctx);
       pvr_transfer_ctx_destroy(queue->transfer_ctx);
    
       vk_queue_finish(&queue->vk);
    }
    
    /**
     * Finish the first device->queue_count queues of \p device and free the
     * queue array.
     */
    void pvr_queues_destroy(struct pvr_device *device)
    {
       uint32_t idx = 0;
    
       while (idx < device->queue_count) {
          struct pvr_queue *const queue = &device->queues[idx];
    
          pvr_queue_finish(queue);
          idx++;
       }
    
       vk_free(&device->vk.alloc, device->queues);
    }
    
    /**
     * Record the completion sync of a job that has just been submitted.
     *
     * The wait sync queued up for this job type is destroyed and cleared, and
     * the previous completion sync for the type (if any) is destroyed and
     * replaced by \p new_signal_sync.
     *
     * \param[in]     device              Device the syncs belong to.
     * \param[in,out] queue               Queue whose per-job-type syncs are updated.
     * \param[in]     new_signal_sync     Sync signalled by the submitted job; stored
     *                                    in the queue.
     * \param[in]     submitted_job_type  Job type used to index the sync arrays.
     */
    static void pvr_update_job_syncs(struct pvr_device *device,
                                     struct pvr_queue *queue,
                                     struct vk_sync *new_signal_sync,
                                     enum pvr_job_type submitted_job_type)
    {
       struct vk_sync **const wait_slot =
          &queue->next_job_wait_sync[submitted_job_type];
       struct vk_sync **const signal_slot =
          &queue->last_job_signal_sync[submitted_job_type];
    
       if (*wait_slot != NULL) {
          vk_sync_destroy(&device->vk, *wait_slot);
          *wait_slot = NULL;
       }
    
       if (*signal_slot != NULL)
          vk_sync_destroy(&device->vk, *signal_slot);
    
       *signal_slot = new_signal_sync;
    }
    
    /**
     * Submit a graphics sub command as one or, when a split is required, two
     * render jobs.
     *
     * A geometry signal sync is always created; a fragment signal sync is only
     * created when the job runs fragment work. On success the new syncs are
     * recorded in the queue through pvr_update_job_syncs(); on failure both are
     * destroyed.
     *
     * \param[in]     device      Device used to create and destroy syncs.
     * \param[in,out] queue       Queue providing the render context and the
     *                            per-job-type wait/signal syncs.
     * \param[in]     cmd_buffer  Not referenced by this function.
     * \param[in,out] sub_cmd     Graphics sub command. Its job flags and control
     *                            stream address are modified temporarily for a
     *                            split submit and restored before returning.
     * \return VK_SUCCESS, or the error from vk_sync_create() or
     *         pvr_render_job_submit().
     */
    static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
                                             struct pvr_queue *queue,
                                             struct pvr_cmd_buffer *cmd_buffer,
                                             struct pvr_sub_cmd_gfx *sub_cmd)
    {
       /* Stays zero unless a split submit swaps the control stream below. */
       pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
       struct vk_sync *geom_signal_sync;
       struct vk_sync *frag_signal_sync = NULL;
       VkResult result;
    
       result = vk_sync_create(&device->vk,
                               &device->pdevice->ws->syncobj_type,
                               0U,
                               0UL,
                               &geom_signal_sync);
       if (result != VK_SUCCESS)
          return result;
    
       if (sub_cmd->job.run_frag) {
          result = vk_sync_create(&device->vk,
                                  &device->pdevice->ws->syncobj_type,
                                  0U,
                                  0UL,
                                  &frag_signal_sync);
          if (result != VK_SUCCESS)
             goto err_destroy_geom_sync;
       }
    
       /* FIXME: DoShadowLoadOrStore() */
    
       /* Perform two render submits when using multiple framebuffer layers. The
        * first submit contains just geometry, while the second only terminates
        * (and triggers the fragment render if originally specified). This is needed
        * because the render target cache gets cleared on terminating submits, which
        * could result in missing primitives.
        */
       if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
          /* If fragment work shouldn't be run there's no need for a split,
           * and if geometry_terminate is false this kick can't have a fragment
           * stage without another terminating geometry kick.
           */
          assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);
    
          /* First submit must not touch fragment work. */
          sub_cmd->job.geometry_terminate = false;
          sub_cmd->job.run_frag = false;
    
          /* Only the geometry wait sync is passed; no fragment wait and no signal
           * syncs are supplied for this first submit.
           */
          result =
             pvr_render_job_submit(queue->gfx_ctx,
                                   &sub_cmd->job,
                                   queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                                   NULL,
                                   NULL,
                                   NULL);
    
          /* Restore the flags before looking at the result so the sub command is
           * left unmodified on the error path as well.
           */
          sub_cmd->job.geometry_terminate = true;
          sub_cmd->job.run_frag = true;
    
          if (result != VK_SUCCESS)
             goto err_destroy_frag_sync;
    
          original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;
    
          /* Second submit contains only a trivial control stream to terminate the
           * geometry work.
           */
          assert(sub_cmd->terminate_ctrl_stream);
          sub_cmd->job.ctrl_stream_addr =
             sub_cmd->terminate_ctrl_stream->vma->dev_addr;
       }
    
       result = pvr_render_job_submit(queue->gfx_ctx,
                                      &sub_cmd->job,
                                      queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                                      queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG],
                                      geom_signal_sync,
                                      frag_signal_sync);
    
       /* A non-zero saved address means the control stream was swapped for the
        * split submit above; put the original back regardless of the result.
        */
       if (original_ctrl_stream_addr.addr > 0)
          sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;
    
       if (result != VK_SUCCESS)
          goto err_destroy_frag_sync;
    
       /* The queue now tracks the new signal syncs. */
       pvr_update_job_syncs(device, queue, geom_signal_sync, PVR_JOB_TYPE_GEOM);
    
       if (sub_cmd->job.run_frag)
          pvr_update_job_syncs(device, queue, frag_signal_sync, PVR_JOB_TYPE_FRAG);
    
       /* FIXME: DoShadowLoadOrStore() */
    
       return VK_SUCCESS;
    
    err_destroy_frag_sync:
       /* NULL when the job has no fragment work. */
       if (frag_signal_sync)
          vk_sync_destroy(&device->vk, frag_signal_sync);
    err_destroy_geom_sync:
       vk_sync_destroy(&device->vk, geom_signal_sync);
    
       return result;
    }
    
    /**
     * Submit a compute sub command on the queue's compute context.
     *
     * A fresh sync is created for the job to signal. The job waits on the queue's
     * pending compute wait sync, and on success the new sync is recorded via
     * pvr_update_job_syncs(). If the submit fails the new sync is destroyed.
     *
     * \return VK_SUCCESS, or the error from vk_sync_create() or
     *         pvr_compute_job_submit().
     */
    static VkResult pvr_process_compute_cmd(struct pvr_device *device,
                                            struct pvr_queue *queue,
                                            struct pvr_sub_cmd_compute *sub_cmd)
    {
       struct vk_sync *done_sync;
       VkResult status = vk_sync_create(&device->vk,
                                        &device->pdevice->ws->syncobj_type,
                                        0U,
                                        0UL,
                                        &done_sync);
    
       if (status != VK_SUCCESS)
          return status;
    
       status =
          pvr_compute_job_submit(queue->compute_ctx,
                                 sub_cmd,
                                 queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE],
                                 done_sync);
    
       if (status == VK_SUCCESS)
          pvr_update_job_syncs(device, queue, done_sync, PVR_JOB_TYPE_COMPUTE);
       else
          vk_sync_destroy(&device->vk, done_sync);
    
       return status;
    }
    
    /**
     * Submit a transfer sub command on the queue's transfer context.
     *
     * A fresh sync is created for the job to signal. The job waits on the queue's
     * pending transfer wait sync, and on success the new sync is recorded via
     * pvr_update_job_syncs(). If the submit fails the new sync is destroyed.
     *
     * \return VK_SUCCESS, or the error from vk_sync_create() or
     *         pvr_transfer_job_submit().
     */
    static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
                                              struct pvr_queue *queue,
                                              struct pvr_sub_cmd_transfer *sub_cmd)
    {
       struct vk_sync *done_sync;
       VkResult status = vk_sync_create(&device->vk,
                                        &device->pdevice->ws->syncobj_type,
                                        0U,
                                        0UL,
                                        &done_sync);
    
       if (status != VK_SUCCESS)
          return status;
    
       status =
          pvr_transfer_job_submit(queue->transfer_ctx,
                                  sub_cmd,
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
                                  done_sync);
    
       if (status == VK_SUCCESS)
          pvr_update_job_syncs(device, queue, done_sync, PVR_JOB_TYPE_TRANSFER);
       else
          vk_sync_destroy(&device->vk, done_sync);
    
       return status;
    }
    
    /**
     * Submit an occlusion query sub command as a compute job on the queue's
     * dedicated query context.
     *
     * A fresh sync is created for the job to signal. The job waits on the queue's
     * pending occlusion query wait sync, and on success the new sync is recorded
     * via pvr_update_job_syncs(). If the submit fails the new sync is destroyed.
     *
     * \return VK_SUCCESS, or the error from vk_sync_create() or
     *         pvr_compute_job_submit().
     */
    static VkResult
    pvr_process_occlusion_query_cmd(struct pvr_device *device,
                                    struct pvr_queue *queue,
                                    struct pvr_sub_cmd_compute *sub_cmd)
    {
       struct vk_sync *done_sync;
       VkResult status;
    
       /* TODO: Currently we add barrier event sub commands to handle the sync
        * necessary for the different occlusion query types. Would we get any speed
        * up in processing the queue by doing that sync here without using event sub
        * commands?
        */
    
       status = vk_sync_create(&device->vk,
                               &device->pdevice->ws->syncobj_type,
                               0U,
                               0UL,
                               &done_sync);
       if (status != VK_SUCCESS)
          return status;
    
       status = pvr_compute_job_submit(
          queue->query_ctx,
          sub_cmd,
          queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
          done_sync);
    
       if (status == VK_SUCCESS) {
          pvr_update_job_syncs(device,
                               queue,
                               done_sync,
                               PVR_JOB_TYPE_OCCLUSION_QUERY);
       } else {
          vk_sync_destroy(&device->vk, done_sync);
       }
    
       return status;
    }
    
    /**
     * Process a barrier event sub command.
     *
     * Collects the completion syncs of the last jobs submitted for every source
     * stage. If there are none, nothing needs to be done. Otherwise, for each
     * destination stage a null job is submitted that waits on those syncs (plus
     * the stage's current next-job wait sync, if any) and signals a new sync,
     * which then replaces queue->next_job_wait_sync for that stage.
     *
     * \param[in]     device   Device used for sync creation and null job submits.
     * \param[in,out] queue    Queue whose next_job_wait_sync entries are updated.
     * \param[in]     sub_cmd  Barrier description (source and destination stages).
     * \return VK_SUCCESS, or the error from vk_sync_create() or the winsys
     *         null_job_submit().
     */
    static VkResult
    pvr_process_event_cmd_barrier(struct pvr_device *device,
                                  struct pvr_queue *queue,
                                  struct pvr_sub_cmd_event_barrier *sub_cmd)
    {
       /* Room for one sync per source job type plus the destination stage's
        * existing wait sync.
        */
       struct vk_sync_wait deps[PVR_JOB_TYPE_MAX + 1];
       const uint32_t wait_for = sub_cmd->wait_for_stage_mask;
       const uint32_t wait_at = sub_cmd->wait_at_stage_mask;
       uint32_t src_dep_count = 0;
    
       assert(!(wait_for & ~(PVR_PIPELINE_STAGE_ALL_BITS |
                             PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
       assert(!(wait_at & ~(PVR_PIPELINE_STAGE_ALL_BITS |
                            PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
    
       u_foreach_bit (stage, wait_for) {
          struct vk_sync *const last_done = queue->last_job_signal_sync[stage];
    
          if (last_done == NULL)
             continue;
    
          deps[src_dep_count] = (struct vk_sync_wait){
             .sync = last_done,
             .stage_mask = ~(VkPipelineStageFlags2)0,
             .wait_value = 0,
          };
          src_dep_count++;
       }
    
       /* No previous src jobs that need finishing so no need for a barrier. */
       if (!src_dep_count)
          return VK_SUCCESS;
    
       u_foreach_bit (stage, wait_at) {
          struct vk_sync **const next_wait = &queue->next_job_wait_sync[stage];
          /* The source deps are shared by every destination stage; anything
           * appended for this stage goes after them.
           */
          uint32_t dep_count = src_dep_count;
          struct vk_sync_signal barrier_signal;
          struct vk_sync *barrier_sync;
          VkResult status;
    
          status = vk_sync_create(&device->vk,
                                  &device->pdevice->ws->syncobj_type,
                                  0U,
                                  0UL,
                                  &barrier_sync);
          if (status != VK_SUCCESS)
             return status;
    
          barrier_signal = (struct vk_sync_signal){
             .sync = barrier_sync,
             .stage_mask = ~(VkPipelineStageFlags2)0,
             .signal_value = 0,
          };
    
          /* Chain onto whatever the next job of this stage already had to wait
           * for.
           */
          if (*next_wait != NULL) {
             deps[dep_count] = (struct vk_sync_wait){
                .sync = *next_wait,
                .stage_mask = ~(VkPipelineStageFlags2)0,
                .wait_value = 0,
             };
             dep_count++;
          }
    
          status = device->ws->ops->null_job_submit(device->ws,
                                                    deps,
                                                    dep_count,
                                                    &barrier_signal);
          if (status != VK_SUCCESS) {
             vk_sync_destroy(&device->vk, barrier_sync);
             return status;
          }
    
          if (*next_wait != NULL)
             vk_sync_destroy(&device->vk, *next_wait);
    
          *next_wait = barrier_sync;
       }
    
       return VK_SUCCESS;
    }
    
    static VkResult
    pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
                                       struct pvr_queue *queue,
                                       struct pvr_sub_cmd_event_set_reset *sub_cmd,
                                       const enum pvr_event_state new_event_state)
    {
       /* Not PVR_JOB_TYPE_MAX since that also includes
        * PVR_JOB_TYPE_OCCLUSION_QUERY so no stage in the src mask.
        */
       struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
       struct vk_sync_signal signal;
       struct vk_sync *signal_sync;
    
       uint32_t wait_count = 0;
       VkResult result;
    
       assert(!(sub_cmd->wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
    
       u_foreach_bit (stage, sub_cmd->wait_for_stage_mask) {
          if (!queue->last_job_signal_sync[stage])
             continue;
    
          waits[wait_count++] = (struct vk_sync_wait){
             .sync = queue->last_job_signal_sync[stage],
             .stage_mask = ~(VkPipelineStageFlags2)0,
             .wait_value = 0,
          };
       }
    
       result = vk_sync_create(&device->vk,
                               &device->pdevice->ws->syncobj_type,
                               0U,
                               0UL,
                               &signal_sync);
       if (result != VK_SUCCESS)
          return result;
    
       signal = (struct vk_sync_signal){
          .sync = signal_sync,
          .stage_mask = ~(VkPipelineStageFlags2)0,
          .signal_value = 0,
       };
    
       result =
          device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal);
       if (result != VK_SUCCESS) {
          vk_sync_destroy(&device->vk, signal_sync);
          return result;
       }
    
       if (sub_cmd->event->sync)
          vk_sync_destroy(&device->vk, sub_cmd->event->sync);
    
       sub_cmd->event->sync = signal_sync;
       sub_cmd->event->state = new_event_state;
    
       return VK_SUCCESS;
    }
    
    static inline VkResult
    pvr_process_event_cmd_set(struct pvr_device *device,
                              struct pvr_queue *queue,
                              struct pvr_sub_cmd_event_set_reset *sub_cmd)
    {
       return pvr_process_event_cmd_set_or_reset(device,
                                                 queue,
                                                 sub_cmd,
                                                 PVR_EVENT_STATE_SET_BY_DEVICE);
    }
    
    static inline VkResult
    pvr_process_event_cmd_reset(struct pvr_device *device,
                                struct pvr_queue *queue,
                                struct pvr_sub_cmd_event_set_reset *sub_cmd)
    {
       return pvr_process_event_cmd_set_or_reset(device,
                                                 queue,
                                                 sub_cmd,
                                                 PVR_EVENT_STATE_RESET_BY_DEVICE);
    }
    
    /**
     * \brief Process an event sub command of wait type.
     *
     * This sets up barrier syncobjs to create a dependency from the event syncobjs
     * onto the next job submissions.
     *
     * The barriers are setup by taking into consideration each event's dst stage
     * mask so this is in line with vkCmdWaitEvents2().
     *
     * For every destination stage named by at least one event, a null job is
     * submitted that waits on the syncs of the events targeting that stage (plus
     * the stage's current next-job wait sync, if any) and signals a new sync. The
     * new sync then replaces queue->next_job_wait_sync for that stage.
     *
     * \param[in] device      Device to create the syncobjs on.
     * \param[in,out] queue   Queue whose next_job_wait_sync entries are read as
     *                        input and replaced as output.
     * \param[in] sub_cmd     Sub command to process.
     * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the scratch wait array
     *         cannot be allocated, or the error from vk_sync_create() or the
     *         winsys null_job_submit().
     */
    static VkResult
    pvr_process_event_cmd_wait(struct pvr_device *device,
                               struct pvr_queue *queue,
                               struct pvr_sub_cmd_event_wait *sub_cmd)
    {
       uint32_t dst_mask = 0;
       VkResult result;
    
       /* One slot per event plus one for the stage's existing wait sync. */
       STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->count + 1);
       if (!waits)
          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
    
       for (uint32_t i = 0; i < sub_cmd->count; i++)
          dst_mask |= sub_cmd->wait_at_stage_masks[i];
    
       u_foreach_bit (stage, dst_mask) {
          /* u_foreach_bit() hands out the bit *index* (it is used as an array
           * index below), so it has to be turned back into a mask before being
           * tested against the per-event stage masks.
           */
          const uint32_t stage_bit = BITFIELD_BIT(stage);
          struct vk_sync_signal signal;
          struct vk_sync *signal_sync;
          uint32_t wait_count = 0;
    
          for (uint32_t i = 0; i < sub_cmd->count; i++) {
             if (sub_cmd->wait_at_stage_masks[i] & stage_bit) {
                waits[wait_count++] = (struct vk_sync_wait){
                   .sync = sub_cmd->events[i]->sync,
                   .stage_mask = ~(VkPipelineStageFlags2)0,
                   .wait_value = 0,
                };
             }
          }
    
          /* dst_mask is the union of the per-event masks, so every stage visited
           * here has at least one event; kept as a defensive check.
           */
          if (!wait_count)
             continue;
    
          /* Chain onto whatever the next job of this stage already waits for. */
          if (queue->next_job_wait_sync[stage]) {
             waits[wait_count++] = (struct vk_sync_wait){
                .sync = queue->next_job_wait_sync[stage],
                .stage_mask = ~(VkPipelineStageFlags2)0,
                .wait_value = 0,
             };
          }
    
          assert(wait_count <= (sub_cmd->count + 1));
    
          result = vk_sync_create(&device->vk,
                                  &device->pdevice->ws->syncobj_type,
                                  0U,
                                  0UL,
                                  &signal_sync);
          if (result != VK_SUCCESS)
             goto err_free_waits;
    
          signal = (struct vk_sync_signal){
             .sync = signal_sync,
             .stage_mask = ~(VkPipelineStageFlags2)0,
             .signal_value = 0,
          };
    
          result = device->ws->ops->null_job_submit(device->ws,
                                                    waits,
                                                    wait_count,
                                                    &signal);
          if (result != VK_SUCCESS) {
             vk_sync_destroy(&device->vk, signal_sync);
             goto err_free_waits;
          }
    
          /* The new sync supersedes the stage's previous wait sync. */
          if (queue->next_job_wait_sync[stage])
             vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);
    
          queue->next_job_wait_sync[stage] = signal_sync;
       }
    
       STACK_ARRAY_FINISH(waits);
    
       return VK_SUCCESS;
    
    err_free_waits:
       STACK_ARRAY_FINISH(waits);
    
       return result;
    }
    
    /**
     * Dispatch an event sub command to the handler matching its type (set,
     * reset, wait or barrier) and return that handler's result. Any other type
     * is treated as unreachable.
     */
    static VkResult pvr_process_event_cmd(struct pvr_device *device,
                                          struct pvr_queue *queue,
                                          struct pvr_sub_cmd_event *sub_cmd)
    {
       switch (sub_cmd->type) {
       case PVR_EVENT_TYPE_BARRIER:
          return pvr_process_event_cmd_barrier(device, queue, &sub_cmd->barrier);
    
       case PVR_EVENT_TYPE_WAIT:
          return pvr_process_event_cmd_wait(device, queue, &sub_cmd->wait);
    
       case PVR_EVENT_TYPE_RESET:
          return pvr_process_event_cmd_reset(device, queue, &sub_cmd->set_reset);
    
       case PVR_EVENT_TYPE_SET:
          return pvr_process_event_cmd_set(device, queue, &sub_cmd->set_reset);
    
       default:
          break;
       }
    
       unreachable("Invalid event sub-command type.");
    }
    
    /**
     * Submit every sub command recorded in a command buffer, in list order.
     *
     * Each sub command is dispatched by type to the matching pvr_process_*()
     * helper. Graphics and transfer sub commands may additionally have implicit
     * barriers inserted around them (see the individual cases). Processing stops
     * at the first failure.
     *
     * \param[in]     device      Device the work is submitted on.
     * \param[in,out] queue       Queue whose contexts and syncs are used.
     * \param[in]     cmd_buffer  Command buffer whose sub_cmds list is walked.
     * \return VK_SUCCESS once all sub commands were submitted, otherwise the
     *         error of the first sub command that failed.
     */
    static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
                                           struct pvr_queue *queue,
                                           struct pvr_cmd_buffer *cmd_buffer)
    {
       VkResult result;
    
       /* NOTE(review): the _safe iterator is used although nothing visible in
        * this loop removes entries from the list — confirm whether it is needed.
        */
       list_for_each_entry_safe (struct pvr_sub_cmd,
                                 sub_cmd,
                                 &cmd_buffer->sub_cmds,
                                 link) {
          /* Every `break` below leaves the switch with `result` set; the check
           * after the switch turns a failure into an early return.
           */
          switch (sub_cmd->type) {
          case PVR_SUB_CMD_TYPE_GRAPHICS: {
             /* If the fragment job utilizes occlusion queries, for data integrity
              * it needs to wait for the occlusion query to be processed.
              */
             if (sub_cmd->gfx.has_occlusion_query) {
                struct pvr_sub_cmd_event_barrier barrier = {
                   .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
                   .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
                };
    
                result = pvr_process_event_cmd_barrier(device, queue, &barrier);
                if (result != VK_SUCCESS)
                   break;
             }
    
             /* Make the fragment stage wait for the last transfer job. */
             if (sub_cmd->gfx.wait_on_previous_transfer) {
                struct pvr_sub_cmd_event_barrier barrier = {
                   .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
                   .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
                };
    
                result = pvr_process_event_cmd_barrier(device, queue, &barrier);
                if (result != VK_SUCCESS)
                   break;
             }
    
             result =
                pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx);
             break;
          }
    
          case PVR_SUB_CMD_TYPE_COMPUTE:
             result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute);
             break;
    
          case PVR_SUB_CMD_TYPE_TRANSFER: {
             const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;
    
             /* When serializing, the transfer first waits for the last fragment
              * job ...
              */
             if (serialize_with_frag) {
                struct pvr_sub_cmd_event_barrier barrier = {
                   .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
                   .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
                };
    
                result = pvr_process_event_cmd_barrier(device, queue, &barrier);
                if (result != VK_SUCCESS)
                   break;
             }
    
             result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer);
    
             /* ... and the next fragment job then waits for this transfer. The
              * second barrier is only set up if the transfer submit succeeded.
              */
             if (serialize_with_frag) {
                struct pvr_sub_cmd_event_barrier barrier = {
                   .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
                   .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
                };
    
                if (result != VK_SUCCESS)
                   break;
    
                result = pvr_process_event_cmd_barrier(device, queue, &barrier);
             }
    
             break;
          }
    
          case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
             result =
                pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
             break;
    
          case PVR_SUB_CMD_TYPE_EVENT:
             result = pvr_process_event_cmd(device, queue, &sub_cmd->event);
             break;
    
          default:
             /* NOTE(review): an unknown sub-command type is reported as
              * VK_ERROR_OUT_OF_HOST_MEMORY — confirm this is the intended code.
              */
             mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
             result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
          }
    
          if (result != VK_SUCCESS)
             return result;
    
          /* Incremented once per successfully processed sub command. */
          p_atomic_inc(&device->global_cmd_buffer_submit_count);
       }
    
       return VK_SUCCESS;
    }
    
    /**
     * Wait for all syncs tracked by the queue to complete and then release them.
     *
     * Every non-NULL entry of next_job_wait_sync and last_job_signal_sync is
     * waited on with no timeout (UINT64_MAX). Only if the wait succeeds are the
     * syncs destroyed and their slots reset to NULL; on failure the queue state
     * is left untouched.
     *
     * \return VK_SUCCESS, or the error from vk_sync_wait_many() reported through
     *         vk_error().
     */
    static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue)
    {
       /* Up to two syncs (wait + signal) per job type. */
       struct vk_sync_wait pending[PVR_JOB_TYPE_MAX * 2];
       uint32_t pending_count = 0;
       VkResult status;
    
       for (uint32_t type = 0; type < PVR_JOB_TYPE_MAX; type++) {
          struct vk_sync *const tracked[2] = {
             queue->next_job_wait_sync[type],
             queue->last_job_signal_sync[type],
          };
    
          for (uint32_t k = 0; k < 2; k++) {
             if (tracked[k] == NULL)
                continue;
    
             pending[pending_count] = (struct vk_sync_wait){
                .sync = tracked[k],
                .stage_mask = ~(VkPipelineStageFlags2)0,
                .wait_value = 0,
             };
             pending_count++;
          }
       }
    
       status = vk_sync_wait_many(&queue->device->vk,
                                  pending_count,
                                  pending,
                                  VK_SYNC_WAIT_COMPLETE,
                                  UINT64_MAX);
       if (status != VK_SUCCESS)
          return vk_error(queue, status);
    
       for (uint32_t type = 0; type < PVR_JOB_TYPE_MAX; type++) {
          struct vk_sync **const slots[2] = {
             &queue->next_job_wait_sync[type],
             &queue->last_job_signal_sync[type],
          };
    
          for (uint32_t k = 0; k < 2; k++) {
             if (*slots[k] == NULL)
                continue;
    
             vk_sync_destroy(&queue->device->vk, *slots[k]);
             *slots[k] = NULL;
          }
       }
    
       return VK_SUCCESS;
    }
    
    static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
                                              struct vk_sync_signal *signals,
                                              uint32_t signal_count)
    {
       struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX];
       struct pvr_device *device = queue->device;
       VkResult result;
    
       for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) {
          struct vk_sync_signal *signal = &signals[signal_idx];
          const enum pvr_pipeline_stage_bits signal_stage_src =
             pvr_stage_mask_src(signal->stage_mask);
          uint32_t wait_count = 0;
    
          for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
             /* Exception for occlusion query jobs since that's something internal,
              * so the user provided syncs won't ever have it as a source stage.
              */
             if (!(signal_stage_src & BITFIELD_BIT(i)) &&
                 i != PVR_JOB_TYPE_OCCLUSION_QUERY)
                continue;
    
             if (!queue->last_job_signal_sync[i])
                continue;
    
             signal_waits[wait_count++] = (struct vk_sync_wait){
                .sync = queue->last_job_signal_sync[i],
                .stage_mask = ~(VkPipelineStageFlags2)0,
                .wait_value = 0,
             };
          }
    
          result = device->ws->ops->null_job_submit(device->ws,
                                                    signal_waits,
                                                    wait_count,
                                                    signal);
          if (result != VK_SUCCESS)
             return result;
       }
    
       return VK_SUCCESS;
    }
    
    static VkResult pvr_process_queue_waits(struct pvr_queue *queue,
                                            struct vk_sync_wait *waits,
                                            uint32_t wait_count)
    {
       struct pvr_device *device = queue->device;
       VkResult result;
    
       STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count);
       if (!stage_waits)
          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
    
       for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
          struct vk_sync_signal next_job_wait_signal_sync;
          uint32_t stage_wait_count = 0;
    
          for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
             if (!(pvr_stage_mask_dst(waits[wait_idx].stage_mask) &
                   BITFIELD_BIT(i))) {
                continue;
             }
    
             stage_waits[stage_wait_count++] = (struct vk_sync_wait){
                .sync = waits[wait_idx].sync,
                .stage_mask = ~(VkPipelineStageFlags2)0,
                .wait_value = waits[wait_idx].wait_value,
             };
          }
    
          if (!stage_wait_count)
             continue;
    
          result = vk_sync_create(&device->vk,
                                  &device->pdevice->ws->syncobj_type,
                                  0U,
                                  0UL,
                                  &queue->next_job_wait_sync[i]);
          if (result != VK_SUCCESS)
             goto err_free_waits;
    
          next_job_wait_signal_sync = (struct vk_sync_signal){
             .sync = queue->next_job_wait_sync[i],
             .stage_mask = ~(VkPipelineStageFlags2)0,
             .signal_value = 0,
          };
    
          result = device->ws->ops->null_job_submit(device->ws,
                                                    stage_waits,
                                                    stage_wait_count,
                                                    &next_job_wait_signal_sync);
          if (result != VK_SUCCESS)
             goto err_free_waits;
       }
    
       STACK_ARRAY_FINISH(stage_waits);
    
       return VK_SUCCESS;
    
    err_free_waits:
       STACK_ARRAY_FINISH(stage_waits);
    
       return result;
    }
    
    /*
     * Driver queue submit entry point.
     *
     * Runs the submission in four steps, stopping at and returning the first
     * result that is not VK_SUCCESS:
     *   1. pvr_clear_last_submits_syncs() on the queue.
     *   2. pvr_process_queue_waits(), only when the submit carries waits.
     *   3. pvr_process_cmd_buffer() for each command buffer, in order.
     *   4. pvr_process_queue_signals() for the submit's signals.
     */
    static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                            struct vk_queue_submit *submit)
    {
       struct pvr_queue *const pvr_q = container_of(queue, struct pvr_queue, vk);
       struct pvr_device *const device = pvr_q->device;
       VkResult result;

       result = pvr_clear_last_submits_syncs(pvr_q);
       if (result != VK_SUCCESS)
          return result;

       if (submit->wait_count != 0) {
          result =
             pvr_process_queue_waits(pvr_q, submit->waits, submit->wait_count);
          if (result != VK_SUCCESS)
             return result;
       }

       for (uint32_t cb = 0U; cb < submit->command_buffer_count; cb++) {
          struct pvr_cmd_buffer *const cmd_buffer =
             container_of(submit->command_buffers[cb], struct pvr_cmd_buffer, vk);

          result = pvr_process_cmd_buffer(device, pvr_q, cmd_buffer);
          if (result != VK_SUCCESS)
             return result;
       }

       /* Last step: its result, success or failure, is the submit's result. */
       return pvr_process_queue_signals(pvr_q,
                                        submit->signals,
                                        submit->signal_count);
    }