From: Conor McCarthy Subject: [PATCH vkd3d 1/2] vkd3d: Use all available command queues. Message-Id: <20220121143715.8297-1-cmccarthy@codeweavers.com> Date: Sat, 22 Jan 2022 00:37:14 +1000 Deadlock will occur if a fence GPU wait is queued and then the fence is signalled on the same queue. Mapping multiple D3D12 queues to one Vk queue can cause this scenario. Signed-off-by: Conor McCarthy --- libs/vkd3d/command.c | 21 ++++-- libs/vkd3d/device.c | 138 ++++++++++++++++++++++++++++--------- libs/vkd3d/vkd3d_private.h | 15 ++-- 3 files changed, 129 insertions(+), 45 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 2cf1eba2..9ec2c1b2 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -23,7 +23,8 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence); HRESULT vkd3d_queue_create(struct d3d12_device *device, - uint32_t family_index, const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue) + uint32_t family_index, const VkQueueFamilyProperties *properties, + struct vkd3d_queue_array *queue_array, uint32_t queue_index) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_queue *object; @@ -52,11 +53,11 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device, memset(object->old_vk_semaphores, 0, sizeof(object->old_vk_semaphores)); - VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, 0, &object->vk_queue)); + VK_CALL(vkGetDeviceQueue(device->vk_device, family_index, queue_index, &object->vk_queue)); TRACE("Created queue %p for queue family index %u.\n", object, family_index); - *queue = object; + queue_array->queues[queue_index] = object; return S_OK; } @@ -1729,17 +1730,23 @@ static struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(I return impl_from_ID3D12CommandAllocator(iface); } +static struct vkd3d_queue *d3d12_device_get_next_vkd3d_queue(struct vkd3d_queue_array *queue_array) +{ + unsigned int i = (unsigned int)(InterlockedIncrement(&queue_array->index) - 1) % queue_array->count; + return queue_array->queues[i]; +} + struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3D12_COMMAND_LIST_TYPE type) { switch (type) { case D3D12_COMMAND_LIST_TYPE_DIRECT: - return device->direct_queue; + return d3d12_device_get_next_vkd3d_queue(device->direct_queues); case D3D12_COMMAND_LIST_TYPE_COMPUTE: - return device->compute_queue; + return d3d12_device_get_next_vkd3d_queue(device->compute_queues); case D3D12_COMMAND_LIST_TYPE_COPY: - return device->copy_queue; + return d3d12_device_get_next_vkd3d_queue(device->copy_queues); default: FIXME("Unhandled command list type %#x.\n", type); return NULL; @@ -1759,7 +1766,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo return hr; if (!(queue = d3d12_device_get_vkd3d_queue(device, type))) - queue = device->direct_queue; + queue = device->direct_queues->queues[0]; allocator->ID3D12CommandAllocator_iface.lpVtbl = &d3d12_command_allocator_vtbl; allocator->refcount = 1; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 4bcb5efc..ce570ffe 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1594,19 +1594,67 @@ struct vkd3d_device_queue_info VkDeviceQueueCreateInfo vk_queue_create_info[VKD3D_QUEUE_FAMILY_COUNT]; }; +static void vkd3d_queue_array_cleanup(struct vkd3d_queue_array *queue_array, struct d3d12_device *device) +{ + unsigned int i; + + if (!queue_array) + return; + + for (i = 0; i < queue_array->count; ++i) + vkd3d_queue_destroy(queue_array->queues[i], device); + vkd3d_free(queue_array); +} + static void d3d12_device_destroy_vkd3d_queues(struct d3d12_device *device) { - if (device->direct_queue) - vkd3d_queue_destroy(device->direct_queue, device); - if (device->compute_queue && device->compute_queue != device->direct_queue) - vkd3d_queue_destroy(device->compute_queue, device); - if (device->copy_queue && device->copy_queue != device->direct_queue - && device->copy_queue != device->compute_queue) - vkd3d_queue_destroy(device->copy_queue, device); - - device->direct_queue = NULL; - device->compute_queue = NULL; - device->copy_queue = NULL; + vkd3d_queue_array_cleanup(device->direct_queues, device); + if (device->compute_queues != device->direct_queues) + vkd3d_queue_array_cleanup(device->compute_queues, device); + if (device->copy_queues != device->direct_queues && device->copy_queues != device->compute_queues) + vkd3d_queue_array_cleanup(device->copy_queues, device); + + device->direct_queues = NULL; + device->compute_queues = NULL; + device->copy_queues = NULL; +} + +static HRESULT d3d12_device_create_family_queue_array(struct d3d12_device *device, + const struct vkd3d_device_queue_info *queue_info, enum vkd3d_queue_family family, + struct vkd3d_queue_array **queue_array_ptr) +{ + unsigned int family_index = queue_info->family_index[family]; + struct vkd3d_queue_array *queue_array; + unsigned int i; + HRESULT hr; + + if (!(queue_array = vkd3d_calloc(1, sizeof(*queue_array) + queue_info->vk_properties[family].queueCount + * sizeof(queue_array->queues[0])))) + return E_OUTOFMEMORY; + + queue_array->count = queue_info->vk_properties[family].queueCount; + + for (i = 0; i < queue_array->count; ++i) + { + if (SUCCEEDED(hr = vkd3d_queue_create(device, family_index, + &queue_info->vk_properties[family], queue_array, i))) + continue; + + if (!i) + { + vkd3d_free(queue_array); + return hr; + } + + WARN("Failed to create queue of family %#x at index %u.\n", family, i); + queue_array->count = i; + break; + } + + device->queue_family_indices[device->queue_family_count++] = family_index; + *queue_array_ptr = queue_array; + + return S_OK; } static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, @@ -1617,38 +1665,32 @@ static HRESULT d3d12_device_create_vkd3d_queues(struct d3d12_device *device, uint32_t direct_family_index = queue_info->family_index[VKD3D_QUEUE_FAMILY_DIRECT]; HRESULT hr; - device->direct_queue = NULL; - device->compute_queue = NULL; - device->copy_queue = NULL; + device->direct_queues = NULL; + device->compute_queues = NULL; + device->copy_queues = NULL; device->queue_family_count = 0; memset(device->queue_family_indices, 0, sizeof(device->queue_family_indices)); - if (SUCCEEDED((hr = vkd3d_queue_create(device, direct_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_DIRECT], &device->direct_queue)))) - device->queue_family_indices[device->queue_family_count++] = direct_family_index; - else + if (FAILED(hr = d3d12_device_create_family_queue_array(device, queue_info, VKD3D_QUEUE_FAMILY_DIRECT, + &device->direct_queues))) goto out_destroy_queues; if (compute_family_index == direct_family_index) - device->compute_queue = device->direct_queue; - else if (SUCCEEDED(hr = vkd3d_queue_create(device, compute_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_COMPUTE], &device->compute_queue))) - device->queue_family_indices[device->queue_family_count++] = compute_family_index; - else + device->compute_queues = device->direct_queues; + else if (FAILED(hr = d3d12_device_create_family_queue_array(device, queue_info, VKD3D_QUEUE_FAMILY_COMPUTE, + &device->compute_queues))) goto out_destroy_queues; if (transfer_family_index == direct_family_index) - device->copy_queue = device->direct_queue; + device->copy_queues = device->direct_queues; else if (transfer_family_index == compute_family_index) - device->copy_queue = device->compute_queue; - else if (SUCCEEDED(hr = vkd3d_queue_create(device, transfer_family_index, - &queue_info->vk_properties[VKD3D_QUEUE_FAMILY_TRANSFER], &device->copy_queue))) - device->queue_family_indices[device->queue_family_count++] = transfer_family_index; - else + device->copy_queues = device->compute_queues; + else if (FAILED(hr = d3d12_device_create_family_queue_array(device, queue_info, VKD3D_QUEUE_FAMILY_TRANSFER, + &device->copy_queues))) goto out_destroy_queues; - device->feature_options3.CopyQueueTimestampQueriesSupported = !!device->copy_queue->timestamp_bits; + device->feature_options3.CopyQueueTimestampQueriesSupported = !!device->copy_queues->queues[0]->timestamp_bits; return S_OK; @@ -1657,8 +1699,6 @@ out_destroy_queues: return hr; } -static float queue_priorities[] = {1.0f}; - static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, VkPhysicalDevice physical_device, struct vkd3d_device_queue_info *info) { @@ -1707,8 +1747,8 @@ static HRESULT vkd3d_select_queues(const struct vkd3d_instance *vkd3d_instance, queue_info->pNext = NULL; queue_info->flags = 0; queue_info->queueFamilyIndex = i; - queue_info->queueCount = 1; /* FIXME: Use multiple queues. */ - queue_info->pQueuePriorities = queue_priorities; + queue_info->queueCount = queue_properties[i].queueCount; + queue_info->pQueuePriorities = NULL; } vkd3d_free(queue_properties); @@ -1767,6 +1807,29 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device) && info.Alignment <= 0x10000; } +static bool vkd3d_queue_priorities_init(struct vkd3d_device_queue_info *queue_info) +{ + unsigned int i, max_count; + float *queue_priorities; + + for (i = 0, max_count = 0; i < queue_info->vk_family_count; ++i) + max_count = max(max_count, queue_info->vk_queue_create_info[i].queueCount); + if (!(queue_priorities = vkd3d_malloc(max_count * sizeof(*queue_priorities)))) + return false; + for (i = 0; i < max_count; ++i) + queue_priorities[i] = 1.0f; + + for (i = 0; i < queue_info->vk_family_count; ++i) + queue_info->vk_queue_create_info[i].pQueuePriorities = queue_priorities; + + return true; +} + +static void vkd3d_queue_priorities_cleanup(const struct vkd3d_device_queue_info *queue_info) +{ + vkd3d_free((float *)queue_info->vk_queue_create_info[0].pQueuePriorities); +} + static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info) { @@ -1839,8 +1902,15 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, device_info.pEnabledFeatures = &physical_device_info.features2.features; vkd3d_free(user_extension_supported); + if (!vkd3d_queue_priorities_init(&device_queue_info)) + { + vkd3d_free(extensions); + return E_OUTOFMEMORY; + } + vr = VK_CALL(vkCreateDevice(physical_device, &device_info, NULL, &vk_device)); vkd3d_free(extensions); + vkd3d_queue_priorities_cleanup(&device_queue_info); if (vr < 0) { ERR("Failed to create Vulkan device, vr %d.\n", vr); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 136b0203..ec80d983 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1063,9 +1063,16 @@ struct vkd3d_queue VkSemaphore old_vk_semaphores[VKD3D_MAX_VK_SYNC_OBJECTS]; }; +struct vkd3d_queue_array +{ + LONG index; + unsigned int count; + struct vkd3d_queue *queues[]; +}; + VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, - const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); + const VkQueueFamilyProperties *properties, struct vkd3d_queue_array *queue_array, uint32_t queue_index); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_release(struct vkd3d_queue *queue); @@ -1197,9 +1204,9 @@ struct d3d12_device struct vkd3d_vulkan_info vk_info; - struct vkd3d_queue *direct_queue; - struct vkd3d_queue *compute_queue; - struct vkd3d_queue *copy_queue; + struct vkd3d_queue_array *direct_queues; + struct vkd3d_queue_array *compute_queues; + struct vkd3d_queue_array *copy_queues; uint32_t queue_family_indices[VKD3D_MAX_QUEUE_FAMILY_COUNT]; unsigned int queue_family_count; VkTimeDomainEXT vk_host_time_domain; -- 2.34.1