From: Conor McCarthy Subject: [PATCH vkd3d] vkd3d: Use Vulkan timeline semaphores for D3D12 fences. Message-Id: <20220125143442.34536-1-cmccarthy@codeweavers.com> Date: Wed, 26 Jan 2022 00:34:42 +1000 If KHR_timeline_semaphore is not available, the old implementation will be used. Based on a vkd3d-proton patch by Hans-Kristian Arntzen. Signed-off-by: Conor McCarthy --- libs/vkd3d/command.c | 479 +++++++++++++++++++++++++++++++++---- libs/vkd3d/device.c | 14 ++ libs/vkd3d/vkd3d_private.h | 11 + libs/vkd3d/vulkan_procs.h | 5 + tests/d3d12.c | 11 +- 5 files changed, 463 insertions(+), 57 deletions(-) diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 2cf1eba2..3203cb51 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -268,6 +268,7 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker, } worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence; + worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = VK_NULL_HANDLE; waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence; waiting_fence->fence = fence; waiting_fence->value = value; @@ -317,6 +318,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker) { unsigned int i; + bool timeline; size_t count; bool ret; @@ -325,8 +327,18 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo count = worker->fence_count + worker->enqueued_fence_count; - ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, - count, sizeof(*worker->vk_fences)); + if ((timeline = worker->device->vk_info.KHR_timeline_semaphore)) + { + ret = vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size, + count, sizeof(*worker->vk_semaphores)); + ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size, 
+ count, sizeof(*worker->semaphore_wait_values)); + } + else + { + ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, + count, sizeof(*worker->vk_fences)); + } ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size, count, sizeof(*worker->fences)); if (!ret) @@ -339,7 +351,16 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo { struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i]; - worker->vk_fences[worker->fence_count] = current->vk_fence; + if (timeline) + { + worker->vk_semaphores[worker->fence_count] = current->vk_semaphore; + worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value; + } + else + { + worker->vk_fences[worker->fence_count] = current->vk_fence; + } + worker->fences[worker->fence_count] = current->waiting_fence; ++worker->fence_count; } @@ -347,6 +368,66 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo worker->enqueued_fence_count = 0; } +static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker) +{ + struct d3d12_device *device = worker->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSemaphoreWaitInfoKHR wait_info; + VkSemaphore vk_semaphore; + uint64_t counter_value; + unsigned int i, j; + HRESULT hr; + int vr; + + if (!worker->fence_count) + return; + + wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR; + wait_info.pNext = NULL; + wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR; + wait_info.pSemaphores = worker->vk_semaphores; + wait_info.semaphoreCount = worker->fence_count; + wait_info.pValues = worker->semaphore_wait_values; + + vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0)); + if (vr == VK_TIMEOUT) + return; + if (vr != VK_SUCCESS) + { + ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr); + return; + } + + for (i = 0, j = 0; i < worker->fence_count; ++i) + { + struct 
vkd3d_waiting_fence *current = &worker->fences[i]; + + vk_semaphore = worker->vk_semaphores[i]; + if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, vk_semaphore, &counter_value))) < 0) + { + ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr); + } + else if (counter_value >= current->value) + { + TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value); + if (FAILED(hr = d3d12_fence_signal(current->fence, counter_value, VK_NULL_HANDLE))) + ERR("Failed to signal D3D12 fence, hr %#x.\n", hr); + + InterlockedDecrement(&current->fence->pending_worker_operation_count); + continue; + } + + if (i != j) + { + worker->vk_semaphores[j] = worker->vk_semaphores[i]; + worker->semaphore_wait_values[j] = worker->semaphore_wait_values[i]; + worker->fences[j] = worker->fences[i]; + } + ++j; + } + worker->fence_count = j; +} + static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker) { struct d3d12_device *device = worker->device; @@ -402,13 +483,19 @@ static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker) static void *vkd3d_fence_worker_main(void *arg) { struct vkd3d_fence_worker *worker = arg; + bool timeline; int rc; vkd3d_set_thread_name("vkd3d_fence"); + timeline = worker->device->vk_info.KHR_timeline_semaphore; + for (;;) { - vkd3d_wait_for_gpu_fences(worker); + if (timeline) + vkd3d_wait_for_gpu_timeline_semaphores(worker); + else + vkd3d_wait_for_gpu_fences(worker); if (!worker->fence_count || atomic_add_fetch(&worker->enqueued_fence_count, 0)) { @@ -473,6 +560,10 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, worker->vk_fences_size = 0; worker->fences = NULL; worker->fences_size = 0; + worker->vk_semaphores = NULL; + worker->vk_semaphores_size = 0; + worker->semaphore_wait_values = NULL; + worker->semaphore_wait_values_size = 0; if ((rc = pthread_mutex_init(&worker->mutex, NULL))) { @@ -535,6 +626,8 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, 
vkd3d_free(worker->enqueued_fences); vkd3d_free(worker->vk_fences); vkd3d_free(worker->fences); + vkd3d_free(worker->vk_semaphores); + vkd3d_free(worker->semaphore_wait_values); return S_OK; } @@ -684,6 +777,7 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) } d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); + VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); pthread_mutex_unlock(&fence->mutex); } @@ -802,31 +896,21 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence, return hr; } -static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence) +static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence) { struct d3d12_device *device = fence->device; - struct vkd3d_signaled_semaphore *current; bool signal_null_event_cond = false; unsigned int i, j; - int rc; - - if ((rc = pthread_mutex_lock(&fence->mutex))) - { - ERR("Failed to lock mutex, error %d.\n", rc); - return hresult_from_errno(rc); - } - - fence->value = value; for (i = 0, j = 0; i < fence->event_count; ++i) { struct vkd3d_waiting_event *current = &fence->events[i]; - if (current->value <= value) + if (current->value <= fence->value) { if (current->event) { - fence->device->signal_event(current->event); + device->signal_event(current->event); } else { @@ -841,9 +925,36 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF ++j; } } + fence->event_count = j; - if (signal_null_event_cond) + return signal_null_event_cond; +} + +static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence) +{ + struct d3d12_device *device = fence->device; + struct vkd3d_signaled_semaphore *current; + unsigned int i; + int rc; + + if ((rc = pthread_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (value < fence->value && 
device->vk_info.KHR_timeline_semaphore) + { + FIXME("Fence values must be monotonically increasing. Fence %p, was %"PRIx64", now %"PRIx64".\n", + fence, fence->value, value); + } + else + { + fence->value = value; + } + + if (d3d12_fence_signal_external_events_locked(fence)) pthread_cond_broadcast(&fence->null_event_cond); if (vk_fence) @@ -1069,12 +1180,71 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i return S_OK; } +static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) +{ + struct d3d12_device *device = fence->device; + VkSemaphoreSignalInfoKHR info; + VkResult vr = VK_SUCCESS; + int rc; + + if ((rc = pthread_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + /* We must only signal a value which is greater than the current value. + * That value can be in the range of current known value (fence->value), or as large as pending_timeline_value. + * Pending timeline value signal might be blocked by another synchronization primitive, and thus statically + * cannot be that value, so the safest thing to do is to check the current value which is updated by the fence + * wait thread continuously. This check is technically racy since the value might be immediately out of date, + * but there is no way to avoid this. */ + if (value > fence->value) + { + struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + /* Sanity check against the delta limit. 
*/ + if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference) + { + FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n", + value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference); + } + + info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR; + info.pNext = NULL; + info.semaphore = fence->timeline_semaphore; + info.value = value; + if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0) + { + fence->value = value; + if (value > fence->pending_timeline_value) + fence->pending_timeline_value = value; + } + else + { + ERR("Failed to signal timeline semaphore, vr %d.\n", vr); + } + } + else if (value != fence->value) + { + FIXME("Attempting to signal fence %p with %"PRIu64", but value is currently %"PRIu64".\n", + fence, value, fence->value); + } + + d3d12_fence_signal_external_events_locked(fence); + + pthread_mutex_unlock(&fence->mutex); + return hresult_from_vk_result(vr); +} + static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) { struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); TRACE("iface %p, value %#"PRIx64".\n", iface, value); + if (fence->timeline_semaphore) + return d3d12_fence_signal_cpu_timeline_semaphore(fence, value); return d3d12_fence_signal(fence, value, VK_NULL_HANDLE); } @@ -1105,6 +1275,34 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) return impl_from_ID3D12Fence(iface); } +static HRESULT d3d12_fence_init_timeline_semaphore(struct d3d12_fence *fence, struct d3d12_device *device, + uint64_t initial_value) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkSemaphoreTypeCreateInfoKHR type_info; + VkSemaphoreCreateInfo info; + VkResult vr; + + info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + info.pNext = &type_info; + info.flags = 0; + + type_info.sType = 
VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR; + type_info.pNext = NULL; + type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; + type_info.initialValue = initial_value; + + if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, &fence->timeline_semaphore))) < 0) + { + WARN("Failed to create timeline semaphore, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + fence->pending_timeline_value = initial_value; + + return S_OK; +} + static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, UINT64 initial_value, D3D12_FENCE_FLAGS flags) { @@ -1136,6 +1334,11 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * fence->events_size = 0; fence->event_count = 0; + fence->timeline_semaphore = NULL; + if (device->vk_info.KHR_timeline_semaphore && FAILED(hr = d3d12_fence_init_timeline_semaphore(fence, + device, initial_value))) + return hr; + list_init(&fence->semaphores); fence->semaphore_count = 0; @@ -6000,18 +6203,97 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i FIXME("iface %p stub!\n", iface); } +static bool d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value) +{ + struct d3d12_device *device = fence->device; + bool need_signal = false; + int rc; + + if ((rc = pthread_mutex_lock(&fence->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return false; + } + + /* If we're attempting to async signal a fence with a value which is not monotonically increasing the payload value, + * warn about this case. Do not treat this as an error since it might work. */ + if (value > fence->pending_timeline_value) + { + /* Sanity check against the delta limit. Use the current fence value. 
*/ + if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference) + { + FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n", + value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference); + } + + fence->pending_timeline_value = value; + need_signal = true; + } + else + { + FIXME("Fence %p is being signalled non-monotonically. Old pending value %"PRIu64", new pending value %"PRIu64".\n", + fence, fence->pending_timeline_value, value); + + /* Mostly to be safe against weird, unknown use cases. The pending signal might be blocked + * by another fence, so we'll base this on the actual, currently visible count value. */ + need_signal = value > fence->value; + } + + pthread_mutex_unlock(&fence->mutex); + + return need_signal; +} + +static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore, + struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue) +{ + struct vkd3d_waiting_fence *waiting_fence; + int rc; + + TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value); + + if ((rc = pthread_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return hresult_from_errno(rc); + } + + if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size, + worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences))) + { + ERR("Failed to add GPU timeline semaphore.\n"); + pthread_mutex_unlock(&worker->mutex); + return E_OUTOFMEMORY; + } + + worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = vk_semaphore; + waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence; + waiting_fence->fence = fence; + waiting_fence->value = value; + waiting_fence->queue = queue; + ++worker->enqueued_fence_count; + + InterlockedIncrement(&fence->pending_worker_operation_count); + 
+ pthread_cond_signal(&worker->cond); + pthread_mutex_unlock(&worker->mutex); + + return S_OK; +} + static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, UINT64 value) { struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; const struct vkd3d_vk_device_procs *vk_procs; VkSemaphore vk_semaphore = VK_NULL_HANDLE; VkFence vk_fence = VK_NULL_HANDLE; struct vkd3d_queue *vkd3d_queue; + uint64_t sequence_number = 0; struct d3d12_device *device; struct d3d12_fence *fence; VkSubmitInfo submit_info; - uint64_t sequence_number; VkQueue vk_queue; VkResult vr; HRESULT hr; @@ -6024,10 +6306,27 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * fence = unsafe_impl_from_ID3D12Fence(fence_iface); - if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0) + if (device->vk_info.KHR_timeline_semaphore) { - WARN("Failed to create Vulkan fence, vr %d.\n", vr); - goto fail_vkresult; + if (d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value)) + { + vk_semaphore = fence->timeline_semaphore; + assert(vk_semaphore); + } + else + { + /* If we are not incrementing the counter, this is a noop since we cannot + * signal a timeline semaphore non-monotonically in Vulkan. 
*/ + return S_OK; + } + } + else + { + if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0) + { + WARN("Failed to create Vulkan fence, vr %d.\n", vr); + goto fail_vkresult; + } } if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue))) @@ -6037,7 +6336,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * goto fail; } - if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0) + if (!device->vk_info.KHR_timeline_semaphore && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, + device, &vk_semaphore)) < 0) { ERR("Failed to create Vulkan semaphore, vr %d.\n", vr); vk_semaphore = VK_NULL_HANDLE; @@ -6053,7 +6353,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0; submit_info.pSignalSemaphores = &vk_semaphore; - if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0) + if (device->vk_info.KHR_timeline_semaphore) + { + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.pNext = NULL; + timeline_submit_info.pSignalSemaphoreValues = &value; + timeline_submit_info.signalSemaphoreValueCount = 1; + timeline_submit_info.waitSemaphoreValueCount = 0; + timeline_submit_info.pWaitSemaphoreValues = NULL; + submit_info.pNext = &timeline_submit_info; + } + + vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence)); + if (!device->vk_info.KHR_timeline_semaphore && vr >= 0) { sequence_number = ++vkd3d_queue->submitted_sequence_number; @@ -6070,26 +6382,34 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * goto fail_vkresult; } - if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value))) - vk_semaphore = VK_NULL_HANDLE; - - vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence)); - if (vr == VK_NOT_READY) - { - if (SUCCEEDED(hr = 
vkd3d_enqueue_gpu_fence(&device->fence_worker, vk_fence, fence, value, vkd3d_queue, sequence_number))) - vk_fence = VK_NULL_HANDLE; - } - else if (vr == VK_SUCCESS) + if (device->vk_info.KHR_timeline_semaphore) { - TRACE("Already signaled %p, value %#"PRIx64".\n", fence, value); - hr = d3d12_fence_signal(fence, value, vk_fence); - vk_fence = VK_NULL_HANDLE; - vkd3d_queue_update_sequence_number(vkd3d_queue, sequence_number, device); + if (SUCCEEDED(hr = vkd3d_enqueue_timeline_semaphore(&device->fence_worker, vk_semaphore, fence, value, vkd3d_queue))) + vk_semaphore = VK_NULL_HANDLE; } else { - FIXME("Failed to get fence status, vr %d.\n", vr); - hr = hresult_from_vk_result(vr); + if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value))) + vk_semaphore = VK_NULL_HANDLE; + + vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence)); + if (vr == VK_NOT_READY) + { + if (SUCCEEDED(hr = vkd3d_enqueue_gpu_fence(&device->fence_worker, vk_fence, fence, value, vkd3d_queue, sequence_number))) + vk_fence = VK_NULL_HANDLE; + } + else if (vr == VK_SUCCESS) + { + TRACE("Already signaled %p, value %#"PRIx64".\n", fence, value); + hr = d3d12_fence_signal(fence, value, vk_fence); + vk_fence = VK_NULL_HANDLE; + vkd3d_queue_update_sequence_number(vkd3d_queue, sequence_number, device); + } + else + { + FIXME("Failed to get fence status, vr %d.\n", vr); + hr = hresult_from_vk_result(vr); + } } if (vk_fence || vk_semaphore) @@ -6105,32 +6425,27 @@ fail_vkresult: hr = hresult_from_vk_result(vr); fail: VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL)); - VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); + if (!device->vk_info.KHR_timeline_semaphore) + VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL)); return hr; } -static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, - ID3D12Fence *fence_iface, UINT64 value) +static HRESULT 
d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value) { static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); const struct vkd3d_vk_device_procs *vk_procs; struct vkd3d_signaled_semaphore *semaphore; uint64_t completed_value = 0; struct vkd3d_queue *queue; - struct d3d12_fence *fence; VkSubmitInfo submit_info; VkQueue vk_queue; VkResult vr; HRESULT hr; - TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); - vk_procs = &command_queue->device->vk_procs; queue = command_queue->vkd3d_queue; - fence = unsafe_impl_from_ID3D12Fence(fence_iface); - semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value); if (!semaphore && completed_value >= value) { @@ -6208,6 +6523,72 @@ fail: return hr; } +static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value) +{ + static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info; + const struct vkd3d_vk_device_procs *vk_procs; + struct vkd3d_queue *queue; + VkSubmitInfo submit_info; + VkQueue vk_queue; + VkResult vr; + + vk_procs = &command_queue->device->vk_procs; + queue = command_queue->vkd3d_queue; + + assert(fence->timeline_semaphore); + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.pNext = NULL; + timeline_submit_info.signalSemaphoreValueCount = 0; + timeline_submit_info.pSignalSemaphoreValues = NULL; + timeline_submit_info.waitSemaphoreValueCount = 1; + timeline_submit_info.pWaitSemaphoreValues = &value; + + if (!(vk_queue = vkd3d_queue_acquire(queue))) + { + ERR("Failed to acquire queue %p.\n", queue); + return E_FAIL; + } + + submit_info.sType = 
VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = &timeline_submit_info; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &fence->timeline_semaphore; + submit_info.pWaitDstStageMask = &wait_stage_mask; + submit_info.commandBufferCount = 0; + submit_info.pCommandBuffers = NULL; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + + vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE)); + + vkd3d_queue_release(queue); + + if (vr < 0) + { + WARN("Failed to submit wait operation, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface, + ID3D12Fence *fence_iface, UINT64 value) +{ + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); + + TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value); + + if (command_queue->device->vk_info.KHR_timeline_semaphore) + return d3d12_command_queue_wait_timeline_semaphore(command_queue, fence, value); + + FIXME_ONCE("KHR_timeline_semaphore is not available. 
Some wait commands may be unsupported.\n"); + return d3d12_command_queue_wait_binary_semaphore(command_queue, fence, value); +} + static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface, UINT64 *frequency) { diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 4bcb5efc..bc5cad76 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -129,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), + VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), /* EXT extensions */ VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), @@ -683,6 +684,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; + VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties; VkPhysicalDeviceProperties2KHR properties2; @@ -694,6 +696,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features; VkPhysicalDeviceFeatures2 features2; }; @@ -705,10 +708,12 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; 
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; + VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features; VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; @@ -727,6 +732,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i buffer_alignment_properties = &info->texel_buffer_alignment_properties; vertex_divisor_features = &info->vertex_divisor_features; vertex_divisor_properties = &info->vertex_divisor_properties; + timeline_semaphore_features = &info->timeline_semaphore_features; + timeline_semaphore_properties = &info->timeline_semaphore_properties; xfb_features = &info->xfb_features; xfb_properties = &info->xfb_properties; @@ -746,6 +753,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, xfb_features); vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; vk_prepend_struct(&info->features2, vertex_divisor_features); + timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; + vk_prepend_struct(&info->features2, timeline_semaphore_features); if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); @@ -764,6 +773,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i 
vk_prepend_struct(&info->properties2, xfb_properties); vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; vk_prepend_struct(&info->properties2, vertex_divisor_properties); + timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR; + vk_prepend_struct(&info->properties2, timeline_semaphore_properties); if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); @@ -1312,6 +1323,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1); + vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties; device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64; device->feature_options.OutputMergerLogicOp = features->logicOp; @@ -1434,6 +1446,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->EXT_shader_demote_to_helper_invocation = false; if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment) vulkan_info->EXT_texel_buffer_alignment = false; + if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) + vulkan_info->KHR_timeline_semaphore = false; vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 136b0203..088d4312 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -107,6 +107,7 @@ struct vkd3d_vulkan_info bool KHR_maintenance3; bool KHR_push_descriptor; 
bool KHR_sampler_mirror_clamp_to_edge; + bool KHR_timeline_semaphore; /* EXT device extensions */ bool EXT_calibrated_timestamps; bool EXT_conditional_rendering; @@ -130,6 +131,8 @@ struct vkd3d_vulkan_info VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties; + unsigned int shader_extension_count; enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS]; @@ -196,6 +199,7 @@ struct vkd3d_fence_worker struct vkd3d_enqueued_fence { VkFence vk_fence; + VkSemaphore vk_semaphore; struct vkd3d_waiting_fence waiting_fence; } *enqueued_fences; size_t enqueued_fences_size; @@ -205,6 +209,10 @@ struct vkd3d_fence_worker size_t vk_fences_size; struct vkd3d_waiting_fence *fences; size_t fences_size; + VkSemaphore *vk_semaphores; + size_t vk_semaphores_size; + uint64_t *semaphore_wait_values; + size_t semaphore_wait_values_size; struct d3d12_device *device; }; @@ -378,6 +386,9 @@ struct d3d12_fence size_t events_size; size_t event_count; + VkSemaphore timeline_semaphore; + uint64_t pending_timeline_value; + struct list semaphores; unsigned int semaphore_count; diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index 60556735..34e0ab4b 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR) /* VK_KHR_push_descriptor */ VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR) +/* VK_KHR_timeline_semaphore */ +VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR) +VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR) +VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR) + /* VK_EXT_calibrated_timestamps */ VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT) diff --git a/tests/d3d12.c b/tests/d3d12.c index 82f353a7..b5a9b89f 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -33238,7 +33238,9 @@ static void test_queue_wait(void) command_list = context.list; queue = context.queue; - queue2 = 
create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); + /* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail. + * Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */ + queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL); event = create_event(); ok(event, "Failed to create event.\n"); @@ -33303,12 +33305,6 @@ static void test_queue_wait(void) check_readback_data_uint(&rb, NULL, 0xff00ff00, 0); release_resource_readback(&rb); - if (!vkd3d_test_platform_is_windows()) - { - skip("Wait() is not implemented yet.\n"); /* FIXME */ - goto skip_tests; - } - /* Wait() before CPU signal */ update_buffer_data(cb, 0, sizeof(blue), &blue); queue_wait(queue, fence, 2); @@ -33384,7 +33380,6 @@ static void test_queue_wait(void) check_readback_data_uint(&rb, NULL, 0xff00ff00, 0); release_resource_readback(&rb); -skip_tests: /* Signal() and Wait() in the same command queue */ update_buffer_data(cb, 0, sizeof(blue), &blue); queue_signal(queue, fence, 7); -- 2.34.1