From: Conor McCarthy Subject: [PATCH vkd3d v2] vkd3d: Replace descriptor mutexes with atomic operations. Message-Id: <20220601024357.11155-1-cmccarthy@codeweavers.com> Date: Wed, 1 Jun 2022 12:43:57 +1000 Atomic ops can be used if the descriptor struct elements are replaced with a pointer to an allocated object. SRV, UAV and sampler descriptors already had view object pointers, so this doesn't affect performance for them, but CBVs now require dereferencing another (possibly distant) memory location in addition to the descriptor, which is not great for memory cache efficiency when copying CBVs. However, the descriptor heap buffer is now much smaller, and in most cases it's probably a good tradeoff for removing mutexes. Signed-off-by: Conor McCarthy --- v2: Handle the case in d3d12_desc_get_object_ref() where a descriptor object was already freed and reused elsewhere when the refcount was incremented. --- include/private/vkd3d_common.h | 13 + include/vkd3d_windows.h | 1 + libs/vkd3d/command.c | 90 ++++--- libs/vkd3d/device.c | 58 ++--- libs/vkd3d/resource.c | 425 +++++++++++++++------------------ libs/vkd3d/vkd3d_private.h | 128 +++++++--- 6 files changed, 390 insertions(+), 325 deletions(-) diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index c041b52d..e4a636c4 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -219,6 +219,10 @@ static inline LONG InterlockedAdd(LONG volatile *x, LONG val) { return __sync_add_and_fetch(x, val); } +static inline LONG InterlockedCompareExchange(LONG volatile *x, LONG xchg, LONG cmp) +{ + return __sync_val_compare_and_swap(x, cmp, xchg); +} # else # error "InterlockedIncrement() not implemented for this platform" # endif /* HAVE_SYNC_ADD_AND_FETCH */ @@ -231,6 +235,15 @@ static inline LONG InterlockedDecrement(LONG volatile *x) # else # error "InterlockedDecrement() not implemented for this platform" # endif + +# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 
7)) +static inline PVOID InterlockedExchangePointer(PVOID volatile *x, PVOID val) +{ + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +} +# else +# error "InterlockedExchangePointer() not implemented for this platform" +# endif #endif /* _WIN32 */ static inline void vkd3d_parse_version(const char *version, int *major, int *minor) diff --git a/include/vkd3d_windows.h b/include/vkd3d_windows.h index 002ff667..7570113b 100644 --- a/include/vkd3d_windows.h +++ b/include/vkd3d_windows.h @@ -103,6 +103,7 @@ typedef unsigned short WCHAR; typedef wchar_t WCHAR; # endif /* VKD3D_WIN32_WCHAR */ typedef void *HANDLE; +typedef void *PVOID; /* GUID */ # ifdef __WIDL__ diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d0782e5a..2f518af9 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1525,13 +1525,13 @@ static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_all } static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, - struct vkd3d_view *view) + struct vkd3d_view_desc *view) { if (!vkd3d_array_reserve((void **)&allocator->views, &allocator->views_size, allocator->view_count + 1, sizeof(*allocator->views))) return false; - vkd3d_view_incref(view); + vkd3d_desc_incref(view); allocator->views[allocator->view_count++] = view; return true; @@ -1718,7 +1718,7 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato for (i = 0; i < allocator->view_count; ++i) { - vkd3d_view_decref(allocator->views[i], device); + vkd3d_view_desc_decref(allocator->views[i], device); } allocator->view_count = 0; @@ -2868,28 +2868,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des unsigned int index, bool use_array) { uint32_t descriptor_range_magic = range->descriptor_magic; - const struct vkd3d_view *view = descriptor->u.view_info.view; + union d3d12_desc_object u = descriptor->u; uint32_t vk_binding = range->binding; + VkDescriptorType 
vk_descriptor_type; uint32_t set = range->set; - if (descriptor->magic != descriptor_range_magic) + if (!u.header || u.header->magic != descriptor_range_magic) return false; + vk_descriptor_type = u.header->vk_descriptor_type; + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; vk_descriptor_write->dstSet = vk_descriptor_sets[set]; vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; vk_descriptor_write->dstArrayElement = use_array ? index : 0; vk_descriptor_write->descriptorCount = 1; - vk_descriptor_write->descriptorType = descriptor->vk_descriptor_type; + vk_descriptor_write->descriptorType = vk_descriptor_type; vk_descriptor_write->pImageInfo = NULL; vk_descriptor_write->pBufferInfo = NULL; vk_descriptor_write->pTexelBufferView = NULL; - switch (descriptor->magic) + switch (u.header->magic) { case VKD3D_DESCRIPTOR_MAGIC_CBV: - vk_descriptor_write->pBufferInfo = &descriptor->u.vk_cbv_info; + vk_descriptor_write->pBufferInfo = &u.cb_desc->vk_cbv_info; break; case VKD3D_DESCRIPTOR_MAGIC_SRV: @@ -2900,8 +2903,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des * in pairs in one set. 
*/ if (range->descriptor_count == UINT_MAX) { - if (descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; vk_descriptor_write->dstBinding = 0; @@ -2911,21 +2914,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des { if (!use_array) vk_descriptor_write->dstBinding = vk_binding + 2 * index; - if (descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++vk_descriptor_write->dstBinding; } - if (descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || descriptor->vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { - vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; + vk_descriptor_write->pTexelBufferView = &u.view_desc->view.u.vk_buffer_view; } else { vk_image_info->sampler = VK_NULL_HANDLE; - vk_image_info->imageView = view->u.vk_image_view; - vk_image_info->imageLayout = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SRV + vk_image_info->imageView = u.view_desc->view.u.vk_image_view; + vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; vk_descriptor_write->pImageInfo = vk_image_info; @@ -2933,7 +2936,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: - vk_image_info->sampler = view->u.vk_sampler; + vk_image_info->sampler = u.view_desc->view.u.vk_sampler; vk_image_info->imageView = VK_NULL_HANDLE; vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -2941,7 +2944,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; default: - ERR("Invalid descriptor %#x.\n", descriptor->magic); + ERR("Invalid descriptor %#x.\n", u.header->magic); return false; } @@ -2996,6 +2999,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list for (j = 0; j < descriptor_count; ++j, ++descriptor) { unsigned int register_idx = range->base_register_idx + j; + union d3d12_desc_object u = descriptor->u; + VkBufferView vk_counter_view; + + vk_counter_view = (u.header && u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + ? u.view_desc->view.vk_counter_view : VK_NULL_HANDLE; /* Track UAV counters. */ if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) @@ -3005,8 +3013,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list if (state->uav_counters.bindings[k].register_space == range->register_space && state->uav_counters.bindings[k].register_index == register_idx) { - VkBufferView vk_counter_view = descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - ? descriptor->u.view_info.view->vk_counter_view : VK_NULL_HANDLE; if (bindings->vk_uav_counter_views[k] != vk_counter_view) bindings->uav_counters_dirty = true; bindings->vk_uav_counter_views[k] = vk_counter_view; @@ -3016,7 +3022,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list } /* Not all descriptors are necessarily populated if the range is unbounded. 
*/ - if (descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + if (!u.header) continue; if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, @@ -4956,8 +4962,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); const struct d3d12_rtv_desc *rtv_desc; const struct d3d12_dsv_desc *dsv_desc; + struct vkd3d_view_desc *view; VkFormat prev_dsv_format; - struct vkd3d_view *view; unsigned int i; TRACE("iface %p, render_target_descriptor_count %u, render_target_descriptors %p, " @@ -5003,7 +5009,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi WARN("Failed to add view.\n"); } - list->rtvs[i] = view->u.vk_image_view; + list->rtvs[i] = view->view.u.vk_image_view; list->fb_width = max(list->fb_width, rtv_desc->width); list->fb_height = max(list->fb_height, rtv_desc->height); list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); @@ -5027,7 +5033,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi list->dsv = VK_NULL_HANDLE; } - list->dsv = view->u.vk_image_view; + list->dsv = view->view.u.vk_image_view; list->fb_width = max(list->fb_width, dsv_desc->width); list->fb_height = max(list->fb_height, dsv_desc->height); list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); @@ -5049,7 +5055,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi static void d3d12_command_list_clear(struct d3d12_command_list *list, const struct VkAttachmentDescription *attachment_desc, const struct VkAttachmentReference *color_reference, const struct VkAttachmentReference *ds_reference, - struct vkd3d_view *view, size_t width, size_t height, unsigned int layer_count, + struct vkd3d_view_desc *view, size_t width, size_t height, unsigned int layer_count, const union VkClearValue *clear_value, unsigned int rect_count, const 
D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; @@ -5119,7 +5125,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, fb_desc.flags = 0; fb_desc.renderPass = vk_render_pass; fb_desc.attachmentCount = 1; - fb_desc.pAttachments = &view->u.vk_image_view; + fb_desc.pAttachments = &view->view.u.vk_image_view; fb_desc.width = width; fb_desc.height = height; fb_desc.layers = layer_count; @@ -5322,7 +5328,7 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea } static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, + struct d3d12_resource *resource, struct vkd3d_view_desc *view_desc, const VkClearColorValue *clear_colour, unsigned int rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; @@ -5332,6 +5338,7 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; + const struct vkd3d_view *view; d3d12_command_list_track_resource_usage(list, resource); d3d12_command_list_end_current_render_pass(list); @@ -5340,8 +5347,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, d3d12_command_list_invalidate_bindings(list, list->state); d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); - if (!d3d12_command_allocator_add_view(list->allocator, view)) + if (!d3d12_command_allocator_add_view(list->allocator, view_desc)) WARN("Failed to add view.\n"); + view = &view_desc->view; clear_args.colour = *clear_colour; @@ -5454,18 +5462,21 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList2(iface); + struct vkd3d_view_desc *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_view *view, *uint_view = NULL; struct vkd3d_texture_view_desc view_desc; const struct vkd3d_format *uint_format; struct d3d12_resource *resource_impl; + const struct vkd3d_view *view; VkClearColorValue colour; TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view_info.view; + if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->u.view_desc)) + return; + view = &descriptor->view; memcpy(colour.uint32, values, sizeof(colour.uint32)); if (view->format->type != VKD3D_FORMAT_TYPE_UINT) @@ -5479,8 +5490,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID if (d3d12_resource_is_buffer(resource_impl)) { - if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, - view->info.buffer.offset, view->info.buffer.size, &uint_view)) + if (!vkd3d_create_buffer_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, + uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) { ERR("Failed to create buffer view.\n"); return; @@ -5496,19 +5507,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view_desc.layer_idx = view->info.texture.layer_idx; view_desc.layer_count = view->info.texture.layer_count; - if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) + if (!vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, &uint_view)) { ERR("Failed to create image view.\n"); return; } } - view = uint_view; + descriptor = uint_view; } - d3d12_command_list_clear_uav(list, 
resource_impl, view, &colour, rect_count, rects); + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); if (uint_view) - vkd3d_view_decref(uint_view, device); + vkd3d_view_desc_decref(uint_view, device); } static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, @@ -5517,14 +5528,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); struct d3d12_resource *resource_impl; + struct vkd3d_view_desc *view; VkClearColorValue colour; - struct vkd3d_view *view; TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view_info.view; + if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view_desc)) + return; memcpy(colour.float32, values, sizeof(colour.float32)); d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index eaedc444..7b56e033 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2575,6 +2575,25 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); }; +static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache) +{ + vkd3d_mutex_init(&cache->mutex); + cache->cache = NULL; + cache->capacity = 0; + cache->count = 0; + cache->reserve = 0; +} + +static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) +{ + size_t i; + + vkd3d_mutex_destroy(&cache->mutex); + for (i = 0; i < cache->count; ++i) + vkd3d_free(cache->cache[i]); + vkd3d_free(cache->cache); +} + /* ID3D12Device */ static inline struct d3d12_device 
*impl_from_ID3D12Device(ID3D12Device *iface) { @@ -2615,7 +2634,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i; TRACE("%p decreasing refcount to %u.\n", device, refcount); @@ -2634,8 +2652,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_destroy(&device->desc_mutex[i]); + vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); + vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3548,32 +3566,20 @@ static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct { struct d3d12_desc_copy_location *location; enum vkd3d_vk_descriptor_set_index set; - struct vkd3d_mutex *mutex; + union d3d12_desc_object src_u; - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + if (!(src_u.object = d3d12_desc_get_object_ref(src, device))) { - /* Source must be unlocked first, and therefore can't be used as a null source. 
*/ - static const struct d3d12_desc null = {0}; - vkd3d_mutex_unlock(mutex); - d3d12_desc_write_atomic(dst, &null, device); + d3d12_desc_destroy(dst, device); return; } - set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->vk_descriptor_type); + set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src_u.header->vk_descriptor_type); location = &locations[set][infos[set].count++]; + location->src = src_u; - location->src = *src; - - if (location->src.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(location->src.u.view_info.view); - - vkd3d_mutex_unlock(mutex); - - infos[set].uav_counter |= (location->src.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) - && !!location->src.u.view_info.view->vk_counter_view; + infos[set].uav_counter |= (src_u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + && !!src_u.view_desc->view.vk_counter_view; location->dst = dst; if (infos[set].count == ARRAY_SIZE(locations[0])) @@ -3633,8 +3639,7 @@ static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a * race condition in the calling app. It is unnecessary to protect this test as it's * the app's race condition, not ours. 
*/ - if (dst[dst_idx].magic == src[src_idx].magic && (dst[dst_idx].magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && dst[dst_idx].u.view_info.written_serial_id == src[src_idx].u.view_info.view->serial_id) + if (dst[dst_idx].u.object == src[src_idx].u.object) continue; d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); } @@ -4242,7 +4247,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - size_t i; device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -4285,8 +4289,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, device->blocked_queue_count = 0; - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_init(&device->desc_mutex[i]); + vkd3d_desc_object_cache_init(&device->view_desc_cache); + vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache); vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 68c28cd1..71d3c743 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2078,57 +2078,131 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); } -/* CBVs, SRVs, UAVs */ -static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) +/* Objects are cached so that vkd3d_desc_incref() can safely check the refcount + * of an object freed by another thread. 
*/ +static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache, size_t size) { - struct vkd3d_view *view; + void *object = NULL; - if ((view = vkd3d_malloc(sizeof(*view)))) + vkd3d_mutex_lock(&cache->mutex); + + if (cache->count) + { + object = cache->cache[--cache->count]; + } + else + { + object = vkd3d_malloc(size); + ++cache->reserve; + } + + vkd3d_mutex_unlock(&cache->mutex); + return object; +} + +static void vkd3d_desc_object_cache_put(struct vkd3d_desc_object_cache *cache, void *object) +{ + vkd3d_mutex_lock(&cache->mutex); + + if (!vkd3d_array_reserve((void **)&cache->cache, &cache->capacity, max(cache->count + 1, cache->reserve), + sizeof(*cache->cache))) + { + ERR("Failed to allocate cache.\n"); + vkd3d_free(object); + /* Do not return here; fall through so the mutex is unlocked below. */ + } + else { - view->refcount = 1; - view->type = type; - view->serial_id = InterlockedIncrement64(&object_global_serial_id); - view->vk_counter_view = VK_NULL_HANDLE; + cache->cache[cache->count++] = object; } - return view; + + vkd3d_mutex_unlock(&cache->mutex); } -void vkd3d_view_incref(struct vkd3d_view *view) +static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) { - InterlockedIncrement(&view->refcount); + struct vkd3d_cbuffer_desc *desc; + + if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache, sizeof(*desc)))) + return NULL; + + desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; + desc->h.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + desc->h.refcount = 1; + + return desc; } -static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) +static struct vkd3d_view_desc *vkd3d_view_desc_create(uint32_t magic, VkDescriptorType vk_descriptor_type, + enum vkd3d_view_type type, struct d3d12_device *device) +{ + struct vkd3d_view_desc *desc; + + if (!(desc = vkd3d_desc_object_cache_get(&device->view_desc_cache, sizeof(*desc)))) + { + ERR("Failed to allocate descriptor object.\n"); + return NULL; + } + + desc->h.magic = magic; + 
desc->h.vk_descriptor_type = vk_descriptor_type; + desc->h.refcount = 1; + desc->view.type = type; + desc->view.vk_counter_view = VK_NULL_HANDLE; + + return desc; +} + +static void vkd3d_view_desc_destroy(struct vkd3d_view_desc *view, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; TRACE("Destroying view %p.\n", view); - switch (view->type) + switch (view->view.type) { case VKD3D_VIEW_TYPE_BUFFER: - VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); + VK_CALL(vkDestroyBufferView(device->vk_device, view->view.u.vk_buffer_view, NULL)); break; case VKD3D_VIEW_TYPE_IMAGE: - VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); + VK_CALL(vkDestroyImageView(device->vk_device, view->view.u.vk_image_view, NULL)); break; case VKD3D_VIEW_TYPE_SAMPLER: - VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); + VK_CALL(vkDestroySampler(device->vk_device, view->view.u.vk_sampler, NULL)); break; default: - WARN("Unhandled view type %d.\n", view->type); + WARN("Unhandled view type %d.\n", view->view.type); } - if (view->vk_counter_view) - VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); + if (view->view.vk_counter_view) + VK_CALL(vkDestroyBufferView(device->vk_device, view->view.vk_counter_view, NULL)); - vkd3d_free(view); + vkd3d_desc_object_cache_put(&device->view_desc_cache, view); } -void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) +void vkd3d_view_desc_decref(struct vkd3d_view_desc *view, struct d3d12_device *device) { - if (!InterlockedDecrement(&view->refcount)) - vkd3d_view_destroy(view, device); + if (!InterlockedDecrement(&view->h.refcount)) + vkd3d_view_desc_destroy(view, device); +} + +void vkd3d_desc_decref(void *desc, struct d3d12_device *device) +{ + union d3d12_desc_object u = {desc}; + + if (InterlockedDecrement(&u.header->refcount)) + return; + + if (u.header->magic & 
VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_desc_destroy(u.view_desc, device); + else + vkd3d_desc_object_cache_put(&device->cbuffer_desc_cache, u.object); +} + +void d3d12_desc_replace(struct d3d12_desc *dst, void *desc, struct d3d12_device *device) +{ + if ((desc = InterlockedExchangePointer(&dst->u.object, desc))) + vkd3d_desc_decref(desc, device); } /* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */ @@ -2137,14 +2211,14 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { unsigned int i, info_index = 0, write_index = 0; - switch (locations[0].src.vk_descriptor_type) + switch (locations[0].src.header->vk_descriptor_type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: for (; write_index < write_count; ++write_index) { descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.u.vk_cbv_info; + descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.cb_desc->vk_cbv_info; } break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: @@ -2153,7 +2227,7 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.u.view_info.view->u.vk_image_view; + descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.view_desc->view.u.vk_image_view; } break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: @@ -2162,7 +2236,7 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { 
descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.u.view_info.view->u.vk_buffer_view; + descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.view_desc->view.u.vk_buffer_view; } break; case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -2170,11 +2244,11 @@ static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descrip { descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.u.view_info.view->u.vk_sampler; + descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.view_desc->view.u.vk_sampler; } break; default: - ERR("Unhandled descriptor type %#x.\n", locations[0].src.vk_descriptor_type); + ERR("Unhandled descriptor type %#x.\n", locations[0].src.header->vk_descriptor_type); break; } } @@ -2220,46 +2294,44 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea } } -/* dst and src contain the same data unless another thread overwrites dst. The array index is - * calculated from dst, and src is thread safe. 
*/ -static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +void d3d12_desc_write_vk_heap(const struct d3d12_desc *src, void *object, struct d3d12_device *device) { struct d3d12_descriptor_heap_vk_set *descriptor_set; struct d3d12_descriptor_heap *descriptor_heap; const struct vkd3d_vk_device_procs *vk_procs; + union d3d12_desc_object u = {object}; bool is_null = false; - descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst); + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, src); descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( - src->vk_descriptor_type)]; + u.header->vk_descriptor_type)]; vk_procs = &device->vk_procs; vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst + descriptor_set->vk_descriptor_writes[0].dstArrayElement = src - (const struct d3d12_desc *)descriptor_heap->descriptors; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (src->vk_descriptor_type) + switch (u.header->vk_descriptor_type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->u.vk_cbv_info; - is_null = !src->u.vk_cbv_info.buffer; + descriptor_set->vk_descriptor_writes[0].pBufferInfo = &u.cb_desc->vk_cbv_info; + is_null = !u.cb_desc->vk_cbv_info.buffer; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - is_null = !(descriptor_set->vk_image_infos[0].imageView = src->u.view_info.view->u.vk_image_view); + is_null = !(descriptor_set->vk_image_infos[0].imageView = u.view_desc->view.u.vk_image_view); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = 
&src->u.view_info.view->u.vk_buffer_view; - is_null = !src->u.view_info.view->u.vk_buffer_view; + descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &u.view_desc->view.u.vk_buffer_view; + is_null = !u.view_desc->view.u.vk_buffer_view; break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_image_infos[0].sampler = src->u.view_info.view->u.vk_sampler; + descriptor_set->vk_image_infos[0].sampler = u.view_desc->view.u.vk_sampler; break; default: - ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type); + ERR("Unhandled descriptor type %#x.\n", u.header->vk_descriptor_type); break; } if (is_null && device->vk_info.EXT_robustness2) @@ -2272,75 +2344,19 @@ static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); - if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view_info.view->vk_counter_view) + if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view_desc->view.vk_counter_view) { descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst + descriptor_set->vk_descriptor_writes[0].dstArrayElement = src - (const struct d3d12_desc *)descriptor_heap->descriptors; descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->u.view_info.view->vk_counter_view; + descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &u.view_desc->view.vk_counter_view; VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); } vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); } -static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) -{ - struct vkd3d_view *defunct_view; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - 
vkd3d_mutex_lock(mutex); - - if (!(dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->u.view_info.view->refcount)) - { - *dst = *src; - vkd3d_mutex_unlock(mutex); - return; - } - - defunct_view = dst->u.view_info.view; - *dst = *src; - vkd3d_mutex_unlock(mutex); - - /* Destroy the view after unlocking to reduce wait time. */ - vkd3d_view_destroy(defunct_view, device); -} - -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) -{ - struct vkd3d_view *defunct_view = NULL; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); - - /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && !InterlockedDecrement(&dst->u.view_info.view->refcount)) - defunct_view = dst->u.view_info.view; - - *dst = *src; - - vkd3d_mutex_unlock(mutex); - - /* Destroy the view after unlocking to reduce wait time. */ - if (defunct_view) - vkd3d_view_destroy(defunct_view, device); - - if (device->use_vk_heaps && dst->magic) - d3d12_desc_write_vk_heap(dst, src, device); -} - -static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) -{ - static const struct d3d12_desc null_desc = {0}; - - d3d12_desc_write_atomic(descriptor, &null_desc, device); -} - void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, struct d3d12_device *device) @@ -2353,7 +2369,7 @@ void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, c for (i = 0, write_count = 0; i < info->count; ++i) { - d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + d3d12_desc_replace(locations[i].dst, locations[i].src.object, device); if (i && locations[i].dst == locations[i - 1].dst + 1) { @@ -2377,9 +2393,11 @@ 
void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, c for (i = 0, write_count = 0; i < info->count; ++i) { - if (!locations[i].src.u.view_info.view->vk_counter_view) + VkBufferView vk_counter_view; + + if (!(vk_counter_view = locations[i].src.view_desc->view.vk_counter_view)) continue; - descriptor_set->vk_buffer_views[write_count] = locations[i].src.u.view_info.view->vk_counter_view; + descriptor_set->vk_buffer_views[write_count] = vk_counter_view; descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - (const struct d3d12_desc *)descriptor_heap->descriptors; @@ -2395,22 +2413,14 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_desc tmp; - struct vkd3d_mutex *mutex; assert(dst != src); - /* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. */ - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(src->u.view_info.view); - - tmp = *src; - - vkd3d_mutex_unlock(mutex); - + /* Shadow of the Tomb Raider and possibly other titles sometimes copy a + * descriptor to the same location concurrently in multiple threads. It + * doesn't seem to copy from a location while destroying it in another + * thread, but we handle this too. 
*/ + tmp.u.object = d3d12_desc_get_object_ref(src, device); d3d12_desc_write_atomic(dst, &tmp, device); } @@ -2472,36 +2482,39 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, return vr == VK_SUCCESS; } -bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) +bool vkd3d_create_buffer_view_desc(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, + const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, + struct vkd3d_view_desc **view_desc) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkBufferView vk_view = VK_NULL_HANDLE; - struct vkd3d_view *object; + struct vkd3d_view_desc *object; if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) return false; - if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) + if (!(object = vkd3d_view_desc_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV + ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VKD3D_VIEW_TYPE_BUFFER, device))) { VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); return false; } - object->u.vk_buffer_view = vk_view; - object->format = format; - object->info.buffer.offset = offset; - object->info.buffer.size = size; - *view = object; + object->view.u.vk_buffer_view = vk_view; + object->view.format = format; + object->view.info.buffer.offset = offset; + object->view.info.buffer.size = size; + *view_desc = object; return true; } #define VKD3D_VIEW_RAW_BUFFER 0x1 static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - struct d3d12_resource *resource, DXGI_FORMAT view_format, + uint32_t magic, struct d3d12_resource *resource, DXGI_FORMAT view_format, unsigned int offset, unsigned int size, unsigned int structure_stride, - unsigned int flags, struct vkd3d_view **view) + unsigned int flags, struct vkd3d_view_desc **view_desc) { const struct vkd3d_format *format; VkDeviceSize element_size; @@ -2530,8 +2543,8 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, assert(d3d12_resource_is_buffer(resource)); - return vkd3d_create_buffer_view(device, resource->u.vk_buffer, - format, offset * element_size, size * element_size, view); + return vkd3d_create_buffer_view_desc(device, magic, resource->u.vk_buffer, + format, offset * element_size, size * element_size, view_desc); } static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components, @@ -2758,14 +2771,14 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de desc->layer_count = max_layer_count; } -bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, - const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) +bool vkd3d_create_texture_view_desc(struct d3d12_device *device, uint32_t magic, VkImage vk_image, + const struct vkd3d_texture_view_desc *desc, struct 
vkd3d_view_desc **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; const struct vkd3d_format *format = desc->format; struct VkImageViewCreateInfo view_desc; VkImageView vk_view = VK_NULL_HANDLE; - struct vkd3d_view *object; + struct vkd3d_view_desc *object; VkResult vr; if (vk_image) @@ -2791,18 +2804,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, } } - if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) + if (!(object = vkd3d_view_desc_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VKD3D_VIEW_TYPE_IMAGE, device))) { VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); return false; } - object->u.vk_image_view = vk_view; - object->format = format; - object->info.texture.vk_view_type = desc->view_type; - object->info.texture.miplevel_idx = desc->miplevel_idx; - object->info.texture.layer_idx = desc->layer_idx; - object->info.texture.layer_count = desc->layer_count; + object->view.u.vk_image_view = vk_view; + object->view.format = format; + object->view.info.texture.vk_view_type = desc->view_type; + object->view.info.texture.miplevel_idx = desc->miplevel_idx; + object->view.info.texture.layer_idx = desc->layer_idx; + object->view.info.texture.layer_count = desc->layer_count; *view = object; return true; } @@ -2811,6 +2825,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) { struct VkDescriptorBufferInfo *buffer_info; + struct vkd3d_cbuffer_desc *cb_desc; struct d3d12_resource *resource; if (!desc) @@ -2819,13 +2834,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, return; } + if (!(cb_desc = vkd3d_cbuffer_desc_create(device))) + { + ERR("Failed to allocate descriptor object.\n"); + return; + } + if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) { WARN("Size is not %u bytes aligned.\n", 
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); return; } - buffer_info = &descriptor->u.vk_cbv_info; + buffer_info = &cb_desc->vk_cbv_info; if (desc->BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); @@ -2841,8 +2862,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, buffer_info->range = VK_WHOLE_SIZE; } - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_CBV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor->u.cb_desc = cb_desc; } static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) @@ -2859,7 +2879,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image; if (!desc) @@ -2874,15 +2893,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer SRV %#x.\n", desc->Format); - if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, + vkd3d_create_buffer_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_SRV, null_resources->vk_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->u.view_desc); return; case D3D12_SRV_DIMENSION_TEXTURE2D: @@ -2920,20 +2933,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; vkd3d_desc.allowed_swizzle = true; - if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - 
descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_SRV, vk_image, &vkd3d_desc, &descriptor->u.view_desc); } static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { - struct vkd3d_view *view; unsigned int flags; if (!desc) @@ -2949,15 +2955,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, } flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, - desc->u.Buffer.StructureByteStride, flags, &view)) - return; - - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; + desc->u.Buffer.StructureByteStride, flags, &descriptor->u.view_desc); } static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, @@ -2986,7 +2986,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; if (!resource) { @@ -3082,13 +3081,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, } } - if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = 
view->serial_id; + vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource->u.vk_image, &vkd3d_desc, + &descriptor->u.view_desc); } static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) @@ -3105,7 +3099,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image; if (!desc) @@ -3120,15 +3113,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer UAV %#x.\n", desc->Format); - if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, + vkd3d_create_buffer_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_UAV, null_resources->vk_storage_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->u.view_desc); return; case D3D12_UAV_DIMENSION_TEXTURE2D: @@ -3166,20 +3153,14 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; vkd3d_desc.allowed_swizzle = false; - if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_UAV, vk_image, &vkd3d_desc, &descriptor->u.view_desc); } static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, struct 
d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { - struct vkd3d_view *view; + struct vkd3d_view_desc *view_desc; unsigned int flags; if (!desc) @@ -3195,16 +3176,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ } flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + if (!vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, - desc->u.Buffer.StructureByteStride, flags, &view)) + desc->u.Buffer.StructureByteStride, flags, &view_desc)) return; - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; - if (counter_resource) { const struct vkd3d_format *format; @@ -3214,13 +3190,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, - desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) + desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view_desc->view.vk_counter_view)) { WARN("Failed to create counter buffer view.\n"); - view->vk_counter_view = VK_NULL_HANDLE; - d3d12_desc_destroy(descriptor, device); + view_desc->view.vk_counter_view = VK_NULL_HANDLE; + vkd3d_view_desc_decref(view_desc, device); + return; } } + + descriptor->u.view_desc = view_desc; } static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, @@ -3228,7 +3207,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { struct vkd3d_texture_view_desc 
vkd3d_desc; - struct vkd3d_view *view; if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) return; @@ -3273,13 +3251,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, } } - if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->u.view_info.view = view; - descriptor->u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_image, &vkd3d_desc, + &descriptor->u.view_desc); } void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3414,7 +3387,7 @@ static VkResult d3d12_create_sampler(struct d3d12_device *device, D3D12_FILTER f void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc) { - struct vkd3d_view *view; + struct vkd3d_view_desc *view_desc; if (!desc) { @@ -3428,21 +3401,19 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]); - if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER))) + if (!(view_desc = vkd3d_view_desc_create(VKD3D_DESCRIPTOR_MAGIC_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER, + VKD3D_VIEW_TYPE_SAMPLER, device))) return; if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, - desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) + desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view_desc->view.u.vk_sampler) < 0) { - vkd3d_free(view); + vkd3d_free(view_desc); return; } - sampler->magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; - sampler->vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; - 
sampler->u.view_info.view = view; - sampler->u.view_info.written_serial_id = view->serial_id; + sampler->u.view_desc = view_desc; } HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, @@ -3467,7 +3438,7 @@ static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_devi if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV) return; - vkd3d_view_decref(rtv->view, device); + vkd3d_view_desc_decref(rtv->view, device); memset(rtv, 0, sizeof(*rtv)); } @@ -3475,7 +3446,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev struct d3d12_resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; + struct vkd3d_view_desc *view; d3d12_rtv_desc_destroy(rtv_desc, device); @@ -3543,7 +3514,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource)); - if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + if (!vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) return; rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; @@ -3562,7 +3533,7 @@ static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_devi if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV) return; - vkd3d_view_decref(dsv->view, device); + vkd3d_view_desc_decref(dsv->view, device); memset(dsv, 0, sizeof(*dsv)); } @@ -3570,7 +3541,7 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; + struct vkd3d_view_desc *view; d3d12_dsv_desc_destroy(dsv_desc, device); @@ -3628,7 +3599,7 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource)); - if (!vkd3d_create_texture_view(device, 
resource->u.vk_image, &vkd3d_desc, &view)) + if (!vkd3d_create_texture_view_desc(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) return; dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f00181a2..4167c81a 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -660,9 +660,7 @@ enum vkd3d_view_type struct vkd3d_view { - LONG refcount; enum vkd3d_view_type type; - uint64_t serial_id; union { VkBufferView vk_buffer_view; @@ -688,9 +686,6 @@ struct vkd3d_view } info; }; -void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device); -void vkd3d_view_incref(struct vkd3d_view *view); - struct vkd3d_texture_view_desc { VkImageViewType view_type; @@ -704,28 +699,88 @@ struct vkd3d_texture_view_desc bool allowed_swizzle; }; -bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); -bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, - const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view); +struct vkd3d_desc_header +{ + uint32_t magic; + LONG refcount; + VkDescriptorType vk_descriptor_type; +}; + +struct vkd3d_view_desc +{ + struct vkd3d_desc_header h; + struct vkd3d_view view; +}; -struct vkd3d_view_info +struct vkd3d_cbuffer_desc { - uint64_t written_serial_id; - struct vkd3d_view *view; + struct vkd3d_desc_header h; + VkDescriptorBufferInfo vk_cbv_info; }; struct d3d12_desc { - uint32_t magic; - VkDescriptorType vk_descriptor_type; - union + union d3d12_desc_object { - VkDescriptorBufferInfo vk_cbv_info; - struct vkd3d_view_info view_info; + struct vkd3d_desc_header *header; + struct vkd3d_view_desc *view_desc; + struct vkd3d_cbuffer_desc *cb_desc; + void *object; } u; }; +bool vkd3d_create_buffer_view_desc(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, 
+ const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, + struct vkd3d_view_desc **view_desc); +bool vkd3d_create_texture_view_desc(struct d3d12_device *device, uint32_t magic, VkImage vk_image, + const struct vkd3d_texture_view_desc *desc, struct vkd3d_view_desc **view); + +void vkd3d_view_desc_decref(struct vkd3d_view_desc *view_desc, struct d3d12_device *device); +void vkd3d_desc_decref(void *desc, struct d3d12_device *device); +void d3d12_desc_replace(struct d3d12_desc *dst, void *desc, struct d3d12_device *device); + +static inline bool vkd3d_desc_incref(void *desc) +{ + struct vkd3d_desc_header *h = desc; + LONG refcount; + + do + { + refcount = h->refcount; + /* Avoid incrementing a freed object. Reading the value is safe because objects are recycled. */ + if (refcount <= 0) + return false; + } + while (InterlockedCompareExchange(&h->refcount, refcount + 1, refcount) != refcount); + + return true; +} + +static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc *src, struct d3d12_device *device) +{ + void *desc; + + for (;;) + { + do + { + desc = src->u.object; + } while (desc && !vkd3d_desc_incref(desc)); + + /* Check if the object is still in src to handle the case where it was + * already freed and reused elsewhere when the refcount was incremented. 
*/ + if (desc == src->u.object) + return desc; + + vkd3d_desc_decref(desc, device); + } +} + +static inline void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +{ + d3d12_desc_replace(descriptor, NULL, device); +} + static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) { return (struct d3d12_desc *)cpu_handle.ptr; @@ -746,7 +801,7 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d struct d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc); void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc); -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_write_vk_heap(const struct d3d12_desc *src, void *object, struct d3d12_device *device); bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); @@ -761,7 +816,7 @@ struct d3d12_rtv_desc uint64_t width; unsigned int height; unsigned int layer_count; - struct vkd3d_view *view; + struct vkd3d_view_desc *view; struct d3d12_resource *resource; }; @@ -781,7 +836,7 @@ struct d3d12_dsv_desc uint64_t width; unsigned int height; unsigned int layer_count; - struct vkd3d_view *view; + struct vkd3d_view_desc *view; struct d3d12_resource *resource; }; @@ -861,7 +916,7 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, struct d3d12_desc_copy_location { - struct d3d12_desc src; + union d3d12_desc_object src; struct d3d12_desc *dst; }; @@ -1156,7 +1211,7 @@ struct d3d12_command_allocator size_t descriptor_pools_size; size_t descriptor_pool_count; - struct vkd3d_view **views; + struct vkd3d_view_desc **views; size_t views_size; size_t view_count; @@ -1450,6 +1505,15 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct 
vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); +struct vkd3d_desc_object_cache +{ + struct vkd3d_mutex mutex; + void **cache; + size_t capacity; + size_t count; + size_t reserve; +}; + #define VKD3D_DESCRIPTOR_POOL_COUNT 6 /* ID3D12Device */ @@ -1468,7 +1532,8 @@ struct d3d12_device struct vkd3d_gpu_va_allocator gpu_va_allocator; struct vkd3d_mutex mutex; - struct vkd3d_mutex desc_mutex[8]; + struct vkd3d_desc_object_cache view_desc_cache; + struct vkd3d_desc_object_cache cbuffer_desc_cache; struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache; @@ -1542,17 +1607,16 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); } -static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, - const struct d3d12_desc *descriptor) +static inline void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) { - STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); - uintptr_t idx = (uintptr_t)descriptor; - - idx ^= idx >> 12; - idx ^= idx >> 6; - idx ^= idx >> 3; + void *object = src->u.object; - return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; + /* Write the Vulkan descriptor first. Otherwise, while it's unlikely the dst + * and src would be destroyed before the write occurs, it is possible. */ + if (device->use_vk_heaps && object) + d3d12_desc_write_vk_heap(dst, object, device); + d3d12_desc_replace(dst, object, device); } /* utils */ -- 2.36.1