From: Zebediah Figura
Subject: [PATCH 2/3] wined3d: Prefer mapping a BO if the buffer has WINED3D_BUFFER_USE_BO set.
Message-Id: <20220128022251.3240338-2-zfigura@codeweavers.com>
Date: Thu, 27 Jan 2022 20:22:50 -0600
In-Reply-To: <20220128022251.3240338-1-zfigura@codeweavers.com>
References: <20220128022251.3240338-1-zfigura@codeweavers.com>

Instead of checking whether a BO already exists.

This will end up allocating a BO earlier in some cases. This is not
particularly impactful by itself, since we already would have sysmem
available and thus could use it without a performance penalty. However, we
would like to avoid ever allocating sysmem where not necessary, in
particular by deferring allocation of any location at all until the
resource is written to.

This also has the side effect of fixing test_map_synchronization() on
64-bit architectures, broken since
194b47b4fd92dda8ebf24e88ca7a14fc926c84ab.

The test creates a buffer, maps it once, then maps it again with
NOOVERWRITE while the GPU is still drawing, expecting the new data to be
read by the GPU during the draw.

On 32-bit machines, and 64-bit machines before the offending commit, we do
the following:

First map: uses SYSMEM since the BO is not created yet
Draw: upload to VBO
Second map: map the existing VBO with GL_MAP_UNSYNCHRONIZED_BIT

After 194b47b4fd9, we don't use GL_MAP_UNSYNCHRONIZED_BIT since the buffer
has READ access, which means that the second map will be synchronized and
wait for the draw to complete.

After this patch, we do the following:

First map: create and map a VBO (not unsynchronized, but coherent and persistently mapped)
Draw: use mapped VBO
Second map: write to existing (coherent) VBO, which is unsynchronized

Signed-off-by: Zebediah Figura
---
This diff is distinctly more readable with --ignore-all-space.
dlls/wined3d/buffer.c | 139 ++++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 67 deletions(-) diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index 3d66af3e5e6..abbef0fac41 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -911,105 +911,110 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc void **map_ptr, const struct wined3d_box *box, uint32_t flags) { struct wined3d_buffer *buffer = buffer_from_resource(resource); + unsigned int offset, size, dirty_offset, dirty_size; struct wined3d_device *device = resource->device; struct wined3d_context *context; - unsigned int offset, size; + struct wined3d_bo_address addr; uint8_t *base; LONG count; TRACE("resource %p, sub_resource_idx %u, map_ptr %p, box %s, flags %#x.\n", resource, sub_resource_idx, map_ptr, debug_box(box), flags); - offset = box->left; - size = box->right - box->left; + dirty_offset = offset = box->left; + dirty_size = size = box->right - box->left; count = ++resource->map_count; - if (buffer->buffer_object) + /* DISCARD invalidates the entire buffer, regardless of the specified + * offset and size. Some applications also depend on the entire buffer + * being uploaded in that case. Two such applications are Port Royale + * and Darkstar One. */ + if (flags & WINED3D_MAP_DISCARD) { - unsigned int dirty_offset = offset, dirty_size = size; - struct wined3d_bo_address addr; + dirty_offset = 0; + dirty_size = 0; + } - /* DISCARD invalidates the entire buffer, regardless of the specified - * offset and size. Some applications also depend on the entire buffer - * being uploaded in that case. Two such applications are Port Royale - * and Darkstar One. 
*/ - if (flags & WINED3D_MAP_DISCARD) + if (((flags & WINED3D_MAP_WRITE) && !(flags & (WINED3D_MAP_NOOVERWRITE | WINED3D_MAP_DISCARD))) + || (!(flags & WINED3D_MAP_WRITE) && (buffer->locations & WINED3D_LOCATION_SYSMEM)) + || buffer->flags & WINED3D_BUFFER_PIN_SYSMEM + || !(buffer->flags & WINED3D_BUFFER_USE_BO)) + { + if (!(buffer->locations & WINED3D_LOCATION_SYSMEM)) { - dirty_offset = 0; - dirty_size = 0; + context = context_acquire(device, NULL, 0); + wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_SYSMEM); + context_release(context); } - if (((flags & WINED3D_MAP_WRITE) && !(flags & (WINED3D_MAP_NOOVERWRITE | WINED3D_MAP_DISCARD))) - || (!(flags & WINED3D_MAP_WRITE) && (buffer->locations & WINED3D_LOCATION_SYSMEM)) - || buffer->flags & WINED3D_BUFFER_PIN_SYSMEM) + if (flags & WINED3D_MAP_WRITE) + wined3d_buffer_invalidate_range(buffer, WINED3D_LOCATION_BUFFER, dirty_offset, dirty_size); + } + else + { + context = context_acquire(device, NULL, 0); + + if (flags & WINED3D_MAP_DISCARD) { - if (!(buffer->locations & WINED3D_LOCATION_SYSMEM)) + if (!wined3d_buffer_prepare_location(buffer, context, WINED3D_LOCATION_BUFFER)) { - context = context_acquire(device, NULL, 0); - wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_SYSMEM); context_release(context); + return E_OUTOFMEMORY; } - - if (flags & WINED3D_MAP_WRITE) - wined3d_buffer_invalidate_range(buffer, WINED3D_LOCATION_BUFFER, dirty_offset, dirty_size); + wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER); } else { - context = context_acquire(device, NULL, 0); + wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_BUFFER); + } - if (flags & WINED3D_MAP_DISCARD) - wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER); - else - wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_BUFFER); + if (flags & WINED3D_MAP_WRITE) + { + wined3d_buffer_invalidate_location(buffer, WINED3D_LOCATION_SYSMEM); + buffer_invalidate_bo_range(buffer, 
dirty_offset, dirty_size); + } - if (flags & WINED3D_MAP_WRITE) + if ((flags & WINED3D_MAP_DISCARD) && resource->heap_memory) + wined3d_buffer_evict_sysmem(buffer); + + if (count == 1) + { + addr.buffer_object = buffer->buffer_object; + addr.addr = 0; + buffer->map_ptr = wined3d_context_map_bo_address(context, &addr, resource->size, flags); + /* We are accessing buffer->resource.client from the CS thread, + * but it's safe because the client thread will wait for the + * map to return, thus completely serializing this call with + * other client code. */ + buffer->resource.client.addr = addr; + + if (((DWORD_PTR)buffer->map_ptr) & (RESOURCE_ALIGNMENT - 1)) { - wined3d_buffer_invalidate_location(buffer, WINED3D_LOCATION_SYSMEM); - buffer_invalidate_bo_range(buffer, dirty_offset, dirty_size); - } + WARN("Pointer %p is not %u byte aligned.\n", buffer->map_ptr, RESOURCE_ALIGNMENT); - if ((flags & WINED3D_MAP_DISCARD) && resource->heap_memory) - wined3d_buffer_evict_sysmem(buffer); + wined3d_context_unmap_bo_address(context, &addr, 0, NULL); + buffer->map_ptr = NULL; - if (count == 1) - { - addr.buffer_object = buffer->buffer_object; - addr.addr = 0; - buffer->map_ptr = wined3d_context_map_bo_address(context, &addr, resource->size, flags); - /* We are accessing buffer->resource.client from the CS thread, - * but it's safe because the client thread will wait for the - * map to return, thus completely serializing this call with - * other client code. */ - buffer->resource.client.addr = addr; - - if (((DWORD_PTR)buffer->map_ptr) & (RESOURCE_ALIGNMENT - 1)) + if (resource->usage & WINED3DUSAGE_DYNAMIC) + { + /* The extra copy is more expensive than not using VBOs + * at all on the NVIDIA Linux driver, which is the + * only driver that returns unaligned pointers. 
*/ + TRACE("Dynamic buffer, dropping VBO.\n"); + wined3d_buffer_drop_bo(buffer); + } + else { - WARN("Pointer %p is not %u byte aligned.\n", buffer->map_ptr, RESOURCE_ALIGNMENT); - - wined3d_context_unmap_bo_address(context, &addr, 0, NULL); - buffer->map_ptr = NULL; - - if (resource->usage & WINED3DUSAGE_DYNAMIC) - { - /* The extra copy is more expensive than not using VBOs - * at all on the NVIDIA Linux driver, which is the - * only driver that returns unaligned pointers. */ - TRACE("Dynamic buffer, dropping VBO.\n"); - wined3d_buffer_drop_bo(buffer); - } - else - { - TRACE("Falling back to doublebuffered operation.\n"); - wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_SYSMEM); - buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM; - } - TRACE("New pointer is %p.\n", resource->heap_memory); + TRACE("Falling back to doublebuffered operation.\n"); + wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_SYSMEM); + buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM; } + TRACE("New pointer is %p.\n", resource->heap_memory); } - - context_release(context); } + + context_release(context); } base = buffer->map_ptr ? buffer->map_ptr : resource->heap_memory; -- 2.34.1