From: Axel Davy Subject: Re: [PATCH 1/5] d3d9: Support drawing from D3DPOOL_SYSTEMMEM vertex buffers. Message-Id: <6072b6ea-7887-3625-c1d7-581e4fca566a@gmail.com> Date: Thu, 13 Dec 2018 18:29:41 +0100 In-Reply-To: <20181212160958.18212-1-mbruni@codeweavers.com> References: <20181212160958.18212-1-mbruni@codeweavers.com> Hi, Why not always allocate SYSTEMMEM buffers in GPU memory? For example, GTT memory (without WC, thus fast CPU reads), with persistent flags, which means you don't need to unlock. Then no upload is needed. Yours, Axel On 12/12/2018 17:09, Matteo Bruni wrote: > From: Henri Verbeet > > Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=45486 > Signed-off-by: Matteo Bruni > --- > dlls/d3d9/buffer.c | 24 ++++++++++++++-- > dlls/d3d9/d3d9_private.h | 4 +++ > dlls/d3d9/device.c | 60 ++++++++++++++++++++++++++++++++++++---- > dlls/d3d9/stateblock.c | 17 ++++++++++++ > 4 files changed, 96 insertions(+), 9 deletions(-) > > diff --git a/dlls/d3d9/buffer.c b/dlls/d3d9/buffer.c > index 02b3570402f..15b7e2bdbce 100644 > --- a/dlls/d3d9/buffer.c > +++ b/dlls/d3d9/buffer.c > @@ -59,6 +59,8 @@ static ULONG WINAPI d3d9_vertexbuffer_AddRef(IDirect3DVertexBuffer9 *iface) > IDirect3DDevice9Ex_AddRef(buffer->parent_device); > wined3d_mutex_lock(); > wined3d_buffer_incref(buffer->wined3d_buffer); > + if (buffer->draw_buffer) > + wined3d_buffer_incref(buffer->draw_buffer); > wined3d_mutex_unlock(); > } > > @@ -78,6 +80,8 @@ static ULONG WINAPI d3d9_vertexbuffer_Release(IDirect3DVertexBuffer9 *iface) > > wined3d_mutex_lock(); > wined3d_buffer_decref(buffer->wined3d_buffer); > + if (buffer->draw_buffer) > + wined3d_buffer_decref(buffer->draw_buffer); > wined3d_mutex_unlock(); > > /* Release the device last, as it may cause the device to be destroyed. 
*/ > @@ -274,6 +278,7 @@ static const struct wined3d_parent_ops d3d9_vertexbuffer_wined3d_parent_ops = > HRESULT vertexbuffer_init(struct d3d9_vertexbuffer *buffer, struct d3d9_device *device, > UINT size, UINT usage, DWORD fvf, D3DPOOL pool) > { > + const struct wined3d_parent_ops *parent_ops = &d3d9_null_wined3d_parent_ops; > struct wined3d_buffer_desc desc; > HRESULT hr; > > @@ -299,15 +304,28 @@ HRESULT vertexbuffer_init(struct d3d9_vertexbuffer *buffer, struct d3d9_device * > > desc.byte_width = size; > desc.usage = usage & WINED3DUSAGE_MASK; > - desc.bind_flags = WINED3D_BIND_VERTEX_BUFFER; > + desc.bind_flags = 0; > desc.access = wined3daccess_from_d3dpool(pool, usage) > | WINED3D_RESOURCE_ACCESS_MAP_R | WINED3D_RESOURCE_ACCESS_MAP_W; > desc.misc_flags = 0; > desc.structure_byte_stride = 0; > > + if (desc.access & WINED3D_RESOURCE_ACCESS_GPU) > + { > + desc.bind_flags = WINED3D_BIND_VERTEX_BUFFER; > + parent_ops = &d3d9_vertexbuffer_wined3d_parent_ops; > + } > + > wined3d_mutex_lock(); > - hr = wined3d_buffer_create(device->wined3d_device, &desc, NULL, buffer, > - &d3d9_vertexbuffer_wined3d_parent_ops, &buffer->wined3d_buffer); > + hr = wined3d_buffer_create(device->wined3d_device, &desc, NULL, buffer, parent_ops, &buffer->wined3d_buffer); > + if (SUCCEEDED(hr) && !(desc.access & WINED3D_RESOURCE_ACCESS_GPU)) > + { > + desc.bind_flags = WINED3D_BIND_VERTEX_BUFFER; > + desc.access = WINED3D_RESOURCE_ACCESS_GPU; > + if (FAILED(hr = wined3d_buffer_create(device->wined3d_device, &desc, NULL, buffer, > + &d3d9_vertexbuffer_wined3d_parent_ops, &buffer->draw_buffer))) > + wined3d_buffer_decref(buffer->wined3d_buffer); > + } > wined3d_mutex_unlock(); > if (FAILED(hr)) > { > diff --git a/dlls/d3d9/d3d9_private.h b/dlls/d3d9/d3d9_private.h > index d6155eba60b..8647aac4af1 100644 > --- a/dlls/d3d9/d3d9_private.h > +++ b/dlls/d3d9/d3d9_private.h > @@ -42,6 +42,7 @@ > > #define D3D9_MAX_VERTEX_SHADER_CONSTANTF 256 > #define D3D9_MAX_TEXTURE_UNITS 20 > +#define 
D3D9_MAX_STREAMS 16 > > #define D3DPRESENTFLAGS_MASK 0x00000fffu > > @@ -105,6 +106,8 @@ struct d3d9_device > BOOL in_destruction; > BOOL in_scene; > BOOL has_vertex_declaration; > + BOOL recording; > + unsigned int upload_map; > > unsigned int max_user_clip_planes; > > @@ -181,6 +184,7 @@ struct d3d9_vertexbuffer > struct d3d9_resource resource; > struct wined3d_buffer *wined3d_buffer; > IDirect3DDevice9Ex *parent_device; > + struct wined3d_buffer *draw_buffer; > DWORD fvf; > }; > > diff --git a/dlls/d3d9/device.c b/dlls/d3d9/device.c > index f19e2ae6f65..7c488bfd601 100644 > --- a/dlls/d3d9/device.c > +++ b/dlls/d3d9/device.c > @@ -974,6 +974,7 @@ static HRESULT d3d9_device_reset(struct d3d9_device *device, > > if (!extended) > { > + device->recording = FALSE; > wined3d_device_set_render_state(device->wined3d_device, WINED3D_RS_ZENABLE, > !!swapchain_desc.enable_auto_depth_stencil); > } > @@ -2317,7 +2318,8 @@ static HRESULT WINAPI d3d9_device_BeginStateBlock(IDirect3DDevice9Ex *iface) > TRACE("iface %p.\n", iface); > > wined3d_mutex_lock(); > - hr = wined3d_device_begin_stateblock(device->wined3d_device); > + if (SUCCEEDED(hr = wined3d_device_begin_stateblock(device->wined3d_device))) > + device->recording = TRUE; > wined3d_mutex_unlock(); > > return hr; > @@ -2340,6 +2342,7 @@ static HRESULT WINAPI d3d9_device_EndStateBlock(IDirect3DDevice9Ex *iface, IDire > WARN("Failed to end the state block, hr %#x.\n", hr); > return hr; > } > + device->recording = FALSE; > > if (!(object = heap_alloc_zero(sizeof(*object)))) > { > @@ -2688,10 +2691,38 @@ static void d3d9_generate_auto_mipmaps(struct d3d9_device *device) > } > } > > +static void d3d9_device_upload_sysmem_buffers(struct d3d9_device *device, > + unsigned int start_vertex, unsigned int vertex_count) > +{ > + struct wined3d_box box = {0, 0, 0, 1, 0, 1}; > + struct d3d9_vertexbuffer *d3d9_buffer; > + unsigned int i, offset, stride, map; > + struct wined3d_buffer *dst_buffer; > + HRESULT hr; > + > + map = 
device->upload_map; > + while (map) > + { > + i = ffs(map) - 1; > + map ^= 1u << i; > + > + if (FAILED(hr = wined3d_device_get_stream_source(device->wined3d_device, i, &dst_buffer, &offset, &stride))) > + ERR("Failed to get stream source.\n"); > + d3d9_buffer = wined3d_buffer_get_parent(dst_buffer); > + box.left = offset + start_vertex * stride; > + box.right = box.left + vertex_count * stride; > + if (FAILED(hr = wined3d_device_copy_sub_resource_region(device->wined3d_device, > + wined3d_buffer_get_resource(dst_buffer), 0, box.left, 0, 0, > + wined3d_buffer_get_resource(d3d9_buffer->wined3d_buffer), 0, &box, 0))) > + ERR("Failed to update buffer.\n"); > + } > +} > + > static HRESULT WINAPI d3d9_device_DrawPrimitive(IDirect3DDevice9Ex *iface, > D3DPRIMITIVETYPE primitive_type, UINT start_vertex, UINT primitive_count) > { > struct d3d9_device *device = impl_from_IDirect3DDevice9Ex(iface); > + unsigned int vertex_count; > HRESULT hr; > > TRACE("iface %p, primitive_type %#x, start_vertex %u, primitive_count %u.\n", > @@ -2704,10 +2735,11 @@ static HRESULT WINAPI d3d9_device_DrawPrimitive(IDirect3DDevice9Ex *iface, > WARN("Called without a valid vertex declaration set.\n"); > return D3DERR_INVALIDCALL; > } > + vertex_count = vertex_count_from_primitive_count(primitive_type, primitive_count); > + d3d9_device_upload_sysmem_buffers(device, start_vertex, vertex_count); > d3d9_generate_auto_mipmaps(device); > wined3d_device_set_primitive_type(device->wined3d_device, primitive_type, 0); > - hr = wined3d_device_draw_primitive(device->wined3d_device, start_vertex, > - vertex_count_from_primitive_count(primitive_type, primitive_count)); > + hr = wined3d_device_draw_primitive(device->wined3d_device, start_vertex, vertex_count); > if (SUCCEEDED(hr)) > d3d9_rts_flag_auto_gen_mipmap(device); > wined3d_mutex_unlock(); > @@ -2734,6 +2766,7 @@ static HRESULT WINAPI d3d9_device_DrawIndexedPrimitive(IDirect3DDevice9Ex *iface > WARN("Called without a valid vertex declaration set.\n"); > 
return D3DERR_INVALIDCALL; > } > + d3d9_device_upload_sysmem_buffers(device, min_vertex_idx, vertex_count); > d3d9_generate_auto_mipmaps(device); > wined3d_device_set_base_vertex_index(device->wined3d_device, base_vertex_idx); > wined3d_device_set_primitive_type(device->wined3d_device, primitive_type, 0); > @@ -3378,6 +3411,7 @@ static HRESULT WINAPI d3d9_device_SetStreamSource(IDirect3DDevice9Ex *iface, > { > struct d3d9_device *device = impl_from_IDirect3DDevice9Ex(iface); > struct d3d9_vertexbuffer *buffer_impl = unsafe_impl_from_IDirect3DVertexBuffer9(buffer); > + struct wined3d_buffer *wined3d_buffer; > HRESULT hr; > > TRACE("iface %p, stream_idx %u, buffer %p, offset %u, stride %u.\n", > @@ -3386,14 +3420,28 @@ static HRESULT WINAPI d3d9_device_SetStreamSource(IDirect3DDevice9Ex *iface, > wined3d_mutex_lock(); > if (!stride) > { > - struct wined3d_buffer *wined3d_buffer; > unsigned int cur_offset; > > hr = wined3d_device_get_stream_source(device->wined3d_device, stream_idx, &wined3d_buffer, > &cur_offset, &stride); > } > - hr = wined3d_device_set_stream_source(device->wined3d_device, stream_idx, > - buffer_impl ? 
buffer_impl->wined3d_buffer : NULL, offset, stride); > + > + if (!buffer_impl) > + wined3d_buffer = NULL; > + else if (buffer_impl->draw_buffer) > + wined3d_buffer = buffer_impl->draw_buffer; > + else > + wined3d_buffer = buffer_impl->wined3d_buffer; > + > + hr = wined3d_device_set_stream_source(device->wined3d_device, stream_idx, wined3d_buffer, offset, stride); > + if (SUCCEEDED(hr) && !device->recording) > + { > + if (buffer_impl && buffer_impl->draw_buffer) > + device->upload_map |= (1u << stream_idx); > + else > + device->upload_map &= ~(1u << stream_idx); > + } > + > wined3d_mutex_unlock(); > > return hr; > diff --git a/dlls/d3d9/stateblock.c b/dlls/d3d9/stateblock.c > index 62b3bacb28d..c5306fd80b3 100644 > --- a/dlls/d3d9/stateblock.c > +++ b/dlls/d3d9/stateblock.c > @@ -108,11 +108,28 @@ static HRESULT WINAPI d3d9_stateblock_Capture(IDirect3DStateBlock9 *iface) > static HRESULT WINAPI d3d9_stateblock_Apply(IDirect3DStateBlock9 *iface) > { > struct d3d9_stateblock *stateblock = impl_from_IDirect3DStateBlock9(iface); > + struct wined3d_buffer *wined3d_buffer; > + struct d3d9_vertexbuffer *buffer; > + unsigned int i, offset, stride; > + struct d3d9_device *device; > + HRESULT hr; > > TRACE("iface %p.\n", iface); > > wined3d_mutex_lock(); > wined3d_stateblock_apply(stateblock->wined3d_stateblock); > + device = impl_from_IDirect3DDevice9Ex(stateblock->parent_device); > + device->upload_map = 0; > + for (i = 0; i < D3D9_MAX_STREAMS; ++i) > + { > + if (FAILED(hr = wined3d_device_get_stream_source(device->wined3d_device, > + i, &wined3d_buffer, &offset, &stride))) > + continue; > + if (!wined3d_buffer || !(buffer = wined3d_buffer_get_parent(wined3d_buffer))) > + continue; > + if (buffer->draw_buffer) > + device->upload_map |= 1u << i; > + } > wined3d_mutex_unlock(); > > return D3D_OK;