From: Daniel Ansorregui Subject: [PATCH v5 2/4] wined3d: Add fetch4 to shader FFP generate texture stage Message-Id: <20190211134839.4274-2-mailszeros@gmail.com> Date: Mon, 11 Feb 2019 13:48:37 +0000 In-Reply-To: <20190211134839.4274-1-mailszeros@gmail.com> References: <20190108004217.10267-2-mailszeros@gmail.com> <20190211134839.4274-1-mailszeros@gmail.com> - Add a format flag to indicate FETCH4 support in textures - The implementation follows the AMD implementation: the result is swizzled, projection is allowed, and a 0.5 texel offset is added Signed-off-by: Daniel Ansorregui --- dlls/wined3d/glsl_shader.c | 43 ++++++++++++++++++++++++++++------ dlls/wined3d/utils.c | 27 +++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 3298a604fd..1950db06a9 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9711,6 +9711,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n"); if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9851,6 +9853,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + struct wined3d_string_buffer offset; + BOOL fetch4 = settings->op[stage].fetch4; + BOOL fetch4_proj = FALSE; BOOL proj; if (!(tex_map & (1u << stage))) @@ -9870,7 +9875,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if 
(settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE; @@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture3D sampling"); + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture"; + string_buffer_init(&offset); + if (fetch4) + { + texture_function = "textureGather"; + /* Apply a 0.5 texel offset as in AMD implementation */ + shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage); + + /* When projection is needed on fetch4 we have to apply it manually by dividing .w */ + fetch4_proj = proj; + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9936,8 +9957,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); } - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function, + proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? 
" / ret.w" : "", offset.buffer); if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9945,14 +9966,22 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage, + texture_function, proj ? "Proj" : "", stage, stage, offset.buffer); } else { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage, + texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].w", stage); + shader_addline(buffer, "%s);\n", offset.buffer); } + string_buffer_clear(&offset); + + /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage); string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 7b42202213..b0cae022c6 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -342,6 +342,19 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, }; +/* List of textures were fetch4 can be enabled. 
+ * Only available if ARB_TEXTURE_GATHER extension is present */ +static const enum wined3d_format_id wined3d_format_fetch4_enabled[] = +{ + WINED3DFMT_L8_UNORM, + WINED3DFMT_L16_UNORM, + WINED3DFMT_R16_FLOAT, + WINED3DFMT_R16, + WINED3DFMT_R32_FLOAT, + WINED3DFMT_A8_UNORM, + WINED3DFMT_INTZ, +}; + static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) { BYTE c; @@ -2120,6 +2133,15 @@ static BOOL init_format_base_info(struct wined3d_adapter *adapter) format_set_flag(format, format_base_flags[i].flags); } + if (adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + for (i = 0; i < ARRAY_SIZE(wined3d_format_fetch4_enabled); ++i) + { + if (!(format = get_format_internal(adapter, wined3d_format_fetch4_enabled[i]))) + return FALSE; + + format_set_flag(format, WINED3DFMT_FLAG_ALLOW_FETCH4); + } + return TRUE; } @@ -5780,6 +5802,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5923,6 +5946,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] + && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT)); } /* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 1e3ec28d6b..4224461142 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2747,7 +2747,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; 
unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; }; struct ffp_frag_settings @@ -4434,6 +4435,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000 struct wined3d_rational { -- 2.17.1