From: Paul Gofman Subject: [PATCH v2] wined3d: Avoid using float literals for MAX_FLOAT in GLSL when possible. Message-Id: <20190405180809.15555-1-gofmanp@gmail.com> Date: Fri, 5 Apr 2019 21:08:09 +0300 When application uses FLT_MAX in shader local constants, GLSL shader code gets 3.40282347e+38 string literal. Sometimes (seemingly depending on some side factors) NVIDIA GLSL compiler rejects such shaders with 'floating point constant overflow' error. Signed-off-by: Paul Gofman --- v2: - removed leftover debug change in utils.c. dlls/d3d9/tests/visual.c | 48 ++++++++++++++++++++++++++++++++++++-- dlls/wined3d/glsl_shader.c | 33 +++++++++++++++++--------- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 85c1fa1bf8..36465f2156 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -28,6 +28,7 @@ * causes visible results in games can be tested in a way that does not depend on pixel exactness */ +#include #include #include @@ -16644,7 +16645,23 @@ static void fp_special_test(void) 0x0000ffff, /* end */ }; - struct + static const DWORD ps_fltmax_code[] = + { + 0xffff0200, /* ps_2_0 */ + 0x05000051, 0xa00f0000, 0x7f7fffff, 0xff7fffff, 0x7f7fffff, 0xff7fffff, /* def c0, FLT_MAX, -FLT_MAX, + * FLT_MAX, -FLT_MAX */ + 0x05000051, 0xa00f0002, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c2, 1.0, 1.0, 1.0, 1.0 */ + 0x05000051, 0xa00f0003, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c3, 0.5, 0.5, 0.5, 0.5 */ + 0x02000001, 0x800f0001, 0xa0e40001, /* mov r1, c1 */ + 0x03000002, 0x800f0000, 0xa0e40000, 0x81e40001, /* add r0, c0, -r1 */ + 0x02000023, 0x800f0000, 0x80e40000, /* abs r0, r0 */ + 0x02000001, 0x800f0001, 0xa0e40003, /* mov r1, c3 */ + 0x04000058, 0x800f0000, 0x81e40000, 0xa0e40002, 0x80e40001, /* cmp r0, -r0, c2, r1 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + + static const struct { float x, y, z; float s; @@ -16657,10 +16674,12 @@ static void fp_special_test(void) { 1.0f, -1.0f, 1.0f, 0.0f}, }; + static const struct vec4 const_max_flt = {FLT_MAX, -FLT_MAX, FLT_MAX, -FLT_MAX}; IDirect3DPixelShader9 *ps; IDirect3DDevice9 *device; UINT body_size = 0; IDirect3D9 *d3d; + D3DCOLOR color; DWORD *vs_code; ULONG refcount; D3DCAPS9 caps; @@ -16712,7 +16731,6 @@ static void fp_special_test(void) { DWORD offset = ARRAY_SIZE(vs_header); IDirect3DVertexShader9 *vs; - D3DCOLOR color; memcpy(vs_code + offset, vs_body[i].ops, vs_body[i].size); offset += vs_body[i].size / sizeof(*vs_body[i].ops); @@ -16752,6 +16770,32 @@ static void fp_special_test(void) hr = IDirect3DDevice9_SetPixelShader(device, NULL); ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); IDirect3DPixelShader9_Release(ps); + + hr = IDirect3DDevice9_CreatePixelShader(device, ps_fltmax_code, &ps); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, ps); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetPixelShaderConstantF(device, 1, &const_max_flt.x, 1); + ok(hr == D3D_OK, "IDirect3DDevice9_SetPixelShaderConstantF returned %08x\n", hr); + + hr = IDirect3DDevice9_BeginScene(device); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + + color = getPixelColor(device, 320, 240); + ok(color == 0x00ffffff, "Got unexpected color 0x%08x.\n", color); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetPixelShader(device, NULL); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + IDirect3DPixelShader9_Release(ps); + refcount = IDirect3DDevice9_Release(device); ok(!refcount, "Device has %u references left.\n", refcount); done: diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 746d7cfa36..4a60f6e0e4 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -432,14 +432,25 @@ static void shader_glsl_add_version_declaration(struct wined3d_string_buffer *bu shader_addline(buffer, "#version %u\n", shader_glsl_get_version(gl_info)); } -static void shader_glsl_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values) +static void shader_glsl_imm_float_literal(float value, char *str, + const struct wined3d_gl_info *gl_info) +{ + if (fabsf(value) == FLT_MAX && isfinite(value) && gl_info->supported[ARB_SHADER_BIT_ENCODING]) + strcpy(str, value == FLT_MAX ? "uintBitsToFloat(0x7f7fffffu)" + : "uintBitsToFloat(0xff7fffffu)"); + else + wined3d_ftoa(value, str); +} + +static void shader_glsl_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values, + const struct wined3d_gl_info *gl_info) { - char str[4][17]; + char str[4][29]; - wined3d_ftoa(values[0], str[0]); - wined3d_ftoa(values[1], str[1]); - wined3d_ftoa(values[2], str[2]); - wined3d_ftoa(values[3], str[3]); + shader_glsl_imm_float_literal(values[0], str[0], gl_info); + shader_glsl_imm_float_literal(values[1], str[1], gl_info); + shader_glsl_imm_float_literal(values[2], str[2], gl_info); + shader_glsl_imm_float_literal(values[3], str[3], gl_info); shader_addline(buffer, "vec4(%s, %s, %s, %s)", str[0], str[1], str[2], str[3]); } @@ -2932,7 +2943,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) { shader_addline(buffer, "const vec4 %s_lc%u = ", prefix, lconst->idx); - shader_glsl_append_imm_vec4(buffer, (const float *)lconst->value); + shader_glsl_append_imm_vec4(buffer, (const float *)lconst->value, gl_info); shader_addline(buffer, ";\n"); } } @@ -8104,10 +8115,10 @@ static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context if (args->srgb_correction) { shader_addline(buffer, "const vec4 srgb_const0 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0, gl_info); shader_addline(buffer, ";\n"); shader_addline(buffer, "const vec4 srgb_const1 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1, gl_info); shader_addline(buffer, ";\n"); } if (reg_maps->vpos || reg_maps->usesdsy) @@ -10001,10 +10012,10 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * if (settings->sRGB_write) { shader_addline(buffer, "const vec4 srgb_const0 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0, gl_info); shader_addline(buffer, ";\n"); shader_addline(buffer, "const vec4 srgb_const1 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1, gl_info); shader_addline(buffer, ";\n"); } -- 2.20.1