From: "Stefan Dösinger" <stefan@codeweavers.com>
Subject: [PATCH 1/3] wined3d: Test if a nudge is needed to get the correct filling convention.
Message-Id: <20210906194125.111300-1-stefan@codeweavers.com>
Date: Mon,  6 Sep 2021 22:41:23 +0300

This fixes stray lines in GameFace GUIs, e.g. in World of Tanks.

Signed-off-by: Stefan Dösinger <stefan@codeweavers.com>

---

The ddraw tests started succeeding in the todo block for a few of the tested
pixels. I looked at the output results and I don't think it happened because
the Z values are more precise now. I think a few bits flipped (in the random
readback we get) and now a few of those tests were inside the expected diff.
---
 dlls/d3d11/tests/d3d11.c       |   1 -
 dlls/ddraw/tests/ddraw4.c      |   6 +-
 dlls/ddraw/tests/ddraw7.c      |   6 +-
 dlls/wined3d/adapter_gl.c      |  40 +++++++++++
 dlls/wined3d/state.c           |   3 +-
 dlls/wined3d/utils.c           | 127 +++++++++++++++++++++++++++++----
 dlls/wined3d/wined3d_private.h |  13 +++-
 7 files changed, 176 insertions(+), 20 deletions(-)

diff --git a/dlls/d3d11/tests/d3d11.c b/dlls/d3d11/tests/d3d11.c
index ae3020e130d..e08408b4884 100644
--- a/dlls/d3d11/tests/d3d11.c
+++ b/dlls/d3d11/tests/d3d11.c
@@ -28155,7 +28155,6 @@ static void test_fractional_viewports(void)
                 ok(compare_float(v->x, expected.x, 0) && compare_float(v->y, expected.y, 0),
                         "Got fragcoord {%.8e, %.8e}, expected {%.8e, %.8e} at (%u, %u), offset %.8e.\n",
                         v->x, v->y, expected.x, expected.y, x, y, viewport_offsets[i]);
-                todo_wine
                 ok(compare_float(v->z, expected.z, 2) && compare_float(v->w, expected.w, 2),
                         "Got texcoord {%.8e, %.8e}, expected {%.8e, %.8e} at (%u, %u), offset %.8e.\n",
                         v->z, v->w, expected.z, expected.w, x, y, viewport_offsets[i]);
diff --git a/dlls/ddraw/tests/ddraw4.c b/dlls/ddraw/tests/ddraw4.c
index 6b514b15e25..4f052256882 100644
--- a/dlls/ddraw/tests/ddraw4.c
+++ b/dlls/ddraw/tests/ddraw4.c
@@ -16118,8 +16118,10 @@ static void test_depth_readback(void)
                 /* The ddraw4 version of this test behaves similarly to the ddraw7 version on Nvidia GPUs,
                  * except that Geforce 7 also returns garbage data in D24S8, whereas the ddraw7 version
                  * returns 0 for that format. Give up on pre-filtering formats, accept Nvidia as generally
-                 * broken here, but still expect at least one format (D16 or D24X8 in practise) to pass. */
-                todo_wine_if(tests[i].todo)
+                 * broken here, but still expect at least one format (D16 or D24X8 in practise) to pass.
+                 *
+                 * Some of the tested places pass on some GPUs on Wine by accident. */
+                todo_wine_if(tests[i].todo && !compare_uint(expected_depth, depth, max_diff))
                     ok(compare_uint(expected_depth, depth, max_diff) || ddraw_is_nvidia(ddraw),
                              "Test %u: Got depth 0x%08x (diff %d), expected 0x%08x+/-%u, at %u, %u.\n",
                              i, depth, expected_depth - depth, expected_depth, max_diff, x, y);
diff --git a/dlls/ddraw/tests/ddraw7.c b/dlls/ddraw/tests/ddraw7.c
index 4c42d6f4b64..4402f2d93b5 100644
--- a/dlls/ddraw/tests/ddraw7.c
+++ b/dlls/ddraw/tests/ddraw7.c
@@ -15597,8 +15597,10 @@ static void test_depth_readback(void)
                  * Geforce GTX 650 has working D16 and D24, but D24S8 returns 0.
                  *
                  * Arx Fatalis is broken on the Geforce 9 in the same way it was broken in Wine (bug 43654).
-                 * The !tests[i].s_depth is supposed to rule out D16 on GF9 and D24X8 on GF7. */
-                todo_wine_if(tests[i].todo)
+                 * The !tests[i].s_depth is supposed to rule out D16 on GF9 and D24X8 on GF7.
+                 *
+                 * Some of the tested places pass on some GPUs on Wine by accident. */
+                todo_wine_if(tests[i].todo && !compare_uint(expected_depth, depth, max_diff))
                     ok(compare_uint(expected_depth, depth, max_diff)
                             || (ddraw_is_nvidia(ddraw) && (all_zero || all_one || !tests[i].s_depth)),
                             "Test %u: Got depth 0x%08x (diff %d), expected 0x%08x+/-%u, at %u, %u.\n",
diff --git a/dlls/wined3d/adapter_gl.c b/dlls/wined3d/adapter_gl.c
index 902b9620f30..72f46693274 100644
--- a/dlls/wined3d/adapter_gl.c
+++ b/dlls/wined3d/adapter_gl.c
@@ -5130,6 +5130,7 @@ static void wined3d_adapter_gl_init_d3d_info(struct wined3d_adapter_gl *adapter_
     d3d_info->full_ffp_varyings = !!(shader_caps.wined3d_caps & WINED3D_SHADER_CAP_FULL_FFP_VARYINGS);
     d3d_info->scaled_resolve = !!gl_info->supported[EXT_FRAMEBUFFER_MULTISAMPLE_BLIT_SCALED];
     d3d_info->feature_level = feature_level_from_caps(gl_info, &shader_caps, &fragment_caps);
+    d3d_info->filling_convention_nudge = gl_info->filling_convention_nudge;
 
     if (gl_info->supported[ARB_TEXTURE_MULTISAMPLE])
         d3d_info->multisample_draw_location = WINED3D_LOCATION_TEXTURE_RGB;
@@ -5137,6 +5138,43 @@ static void wined3d_adapter_gl_init_d3d_info(struct wined3d_adapter_gl *adapter_
         d3d_info->multisample_draw_location = WINED3D_LOCATION_RB_MULTISAMPLE;
 }
 
+static float wined3d_adapter_find_fill_nudge(struct wined3d_caps_gl_ctx *ctx)
+{
+    static const float test_array[] =
+    {
+        0.0f,
+        -1.0f / 1024.0f,
+        -1.0f / 512.0f,
+        -1.0f / 256.0f,
+        -1.0f / 128.0f,
+        -1.0f / 64.0f /* Accept this without a warning if it gets the right result. */
+    };
+    /* This value was used unconditionally before the dynamic test function was
+     * introduced. */
+    static const float no_clue = -1.0f / 64.0f;
+    unsigned int i;
+    float value;
+
+    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO)
+        return no_clue;
+
+    for (i = 0; i < ARRAY_SIZE(test_array); ++i)
+    {
+        value = test_array[i];
+        if (wined3d_caps_gl_ctx_test_filling_convention(ctx, value))
+        {
+            if (value)
+                WARN("Using a filling convention fixup nudge of -1/%f\n", -1.0f / value);
+            else
+                TRACE("No need for a filling convetion nudge.\n");
+            return value;
+        }
+    }
+
+    FIXME("Did not find a way to get the filling convention we want.\n");
+    return no_clue;
+}
+
 static BOOL wined3d_adapter_gl_init(struct wined3d_adapter_gl *adapter_gl,
         unsigned int ordinal, unsigned int wined3d_creation_flags)
 {
@@ -5222,6 +5260,8 @@ static BOOL wined3d_adapter_gl_init(struct wined3d_adapter_gl *adapter_gl,
         return FALSE;
     }
 
+    gl_info->filling_convention_nudge = wined3d_adapter_find_fill_nudge(&caps_gl_ctx);
+
     wined3d_adapter_gl_init_d3d_info(adapter_gl, wined3d_creation_flags);
 
     if (!adapter_gl->a.d3d_info.shader_color_key)
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index 8316269afcf..5c1c69fb650 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -4233,13 +4233,14 @@ static void viewport_miscpart_cc(struct wined3d_context *context,
     const struct wined3d_gl_info *gl_info = wined3d_context_gl(context)->gl_info;
     /* See get_projection_matrix() in utils.c for a discussion about those values. */
     float pixel_center_offset = context->d3d_info->wined3d_creation_flags
-            & WINED3D_PIXEL_CENTER_INTEGER ? 63.0f / 128.0f : -1.0f / 128.0f;
+            & WINED3D_PIXEL_CENTER_INTEGER ? 0.5f : 0.0f;
     struct wined3d_viewport vp[WINED3D_MAX_VIEWPORTS];
     GLdouble depth_ranges[2 * WINED3D_MAX_VIEWPORTS];
     GLfloat viewports[4 * WINED3D_MAX_VIEWPORTS];
     unsigned int i, reset_count = 0;
     float min_z, max_z;
 
+    pixel_center_offset += context->d3d_info->filling_convention_nudge / 2.0f;
     get_viewports(context, state, state->viewport_count, vp);
 
     GL_EXTCALL(glClipControl(context->render_offscreen ? GL_UPPER_LEFT : GL_LOWER_LEFT, GL_ZERO_TO_ONE));
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
index 4019dd4d812..38704db2f35 100644
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@@ -3928,6 +3928,105 @@ BOOL wined3d_caps_gl_ctx_test_viewport_subpixel_bits(struct wined3d_caps_gl_ctx
     return TRUE;
 }
 
+BOOL wined3d_caps_gl_ctx_test_filling_convention(struct wined3d_caps_gl_ctx *ctx, float nudge)
+{
+    static const struct wined3d_color red = {1.0f, 0.0f, 0.0f, 1.0f};
+    const struct wined3d_gl_info *gl_info = ctx->gl_info;
+    GLuint texture, fbo;
+    DWORD readback[8][8];
+    unsigned int x, y;
+
+    /* This is a very simple test to find out how GL handles pixel edges:
+     * Draw a quad exactly between 4 pixels in an 8x8 viewport and see
+     * which pixel it ends up in. So far we've seen top left and bottom
+     * left conventions. This test may produce unexpected results if the
+     * driver forces multisampling on us.
+     *
+     * If we find a bottom-left filling behavior we also nudge the x axis
+     * by the same amount. This is necessary to keep diagonals that go
+     * through the pixel center intact.
+     *
+     * Note that we are ignoring some settings that might influence the
+     * driver: How we switch GL to a upper-left coordinate system, if we
+     * are drawing into the on-screen framebuffer (we don't have one here
+     * because our window is hidden), shaders vs fixed function GL.
+     * Testing these isn't possible with the current draw_test_quad()
+     * infrastructure.
+     *
+     * The minimum nudge also depends on the viewport size, although
+     * the relation between those two is GPU dependent and not exactly
+     * sensible. E.g. a 8192x8192 viewport on a GeForce 9 needs at
+     * least a nudge of 1/240.9, whereas a 8x8 one needs 1/255.982;
+     * 32x32 needs 1/255.935. 4x4 and lower are happy with something
+     * below 1/256. The 8x8 size below has been arbitrarily chosen to
+     * get a useful result out of that card and avoid allocating a
+     * gigantic texture during library init.
+     *
+     * Newer cards usually do the right thing anyway. In cases where
+     * they do not (e.g. Radeon GPUs in a macbookpro14,3 running MacOS)
+     * a nudge of 1/2^20 is enough. */
+    const struct wined3d_vec3 edge_geometry[] =
+    {
+        {(-1.0f + nudge) / 8.0f, (-1.0f + nudge) / 8.0f, 0.0f},
+        {( 1.0f + nudge) / 8.0f, (-1.0f + nudge) / 8.0f, 0.0f},
+        {(-1.0f + nudge) / 8.0f, ( 1.0f + nudge) / 8.0f, 0.0f},
+        {( 1.0f + nudge) / 8.0f, ( 1.0f + nudge) / 8.0f, 0.0f},
+    };
+
+    gl_info->gl_ops.gl.p_glGenTextures(1, &texture);
+    gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_2D, texture);
+    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+    gl_info->gl_ops.gl.p_glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 8, 8, 0,
+            GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
+    gl_info->fbo_ops.glGenFramebuffers(1, &fbo);
+    gl_info->fbo_ops.glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+    gl_info->fbo_ops.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+            GL_TEXTURE_2D, texture, 0);
+    checkGLcall("create resources");
+
+    gl_info->gl_ops.gl.p_glViewport(0, 0, 8, 8);
+    gl_info->gl_ops.gl.p_glClearColor(0.0f, 0.0f, 1.0f, 1.0f);
+    gl_info->gl_ops.gl.p_glClear(GL_COLOR_BUFFER_BIT);
+
+    draw_test_quad(ctx, edge_geometry, &red);
+    checkGLcall("draw");
+
+    gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_2D, texture);
+    gl_info->gl_ops.gl.p_glGetTexImage(GL_TEXTURE_2D, 0,
+            GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, readback);
+    checkGLcall("readback");
+
+    gl_info->gl_ops.gl.p_glDeleteTextures(1, &texture);
+    gl_info->fbo_ops.glDeleteFramebuffers(1, &fbo);
+    gl_info->fbo_ops.glBindFramebuffer(GL_FRAMEBUFFER, 0);
+    checkGLcall("delete resources");
+
+    if (readback[3][3] == 0xffff0000 && readback[3][4] == 0xff0000ff
+            && readback[4][3] == 0xff0000ff && readback[4][4] == 0xff0000ff)
+    {
+        TRACE("Got the expected filling result.\n");
+        return TRUE;
+    }
+    else if (readback[3][3] == 0xff0000ff && readback[3][4] == 0xff0000ff
+            && readback[4][3] == 0xffff0000 && readback[4][4] == 0xff0000ff)
+    {
+        WARN("GPU uses a bottom-left filling convention in FBOs.\n");
+        return FALSE;
+    }
+
+    FIXME("Unexpected filling convention test result\n");
+
+    for (y = 0; y < ARRAY_SIZE(readback); ++y)
+    {
+        for (x = 0; x < ARRAY_SIZE(readback[0]); ++x)
+        {
+            FIXME("0x%08x ", readback[y][x]);
+        }
+        FIXME("\n");
+    }
+
+    return FALSE;
+}
 static float wined3d_adapter_find_polyoffset_scale(struct wined3d_caps_gl_ctx *ctx, GLenum format)
 {
     const struct wined3d_gl_info *gl_info = ctx->gl_info;
@@ -5540,22 +5639,26 @@ void get_projection_matrix(const struct wined3d_context *context, const struct w
      *   - We need to flip along the y-axis in case of offscreen rendering.
      *   - OpenGL Z range is {-Wc,...,Wc} while D3D Z range is {0,...,Wc}.
      *   - <= D3D9 coordinates refer to pixel centers while GL coordinates
-     *     refer to pixel corners.
-     *   - D3D has a top-left filling convention. We need to maintain this
-     *     even after the y-flip mentioned above.
-     * In order to handle the last two points, we translate by
-     * (63.0 / 128.0) / VPw and (63.0 / 128.0) / VPh. This is equivalent to
-     * translating slightly less than half a pixel. We want the difference to
-     * be large enough that it doesn't get lost due to rounding inside the
-     * driver, but small enough to prevent it from interfering with any
-     * anti-aliasing. */
+     *     refer to pixel corners. D3D10 fixed this particular oddity.
+     *   - D3D has a top-left filling convention while GL does not specify
+     *     a particular behavior, other than that that the GL implementation
+     *     needs to be consistent.
+     *
+     * In order to handle the pixel center, we translate by 0.5 / VPw and
+     * 0.5 / VPh. We test the filling convention during adapter init and
+     * add a small offset to correct it if necessary. See
+     * wined3d_caps_gl_ctx_test_filling_convention for more details on how
+     * we test GL and considerations regarding the added nudge value.
+     *
+     * If we have GL_ARB_clip_control we take care of all this through
+     * viewport properties and don't have to translate geometry. */
 
     clip_control = d3d_info->clip_control;
     flip = !clip_control && context->render_offscreen;
-    if (!clip_control && d3d_info->wined3d_creation_flags & WINED3D_PIXEL_CENTER_INTEGER)
-        center_offset = 63.0f / 64.0f;
+    if (!clip_control)
+        center_offset = 1.0f + d3d_info->filling_convention_nudge;
     else
-        center_offset = -1.0f / 64.0f;
+        center_offset = 0.0f;
 
     if (context->last_was_rhw)
     {
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 86c053df0db..f305829475e 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -242,6 +242,8 @@ struct wined3d_d3d_info
     enum wined3d_feature_level feature_level;
 
     DWORD multisample_draw_location;
+
+    float filling_convention_nudge;
 };
 
 static const struct color_fixup_desc COLOR_FIXUP_IDENTITY =
@@ -3239,6 +3241,7 @@ struct wined3d_gl_info
     DWORD quirks;
     BOOL supported[WINED3D_GL_EXT_COUNT];
     GLint wrap_lookup[WINED3D_TADDRESS_MIRROR_ONCE - WINED3D_TADDRESS_WRAP + 1];
+    float filling_convention_nudge;
 
     HGLRC (WINAPI *p_wglCreateContextAttribsARB)(HDC dc, HGLRC share, const GLint *attribs);
     struct opengl_funcs gl_ops;
@@ -3487,6 +3490,7 @@ BOOL wined3d_adapter_vk_init_format_info(struct wined3d_adapter_vk *adapter_vk,
 UINT64 adapter_adjust_memory(struct wined3d_adapter *adapter, INT64 amount) DECLSPEC_HIDDEN;
 
 BOOL wined3d_caps_gl_ctx_test_viewport_subpixel_bits(struct wined3d_caps_gl_ctx *ctx) DECLSPEC_HIDDEN;
+BOOL wined3d_caps_gl_ctx_test_filling_convention(struct wined3d_caps_gl_ctx *ctx, float nudge) DECLSPEC_HIDDEN;
 
 void install_gl_compat_wrapper(struct wined3d_gl_info *gl_info, enum wined3d_gl_extension ext) DECLSPEC_HIDDEN;
 
@@ -5660,10 +5664,15 @@ static inline void shader_get_position_fixup(const struct wined3d_context *conte
     float center_offset;
     unsigned int i;
 
+    /* See get_projection_matrix() in utils.c for a discussion of the position fixup.
+     * This function here also applies to d3d10+ which does not need adjustment for
+     * integer pixel centers, but it may need the filling convention nudge. */
     if (context->d3d_info->wined3d_creation_flags & WINED3D_PIXEL_CENTER_INTEGER)
-        center_offset = 63.0f / 64.0f;
+        center_offset = 1.0f;
     else
-        center_offset = -1.0f / 64.0f;
+        center_offset = 0.0f;
+
+    center_offset += context->d3d_info->filling_convention_nudge;
 
     for (i = 0; i < fixup_count; ++i)
     {

-- 
2.32.0