From: Stefan Dösinger <stefan@codeweavers.com>
Subject: [PATCH 1/2] wined3d: Try to detect the polygon offset scale value (v2).
Message-Id: <1438275016-20387-1-git-send-email-stefan@codeweavers.com>
Date: Thu, 30 Jul 2015 18:50:15 +0200

Version 2: Use shifts instead of powf, replace while (1) with for (;;).
Supersedes patch 113273.

This fixes bug 27145.

FEAR draws the same geometry twice, the second time using zfunc=equal.
In both cases it sets a huge depth bias of -0.5, presumably to get
better precision for the fragile Z comparison. The GL polygon offset we
set ends up being so large that it pulls the geometry into the negative
Z range. It isn't clipped (or no longer, older NV drivers probably had a
separate bug there), but the Z value gets clamped to 0.0 in the first
draw and doesn't match the incoming Z in the second draw.

Most drivers (r300g, r600, Nvidia on various versions) want a scaling
value of 2^23 for the depth bias on fixed point depth buffers to produce
the same result as native d3d. The only exception here is r200, which
wants 2^24. For floating point buffers Nvidia wants 2^22 and r600g wants
2^24. Those values are independent of the precision of the depth buffer.
The included detection function detects all values correctly, except the
floating point value for Nvidia (it finds 2^23 instead, but the tests
want 2^22). For this reason I have disabled the floating point tests in
the next patch.

The test has been very reliable for me. It runs correctly on r200, r500,
Evergreen, Geforce 7 and Geforce 650M GPUs on Windows. On Linux I tested
it on r200, r300g, r600g, Geforce 7, Geforce 650M.

I have written the detection function in a way that it (hopefully) works
on core contexts. It requires at least ARBvp, but I think it's safe to
assume that if we have FBOs we also have ARBvp support. The oldest GPU I
tested it on was r200.
---
 dlls/wined3d/cs.c              |  3 +-
 dlls/wined3d/directx.c         |  4 ++
 dlls/wined3d/state.c           | 25 +++++++----
 dlls/wined3d/utils.c           | 94 ++++++++++++++++++++++++++++++++++++++++++
 dlls/wined3d/wined3d_private.h |  2 +
 5 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 39e78e1..459fd56 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -446,7 +446,8 @@ static void wined3d_cs_exec_set_depth_stencil_view(struct wined3d_cs *cs, const
         device_invalidate_state(device, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
         device_invalidate_state(device, STATE_RENDER(WINED3D_RS_DEPTHBIAS));
     }
-    else if (prev && prev->format->depth_size != op->view->format->depth_size)
+    else if (prev && (prev->format_flags & WINED3DFMT_FLAG_FLOAT)
+            != (op->view->format_flags & WINED3DFMT_FLAG_FLOAT))
     {
         device_invalidate_state(device, STATE_RENDER(WINED3D_RS_DEPTHBIAS));
     }
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
index ce23c47..8e4448d 100644
--- a/dlls/wined3d/directx.c
+++ b/dlls/wined3d/directx.c
@@ -5903,6 +5903,10 @@ static BOOL wined3d_adapter_init(struct wined3d_adapter *adapter, UINT ordinal)
         return FALSE;
     }
 
+    gl_info->fixed_polyoffset_scale = wined3d_adapter_find_polyoffset_scale(&caps_gl_ctx, GL_DEPTH_COMPONENT);
+    if (gl_info->supported[ARB_DEPTH_BUFFER_FLOAT])
+        gl_info->float_polyoffset_scale = wined3d_adapter_find_polyoffset_scale(&caps_gl_ctx, GL_DEPTH32F_STENCIL8);
+
     adapter->vram_bytes = adapter->driver_info.vram_bytes;
     adapter->vram_bytes_used = 0;
     TRACE("Emulating 0x%s bytes of video ram.\n", wine_dbgstr_longlong(adapter->vram_bytes));
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index e39b7b4..3225aa6 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -1632,11 +1632,17 @@ static void state_scissor(struct wined3d_context *context, const struct wined3d_
  * OpenGL the bias is specified in units of "the smallest value that is
  * guaranteed to produce a resolvable offset for a given implementation". To
  * convert from D3D to GL we need to divide the D3D depth bias by that value.
- * There's no practical way to retrieve that value from a given GL
- * implementation, but the D3D application has essentially the same problem,
- * which makes a guess of the depth buffer format's highest possible value a
- * reasonable guess. Note that SLOPESCALEDEPTHBIAS is a scaling factor for the
- * depth slope, and doesn't need to be scaled. */
+ * We try to detect the value from GL with test draws. On most drivers (r300g,
+ * r600g, Nvidia, i965 on Mesa) the value is 2^23 for fixed point depth buffers,
+ * for r200 and i965 on OSX it is 2^24, for r500 on OSX it is 2^22. For floating
+ * point buffers it is 2^22, 2^23 or 2^24 depending on the GPU. The value does
+ * not depend on the depth buffer precision on any driver.
+ *
+ * Two games that are picky regarding depth bias are Mass Effect 2 (flickering
+ * decals) and F.E.A.R. and F.E.A.R. 2 (semi-transparent guns).
+ *
+ * Note that SLOPESCALEDEPTHBIAS is a scaling factor for the depth slope, and
+ * doesn't need to be scaled to account for GL vs D3D differences. */
 static void state_depthbias(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
 {
     const struct wined3d_gl_info *gl_info = context->gl_info;
@@ -1669,10 +1675,13 @@ static void state_depthbias(struct wined3d_context *context, const struct wined3
         {
             if (depth)
             {
-                const struct wined3d_format *fmt = depth->format;
-                scale = powf(2, fmt->depth_size) - 1;
+                if (depth->format_flags & WINED3DFMT_FLAG_FLOAT)
+                    scale = gl_info->float_polyoffset_scale;
+                else
+                    scale = gl_info->fixed_polyoffset_scale;
+
                 TRACE("Depth format %s, using depthbias scale of %.8e.\n",
-                      debug_d3dformat(fmt->id), scale);
+                      debug_d3dformat(depth->format->id), scale);
             }
             else
             {
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
index e57db99..30ad0e0 100644
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@@ -2707,6 +2707,100 @@ fail:
     return FALSE;
 }
 
+float wined3d_adapter_find_polyoffset_scale(struct wined3d_caps_gl_ctx *ctx, GLenum format)
+{
+    const struct wined3d_gl_info *gl_info = ctx->gl_info;
+    static const struct wined3d_color blue = {0.0f, 0.0f, 1.0f, 1.0f};
+    GLuint fbo, color, depth;
+    unsigned int low = 0, high = 32, cur; /* Exponent search range; cur can reach 31, hence the 1u shifts. */
+    DWORD readback[256];
+    static const struct wined3d_vec3 geometry[] =
+    {
+        {-1.0f, -1.0f, -1.0f},
+        { 1.0f, -1.0f,  0.0f},
+        {-1.0f,  1.0f, -1.0f},
+        { 1.0f,  1.0f,  0.0f},
+    };
+
+    /* Binary search for the exponent e at which 2^e polygon offset units shift the depth by 1.0. Most drivers
+     * want 2^23 for fixed point depth buffers (r300g, r600g, Nvidia); use that if the detection fails. */
+    unsigned int fallback = 23;
+
+    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO)
+    {
+        FIXME("No FBOs, assuming polyoffset scale of 2^%u.\n", fallback);
+        return (float)(1u << fallback);
+    }
+
+    gl_info->gl_ops.gl.p_glGenTextures(1, &color);
+    gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_2D, color);
+    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+    gl_info->gl_ops.gl.p_glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 256, 1, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 0);
+
+    gl_info->fbo_ops.glGenRenderbuffers(1, &depth);
+    gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, depth);
+    gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, format, 256, 1);
+
+    gl_info->fbo_ops.glGenFramebuffers(1, &fbo);
+    gl_info->fbo_ops.glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+    gl_info->fbo_ops.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color, 0);
+    gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth);
+    checkGLcall("Setup framebuffer");
+
+    gl_info->gl_ops.gl.p_glClearColor(0.0f, 0.0f, 0.5f, 0.0f);
+    gl_info->gl_ops.gl.p_glClearDepth(0.5f);
+    gl_info->gl_ops.gl.p_glEnable(GL_DEPTH_TEST);
+    gl_info->gl_ops.gl.p_glEnable(GL_POLYGON_OFFSET_FILL);
+    gl_info->gl_ops.gl.p_glViewport(0, 0, 256, 1);
+    checkGLcall("Misc parameters");
+
+    for (;;)
+    {
+        if (high - low <= 1) /* Search interval exhausted without a match. */
+        {
+            ERR("PolygonOffset scale factor detection failed, using fallback value 2^%u.\n", fallback);
+            cur = fallback;
+            break;
+        }
+        cur = (low + high) / 2;
+
+        gl_info->gl_ops.gl.p_glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+        /* The post viewport transform Z of the geometry runs from 0.0 to 0.5. We want to push it another
+         * 0.25 so that the Z buffer content (0.5) cuts the quad off at half the screen. */
+        gl_info->gl_ops.gl.p_glPolygonOffset(0.0f, (float)(1u << cur) * 0.25f);
+        draw_test_quad(ctx, geometry, &blue);
+        checkGLcall("Test draw");
+
+        /* Rebinding texture to workaround a fglrx bug. */
+        gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_2D, color);
+        gl_info->gl_ops.gl.p_glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, readback);
+        checkGLcall("readback");
+
+        TRACE("low %02u, high %02u, cur %2u, 0=0x%08x, 125=0x%08x, 131=0x%08x, 255=0x%08x\n",
+                low, high, cur, readback[0], readback[125], readback[131], readback[255]);
+
+        if ((readback[125] & 0xff) < 0xa0) /* Still clear color left of the middle: offset too large. */
+            high = cur;
+        else if ((readback[131] & 0xff) > 0xa0) /* Quad drawn right of the middle: offset too small. */
+            low = cur;
+        else
+        {
+            TRACE("Found scale factor 2^%u for format %x\n", cur, format);
+            break;
+        }
+    }
+
+    gl_info->gl_ops.gl.p_glDeleteTextures(1, &color);
+    gl_info->fbo_ops.glDeleteRenderbuffers(1, &depth);
+    gl_info->fbo_ops.glDeleteFramebuffers(1, &fbo);
+    gl_info->fbo_ops.glBindFramebuffer(GL_FRAMEBUFFER, 0);
+    checkGLcall("Delete framebuffer");
+
+    gl_info->gl_ops.gl.p_glDisable(GL_DEPTH_TEST);
+    gl_info->gl_ops.gl.p_glDisable(GL_POLYGON_OFFSET_FILL);
+    return (float)(1u << cur);
+}
+
 const struct wined3d_format *wined3d_get_format(const struct wined3d_gl_info *gl_info,
         enum wined3d_format_id format_id)
 {
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index a1331d3..c22fe33 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -1723,6 +1723,7 @@ struct wined3d_gl_info
     DWORD quirks;
     BOOL supported[WINED3D_GL_EXT_COUNT];
     GLint wrap_lookup[WINED3D_TADDRESS_MIRROR_ONCE - WINED3D_TADDRESS_WRAP + 1];
+    float fixed_polyoffset_scale, float_polyoffset_scale;
 
     HGLRC (WINAPI *p_wglCreateContextAttribsARB)(HDC dc, HGLRC share, const GLint *attribs);
     struct opengl_funcs gl_ops;
@@ -1812,6 +1813,7 @@ struct wined3d_caps_gl_ctx
     GLuint test_program_id;
 };
 
+float wined3d_adapter_find_polyoffset_scale(struct wined3d_caps_gl_ctx *ctx, GLenum format) DECLSPEC_HIDDEN;
 BOOL wined3d_adapter_init_format_info(struct wined3d_adapter *adapter,
         struct wined3d_caps_gl_ctx *ctx) DECLSPEC_HIDDEN;
 UINT64 adapter_adjust_memory(struct wined3d_adapter *adapter, INT64 amount) DECLSPEC_HIDDEN;

-- 
2.4.6