From: Matteo Bruni Subject: [PATCH 6/6] wined3d: Don't replicate shader limits values for each shader. Message-Id: <1412870460-5450-6-git-send-email-mbruni@codeweavers.com> Date: Thu, 9 Oct 2014 18:01:00 +0200 --- dlls/wined3d/arb_program_shader.c | 4 +- dlls/wined3d/glsl_shader.c | 40 +++---- dlls/wined3d/shader.c | 237 +++++++++++--------------------------- dlls/wined3d/wined3d_private.h | 4 +- 4 files changed, 92 insertions(+), 193 deletions(-) diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c index 6fbe296..bfbe09c 100644 --- a/dlls/wined3d/arb_program_shader.c +++ b/dlls/wined3d/arb_program_shader.c @@ -823,7 +823,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader max_constantsF -= count_bits(reg_maps->integer_constants); max_constantsF -= gl_info->reserved_arb_constants; - for (i = 0; i < shader->limits.constant_float; ++i) + for (i = 0; i < shader->limits->constant_float; ++i) { DWORD idx = i >> 5; DWORD shift = i & 0x1f; @@ -899,7 +899,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader } /* Avoid declaring more constants than needed */ - max_constantsF = min(max_constantsF, shader->limits.constant_float); + max_constantsF = min(max_constantsF, shader->limits->constant_float); /* we use the array-based constants array if the local constants are marked for loading, * because then we use indirect addressing, or when the local constant list is empty, diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 34cc567..7379ba2 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -971,7 +971,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont } /* Declare the constants (aka uniforms) */ - if (shader->limits.constant_float > 0) + if (shader->limits->constant_float > 0) { unsigned max_constantsF; @@ -1025,7 +1025,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont /* Set by driver quirks in directx.c */ max_constantsF -= gl_info->reserved_glsl_constants; - if (max_constantsF < shader->limits.constant_float) + if (max_constantsF < shader->limits->constant_float) { static unsigned int once; @@ -1041,18 +1041,18 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont max_constantsF = gl_info->limits.glsl_vs_float_constants; } } - max_constantsF = min(shader->limits.constant_float, max_constantsF); + max_constantsF = min(shader->limits->constant_float, max_constantsF); shader_addline(buffer, "uniform vec4 %s_c[%u];\n", prefix, max_constantsF); } /* Always declare the full set of constants, the compiler can remove the * unused ones because d3d doesn't (yet) support indirect int and bool * constant addressing. This avoids problems if the app uses e.g. i0 and i9. */ - if (shader->limits.constant_int > 0 && reg_maps->integer_constants) - shader_addline(buffer, "uniform ivec4 %s_i[%u];\n", prefix, shader->limits.constant_int); + if (shader->limits->constant_int > 0 && reg_maps->integer_constants) + shader_addline(buffer, "uniform ivec4 %s_i[%u];\n", prefix, shader->limits->constant_int); - if (shader->limits.constant_bool > 0 && reg_maps->boolean_constants) - shader_addline(buffer, "uniform bool %s_b[%u];\n", prefix, shader->limits.constant_bool); + if (shader->limits->constant_bool > 0 && reg_maps->boolean_constants) + shader_addline(buffer, "uniform bool %s_b[%u];\n", prefix, shader->limits->constant_bool); for (i = 0; i < WINED3D_MAX_CBS; ++i) { @@ -1062,7 +1062,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont } /* Declare texture samplers */ - for (i = 0; i < shader->limits.sampler; ++i) + for (i = 0; i < shader->limits->sampler; ++i) { if (reg_maps->sampler_type[i]) { @@ -1128,7 +1128,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont * samplerNP2Fixup stores texture dimensions and is updated through * shader_glsl_load_np2fixup_constants when the sampler changes. */ - for (i = 0; i < shader->limits.sampler; ++i) + for (i = 0; i < shader->limits->sampler; ++i) { if (reg_maps->sampler_type[i]) { @@ -1170,17 +1170,17 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont } shader_addline(buffer, "uniform vec4 posFixup;\n"); - shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", shader->limits.packed_output); + shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", shader->limits->packed_output); } else if (version->type == WINED3D_SHADER_TYPE_GEOMETRY) { - shader_addline(buffer, "varying in vec4 gs_in[][%u];\n", shader->limits.packed_input); + shader_addline(buffer, "varying in vec4 gs_in[][%u];\n", shader->limits->packed_input); } else if (version->type == WINED3D_SHADER_TYPE_PIXEL) { if (version->major >= 3) { - UINT in_count = min(vec4_varyings(version->major, gl_info), shader->limits.packed_input); + UINT in_count = min(vec4_varyings(version->major, gl_info), shader->limits->packed_input); if (use_vs(state)) shader_addline(buffer, "varying vec4 %s_in[%u];\n", prefix, in_count); @@ -1221,7 +1221,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont } if (reg_maps->vpos || reg_maps->usesdsy) { - if (shader->limits.constant_float + extra_constants_needed + if (shader->limits->constant_float + extra_constants_needed + 1 < gl_info->limits.glsl_ps_float_constants) { shader_addline(buffer, "uniform vec4 ycorrection;\n"); @@ -1250,8 +1250,8 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont } /* Declare output register temporaries */ - if (shader->limits.packed_output) - shader_addline(buffer, "vec4 %s_out[%u];\n", prefix, shader->limits.packed_output); + if (shader->limits->packed_output) + shader_addline(buffer, "vec4 %s_out[%u];\n", prefix, shader->limits->packed_output); /* Declare temporary variables */ for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) @@ -4306,7 +4306,7 @@ static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer if (ps_major < 3) { - shader_addline(buffer, "void order_ps_input(in vec4 vs_out[%u])\n{\n", vs->limits.packed_output); + shader_addline(buffer, "void order_ps_input(in vec4 vs_out[%u])\n{\n", vs->limits->packed_output); for (i = 0; map; map >>= 1, ++i) { @@ -4359,10 +4359,10 @@ static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer } else { - UINT in_count = min(vec4_varyings(ps_major, gl_info), ps->limits.packed_input); + UINT in_count = min(vec4_varyings(ps_major, gl_info), ps->limits->packed_input); /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */ shader_addline(buffer, "varying vec4 ps_in[%u];\n", in_count); - shader_addline(buffer, "void order_ps_input(in vec4 vs_out[%u])\n{\n", vs->limits.packed_output); + shader_addline(buffer, "void order_ps_input(in vec4 vs_out[%u])\n{\n", vs->limits->packed_output); /* First, sort out position and point size. Those are not passed to the pixel shader */ for (i = 0; map; map >>= 1, ++i) @@ -5958,9 +5958,9 @@ static void set_glsl_shader_program(const struct wined3d_context *context, const shader_glsl_validate_link(gl_info, programId); shader_glsl_init_vs_uniform_locations(gl_info, programId, &entry->vs, - vshader ? vshader->limits.constant_float : 0); + vshader ? min(vshader->limits->constant_float, gl_info->limits.glsl_vs_float_constants) : 0); shader_glsl_init_ps_uniform_locations(gl_info, programId, &entry->ps, - pshader ? pshader->limits.constant_float : 0); + pshader ? min(pshader->limits->constant_float, gl_info->limits.glsl_ps_float_constants) : 0); checkGLcall("Find glsl program uniform locations"); if (pshader && pshader->reg_maps.shader_version.major >= 3 diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index 4501dc6..442bc66 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -370,167 +370,77 @@ static void shader_delete_constant_list(struct list *clist) list_init(clist); } -static void vertexshader_set_limits(struct wined3d_shader *shader) +static const struct wined3d_shader_limits vs_limits[] = { - DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major, - shader->reg_maps.shader_version.minor); - struct wined3d_device *device = shader->device; - const DWORD vs_uniform_count = device->adapter->d3d_info.limits.vs_uniform_count; - - shader->limits.packed_input = 0; - - switch (shader_version) - { - case WINED3D_SHADER_VERSION(1, 0): - case WINED3D_SHADER_VERSION(1, 1): - shader->limits.constant_bool = 0; - shader->limits.constant_int = 0; - shader->limits.packed_output = 12; - shader->limits.sampler = 0; - /* TODO: vs_1_1 has a minimum of 96 constants. What happens when - * a vs_1_1 shader is used on a vs_3_0 capable card that has 256 - * constants? */ - shader->limits.constant_float = min(256, vs_uniform_count); - break; - - case WINED3D_SHADER_VERSION(2, 0): - case WINED3D_SHADER_VERSION(2, 1): - shader->limits.constant_bool = 16; - shader->limits.constant_int = 16; - shader->limits.packed_output = 12; - shader->limits.sampler = 0; - shader->limits.constant_float = min(256, vs_uniform_count); - break; - - case WINED3D_SHADER_VERSION(3, 0): - shader->limits.constant_bool = 16; - shader->limits.constant_int = 16; - shader->limits.packed_output = 12; - shader->limits.sampler = 4; - /* DX10 cards on Windows advertise a d3d9 constant limit of 256 - * even though they are capable of supporting much more (GL - * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the - * wined3d-advertised maximum. Clamp the constant limit for <= 3.0 - * shaders to 256. */ - shader->limits.constant_float = min(256, vs_uniform_count); - break; - - case WINED3D_SHADER_VERSION(4, 0): - shader->limits.sampler = 16; /* FIXME: 128 resources, 16 sampler states */ - shader->limits.constant_int = 0; - shader->limits.constant_float = 0; - shader->limits.constant_bool = 0; - shader->limits.packed_output = 16; - shader->limits.packed_input = 0; - break; - - default: - shader->limits.constant_bool = 16; - shader->limits.constant_int = 16; - shader->limits.packed_output = 12; - shader->limits.sampler = 0; - shader->limits.constant_float = min(256, vs_uniform_count); - FIXME("Unrecognized vertex shader version \"%u.%u\".\n", - shader->reg_maps.shader_version.major, - shader->reg_maps.shader_version.minor); - } -} + /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */ + {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 1), 0, 0, 256, 0, 12, 0}, + {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), 0, 16, 256, 16, 12, 0}, + /* DX10 cards on Windows advertise a D3D9 constant limit of 256 + * even though they are capable of supporting much more (GL + * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the + * wined3d-advertised maximum. Clamp the constant limit for <= 3.0 + * shaders to 256. */ + {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), 4, 16, 256, 16, 12, 0}, + {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), 16, 0, 0, 0, 16, 0}, + {0} +}; -static void geometryshader_set_limits(struct wined3d_shader *shader) +static const struct wined3d_shader_limits gs_limits[] = { - DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major, - shader->reg_maps.shader_version.minor); - - switch (shader_version) - { - case WINED3D_SHADER_VERSION(4, 0): - shader->limits.sampler = 16; /* FIXME: 128 resources, 16 sampler states */ - shader->limits.constant_int = 0; - shader->limits.constant_float = 0; - shader->limits.constant_bool = 0; - shader->limits.packed_output = 32; - shader->limits.packed_input = 16; - break; + /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */ + {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), 16, 0, 0, 0, 32, 16}, + {0} +}; - default: - memset(&shader->limits, 0, sizeof(shader->limits)); - FIXME("Unhandled geometry shader version \"%u.%u\".\n", - shader->reg_maps.shader_version.major, - shader->reg_maps.shader_version.minor); - } -} +static const struct wined3d_shader_limits ps_limits[] = +{ + /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */ + {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 3), 4, 0, 8, 0, 0, 0}, + {WINED3D_SHADER_VERSION(1, 4), WINED3D_SHADER_VERSION(1, 4), 6, 0, 8, 0, 0, 0}, + {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), 16, 16, 32, 16, 0, 0}, + {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), 16, 16, 224, 16, 0, 12}, + {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), 16, 0, 0, 0, 0, 32}, + {0} +}; -static void pixelshader_set_limits(struct wined3d_shader *shader) +static void shader_set_limits(struct wined3d_shader *shader) { DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major, shader->reg_maps.shader_version.minor); - const DWORD ps_uniform_count = shader->device->adapter->d3d_info.limits.ps_uniform_count; - - shader->limits.packed_output = 0; + const struct wined3d_shader_limits *limits_array; + int i = 0; - switch (shader_version) + switch (shader->reg_maps.shader_version.type) { - case WINED3D_SHADER_VERSION(1, 0): - case WINED3D_SHADER_VERSION(1, 1): - case WINED3D_SHADER_VERSION(1, 2): - case WINED3D_SHADER_VERSION(1, 3): - shader->limits.constant_float = 8; - shader->limits.constant_int = 0; - shader->limits.constant_bool = 0; - shader->limits.sampler = 4; - shader->limits.packed_input = 0; - break; - - case WINED3D_SHADER_VERSION(1, 4): - shader->limits.constant_float = 8; - shader->limits.constant_int = 0; - shader->limits.constant_bool = 0; - shader->limits.sampler = 6; - shader->limits.packed_input = 0; - break; - - /* FIXME: Temporaries must match D3DPSHADERCAPS2_0.NumTemps. */ - case WINED3D_SHADER_VERSION(2, 0): - shader->limits.constant_float = 32; - shader->limits.constant_int = 16; - shader->limits.constant_bool = 16; - shader->limits.sampler = 16; - shader->limits.packed_input = 0; + default: + FIXME("Unexpected shader type %u found.\n", shader->reg_maps.shader_version.type); + /* Fall-through. */ + case WINED3D_SHADER_TYPE_VERTEX: + limits_array = vs_limits; break; - - case WINED3D_SHADER_VERSION(2, 1): - shader->limits.constant_float = 32; - shader->limits.constant_int = 16; - shader->limits.constant_bool = 16; - shader->limits.sampler = 16; - shader->limits.packed_input = 0; + case WINED3D_SHADER_TYPE_GEOMETRY: + limits_array = gs_limits; break; - - case WINED3D_SHADER_VERSION(3, 0): - shader->limits.constant_float = min(224, ps_uniform_count); - shader->limits.constant_int = 16; - shader->limits.constant_bool = 16; - shader->limits.sampler = 16; - shader->limits.packed_input = 12; + case WINED3D_SHADER_TYPE_PIXEL: + limits_array = ps_limits; break; + } - case WINED3D_SHADER_VERSION(4, 0): - shader->limits.sampler = 16; /* FIXME: 128 resources, 16 sampler states */ - shader->limits.constant_int = 0; - shader->limits.constant_float = 0; - shader->limits.constant_bool = 0; - shader->limits.packed_input = 32; + while (limits_array[i].min_version && limits_array[i].min_version <= shader_version) + { + if (shader_version <= limits_array[i].max_version) + { + shader->limits = &limits_array[i]; break; - - default: - shader->limits.constant_float = 32; - shader->limits.constant_int = 16; - shader->limits.constant_bool = 16; - shader->limits.sampler = 16; - shader->limits.packed_input = 0; - FIXME("Unrecognized pixel shader version %u.%u\n", - shader->reg_maps.shader_version.major, - shader->reg_maps.shader_version.minor); + } + ++i; + } + if (!shader->limits) + { + FIXME("Unexpected shader version \"%u.%u\".\n", + shader->reg_maps.shader_version.major, + shader->reg_maps.shader_version.minor); + shader->limits = &limits_array[max(0, i - 1)]; } } @@ -607,7 +517,7 @@ static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct w } else { - if (reg->idx[0].offset >= min(shader->limits.constant_float, constf_size)) + if (reg->idx[0].offset >= min(shader->limits->constant_float, constf_size)) { WARN("Shader using float constant %u which is not supported.\n", reg->idx[0].offset); return FALSE; @@ -620,7 +530,7 @@ static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct w break; case WINED3DSPR_CONSTINT: - if (reg->idx[0].offset >= shader->limits.constant_int) + if (reg->idx[0].offset >= shader->limits->constant_int) { WARN("Shader using integer constant %u which is not supported.\n", reg->idx[0].offset); return FALSE; @@ -632,7 +542,7 @@ static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct w break; case WINED3DSPR_CONSTBOOL: - if (reg->idx[0].offset >= shader->limits.constant_bool) + if (reg->idx[0].offset >= shader->limits->constant_bool) { WARN("Shader using bool constant %u which is not supported.\n", reg->idx[0].offset); return FALSE; @@ -691,23 +601,10 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st fe->shader_read_header(fe_data, &ptr, &shader_version); reg_maps->shader_version = shader_version; - switch (reg_maps->shader_version.type) - { - case WINED3D_SHADER_TYPE_VERTEX: - vertexshader_set_limits(shader); - break; - case WINED3D_SHADER_TYPE_GEOMETRY: - geometryshader_set_limits(shader); - break; - case WINED3D_SHADER_TYPE_PIXEL: - pixelshader_set_limits(shader); - break; - default: - FIXME("Unexpected shader type %u found.\n", reg_maps->shader_version.type); - } + shader_set_limits(shader); reg_maps->constf = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, - sizeof(*reg_maps->constf) * ((min(shader->limits.constant_float, constf_size) + 31) / 32)); + sizeof(*reg_maps->constf) * ((min(shader->limits->constant_float, constf_size) + 31) / 32)); if (!reg_maps->constf) { ERR("Failed to allocate constant map memory.\n"); @@ -2002,11 +1899,11 @@ HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *sh TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count); - if (end_idx > shader->limits.constant_float) + if (end_idx > shader->limits->constant_float) { WARN("end_idx %u > float constants limit %u.\n", - end_idx, shader->limits.constant_float); - end_idx = shader->limits.constant_float; + end_idx, shader->limits->constant_float); + end_idx = shader->limits->constant_float; } for (i = start_idx; i < end_idx; ++i) @@ -2169,7 +2066,7 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 if (shader->reg_maps.shader_version.major == 1 && shader->reg_maps.shader_version.minor <= 3) { - for (i = 0; i < shader->limits.sampler; ++i) + for (i = 0; i < shader->limits->sampler; ++i) { DWORD flags = state->texture_states[i][WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS]; @@ -2223,7 +2120,7 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 if (shader->reg_maps.shader_version.major == 1 && shader->reg_maps.shader_version.minor <= 4) { - for (i = 0; i < shader->limits.sampler; ++i) + for (i = 0; i < shader->limits->sampler; ++i) { const struct wined3d_texture *texture = state->textures[i]; @@ -2393,7 +2290,7 @@ void pixelshader_update_samplers(struct wined3d_shader *shader, WORD tex_types) if (reg_maps->shader_version.major != 1) return; - for (i = 0; i < shader->limits.sampler; ++i) + for (i = 0; i < shader->limits->sampler; ++i) { /* We don't sample from this sampler. */ if (!sampler_type[i]) continue; diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 810da0f..4d13d48 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2807,6 +2807,8 @@ struct wined3d_shader_lconst struct wined3d_shader_limits { + unsigned int min_version; + unsigned int max_version; unsigned int sampler; unsigned int constant_int; unsigned int constant_float; @@ -2856,7 +2858,7 @@ struct wined3d_pixel_shader struct wined3d_shader { LONG ref; - struct wined3d_shader_limits limits; + const struct wined3d_shader_limits *limits; DWORD *function; UINT functionLength; BOOL load_local_constsF; -- 2.0.4