From: Giovanni Mascellani <gmascellani@codeweavers.com> Subject: [PATCH vkd3d v3 4/7] vkd3d-shader/hlsl: Lower matrix casts. Message-Id: <20220602140609.3419134-4-gmascellani@codeweavers.com> Date: Thu, 2 Jun 2022 16:06:06 +0200 In-Reply-To: <20220602140609.3419134-1-gmascellani@codeweavers.com> References: <20220602140609.3419134-1-gmascellani@codeweavers.com> Signed-off-by: Giovanni Mascellani <gmascellani@codeweavers.com> --- I had to add a forward declaration for add_load(). I don't know whether in cases like this it is preferred to add a forward declaration (which adds cruft) or to move the definition backwards (which breaks the vague logical order the functions have). --- libs/vkd3d-shader/hlsl.y | 93 ++++++++++++++++++- tests/hlsl-duplicate-modifiers.shader_test | 2 +- tests/hlsl-initializer-matrix.shader_test | 2 +- ...lsl-return-implicit-conversion.shader_test | 10 +- tests/hlsl-shape.shader_test | 10 +- tests/matrix-semantics.shader_test | 20 ++-- 6 files changed, 111 insertions(+), 26 deletions(-) diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 23c48cdc..f85d4252 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -266,6 +266,9 @@ static bool implicit_compatible_data_types(struct hlsl_type *t1, struct hlsl_typ return false; } +static struct hlsl_ir_load *add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, + struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc); + static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { @@ -275,10 +278,92 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, if (hlsl_types_are_equal(src_type, dst_type)) return node; - if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) - return NULL; - list_add_tail(instrs, &cast->node.entry); - return &cast->node; + if ((src_type->type == HLSL_CLASS_MATRIX || dst_type->type ==
HLSL_CLASS_MATRIX) + && src_type->type <= HLSL_CLASS_LAST_NUMERIC && dst_type->type <= HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *name; + static unsigned int counter = 0; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + unsigned int dst_idx; + bool broadcast; + + broadcast = src_type->dimx == 1 && src_type->dimy == 1; + assert(dst_type->dimx * dst_type->dimy <= src_type->dimx * src_type->dimy || broadcast); + if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX && !broadcast) + { + assert(dst_type->dimx <= src_type->dimx); + assert(dst_type->dimy <= src_type->dimy); + } + + name = vkd3d_string_buffer_get(&ctx->string_buffers); + vkd3d_string_buffer_printf(name, "<cast-%u>", counter++); + var = hlsl_new_synthetic_var(ctx, name->buffer, dst_type, *loc); + vkd3d_string_buffer_release(&ctx->string_buffers, name); + if (!var) + return NULL; + + for (dst_idx = 0; dst_idx < dst_type->dimx * dst_type->dimy; ++dst_idx) + { + struct hlsl_type *src_scalar_type, *dst_scalar_type; + unsigned int src_idx, src_offset, dst_offset; + struct hlsl_ir_store *store; + struct hlsl_ir_constant *c; + + if (broadcast) + { + src_idx = 0; + } + else + { + if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX) + { + unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx; + + src_idx = y * src_type->dimx + x; + } + else + { + src_idx = dst_idx; + } + } + + dst_offset = hlsl_compute_component_offset(ctx, dst_type, dst_idx, &dst_scalar_type); + src_offset = hlsl_compute_component_offset(ctx, src_type, src_idx, &src_scalar_type); + + if (!(c = hlsl_new_uint_constant(ctx, src_offset, loc))) + return NULL; + list_add_tail(instrs, &c->node.entry); + + if (!(load = add_load(ctx, instrs, node, &c->node, src_scalar_type, *loc))) + return NULL; + + if (!(cast = hlsl_new_cast(ctx, &load->node, dst_scalar_type, loc))) + return NULL; + list_add_tail(instrs, &cast->node.entry); + + if (!(c = hlsl_new_uint_constant(ctx,
dst_offset, loc))) + return NULL; + list_add_tail(instrs, &c->node.entry); + + if (!(store = hlsl_new_store(ctx, var, &c->node, &cast->node, 0, *loc))) + return NULL; + list_add_tail(instrs, &store->node.entry); + } + + if (!(load = hlsl_new_load(ctx, var, NULL, dst_type, *loc))) + return NULL; + list_add_tail(instrs, &load->node.entry); + + return &load->node; + } + else + { + if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) + return NULL; + list_add_tail(instrs, &cast->node.entry); + return &cast->node; + } } static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, diff --git a/tests/hlsl-duplicate-modifiers.shader_test b/tests/hlsl-duplicate-modifiers.shader_test index fcae12da..6491701a 100644 --- a/tests/hlsl-duplicate-modifiers.shader_test +++ b/tests/hlsl-duplicate-modifiers.shader_test @@ -7,5 +7,5 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (0.1, 0.2, 0.3, 0.4) diff --git a/tests/hlsl-initializer-matrix.shader_test b/tests/hlsl-initializer-matrix.shader_test index ea9de9c0..7e12b0a0 100644 --- a/tests/hlsl-initializer-matrix.shader_test +++ b/tests/hlsl-initializer-matrix.shader_test @@ -55,7 +55,7 @@ float4 main() : SV_TARGET } [test] -todo draw quad +draw quad probe all rgba (21, 22, 31, 32) diff --git a/tests/hlsl-return-implicit-conversion.shader_test b/tests/hlsl-return-implicit-conversion.shader_test index bf99d9cb..4fe8e7eb 100644 --- a/tests/hlsl-return-implicit-conversion.shader_test +++ b/tests/hlsl-return-implicit-conversion.shader_test @@ -5,7 +5,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1) [pixel shader] @@ -15,7 +15,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1) [pixel shader] @@ -25,7 +25,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1) [pixel shader] @@ -35,8 +35,8 @@ float4x1 main() : sv_target } 
[test] -todo draw quad -probe all rgba (0.4, 0.3, 0.2, 0.1) +draw quad +todo probe all rgba (0.4, 0.3, 0.2, 0.1) [pixel shader] float3 func() diff --git a/tests/hlsl-shape.shader_test b/tests/hlsl-shape.shader_test index 57d59534..65cc322c 100644 --- a/tests/hlsl-shape.shader_test +++ b/tests/hlsl-shape.shader_test @@ -211,7 +211,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0) [pixel shader] @@ -235,7 +235,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0) [pixel shader] @@ -260,7 +260,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0) [pixel shader] @@ -309,7 +309,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 0.0, 0.0) [pixel shader] @@ -321,7 +321,7 @@ float4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 0.0, 0.0) [pixel shader] diff --git a/tests/matrix-semantics.shader_test b/tests/matrix-semantics.shader_test index 6a089683..d297b0d9 100644 --- a/tests/matrix-semantics.shader_test +++ b/tests/matrix-semantics.shader_test @@ -5,8 +5,8 @@ float4x1 main() : sv_target } [test] -todo draw quad -probe all rgba (1.0, 2.0, 3.0, 4.0) +draw quad +todo probe all rgba (1.0, 2.0, 3.0, 4.0) [pixel shader] row_major float1x4 main() : sv_target @@ -15,7 +15,7 @@ row_major float1x4 main() : sv_target } [test] -todo draw quad +draw quad probe all rgba (1.0, 2.0, 3.0, 4.0) [require] @@ -28,8 +28,8 @@ row_major float4x1 main() : sv_target } [test] -todo draw quad -probe all r (1.0) +draw quad +todo probe all r (1.0) [pixel shader] float1x4 main() : sv_target @@ -38,8 +38,8 @@ float1x4 main() : sv_target } [test] -todo draw quad -probe all r (1.0) +draw quad +todo probe all r (1.0) [pixel shader] void main(out row_major float1x4 x : sv_target0, out float1x4 y : sv_target1) @@ -49,7 +49,7 @@ void main(out row_major float1x4 x : sv_target0, out 
float1x4 y : sv_target1) } [test] -todo draw quad +draw quad probe all rgba (1.0, 2.0, 3.0, 4.0) [pixel shader fail todo] @@ -67,5 +67,5 @@ void main(out float1x4 x : sv_target0, out float1x4 y : sv_target4) } [test] -todo draw quad -probe all r (1.0) +draw quad +todo probe all r (1.0) -- 2.36.1