From: Zebediah Figura Subject: Re: [PATCH 2/5] d3dcompiler: Fold addition and multiplication of uint constants. Message-Id: <29438019-3d6b-d5b8-bf39-006c91826ad1@codeweavers.com> Date: Thu, 2 Jul 2020 11:47:13 -0500 In-Reply-To: References: <20200629235539.392976-1-zfigura@codeweavers.com> <20200629235539.392976-2-zfigura@codeweavers.com> On 7/2/20 11:15 AM, Matteo Bruni wrote: > On Tue, Jun 30, 2020 at 1:56 AM Zebediah Figura wrote: >> >> Signed-off-by: Zebediah Figura >> --- >> dlls/d3dcompiler_43/hlsl.y | 101 +++++++++++++++++++++++++++++++++++++ >> 1 file changed, 101 insertions(+) >> >> diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y >> index 475fd414fd7..15bd0d007b9 100644 >> --- a/dlls/d3dcompiler_43/hlsl.y >> +++ b/dlls/d3dcompiler_43/hlsl.y >> @@ -2838,6 +2838,105 @@ static void dump_function(struct wine_rb_entry *entry, void *context) >> wine_rb_for_each_entry(&func->overloads, dump_function_decl, NULL); >> } >> >> +static BOOL transform_ir(BOOL (*func)(struct hlsl_ir_node *), struct list *instrs) > > Since this is going to be a pretty generic helper: we might want to > pass some "void *context" to func. Yes, probably. In that case I guess it'd be also useful for the RA passes I have now. > >> +{ >> + struct hlsl_ir_node *instr, *next; >> + BOOL progress = 0; >> + >> + LIST_FOR_EACH_ENTRY_SAFE(instr, next, instrs, struct hlsl_ir_node, entry) >> + { >> + if (instr->type == HLSL_IR_IF) >> + { >> + struct hlsl_ir_if *iff = if_from_node(instr); >> + progress |= transform_ir(func, iff->then_instrs); >> + if (iff->else_instrs) >> + progress |= transform_ir(func, iff->else_instrs); >> + } > > I like to have a blank line between declarations and other statements. Oops, missed that one :-/ > >> + else if (instr->type == HLSL_IR_LOOP) >> + progress |= transform_ir(func, loop_from_node(instr)->body); > > Unrelated to the patch: maybe we should always initialize else_instrs? > I think that would get rid of a bunch of checks with no drawback. Besides another heap allocation (though maybe we could just store those inline instead? I don't think there's any reason why not), but yes, probably. > >> + >> + progress |= func(instr); >> + } >> + >> + return progress; >> +} >> + >> +static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) >> +{ >> + struct hlsl_src *src, *next; >> + >> + LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) >> + { >> + hlsl_src_remove(src); >> + hlsl_src_from_node(src, new); >> + } >> + list_add_before(&old->entry, &new->entry); >> + list_remove(&old->entry); >> + free_instr(old); >> +} >> + >> +static BOOL fold_constants(struct hlsl_ir_node *instr) >> +{ >> + struct hlsl_ir_constant *arg1, *arg2 = NULL, *res; >> + struct hlsl_ir_expr *expr; >> + unsigned int i; >> + >> + if (instr->type != HLSL_IR_EXPR) >> + return FALSE; >> + expr = expr_from_node(instr); >> + >> + for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) >> + { >> + if (expr->operands[i].node && expr->operands[i].node->type != HLSL_IR_CONSTANT) >> + return FALSE; >> + } >> + arg1 = constant_from_node(expr->operands[0].node); >> + if (expr->operands[1].node) >> + arg2 = constant_from_node(expr->operands[1].node); >> + >> + if (!(res = d3dcompiler_alloc(sizeof(*res)))) >> + { >> + hlsl_ctx.status = PARSE_ERR; >> + return FALSE; >> + } >> + init_node(&res->node, HLSL_IR_CONSTANT, instr->data_type, instr->loc); >> + >> + switch (instr->data_type->base_type) >> + { >> + case HLSL_TYPE_UINT: >> + { >> + unsigned int i; >> + >> + switch (expr->op) >> + { >> + case HLSL_IR_BINOP_ADD: >> + for (i = 0; i < 16; ++i) >> + res->value.u[i] = arg1->value.u[i] + arg2->value.u[i]; >> + break; >> + >> + case HLSL_IR_BINOP_MUL: >> + for (i = 0; i < 16; ++i) >> + res->value.u[i] = arg1->value.u[i] * arg2->value.u[i]; >> + break; > > I'd prefer if this only operated on the valid components: maybe it's > not going to be any faster but I feel it would be less confusing and > it also happens to be "partial constants ready" (whether we go ahead > with that or not). Sure. > >> + >> + default: >> + FIXME("Fold uint expr %#x.\n", expr->op); >> + d3dcompiler_free(res); >> + return FALSE; >> + } >> + break; >> + } >> + >> + default: >> + FIXME("Fold %s expr %#x.\n", debug_base_type(instr->data_type), expr->op); >> + d3dcompiler_free(res); >> + return FALSE; >> + } >> + >> + replace_node(&expr->node, &res->node); >> + return TRUE; >> +} >> + >> /* Allocate a unique, ordered index to each instruction, which will be used for >> * computing liveness ranges. */ >> static unsigned int index_instructions(struct list *instrs, unsigned int index) >> @@ -3009,6 +3108,8 @@ HRESULT parse_hlsl(enum shader_type type, DWORD major, DWORD minor, >> >> list_move_head(entry_func->body, &hlsl_ctx.static_initializers); >> >> + while (transform_ir(fold_constants, entry_func->body)); >> + >> /* Index 0 means unused; index 1 means function entry, so start at 2. */ >> index_instructions(entry_func->body, 2); > > As we discussed elsewhere, we probably want to move transform_ir() and > its calls out of hlsl.y. > Yep, will do... -----BEGIN PGP SIGNATURE----- iQEzBAEBCAAdFiEENTo75Twe9rbPART3DZ01igeheEAFAl7+D5EACgkQDZ01igeh eEB5WggAiIJFoTX/9VlPOV1c8v2jqnvmh4ziLyu+I0jwvY57GH3HxtqEHgeA1rZO 01OvGezaOpWSCAyj5JBAIul+xuq6eozyG52iVb8S38mnDyYRx3bK0rSVMZYmZRhH r/34GCphjy75flqBzW3rDtwHdBjqigFzSdnHAWvVPZqcBPnjdpWzBjH1a7apozDQ vt2/tvGZtdvys/6a8Wamdp8sBk+ohCNYy6DDjqx3p0zkWWkPd2MNVomBDxSG6FFT 5A6sEz9ZHU1OkIWkGDNtDrMMkwoBkUKzPfFcgatOroeatICM/4CDI5uF87tWmw9X VjxxYZN6HzfmI9QK3wJj25Ae22crdw== =0lyU -----END PGP SIGNATURE-----