From: Matteo Bruni Subject: Re: [PATCH 5/5] d3dcompiler: Allocate temporary registers for variables. Message-Id: Date: Wed, 1 Apr 2020 20:37:02 +0200 In-Reply-To: <20200330025320.564142-5-z.figura12@gmail.com> References: <20200330025320.564142-1-z.figura12@gmail.com> <20200330025320.564142-5-z.figura12@gmail.com> On Mon, Mar 30, 2020 at 4:54 AM Zebediah Figura wrote: Hi Zeb, I have a number of questions / comments, inline. > diff --git a/dlls/d3dcompiler_43/hlsl.y b/dlls/d3dcompiler_43/hlsl.y > index d6c64edcace..f828e05e335 100644 > --- a/dlls/d3dcompiler_43/hlsl.y > +++ b/dlls/d3dcompiler_43/hlsl.y > @@ -2732,6 +2732,177 @@ static void compute_liveness(struct hlsl_ir_function_decl *entry_func) > compute_liveness_recurse(entry_func->body, 0, 0); > } > > +struct liveness_ctx > +{ > + size_t count; > + struct > + { > + /* 0 if not live yet. */ > + unsigned int last_read; > + } *regs; > +}; > + > +static unsigned char get_dead_writemask(struct liveness_ctx *liveness, > + unsigned int first_write, unsigned int index, unsigned int components) Maybe get_available_writemask()? Same for the other similarly named functions. > +{ > + unsigned char i, writemask = 0, count = 0; > + > + for (i = 0; i < 4; ++i) > + { > + if (liveness->regs[index + i].last_read <= first_write) > + { > + writemask |= 1 << i; > + if (++count == components) > + return writemask; > + } > + } > + > + return 0; > +} > + > +static struct hlsl_reg allocate_temp_register(struct liveness_ctx *liveness, > + unsigned int first_write, unsigned int last_read, unsigned char components) How is this going to be different to the non-temp register allocation? > +{ > + struct hlsl_reg ret = {.allocated = TRUE}; > + unsigned char writemask, i; > + unsigned int regnum; > + > + for (regnum = 0; regnum < liveness->count; regnum += 4) > + { > + if ((writemask = get_dead_writemask(liveness, first_write, regnum, components))) > + break; > + } > + if (regnum == liveness->count) > + { > + liveness->count = max(liveness->count * 2, 32); > + liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs)); Do we want to use array_reserve() here? > + writemask = (1 << components) - 1; > + } > + for (i = 0; i < 4; ++i) > + { > + if (writemask & (1 << i)) > + liveness->regs[regnum + i].last_read = last_read; > + } > + ret.reg = regnum / 4; > + ret.writemask = writemask; > + return ret; > +} > + > +static BOOL is_range_dead(struct liveness_ctx *liveness, unsigned int first_write, > + unsigned int index, unsigned int elements) > +{ > + unsigned int i; > + > + for (i = 0; i < elements; i += 4) > + { > + if (!get_dead_writemask(liveness, first_write, index + i, 4)) > + return FALSE; > + } > + return TRUE; > +} > + > +/* "elements" is the total number of consecutive whole registers needed. */ > +static struct hlsl_reg allocate_temp_range(struct liveness_ctx *liveness, > + unsigned int first_write, unsigned int last_read, unsigned int elements) > +{ > + struct hlsl_reg ret = {.allocated = TRUE}; > + unsigned int i, regnum; > + > + elements *= 4; > + > + for (regnum = 0; regnum < liveness->count; regnum += 4) > + { > + if (is_range_dead(liveness, first_write, regnum, min(elements, liveness->count - regnum))) > + break; > + } > + if (regnum + elements >= liveness->count) > + { > + liveness->count = max(liveness->count * 2, regnum + elements); > + liveness->regs = d3dcompiler_realloc(liveness->regs, liveness->count * sizeof(*liveness->regs)); > + } > + for (i = 0; i < elements; ++i) > + liveness->regs[regnum + i].last_read = last_read; > + ret.reg = regnum / 4; > + return ret; It feels like the for should execute elements * 4 iterations, instead. > +} > + > +static void allocate_variable_temp_register(struct hlsl_ir_var *var, struct liveness_ctx *liveness) > +{ > + if (!var->reg.allocated && var->last_read) > + { > + if (var->data_type->reg_size > 1) > + { > + var->reg = allocate_temp_range(liveness, var->first_write, > + var->last_read, var->data_type->reg_size); > + TRACE("Allocated r%u-r%u to %s (liveness %u-%u).\n", var->reg.reg, > + var->reg.reg + var->data_type->reg_size - 1, var->name, var->first_write, var->last_read); > + } > + else > + { > + var->reg = allocate_temp_register(liveness, var->first_write, > + var->last_read, var->data_type->dimx); > + TRACE("Allocated r%u%s to %s (liveness %u-%u).\n", var->reg.reg, > + debug_writemask(var->reg.writemask), var->name, var->first_write, var->last_read); > + } > + } > +} > + > +static void allocate_temp_registers_recurse(struct list *instrs, struct liveness_ctx *liveness) > +{ > + struct hlsl_ir_node *instr; > + > + LIST_FOR_EACH_ENTRY(instr, instrs, struct hlsl_ir_node, entry) > + { > + switch (instr->type) > + { > + case HLSL_IR_ASSIGNMENT: > + { > + struct hlsl_ir_assignment *assignment = assignment_from_node(instr); > + allocate_variable_temp_register(hlsl_var_from_deref(&assignment->lhs), liveness); > + break; > + } > + case HLSL_IR_IF: > + { > + struct hlsl_ir_if *iff = if_from_node(instr); > + allocate_temp_registers_recurse(iff->then_instrs, liveness); > + if (iff->else_instrs) > + allocate_temp_registers_recurse(iff->else_instrs, liveness); > + break; > + } > + case HLSL_IR_LOOP: > + { > + struct hlsl_ir_loop *loop = loop_from_node(instr); > + allocate_temp_registers_recurse(loop->body, liveness); > + break; > + } > + default: > + break; > + } > + } > +} Do we need to allocate temporary registers for other instructions too, like expressions? > + > +/* Simple greedy temporary register allocation pass that just assigns a unique > + * index to all (simultaneously live) variables or intermediate values. Agnostic > + * as to how many registers are actually available for the current backend, and > + * does not handle constants. */ > +static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) > +{ > + struct liveness_ctx liveness = {}; I'm pretty sure that this is a gcc extension: you want '{0}' instead.