summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp66
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp36
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp31
4 files changed, 70 insertions, 64 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b63ca23e3d..cb13fcb1cc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3523,6 +3523,71 @@ fs_visitor::lower_load_payload()
return progress;
}
+bool
+fs_visitor::lower_integer_multiplication()
+{
+ bool progress = false;
+
+ /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
+ * directly, but Cherryview cannot.
+ */
+ if (devinfo->gen >= 8 && !devinfo->is_cherryview)
+ return false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ if (inst->opcode != BRW_OPCODE_MUL ||
+ inst->dst.is_accumulator() ||
+ (inst->dst.type != BRW_REGISTER_TYPE_D &&
+ inst->dst.type != BRW_REGISTER_TYPE_UD))
+ continue;
+
+#define insert(instr) inst->insert_before(block, instr)
+
+ /* The MUL instruction isn't commutative. On Gen <= 6, only the low
+ * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
+ * src1 are used.
+ *
+ * If multiplying by an immediate value that fits in 16-bits, do a
+ * single MUL instruction with that value in the proper location.
+ */
+ if (inst->src[1].file == IMM &&
+ inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+ if (devinfo->gen < 7) {
+ fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type, dispatch_width);
+ insert(MOV(imm, inst->src[1]));
+ insert(MUL(inst->dst, imm, inst->src[0]));
+ } else {
+ insert(MUL(inst->dst, inst->src[0], inst->src[1]));
+ }
+ } else {
+ if (devinfo->gen >= 7)
+ no16("SIMD16 integer multiply unsupported\n");
+
+ const unsigned channels = dispatch_width;
+ const enum brw_reg_type type = inst->dst.type;
+ const fs_reg acc(retype(brw_acc_reg(channels), type));
+ const fs_reg null(retype(brw_null_vec(channels), type));
+
+ const fs_reg &src0 = inst->src[0];
+ const fs_reg &src1 = inst->src[1];
+
+ insert(MUL(acc, src0, src1));
+ insert(MACH(null, src0, src1));
+ insert(MOV(inst->dst, acc));
+ }
+#undef insert
+
+ inst->remove(block);
+ progress = true;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
void
fs_visitor::dump_instructions()
{
@@ -4001,6 +4066,7 @@ fs_visitor::optimize()
}
OPT(opt_combine_constants);
+ OPT(lower_integer_multiplication);
lower_uniform_pull_constant_loads();
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 991cff9632..f2aa0ae957 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -241,6 +241,7 @@ public:
void no16(const char *msg, ...);
void lower_uniform_pull_constant_loads();
bool lower_load_payload();
+ bool lower_integer_multiplication();
bool opt_combine_constants();
void emit_dummy_fs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 9cfd0e792a..5dd8363b91 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -780,41 +780,9 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
- case nir_op_imul: {
- if (devinfo->gen >= 8) {
- emit(MUL(result, op[0], op[1]));
- break;
- } else {
- nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
- nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-
- if (value0 && value0->u[0] < (1 << 16)) {
- if (devinfo->gen < 7) {
- emit(MUL(result, op[0], op[1]));
- } else {
- emit(MUL(result, op[1], op[0]));
- }
- break;
- } else if (value1 && value1->u[0] < (1 << 16)) {
- if (devinfo->gen < 7) {
- emit(MUL(result, op[1], op[0]));
- } else {
- emit(MUL(result, op[0], op[1]));
- }
- break;
- }
- }
-
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(reg_null_d, op[0], op[1]));
- emit(MOV(result, fs_reg(acc)));
+ case nir_op_imul:
+ emit(MUL(result, op[0], op[1]));
break;
- }
case nir_op_imul_high:
case nir_op_umul_high: {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index abaea5f4e1..ead7768664 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -873,36 +873,7 @@ fs_visitor::visit(ir_expression *ir)
unreachable("not reached: should be handled by ir_sub_to_add_neg");
case ir_binop_mul:
- if (devinfo->gen < 8 && ir->type->is_integer()) {
- /* For integer multiplication, the MUL uses the low 16 bits
- * of one of the operands (src0 on gen6, src1 on gen7). The
- * MACH accumulates in the contribution of the upper 16 bits
- * of that operand.
- */
- if (ir->operands[0]->is_uint16_constant()) {
- if (devinfo->gen < 7)
- emit(MUL(this->result, op[0], op[1]));
- else
- emit(MUL(this->result, op[1], op[0]));
- } else if (ir->operands[1]->is_uint16_constant()) {
- if (devinfo->gen < 7)
- emit(MUL(this->result, op[1], op[0]));
- else
- emit(MUL(this->result, op[0], op[1]));
- } else {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- this->result.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(reg_null_d, op[0], op[1]));
- emit(MOV(this->result, fs_reg(acc)));
- }
- } else {
- emit(MUL(this->result, op[0], op[1]));
- }
+ emit(MUL(this->result, op[0], op[1]));
break;
case ir_binop_imul_high: {
if (devinfo->gen >= 7)