summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Turner <mattst88@gmail.com> 2017-01-12 18:05:58 -0800
committer: Francisco Jerez <currojerez@riseup.net> 2017-04-14 14:56:08 -0700
commit: 630b84cdc80594d912a64f64aa75ac498e6f1248 (patch)
tree: a11078b21ef6f4409d344fe69543182dd0b3f369
parent: 3198ce3f96848856206e7b2e54a53024bcca7737 (diff)
i965: Use source region <1,2,0> when converting to DF.
Doing so allows us to use a single MOV in VEC4_OPCODE_TO_DOUBLE instead of two.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
-rw-r--r-- src/intel/compiler/brw_eu_emit.c | 28
-rw-r--r-- src/intel/compiler/brw_vec4_generator.cpp | 13
2 files changed, 28 insertions, 13 deletions
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 058742d4f6..8637310a35 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -1089,7 +1089,6 @@ void brw_##OP(struct brw_codegen *p, \
}
-ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
@@ -1123,6 +1122,33 @@ ALU2(SUBB)
ROUND(RNDZ)
ROUND(RNDE)
+brw_inst *
+brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+
+ /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
+ * To avoid the problems that causes, we use a <1,2,0> source region to read
+ * each element twice.
+ */
+ if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
+ dest.type == BRW_REGISTER_TYPE_DF &&
+ (src0.type == BRW_REGISTER_TYPE_F ||
+ src0.type == BRW_REGISTER_TYPE_D ||
+ src0.type == BRW_REGISTER_TYPE_UD) &&
+ !has_scalar_region(src0)) {
+ assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
+ src0.width == BRW_WIDTH_4 &&
+ src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+
+ src0.vstride = BRW_VERTICAL_STRIDE_1;
+ src0.width = BRW_WIDTH_2;
+ src0.hstride = BRW_HORIZONTAL_STRIDE_0;
+ }
+
+ return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
+}
brw_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 2ac287f17f..d3192ab7db 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1958,18 +1958,7 @@ generate_code(struct brw_codegen *p,
brw_set_default_access_mode(p, BRW_ALIGN_1);
- struct brw_reg tmp = retype(dst, src[0].type);
- tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
- tmp.width = BRW_WIDTH_4;
- src[0].vstride = BRW_VERTICAL_STRIDE_4;
- src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
- src[0].width = BRW_WIDTH_4;
- brw_MOV(p, tmp, src[0]);
-
- tmp.vstride = BRW_VERTICAL_STRIDE_8;
- tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
- tmp.width = BRW_WIDTH_4;
- brw_MOV(p, dst, tmp);
+ brw_MOV(p, dst, src[0]);
brw_set_default_access_mode(p, BRW_ALIGN_16);
break;