diff options
author | Matt Turner <mattst88@gmail.com> | 2017-01-12 18:05:58 -0800 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2017-04-14 14:56:08 -0700 |
commit | 630b84cdc80594d912a64f64aa75ac498e6f1248 (patch) | |
tree | a11078b21ef6f4409d344fe69543182dd0b3f369 /src/intel | |
parent | 3198ce3f96848856206e7b2e54a53024bcca7737 (diff) |
i965: Use source region <1,2,0> when converting to DF.
Doing so allows us to use a single MOV in VEC4_OPCODE_TO_DOUBLE instead
of two.
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 28 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_generator.cpp | 13 |
2 files changed, 28 insertions, 13 deletions
```diff
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 058742d4f6..8637310a35 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -1089,7 +1089,6 @@ void brw_##OP(struct brw_codegen *p, \
 }
 
 
-ALU1(MOV)
 ALU2(SEL)
 ALU1(NOT)
 ALU2(AND)
@@ -1123,6 +1122,33 @@ ALU2(SUBB)
 ROUND(RNDZ)
 ROUND(RNDE)
 
+brw_inst *
+brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+
+   /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
+    * To avoid the problems that causes, we use a <1,2,0> source region to read
+    * each element twice.
+    */
+   if (devinfo->gen == 7 && !devinfo->is_haswell &&
+       brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
+       dest.type == BRW_REGISTER_TYPE_DF &&
+       (src0.type == BRW_REGISTER_TYPE_F ||
+        src0.type == BRW_REGISTER_TYPE_D ||
+        src0.type == BRW_REGISTER_TYPE_UD) &&
+       !has_scalar_region(src0)) {
+      assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
+             src0.width == BRW_WIDTH_4 &&
+             src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+
+      src0.vstride = BRW_VERTICAL_STRIDE_1;
+      src0.width = BRW_WIDTH_2;
+      src0.hstride = BRW_HORIZONTAL_STRIDE_0;
+   }
+
+   return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
+}
 
 brw_inst *
 brw_ADD(struct brw_codegen *p, struct brw_reg dest,
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 2ac287f17f..d3192ab7db 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1958,18 +1958,7 @@ generate_code(struct brw_codegen *p,
 
          brw_set_default_access_mode(p, BRW_ALIGN_1);
 
-         struct brw_reg tmp = retype(dst, src[0].type);
-         tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
-         tmp.width = BRW_WIDTH_4;
-         src[0].vstride = BRW_VERTICAL_STRIDE_4;
-         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
-         src[0].width = BRW_WIDTH_4;
-         brw_MOV(p, tmp, src[0]);
-
-         tmp.vstride = BRW_VERTICAL_STRIDE_8;
-         tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
-         tmp.width = BRW_WIDTH_4;
-         brw_MOV(p, dst, tmp);
+         brw_MOV(p, dst, src[0]);
 
          brw_set_default_access_mode(p, BRW_ALIGN_16);
          break;
```