diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h               |  3
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c          | 39
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  7
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_reg.h              | 12
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  2
5 files changed, 50 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 3e52764370..737a335ab5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -488,7 +488,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, void brw_find_live_channel(struct brw_codegen *p, - struct brw_reg dst); + struct brw_reg dst, + struct brw_reg mask); void brw_broadcast(struct brw_codegen *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 3b12030ec0..c98867aa6c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3361,7 +3361,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, } void -brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) +brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, + struct brw_reg mask) { const struct gen_device_info *devinfo = p->devinfo; const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); @@ -3369,6 +3370,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) brw_inst *inst; assert(devinfo->gen >= 7); + assert(mask.type == BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -3377,18 +3379,32 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) if (devinfo->gen >= 8) { /* Getting the first active channel index is easy on Gen8: Just find - * the first bit set in the mask register. The same register exists - * on HSW already but it reads back as all ones when the current + * the first bit set in the execution mask. The register exists on + * HSW already but it reads back as all ones when the current * instruction has execution masking disabled, so it's kind of * useless. 
*/ - inst = brw_FBL(p, vec1(dst), - retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg exec_mask = + retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD); + + if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) { + /* Unfortunately, ce0 does not take into account the thread + * dispatch mask, which may be a problem in cases where it's not + * tightly packed (i.e. it doesn't have the form '2^n - 1' for + * some n). Combine ce0 with the given dispatch (or vector) mask + * to mask off those channels which were never dispatched by the + * hardware. + */ + brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8)); + brw_AND(p, vec1(dst), exec_mask, vec1(dst)); + exec_mask = vec1(dst); + } /* Quarter control has the effect of magically shifting the value of - * this register so you'll get the first active channel relative to - * the specified quarter control as result. + * ce0 so you'll get the first active channel relative to the + * specified quarter control as result. */ + inst = brw_FBL(p, vec1(dst), exec_mask); } else { const struct brw_reg flag = brw_flag_reg(1, 0); @@ -3422,9 +3438,14 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) } else { brw_set_default_mask_control(p, BRW_MASK_DISABLE); - if (devinfo->gen >= 8) { + if (devinfo->gen >= 8 && + mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) { /* In SIMD4x2 mode the first active channel index is just the - * negation of the first bit of the mask register. + * negation of the first bit of the mask register. Note that ce0 + * doesn't take into account the dispatch mask, so the Gen7 path + * should be used instead unless you have the guarantee that the + * dispatch mask is tightly packed (i.e. it has the form '2^n - 1' + * for some n). 
*/ inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X), negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)), diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 110f3f8206..c510f42836 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2043,9 +2043,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_set_simd4x2_offset(inst, dst, src[0]); break; - case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst); + case SHADER_OPCODE_FIND_LIVE_CHANNEL: { + const struct brw_reg mask = + stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg(); + brw_find_live_channel(p, dst, mask); break; + } case SHADER_OPCODE_BROADCAST: assert(inst->force_writemask_all); diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index b71c63b62d..3b46d27fcd 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -826,6 +826,18 @@ brw_mask_reg(unsigned subnr) } static inline struct brw_reg +brw_vmask_reg() +{ + return brw_sr0_reg(3); +} + +static inline struct brw_reg +brw_dmask_reg() +{ + return brw_sr0_reg(2); +} + +static inline struct brw_reg brw_message_reg(unsigned nr) { return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 256abae55e..f9e6d1c156 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1863,7 +1863,7 @@ generate_code(struct brw_codegen *p, break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst); + brw_find_live_channel(p, dst, brw_dmask_reg()); break; case SHADER_OPCODE_BROADCAST: |