summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jason Ekstrand <jason@jlekstrand.net> 2016-09-14 15:09:33 -0700
committer: Francisco Jerez <currojerez@riseup.net> 2016-09-21 13:45:45 +0300
commit: 8a468d186e6fc27c26dd12ba989192e7596f667a (patch)
tree: f08df7ccc12e4ae2ecb31eea24b3946528216563
parent: a2392cee48076f1fe6feab7d49214990cfa6a551 (diff)
i965/fs: Take Dispatch/Vector mask into account in FIND_LIVE_CHANNEL
On at least Sky Lake, ce0 does not contain the full story as far as enabled channels go. It is possible to have completely disabled channels where the corresponding bits in ce0 are 1. In order to get the correct execution mask, you have to mask off those channels which were disabled from the beginning by taking the AND of ce0 with either sr0.2 or sr0.3 depending on the shader stage. Failure to do so can result in FIND_LIVE_CHANNEL returning a completely dead channel.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Francisco Jerez <currojerez@riseup.net>
[ Francisco Jerez: Fix a couple of typos, add mask register type assertion, clarify reason why ce0 can have bits set for disabled channels, clarify that this may only be a problem when thread dispatch doesn't pack channels tightly in the SIMD thread. Apply same treatment to Align16 path. ]
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 39
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_reg.h 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 2
5 files changed, 50 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 3e52764370..737a335ab5 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -488,7 +488,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
void
brw_find_live_channel(struct brw_codegen *p,
- struct brw_reg dst);
+ struct brw_reg dst,
+ struct brw_reg mask);
void
brw_broadcast(struct brw_codegen *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 3b12030ec0..c98867aa6c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3361,7 +3361,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
}
void
-brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
+brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
+ struct brw_reg mask)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
@@ -3369,6 +3370,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
brw_inst *inst;
assert(devinfo->gen >= 7);
+ assert(mask.type == BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
@@ -3377,18 +3379,32 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
if (devinfo->gen >= 8) {
/* Getting the first active channel index is easy on Gen8: Just find
- * the first bit set in the mask register. The same register exists
- * on HSW already but it reads back as all ones when the current
+ * the first bit set in the execution mask. The register exists on
+ * HSW already but it reads back as all ones when the current
* instruction has execution masking disabled, so it's kind of
* useless.
*/
- inst = brw_FBL(p, vec1(dst),
- retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+ struct brw_reg exec_mask =
+ retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
+
+ if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
+ /* Unfortunately, ce0 does not take into account the thread
+ * dispatch mask, which may be a problem in cases where it's not
+ * tightly packed (i.e. it doesn't have the form '2^n - 1' for
+ * some n). Combine ce0 with the given dispatch (or vector) mask
+ * to mask off those channels which were never dispatched by the
+ * hardware.
+ */
+ brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
+ brw_AND(p, vec1(dst), exec_mask, vec1(dst));
+ exec_mask = vec1(dst);
+ }
/* Quarter control has the effect of magically shifting the value of
- * this register so you'll get the first active channel relative to
- * the specified quarter control as result.
+ * ce0 so you'll get the first active channel relative to the
+ * specified quarter control as result.
*/
+ inst = brw_FBL(p, vec1(dst), exec_mask);
} else {
const struct brw_reg flag = brw_flag_reg(1, 0);
@@ -3422,9 +3438,14 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
} else {
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- if (devinfo->gen >= 8) {
+ if (devinfo->gen >= 8 &&
+ mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) {
/* In SIMD4x2 mode the first active channel index is just the
- * negation of the first bit of the mask register.
+ * negation of the first bit of the mask register. Note that ce0
+ * doesn't take into account the dispatch mask, so the Gen7 path
+ * should be used instead unless you have the guarantee that the
+ * dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
+ * for some n).
*/
inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 110f3f8206..c510f42836 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2043,9 +2043,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_set_simd4x2_offset(inst, dst, src[0]);
break;
- case SHADER_OPCODE_FIND_LIVE_CHANNEL:
- brw_find_live_channel(p, dst);
+ case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
+ const struct brw_reg mask =
+ stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg();
+ brw_find_live_channel(p, dst, mask);
break;
+ }
case SHADER_OPCODE_BROADCAST:
assert(inst->force_writemask_all);
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index b71c63b62d..3b46d27fcd 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -826,6 +826,18 @@ brw_mask_reg(unsigned subnr)
}
static inline struct brw_reg
+brw_vmask_reg()
+{
+ return brw_sr0_reg(3);
+}
+
+static inline struct brw_reg
+brw_dmask_reg()
+{
+ return brw_sr0_reg(2);
+}
+
+static inline struct brw_reg
brw_message_reg(unsigned nr)
{
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 256abae55e..f9e6d1c156 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1863,7 +1863,7 @@ generate_code(struct brw_codegen *p,
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
- brw_find_live_channel(p, dst);
+ brw_find_live_channel(p, dst, brw_dmask_reg());
break;
case SHADER_OPCODE_BROADCAST: