nir: take cross-thread operations into account in a few places

These optimizations happened to work with derivatives, but they won't work with the upcoming shader_ballot and group_vote instructions.

v2: fix up for the new convergent and uniform-control semantics.
author: Connor Abbott <cwabbott0@gmail.com> 2017-06-01 18:43:56 -0700
committer: Connor Abbott <cwabbott0@gmail.com> 2017-08-08 12:00:50 -0700
commit: c06af927f1ac3522e2d44b1cbeb6e2a4843a8c9e (patch)
tree: b684ae2103f8623c9e4cab30eca3c883b0730d36
parent: e4e0085c11a307c430173fbfbf42e4188bc19304 (diff)
2 files changed, 35 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 9cb9ed43e8..b40c203e02 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -178,6 +178,14 @@ hash_instr(const void *data)
    const nir_instr *instr = data;
    uint32_t hash = _mesa_fnv32_1a_offset_bias;
 
+   /*
+    * In nir_instrs_equal(), we compare the instruction's basic blocks in this
+    * case. See the comment there for the explanation.
+    */
+   if (nir_instr_is_convergent(instr) && !nir_instr_is_uniform_control(instr)) {
+      HASH(hash, instr->block);
+   }
+
    switch (instr->type) {
    case nir_instr_type_alu:
       hash = hash_alu(hash, nir_instr_as_alu(instr));
@@ -256,6 +264,22 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
    if (instr1->type != instr2->type)
       return false;
 
+   /*
+    * If the instructions are cross-thread, then they must have the same
+    * execution mask, and if they are convergent, then the one being replaced
+    * must have a smaller execution mask. If they are uniform-control, then we
+    * can always replace one invocation with another since every invocation
+    * must already have the same execution mask (the largest possible one).
+    * But not so for non-uniform-control instructions, since different
+    * invocations may be called with different execution masks and therefore
+    * have different results. Conservatively enforce that the instructions are
+    * in the same basic block.
+    */
+   if (nir_instr_is_convergent(instr1) && !nir_instr_is_uniform_control(instr1)) {
+      if (instr1->block != instr2->block)
+         return false;
+   }
+
    switch (instr1->type) {
    case nir_instr_type_alu: {
       nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
index 4ca4f80d78..ce41781e45 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -61,6 +61,17 @@ static bool
 block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
 {
    nir_foreach_instr(instr, block) {
+      if (nir_instr_is_cross_thread(instr) && !nir_instr_is_convergent(instr)) {
+         /* If the instruction is cross-thread, then we can't execute it
+          * conditionally when we would've executed it unconditionally before,
+          * except when the condition is uniform. If the instruction is
+          * convergent, though, we're already guaranteed that the entire
+          * region is convergent (including the condition) so we can go ahead.
+          *
+          * TODO: allow when the if-condition is uniform
+          */
+         return false;
+      }
       switch (instr->type) {
       case nir_instr_type_intrinsic: {
          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
author	Connor Abbott <cwabbott0@gmail.com>	2017-06-01 18:43:56 -0700
committer	Connor Abbott <cwabbott0@gmail.com>	2017-08-08 12:00:50 -0700
commit	c06af927f1ac3522e2d44b1cbeb6e2a4843a8c9e (patch)
tree	b684ae2103f8623c9e4cab30eca3c883b0730d36
parent	e4e0085c11a307c430173fbfbf42e4188bc19304 (diff)