nir: add new convergent, uniform-control, and cross-thread attributes

These are properties of the instruction that must be respected when moving it around, in addition to the usual SSA dominance guarantee. Previously, we only had special handling for fddx and fddy, and it was very ad hoc. With ARB_shader_ballot and ARB_shader_group_vote, we will have to handle many more instructions with similar constraints, so we want a more formal model of what optimizations can and cannot do.

v2: don't add attribute for ALU instructions
v3: special-case derivative ALU instructions
v4: rename convergent to uniform-control, and add LLVM-style convergent attribute
author: Connor Abbott <cwabbott0@gmail.com> 2017-06-01 18:21:21 -0700
committer: Connor Abbott <cwabbott0@gmail.com> 2017-08-08 12:00:50 -0700
commit: d256d30654be6aa35e3cd9559959864fe8ba29df (patch)
tree: 7fee1db01878df3fc1a465a02406d0db0d56af2a
parent: c12c2e40a36f707f733c0d6ad90160472b7a3cf6 (diff)
1 files changed, 126 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9313b7ac90..24934f05ed 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -986,6 +986,39 @@ typedef enum {
     * intrinsic are due to the register reads/writes.
     */
    NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+
+   /**
+    * Indicates whether this intrinsic is "convergent". An operation is
+    * convergent if results from one thread depend on results from another
+    * thread, but in such a way that additional threads being enabled doesn't
+    * affect the result of the operation. Examples of convergent operations
+    * include screen-space derivatives, readInvocation() from
+    * ARB_shader_ballot, etc. Note that this is a more precise version of
+    * LLVM's "convergent" attribute, which simply stipulates that control
+    * dependencies cannot be added, since the set of active threads can only be
+    * reduced by adding control dependencies.
+    */
+   NIR_INTRINSIC_CONVERGENT = (1 << 2),
+
+   /**
+    * Indicates whether this intrinsic is "cross-thread". An operation is
+    * cross-thread if results in one thread depend on the set of active threads
+    * when it is executed, as well as possibly the input value of the other
+    * threads, and therefore optimizations cannot change the execution mask
+    * when the operation is called. Examples of cross-thread operations include
+    * the "any" reduction which returns "true" in all threads if any thread
+    * inputs "true", ballotARB() from ARB_shader_ballot, etc. Note that any
+    * cross-thread operation must be convergent.
+    */
+   NIR_INTRINSIC_CROSS_THREAD = (1 << 3),
+
+   /**
+    * Indicates that this intrinsic is guaranteed to always be called in
+    * uniform control flow, that is, control flow with the same execution mask
+    * as when the program started. If an operation is uniform-control, it must
+    * be convergent as well, since the optimizer must maintain the guarantee.
+    */
+   NIR_INTRINSIC_UNIFORM_CONTROL = (1 << 4),
 } nir_intrinsic_semantic_flag;
 
 /**
@@ -1460,6 +1493,99 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
                 type, nir_instr_type_parallel_copy)
 
 /*
+ * Helpers to determine if an instruction is cross-thread, convergent, or
+ * uniform-control. See NIR_INTRINSIC_{CONVERGENT|CROSS_THREAD|UNIFORM_CONTROL}
+ * for the definitions.
+ */
+static inline bool /* true if instr is uniform-control, false otherwise */
+nir_instr_is_uniform_control(const nir_instr *instr)
+{
+   switch (instr->type) { /* the answer depends on the instruction type */
+   case nir_instr_type_alu: /* only the derivative opcodes below qualify */
+      switch (nir_instr_as_alu(instr)->op) {
+      case nir_op_fddx:
+      case nir_op_fddy:
+      case nir_op_fddx_fine:
+      case nir_op_fddy_fine:
+      case nir_op_fddx_coarse:
+      case nir_op_fddy_coarse:
+         /* Section 8.13.1 (Derivative Functions) of the GLSL 4.50 spec says:
+          *
+          *    "Derivatives are undefined within non-uniform control flow."
+          *
+          * Thus, we can assume they are called in uniform control flow.
+          */
+         return true;
+
+      default: /* every other ALU opcode */
+         return false;
+      }
+
+   case nir_instr_type_intrinsic: { /* decided by the intrinsic info table */
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      return nir_intrinsic_infos[intrin->intrinsic].flags &
+         NIR_INTRINSIC_UNIFORM_CONTROL; /* nonzero flag bit converts to true */
+   }
+
+   case nir_instr_type_tex: /* only the texture ops listed below qualify */
+         switch (nir_instr_as_tex(instr)->op) {
+         case nir_texop_tex:
+         case nir_texop_txb:
+         case nir_texop_lod:
+            /* tex, txb and lod take implicit derivatives, so they are
+             * uniform-control as well, just like the derivative ALU ops.
+             */
+            return true;
+
+         default: /* every other texture op */
+            return false;
+         }
+
+   default: /* all remaining instruction types */
+      return false;
+   }
+}
+
+static inline bool /* true if instr is cross-thread, false otherwise */
+nir_instr_is_cross_thread(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: { /* only intrinsics can carry the flag */
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      return nir_intrinsic_infos[intrin->intrinsic].flags &
+         NIR_INTRINSIC_CROSS_THREAD; /* nonzero flag bit converts to true */
+   }
+
+   default: /* every non-intrinsic instruction type */
+      return false;
+   }
+}
+
+static inline bool /* true if instr is convergent, false otherwise */
+nir_instr_is_convergent(const nir_instr *instr)
+{
+   /* Uniform-control instructions are always treated as convergent, since
+    * optimizations must keep the operation in uniform control flow.
+    */
+   if (nir_instr_is_uniform_control(instr))
+      return true;
+
+   /* Cross-thread instructions are always treated as convergent, since
+    * cross-thread is more conservative than convergent.
+    */
+   if (nir_instr_is_cross_thread(instr))
+      return true;
+
+   if (instr->type == nir_instr_type_intrinsic) { /* check the explicit flag */
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      return nir_intrinsic_infos[intrin->intrinsic].flags &
+         NIR_INTRINSIC_CONVERGENT; /* nonzero flag bit converts to true */
+   }
+
+   return false; /* non-intrinsic and not matched by either helper above */
+}
+
+/*
  * Control flow
  *
  * Control flow consists of a tree of control flow nodes, which include
author	Connor Abbott <cwabbott0@gmail.com>	2017-06-01 18:21:21 -0700
committer	Connor Abbott <cwabbott0@gmail.com>	2017-08-08 12:00:50 -0700
commit	d256d30654be6aa35e3cd9559959864fe8ba29df (patch)
tree	7fee1db01878df3fc1a465a02406d0db0d56af2a
parent	c12c2e40a36f707f733c0d6ad90160472b7a3cf6 (diff)