nir: Narrow unnecessary 64-bit operations to 32-bits [ballot]

If we know the high bits are zero, we can just do a 32-bit comparison on the low bytes instead.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
author: Matt Turner <mattst88@gmail.com> 2017-06-30 16:00:51 -0700
committer: Matt Turner <mattst88@gmail.com> 2017-07-21 01:29:24 -0700
commit: 278193d3899be9e63c4361c9cc84701dc6a384ed (patch)
tree: ec36b2c9b21b85e0e4757d8b58579e5c403dc492
parent: 0bfa8f68cf94d81235de71d13453a3631b185de1 (diff)
2 files changed, 77 insertions, 1 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index df5854270c4..a9c3e80929a 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -44,7 +44,7 @@ d = 'd'
 # however, be used for backend-requested lowering operations as those need to
 # happen regardless of precision.
 #
-# Variable names are specified as "[#]name[@type][(cond)]" where "#" inicates
+# Variable names are specified as "[#]name[@type][(cond)]" where "#" indicates
 # that the given variable will only match constants and the type indicates that
 # the given variable will only match values from ALU instructions with the
 # given output type, and (cond) specifies an additional condition function
@@ -144,6 +144,16 @@ optimizations = [
    (('inot', ('ieq', a, b)), ('ine', a, b)),
    (('inot', ('ine', a, b)), ('ieq', a, b)),
 
+   # Unnecessary 64-bit comparisons
+   (('ieq', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ieq', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ine', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ine', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ilt', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ilt', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ige', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ige', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ult', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ult', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('uge', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('uge', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+
+   (('iand', 'a@64(fits_in_32_bits)', 'b@64'), ('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)), 0)),
+
    # 0.0 >= b2f(a)
    # b2f(a) <= 0.0
    # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
@@ -315,6 +325,8 @@ optimizations = [
    (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
                            ('unpack_64_2x32_split_y', a)), a),
 
+   (('unpack_64_2x32_split_y', 'a(fits_in_32_bits)'), 0),
+
    # Byte extraction
    (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
    (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'),
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 200f2471f84..c98ecb895f1 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -115,6 +115,70 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
 }
 
 static inline bool
+fits_in_32_bits(nir_alu_instr *instr, unsigned src, unsigned num_components, /* search condition "(fits_in_32_bits)": true when source `src` of `instr` is shown to be representable in 32 bits */
+                const uint8_t *swizzle) /* swizzle is consulted only on the plain-constant path at the bottom */
+{
+   if (instr->src[src].src.is_ssa &&
+       instr->src[src].src.ssa->parent_instr->type == nir_instr_type_alu) { /* case 1: the source is an SSA value produced by an ALU instruction */
+      nir_alu_instr *parent_instr =
+         nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
+
+      switch (parent_instr->op) {
+      case nir_op_pack_64_2x32_split: { /* the only producer opcode that is analysed */
+         nir_const_value *lo_bits =
+            nir_src_as_const_value(parent_instr->src[0].src); /* operand 0 of the pack, treated as the low word; NULL-checked below */
+         nir_const_value *hi_bits =
+            nir_src_as_const_value(parent_instr->src[1].src); /* operand 1 of the pack, treated as the high word; NULL-checked below */
+
+         for (unsigned i = 0; i < num_components; i++) { /* NOTE(review): constants are indexed by i, not swizzle[i], and the pack's own swizzles are not read - confirm intended */
+            switch (nir_op_infos[instr->op].input_types[src]) { /* signedness comes from the consuming opcode's declared input type */
+            case nir_type_int:
+               if (!hi_bits || !lo_bits ||
+                   hi_bits->i32[i] != (lo_bits->i32[i] >> 31)) /* signed: both halves must be constant and hi must equal lo >> 31; NOTE(review): assumes >> on a negative int32 is an arithmetic shift */
+                  return false;
+               break;
+            case nir_type_uint:
+               if (!hi_bits || hi_bits->u32[i] != 0) /* unsigned: high word must be a constant zero; the low word is not inspected */
+                  return false;
+               break;
+            default: /* any other input type value is rejected; NOTE(review): a sized type value would land here - confirm against nir_alu_type */
+               return false;
+            }
+         }
+
+         return true; /* every one of the num_components components passed */
+      }
+      default:
+         break; /* other producer opcodes fall through to the return false below */
+      }
+
+      return false; /* ALU producer that did not qualify: never falls back to the constant check */
+   }
+
+   nir_const_value *val = nir_src_as_const_value(instr->src[src].src); /* case 2: source is not an ALU-produced SSA value */
+
+   if (!val)
+      return false; /* no constant value available, so nothing can be proven */
+
+   for (unsigned i = 0; i < num_components; i++) { /* this path does apply the caller's swizzle to select components */
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_int:
+         if (val->i64[swizzle[i]] != (int32_t)val->i64[swizzle[i]]) /* signed: value must survive a round trip through int32_t */
+            return false;
+         break;
+      case nir_type_uint:
+         if (val->u64[swizzle[i]] != (uint32_t)val->u64[swizzle[i]]) /* unsigned: value must survive a round trip through uint32_t */
+            return false;
+         break;
+      default: /* any other input type value is rejected */
+         return false;
+      }
+   }
+
+   return true; /* all inspected constant components fit in 32 bits */
+}
+
+static inline bool
 is_not_const(nir_alu_instr *instr, unsigned src, unsigned num_components,
              const uint8_t *swizzle)
 {
author	Matt Turner <mattst88@gmail.com>	2017-06-30 16:00:51 -0700
committer	Matt Turner <mattst88@gmail.com>	2017-07-21 01:29:24 -0700
commit	278193d3899be9e63c4361c9cc84701dc6a384ed (patch)
tree	ec36b2c9b21b85e0e4757d8b58579e5c403dc492
parent	0bfa8f68cf94d81235de71d13453a3631b185de1 (diff)