fquantize2f16

squash! WIP: intel/compiler: Import Gen8 / Gen9 ALU machine description
author: Ian Romanick <ian.d.romanick@intel.com> 2019-02-26 17:19:08 -0800
committer: Ian Romanick <ian.d.romanick@intel.com> 2019-03-26 10:29:22 -0700
commit: a9a65c6071905190e1663521cabbe698bd51b6fe (patch)
tree: 8c3e84e67cfb7f3b49c1a8921d8d5ab7ac5c2fd5
parent: bf8b7fef31a3eaea6a6f152ec0c8a5b3d8727589 (diff)
1 files changed, 22 insertions, 0 deletions
diff --git a/src/intel/compiler/gen8_md.py b/src/intel/compiler/gen8_md.py
index 320636443557..8b0161a82286 100644
--- a/src/intel/compiler/gen8_md.py
+++ b/src/intel/compiler/gen8_md.py
@@ -82,6 +82,28 @@ gen8_md = [
     (('f2i16', a), Instruction('MOV', r, a)),
     (('f2u16', a), Instruction('MOV', r, a)),
 
+    # FINISHME: The original hand-coded version of this did the following
+    # (its tmp16 corresponds to t0 here):
+    #
+    #    /* The destination stride must be at least as big as the source stride. */
+    #    tmp16.type = BRW_REGISTER_TYPE_W;
+    #    tmp16.stride = 2;
+    #
+    # Without this, there's an extra move to zero the upper 16 bits of t0.  I
+    # also tried putting retype(t0, W) in place of t0, but that prevents
+    # copy-propagation from being able to do its job.
+    (('fquantize2f16', a), InstructionList([(t0, UD), (t1, F), (zero, F)],
+                                           (# Check for denormal
+                                            Instruction('CMP', null(F), abs(a), imm(ldexp(1.0, -14), F)).cmod('L'),
+                                            # Get the appropriately signed zero.
+                                            Instruction('AND', retype(zero, UD), retype(a, UD), imm(0x80000000, UD)),
+                                            # Do the actual F32 -> F16 -> F32 conversion
+                                            Instruction('F32TO16', t0, a),
+                                            Instruction('F16TO32', t1, t0),
+                                            # Select that or zero based on normal status
+                                            Instruction('SEL', r, zero, t1).predicate()))
+    ),
+
     (('i2f64', a), Instruction('MOV', r, a)),
     (('i2i64', a), Instruction('MOV', r, a)),
     (('i2i32', a), Instruction('MOV', r, a)),
author	Ian Romanick <ian.d.romanick@intel.com>	2019-02-26 17:19:08 -0800
committer	Ian Romanick <ian.d.romanick@intel.com>	2019-03-26 10:29:22 -0700
commit	a9a65c6071905190e1663521cabbe698bd51b6fe (patch)
tree	8c3e84e67cfb7f3b49c1a8921d8d5ab7ac5c2fd5
parent	bf8b7fef31a3eaea6a6f152ec0c8a5b3d8727589 (diff)