summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2019-02-26 17:19:08 -0800
committerIan Romanick <ian.d.romanick@intel.com>2019-03-26 10:29:22 -0700
commita9a65c6071905190e1663521cabbe698bd51b6fe (patch)
tree8c3e84e67cfb7f3b49c1a8921d8d5ab7ac5c2fd5
parentbf8b7fef31a3eaea6a6f152ec0c8a5b3d8727589 (diff)
fquantize2f16
squash! WIP: intel/compiler: Import Gen8 / Gen9 ALU machine description
-rw-r--r--src/intel/compiler/gen8_md.py22
1 files changed, 22 insertions, 0 deletions
diff --git a/src/intel/compiler/gen8_md.py b/src/intel/compiler/gen8_md.py
index 320636443557..8b0161a82286 100644
--- a/src/intel/compiler/gen8_md.py
+++ b/src/intel/compiler/gen8_md.py
@@ -82,6 +82,28 @@ gen8_md = [
(('f2i16', a), Instruction('MOV', r, a)),
(('f2u16', a), Instruction('MOV', r, a)),
+ # FINISHME: The original hand-coded version of this did the following
+ # (tmp16 there is t0 here):
+ #
+ # /* The destination stride must be at least as big as the source stride. */
+ # tmp16.type = BRW_REGISTER_TYPE_W;
+ # tmp16.stride = 2;
+ #
+ # Without this, there's an extra move to zero the upper 16 bits of t0. I
+ # also tried putting retype(t0, W) in place of t0, but that prevents
+ # copy propagation from being able to do its job.
+ (('fquantize2f16', a), InstructionList([(t0, UD), (t1, F), (zero, F)],
+ (# Check for denormal
+ Instruction('CMP', null(F), abs(a), imm(ldexp(1.0, -14), F)).cmod('L'),
+ # Get the appropriately signed zero.
+ Instruction('AND', retype(zero, UD), retype(a, UD), imm(0x80000000, UD)),
+ # Do the actual F32 -> F16 -> F32 conversion
+ Instruction('F32TO16', t0, a),
+ Instruction('F16TO32', t1, t0),
+ # Select that or zero based on normal status
+ Instruction('SEL', r, zero, t1).predicate()))
+ ),
+
(('i2f64', a), Instruction('MOV', r, a)),
(('i2i64', a), Instruction('MOV', r, a)),
(('i2i32', a), Instruction('MOV', r, a)),