diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2019-02-26 17:19:08 -0800 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2019-03-26 10:29:22 -0700 |
commit | a9a65c6071905190e1663521cabbe698bd51b6fe (patch) | |
tree | 8c3e84e67cfb7f3b49c1a8921d8d5ab7ac5c2fd5 | |
parent | bf8b7fef31a3eaea6a6f152ec0c8a5b3d8727589 (diff) |
fquantize2f16
squash! WIP: intel/compiler: Import Gen8 / Gen9 ALU machine description
-rw-r--r-- | src/intel/compiler/gen8_md.py | 22 |
1 files changed, 22 insertions, 0 deletions
```diff
diff --git a/src/intel/compiler/gen8_md.py b/src/intel/compiler/gen8_md.py
index 320636443557..8b0161a82286 100644
--- a/src/intel/compiler/gen8_md.py
+++ b/src/intel/compiler/gen8_md.py
@@ -82,6 +82,28 @@ gen8_md = [
     (('f2i16', a), Instruction('MOV', r, a)),
     (('f2u16', a), Instruction('MOV', r, a)),
 
+    # FINISHME: The original hand-coded version of this did (tmp16 there is t0
+    # here):
+    #
+    #    /* The destination stride must be at least as big as the source stride. */
+    #    tmp16.type = BRW_REGISTER_TYPE_W;
+    #    tmp16.stride = 2;
+    #
+    # Without this, there's an extra move to zero the upper 16-bits of t0.  I
+    # also tried putting retype(t0, W) in place of t0, but that prevents
+    # copy-propagation from being able to do its job.
+    (('fquantize2f16', a), InstructionList([(t0, UD), (t1, F), (zero, F)],
+                                           (# Check for denormal
+                                            Instruction('CMP', null(F), abs(a), imm(ldexp(1.0, -14), F)).cmod('L'),
+                                            # Get the appropriately signed zero.
+                                            Instruction('AND', retype(zero, UD), retype(a, UD), imm(0x80000000, UD)),
+                                            # Do the actual F32 -> F16 -> F32 conversion
+                                            Instruction('F32TO16', t0, a),
+                                            Instruction('F16TO32', t1, t0),
+                                            # Select that or zero based on normal status
+                                            Instruction('SEL', r, zero, t1).predicate()))
+    ),
+
     (('i2f64', a), Instruction('MOV', r, a)),
     (('i2i64', a), Instruction('MOV', r, a)),
     (('i2i32', a), Instruction('MOV', r, a)),
```