diff options
author | Matt Arsenault <arsenm2@gmail.com> | 2017-10-06 14:23:32 -0700 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-10-06 18:40:22 -0400 |
commit | 772955db4b29ee515d9553ebbaff49905effc430 (patch) | |
tree | 8f83ad43b2bbf7f3ba8dfe154a23c5577dbd0f17 /tests/cl | |
parent | 68a003409532feebd2fb4640f37f6c426d1e9082 (diff) |
cl: Add tests for mad mix
gfx9 added v_mad_mix_f32, v_mad_mixlo_f16,
and v_mad_mixhi_f16 instructions. Make sure the
conversion to/from f16 is folded into this
instruction and it works.
These aren't great since they need more test
values, and generating half results is kind
of a pain from any other tool. The perfect
values used don't really stress the conversions,
but this is at least enough to make sure the
encoding is correct.
v2: Fix backslash alignments
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
Diffstat (limited to 'tests/cl')
-rw-r--r-- | tests/cl/program/execute/mad-mix.cl | 283 |
1 files changed, 283 insertions, 0 deletions
/*!

[config]
name: f32 mad with conversion from f16
clc_version_min: 10
build_options: -cl-denorms-are-zero
require_device_extensions: cl_khr_fp16

dimensions: 1

[test]
name: mad mix f32 f16lo f16lo f16lo
kernel_name: mad_mix_f32_f16lo_f16lo_f16lo
global_size: 4 0 0

arg_out: 0 buffer float[4] \
  0.0 1.0 1.0 -1.0

arg_in: 1 buffer half[4] \
  0.0 1.0 0.0 -1.0

arg_in: 2 buffer half[4] \
  0.0 1.0 1.0 1.0

arg_in: 3 buffer half[4] \
  0.0 0.0 1.0 0.0


[test]
name: mad mix f32 fneg(f16lo) f16lo f16lo
kernel_name: mad_mix_f32_negf16lo_f16lo_f16lo
global_size: 4 0 0

arg_out: 0 buffer float[4] \
  0.0 -1.0 1.0 1.0

arg_in: 1 buffer half[4] \
  0.0 1.0 0.0 -1.0

arg_in: 2 buffer half[4] \
  0.0 1.0 1.0 1.0

arg_in: 3 buffer half[4] \
  0.0 0.0 1.0 0.0


[test]
name: mad mix f32 f16lo f16lo f16hi
kernel_name: mad_mix_f32_f16lo_f16lo_f16hi
global_size: 4 0 0

arg_out: 0 buffer float[4] \
  0.0 \
  1.0 \
  1.0 \
  -1.0

arg_in: 1 buffer half[4] \
  0.0 \
  1.0 \
  0.0 \
  -1.0

arg_in: 2 buffer half[4] \
  0.0 \
  1.0 \
  1.0 \
  1.0

arg_in: 3 buffer half2[4] \
  1000.0 0.0 \
  1000.0 0.0 \
  1000.0 1.0 \
  1000.0 0.0


[test]
name: mad mix f32 f16lo f16lo neg(f16hi)
kernel_name: mad_mix_f32_f16lo_f16lo_negf16hi
global_size: 5 0 0

arg_out: 0 buffer float[5] \
  0.0 \
  1.0 \
  -1.0 \
  -1.0 \
  0.0

arg_in: 1 buffer half[5] \
  0.0 \
  1.0 \
  0.0 \
  -1.0 \
  2.0

arg_in: 2 buffer half[5] \
  0.0 \
  1.0 \
  1.0 \
  1.0 \
  2.0

arg_in: 3 buffer half2[5] \
  1000.0 0.0 \
  1000.0 0.0 \
  1000.0 1.0 \
  1000.0 0.0 \
  1000.0 4.0


[test]
name: mad mix f16lo fneg(f16lo) f16lo f16lo
kernel_name: mad_mix_f16lo_negf16lo_f16lo_f16lo
global_size: 4 0 0

arg_out: 0 buffer half[4] \
  0.0 -1.0 1.0 1.0

arg_in: 1 buffer half[4] \
  0.0 1.0 0.0 -1.0

arg_in: 2 buffer half[4] \
  0.0 1.0 1.0 1.0

arg_in: 3 buffer half[4] \
  0.0 0.0 1.0 0.0


[test]
name: mad mix f16hi fneg(f16lo) f16lo f16lo
kernel_name: mad_mix_f16hi_negf16lo_f16lo_f16lo
global_size: 4 0 0

arg_out: 0 buffer half2[4] \
  2.0 0.0 \
  2.0 -1.0 \
  2.0 1.0 \
  2.0 1.0

arg_in: 1 buffer half[4] \
  0.0 1.0 0.0 -1.0

arg_in: 2 buffer half[4] \
  0.0 1.0 1.0 1.0

arg_in: 3 buffer half[4] \
  0.0 0.0 1.0 0.0



[test]
name: mad mix f32 f16lo f16lo f16lo with clamp
kernel_name: mad_mix_f32_f16lo_f16lo_f16lo_clamp
global_size: 5 0 0

arg_out: 0 buffer float[5] \
  0.0 1.0 0.0 0.75 \
  1.0

arg_in: 1 buffer half[5] \
  0.0 2.0 -2.0 0.5 \
  0.5

arg_in: 2 buffer half[5] \
  0.0 1.0 1.0 0.5 \
  1.0

arg_in: 3 buffer half[5] \
  0.0 1.0 1.0 0.5 \
  0.5


[test]
name: mad mix f16lo f16lo f16lo f16lo with clamp
kernel_name: mad_mix_f16lo_f16lo_f16lo_f16lo_clamp
global_size: 5 0 0

arg_out: 0 buffer half[5] \
  0.0 1.0 0.0 0.75 \
  1.0

arg_in: 1 buffer half[5] \
  0.0 2.0 -2.0 0.5 \
  0.5

arg_in: 2 buffer half[5] \
  0.0 1.0 1.0 0.5 \
  1.0

arg_in: 3 buffer half[5] \
  0.0 1.0 1.0 0.5 \
  0.5


[test]
name: mad mix f16hi f16lo f16lo f16lo with clamp
kernel_name: mad_mix_f16hi_f16lo_f16lo_f16lo_clamp
global_size: 5 0 0

arg_out: 0 buffer half2[5] \
  2.0 0.0 \
  2.0 1.0 \
  2.0 0.0 \
  2.0 0.75 \
  2.0 1.0

arg_in: 1 buffer half[5] \
  0.0 2.0 -2.0 0.5 \
  0.5

arg_in: 2 buffer half[5] \
  0.0 1.0 1.0 0.5 \
  1.0

arg_in: 3 buffer half[5] \
  0.0 1.0 1.0 0.5 \
  0.5


!*/

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

/*
 * Every kernel below computes one multiply-add per work-item, indexed by
 * get_global_id(0): the half inputs are converted to float, multiplied and
 * added in float, and the result is stored either as float or converted
 * back to half.  The kernels differ only in which operand is negated, which
 * half of a half2 is read or written, and whether the result is clamped.
 *
 * The expressions are intentionally kept in this exact cast/multiply/add
 * shape; do not "simplify" them.
 */

/* out[id] = (float)in0[id] * (float)in1[id] + (float)in2[id], stored as float. */
kernel void mad_mix_f32_f16lo_f16lo_f16lo(global float* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    out[id] = (float)in0[id] * (float)in1[id] + (float)in2[id];
}

/*
 * Same as above, but the first operand is negated.  The unary minus binds to
 * the half value, so the negation happens before the conversion to float.
 */
kernel void mad_mix_f32_negf16lo_f16lo_f16lo(global float* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    out[id] = (float)-in0[id] * (float)in1[id] + (float)in2[id];
}

/*
 * The addend is the second (.y) component of a half2 element; the first
 * component is ignored by the computation.
 * NOTE(review): in2 is volatile, presumably so the full half2 is loaded and
 * the operand is taken from the high half of it -- confirm.
 */
kernel void mad_mix_f32_f16lo_f16lo_f16hi(global float* out, global half* in0, global half* in1, volatile global half2* in2)
{
    int id = get_global_id(0);
    out[id] = (float)in0[id] * (float)in1[id] + (float)in2[id].y;
}

/*
 * As the previous kernel, but the .y addend is negated (as a half) before it
 * is converted to float.
 */
kernel void mad_mix_f32_f16lo_f16lo_negf16hi(global float* out, global half* in0, global half* in1, volatile global half2* in2)
{
    int id = get_global_id(0);
    out[id] = (float)in0[id] * (float)in1[id] + (float)-in2[id].y;
}

/*
 * Negated first operand, float multiply-add, result converted back to half
 * and stored to a plain half buffer.
 */
kernel void mad_mix_f16lo_negf16lo_f16lo_f16lo(global half* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    float mad = (float)-in0[id] * (float)in1[id] + (float)in2[id];
    out[id] = (half)mad;
}

/*
 * Same computation as the previous kernel, but the half result is written to
 * the second (.y) component of a half2; the first component is the constant
 * 2.0.
 * NOTE(review): out is volatile, presumably to keep the store as one full
 * half2 write -- confirm.
 */
kernel void mad_mix_f16hi_negf16lo_f16lo_f16lo(volatile global half2* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    float mad = (float)-in0[id] * (float)in1[id] + (float)in2[id];
    half2 result = { 2.0h, (half)mad };
    out[id] = result;
}

/* Float multiply-add of three half inputs, clamped to [0, 1] in float. */
kernel void mad_mix_f32_f16lo_f16lo_f16lo_clamp(global float* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    float mad = (float)in0[id] * (float)in1[id] + (float)in2[id];
    out[id] = clamp(mad, 0.0f, 1.0f);
}

/*
 * Float multiply-add of three half inputs; the result is converted to half
 * first and then clamped to [0, 1] using half bounds.
 */
kernel void mad_mix_f16lo_f16lo_f16lo_f16lo_clamp(global half* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    float mad = (float)in0[id] * (float)in1[id] + (float)in2[id];
    out[id] = clamp((half)mad, 0.0h, 1.0h);
}

/*
 * As the previous kernel, but the clamped half result is written to the
 * second (.y) component of a half2 whose first component is the constant 2.0.
 * NOTE(review): out is volatile, presumably to keep the store as one full
 * half2 write -- confirm.
 */
kernel void mad_mix_f16hi_f16lo_f16lo_f16lo_clamp(volatile global half2* out, global half* in0, global half* in1, global half* in2)
{
    int id = get_global_id(0);
    float mad = (float)in0[id] * (float)in1[id] + (float)in2[id];
    half2 result = { 2.0h, clamp((half)mad, 0.0h, 1.0h) };
    out[id] = result;
}