diff options
author | rander.wang <rander.wang@intel.com> | 2017-05-15 14:45:38 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-05-17 18:10:16 +0800 |
commit | c56b5076fcccc2718ffa63c8d461614cbbc82e67 (patch) | |
tree | 2d7d59f81b20337f74c5b550e6d7da5c414f30c1 | |
parent | a476a849e726968dd76771da867f18f79fbf4ee9 (diff) |
backend: refine the argument reduction
Use a simple algorithm to compute it.
Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 38 |
1 files changed, 14 insertions, 24 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl index f860f8f2..7c449423 100644 --- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl @@ -417,36 +417,26 @@ int payne_hanek(float x, float *y) { } int argumentReduceSmall(float x, float * remainder) { - union { - float f; - unsigned u; - } ieee; + float halfPi = 2.0f/3.14159265f; + // pi/2 = 0.C90FDAA22168C234C4p+1; + float halfPi_p1 = (float) 0xC908/0x1.0p15, + halfPi_p2 = (float) 0x7DAA/0x1.0p27, + halfPi_p3 = (float) 0x22168C/0x1.0p51, + halfPi_p4 = (float) 0x234C4/0x1.0p71; - float twoByPi = 2.0f/3.14159265f; - float piBy2_1h = (float) 0xc90/0x1.0p11, - piBy2_1l = (float) 0xfda/0x1.0p23, - piBy2_2h = (float) 0xa22/0x1.0p35, - piBy2_2l = (float) 0x168/0x1.0p47, - piBy2_3h = (float) 0xc23/0x1.0p59, - piBy2_3l = (float) 0x4c4/0x1.0p71; + uint iy = (uint)mad(halfPi, x, 0.5f); + float y = (float)iy; + float rem = mad(y, -halfPi_p1, x); + rem = mad(y, -halfPi_p2, rem); + rem = mad(y, -halfPi_p3, rem); + *remainder = rem; - float y = (float)(int)(twoByPi * x + 0.5f); - ieee.f = y; - ieee.u = ieee.u & 0xfffff000; - - float yh = ieee.f; - float yl = y - yh; - float rem = x - yh*piBy2_1h - yh*piBy2_1l - yl*piBy2_1h - yl*piBy2_1l; - rem = rem - yh*piBy2_2h - yh*piBy2_2l + yl*piBy2_2h + yl*piBy2_2l; - rem = rem - yh*piBy2_3h - yh*piBy2_3l - yl*piBy2_3h - yl*piBy2_3l; - - *remainder = rem; - return (int)y; + return iy; } int __ieee754_rem_pio2f(float x, float *y) { - if (x < 4000.0f) { + if (x < 2.5e2) { return argumentReduceSmall(x, y); } else { return payne_hanek(x, y); |