summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: rander.wang <rander.wang@intel.com>, 2017-05-15 14:45:38 +0800
committer: Yang Rong <rong.r.yang@intel.com>, 2017-05-17 18:10:16 +0800
commit: c56b5076fcccc2718ffa63c8d461614cbbc82e67 (patch)
tree: 2d7d59f81b20337f74c5b550e6d7da5c414f30c1
parent: a476a849e726968dd76771da867f18f79fbf4ee9 (diff)
backend: refine the argue reduce
Use a simple algorithm to perform the argument reduction.

Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl 38
1 files changed, 14 insertions, 24 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index f860f8f2..7c449423 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -417,36 +417,26 @@ int payne_hanek(float x, float *y) {
}
int argumentReduceSmall(float x, float * remainder) {
- union {
- float f;
- unsigned u;
- } ieee;
+ float halfPi = 2.0f/3.14159265f;
+ // pi/2 = 0.C90FDAA22168C234C4p+1;
+ float halfPi_p1 = (float) 0xC908/0x1.0p15,
+ halfPi_p2 = (float) 0x7DAA/0x1.0p27,
+ halfPi_p3 = (float) 0x22168C/0x1.0p51,
+ halfPi_p4 = (float) 0x234C4/0x1.0p71;
- float twoByPi = 2.0f/3.14159265f;
- float piBy2_1h = (float) 0xc90/0x1.0p11,
- piBy2_1l = (float) 0xfda/0x1.0p23,
- piBy2_2h = (float) 0xa22/0x1.0p35,
- piBy2_2l = (float) 0x168/0x1.0p47,
- piBy2_3h = (float) 0xc23/0x1.0p59,
- piBy2_3l = (float) 0x4c4/0x1.0p71;
+ uint iy = (uint)mad(halfPi, x, 0.5f);
+ float y = (float)iy;
+ float rem = mad(y, -halfPi_p1, x);
+ rem = mad(y, -halfPi_p2, rem);
+ rem = mad(y, -halfPi_p3, rem);
+ *remainder = rem;
- float y = (float)(int)(twoByPi * x + 0.5f);
- ieee.f = y;
- ieee.u = ieee.u & 0xfffff000;
-
- float yh = ieee.f;
- float yl = y - yh;
- float rem = x - yh*piBy2_1h - yh*piBy2_1l - yl*piBy2_1h - yl*piBy2_1l;
- rem = rem - yh*piBy2_2h - yh*piBy2_2l + yl*piBy2_2h + yl*piBy2_2l;
- rem = rem - yh*piBy2_3h - yh*piBy2_3l - yl*piBy2_3h - yl*piBy2_3l;
-
- *remainder = rem;
- return (int)y;
+ return iy;
}
int __ieee754_rem_pio2f(float x, float *y) {
- if (x < 4000.0f) {
+ if (x < 2.5e2) {
return argumentReduceSmall(x, y);
} else {
return payne_hanek(x, y);