summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: rander.wang <rander.wang@intel.com> 2017-05-15 15:39:02 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2017-05-17 18:10:46 +0800
commit: 19710a52bf83ad86033168bae4e0f4bb5c8fdb54 (patch)
tree: d1b4f90f9b376d4a6d95281f2845a21961cc938d
parent: 511071cd6e27437e0d2c96f73b64d4aa21dc452f (diff)
backend: refine tan function
get it from crlibm and refine it for gen

Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl 61
1 files changed, 45 insertions, 16 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index edb170fa..576033e9 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -608,29 +608,58 @@ float __kernel_tanf(float x, float y, int iy)
return -1.0/(x+r);
}
+/*Author : David Defour, Catherine Daramy, Florent de Dinechin, Christoph Lauter Contact :
+David.Defour@ens-lyon.fr, catherine_daramy@ens-lyon.fr
+
+ This program is free software; you can redistribute it and/or modify it under the terms
+of the GNU Lesser General Public License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version. This program is distributed
+in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+You should have received a copy of the GNU Lesser General Public License along with this program;
+if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA*/
+
+/* Polynomial kernel for tan: evaluates an odd polynomial of degree 23 in x
+ * (11 coefficients) with a chain of mad() calls, Horner-style in x*x.
+ *   x  : point at which the polynomial is evaluated; the tan() caller in this
+ *        file passes the y produced by __ieee754_rem_pio2f.
+ *   y  : accepted but never read in this body.
+ *   iy : when equal to 1 the function returns -1/p(x) instead of p(x).
+ * Returns p(x), or -1/p(x) when iy == 1. */
+/* tan(x) ~ x + T1*x^3 + ... + T11*x^23 */
+float __kernel_tanf_fast(float x, float y, int iy)
+{
+ float x2 = x*x; /* the polynomial is evaluated in powers of x*x */
+ float sum;
+
+ sum = mad(0.0000392783222196f, x2, 0.0000969153770711f); /* Horner start: the two highest-order coefficients */
+ sum = mad(sum, x2, 0.0002391291200183f);
+ sum = mad(sum, x2, 0.0005900274263695f);
+ sum = mad(sum, x2, 0.0014558343682438f);
+ sum = mad(sum, x2, 0.0035921279340982f);
+ sum = mad(sum, x2, 0.0088632358238101f);
+ sum = mad(sum, x2, 0.0218694880604744f);
+ sum = mad(sum, x2, 0.0539682544767857f);
+ sum = mad(sum, x2, 0.1333333402872086f);
+ sum = mad(sum, x2, 0.3333333432674408f); /* lowest-order coefficient; ends up multiplying x^3 */
+ sum = sum*x2; /* shift so the lowest term is of order x2 */
+ sum = mad(sum, x, x); /* sum = x + x*sum, i.e. the leading x term plus the odd powers */
+
+ if(iy == 1) /* the tan() caller passes (n&1) here, so this branch runs when n is odd */
+ {
+ sum = 1.0f/sum;
+ sum = -sum; /* net effect of the two statements: sum = -1/p(x) */
+ }
+
+ return sum;
+}
+
OVERLOADABLE float tan(float x)
{
if (__ocl_math_fastpath_flag)
return __gen_ocl_internal_fastpath_tan(x);
- float y,z=0.0;
+ float y,na=0.0; /* non-fastpath locals; na later holds x - x so Inf/NaN inputs produce NaN */
int n, ix;
float negative = x < 0.0f? -1.0f : 1.0f;
- x = negative * x;
-
- GEN_OCL_GET_FLOAT_WORD(ix,x);
-
- ix &= 0x7fffffff;
-
- /* tan(Inf or NaN) is NaN */
- if (ix>=0x7f800000) return x-x; /* NaN */
-
- /* argument reduction needed */
- else {
- n = __ieee754_rem_pio2f(x,&y);
- return negative * __kernel_tanf(y,0.0f,1-((n&1)<<1)); /* 1 -- n even
- -1 -- n odd */
- }
+ x = fabs(x); /* work on |x|; the sign was captured in 'negative' above */
+ na = x -x; /* 0 for finite x, NaN when x is Inf or NaN; replaces the removed explicit Inf/NaN branch */
+ n = __ieee754_rem_pio2f(x,&y); /* presumably y = reduced argument and n = quadrant count; confirm against the helper. Only n&1 is used */
+ return mad(negative , __kernel_tanf_fast(y, 0.0f, (n&1)), na); /* negative*kernel + na: restores the sign; a NaN in na makes the result NaN */
}
OVERLOADABLE float __gen_ocl_internal_cospi(float x) {