summary | refs | log | tree | commit | diff
path: root/backend
diff options
context:
space:
mode:
author Guo Yejun <yejun.guo@intel.com> 2014-02-21 05:51:33 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2014-02-27 13:17:56 +0800
commit 04316f7ec5e4d2ab220c14020dc5808dfe663e80 (patch)
tree 45b0d2551b385b092b0cf02279b3ba5004437e31 /backend
parent 237a2f813bedc6417227275050bcf1ea720d586c (diff)
GBE: add fast path for more math functions
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- backend/src/backend/program.cpp 27
-rw-r--r-- backend/src/builtin_vector_proto.def 39
-rwxr-xr-x backend/src/ocl_stdlib.tmpl.h 147
3 files changed, 207 insertions, 6 deletions
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 98fcded6..c2ac83df 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -461,8 +461,33 @@ namespace gbe {
#define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n"
std::string ocl_mathfunc_fastpath_str =
- REDEF_MATH_FUNC(sin)
+ REDEF_MATH_FUNC(acosh) // each entry emits text that #undefs the name if defined, then #defines it to __gen_ocl_internal_fastpath_<name>
+ REDEF_MATH_FUNC(asinh)
+ REDEF_MATH_FUNC(atanh)
+ REDEF_MATH_FUNC(cbrt)
REDEF_MATH_FUNC(cos)
+ REDEF_MATH_FUNC(cosh)
+ REDEF_MATH_FUNC(cospi)
+ REDEF_MATH_FUNC(exp)
+ REDEF_MATH_FUNC(exp10)
+ REDEF_MATH_FUNC(expm1)
+ REDEF_MATH_FUNC(fmod)
+ REDEF_MATH_FUNC(hypot)
+ REDEF_MATH_FUNC(ilogb)
+ REDEF_MATH_FUNC(ldexp)
+ REDEF_MATH_FUNC(log)
+ REDEF_MATH_FUNC(log2)
+ REDEF_MATH_FUNC(log10)
+ REDEF_MATH_FUNC(log1p)
+ REDEF_MATH_FUNC(logb)
+ REDEF_MATH_FUNC(remainder)
+ REDEF_MATH_FUNC(rootn)
+ REDEF_MATH_FUNC(sin)
+ REDEF_MATH_FUNC(sincos)
+ REDEF_MATH_FUNC(sinh)
+ REDEF_MATH_FUNC(sinpi)
+ REDEF_MATH_FUNC(tan)
+ REDEF_MATH_FUNC(tanh)
"\n"
;
diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 7bc7c489..103e6612 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -130,8 +130,43 @@ gentype tgamma (gentype)
gentype trunc (gentype)
##math function fast path
-gentype __gen_ocl_internal_fastpath_sin (gentype)
-gentype __gen_ocl_internal_fastpath_cos (gentype)
+gentype __gen_ocl_internal_fastpath_acosh (gentype x)
+gentype __gen_ocl_internal_fastpath_asinh (gentype x)
+gentype __gen_ocl_internal_fastpath_atanh (gentype x)
+gentype __gen_ocl_internal_fastpath_cbrt (gentype x)
+gentype __gen_ocl_internal_fastpath_cos (gentype x)
+gentype __gen_ocl_internal_fastpath_cosh (gentype x)
+gentype __gen_ocl_internal_fastpath_cospi (gentype x)
+gentype __gen_ocl_internal_fastpath_exp (gentype x)
+gentype __gen_ocl_internal_fastpath_exp10 (gentype x)
+gentype __gen_ocl_internal_fastpath_expm1 (gentype x)
+gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y)
+intn __gen_ocl_internal_fastpath_ilogb (floatn x)
+int __gen_ocl_internal_fastpath_ilogb (float x)
+intn __gen_ocl_internal_fastpath_ilogb (doublen x)
+int __gen_ocl_internal_fastpath_ilogb (double x)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k)
+float __gen_ocl_internal_fastpath_ldexp (float x, int k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k)
+double __gen_ocl_internal_fastpath_ldexp (double x, int k)
+gentype __gen_ocl_internal_fastpath_log (gentype x)
+gentype __gen_ocl_internal_fastpath_log2 (gentype x)
+gentype __gen_ocl_internal_fastpath_log10 (gentype x)
+gentype __gen_ocl_internal_fastpath_log1p (gentype x)
+gentype __gen_ocl_internal_fastpath_logb (gentype x)
+gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n)
+gentype __gen_ocl_internal_fastpath_sin (gentype x)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sinh (gentype x)
+gentype __gen_ocl_internal_fastpath_sinpi (gentype x)
+gentype __gen_ocl_internal_fastpath_tan (gentype x)
+gentype __gen_ocl_internal_fastpath_tanh (gentype x)
##half_native_math
#gentype half_cos (gentype x)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 3b6eb322..76395fae 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4701,14 +4701,155 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
{ return __gen_ocl_get_image_array_size(image); }
#endif
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) {
- return native_sin(x);
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x)
+{
+ return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1)); /* acosh(x) = ln(x + sqrt(x+1)*sqrt(x-1)), evaluated with native_log/native_sqrt */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x)
+{
+ return native_log(x + native_sqrt(x * x + 1)); /* asinh(x) = ln(x + sqrt(x^2 + 1)), evaluated with native_log/native_sqrt */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x)
+{
+ return 0.5f * native_log((1 + x) / (1 - x)); /* atanh(x) = 0.5 * ln((1+x)/(1-x)); the logarithm (not a square root) is required, e.g. atanh(0) must be 0 */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x)
+{
+ return __gen_ocl_pow(x, 0.3333333333f); /* cube root approximated as x^(1/3); NOTE(review): negative x is presumably not handled by the pow path - confirm */
}
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) {
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x)
+{ /* fast path: forwards directly to native_cos */
return native_cos(x);
}
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x)
+{
+ return (1 + native_exp(-2 * x)) / (2 * native_exp(-x)); /* cosh(x) = (e^x + e^-x)/2, rewritten as (1 + e^-2x) / (2 e^-x) */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x)
+{
+ return __gen_ocl_cos(x * M_PI_F); /* cospi(x) = cos(pi * x); the argument is scaled by M_PI_F before the cosine call */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x)
+{
+ return native_exp(x); /* fast path: forwards directly to native_exp */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x)
+{
+ return native_exp10(x); /* fast path: forwards directly to native_exp10 */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x)
+{
+ return __gen_ocl_pow(M_E_F, x) - 1; /* expm1(x) = e^x - 1, computed as pow(M_E_F, x) then a plain subtraction of 1 */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y)
+{
+ return x-y*__gen_ocl_rndz(x/y); /* x - y*q with q = __gen_ocl_rndz(x/y); presumably rndz rounds toward zero - confirm */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float y)
+{
+ return __gen_ocl_sqrt(x*x + y*y); /* sqrt(x^2 + y^2) computed directly, with no rescaling of x or y beforehand */
+}
+
+INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x)
+{
+ return __gen_ocl_rndd(native_log2(x)); /* __gen_ocl_rndd applied to log2(x), converted to int on return; presumably rndd rounds down (floor) - confirm */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n)
+{
+ return __gen_ocl_pow(2, n) * x; /* ldexp(x, n) = x * 2^n, with 2^n obtained from __gen_ocl_pow */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x)
+{
+ return native_log(x); /* fast path: forwards directly to native_log */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x)
+{
+ return native_log2(x); /* fast path: forwards directly to native_log2 */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x)
+{
+ return native_log10(x); /* fast path: forwards directly to native_log10 */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x)
+{
+ return native_log(x + 1); /* log1p(x) = ln(1 + x); the addition is performed first, then native_log */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x)
+{
+ return __gen_ocl_rndd(native_log2(x)); /* same expression as the ilogb fast path but returned as float; presumably rndd rounds down (floor) - confirm */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float y)
+{
+ return x-y*__gen_ocl_rnde(x/y); /* x - y*q with q = __gen_ocl_rnde(x/y); presumably rnde rounds to nearest, ties to even - confirm */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n)
+{
+ return __gen_ocl_pow(x, 1.f / n); /* n-th root computed as x^(1/n); the exponent is formed by a float division */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x)
+{
+ return native_sin(x); /* fast path: forwards directly to native_sin */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __global float *cosval)
+{
+ *cosval = native_cos(x); /* the cosine is stored through the __global pointer cosval */
+ return native_sin(x); /* the sine is the return value */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __local float *cosval)
+{
+ *cosval = native_cos(x); /* the cosine is stored through the __local pointer cosval */
+ return native_sin(x); /* the sine is the return value */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __private float *cosval)
+{
+ *cosval = native_cos(x); /* the cosine is stored through the __private pointer cosval */
+ return native_sin(x); /* the sine is the return value */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x)
+{
+ return (1 - native_exp(-2 * x)) / (2 * native_exp(-x)); /* sinh(x) = (e^x - e^-x)/2, rewritten as (1 - e^-2x) / (2 e^-x) */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x)
+{
+ return __gen_ocl_sin(x * M_PI_F); /* sinpi(x) = sin(pi * x); the argument is scaled by M_PI_F before the sine call */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x)
+{
+ return native_tan(x); /* fast path: forwards directly to native_tan */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
+{
+ float y = native_exp(-2 * x); /* y = e^-2x, evaluated once and reused below */
+ return (1 - y) / (1 + y); /* tanh(x) = (1 - e^-2x) / (1 + e^-2x) */
+}
+
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#undef DECL_IMAGE