diff options
author | Guo Yejun <yejun.guo@intel.com> | 2014-02-21 05:51:33 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-02-27 13:17:56 +0800 |
commit | 04316f7ec5e4d2ab220c14020dc5808dfe663e80 (patch) | |
tree | 45b0d2551b385b092b0cf02279b3ba5004437e31 /backend | |
parent | 237a2f813bedc6417227275050bcf1ea720d586c (diff) |
GBE: add fast path for more math functions
Signed-off-by: Guo Yejun <yejun.guo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- | backend/src/backend/program.cpp | 27 | ||||
-rw-r--r-- | backend/src/builtin_vector_proto.def | 39 | ||||
-rwxr-xr-x | backend/src/ocl_stdlib.tmpl.h | 147 |
3 files changed, 207 insertions, 6 deletions
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 98fcded6..c2ac83df 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -461,8 +461,33 @@ namespace gbe {
 
   #define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n"
   std::string ocl_mathfunc_fastpath_str =
-    REDEF_MATH_FUNC(sin)
+    REDEF_MATH_FUNC(acosh)
+    REDEF_MATH_FUNC(asinh)
+    REDEF_MATH_FUNC(atanh)
+    REDEF_MATH_FUNC(cbrt)
     REDEF_MATH_FUNC(cos)
+    REDEF_MATH_FUNC(cosh)
+    REDEF_MATH_FUNC(cospi)
+    REDEF_MATH_FUNC(exp)
+    REDEF_MATH_FUNC(exp10)
+    REDEF_MATH_FUNC(expm1)
+    REDEF_MATH_FUNC(fmod)
+    REDEF_MATH_FUNC(hypot)
+    REDEF_MATH_FUNC(ilogb)
+    REDEF_MATH_FUNC(ldexp)
+    REDEF_MATH_FUNC(log)
+    REDEF_MATH_FUNC(log2)
+    REDEF_MATH_FUNC(log10)
+    REDEF_MATH_FUNC(log1p)
+    REDEF_MATH_FUNC(logb)
+    REDEF_MATH_FUNC(remainder)
+    REDEF_MATH_FUNC(rootn)
+    REDEF_MATH_FUNC(sin)
+    REDEF_MATH_FUNC(sincos)
+    REDEF_MATH_FUNC(sinh)
+    REDEF_MATH_FUNC(sinpi)
+    REDEF_MATH_FUNC(tan)
+    REDEF_MATH_FUNC(tanh)
     "\n"
   ;
 
diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 7bc7c489..103e6612 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -130,8 +130,43 @@ gentype tgamma (gentype)
 gentype trunc (gentype)
 
 ##math function fast path
-gentype __gen_ocl_internal_fastpath_sin (gentype)
-gentype __gen_ocl_internal_fastpath_cos (gentype)
+gentype __gen_ocl_internal_fastpath_acosh (gentype x)
+gentype __gen_ocl_internal_fastpath_asinh (gentype x)
+gentype __gen_ocl_internal_fastpath_atanh (gentype x)
+gentype __gen_ocl_internal_fastpath_cbrt (gentype x)
+gentype __gen_ocl_internal_fastpath_cos (gentype x)
+gentype __gen_ocl_internal_fastpath_cosh (gentype x)
+gentype __gen_ocl_internal_fastpath_cospi (gentype x)
+gentype __gen_ocl_internal_fastpath_exp (gentype x)
+gentype __gen_ocl_internal_fastpath_exp10 (gentype x)
+gentype __gen_ocl_internal_fastpath_expm1 (gentype x)
+gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y)
+intn __gen_ocl_internal_fastpath_ilogb (floatn x)
+int __gen_ocl_internal_fastpath_ilogb (float x)
+intn __gen_ocl_internal_fastpath_ilogb (doublen x)
+int __gen_ocl_internal_fastpath_ilogb (double x)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k)
+floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k)
+float __gen_ocl_internal_fastpath_ldexp (float x, int k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k)
+doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k)
+double __gen_ocl_internal_fastpath_ldexp (double x, int k)
+gentype __gen_ocl_internal_fastpath_log (gentype x)
+gentype __gen_ocl_internal_fastpath_log2 (gentype x)
+gentype __gen_ocl_internal_fastpath_log10 (gentype x)
+gentype __gen_ocl_internal_fastpath_log1p (gentype x)
+gentype __gen_ocl_internal_fastpath_logb (gentype x)
+gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y)
+gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n)
+gentype __gen_ocl_internal_fastpath_sin (gentype x)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype *cosval)
+gentype __gen_ocl_internal_fastpath_sinh (gentype x)
+gentype __gen_ocl_internal_fastpath_sinpi (gentype x)
+gentype __gen_ocl_internal_fastpath_tan (gentype x)
+gentype __gen_ocl_internal_fastpath_tanh (gentype x)
 
 ##half_native_math
 #gentype half_cos (gentype x)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 3b6eb322..76395fae 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4701,14 +4701,155 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
   { return __gen_ocl_get_image_array_size(image); }
 #endif
 
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) {
-  return native_sin(x);
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x)
+{
+    return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x)
+{
+    return native_log(x + native_sqrt(x * x + 1));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x)
+{
+    return 0.5f * native_log((1 + x) / (1 - x)); /* atanh(x) = ln((1 + x) / (1 - x)) / 2 */
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x)
+{
+    return __gen_ocl_pow(x, 0.3333333333f);
 }
 
-INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) {
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x)
+{
   return native_cos(x);
 }
 
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x)
+{
+    return (1 + native_exp(-2 * x)) / (2 * native_exp(-x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x)
+{
+    return __gen_ocl_cos(x * M_PI_F);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x)
+{
+    return native_exp(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x)
+{
+    return native_exp10(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x)
+{
+    return __gen_ocl_pow(M_E_F, x) - 1;
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y)
+{
+    return x-y*__gen_ocl_rndz(x/y);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float y)
+{
+    return __gen_ocl_sqrt(x*x + y*y);
+}
+
+INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x)
+{
+    return __gen_ocl_rndd(native_log2(x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n)
+{
+    return __gen_ocl_pow(2, n) * x;
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x)
+{
+    return native_log(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x)
+{
+    return native_log2(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x)
+{
+    return native_log10(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x)
+{
+    return native_log(x + 1);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x)
+{
+    return __gen_ocl_rndd(native_log2(x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float y)
+{
+    return x-y*__gen_ocl_rnde(x/y);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n)
+{
+    return __gen_ocl_pow(x, 1.f / n);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x)
+{
+    return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __global float *cosval)
+{
+    *cosval = native_cos(x);
+    return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __local float *cosval)
+{
+    *cosval = native_cos(x);
+    return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, __private float *cosval)
+{
+    *cosval = native_cos(x);
+    return native_sin(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x)
+{
+    return (1 - native_exp(-2 * x)) / (2 * native_exp(-x));
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x)
+{
+    return __gen_ocl_sin(x * M_PI_F);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x)
+{
+    return native_tan(x);
+}
+
+INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x)
+{
+    float y = native_exp(-2 * x);
+    return (1 - y) / (1 + y);
+}
+
 #pragma OPENCL EXTENSION cl_khr_fp64 : disable
 
 #undef DECL_IMAGE