diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-08-29 10:04:38 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-08-29 10:12:25 +0800 |
commit | 3e2dc7803c4e3928afed2485df6e6e7e0e466a30 (patch) | |
tree | 16c56a3f436df159b2b289e7a0623263809d3f33 | |
parent | 7daf106417493acc89aaa3c26460da2cfdfa51d6 (diff) |
GBE: fix error in the rootn fastpath function for some special inputs. [vload_opt]
The fastpath is meant to trade some accuracy for speed. It is not
meant to generate erroneous results. rootn has many special inputs that
need to be taken care of before we call the native pow directly.
This patch fixes all the pow-related failures in the OpenCV 3.0 test
suite.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rwxr-xr-x | backend/src/ocl_stdlib.tmpl.h | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 01bcbefb..2e375136 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -3731,7 +3731,9 @@ INLINE_OVERLOADABLE float pown(float x, int n) { return 1; return powr(x, n); } -INLINE_OVERLOADABLE float rootn(float x, int n) { + +INLINE_OVERLOADABLE float internal_rootn(float x, int n, const bool isFastpath) +{ float ax,re; int sign = 0; if( n == 0 )return NAN; @@ -3758,12 +3760,19 @@ INLINE_OVERLOADABLE float rootn(float x, int n) { ax = __gen_ocl_fabs(x); if(x <0.0f && (n&1)) sign = 1; - re = __gen_ocl_internal_pow(ax,1.f/n); + if (isFastpath) + re = __gen_ocl_pow(ax,1.f/n); + else + re = __gen_ocl_internal_pow(ax,1.f/n); if(sign) re = -re; return re; } +INLINE_OVERLOADABLE float rootn(float x, int n) { + return internal_rootn(x, n, 0); +} + ///////////////////////////////////////////////////////////////////////////// // Geometric functions (see 6.11.5 of OCL 1.1 spec) ///////////////////////////////////////////////////////////////////////////// @@ -5082,7 +5091,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n) { - return __gen_ocl_pow(x, 1.f / n); + return internal_rootn(x, n, 1); } INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x) |