summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-08-29 10:04:38 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-08-29 10:12:25 +0800
commit3e2dc7803c4e3928afed2485df6e6e7e0e466a30 (patch)
tree16c56a3f436df159b2b289e7a0623263809d3f33
parent7daf106417493acc89aaa3c26460da2cfdfa51d6 (diff)
GBE: fix error in the rootn fastpath function for some special inputs. [vload_opt]
The fastpath is meant to trade some accuracy for speed, not to produce wrong results. rootn has many special inputs that need to be handled before we call the native pow directly. This patch fixes all the pow-related failures in the OpenCV 3.0 test suite. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rwxr-xr-xbackend/src/ocl_stdlib.tmpl.h15
1 files changed, 12 insertions, 3 deletions
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 01bcbefb..2e375136 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -3731,7 +3731,9 @@ INLINE_OVERLOADABLE float pown(float x, int n) {
return 1;
return powr(x, n);
}
-INLINE_OVERLOADABLE float rootn(float x, int n) {
+
+INLINE_OVERLOADABLE float internal_rootn(float x, int n, const bool isFastpath)
+{
float ax,re;
int sign = 0;
if( n == 0 )return NAN;
@@ -3758,12 +3760,19 @@ INLINE_OVERLOADABLE float rootn(float x, int n) {
ax = __gen_ocl_fabs(x);
if(x <0.0f && (n&1))
sign = 1;
- re = __gen_ocl_internal_pow(ax,1.f/n);
+ if (isFastpath)
+ re = __gen_ocl_pow(ax,1.f/n);
+ else
+ re = __gen_ocl_internal_pow(ax,1.f/n);
if(sign)
re = -re;
return re;
}
+INLINE_OVERLOADABLE float rootn(float x, int n) {
+ return internal_rootn(x, n, 0);
+}
+
/////////////////////////////////////////////////////////////////////////////
// Geometric functions (see 6.11.5 of OCL 1.1 spec)
/////////////////////////////////////////////////////////////////////////////
@@ -5082,7 +5091,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float
INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n)
{
- return __gen_ocl_pow(x, 1.f / n);
+ return internal_rootn(x, n, 1);
}
INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x)