Fix implicit Event leak

Do not return an Event object; handle the cl_event inside the function itself. This fixes an implicit event leak: the function now waits on the cl_event and then releases it. Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
author: Zhenyu Wang <zhenyuw@linux.intel.com> 2014-10-23 13:09:21 +0800
committer: Zhenyu Wang <zhenyuw@linux.intel.com> 2014-10-23 13:09:21 +0800
commit: fd0204918b920077a14298b00a64bf38f08fb2d3 (patch)
tree: fe627ef9955d46b2086247e9ea964275bbe056e2
parent: 68e519cdb3e28459ffc53e490b0c8288cd579472 (diff)
4 files changed, 54 insertions, 39 deletions
diff --git a/BealtoOpenCL/include/CLCommandQueue.h b/BealtoOpenCL/include/CLCommandQueue.h
index 6fde44c..33abb70 100644
--- a/BealtoOpenCL/include/CLCommandQueue.h
+++ b/BealtoOpenCL/include/CLCommandQueue.h
@@ -43,45 +43,55 @@ public:
   // If the call fails, the returned event is invalid.
 
   // Read buffer to host memory
-  Event readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
+  cl_bool readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
   {
-    if (b == 0) return Event(0); // Invalid
+    if (b == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
     cl_event e = 0;
     cl_int status = clEnqueueReadBuffer(mX,b->mX,blocking_read,offset,cb,ptr,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      clWaitForEvents(1, &e);
+      clReleaseEvent(e);
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
   }
 
   // Write buffer from host memory
-  Event writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
+  cl_bool writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
   {
-    if (b == 0) return Event(0); // Invalid
+    if (b == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
     cl_event e = 0;
     cl_int status = clEnqueueWriteBuffer(mX,b->mX,blocking_write,offset,cb,ptr,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      clWaitForEvents(1, &e);
+      clReleaseEvent(e);
+    }
+
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
   }
 
   // Copy buffers
-  Event copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
+  cl_bool copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
   {
-    if (src == 0 || dst == 0) return Event(0); // Invalid
+    if (src == 0 || dst == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
     cl_event e = 0;
     cl_int status = clEnqueueCopyBuffer(mX,src->mX,dst->mX,src_offset,dst_offset,cb,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      clWaitForEvents(1, &e);
+      clReleaseEvent(e);
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
   }
 
   // Map buffer.  The mapped address is put in ADDRESS.
@@ -127,9 +137,9 @@ public:
   // N is the total number of work items (global work size).
   // G is the number of work items inside a work group. G must divide N.
   // G can be 0, in which case the OpenCL implementation will choose the best value.
-  Event execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
+  cl_bool execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
   {
-    if (k == 0) return Event(0); // Invalid
+    if (k == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
@@ -138,17 +148,20 @@ public:
     size_t * plw = (g>0)?(&g):0;
     cl_int status = clEnqueueNDRangeKernel(mX,k->mX,1,0,pgw,plw,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      clWaitForEvents(1, &e);
+      clReleaseEvent(e);
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
   }
 
   // Enqueue 2D kernel execution
   // K is the kernel to run.
   // NX*NY is the total number of work items (global work size).
   // GX*GY is the number of work items inside a work group. G<d> must divide N<d>.
-  Event execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
+  cl_bool execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
   {
-    if (k == 0) return Event(0); // Invalid
+    if (k == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
@@ -157,8 +170,11 @@ public:
     size_t plw[2]; plw[0] = gx; plw[1] = gy;
     cl_int status = clEnqueueNDRangeKernel(mX,k->mX,2,0,pgw,plw,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      //      clWaitForEvents(1, &e);
+      clReleaseEvent(e);
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
   }
 
   //
diff --git a/MPBenchmarks/ArithmeticTasks.cpp b/MPBenchmarks/ArithmeticTasks.cpp
index 670f1d6..61ef40d 100644
--- a/MPBenchmarks/ArithmeticTasks.cpp
+++ b/MPBenchmarks/ArithmeticTasks.cpp
@@ -49,15 +49,15 @@ double AddNGPUTask::run(int workgroupSize,size_t sz)
 
   // Initialize A and B
   for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF);
-  if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+  if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
   for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF);
-  if (!q->writeBuffer(b,true,0,sz,buf).isValid()) goto END;
+  if (q->writeBuffer(b,true,0,sz,buf) == CL_FALSE) goto END;
 
   // Run tests, double nOps until min time is reached
   kernel->setArg(0,a);
   kernel->setArg(1,b);
   kernel->setArg(2,y);
-  if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END;
+  if (q->execKernel1(kernel,n,workgroupSize) == CL_FALSE) goto END;
   for (int nOps = 1; ; nOps <<= 1)
   {
     double t0 = getT();
@@ -107,13 +107,13 @@ double Mul1GPUTask::run(int workgroupSize,size_t sz)
 
   // Initialize A
   for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF);
-  if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+  if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
 
   // Run tests, double nOps until min time is reached
   kernel->setArg(0,kk);
   kernel->setArg(1,a);
   kernel->setArg(2,y);
-  if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END;
+  if (q->execKernel1(kernel,n,workgroupSize)== CL_FALSE) goto END;
   for (int nOps = 1; ; nOps <<= 1)
   {
     double t0 = getT();
diff --git a/MPBenchmarks/MemoryTasks.cpp b/MPBenchmarks/MemoryTasks.cpp
index c6c0fb2..e32036d 100644
--- a/MPBenchmarks/MemoryTasks.cpp
+++ b/MPBenchmarks/MemoryTasks.cpp
@@ -40,9 +40,10 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
 
   // Initialize A and check errors
   for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF);
-  if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+  if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
   for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)0;
-  if (!q->readBuffer(a,true,0,sz,buf).isValid()) goto END;
+  if (q->readBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
+
   ok = true;
 
   // check write+read loop
@@ -53,11 +54,11 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
   if (b != 0)
   {
     for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF);
-    if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+    if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
     for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)0;
-    if (!q->writeBuffer(b,true,0,sz,buf).isValid()) goto END;
-    if (!q->copyBuffer(a,b,0,0,sz).isValid()) goto END;
-    if (!q->readBuffer(b,true,0,sz,buf).isValid()) goto END;
+    if (q->writeBuffer(b,true,0,sz,buf) == CL_FALSE) goto END;
+    if (q->copyBuffer(a,b,0,0,sz) == CL_FALSE) goto END;
+    if (q->readBuffer(b,true,0,sz,buf) == CL_FALSE) goto END;
     for (size_t i=0;i<sz;i++) if (buf[i] != (unsigned char)(i & 0xFF)) ok = false;
     if (!ok) { fprintf(stderr,"write+copy+read failed\n"); goto END; }
   }
@@ -74,11 +75,10 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
     case DEVICE_TO_HOST_COPY:
       for (int i=0;i<nOps;i++) q->readBuffer(a,false,0,sz,buf);
       break;
-    case DEVICE_TO_DEVICE_COPY: {
+    case DEVICE_TO_DEVICE_COPY:
       for (int i=0;i<nOps;i++) { ncount++; q->copyBuffer(a,b,0,0,sz);}
       break;
     }
-    }
     q->finish();
     double t = (getT() - t0);
     if (t < MIN_RUNNING_TIME) continue;
@@ -95,7 +95,6 @@ END:
   if (buf != 0) _aligned_free(buf);
   if (a != 0) delete a;
   if (b != 0) delete b;
-
   return mbps;
 }
 
@@ -122,14 +121,14 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz)
 
   // Initialize A
   for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF);
-  if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+  if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
 
   // Run tests, double nOps until min time is reached
   kernel->setArg(0,a);
-  if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END;
+  if (q->execKernel1(kernel,n,workgroupSize) == CL_FALSE) goto END;
 
   // check write+zero+read
-  if (!q->readBuffer(a,true,0,sz,buf).isValid()) goto END;
+  if (q->readBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
   ok = true;
   for (size_t i=0;i<sz;i++) if (buf[i] != (unsigned char)0) ok = false;
   if (!ok) { fprintf(stderr,"write+zero+read failed\n"); goto END; }
diff --git a/MPBenchmarks/appMain.cpp b/MPBenchmarks/appMain.cpp
index 2992859..3ddb725 100644
--- a/MPBenchmarks/appMain.cpp
+++ b/MPBenchmarks/appMain.cpp
@@ -103,6 +103,7 @@ int main(int argc,char ** argv)
 
   int minLogSize = 10;
   int maxLogSize = 29;
+
   std::vector<TestResult> allTests;
 
   // Run all CPU tests
@@ -179,7 +180,7 @@ int main(int argc,char ** argv)
 	      char aux[200];
 	      std::string usz;
 	      getUserSize(ls,usz);
-	      _snprintf(aux,200,"NT=%3d  SZ=%7s   %6.0f MB/s",nt,usz.c_str(),mbps);
+	      _snprintf(aux,200,"NTHREAD=%3d  SZ=%7s   %6.0f MB/s",nt,usz.c_str(),mbps);
 	      log->append(aux);
 	      tr.update(ls,mbps);
 	    }
@@ -265,7 +266,6 @@ int main(int argc,char ** argv)
 	    title.assign("Mul1 v1 32-bit");
 	    g = new Mul1GPUTask(MUL1_V1,1,log);
 	    break;
-
 	  }
 	if (g == 0) continue; // nothing to do
author	Zhenyu Wang <zhenyuw@linux.intel.com>	2014-10-23 13:09:21 +0800
committer	Zhenyu Wang <zhenyuw@linux.intel.com>	2014-10-23 13:09:21 +0800
commit	fd0204918b920077a14298b00a64bf38f08fb2d3 (patch)
tree	fe627ef9955d46b2086247e9ea964275bbe056e2
parent	68e519cdb3e28459ffc53e490b0c8288cd579472 (diff)