1 files changed, 36 insertions, 20 deletions
diff --git a/BealtoOpenCL/include/CLCommandQueue.h b/BealtoOpenCL/include/CLCommandQueue.h
index 6fde44c..33abb70 100644
--- a/BealtoOpenCL/include/CLCommandQueue.h
+++ b/BealtoOpenCL/include/CLCommandQueue.h
@@ -43,45 +43,55 @@ public:
   // If the call fails, the returned event is invalid.
 
   // Read buffer to host memory
-  Event readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
+  cl_bool readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
   {
-    if (b == 0) return Event(0); // Invalid
+    if (b == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
     cl_event e = 0;
     cl_int status = clEnqueueReadBuffer(mX,b->mX,blocking_read,offset,cb,ptr,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) { // e is still 0 when the enqueue produced no event
+      if (status == CL_SUCCESS) status = clWaitForEvents(1, &e); // keep the wait result so a failed wait fails the call
+      clReleaseEvent(e); // the event is no longer handed to the caller, so release it here
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; // CL_TRUE only if both the enqueue and the wait succeeded
   }
 
   // Write buffer from host memory
-  Event writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
+  cl_bool writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
   {
-    if (b == 0) return Event(0); // Invalid
+    if (b == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
     cl_event e = 0;
     cl_int status = clEnqueueWriteBuffer(mX,b->mX,blocking_write,offset,cb,ptr,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) { // e is still 0 when the enqueue produced no event
+      if (status == CL_SUCCESS) status = clWaitForEvents(1, &e); // keep the wait result so a failed wait fails the call
+      clReleaseEvent(e); // the event is no longer handed to the caller, so release it here
+    }
+
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; // CL_TRUE only if both the enqueue and the wait succeeded
   }
 
   // Copy buffers
-  Event copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
+  cl_bool copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
   {
-    if (src == 0 || dst == 0) return Event(0); // Invalid
+    if (src == 0 || dst == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
     cl_event e = 0;
     cl_int status = clEnqueueCopyBuffer(mX,src->mX,dst->mX,src_offset,dst_offset,cb,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) { // e is still 0 when the enqueue produced no event
+      if (status == CL_SUCCESS) status = clWaitForEvents(1, &e); // keep the wait result so a failed wait fails the call
+      clReleaseEvent(e); // the event is no longer handed to the caller, so release it here
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; // CL_TRUE only if both the enqueue and the wait succeeded
   }
 
   // Map buffer.  The mapped address is put in ADDRESS.
@@ -127,9 +137,9 @@ public:
   // N is the total number of work items (global work size).
   // G is the number of work items inside a work group. G must divide N.
   // G can be 0, in which case the OpenCL implementation will choose the best value.
-  Event execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
+  cl_bool execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
   {
-    if (k == 0) return Event(0); // Invalid
+    if (k == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
@@ -138,17 +148,20 @@ public:
     size_t * plw = (g>0)?(&g):0;
     cl_int status = clEnqueueNDRangeKernel(mX,k->mX,1,0,pgw,plw,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      if (status == CL_SUCCESS) status = clWaitForEvents(1, &e); // keep the wait result so a failed wait fails the call
+      clReleaseEvent(e); // the event is no longer handed to the caller, so release it here
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; // CL_TRUE only if both the enqueue and the wait succeeded
   }
 
   // Enqueue 2D kernel execution
   // K is the kernel to run.
   // NX*NY is the total number of work items (global work size).
   // GX*GY is the number of work items inside a work group. G<d> must divide N<d>.
-  Event execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
+  cl_bool execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
   {
-    if (k == 0) return Event(0); // Invalid
+    if (k == 0) return CL_FALSE; // Invalid
     cl_uint num_events = 0;
     const cl_event * events = 0;
     wait_list.getParams(num_events,events);
@@ -157,8 +170,11 @@ public:
     size_t plw[2]; plw[0] = gx; plw[1] = gy;
     cl_int status = clEnqueueNDRangeKernel(mX,k->mX,2,0,pgw,plw,num_events,events,&e);
     REPORT_OPENCL_STATUS(status);
-    if (status != CL_SUCCESS) e = 0;
-    return Event(e);
+    if (e != 0) {
+      // NOTE(review): wait is disabled here, unlike execKernel1 -- confirm this is intended: clWaitForEvents(1, &e);
+      clReleaseEvent(e); // the event is not handed to the caller, so release it here
+    }
+    return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; // reflects the enqueue status only; nothing is waited on here
   }
 
   //