diff options
author | Zhenyu Wang <zhenyuw@linux.intel.com> | 2014-10-23 13:09:21 +0800 |
---|---|---|
committer | Zhenyu Wang <zhenyuw@linux.intel.com> | 2014-10-23 13:09:21 +0800 |
commit | fd0204918b920077a14298b00a64bf38f08fb2d3 (patch) | |
tree | fe627ef9955d46b2086247e9ea964275bbe056e2 | |
parent | 68e519cdb3e28459ffc53e490b0c8288cd579472 (diff) |
Fix implicit Event leak
Do not return an Event object; handle the cl_event inside the function itself.
Fix implicit event leak.
It now waits on the cl_event and then releases it.
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r-- | BealtoOpenCL/include/CLCommandQueue.h | 56 | ||||
-rw-r--r-- | MPBenchmarks/ArithmeticTasks.cpp | 10 | ||||
-rw-r--r-- | MPBenchmarks/MemoryTasks.cpp | 23 | ||||
-rw-r--r-- | MPBenchmarks/appMain.cpp | 4 |
4 files changed, 54 insertions, 39 deletions
diff --git a/BealtoOpenCL/include/CLCommandQueue.h b/BealtoOpenCL/include/CLCommandQueue.h index 6fde44c..33abb70 100644 --- a/BealtoOpenCL/include/CLCommandQueue.h +++ b/BealtoOpenCL/include/CLCommandQueue.h @@ -43,45 +43,55 @@ public: // If the call fails, the returned event is invalid. // Read buffer to host memory - Event readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList()) + cl_bool readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList()) { - if (b == 0) return Event(0); // Invalid + if (b == 0) return CL_FALSE; // Invalid cl_uint num_events = 0; const cl_event * events = 0; wait_list.getParams(num_events,events); cl_event e = 0; cl_int status = clEnqueueReadBuffer(mX,b->mX,blocking_read,offset,cb,ptr,num_events,events,&e); REPORT_OPENCL_STATUS(status); - if (status != CL_SUCCESS) e = 0; - return Event(e); + if (e != 0) { + clWaitForEvents(1, &e); + clReleaseEvent(e); + } + return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; } // Write buffer from host memory - Event writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList()) + cl_bool writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList()) { - if (b == 0) return Event(0); // Invalid + if (b == 0) return CL_FALSE; // Invalid cl_uint num_events = 0; const cl_event * events = 0; wait_list.getParams(num_events,events); cl_event e = 0; cl_int status = clEnqueueWriteBuffer(mX,b->mX,blocking_write,offset,cb,ptr,num_events,events,&e); REPORT_OPENCL_STATUS(status); - if (status != CL_SUCCESS) e = 0; - return Event(e); + if (e != 0) { + clWaitForEvents(1, &e); + clReleaseEvent(e); + } + + return status == CL_SUCCESS ? 
CL_TRUE : CL_FALSE; } // Copy buffers - Event copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList()) + cl_bool copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList()) { - if (src == 0 || dst == 0) return Event(0); // Invalid + if (src == 0 || dst == 0) return CL_FALSE; // Invalid cl_uint num_events = 0; const cl_event * events = 0; wait_list.getParams(num_events,events); cl_event e = 0; cl_int status = clEnqueueCopyBuffer(mX,src->mX,dst->mX,src_offset,dst_offset,cb,num_events,events,&e); REPORT_OPENCL_STATUS(status); - if (status != CL_SUCCESS) e = 0; - return Event(e); + if (e != 0) { + clWaitForEvents(1, &e); + clReleaseEvent(e); + } + return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; } // Map buffer. The mapped address is put in ADDRESS. @@ -127,9 +137,9 @@ public: // N is the total number of work items (global work size). // G is the number of work items inside a work group. G must divide N. // G can be 0, in which case the OpenCL implementation will choose the best value. - Event execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList()) + cl_bool execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList()) { - if (k == 0) return Event(0); // Invalid + if (k == 0) return CL_FALSE; // Invalid cl_uint num_events = 0; const cl_event * events = 0; wait_list.getParams(num_events,events); @@ -138,17 +148,20 @@ public: size_t * plw = (g>0)?(&g):0; cl_int status = clEnqueueNDRangeKernel(mX,k->mX,1,0,pgw,plw,num_events,events,&e); REPORT_OPENCL_STATUS(status); - if (status != CL_SUCCESS) e = 0; - return Event(e); + if (e != 0) { + clWaitForEvents(1, &e); + clReleaseEvent(e); + } + return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; } // Enqueue 2D kernel execution // K is the kernel to run. // NX*NY is the total number of work items (global work size). 
// GX*GY is the number of work items inside a work group. G<d> must divide N<d>. - Event execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList()) + cl_bool execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList()) { - if (k == 0) return Event(0); // Invalid + if (k == 0) return CL_FALSE; // Invalid cl_uint num_events = 0; const cl_event * events = 0; wait_list.getParams(num_events,events); @@ -157,8 +170,11 @@ public: size_t plw[2]; plw[0] = gx; plw[1] = gy; cl_int status = clEnqueueNDRangeKernel(mX,k->mX,2,0,pgw,plw,num_events,events,&e); REPORT_OPENCL_STATUS(status); - if (status != CL_SUCCESS) e = 0; - return Event(e); + if (e != 0) { + // clWaitForEvents(1, &e); + clReleaseEvent(e); + } + return status == CL_SUCCESS ? CL_TRUE : CL_FALSE; } // diff --git a/MPBenchmarks/ArithmeticTasks.cpp b/MPBenchmarks/ArithmeticTasks.cpp index 670f1d6..61ef40d 100644 --- a/MPBenchmarks/ArithmeticTasks.cpp +++ b/MPBenchmarks/ArithmeticTasks.cpp @@ -49,15 +49,15 @@ double AddNGPUTask::run(int workgroupSize,size_t sz) // Initialize A and B for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF); - if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF); - if (!q->writeBuffer(b,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(b,true,0,sz,buf) == CL_FALSE) goto END; // Run tests, double nOps until min time is reached kernel->setArg(0,a); kernel->setArg(1,b); kernel->setArg(2,y); - if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END; + if (q->execKernel1(kernel,n,workgroupSize) == CL_FALSE) goto END; for (int nOps = 1; ; nOps <<= 1) { double t0 = getT(); @@ -107,13 +107,13 @@ double Mul1GPUTask::run(int workgroupSize,size_t sz) // Initialize A for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF); - if 
(!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; // Run tests, double nOps until min time is reached kernel->setArg(0,kk); kernel->setArg(1,a); kernel->setArg(2,y); - if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END; + if (q->execKernel1(kernel,n,workgroupSize)== CL_FALSE) goto END; for (int nOps = 1; ; nOps <<= 1) { double t0 = getT(); diff --git a/MPBenchmarks/MemoryTasks.cpp b/MPBenchmarks/MemoryTasks.cpp index c6c0fb2..e32036d 100644 --- a/MPBenchmarks/MemoryTasks.cpp +++ b/MPBenchmarks/MemoryTasks.cpp @@ -40,9 +40,10 @@ double CopyGPUTask::run(int workgroupSize,size_t sz) // Initialize A and check errors for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF); - if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)0; - if (!q->readBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->readBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; + ok = true; // check write+read loop @@ -53,11 +54,11 @@ double CopyGPUTask::run(int workgroupSize,size_t sz) if (b != 0) { for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF); - if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)0; - if (!q->writeBuffer(b,true,0,sz,buf).isValid()) goto END; - if (!q->copyBuffer(a,b,0,0,sz).isValid()) goto END; - if (!q->readBuffer(b,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(b,true,0,sz,buf) == CL_FALSE) goto END; + if (q->copyBuffer(a,b,0,0,sz) == CL_FALSE) goto END; + if (q->readBuffer(b,true,0,sz,buf) == CL_FALSE) goto END; for (size_t i=0;i<sz;i++) if (buf[i] != (unsigned char)(i & 0xFF)) ok = false; if (!ok) { fprintf(stderr,"write+copy+read failed\n"); goto END; } } @@ -74,11 +75,10 @@ double CopyGPUTask::run(int workgroupSize,size_t sz) case 
DEVICE_TO_HOST_COPY: for (int i=0;i<nOps;i++) q->readBuffer(a,false,0,sz,buf); break; - case DEVICE_TO_DEVICE_COPY: { + case DEVICE_TO_DEVICE_COPY: for (int i=0;i<nOps;i++) { ncount++; q->copyBuffer(a,b,0,0,sz);} break; } - } q->finish(); double t = (getT() - t0); if (t < MIN_RUNNING_TIME) continue; @@ -95,7 +95,6 @@ END: if (buf != 0) _aligned_free(buf); if (a != 0) delete a; if (b != 0) delete b; - return mbps; } @@ -122,14 +121,14 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz) // Initialize A for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF); - if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; // Run tests, double nOps until min time is reached kernel->setArg(0,a); - if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END; + if (q->execKernel1(kernel,n,workgroupSize) == CL_FALSE) goto END; // check write+zero+read - if (!q->readBuffer(a,true,0,sz,buf).isValid()) goto END; + if (q->readBuffer(a,true,0,sz,buf) == CL_FALSE) goto END; ok = true; for (size_t i=0;i<sz;i++) if (buf[i] != (unsigned char)0) ok = false; if (!ok) { fprintf(stderr,"write+zero+read failed\n"); goto END; } diff --git a/MPBenchmarks/appMain.cpp b/MPBenchmarks/appMain.cpp index 2992859..3ddb725 100644 --- a/MPBenchmarks/appMain.cpp +++ b/MPBenchmarks/appMain.cpp @@ -103,6 +103,7 @@ int main(int argc,char ** argv) int minLogSize = 10; int maxLogSize = 29; + std::vector<TestResult> allTests; // Run all CPU tests @@ -179,7 +180,7 @@ int main(int argc,char ** argv) char aux[200]; std::string usz; getUserSize(ls,usz); - _snprintf(aux,200,"NT=%3d SZ=%7s %6.0f MB/s",nt,usz.c_str(),mbps); + _snprintf(aux,200,"NTHREAD=%3d SZ=%7s %6.0f MB/s",nt,usz.c_str(),mbps); log->append(aux); tr.update(ls,mbps); } @@ -265,7 +266,6 @@ int main(int argc,char ** argv) title.assign("Mul1 v1 32-bit"); g = new Mul1GPUTask(MUL1_V1,1,log); break; - } if (g == 0) continue; // nothing to do |