summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Zhenyu Wang <zhenyuw@linux.intel.com> 2014-10-23 13:09:21 +0800
committer: Zhenyu Wang <zhenyuw@linux.intel.com> 2014-10-23 13:09:21 +0800
commit: fd0204918b920077a14298b00a64bf38f08fb2d3 (patch)
tree: fe627ef9955d46b2086247e9ea964275bbe056e2
parent: 68e519cdb3e28459ffc53e490b0c8288cd579472 (diff)
Fix implicit Event leak
Do not return an Event object; handle the cl_event inside the function itself. This fixes an implicit event leak: the function now waits on the cl_event and then releases it.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r-- BealtoOpenCL/include/CLCommandQueue.h 56
-rw-r--r-- MPBenchmarks/ArithmeticTasks.cpp 10
-rw-r--r-- MPBenchmarks/MemoryTasks.cpp 23
-rw-r--r-- MPBenchmarks/appMain.cpp 4
4 files changed, 54 insertions, 39 deletions
diff --git a/BealtoOpenCL/include/CLCommandQueue.h b/BealtoOpenCL/include/CLCommandQueue.h
index 6fde44c..33abb70 100644
--- a/BealtoOpenCL/include/CLCommandQueue.h
+++ b/BealtoOpenCL/include/CLCommandQueue.h
@@ -43,45 +43,55 @@ public:
// If the call fails, the returned event is invalid.
// Read buffer to host memory
- Event readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
+ cl_bool readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
{
- if (b == 0) return Event(0); // Invalid
+ if (b == 0) return CL_FALSE; // Invalid
cl_uint num_events = 0;
const cl_event * events = 0;
wait_list.getParams(num_events,events);
cl_event e = 0;
cl_int status = clEnqueueReadBuffer(mX,b->mX,blocking_read,offset,cb,ptr,num_events,events,&e);
REPORT_OPENCL_STATUS(status);
- if (status != CL_SUCCESS) e = 0;
- return Event(e);
+ if (e != 0) {
+ clWaitForEvents(1, &e);
+ clReleaseEvent(e);
+ }
+ return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
}
// Write buffer from host memory
- Event writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
+ cl_bool writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
{
- if (b == 0) return Event(0); // Invalid
+ if (b == 0) return CL_FALSE; // Invalid
cl_uint num_events = 0;
const cl_event * events = 0;
wait_list.getParams(num_events,events);
cl_event e = 0;
cl_int status = clEnqueueWriteBuffer(mX,b->mX,blocking_write,offset,cb,ptr,num_events,events,&e);
REPORT_OPENCL_STATUS(status);
- if (status != CL_SUCCESS) e = 0;
- return Event(e);
+ if (e != 0) {
+ clWaitForEvents(1, &e);
+ clReleaseEvent(e);
+ }
+
+ return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
}
// Copy buffers
- Event copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
+ cl_bool copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
{
- if (src == 0 || dst == 0) return Event(0); // Invalid
+ if (src == 0 || dst == 0) return CL_FALSE; // Invalid
cl_uint num_events = 0;
const cl_event * events = 0;
wait_list.getParams(num_events,events);
cl_event e = 0;
cl_int status = clEnqueueCopyBuffer(mX,src->mX,dst->mX,src_offset,dst_offset,cb,num_events,events,&e);
REPORT_OPENCL_STATUS(status);
- if (status != CL_SUCCESS) e = 0;
- return Event(e);
+ if (e != 0) {
+ clWaitForEvents(1, &e);
+ clReleaseEvent(e);
+ }
+ return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
}
// Map buffer. The mapped address is put in ADDRESS.
@@ -127,9 +137,9 @@ public:
// N is the total number of work items (global work size).
// G is the number of work items inside a work group. G must divide N.
// G can be 0, in which case the OpenCL implementation will choose the best value.
- Event execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
+ cl_bool execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
{
- if (k == 0) return Event(0); // Invalid
+ if (k == 0) return CL_FALSE; // Invalid
cl_uint num_events = 0;
const cl_event * events = 0;
wait_list.getParams(num_events,events);
@@ -138,17 +148,20 @@ public:
size_t * plw = (g>0)?(&g):0;
cl_int status = clEnqueueNDRangeKernel(mX,k->mX,1,0,pgw,plw,num_events,events,&e);
REPORT_OPENCL_STATUS(status);
- if (status != CL_SUCCESS) e = 0;
- return Event(e);
+ if (e != 0) {
+ clWaitForEvents(1, &e);
+ clReleaseEvent(e);
+ }
+ return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
}
// Enqueue 2D kernel execution
// K is the kernel to run.
// NX*NY is the total number of work items (global work size).
// GX*GY is the number of work items inside a work group. G<d> must divide N<d>.
- Event execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
+ cl_bool execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
{
- if (k == 0) return Event(0); // Invalid
+ if (k == 0) return CL_FALSE; // Invalid
cl_uint num_events = 0;
const cl_event * events = 0;
wait_list.getParams(num_events,events);
@@ -157,8 +170,11 @@ public:
size_t plw[2]; plw[0] = gx; plw[1] = gy;
cl_int status = clEnqueueNDRangeKernel(mX,k->mX,2,0,pgw,plw,num_events,events,&e);
REPORT_OPENCL_STATUS(status);
- if (status != CL_SUCCESS) e = 0;
- return Event(e);
+ if (e != 0) {
+ // clWaitForEvents(1, &e);
+ clReleaseEvent(e);
+ }
+ return status == CL_SUCCESS ? CL_TRUE : CL_FALSE;
}
//
diff --git a/MPBenchmarks/ArithmeticTasks.cpp b/MPBenchmarks/ArithmeticTasks.cpp
index 670f1d6..61ef40d 100644
--- a/MPBenchmarks/ArithmeticTasks.cpp
+++ b/MPBenchmarks/ArithmeticTasks.cpp
@@ -49,15 +49,15 @@ double AddNGPUTask::run(int workgroupSize,size_t sz)
// Initialize A and B
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF);
- if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF);
- if (!q->writeBuffer(b,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(b,true,0,sz,buf) == CL_FALSE) goto END;
// Run tests, double nOps until min time is reached
kernel->setArg(0,a);
kernel->setArg(1,b);
kernel->setArg(2,y);
- if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END;
+ if (q->execKernel1(kernel,n,workgroupSize) == CL_FALSE) goto END;
for (int nOps = 1; ; nOps <<= 1)
{
double t0 = getT();
@@ -107,13 +107,13 @@ double Mul1GPUTask::run(int workgroupSize,size_t sz)
// Initialize A
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(rand() & 0xFF);
- if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
// Run tests, double nOps until min time is reached
kernel->setArg(0,kk);
kernel->setArg(1,a);
kernel->setArg(2,y);
- if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END;
+ if (q->execKernel1(kernel,n,workgroupSize)== CL_FALSE) goto END;
for (int nOps = 1; ; nOps <<= 1)
{
double t0 = getT();
diff --git a/MPBenchmarks/MemoryTasks.cpp b/MPBenchmarks/MemoryTasks.cpp
index c6c0fb2..e32036d 100644
--- a/MPBenchmarks/MemoryTasks.cpp
+++ b/MPBenchmarks/MemoryTasks.cpp
@@ -40,9 +40,10 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
// Initialize A and check errors
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF);
- if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)0;
- if (!q->readBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->readBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
+
ok = true;
// check write+read loop
@@ -53,11 +54,11 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
if (b != 0)
{
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF);
- if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)0;
- if (!q->writeBuffer(b,true,0,sz,buf).isValid()) goto END;
- if (!q->copyBuffer(a,b,0,0,sz).isValid()) goto END;
- if (!q->readBuffer(b,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(b,true,0,sz,buf) == CL_FALSE) goto END;
+ if (q->copyBuffer(a,b,0,0,sz) == CL_FALSE) goto END;
+ if (q->readBuffer(b,true,0,sz,buf) == CL_FALSE) goto END;
for (size_t i=0;i<sz;i++) if (buf[i] != (unsigned char)(i & 0xFF)) ok = false;
if (!ok) { fprintf(stderr,"write+copy+read failed\n"); goto END; }
}
@@ -74,11 +75,10 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
case DEVICE_TO_HOST_COPY:
for (int i=0;i<nOps;i++) q->readBuffer(a,false,0,sz,buf);
break;
- case DEVICE_TO_DEVICE_COPY: {
+ case DEVICE_TO_DEVICE_COPY:
for (int i=0;i<nOps;i++) { ncount++; q->copyBuffer(a,b,0,0,sz);}
break;
}
- }
q->finish();
double t = (getT() - t0);
if (t < MIN_RUNNING_TIME) continue;
@@ -95,7 +95,6 @@ END:
if (buf != 0) _aligned_free(buf);
if (a != 0) delete a;
if (b != 0) delete b;
-
return mbps;
}
@@ -122,14 +121,14 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz)
// Initialize A
for (size_t i=0;i<sz;i++) buf[i] = (unsigned char)(i & 0xFF);
- if (!q->writeBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->writeBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
// Run tests, double nOps until min time is reached
kernel->setArg(0,a);
- if (!q->execKernel1(kernel,n,workgroupSize).isValid()) goto END;
+ if (q->execKernel1(kernel,n,workgroupSize) == CL_FALSE) goto END;
// check write+zero+read
- if (!q->readBuffer(a,true,0,sz,buf).isValid()) goto END;
+ if (q->readBuffer(a,true,0,sz,buf) == CL_FALSE) goto END;
ok = true;
for (size_t i=0;i<sz;i++) if (buf[i] != (unsigned char)0) ok = false;
if (!ok) { fprintf(stderr,"write+zero+read failed\n"); goto END; }
diff --git a/MPBenchmarks/appMain.cpp b/MPBenchmarks/appMain.cpp
index 2992859..3ddb725 100644
--- a/MPBenchmarks/appMain.cpp
+++ b/MPBenchmarks/appMain.cpp
@@ -103,6 +103,7 @@ int main(int argc,char ** argv)
int minLogSize = 10;
int maxLogSize = 29;
+
std::vector<TestResult> allTests;
// Run all CPU tests
@@ -179,7 +180,7 @@ int main(int argc,char ** argv)
char aux[200];
std::string usz;
getUserSize(ls,usz);
- _snprintf(aux,200,"NT=%3d SZ=%7s %6.0f MB/s",nt,usz.c_str(),mbps);
+ _snprintf(aux,200,"NTHREAD=%3d SZ=%7s %6.0f MB/s",nt,usz.c_str(),mbps);
log->append(aux);
tr.update(ls,mbps);
}
@@ -265,7 +266,6 @@ int main(int argc,char ** argv)
title.assign("Mul1 v1 32-bit");
g = new Mul1GPUTask(MUL1_V1,1,log);
break;
-
}
if (g == 0) continue; // nothing to do