summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhenyu Wang <zhenyuw@linux.intel.com>2014-10-22 15:15:10 +0800
committerZhenyu Wang <zhenyuw@linux.intel.com>2014-10-22 15:15:10 +0800
commit13f670a2202fe598ae1cdf44979c90fec3cdcf91 (patch)
treecc9b230540194ca9a6f9b5957f7710ca8982d24c
parent1953adf5e7978439fc9ec25caaf977e4d28d6355 (diff)
Print number of executions
Print the number of copyBuffer calls made for the device-to-device copy in CopyGPUTask and the number of kernel executions made by ZeroGPUTask. Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r--MPBenchmarks/MemoryTasks.cpp16
1 files changed, 12 insertions, 4 deletions
diff --git a/MPBenchmarks/MemoryTasks.cpp b/MPBenchmarks/MemoryTasks.cpp
index a7780bc..c6c0fb2 100644
--- a/MPBenchmarks/MemoryTasks.cpp
+++ b/MPBenchmarks/MemoryTasks.cpp
@@ -5,13 +5,14 @@
#include <string.h>
-
// GPU
double CopyGPUTask::run(int workgroupSize,size_t sz)
{
cl::Context * c = getContext();
cl::CommandQueue * q = getQueue();
+ int ncount = 0;
+
if (c == 0 || q == 0) return -1;
// Check allocation size
@@ -73,10 +74,11 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
case DEVICE_TO_HOST_COPY:
for (int i=0;i<nOps;i++) q->readBuffer(a,false,0,sz,buf);
break;
- case DEVICE_TO_DEVICE_COPY:
- for (int i=0;i<nOps;i++) q->copyBuffer(a,b,0,0,sz);
+ case DEVICE_TO_DEVICE_COPY: {
+ for (int i=0;i<nOps;i++) { ncount++; q->copyBuffer(a,b,0,0,sz);}
break;
}
+ }
q->finish();
double t = (getT() - t0);
if (t < MIN_RUNNING_TIME) continue;
@@ -87,6 +89,9 @@ double CopyGPUTask::run(int workgroupSize,size_t sz)
}
END:
+ if (mCT == DEVICE_TO_DEVICE_COPY)
+ fprintf(stderr, "call copy buffer %d times\n", ncount);
+
if (buf != 0) _aligned_free(buf);
if (a != 0) delete a;
if (b != 0) delete b;
@@ -99,6 +104,8 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz)
cl::Context * c = getContext();
cl::CommandQueue * q = getQueue();
cl::Program * p = getProgram();
+ int ncount = 0;
+
if (c == 0 || q == 0 || p == 0) return -1;
// Check allocation size
@@ -130,7 +137,7 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz)
for (int nOps = 5; ; nOps <<= 1)
{
double t0 = getT();
- for (int i=0;i<nOps;i++) q->execKernel1(kernel,n,workgroupSize);
+ for (int i=0;i<nOps;i++) { ncount++; q->execKernel1(kernel,n,workgroupSize);}
q->finish();
double t = (getT() - t0);
if (t < MIN_RUNNING_TIME) continue;
@@ -141,6 +148,7 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz)
}
END:
+ fprintf(stderr, "exec kernel %d times\n", ncount);
if (buf != 0) _aligned_free(buf);
if (a != 0) delete a;
if (kernel != 0) delete kernel;