diff options
author | Zhenyu Wang <zhenyuw@linux.intel.com> | 2014-10-22 15:15:10 +0800 |
---|---|---|
committer | Zhenyu Wang <zhenyuw@linux.intel.com> | 2014-10-22 15:15:10 +0800 |
commit | 13f670a2202fe598ae1cdf44979c90fec3cdcf91 (patch) | |
tree | cc9b230540194ca9a6f9b5957f7710ca8982d24c | |
parent | 1953adf5e7978439fc9ec25caaf977e4d28d6355 (diff) |
Print number of executions
Print the number of copy-buffer calls for the device-to-device copy task and the number of kernel executions for the zero-set task.
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r-- | MPBenchmarks/MemoryTasks.cpp | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/MPBenchmarks/MemoryTasks.cpp b/MPBenchmarks/MemoryTasks.cpp index a7780bc..c6c0fb2 100644 --- a/MPBenchmarks/MemoryTasks.cpp +++ b/MPBenchmarks/MemoryTasks.cpp @@ -5,13 +5,14 @@ #include <string.h> - // GPU double CopyGPUTask::run(int workgroupSize,size_t sz) { cl::Context * c = getContext(); cl::CommandQueue * q = getQueue(); + int ncount = 0; + if (c == 0 || q == 0) return -1; // Check allocation size @@ -73,10 +74,11 @@ double CopyGPUTask::run(int workgroupSize,size_t sz) case DEVICE_TO_HOST_COPY: for (int i=0;i<nOps;i++) q->readBuffer(a,false,0,sz,buf); break; - case DEVICE_TO_DEVICE_COPY: - for (int i=0;i<nOps;i++) q->copyBuffer(a,b,0,0,sz); + case DEVICE_TO_DEVICE_COPY: { + for (int i=0;i<nOps;i++) { ncount++; q->copyBuffer(a,b,0,0,sz);} break; } + } q->finish(); double t = (getT() - t0); if (t < MIN_RUNNING_TIME) continue; @@ -87,6 +89,9 @@ double CopyGPUTask::run(int workgroupSize,size_t sz) } END: + if (mCT == DEVICE_TO_DEVICE_COPY) + fprintf(stderr, "call copy buffer %d times\n", ncount); + if (buf != 0) _aligned_free(buf); if (a != 0) delete a; if (b != 0) delete b; @@ -99,6 +104,8 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz) cl::Context * c = getContext(); cl::CommandQueue * q = getQueue(); cl::Program * p = getProgram(); + int ncount = 0; + if (c == 0 || q == 0 || p == 0) return -1; // Check allocation size @@ -130,7 +137,7 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz) for (int nOps = 5; ; nOps <<= 1) { double t0 = getT(); - for (int i=0;i<nOps;i++) q->execKernel1(kernel,n,workgroupSize); + for (int i=0;i<nOps;i++) { ncount++; q->execKernel1(kernel,n,workgroupSize);} q->finish(); double t = (getT() - t0); if (t < MIN_RUNNING_TIME) continue; @@ -141,6 +148,7 @@ double ZeroGPUTask::run(int workgroupSize,size_t sz) } END: + fprintf(stderr, "exec kernel %d times\n", ncount); if (buf != 0) _aligned_free(buf); if (a != 0) delete a; if (kernel != 0) delete kernel; |