summary refs log tree commit diff
path: root/backend/src/driver
diff options
context:
space:
mode:
author Junyan He <junyan.he@intel.com> 2016-04-03 00:31:50 +0800
committer Junyan He <junyan.he@intel.com> 2016-04-03 00:31:50 +0800
commit 1c58d14bf1c4b9ce52dfeca4dae0a5c9a7ede1b9 (patch)
tree 6efce91499ae0e9ac03a6c4a25dd4d2b1c50ea96 /backend/src/driver
parent f11a74496c4b607add2e8a70a6619593643dc668 (diff)
ndrange
Diffstat (limited to 'backend/src/driver')
-rw-r--r-- backend/src/driver/cl_gen_kernel.cpp 52
-rw-r--r-- backend/src/driver/cl_gen_mem.cpp 10
2 files changed, 54 insertions, 8 deletions
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index 8e2baf3d..3e1877c2 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -351,6 +351,45 @@ static int32_t genFillCurbe(cl_kernel kernel, Kernel* ker, char* curbe, const ui
return slm_offset;
}
+cl_int genGPUBindSurfaces(GenGPUState& gpuState, cl_command_queue queue, cl_kernel kernel, Kernel* ker)
+{
+ /* Bind all user buffers (given by clSetKernelArg): each global-pointer argument with a non-NULL cl_mem is passed to gpuState.bindBuf(); returns CL_SUCCESS or the first error hit. */
+ uint32_t i; /* index of the kernel argument being examined */
+ enum gbe_arg_type arg_type; /* kind of argument */
+ for (i = 0; i < ker->getArgNum(); ++i) {
+ int32_t offset; // location of the address in the curbe
+ arg_type = ker->getArgType(i);
+ cl_mem mem = kernel->args[i]->mem;
+ if (arg_type != GBE_ARG_GLOBAL_PTR || !mem) // skip anything that is not a global pointer, or has no mem object set
+ continue;
+
+ offset = ker->getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, i);
+ if (offset < 0) // presumably a negative offset means the argument has no curbe slot -- TODO confirm against getCurbeOffset
+ continue;
+
+ cl_mem_buffer buffer = cl_mem_to_buffer(mem);
+ uint8_t bti = ker->getArgBTI(i); // BTI value reported by the kernel for this argument, forwarded to bindBuf
+ GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device));
+ if (genMem == NULL) // no GenGPUMem was found for this mem on the queue's device
+ return CL_INVALID_VALUE;
+
+ if (genMem->bo == NULL) { // No bo has been allocated yet: this is the first use of this mem.
+ if (genMem->genAllocMemBo(mem) == false) {
+ return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ }
+ }
+
+ if (genMem->alignedHostPtr == NULL) { // no aligned host pointer recorded: the bind offset is just the buffer's sub_offset
+ gpuState.bindBuf(genMem->bo, offset, buffer->sub_offset, genMem->realSize, bti);
+ } else { // otherwise the distance from alignedHostPtr to mem->host_ptr is added to sub_offset
+ gpuState.bindBuf(genMem->bo, offset, (char*)mem->host_ptr - (char*)genMem->alignedHostPtr
+ + buffer->sub_offset, genMem->realSize, bti);
+ }
+ }
+
+ return CL_SUCCESS; // the loop finished without an early error return
+}
+
extern "C"
cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
const size_t *global_wk_off, const size_t *global_wk_sz,
@@ -374,10 +413,8 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
size_t cst_sz = ker->getCurbeSize();
int32_t scratch_sz = ker->getScratchSize();
size_t thread_n = 0u;
- int printf_num = 0;
cl_int err = CL_SUCCESS;
size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2];
- void* printf_info = NULL;
bool use_slm = ker->getUseSLM();
size_t local_sz = 0u;
@@ -395,7 +432,7 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
thread_n = (local_sz + simd_sz - 1) / simd_sz;
- if (scratch_sz > gpuDev->scratch_mem_size) {
+ if ((uint32_t)scratch_sz > gpuDev->scratch_mem_size) {
printf("Out of scratch memory %d.", scratch_sz);
return CL_OUT_OF_RESOURCES;
}
@@ -406,7 +443,7 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
if (curbe) {
int32_t slm_sz = genFillCurbe(kernel, ker, curbe, work_dim,
global_wk_off, global_wk_sz, local_wk_sz, thread_n);
- if (slm_sz > queue->device->local_mem_size) {
+ if ((uint32_t)slm_sz > queue->device->local_mem_size) {
printf("Out of shared local memory %d.\n", slm_sz);
return CL_OUT_OF_RESOURCES;
}
@@ -417,8 +454,11 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
err = ndRange->gpuState->stateInit(
queue->device->max_compute_unit * gpuDev->max_thread_per_unit, cst_sz / 32);
-// if (queue->props & CL_QUEUE_PROFILING_ENABLE)
-// err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 0);
+ /* We may allocate buffers, begin to lock.*/
+ GenGPUCommandQueueLocker locker(gpuQueue);
+ err = genGPUBindSurfaces(*ndRange->gpuState, queue, kernel, ker);
+ if (err != CL_SUCCESS)
+ return err;
}
diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp
index a2d57b60..33e1d5ca 100644
--- a/backend/src/driver/cl_gen_mem.cpp
+++ b/backend/src/driver/cl_gen_mem.cpp
@@ -146,8 +146,14 @@ bool GenGPUMem::genAllocMemBo(cl_mem mem)
}
/* Copy the data if required */
- if (mem->flags & CL_MEM_COPY_HOST_PTR) {
- drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
+ if ((mem->flags & CL_MEM_COPY_HOST_PTR) ||
+ (mem->flags & CL_MEM_USE_HOST_PTR && this->alignedHostPtr == NULL)) {
+ if (IS_IMAGE(mem)) {
+ drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
+ } else {
+ cl_mem_buffer buffer = cl_mem_to_buffer(mem);
+ drm_intel_bo_subdata(this->bo, buffer->sub_offset, mem->size, mem->host_ptr);
+ }
}
return true;