diff options
author | Junyan He <junyan.he@intel.com> | 2016-04-03 00:31:50 +0800 |
---|---|---|
committer | Junyan He <junyan.he@intel.com> | 2016-04-03 00:31:50 +0800 |
commit | 1c58d14bf1c4b9ce52dfeca4dae0a5c9a7ede1b9 (patch) | |
tree | 6efce91499ae0e9ac03a6c4a25dd4d2b1c50ea96 /backend/src | |
parent | f11a74496c4b607add2e8a70a6619593643dc668 (diff) |
ndrange
Diffstat (limited to 'backend/src')
-rw-r--r-- | backend/src/driver/cl_gen_kernel.cpp | 52 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_mem.cpp | 10 |
2 files changed, 54 insertions, 8 deletions
```diff
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index 8e2baf3d..3e1877c2 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -351,6 +351,45 @@ static int32_t genFillCurbe(cl_kernel kernel, Kernel* ker, char* curbe, const ui
   return slm_offset;
 }
 
+cl_int genGPUBindSurfaces(GenGPUState& gpuState, cl_command_queue queue, cl_kernel kernel, Kernel* ker)
+{
+  /* Bind all user buffers (given by clSetKernelArg) */
+  uint32_t i;
+  enum gbe_arg_type arg_type; /* kind of argument */
+  for (i = 0; i < ker->getArgNum(); ++i) {
+    int32_t offset; // location of the address in the curbe
+    arg_type = ker->getArgType(i);
+    cl_mem mem = kernel->args[i]->mem;
+    if (arg_type != GBE_ARG_GLOBAL_PTR || !mem)
+      continue;
+
+    offset = ker->getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, i);
+    if (offset < 0)
+      continue;
+
+    cl_mem_buffer buffer = cl_mem_to_buffer(mem);
+    uint8_t bti = ker->getArgBTI(i);
+    GenGPUMem* genMem = reinterpret_cast<GenGPUMem*>(getGenMemPrivate(mem, queue->device));
+    if (genMem == NULL)
+      return CL_INVALID_VALUE;
+
+    if (genMem->bo == NULL) { // We have not allocated buffer, first time use this mem.
+      if (genMem->genAllocMemBo(mem) == false) {
+        return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+      }
+    }
+
+    if (genMem->alignedHostPtr == NULL) {
+      gpuState.bindBuf(genMem->bo, offset, buffer->sub_offset, genMem->realSize, bti);
+    } else {
+      gpuState.bindBuf(genMem->bo, offset, (char*)mem->host_ptr - (char*)genMem->alignedHostPtr
+                       + buffer->sub_offset, genMem->realSize, bti);
+    }
+  }
+
+  return CL_SUCCESS;
+}
+
 extern "C"
 cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const uint32_t work_dim,
                                const size_t *global_wk_off, const size_t *global_wk_sz,
@@ -374,10 +413,8 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
   size_t cst_sz = ker->getCurbeSize();
   int32_t scratch_sz = ker->getScratchSize();
   size_t thread_n = 0u;
-  int printf_num = 0;
   cl_int err = CL_SUCCESS;
   size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2];
-  void* printf_info = NULL;
   bool use_slm = ker->getUseSLM();
   size_t local_sz = 0u;
 
@@ -395,7 +432,7 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
 
   thread_n = (local_sz + simd_sz - 1) / simd_sz;
 
-  if (scratch_sz > gpuDev->scratch_mem_size) {
+  if ((uint32_t)scratch_sz > gpuDev->scratch_mem_size) {
     printf("Out of scratch memory %d.", scratch_sz);
     return CL_OUT_OF_RESOURCES;
   }
@@ -406,7 +443,7 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
   if (curbe) {
     int32_t slm_sz = genFillCurbe(kernel, ker, curbe, work_dim, global_wk_off,
                                   global_wk_sz, local_wk_sz, thread_n);
-    if (slm_sz > queue->device->local_mem_size) {
+    if ((uint32_t)slm_sz > queue->device->local_mem_size) {
       printf("Out of shared local memory %d.\n", slm_sz);
       return CL_OUT_OF_RESOURCES;
     }
@@ -417,8 +454,11 @@ cl_int genEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
   err = ndRange->gpuState->stateInit(
     queue->device->max_compute_unit * gpuDev->max_thread_per_unit, cst_sz / 32);
 
-//  if (queue->props & CL_QUEUE_PROFILING_ENABLE)
-//    err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 0);
+  /* We may allocate buffers, begin to lock.*/
+  GenGPUCommandQueueLocker locker(gpuQueue);
+  err = genGPUBindSurfaces(*ndRange->gpuState, queue, kernel, ker);
+  if (err != CL_SUCCESS)
+    return err;
 }
 
 
diff --git a/backend/src/driver/cl_gen_mem.cpp b/backend/src/driver/cl_gen_mem.cpp
index a2d57b60..33e1d5ca 100644
--- a/backend/src/driver/cl_gen_mem.cpp
+++ b/backend/src/driver/cl_gen_mem.cpp
@@ -146,8 +146,14 @@ bool GenGPUMem::genAllocMemBo(cl_mem mem)
   }
 
   /* Copy the data if required */
-  if (mem->flags & CL_MEM_COPY_HOST_PTR) {
-    drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
+  if ((mem->flags & CL_MEM_COPY_HOST_PTR) ||
+      (mem->flags & CL_MEM_USE_HOST_PTR && this->alignedHostPtr == NULL)) {
+    if (IS_IMAGE(mem)) {
+      drm_intel_bo_subdata(this->bo, 0, mem->size, mem->host_ptr);
+    } else {
+      cl_mem_buffer buffer = cl_mem_to_buffer(mem);
+      drm_intel_bo_subdata(this->bo, buffer->sub_offset, mem->size, mem->host_ptr);
+    }
   }
 
   return true;
```