summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- backend/src/driver/cl_gen_kernel.cpp | 31
-rw-r--r-- include/cl_driver.h | 1
-rw-r--r-- libclapi/cl_kernel.c | 16
3 files changed, 37 insertions, 11 deletions
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index cfde43e5..c9ad3a39 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -79,19 +79,12 @@ static size_t genGetKernelMaxWorkGroupSize(cl_kernel kernel, Kernel* ker, const
}
extern "C"
-cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device)
+cl_int GenGetWorkgroupInfo(cl_kernel kernel, const cl_device_id device, cl_kernel_workgroup_info wgInfo)
{
- GBE_ASSERT(getGenKernelPrivate(kernel, device) == NULL);
-
- Program* p = reinterpret_cast<Program*>(getGenProgramPrivate(kernel->program, device));
- if (p == NULL)
- return CL_INVALID_PROGRAM;
-
- Kernel* ker = p->getKernel(kernel->name);
+ Kernel* ker = reinterpret_cast<Kernel*>(getGenKernelPrivate(kernel, device));
if (ker == NULL)
- return CL_INVALID_KERNEL_NAME;
+ return CL_INVALID_VALUE;
- cl_kernel_workgroup_info wgInfo = &kernel->wg_info[findIndexByDevice(kernel->program->ctx, device)];
ker->getCompileWorkGroupSize(wgInfo->compile_wg_sz);
wgInfo->local_mem_sz = ker->getSLMSize();
wgInfo->private_mem_sz = ker->getStackSize();
@@ -108,6 +101,22 @@ cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device)
memcpy(wgInfo->global_work_sz, device->max_3d_global_work_sizes, sizeof(device->max_3d_global_work_sizes));
}
+ return CL_SUCCESS;
+}
+
+extern "C"
+cl_int GenCreateKernel(cl_kernel kernel, const cl_device_id device)
+{
+ GBE_ASSERT(getGenKernelPrivate(kernel, device) == NULL);
+
+ Program* p = reinterpret_cast<Program*>(getGenProgramPrivate(kernel->program, device));
+ if (p == NULL)
+ return CL_INVALID_PROGRAM;
+
+ Kernel* ker = p->getKernel(kernel->name);
+ if (ker == NULL)
+ return CL_INVALID_KERNEL_NAME;
+
setGenKernelPrivate(kernel, device, ker);
return CL_SUCCESS;
}
@@ -750,7 +759,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
}
GenGPUWorkItemNDRange* ndRange = GBE_NEW(GenGPUWorkItemNDRange, gpuQueue->bufmgr, gpuQueue->ctx,
- gpuDev->device_id,
+ gpuDev->device_id,
event_ret, events, num_events);
if (ndRange->gpuState->stateInit(
queue->device->max_compute_unit * gpuDev->max_thread_per_unit, cst_sz / 32) != true) {
diff --git a/include/cl_driver.h b/include/cl_driver.h
index 2ace35ba..17619097 100644
--- a/include/cl_driver.h
+++ b/include/cl_driver.h
@@ -51,6 +51,7 @@ typedef struct _cl_driver {
cl_uint name_sz, cl_uint* ret_sz, cl_uint* ker_num);
cl_int (*create_kernel)(cl_kernel kernel, const cl_device_id device);
cl_int (*release_kernel)(cl_kernel kernel, const cl_device_id device);
+ cl_int (*get_workgroup_info)(cl_kernel kernel, const cl_device_id device, cl_kernel_workgroup_info wg_info);
cl_int (*get_arg_num)(cl_kernel kernel, const cl_device_id device, cl_uint* ret_num);
cl_int (*get_arg_name)(cl_kernel kernel, const cl_device_id device, cl_uint index,
char *name, cl_uint name_sz, cl_uint* ret_sz);
diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c
index 9c47b91f..6cfe5462 100644
--- a/libclapi/cl_kernel.c
+++ b/libclapi/cl_kernel.c
@@ -270,6 +270,7 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
char *type_name = NULL;
cl_uint name_sz = 0;
cl_uint type_name_sz = 0;
+ _cl_kernel_workgroup_info wg_info;
cl_kernel_arg_type type;
cl_kernel_arg_address_qualifier qualifier;
cl_kernel_arg_access_qualifier access;
@@ -308,6 +309,21 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
}
}
+ // Verify the workgroup info for kernel.
+ for (i = 0; i < p->ctx->device_num; i++) {
+ if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device.
+ continue;
+
+ memset(&wg_info, 0, sizeof(wg_info));
+ err = p->ctx->devices[i]->driver->get_workgroup_info(k, p->ctx->devices[i], &wg_info);
+ if (err != CL_SUCCESS) {
+ err = CL_INVALID_PROGRAM_EXECUTABLE;
+ goto error;
+ }
+
+ memcpy(&k->wg_info[i], &wg_info, sizeof(wg_info));
+ }
+
// Verify and create arguments.
for (i = 0; i < p->ctx->device_num; i++) {
if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device.