summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junyan He <junyan.he@intel.com> 2016-04-11 18:22:15 +0800
committer: Junyan He <junyan.he@intel.com> 2016-04-11 18:22:15 +0800
commit: 8096dfae2edf9a9a784ba7aa26bcb85872e05bdd (patch)
tree: 90216b6197629c7e4d9ec4e7fed691844298d5ce
parent: 2ca0719a2d886c9af12d1ce501922a0272a1b68d (diff)
refine program
-rw-r--r-- backend/src/driver/cl_gen_command_queue.cpp | 2
-rw-r--r-- backend/src/driver/cl_gen_device_id.cpp | 6
-rw-r--r-- backend/src/driver/cl_gen_kernel.cpp | 18
-rw-r--r-- backend/src/driver/cl_gen_program.cpp | 15
-rw-r--r-- libclapi/cl_kernel.c | 50
-rw-r--r-- libclapi/cl_program.c | 21
6 files changed, 75 insertions, 37 deletions
diff --git a/backend/src/driver/cl_gen_command_queue.cpp b/backend/src/driver/cl_gen_command_queue.cpp
index 57fee252..5ee7730e 100644
--- a/backend/src/driver/cl_gen_command_queue.cpp
+++ b/backend/src/driver/cl_gen_command_queue.cpp
@@ -266,7 +266,7 @@ cl_int GenReleaseCommandQueue(cl_command_queue queue)
return CL_INVALID_VALUE;
}
- GBE_FREE(gpuQueue);
+ GBE_DELETE(gpuQueue);
setGenCommandQueuePrivate(queue, NULL);
return CL_SUCCESS;
}
diff --git a/backend/src/driver/cl_gen_device_id.cpp b/backend/src/driver/cl_gen_device_id.cpp
index 005be34b..b665d6b9 100644
--- a/backend/src/driver/cl_gen_device_id.cpp
+++ b/backend/src/driver/cl_gen_device_id.cpp
@@ -818,7 +818,7 @@ cl_int GenDriverInit(cl_platform_id platform)
GenGPUDevice* gpuDev = GBE_NEW(GenGPUDevice);
if (gpuDev->gen_ver < 7) {
gen_device = NULL;
- GBE_FREE(gpuDev);
+ GBE_DELETE(gpuDev);
err = CL_DEVICE_NOT_FOUND;
return err;
}
@@ -826,7 +826,7 @@ cl_int GenDriverInit(cl_platform_id platform)
initGenDevice(gpuDev);
if (!gen_device) {
gen_device = NULL;
- GBE_FREE(gpuDev);
+ GBE_DELETE(gpuDev);
err = CL_DEVICE_NOT_FOUND;
return err;
}
@@ -838,7 +838,7 @@ cl_int GenDriverInit(cl_platform_id platform)
setGenDevicePrivate(gen_device, gpuDev);
/* Check and set the extension for device. */
if (!checkDeviceExtension(platform)) {
- GBE_FREE(gpuDev);
+ GBE_DELETE(gpuDev);
setGenDevicePrivate(gen_device, NULL);
gen_device = NULL;
err = CL_DEVICE_NOT_FOUND;
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index 5f828b29..cfde43e5 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -759,14 +759,14 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
err = genGPUBindSurfaces(*ndRange->gpuState, queue, kernel, ker);
if (err != CL_SUCCESS) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return err;
}
if (ker->getImageSize()) {
err = genGPUBindImage(curbe, *ndRange->gpuState, kernel, ker);
if (err != CL_SUCCESS) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return err;
}
}
@@ -774,13 +774,13 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
if (ker->getSamplerSize()) {
err = genGPUBindSamplers(*ndRange->gpuState, kernel, ker);
if (err != CL_SUCCESS) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return err;
}
}
if (ndRange->gpuState->setScratch(scratch_sz) != 0) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return CL_OUT_OF_RESOURCES;
}
@@ -788,7 +788,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
genGPUSetStack(*ndRange->gpuState, ker, queue->device, gpuDev);
if (genUploadConstantBuffer(queue, curbe, *ndRange->gpuState, kernel, ker, prog) != 0) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return CL_OUT_OF_RESOURCES;
}
@@ -801,7 +801,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
GBE_ASSERT(cst_sz > 0);
char *final_curbe = (char*)alloca(thread_n * cst_sz);
if (final_curbe == NULL) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return CL_OUT_OF_HOST_MEMORY;
}
@@ -810,12 +810,12 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
}
if (genSetVaryingPayload(ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n) != true) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return CL_OUT_OF_HOST_MEMORY;
}
if (ndRange->gpuState->uploadCurbes(final_curbe, thread_n*cst_sz, thread_n, cst_sz) != true) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return CL_OUT_OF_RESOURCES;
}
}
@@ -831,7 +831,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u
/* Final,enqueue it in the queue worker thread. */
if (gpuQueue->enqueueWorkItem(ndRange) == false) {
- GBE_FREE(ndRange);
+ GBE_DELETE(ndRange);
return CL_INVALID_COMMAND_QUEUE;
}
diff --git a/backend/src/driver/cl_gen_program.cpp b/backend/src/driver/cl_gen_program.cpp
index 1da69cd3..49de5754 100644
--- a/backend/src/driver/cl_gen_program.cpp
+++ b/backend/src/driver/cl_gen_program.cpp
@@ -35,14 +35,20 @@ using namespace gbe;
extern "C"
cl_int GenBuildProgram(cl_program program, const cl_device_id device)
{
- GBE_ASSERT(getGenProgramPrivate(program, device) == NULL);
+ if (getGenProgramPrivate(program, device) != NULL) { // Rebuild ?
+ Program* prog = reinterpret_cast<Program*>(getGenProgramPrivate(program, device));
+ GBE_DELETE(prog);
+ setGenProgramPrivate(program, device, NULL);
+ }
int index = findIndexByDevice(program->ctx, device);
int device_id = (reinterpret_cast<GenGPUDevice*>(getGenDevicePrivate(device)))->device_id;
GBE_ASSERT(index >= 0);
- Program* p = ProgramNewFromSource(device_id, program->source, CL_BUILD_LOG_MAX_SZ, program->build_opts,
- program->build_log[index], &program->build_log_sz[index]);
+ Program* p = ProgramNewFromSource(device_id, program->source, CL_BUILD_LOG_MAX_SZ,
+ program->device_status[index].build_opts,
+ program->device_status[index].build_log,
+ &program->device_status[index].build_log_sz);
if (p == NULL) {
return CL_BUILD_PROGRAM_FAILURE;
}
@@ -94,8 +100,7 @@ cl_int GenReleaseProgram(cl_program program, const cl_device_id device)
if (p == NULL)
return CL_INVALID_VALUE;
- GBE_FREE(p);
-
+ GBE_DELETE(p);
setGenProgramPrivate(program, device, NULL);
return CL_SUCCESS;
}
diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c
index b1b70fe3..9c47b91f 100644
--- a/libclapi/cl_kernel.c
+++ b/libclapi/cl_kernel.c
@@ -136,10 +136,8 @@ static cl_kernel cl_kernel_new(cl_program p, const char* name)
k->magic = CL_MAGIC_KERNEL_HEADER;
pthread_mutex_init(&k->lock, NULL);
- k->program = p;
cl_retain_program(p);
- strcpy(k->name, name);
-
+ k->program = p;
pthread_mutex_lock(&p->lock);
k->next = p->kernels;
if (p->kernels != NULL)
@@ -147,6 +145,8 @@ static cl_kernel cl_kernel_new(cl_program p, const char* name)
p->kernels = k;
p->kernel_created++;
pthread_mutex_unlock(&p->lock);
+
+ strcpy(k->name, name);
return k;
}
@@ -197,13 +197,21 @@ LOCAL void cl_retain_kernel(cl_kernel k)
LOCAL void cl_release_kernel(cl_kernel k)
{
cl_uint i;
+ int32_t *build_status = NULL;
assert(k);
if (atomic_dec(&k->ref_n) > 1)
return;
+ build_status = CALLOC_ARRAY(int32_t, k->program->ctx->device_num);
+ assert(build_status != NULL);
+ pthread_mutex_lock(&k->program->lock);
+ for (i = 0; i < k->program->ctx->device_num; i++)
+ build_status[i] = k->program->device_status[i].build_status;
+ pthread_mutex_unlock(&k->program->lock);
+
for (i = 0; i < k->program->ctx->device_num; i++) {
- if (k->program->valid[i] == 0) // We do not build for that device.
+ if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device.
continue;
k->program->ctx->devices[i]->driver->release_kernel(k, k->program->ctx->devices[i]);
@@ -227,23 +235,28 @@ static cl_int cl_find_kernel_by_name(cl_program p, const char* kernel_name)
return err;
}
+ pthread_mutex_lock(&p->lock);
s = strstr(p->kernel_names, kernel_name);
/* Make sure this kernel name is valid. */
if (s == NULL) {
+ pthread_mutex_unlock(&p->lock);
err = CL_INVALID_KERNEL_NAME;
return err;
}
assert(s >= p->kernel_names);
if (s > p->kernel_names && p->kernel_names[s - p->kernel_names - 1] != ';') {
+ pthread_mutex_unlock(&p->lock);
err = CL_INVALID_KERNEL_NAME;
return err;
}
if (s[strlen(kernel_name)] != 0 && s[strlen(kernel_name)] != ';') {
+ pthread_mutex_unlock(&p->lock);
err = CL_INVALID_KERNEL_NAME;
return err;
}
+ pthread_mutex_unlock(&p->lock);
return err;
}
@@ -262,6 +275,17 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
cl_kernel_arg_access_qualifier access;
cl_kernel_arg_type_qualifier type_qualifier;
size_t arg_size = 0;
+ int32_t *build_status = NULL;
+
+ err = cl_find_kernel_by_name(p, kernel_name);
+ if (err != CL_SUCCESS)
+ goto error;
+
+ build_status = CALLOC_ARRAY(int32_t, p->ctx->device_num);
+ if (UNLIKELY(build_status == NULL)) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
k = cl_kernel_new(p, kernel_name);
if (UNLIKELY(k == NULL)) {
@@ -269,12 +293,13 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
goto error;
}
- err = cl_find_kernel_by_name(p, kernel_name);
- if (err != CL_SUCCESS)
- goto error;
+ pthread_mutex_lock(&p->lock);
+ for (i = 0; i < p->ctx->device_num; i++)
+ build_status[i] = p->device_status[i].build_status;
+ pthread_mutex_unlock(&p->lock);
for (i = 0; i < p->ctx->device_num; i++) {
- if (p->valid[i] == 0) // We do not build for that device.
+ if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device.
continue;
err = p->ctx->devices[i]->driver->create_kernel(k, p->ctx->devices[i]);
@@ -285,7 +310,7 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
// Verify and create arguments.
for (i = 0; i < p->ctx->device_num; i++) {
- if (p->valid[i] == 0) // We do not build for that device.
+ if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device.
continue;
err = p->ctx->devices[i]->driver->get_arg_num(k, p->ctx->devices[i], &arg_num);
@@ -318,7 +343,7 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
cl_kernel_arg_type_qualifier tyq = 0;
size_t asz = 0;
- if (p->valid[i] == 0) // We do not build for that device.
+ if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device.
continue;
if (name_sz == 0) {
@@ -403,13 +428,16 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int*
cl_free(name);
if (type_name)
cl_free(type_name);
-
+ if (build_status)
+ cl_free(build_status);
if (errcode_ret)
*errcode_ret = err;
return k;
error:
+ if (build_status)
+ cl_free(build_status);
if (name)
cl_free(name);
if (type_name)
diff --git a/libclapi/cl_program.c b/libclapi/cl_program.c
index 4b7857f1..566c74f9 100644
--- a/libclapi/cl_program.c
+++ b/libclapi/cl_program.c
@@ -99,6 +99,7 @@ static void cl_program_delete(cl_program p)
cl_free(p->pdata);
}
+ pthread_mutex_destroy(&p->lock);
p->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
cl_free(p);
}
@@ -132,6 +133,7 @@ LOCAL void cl_release_program(cl_program p)
static cl_program cl_program_new(cl_context ctx)
{
cl_program p = NULL;
+ int i;
/* Allocate the structure */
TRY_ALLOC_NO_ERR(p, CALLOC(struct _cl_program));
@@ -143,8 +145,11 @@ static cl_program cl_program_new(cl_context ctx)
TRY_ALLOC_NO_ERR(p->device_status,
CALLOC_ARRAY(_cl_program_device_status, ctx->device_num));
- p->build_status = CL_BUILD_NONE;
- p->binary_type = CL_PROGRAM_BINARY_TYPE_NONE;
+ for (i = 0; i < ctx->device_num; i++) {
+ p->device_status[i].build_status = CL_BUILD_NONE;
+ p->device_status[i].binary_type = CL_PROGRAM_BINARY_TYPE_NONE;
+ }
+
p->ref_n = 1;
p->magic = CL_MAGIC_PROGRAM_HEADER;
p->ctx = ctx;
@@ -290,7 +295,6 @@ static int check_cl_version_option(cl_program p, const char* options)
static cl_int cl_get_kernel_names(cl_program p, const cl_device_id device)
{
- cl_uint i;
cl_int err = CL_SUCCESS;
cl_uint ker_n;
char* kernel_names = NULL;
@@ -649,12 +653,13 @@ static cl_program cl_program_link(cl_context context, cl_uint num_devices, const
cl_int i = 0;
int avialable_program = 0;
+// TODO:
for(i = 0; i < num_input_programs; i++) {
//num_input_programs >0 and input_programs MUST not NULL.
- if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY ||
- input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) {
+// if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY ||
+// input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) {
avialable_program++;
- }
+ // }
}
//None of program contain a compilerd binary or library.
@@ -688,8 +693,8 @@ static cl_program cl_program_link(cl_context context, cl_uint num_devices, const
}
done:
- if (p)
- p->build_status = CL_BUILD_SUCCESS;
+ // if (p)
+// p->build_status = CL_BUILD_SUCCESS;
if (errcode_ret)
*errcode_ret = err;
return p;