diff options
author | Junyan He <junyan.he@intel.com> | 2016-04-11 18:22:15 +0800 |
---|---|---|
committer | Junyan He <junyan.he@intel.com> | 2016-04-11 18:22:15 +0800 |
commit | 8096dfae2edf9a9a784ba7aa26bcb85872e05bdd (patch) | |
tree | 90216b6197629c7e4d9ec4e7fed691844298d5ce | |
parent | 2ca0719a2d886c9af12d1ce501922a0272a1b68d (diff) |
refine program
-rw-r--r-- | backend/src/driver/cl_gen_command_queue.cpp | 2 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_device_id.cpp | 6 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_kernel.cpp | 18 | ||||
-rw-r--r-- | backend/src/driver/cl_gen_program.cpp | 15 | ||||
-rw-r--r-- | libclapi/cl_kernel.c | 50 | ||||
-rw-r--r-- | libclapi/cl_program.c | 21 |
6 files changed, 75 insertions, 37 deletions
diff --git a/backend/src/driver/cl_gen_command_queue.cpp b/backend/src/driver/cl_gen_command_queue.cpp index 57fee252..5ee7730e 100644 --- a/backend/src/driver/cl_gen_command_queue.cpp +++ b/backend/src/driver/cl_gen_command_queue.cpp @@ -266,7 +266,7 @@ cl_int GenReleaseCommandQueue(cl_command_queue queue) return CL_INVALID_VALUE; } - GBE_FREE(gpuQueue); + GBE_DELETE(gpuQueue); setGenCommandQueuePrivate(queue, NULL); return CL_SUCCESS; } diff --git a/backend/src/driver/cl_gen_device_id.cpp b/backend/src/driver/cl_gen_device_id.cpp index 005be34b..b665d6b9 100644 --- a/backend/src/driver/cl_gen_device_id.cpp +++ b/backend/src/driver/cl_gen_device_id.cpp @@ -818,7 +818,7 @@ cl_int GenDriverInit(cl_platform_id platform) GenGPUDevice* gpuDev = GBE_NEW(GenGPUDevice); if (gpuDev->gen_ver < 7) { gen_device = NULL; - GBE_FREE(gpuDev); + GBE_DELETE(gpuDev); err = CL_DEVICE_NOT_FOUND; return err; } @@ -826,7 +826,7 @@ cl_int GenDriverInit(cl_platform_id platform) initGenDevice(gpuDev); if (!gen_device) { gen_device = NULL; - GBE_FREE(gpuDev); + GBE_DELETE(gpuDev); err = CL_DEVICE_NOT_FOUND; return err; } @@ -838,7 +838,7 @@ cl_int GenDriverInit(cl_platform_id platform) setGenDevicePrivate(gen_device, gpuDev); /* Check and set the extension for device. */ if (!checkDeviceExtension(platform)) { - GBE_FREE(gpuDev); + GBE_DELETE(gpuDev); setGenDevicePrivate(gen_device, NULL); gen_device = NULL; err = CL_DEVICE_NOT_FOUND; diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp index 5f828b29..cfde43e5 100644 --- a/backend/src/driver/cl_gen_kernel.cpp +++ b/backend/src/driver/cl_gen_kernel.cpp @@ -759,14 +759,14 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u err = genGPUBindSurfaces(*ndRange->gpuState, queue, kernel, ker); if (err != CL_SUCCESS) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return err; } if (ker->getImageSize()) { err = genGPUBindImage(curbe, *ndRange->gpuState, kernel, ker); if (err != CL_SUCCESS) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return err; } } @@ -774,13 +774,13 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u if (ker->getSamplerSize()) { err = genGPUBindSamplers(*ndRange->gpuState, kernel, ker); if (err != CL_SUCCESS) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return err; } } if (ndRange->gpuState->setScratch(scratch_sz) != 0) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return CL_OUT_OF_RESOURCES; } @@ -788,7 +788,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u genGPUSetStack(*ndRange->gpuState, ker, queue->device, gpuDev); if (genUploadConstantBuffer(queue, curbe, *ndRange->gpuState, kernel, ker, prog) != 0) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return CL_OUT_OF_RESOURCES; } @@ -801,7 +801,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u GBE_ASSERT(cst_sz > 0); char *final_curbe = (char*)alloca(thread_n * cst_sz); if (final_curbe == NULL) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return CL_OUT_OF_HOST_MEMORY; } @@ -810,12 +810,12 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u } if (genSetVaryingPayload(ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n) != true) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return CL_OUT_OF_HOST_MEMORY; } if (ndRange->gpuState->uploadCurbes(final_curbe, thread_n*cst_sz, thread_n, cst_sz) != true) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return CL_OUT_OF_RESOURCES; } } @@ -831,7 +831,7 @@ cl_int GenEnqueueNDRangeKernel(cl_command_queue queue, cl_kernel kernel, const u /* Final,enqueue it in the queue worker thread. */ if (gpuQueue->enqueueWorkItem(ndRange) == false) { - GBE_FREE(ndRange); + GBE_DELETE(ndRange); return CL_INVALID_COMMAND_QUEUE; } diff --git a/backend/src/driver/cl_gen_program.cpp b/backend/src/driver/cl_gen_program.cpp index 1da69cd3..49de5754 100644 --- a/backend/src/driver/cl_gen_program.cpp +++ b/backend/src/driver/cl_gen_program.cpp @@ -35,14 +35,20 @@ using namespace gbe; extern "C" cl_int GenBuildProgram(cl_program program, const cl_device_id device) { - GBE_ASSERT(getGenProgramPrivate(program, device) == NULL); + if (getGenProgramPrivate(program, device) != NULL) { // Rebuild ? + Program* prog = reinterpret_cast<Program*>(getGenProgramPrivate(program, device)); + GBE_DELETE(prog); + setGenProgramPrivate(program, device, NULL); + } int index = findIndexByDevice(program->ctx, device); int device_id = (reinterpret_cast<GenGPUDevice*>(getGenDevicePrivate(device)))->device_id; GBE_ASSERT(index >= 0); - Program* p = ProgramNewFromSource(device_id, program->source, CL_BUILD_LOG_MAX_SZ, program->build_opts, - program->build_log[index], &program->build_log_sz[index]); + Program* p = ProgramNewFromSource(device_id, program->source, CL_BUILD_LOG_MAX_SZ, + program->device_status[index].build_opts, + program->device_status[index].build_log, + &program->device_status[index].build_log_sz); if (p == NULL) { return CL_BUILD_PROGRAM_FAILURE; } @@ -94,8 +100,7 @@ cl_int GenReleaseProgram(cl_program program, const cl_device_id device) if (p == NULL) return CL_INVALID_VALUE; - GBE_FREE(p); - + GBE_DELETE(p); setGenProgramPrivate(program, device, NULL); return CL_SUCCESS; } diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c index b1b70fe3..9c47b91f 100644 --- a/libclapi/cl_kernel.c +++ b/libclapi/cl_kernel.c @@ -136,10 +136,8 @@ static cl_kernel cl_kernel_new(cl_program p, const char* name) k->magic = CL_MAGIC_KERNEL_HEADER; pthread_mutex_init(&k->lock, NULL); - k->program = p; cl_retain_program(p); - strcpy(k->name, name); - + k->program = p; pthread_mutex_lock(&p->lock); k->next = p->kernels; if (p->kernels != NULL) @@ -147,6 +145,8 @@ static cl_kernel cl_kernel_new(cl_program p, const char* name) p->kernels = k; p->kernel_created++; pthread_mutex_unlock(&p->lock); + + strcpy(k->name, name); return k; } @@ -197,13 +197,21 @@ LOCAL void cl_retain_kernel(cl_kernel k) LOCAL void cl_release_kernel(cl_kernel k) { cl_uint i; + int32_t *build_status = NULL; assert(k); if (atomic_dec(&k->ref_n) > 1) return; + build_status = CALLOC_ARRAY(int32_t, k->program->ctx->device_num); + assert(build_status != NULL); + pthread_mutex_lock(&k->program->lock); + for (i = 0; i < k->program->ctx->device_num; i++) + build_status[i] = k->program->device_status[i].build_status; + pthread_mutex_unlock(&k->program->lock); + for (i = 0; i < k->program->ctx->device_num; i++) { - if (k->program->valid[i] == 0) // We do not build for that device. + if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device. continue; k->program->ctx->devices[i]->driver->release_kernel(k, k->program->ctx->devices[i]); @@ -227,23 +235,28 @@ static cl_int cl_find_kernel_by_name(cl_program p, const char* kernel_name) return err; } + pthread_mutex_lock(&p->lock); s = strstr(p->kernel_names, kernel_name); /* Make sure this kernel name is valid. */ if (s == NULL) { + pthread_mutex_unlock(&p->lock); err = CL_INVALID_KERNEL_NAME; return err; } assert(s >= p->kernel_names); if (s > p->kernel_names && p->kernel_names[s - p->kernel_names - 1] != ';') { + pthread_mutex_unlock(&p->lock); err = CL_INVALID_KERNEL_NAME; return err; } if (s[strlen(kernel_name)] != 0 && s[strlen(kernel_name)] != ';') { + pthread_mutex_unlock(&p->lock); err = CL_INVALID_KERNEL_NAME; return err; } + pthread_mutex_unlock(&p->lock); return err; } @@ -262,6 +275,17 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* cl_kernel_arg_access_qualifier access; cl_kernel_arg_type_qualifier type_qualifier; size_t arg_size = 0; + int32_t *build_status = NULL; + + err = cl_find_kernel_by_name(p, kernel_name); + if (err != CL_SUCCESS) + goto error; + + build_status = CALLOC_ARRAY(int32_t, p->ctx->device_num); + if (UNLIKELY(build_status == NULL)) { + err = CL_OUT_OF_HOST_MEMORY; + goto error; + } k = cl_kernel_new(p, kernel_name); if (UNLIKELY(k == NULL)) { @@ -269,12 +293,13 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* goto error; } - err = cl_find_kernel_by_name(p, kernel_name); - if (err != CL_SUCCESS) - goto error; + pthread_mutex_lock(&p->lock); + for (i = 0; i < p->ctx->device_num; i++) + build_status[i] = p->device_status[i].build_status; + pthread_mutex_unlock(&p->lock); for (i = 0; i < p->ctx->device_num; i++) { - if (p->valid[i] == 0) // We do not build for that device. + if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device. continue; err = p->ctx->devices[i]->driver->create_kernel(k, p->ctx->devices[i]); @@ -285,7 +310,7 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* // Verify and create arguments. for (i = 0; i < p->ctx->device_num; i++) { - if (p->valid[i] == 0) // We do not build for that device. + if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device. continue; err = p->ctx->devices[i]->driver->get_arg_num(k, p->ctx->devices[i], &arg_num); @@ -318,7 +343,7 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* cl_kernel_arg_type_qualifier tyq = 0; size_t asz = 0; - if (p->valid[i] == 0) // We do not build for that device. + if (build_status[i] != CL_BUILD_SUCCESS) // We do not build for that device. continue; if (name_sz == 0) { @@ -403,13 +428,16 @@ LOCAL cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* cl_free(name); if (type_name) cl_free(type_name); - + if (build_status) + cl_free(build_status); if (errcode_ret) *errcode_ret = err; return k; error: + if (build_status) + cl_free(build_status); if (name) cl_free(name); if (type_name) diff --git a/libclapi/cl_program.c b/libclapi/cl_program.c index 4b7857f1..566c74f9 100644 --- a/libclapi/cl_program.c +++ b/libclapi/cl_program.c @@ -99,6 +99,7 @@ static void cl_program_delete(cl_program p) cl_free(p->pdata); } + pthread_mutex_destroy(&p->lock); p->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(p); } @@ -132,6 +133,7 @@ LOCAL void cl_release_program(cl_program p) static cl_program cl_program_new(cl_context ctx) { cl_program p = NULL; + int i; /* Allocate the structure */ TRY_ALLOC_NO_ERR(p, CALLOC(struct _cl_program)); @@ -143,8 +145,11 @@ static cl_program cl_program_new(cl_context ctx) TRY_ALLOC_NO_ERR(p->device_status, CALLOC_ARRAY(_cl_program_device_status, ctx->device_num)); - p->build_status = CL_BUILD_NONE; - p->binary_type = CL_PROGRAM_BINARY_TYPE_NONE; + for (i = 0; i < ctx->device_num; i++) { + p->device_status[i].build_status = CL_BUILD_NONE; + p->device_status[i].binary_type = CL_PROGRAM_BINARY_TYPE_NONE; + } + p->ref_n = 1; p->magic = CL_MAGIC_PROGRAM_HEADER; p->ctx = ctx; @@ -290,7 +295,6 @@ static int check_cl_version_option(cl_program p, const char* options) static cl_int cl_get_kernel_names(cl_program p, const cl_device_id device) { - cl_uint i; cl_int err = CL_SUCCESS; cl_uint ker_n; char* kernel_names = NULL; @@ -649,12 +653,13 @@ static cl_program cl_program_link(cl_context context, cl_uint num_devices, const cl_int i = 0; int avialable_program = 0; +// TODO: for(i = 0; i < num_input_programs; i++) { //num_input_programs >0 and input_programs MUST not NULL. - if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY || - input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { +// if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY || +// input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { avialable_program++; - } + // } } //None of program contain a compilerd binary or library. @@ -688,8 +693,8 @@ static cl_program cl_program_link(cl_context context, cl_uint num_devices, const } done: - if (p) - p->build_status = CL_BUILD_SUCCESS; + // if (p) +// p->build_status = CL_BUILD_SUCCESS; if (errcode_ret) *errcode_ret = err; return p; |