From 04b7ba95da781db41c2747e889e00287d83770a8 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Tue, 26 Apr 2016 19:11:22 +0800 Subject: new enqueue --- backend/src/driver/cl_gen_driver.hpp | 9 ---- backend/src/driver/cl_gen_kernel.cpp | 30 ----------- libclapi/cl_enqueue.c | 99 ++++++++++++++++++++++++++++++------ libclapi/cl_internals.h | 4 ++ libclapi/cl_kernel.c | 36 +++++++++++-- 5 files changed, 120 insertions(+), 58 deletions(-) diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp index d81c5dc6..6f880174 100644 --- a/backend/src/driver/cl_gen_driver.hpp +++ b/backend/src/driver/cl_gen_driver.hpp @@ -231,15 +231,6 @@ struct GenGPUWorkItem { // Represent Some real work for GPU to do. virtual ~GenGPUWorkItem() { }; }; -struct GenGPUWorkItemNDRange : public GenGPUWorkItem { - GenGPUState* gpuState; - virtual ~GenGPUWorkItemNDRange(void) { if (gpuState) GBE_DELETE(gpuState); } - GenGPUWorkItemNDRange(dri_bufmgr *bufmgr, drm_intel_context *ctx, int device_id, - cl_event event, const cl_event* dependEvents, cl_uint num_events); - virtual bool submit(void); - virtual bool complete(void); -}; - struct GenGPUCommandQueue { cl_command_queue queue; // Pointer back to queue. 
dri_bufmgr *bufmgr; diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp index bd61713c..cbc7169f 100644 --- a/backend/src/driver/cl_gen_kernel.cpp +++ b/backend/src/driver/cl_gen_kernel.cpp @@ -320,36 +320,6 @@ cl_int GenGetKernelArgInfo(cl_kernel kernel, const cl_device_id device, cl_uint return CL_SUCCESS; } -GenGPUWorkItemNDRange::GenGPUWorkItemNDRange(dri_bufmgr *bufmgr, drm_intel_context *ctx, int device_id, - cl_event event, const cl_event* dependEvents, cl_uint num_events) - : GenGPUWorkItem(event, dependEvents, num_events) -{ - if (IS_GEN9(device_id)) { - - } else if (IS_GEN8(device_id)) { - - } else if (IS_GEN75(device_id)) { - - } else if (IS_GEN7(device_id)) { - this->gpuState = GBE_NEW(Gen7GPUState, bufmgr, ctx, device_id); - } else - GBE_ASSERT(0); // not support any more. -} - -bool GenGPUWorkItemNDRange::submit(void) -{ - bool ret; - ret = this->gpuState->flush(); - return ret; -} - -bool GenGPUWorkItemNDRange::complete(void) -{ - /* Wait it to complete. */ - this->gpuState->sync(); - return true; -} - static cl_int genNDRangeRun(cl_command_queue_work_item it) { GBE_ASSERT(0); diff --git a/libclapi/cl_enqueue.c b/libclapi/cl_enqueue.c index ef3ada61..991d29d0 100644 --- a/libclapi/cl_enqueue.c +++ b/libclapi/cl_enqueue.c @@ -105,7 +105,6 @@ static cl_int check_work_item_ready(cl_command_queue_work_item item) static void *worker_thread_function(void *Arg) { - cl_int ret; cl_command_queue_worker worker = (cl_command_queue_worker)Arg; cl_uint last_cookie = worker->cookie; cl_command_queue_work_item it, start_it; @@ -155,20 +154,9 @@ static void *worker_thread_function(void *Arg) if (is_ready < 0) // Error happend, just cancel. 
set_work_item_status(ready_one, -1); - if (ready_one->status == CL_QUEUED) { - ret = ready_one->submit(ready_one); - set_work_item_status(ready_one, ret); - } - - if (ready_one->status == CL_SUBMITTED) { - ret = ready_one->run(ready_one); - set_work_item_status(ready_one, ret); - } - - if (ready_one->status == CL_RUNNING) { - ret = ready_one->complete(ready_one); - set_work_item_status(ready_one, ret); - } + cl_enqueue_submit_work_item(worker->queue, ready_one); + cl_enqueue_run_work_item(worker->queue, ready_one); + cl_enqueue_complete_work_item(worker->queue, ready_one); cl_enqueue_destroy_work_item(worker->queue, ready_one); CL_MUTEX_LOCK(&worker->mutex); @@ -184,7 +172,64 @@ static void *worker_thread_function(void *Arg) return NULL; } -LOCAL cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item) +LOCAL cl_int cl_enqueue_submit_work_item(cl_command_queue queue, cl_command_queue_work_item item) +{ + cl_int ret; + + assert(item->queue == queue); + if (item->event) { + assert(item->event->queue == queue); //Should belong to this queue. + } + + if (item->status == CL_QUEUED) { + ret = item->submit(item); + set_work_item_status(item, ret); + } else { + ret = item->status; + } + + return ret; +} + +LOCAL cl_int cl_enqueue_run_work_item(cl_command_queue queue, cl_command_queue_work_item item) +{ + cl_int ret; + + assert(item->queue == queue); + if (item->event) { + assert(item->event->queue == queue); //Should belong to this queue. + } + + if (item->status == CL_SUBMITTED) { + ret = item->run(item); + set_work_item_status(item, ret); + } else { + ret = item->status; + } + + return ret; +} + +LOCAL cl_int cl_enqueue_complete_work_item(cl_command_queue queue, cl_command_queue_work_item item) +{ + cl_int ret; + + assert(item->queue == queue); + if (item->event) { + assert(item->event->queue == queue); //Should belong to this queue. 
+ } + + if (item->status == CL_RUNNING) { + ret = item->complete(item); + set_work_item_status(item, ret); + } else { + ret = item->status; + } + + return ret; +} + +LOCAL cl_int cl_enqueue_insert_work_item(cl_command_queue queue, cl_command_queue_work_item item) { cl_command_queue_worker worker; @@ -217,6 +262,28 @@ LOCAL cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue return CL_SUCCESS; } +LOCAL cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item) +{ + cl_int ret; + + assert(item->queue == queue); + if (item->event) { + assert(item->event->queue == queue); //Should belong to this queue. + } + + if (item->status == CL_QUEUED) { + ret = CL_QUEUED; + if (cl_enqueue_insert_work_item(queue, item) != CL_SUCCESS) { + ret = -1; + set_work_item_status(item, ret); + } + } else { + ret = item->status; + } + + return ret; +} + LOCAL cl_command_queue_work_item cl_enqueue_create_work_item(cl_command_queue queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event event) { diff --git a/libclapi/cl_internals.h b/libclapi/cl_internals.h index f32bc65c..fa6942b8 100644 --- a/libclapi/cl_internals.h +++ b/libclapi/cl_internals.h @@ -487,7 +487,11 @@ extern void cl_release_command_queue(cl_command_queue queue); extern cl_int cl_retain_event(cl_event e); extern cl_int cl_event_get_status(cl_event event); extern cl_int cl_event_set_status(cl_event event, cl_int status); +extern cl_int cl_enqueue_insert_work_item(cl_command_queue queue, cl_command_queue_work_item item); extern cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item); +extern cl_int cl_enqueue_submit_work_item(cl_command_queue queue, cl_command_queue_work_item item); +extern cl_int cl_enqueue_run_work_item(cl_command_queue queue, cl_command_queue_work_item item); +extern cl_int cl_enqueue_complete_work_item(cl_command_queue queue, cl_command_queue_work_item item); extern cl_command_queue_work_item 
cl_enqueue_create_work_item(cl_command_queue queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event event); extern void cl_enqueue_destroy_work_item(cl_command_queue queue, cl_command_queue_work_item item); diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c index 0f110bef..b0853536 100644 --- a/libclapi/cl_kernel.c +++ b/libclapi/cl_kernel.c @@ -773,7 +773,10 @@ static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel const cl_event *event_wait_list, cl_event *event_ret) { cl_event event = NULL; + cl_command_queue_work_item it = NULL; cl_int err = CL_SUCCESS; + uint32_t i; + cl_int ret_status; if (event_ret) { event = cl_create_event(queue->ctx, queue, CL_FALSE, num_events_in_wait_list, event_wait_list, &err); @@ -781,21 +784,46 @@ static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel goto error; } - uint32_t i; + it = cl_enqueue_create_work_item(queue, num_events_in_wait_list, event_wait_list, event); + if (it == NULL) { + err = CL_OUT_OF_HOST_MEMORY; + goto error; + } + CL_MUTEX_LOCK(&kernel->lock); for (i = 0; i < kernel->arg_num; ++i) if (kernel->args[i]->is_set == CL_FALSE) { - return CL_INVALID_KERNEL_ARGS; + err = CL_INVALID_KERNEL_ARGS; CL_MUTEX_UNLOCK(&kernel->lock); + goto error; } CL_MUTEX_UNLOCK(&kernel->lock); err = queue->device->driver->enqueue_nd_range_kernel(queue, kernel, work_dim, global_wk_off, - global_wk_sz, local_wk_sz, num_events_in_wait_list, event_wait_list, event); + global_wk_sz, local_wk_sz, it); if (err != CL_SUCCESS) { goto error; } + /* If there are no events to wait on, we submit it immediately. */ + if (event_wait_list == NULL) { + ret_status = cl_enqueue_submit_work_item(queue, it); + if (ret_status < 0) { + /* We consider it as an error and return failure.
*/ + err = CL_OUT_OF_RESOURCES; + goto error; + } + if (ret_status > CL_COMPLETE) { + err = cl_enqueue_insert_work_item(queue, it); + if (err != CL_SUCCESS) + goto error; + } + } else { + err = cl_enqueue_insert_work_item(queue, it); + if (err != CL_SUCCESS) + goto error; + } + if (event_ret) *event_ret = event; @@ -814,6 +842,8 @@ static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel return err; error: + if (it) + cl_enqueue_destroy_work_item(queue, it); if (event) cl_release_event(event); return err; -- cgit v1.2.3