summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyan He <junyan.he@intel.com>2016-04-26 19:11:22 +0800
committerJunyan He <junyan.he@intel.com>2016-04-26 19:11:22 +0800
commit04b7ba95da781db41c2747e889e00287d83770a8 (patch)
tree03ddc268ca9434781f354139cee4805333194d21
parent76d2ad118883c93335168205dfdfc6f2433138ea (diff)
new enqueue
-rw-r--r--backend/src/driver/cl_gen_driver.hpp9
-rw-r--r--backend/src/driver/cl_gen_kernel.cpp30
-rw-r--r--libclapi/cl_enqueue.c99
-rw-r--r--libclapi/cl_internals.h4
-rw-r--r--libclapi/cl_kernel.c36
5 files changed, 120 insertions, 58 deletions
diff --git a/backend/src/driver/cl_gen_driver.hpp b/backend/src/driver/cl_gen_driver.hpp
index d81c5dc6..6f880174 100644
--- a/backend/src/driver/cl_gen_driver.hpp
+++ b/backend/src/driver/cl_gen_driver.hpp
@@ -231,15 +231,6 @@ struct GenGPUWorkItem { // Represent Some real work for GPU to do.
virtual ~GenGPUWorkItem() { };
};
-struct GenGPUWorkItemNDRange : public GenGPUWorkItem {
- GenGPUState* gpuState;
- virtual ~GenGPUWorkItemNDRange(void) { if (gpuState) GBE_DELETE(gpuState); }
- GenGPUWorkItemNDRange(dri_bufmgr *bufmgr, drm_intel_context *ctx, int device_id,
- cl_event event, const cl_event* dependEvents, cl_uint num_events);
- virtual bool submit(void);
- virtual bool complete(void);
-};
-
struct GenGPUCommandQueue {
cl_command_queue queue; // Pointer back to queue.
dri_bufmgr *bufmgr;
diff --git a/backend/src/driver/cl_gen_kernel.cpp b/backend/src/driver/cl_gen_kernel.cpp
index bd61713c..cbc7169f 100644
--- a/backend/src/driver/cl_gen_kernel.cpp
+++ b/backend/src/driver/cl_gen_kernel.cpp
@@ -320,36 +320,6 @@ cl_int GenGetKernelArgInfo(cl_kernel kernel, const cl_device_id device, cl_uint
return CL_SUCCESS;
}
-GenGPUWorkItemNDRange::GenGPUWorkItemNDRange(dri_bufmgr *bufmgr, drm_intel_context *ctx, int device_id,
- cl_event event, const cl_event* dependEvents, cl_uint num_events)
- : GenGPUWorkItem(event, dependEvents, num_events)
-{
- if (IS_GEN9(device_id)) {
-
- } else if (IS_GEN8(device_id)) {
-
- } else if (IS_GEN75(device_id)) {
-
- } else if (IS_GEN7(device_id)) {
- this->gpuState = GBE_NEW(Gen7GPUState, bufmgr, ctx, device_id);
- } else
- GBE_ASSERT(0); // not support any more.
-}
-
-bool GenGPUWorkItemNDRange::submit(void)
-{
- bool ret;
- ret = this->gpuState->flush();
- return ret;
-}
-
-bool GenGPUWorkItemNDRange::complete(void)
-{
- /* Wait it to complete. */
- this->gpuState->sync();
- return true;
-}
-
static cl_int genNDRangeRun(cl_command_queue_work_item it)
{
GBE_ASSERT(0);
diff --git a/libclapi/cl_enqueue.c b/libclapi/cl_enqueue.c
index ef3ada61..991d29d0 100644
--- a/libclapi/cl_enqueue.c
+++ b/libclapi/cl_enqueue.c
@@ -105,7 +105,6 @@ static cl_int check_work_item_ready(cl_command_queue_work_item item)
static void *worker_thread_function(void *Arg)
{
- cl_int ret;
cl_command_queue_worker worker = (cl_command_queue_worker)Arg;
cl_uint last_cookie = worker->cookie;
cl_command_queue_work_item it, start_it;
@@ -155,20 +154,9 @@ static void *worker_thread_function(void *Arg)
if (is_ready < 0) // Error happend, just cancel.
set_work_item_status(ready_one, -1);
- if (ready_one->status == CL_QUEUED) {
- ret = ready_one->submit(ready_one);
- set_work_item_status(ready_one, ret);
- }
-
- if (ready_one->status == CL_SUBMITTED) {
- ret = ready_one->run(ready_one);
- set_work_item_status(ready_one, ret);
- }
-
- if (ready_one->status == CL_RUNNING) {
- ret = ready_one->complete(ready_one);
- set_work_item_status(ready_one, ret);
- }
+ cl_enqueue_submit_work_item(worker->queue, ready_one);
+ cl_enqueue_run_work_item(worker->queue, ready_one);
+ cl_enqueue_complete_work_item(worker->queue, ready_one);
cl_enqueue_destroy_work_item(worker->queue, ready_one);
CL_MUTEX_LOCK(&worker->mutex);
@@ -184,7 +172,64 @@ static void *worker_thread_function(void *Arg)
return NULL;
}
-LOCAL cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item)
+LOCAL cl_int cl_enqueue_submit_work_item(cl_command_queue queue, cl_command_queue_work_item item)
+{ /* Invoke the item's submit() callback, but only while the item is still CL_QUEUED. */
+ cl_int ret;
+
+ assert(item->queue == queue);
+ if (item->event) {
+ assert(item->event->queue == queue); // The item's event, if it has one, must belong to this queue as well.
+ }
+
+ if (item->status == CL_QUEUED) {
+ ret = item->submit(item);
+ set_work_item_status(item, ret); /* Hand submit()'s result to set_work_item_status(). */
+ } else {
+ ret = item->status; /* Not CL_QUEUED: leave the item untouched and report its current status. */
+ }
+
+ return ret; /* Either submit()'s result or the unchanged item->status. */
+}
+
+LOCAL cl_int cl_enqueue_run_work_item(cl_command_queue queue, cl_command_queue_work_item item)
+{ /* Invoke the item's run() callback, but only while the item is in CL_SUBMITTED state. */
+ cl_int ret;
+
+ assert(item->queue == queue);
+ if (item->event) {
+ assert(item->event->queue == queue); // The item's event, if it has one, must belong to this queue as well.
+ }
+
+ if (item->status == CL_SUBMITTED) {
+ ret = item->run(item);
+ set_work_item_status(item, ret); /* Hand run()'s result to set_work_item_status(). */
+ } else {
+ ret = item->status; /* Not CL_SUBMITTED: leave the item untouched and report its current status. */
+ }
+
+ return ret; /* Either run()'s result or the unchanged item->status. */
+}
+
+LOCAL cl_int cl_enqueue_complete_work_item(cl_command_queue queue, cl_command_queue_work_item item)
+{ /* Invoke the item's complete() callback, but only while the item is in CL_RUNNING state. */
+ cl_int ret;
+
+ assert(item->queue == queue);
+ if (item->event) {
+ assert(item->event->queue == queue); // The item's event, if it has one, must belong to this queue as well.
+ }
+
+ if (item->status == CL_RUNNING) {
+ ret = item->complete(item);
+ set_work_item_status(item, ret); /* Hand complete()'s result to set_work_item_status(). */
+ } else {
+ ret = item->status; /* Not CL_RUNNING: leave the item untouched and report its current status. */
+ }
+
+ return ret; /* Either complete()'s result or the unchanged item->status. */
+}
+
+LOCAL cl_int cl_enqueue_insert_work_item(cl_command_queue queue, cl_command_queue_work_item item)
{
cl_command_queue_worker worker;
@@ -217,6 +262,28 @@ LOCAL cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue
return CL_SUCCESS;
}
+LOCAL cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item)
+{ /* Pass a CL_QUEUED item to cl_enqueue_insert_work_item(); items in any other state are only reported. */
+ cl_int ret;
+
+ assert(item->queue == queue);
+ if (item->event) {
+ assert(item->event->queue == queue); // The item's event, if it has one, must belong to this queue as well.
+ }
+
+ if (item->status == CL_QUEUED) {
+ ret = CL_QUEUED; /* Value returned when the insert below succeeds. */
+ if (cl_enqueue_insert_work_item(queue, item) != CL_SUCCESS) {
+ ret = -1; /* Insert failed: return -1 to the caller. */
+ set_work_item_status(item, ret); /* Also hand the -1 to set_work_item_status(). */
+ }
+ } else {
+ ret = item->status; /* Not CL_QUEUED: nothing is inserted, report the current status. */
+ }
+
+ return ret; /* CL_QUEUED, -1, or the unchanged item->status. */
+}
+
LOCAL cl_command_queue_work_item cl_enqueue_create_work_item(cl_command_queue queue,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event event)
{
diff --git a/libclapi/cl_internals.h b/libclapi/cl_internals.h
index f32bc65c..fa6942b8 100644
--- a/libclapi/cl_internals.h
+++ b/libclapi/cl_internals.h
@@ -487,7 +487,11 @@ extern void cl_release_command_queue(cl_command_queue queue);
extern cl_int cl_retain_event(cl_event e);
extern cl_int cl_event_get_status(cl_event event);
extern cl_int cl_event_set_status(cl_event event, cl_int status);
+extern cl_int cl_enqueue_insert_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item);
+extern cl_int cl_enqueue_submit_work_item(cl_command_queue queue, cl_command_queue_work_item item);
+extern cl_int cl_enqueue_run_work_item(cl_command_queue queue, cl_command_queue_work_item item);
+extern cl_int cl_enqueue_complete_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_command_queue_work_item cl_enqueue_create_work_item(cl_command_queue queue,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event event);
extern void cl_enqueue_destroy_work_item(cl_command_queue queue, cl_command_queue_work_item item);
diff --git a/libclapi/cl_kernel.c b/libclapi/cl_kernel.c
index 0f110bef..b0853536 100644
--- a/libclapi/cl_kernel.c
+++ b/libclapi/cl_kernel.c
@@ -773,7 +773,10 @@ static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel
const cl_event *event_wait_list, cl_event *event_ret)
{
cl_event event = NULL;
+ cl_command_queue_work_item it = NULL;
cl_int err = CL_SUCCESS;
+ uint32_t i;
+ cl_int ret_status;
if (event_ret) {
event = cl_create_event(queue->ctx, queue, CL_FALSE, num_events_in_wait_list, event_wait_list, &err);
@@ -781,21 +784,46 @@ static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel
goto error;
}
- uint32_t i;
+ it = cl_enqueue_create_work_item(queue, num_events_in_wait_list, event_wait_list, event);
+ if (it == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+
CL_MUTEX_LOCK(&kernel->lock);
for (i = 0; i < kernel->arg_num; ++i)
if (kernel->args[i]->is_set == CL_FALSE) {
- return CL_INVALID_KERNEL_ARGS;
+ err = CL_INVALID_KERNEL_ARGS;
CL_MUTEX_UNLOCK(&kernel->lock);
+ goto error;
}
CL_MUTEX_UNLOCK(&kernel->lock);
err = queue->device->driver->enqueue_nd_range_kernel(queue, kernel, work_dim, global_wk_off,
- global_wk_sz, local_wk_sz, num_events_in_wait_list, event_wait_list, event);
+ global_wk_sz, local_wk_sz, it);
if (err != CL_SUCCESS) {
goto error;
}
+ /* If no events depend, we submit it immediately. */
+ if (event_wait_list == NULL) {
+ ret_status = cl_enqueue_submit_work_item(queue, it);
+ if (ret_status < 0) {
+ /* We consider it as an error and return failure. */
+ err = CL_OUT_OF_RESOURCES;
+ goto error;
+ }
+ if (ret_status > CL_COMPLETE) {
+ err = cl_enqueue_insert_work_item(queue, it);
+ if (err != CL_SUCCESS)
+ goto error;
+ }
+ } else {
+ err = cl_enqueue_insert_work_item(queue, it);
+ if (err != CL_SUCCESS)
+ goto error;
+ }
+
if (event_ret)
*event_ret = event;
@@ -814,6 +842,8 @@ static cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel kernel
return err;
error:
+ if (it)
+ cl_enqueue_destroy_work_item(queue, it);
if (event)
cl_release_event(event);
return err;