author    | Junyan He <junyan.he@intel.com>   | 2016-09-26 16:00:07 +0800
committer | Yang Rong <rong.r.yang@intel.com> | 2016-09-28 15:59:47 +0800
commit    | 4a61637a8dbb1d7ddea131a059afd02b33df4ce0 (patch)
tree      | 705daadeb402fa24e859d0b0c5523c123cb44536 /src/cl_event.c
parent    | adb62811ea72bad4018b8e0af2cedca513a9eea4 (diff)
Modify all event-related functions to use the new event handle.
Rewrite cl_event and modify all the event functions to use this
new event mechanism. Events now cooperate with the command
queue's worker thread.
v2:
Fix a logic problem when event creation fails.
v3:
Make the enqueue handler default to a no-op, to handle enqueue
commands that have nothing to do.
Signed-off-by: Junyan He <junyan.he@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
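
The mechanism the message describes: each event now carries its own dependency list and status, and the queue's worker thread executes an event only once every dependency has completed. Below is a minimal standalone sketch of that flow; the types and helper names here are simplified stand-ins, not Beignet's real `_cl_event` or locking, and only `event_is_ready()`/`worker_try_exec()` loosely mirror `cl_event_is_ready()`/`cl_event_exec()` from this patch:

```c
/* Standalone sketch of the dependency-driven event model this patch
 * introduces. Status codes follow OpenCL numbering (CL_COMPLETE = 0,
 * CL_SUBMITTED = 2, CL_QUEUED = 3); everything else is hypothetical. */
#include <stdio.h>

#define STATUS_COMPLETE  0
#define STATUS_SUBMITTED 2
#define STATUS_QUEUED    3

typedef struct sketch_event {
  int status;                         /* QUEUED..COMPLETE, <0 on error */
  int depend_event_num;
  struct sketch_event **depend_events;
} sketch_event;

/* Mirrors cl_event_is_ready(): ready iff every dependency completed.
 * Returns 0 when ready, >0 when still pending, <0 on error. */
static int event_is_ready(sketch_event *e)
{
  int i;
  for (i = 0; i < e->depend_event_num; i++) {
    if (e->depend_events[i]->status != STATUS_COMPLETE)
      return e->depend_events[i]->status;
  }
  return STATUS_COMPLETE;
}

/* Mirrors the worker-thread step: advance an event only when its
 * dependencies are done, as cl_event_exec() does in the patch. */
static void worker_try_exec(sketch_event *e)
{
  if (event_is_ready(e) == STATUS_COMPLETE) {
    /* the real code dispatches via cl_enqueue_handle() here */
    e->status = STATUS_COMPLETE;
    printf("event executed\n");
  } else {
    printf("event deferred, dependencies pending\n");
  }
}

int main(void)
{
  sketch_event user = { STATUS_SUBMITTED, 0, NULL };
  sketch_event *deps[1] = { &user };
  sketch_event cmd = { STATUS_QUEUED, 1, deps };

  worker_try_exec(&cmd);         /* deferred: user event not complete */
  user.status = STATUS_COMPLETE; /* like clSetUserEventStatus(CL_COMPLETE) */
  worker_try_exec(&cmd);         /* now runs */
  return 0;
}
```
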
Diffstat (limited to 'src/cl_event.c')
-rw-r--r-- | src/cl_event.c | 1067
1 file changed, 466 insertions, 601 deletions
```diff
diff --git a/src/cl_event.c b/src/cl_event.c
index 6c7c2e0c..4acd619b 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -14,750 +14,615 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
- * Author: Rong Yang <rong.r.yang@intel.com>
  */
 #include "cl_event.h"
 #include "cl_context.h"
-#include "cl_utils.h"
-#include "cl_alloc.h"
-#include "cl_khr_icd.h"
-#include "cl_kernel.h"
 #include "cl_command_queue.h"
-
-#include <assert.h>
+#include "cl_alloc.h"
+#include <string.h>
 #include <stdio.h>

-void cl_event_update_last_events(cl_command_queue queue, int wait)
+LOCAL cl_int
+cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
 {
-  cl_event last_event = get_last_event(queue);
-  if(!last_event) return;
-  cl_event next, now;
-  now = last_event;
-  while(now){
-    next = now->last_next;//get next first in case set status maintain it
-    cl_event_update_status(now,wait);//update event status
-    now = next;
-  }
+  // TODO:
+  return CL_INVALID_VALUE;
 }

-void cl_event_insert_last_events(cl_command_queue queue,cl_event event)
+LOCAL cl_ulong
+cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong end_timestamp)
 {
-  if(!event) return;
-  cl_event last_event = get_last_event(queue);
-  if(last_event){
-    cl_event now = last_event;
-    while(now->last_next)
-      now = now->last_next;
-    now->last_next = event;
-    event->last_prev = now;
+  cl_ulong ret_val;
+
+  if (end_timestamp > start_timestamp) {
+    ret_val = end_timestamp - start_timestamp;
+  } else {
+    /*if start time stamp is greater than end timstamp then set ret value to max*/
+    ret_val = ((cl_ulong)1 << 32);
   }
-  else set_last_event(queue,event);
+
+  return ret_val;
 }

-static inline cl_bool
-cl_event_is_gpu_command_type(cl_command_type type)
+LOCAL cl_ulong
+cl_event_get_start_timestamp(cl_event event)
 {
-  switch(type) {
-    case CL_COMMAND_COPY_BUFFER:
-    case CL_COMMAND_FILL_BUFFER:
-    case CL_COMMAND_COPY_IMAGE:
-    case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
-    case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
-    case CL_COMMAND_COPY_BUFFER_RECT:
-    case CL_COMMAND_TASK:
-    case CL_COMMAND_NDRANGE_KERNEL:
-      return CL_TRUE;
-    default:
-      return CL_FALSE;
-  }
+  cl_ulong ret_val;
+
+  ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[2]);
+
+  return ret_val;
 }

-int cl_event_flush(cl_event event)
+LOCAL cl_ulong
+cl_event_get_end_timestamp(cl_event event)
 {
-  int err = CL_SUCCESS;
-  if(!event) {
-    err = CL_INVALID_VALUE;
-    return err;
-  }
+  cl_ulong ret_val;

-  assert(event->gpgpu_event != NULL);
-  if (event->gpgpu) {
-    err = cl_command_queue_flush_gpgpu(event->queue, event->gpgpu);
-    cl_gpgpu_delete(event->gpgpu);
-    event->gpgpu = NULL;
-  }
-  cl_gpgpu_event_flush(event->gpgpu_event);
-  cl_event_insert_last_events(event->queue,event);
-  return err;
+  ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[3]);
+
+  return ret_val;
 }

-cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
+LOCAL void
+cl_event_add_ref(cl_event event)
 {
-  cl_event event = NULL;
-  GET_QUEUE_THREAD_GPGPU(queue);
+  assert(event);
+  CL_OBJECT_INC_REF(event);
+}

-  /* Allocate and inialize the structure itself */
-  TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event));
-  CL_OBJECT_INIT_BASE(event, CL_OBJECT_EVENT_MAGIC);
+LOCAL cl_int
+cl_event_get_status(cl_event event)
+{
+  cl_int ret;
+
+  assert(event);
+  CL_OBJECT_LOCK(event);
+  ret = event->status;
+  CL_OBJECT_UNLOCK(event);
+  return ret;
+}
+
+static cl_event
+cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
+             cl_uint num_events, cl_event *event_list)
+{
+  cl_event e = cl_calloc(1, sizeof(_cl_event));
+  if (e == NULL)
+    return NULL;
+
+  CL_OBJECT_INIT_BASE(e, CL_OBJECT_EVENT_MAGIC);

   /* Append the event in the context event list */
-  cl_context_add_event(ctx, event);
-
-  /* Initialize all members and create GPGPU event object */
-  event->queue = queue;
-  event->type = type;
-  event->gpgpu_event = NULL;
-  if(type == CL_COMMAND_USER) {
-    event->status = CL_SUBMITTED;
+  cl_context_add_event(ctx, e);
+  e->queue = queue;
+
+  list_init(&e->callbacks);
+  list_init(&e->enqueue_node);
+
+  assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_FILL_IMAGE);
+  e->event_type = type;
+  if (type == CL_COMMAND_USER) {
+    e->status = CL_SUBMITTED;
+  } else {
+    e->status = CL_QUEUED;
   }
-  else {
-    event->status = CL_QUEUED;
-    if(cl_event_is_gpu_command_type(event->type))
-      event->gpgpu_event = cl_gpgpu_event_new(gpgpu);
+
+  if (type == CL_COMMAND_USER) {
+    assert(queue == NULL);
   }
-  cl_event_add_ref(event); //dec when complete
-  event->user_cb = NULL;
-  event->enqueue_cb = NULL;
-  event->waits_head = NULL;
-  event->emplict = emplict;
-
-exit:
-  return event;
-error:
-  cl_event_delete(event);
-  event = NULL;
-  goto exit;
+
+  e->depend_events = event_list;
+  e->depend_event_num = num_events;
+  return e;
 }

-void cl_event_delete(cl_event event)
+LOCAL void
+cl_event_delete(cl_event event)
 {
+  int i;
+  cl_event_user_callback cb;
+
   if (UNLIKELY(event == NULL))
     return;

-  cl_event_update_status(event, 0);
-
   if (CL_OBJECT_DEC_REF(event) > 1)
     return;

-  /* Call all user's callback if haven't execute */
-  cl_event_call_callback(event, CL_COMPLETE, CL_TRUE); // CL_COMPLETE status will force all callbacks that are not executed to run
+  cl_enqueue_delete(&event->exec_data);

-  /* delete gpgpu event object */
-  if(event->gpgpu_event)
-    cl_gpgpu_event_delete(event->gpgpu_event);
+  assert(list_empty(&event->enqueue_node));

-  /* Remove it from the list */
-  cl_context_remove_event(event->ctx, event);
+  if (event->depend_events) {
+    assert(event->depend_event_num);
+    for (i = 0; i < event->depend_event_num; i++) {
+      cl_event_delete(event->depend_events[i]);
+    }
+    cl_free(event->depend_events);
+  }

-  if (event->gpgpu) {
-    fprintf(stderr, "Warning: a event is deleted with a pending enqueued task.\n");
-    cl_gpgpu_delete(event->gpgpu);
-    event->gpgpu = NULL;
+  /* Free all the callbacks. Last ref, no need to lock. */
+  while (!list_empty(&event->callbacks)) {
+    cb = list_entry(event->callbacks.next, _cl_event_user_callback, node);
+    list_del(&cb->node);
+    cl_free(cb);
   }

+  /* Remove it from the list */
+  assert(event->ctx);
+  cl_context_remove_event(event->ctx, event);
+
   CL_OBJECT_DESTROY_BASE(event);
   cl_free(event);
 }

-void cl_event_add_ref(cl_event event)
+LOCAL cl_event
+cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
+                const cl_event *event_list, cl_command_type type, cl_int *errcode_ret)
 {
-  assert(event);
-  CL_OBJECT_INC_REF(event);
-}
+  cl_event e = NULL;
+  cl_event *depend_events = NULL;
+  cl_int err = CL_SUCCESS;
+  cl_uint total_events = 0;
+  int i;

-cl_int cl_event_set_callback(cl_event event ,
-                             cl_int command_exec_callback_type,
-                             EVENT_NOTIFY pfn_notify,
-                             void* user_data)
-{
-  assert(event);
-  assert(pfn_notify);
+  assert(ctx);

-  cl_int err = CL_SUCCESS;
-  user_callback *cb;
-  TRY_ALLOC(cb, CALLOC(user_callback));
-
-  cb->pfn_notify = pfn_notify;
-  cb->user_data = user_data;
-  cb->status = command_exec_callback_type;
-  cb->executed = CL_FALSE;
-
-
-  // It is possible that the event enqueued is already completed.
-  // clEnqueueReadBuffer can be synchronous and when the callback
-  // is registered after, it still needs to get executed.
-  CL_OBJECT_LOCK(event); // Thread safety required: operations on the event->status can be made from many different threads
-  if(event->status <= command_exec_callback_type) {
-    /* Call user callback */
-    CL_OBJECT_UNLOCK(event); // pfn_notify can call clFunctions that use the event_lock and from here it's not required
-    cb->pfn_notify(event, event->status, cb->user_data);
-    cl_free(cb);
-  } else {
-    // Enqueue to callback list
-    cb->next = event->user_cb;
-    event->user_cb = cb;
-    CL_OBJECT_UNLOCK(event);
-  }
+  do {
+    if (event_list)
+      assert(num_events);

-exit:
-  return err;
-error:
-  err = CL_OUT_OF_HOST_MEMORY;
-  cl_free(cb);
-  goto exit;
-};
+    if (queue == NULL) {
+      assert(type == CL_COMMAND_USER);
+      assert(event_list == NULL);
+      assert(num_events == 0);

-cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
-                               const cl_event *event_wait_list,
-                               cl_event *event,cl_context ctx)
-{
-  cl_int err = CL_SUCCESS;
-  cl_int i;
-  /* check the event_wait_list and num_events_in_wait_list */
-  if((event_wait_list == NULL) &&
-     (num_events_in_wait_list > 0))
-    goto error;
-
-  if ((event_wait_list != NULL) &&
-      (num_events_in_wait_list == 0)){
-    goto error;
-  }
+      e = cl_event_new(ctx, queue, type, 0, NULL);
+      if (e == NULL) {
+        err = CL_OUT_OF_HOST_MEMORY;
+        break;
+      }
+    } else {
+      CL_OBJECT_LOCK(queue);
+      total_events = queue->barrier_events_num + num_events;
+
+      if (total_events) {
+        depend_events = cl_calloc(total_events, sizeof(cl_event));
+        if (depend_events == NULL) {
+          CL_OBJECT_UNLOCK(queue);
+          err = CL_OUT_OF_HOST_MEMORY;
+          break;
+        }
+      }

-  /* check the event and context */
-  for(i=0; i<num_events_in_wait_list; i++) {
-    CHECK_EVENT(event_wait_list[i]);
-    if(event_wait_list[i]->status < CL_COMPLETE) {
-      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
-      goto exit;
-    }
-    if(event && event == &event_wait_list[i])
-      goto error;
-    if(event_wait_list[i]->ctx != ctx) {
-      err = CL_INVALID_CONTEXT;
-      goto exit;
-    }
-  }
+      /* Add all the barrier events as depend events. */
+      for (i = 0; i < queue->barrier_events_num; i++) {
+        assert(CL_EVENT_IS_BARRIER(queue->barrier_events[i]));
+        cl_event_add_ref(queue->barrier_events[i]);
+        depend_events[num_events + i] = queue->barrier_events[i];
+      }

-exit:
-  return err;
-error:
-  err = CL_INVALID_EVENT_WAIT_LIST; //reset error
-  goto exit;
-}
+      CL_OBJECT_UNLOCK(queue);

-cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
-                            cl_command_queue queue)
-{
-  cl_int i;
+      for (i = 0; i < num_events; i++) {
+        assert(event_list[i]);
+        assert(event_list[i]->ctx == ctx);
+        assert(CL_OBJECT_IS_EVENT(event_list[i]));
+        cl_event_add_ref(event_list[i]);
+        depend_events[i] = event_list[i];
+      }

-  /* Check whether wait user events */
-  for(i=0; i<num_events_in_wait_list; i++) {
-    if(event_wait_list[i]->status <= CL_COMPLETE)
-      continue;
+      if (depend_events)
+        assert(total_events);

-    /* Need wait on user event, return and do enqueue defer */
-    if((event_wait_list[i]->type == CL_COMMAND_USER) ||
-       (event_wait_list[i]->enqueue_cb &&
-        (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
-      return CL_ENQUEUE_EXECUTE_DEFER;
-    }
-  }
+      e = cl_event_new(ctx, queue, type, total_events, depend_events);
+      if (e == NULL) {
+        err = CL_OUT_OF_HOST_MEMORY;
+        break;
+      }
+      depend_events = NULL;
+    }
+  } while (0);

-  if(queue && queue->barrier_events_num )
-    return CL_ENQUEUE_EXECUTE_DEFER;
+  if (err != CL_SUCCESS) {
+    if (depend_events) {
+      for (i = 0; i < total_events; i++) {
+        cl_event_delete(depend_events[i]);
+      }
+      cl_free(depend_events);
+    }

-  /* Non user events or all user event finished, wait all enqueue events finish */
-  for(i=0; i<num_events_in_wait_list; i++) {
-    if(event_wait_list[i]->status <= CL_COMPLETE)
-      continue;
+    // if set depend_events, must succeed.
+    assert(e->depend_events == NULL);
+    cl_event_delete(e);
+  }

-    //enqueue callback haven't finish, in another thread, wait
-    if(event_wait_list[i]->enqueue_cb != NULL)
-      return CL_ENQUEUE_EXECUTE_DEFER;
-    if(event_wait_list[i]->gpgpu_event)
-      cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
-    cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback
-  }
-  return CL_ENQUEUE_EXECUTE_IMM;
+  if (errcode_ret)
+    *errcode_ret = err;
+
+  return e;
 }

-void cl_event_new_enqueue_callback(cl_event event,
-                                   enqueue_data *data,
-                                   cl_uint num_events_in_wait_list,
-                                   const cl_event *event_wait_list)
+LOCAL cl_int
+cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_notify, void *user_data)
 {
-  enqueue_callback *cb, *node;
-  user_event *user_events, *u_ev;
-  cl_command_queue queue = event ? event->queue : NULL;
-  cl_int i;
   cl_int err = CL_SUCCESS;
+  cl_event_user_callback cb;
+  cl_bool exec_imm = CL_FALSE;

-  /* Allocate and initialize the structure itself */
-  TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
-  cb->num_events = 0;
-  TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list));
-  for(i=0; i<num_events_in_wait_list; i++) {
-    //user event will insert to cb->wait_user_events, need not in wait list, avoid ref twice
-    if(event_wait_list[i]->type != CL_COMMAND_USER) {
-      cb->wait_list[cb->num_events++] = event_wait_list[i];
-      cl_event_add_ref(event_wait_list[i]); //add defer enqueue's wait event reference
-    }
-  }
-  cb->event = event;
-  cb->next = NULL;
-  cb->wait_user_events = NULL;
-
-  if(queue && queue->barrier_events_num > 0) {
-    for(i=0; i<queue->barrier_events_num; i++) {
-      /* Insert the enqueue_callback to user event list */
-      node = queue->wait_events[i]->waits_head;
-      if(node == NULL)
-        queue->wait_events[i]->waits_head = cb;
-      else{
-        while((node != cb) && node->next)
-          node = node->next;
-        if(node == cb) //wait on dup user event
-          continue;
-        node->next = cb;
-      }
+  assert(event);
+  assert(pfn_notify);

-      /* Insert the user event to enqueue_callback's wait_user_events */
-      TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]);
-      cl_event_add_ref(queue->wait_events[i]);
+  do {
+    cb = cl_calloc(1, sizeof(_cl_event_user_callback));
+    if (cb == NULL) {
+      err = CL_OUT_OF_HOST_MEMORY;
+      break;
     }
-  }

-  /* Find out all user events that in event_wait_list wait */
-  for(i=0; i<num_events_in_wait_list; i++) {
-    if(event_wait_list[i]->status <= CL_COMPLETE)
-      continue;
-
-    if(event_wait_list[i]->type == CL_COMMAND_USER) {
-      /* Insert the enqueue_callback to user event list */
-      node = event_wait_list[i]->waits_head;
-      if(node == NULL)
-        event_wait_list[i]->waits_head = cb;
-      else {
-        while((node != cb) && node->next)
-          node = node->next;
-        if(node == cb) //wait on dup user event
-          continue;
-        node->next = cb;
-      }
-      /* Insert the user event to enqueue_callback's wait_user_events */
-      TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]);
-      cl_event_add_ref(event_wait_list[i]);
-      if(queue)
-        cl_command_queue_insert_event(queue, event_wait_list[i]);
-      if(queue && data->type == EnqueueBarrier){
-        cl_command_queue_insert_barrier_event(queue, event_wait_list[i]);
-      }
-    } else if(event_wait_list[i]->enqueue_cb != NULL) {
-      user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
-      while(user_events != NULL) {
-        /* Insert the enqueue_callback to user event's waits_tail */
-        node = user_events->event->waits_head;
-        if(node == NULL)
-          event_wait_list[i]->waits_head = cb;
-        else{
-          while((node != cb) && node->next)
-            node = node->next;
-          if(node == cb) { //wait on dup user event
-            user_events = user_events->next;
-            continue;
-          }
-          node->next = cb;
-        }
-
-        /* Insert the user event to enqueue_callback's wait_user_events */
-        TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event);
-        cl_event_add_ref(user_events->event);
-        if(queue)
-          cl_command_queue_insert_event(event->queue, user_events->event);
-        if(queue && data->type == EnqueueBarrier){
-          cl_command_queue_insert_barrier_event(event->queue, user_events->event);
-        }
-        user_events = user_events->next;
-      }
-    }
-  }
-  if(event != NULL && event->queue != NULL && event->gpgpu_event != NULL) {
-    event->gpgpu = cl_thread_gpgpu_take(event->queue);
-    data->ptr = (void *)event->gpgpu_event;
-  }
-  cb->data = *data;
-  if(event)
-    event->enqueue_cb = cb;
-
-exit:
-  return;
-error:
-  if(cb) {
-    while(cb->wait_user_events) {
-      u_ev = cb->wait_user_events;
-      cb->wait_user_events = cb->wait_user_events->next;
-      cl_event_delete(u_ev->event);
-      cl_free(u_ev);
+    list_init(&cb->node);
+    cb->pfn_notify = pfn_notify;
+    cb->user_data = user_data;
+    cb->status = exec_type;
+    cb->executed = CL_FALSE;
+
+    CL_OBJECT_LOCK(event);
+    if (event->status > exec_type) {
+      list_add_tail(&cb->node, &event->callbacks);
+      cb = NULL;
+    } else {
+      /* The state has already OK, call it immediately. */
+      exec_imm = CL_TRUE;
     }
-    for(i=0; i<cb->num_events; i++) {
-      if(cb->wait_list[i]) {
-        cl_event_delete(cb->wait_list[i]);
-      }
-    }
-    cl_free(cb);
-  }
-  goto exit;
-}
+    CL_OBJECT_UNLOCK(event);

-void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb) {
-  user_callback *user_cb = NULL;
-  user_callback *queue_cb = NULL; // For thread safety, we create a queue that holds user_callback's pfn_notify contents
-  user_callback *temp_cb = NULL;
-  user_cb = event->user_cb;
-  CL_OBJECT_LOCK(event);
-  while(user_cb) {
-    if(user_cb->status >= status
-        && user_cb->executed == CL_FALSE) { // Added check to not execute a callback when it was already handled
-      user_cb->executed = CL_TRUE;
-      temp_cb = cl_malloc(sizeof(user_callback));
-      if(!temp_cb) {
-        break; // Out of memory
-      }
-      temp_cb->pfn_notify = user_cb->pfn_notify; // Minor struct copy to call ppfn_notify out of the pthread_mutex
-      temp_cb->user_data = user_cb->user_data;
-      if(free_cb) {
-        cl_free(user_cb);
-      }
-      if(!queue_cb) {
-        queue_cb = temp_cb;
-        queue_cb->next = NULL;
-      } else { // Enqueue First
-        temp_cb->next = queue_cb;
-        queue_cb = temp_cb;
-      }
+    if (exec_imm) {
+      cb->pfn_notify(event, event->status, cb->user_data);
     }
-    user_cb = user_cb->next;
-  }
-  CL_OBJECT_UNLOCK(event);
+  } while (0);

-  // Calling the callbacks outside of the event_lock is required because the callback can call cl_api functions and get deadlocked
-  while(queue_cb) { // For each callback queued, actually execute the callback
-    queue_cb->pfn_notify(event, event->status, queue_cb->user_data);
-    temp_cb = queue_cb;
-    queue_cb = queue_cb->next;
-    cl_free(temp_cb);
-  }
+  if (cb)
+    cl_free(cb);
+
+  return err;
 }

-void cl_event_set_status(cl_event event, cl_int status)
+LOCAL cl_int
+cl_event_set_status(cl_event event, cl_int status)
 {
-  cl_int ret, i;
-  cl_event evt;
+  list_head tmp_callbacks;
+  list_head *n;
+  list_head *pos;
+  cl_bool notify_queue = CL_FALSE;
+  cl_event_user_callback cb;
+
+  assert(event);

   CL_OBJECT_LOCK(event);
-  if(status >= event->status) {
+  if (event->status <= CL_COMPLETE) { // Already set to error or completed
     CL_OBJECT_UNLOCK(event);
-    return;
+    return CL_INVALID_OPERATION;
   }
-  if(event->status <= CL_COMPLETE) {
-    event->status = status; //have done enqueue before or doing in another thread
-    CL_OBJECT_UNLOCK(event);
-    return;
+
+  if (CL_EVENT_IS_USER(event)) {
+    assert(event->status != CL_RUNNING && event->status != CL_QUEUED);
+  } else {
+    assert(event->queue); // Must belong to some queue.
   }

-  if(status <= CL_COMPLETE) {
-    if(event->enqueue_cb) {
-      if(status == CL_COMPLETE) {
-        cl_enqueue_handle(event, &event->enqueue_cb->data);
-        if(event->gpgpu_event)
-          cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complet, need refine
-      } else {
-        if(event->gpgpu_event) {
-          // Error then cancel the enqueued event.
-          cl_gpgpu_delete(event->gpgpu);
-          event->gpgpu = NULL;
-        }
-      }
+  if (status >= event->status) { // Should never go back.
+    CL_OBJECT_UNLOCK(event);
+    return CL_INVALID_OPERATION;
+  }

-      event->status = status; //Change the event status after enqueue and befor unlock
+  event->status = status;
+
+  /* Call all the callbacks. */
+  if (!list_empty(&event->callbacks)) {
+    do {
+      status = event->status;
+      list_init(&tmp_callbacks);
+      list_replace(&event->callbacks, &tmp_callbacks);
+      list_init(&event->callbacks);
+      /* Call all the callbacks without lock. */
       CL_OBJECT_UNLOCK(event);
-      for(i=0; i<event->enqueue_cb->num_events; i++)
-        cl_event_delete(event->enqueue_cb->wait_list[i]);
+
+      list_for_each_safe(pos, n, &tmp_callbacks)
+      {
+        cb = list_entry(pos, _cl_event_user_callback, node);
+
+        assert(cb->executed == CL_FALSE);
+
+        if (cb->status < status)
+          continue;
+
+        list_del(&cb->node);
+        cb->executed = CL_TRUE;
+        cb->pfn_notify(event, status, cb->user_data);
+        cl_free(cb);
+      }
+
       CL_OBJECT_LOCK(event);
-      if(event->enqueue_cb->wait_list)
-        cl_free(event->enqueue_cb->wait_list);
-      cl_free(event->enqueue_cb);
-      event->enqueue_cb = NULL;
-    }
+
+      // Set back the uncalled callbacks.
+      list_splice_tail(&tmp_callbacks, &event->callbacks);
+
+      /* Status may changed because we unlock. need to check again. */
+    } while (status != event->status);
   }
-  if(event->status >= status) //maybe changed in other threads
-    event->status = status;
+
+  /* Wakeup all the waiter for status change. */
+  CL_OBJECT_NOTIFY_COND(event);
+
+  if (event->status <= CL_COMPLETE) {
+    notify_queue = CL_TRUE;
+  }
+
   CL_OBJECT_UNLOCK(event);

-  /* Call user callback */
-  cl_event_call_callback(event, status, CL_FALSE);
+  /* Need to notify all the command queue within the same context. */
+  if (notify_queue) {
+    cl_command_queue *q_list = NULL;
+    cl_uint queue_num = 0;
+    int i = 0;
+    int cookie = 0;
+
+    /*First, we need to remove it from queue's barrier list. */
+    if (CL_EVENT_IS_BARRIER(event)) {
+      assert(event->queue);
+      cl_command_queue_remove_barrier_event(event->queue, event);
+    }

-  if(event->type == CL_COMMAND_USER) {
-    /* Check all defer enqueue */
-    enqueue_callback *cb, *enqueue_cb = event->waits_head;
-    while(enqueue_cb) {
-      /* Remove this user event in enqueue_cb, update the header if needed. */
-      cl_event_remove_user_event(&enqueue_cb->wait_user_events, event);
-      cl_event_delete(event);
+    /* Then, notify all the queues within the same context. */
+    CL_OBJECT_LOCK(event->ctx);
+    do {
+      queue_num = event->ctx->queue_num;
+      cookie = event->ctx->queue_cookie;
+
+      if (queue_num > 0) {
+        q_list = cl_calloc(queue_num, sizeof(cl_command_queue));
+        assert(q_list);
+        i = 0;
+        list_for_each(pos, &event->ctx->queues)
+        {
+          q_list[i] = (cl_command_queue)(list_entry(pos, _cl_base_object, node));
+          assert(i < queue_num);
+          i++;
+        }

-      /* Still wait on other user events */
-      if(enqueue_cb->wait_user_events != NULL) {
-        enqueue_cb = enqueue_cb->next;
-        continue;
-      }
+        CL_OBJECT_UNLOCK(event->ctx); // Update status without context lock.

-      //remove user event frome enqueue_cb's ctx
-      cl_command_queue_remove_event(enqueue_cb->event->queue, event);
-      cl_command_queue_remove_barrier_event(enqueue_cb->event->queue, event);
-
-      /* All user events complete, now wait enqueue events */
-      ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
-                                 enqueue_cb->event->queue);
-      assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
-      ret = ~ret;
-      cb = enqueue_cb;
-      enqueue_cb = enqueue_cb->next;
-
-      /* Call the pending operation */
-      evt = cb->event;
-      /* TODO: if this event wait on several events, one event's
-         status is error, the others is complete, what's the status
-         of this event? Can't find the description in OpenCL spec.
-         Simply update to latest finish wait event.*/
-      cl_event_set_status(cb->event, status);
-      if(evt->emplict == CL_FALSE) {
-        cl_event_delete(evt);
+        for (i = 0; i < queue_num; i++) {
+          cl_command_queue_notify(q_list[i]);
+        }
+
+        CL_OBJECT_LOCK(event->ctx); // Lock again.
+      } else {
+        /* No queue? Just do nothing. */
       }
-    }
-    event->waits_head = NULL;
-  }
-  if(event->status <= CL_COMPLETE){
-    /* Maintain the last_list when event completed*/
-    if (event->last_prev)
-      event->last_prev->last_next = event->last_next;
-    if (event->last_next)
-      event->last_next->last_prev = event->last_prev;
-    if(event->queue && get_last_event(event->queue) == event)
-      set_last_event(event->queue, event->last_next);
-    event->last_prev = NULL;
-    event->last_next = NULL;
-    cl_event_delete(event);
-  }
+    } while (cookie != event->ctx->queue_cookie); // Some queue may be added when we unlock.
+    CL_OBJECT_UNLOCK(event->ctx);
+
+    if (q_list)
+      cl_free(q_list);
+  }
+
+  return CL_SUCCESS;
 }

-void cl_event_update_status(cl_event event, int wait)
+LOCAL cl_int
+cl_event_wait_for_event_ready(const cl_event event)
 {
-  if(event->status <= CL_COMPLETE)
-    return;
-  if((event->gpgpu_event) &&
-     (cl_gpgpu_event_update_status(event->gpgpu_event, wait) == command_complete))
-    cl_event_set_status(event, CL_COMPLETE);
+  assert(CL_OBJECT_IS_EVENT(event));
+  return cl_event_wait_for_events_list(event->depend_event_num, event->depend_events);
 }

-cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
-                                      cl_uint num_events_in_wait_list,
-                                      const cl_event *event_wait_list,
-                                      cl_event* event)
+LOCAL cl_int
+cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list)
 {
-  enqueue_data data = { 0 };
+  int i;
   cl_event e;
+  cl_int ret = CL_SUCCESS;

-  e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
-  if(e == NULL)
-    return CL_OUT_OF_HOST_MEMORY;
-
-  if(event != NULL ){
-    *event = e;
-  }
+  for (i = 0; i < num_events; i++) {
+    e = event_list[i];
+    assert(e);
+    assert(CL_OBJECT_IS_EVENT(e));

-//enqueues a marker command which waits for either a list of events to complete, or if the list is
-//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
-  if(num_events_in_wait_list > 0){
-    if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
-      data.type = EnqueueMarker;
-      cl_event_new_enqueue_callback(event?*event:NULL, &data, num_events_in_wait_list, event_wait_list);
-      return CL_SUCCESS;
+    CL_OBJECT_LOCK(e);
+    while (e->status > CL_COMPLETE) {
+      CL_OBJECT_WAIT_ON_COND(e);
     }
-  } else if(queue->wait_events_num > 0) {
-    data.type = EnqueueMarker;
-    cl_event_new_enqueue_callback(event?*event:NULL, &data, queue->wait_events_num, queue->wait_events);
-    return CL_SUCCESS;
+    /* Iff some error happened, return the error. */
+    if (e->status < CL_COMPLETE) {
+      ret = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+    }
+    CL_OBJECT_UNLOCK(e);
   }

-  cl_event_update_last_events(queue,1);
-
-  cl_event_set_status(e, CL_COMPLETE);
-  return CL_SUCCESS;
+  return ret;
 }

-cl_int cl_event_barrier_with_wait_list(cl_command_queue queue,
-                                       cl_uint num_events_in_wait_list,
-                                       const cl_event *event_wait_list,
-                                       cl_event* event)
+LOCAL cl_int
+cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+                        cl_event *event, cl_context ctx)
 {
-  enqueue_data data = { 0 };
-  cl_event e;
-
-  e = cl_event_new(queue->ctx, queue, CL_COMMAND_BARRIER, CL_TRUE);
-  if(e == NULL)
-    return CL_OUT_OF_HOST_MEMORY;
+  cl_int err = CL_SUCCESS;
+  cl_int i;

-  if(event != NULL ){
-    *event = e;
-  }
-//enqueues a barrier command which waits for either a list of events to complete, or if the list is
-//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
-  if(num_events_in_wait_list > 0){
-    if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
-      data.type = EnqueueBarrier;
-      cl_event_new_enqueue_callback(e, &data, num_events_in_wait_list, event_wait_list);
-      return CL_SUCCESS;
+  do {
+    /* check the event_wait_list and num_events_in_wait_list */
+    if ((event_wait_list == NULL) && (num_events_in_wait_list > 0)) {
+      err = CL_INVALID_EVENT_WAIT_LIST;
+      break;
     }
-  } else if(queue->wait_events_num > 0) {
-    data.type = EnqueueBarrier;
-    cl_event_new_enqueue_callback(e, &data, queue->wait_events_num, queue->wait_events);
-    return CL_SUCCESS;
-  }

-  cl_event_update_last_events(queue,1);
+    if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)) {
+      err = CL_INVALID_EVENT_WAIT_LIST;
+      break;
+    }

-  cl_event_set_status(e, CL_COMPLETE);
-  return CL_SUCCESS;
-}
+    /* check the event and context */
+    for (i = 0; i < num_events_in_wait_list; i++) {
+      if (event_wait_list[i] == NULL || !CL_OBJECT_IS_EVENT(event_wait_list[i])) {
+        err = CL_INVALID_EVENT;
+        break;
+      }

-cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time)
-{
-  struct timespec ts;
+      if (cl_event_get_status(event_wait_list[i]) < CL_COMPLETE) {
+        err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+        break;
+      }

-  if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){
-    printf("CPU Timmer error\n");
-    return CL_FALSE;
-  }
-  *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec;
+      if (event == event_wait_list + i) { /* Pointer of element of the wait list */
+        err = CL_INVALID_EVENT_WAIT_LIST;
+        break;
+      }

-  return CL_SUCCESS;
-}
+      /* check all belong to same context. */
+      if (ctx == NULL) {
+        ctx = event_wait_list[i]->ctx;
+      }
+      if (event_wait_list[i]->ctx != ctx) {
+        err = CL_INVALID_CONTEXT;
+        break;
+      }
+    }

-cl_int cl_event_get_queued_cpu_timestamp(cl_event event)
-{
-  cl_int ret_val;
+    if (err != CL_SUCCESS)
+      break;

-  ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp);
+  } while (0);

-  return ret_val;
+  return err;
 }

-cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp)
+LOCAL void
+cl_event_exec(cl_event event, cl_int exec_status)
 {
-  cl_ulong ret_val;
+  /* We are MT safe here, no one should call this
+     at the same time. No need to lock */
+  cl_int ret = CL_SUCCESS;
+  cl_int status = cl_event_get_status(event);
+  cl_int depend_status;

-  if(end_timestamp > start_timestamp){
-    ret_val = end_timestamp - start_timestamp;
-  }
-  else {
-    /*if start time stamp is greater than end timstamp then set ret value to max*/
-    ret_val = ((cl_ulong) 1 << 32);
+  if (status < CL_COMPLETE || status <= exec_status) {
+    return;
   }

-  return ret_val;
-}
+  depend_status = cl_event_is_ready(event);
+  assert(depend_status <= CL_COMPLETE);
+  if (depend_status < CL_COMPLETE) { // Error happend, cancel exec.
+    ret = cl_event_set_status(event, depend_status);
+    return;
+  }

-cl_ulong cl_event_get_start_timestamp(cl_event event)
-{
-  cl_ulong ret_val;
+  /* Do the according thing based on event type. */
+  ret = cl_enqueue_handle(&event->exec_data, exec_status);

-  ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]);
+  if (ret != CL_SUCCESS) {
+    assert(ret < 0);
+    DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error staus is %d",
+           event, event->event_type, ret);
+    ret = cl_event_set_status(event, ret);
+    assert(ret == CL_SUCCESS);
+  } else {
+    ret = cl_event_set_status(event, exec_status);
+    assert(ret == CL_SUCCESS);
+  }

-  return ret_val;
 }

-cl_ulong cl_event_get_end_timestamp(cl_event event)
-{
-  cl_ulong ret_val;
-
-  ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]);
+/* 0 means ready, >0 means not ready, <0 means error. */
+LOCAL cl_int
+cl_event_is_ready(cl_event event)
+{
+  int i;
+  int status;

-  return ret_val;
-}
+  for (i = 0; i < event->depend_event_num; i++) {
+    status = cl_event_get_status(event->depend_events[i]);

-cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
-{
-  cl_ulong ret_val = 0;
-  GET_QUEUE_THREAD_GPGPU(event->queue);
-
-  if (!event->gpgpu_event) {
-    cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
-    event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
-    return CL_SUCCESS;
+    if (status != CL_COMPLETE) {
+      return status;
+    }
   }

-  if(param_name == CL_PROFILING_COMMAND_SUBMIT ||
-     param_name == CL_PROFILING_COMMAND_QUEUED) {
-    cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
-    event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
-    return CL_SUCCESS;
-  } else if(param_name == CL_PROFILING_COMMAND_START) {
-    cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 0, &ret_val);
-    event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
-    return CL_SUCCESS;
-  } else if (param_name == CL_PROFILING_COMMAND_END) {
-    cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 1, &ret_val);
-    event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
-    return CL_SUCCESS;
-  }
-  return CL_INVALID_VALUE;
+  return CL_COMPLETE;
 }

-cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event)
+LOCAL cl_event
+cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
+                                  const cl_event *event_wait_list, cl_bool is_barrier, cl_int *error)
 {
-  user_event * u_iter = *p_u_ev;
-  user_event * u_ev;
-
-  while(u_iter)
-  {
-    if(u_iter->event == event)
-      return CL_SUCCESS;
-    u_iter = u_iter->next;
-  }
+  cl_event e = NULL;
+  cl_int err = CL_SUCCESS;
+  cl_command_type type = CL_COMMAND_MARKER;
+  enqueue_type eq_type = EnqueueMarker;

-  TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
-  u_ev->event = event;
-  u_ev->next = *p_u_ev;
-  *p_u_ev = u_ev;
+  if (is_barrier) {
+    type = CL_COMMAND_BARRIER;
+    eq_type = EnqueueBarrier;
+  }

+  if (event_wait_list) {
+    assert(num_events_in_wait_list > 0);

-  return CL_SUCCESS;
-error:
-  return CL_FALSE;
-}
+    e = cl_event_create(queue->ctx, queue, num_events_in_wait_list,
+                        event_wait_list, type, &err);
+    if (err != CL_SUCCESS) {
+      *error = err;
+      return NULL;
+    }
+  } else { /* The marker depends on all events in the queue now. */
+    cl_command_queue_enqueue_worker worker = &queue->worker;
+    cl_uint i;
+    cl_uint event_num;
+    cl_event *depend_events;
+
+    CL_OBJECT_LOCK(queue);
+
+    /* First, wait for the command queue retire all in executing event. */
+    while (1) {
+      if (worker->quit) { // already destroy the queue?
+        CL_OBJECT_UNLOCK(queue);
+        *error = CL_INVALID_COMMAND_QUEUE;
+        return NULL;
+      }

-cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event)
-{
-  user_event * u_iter = *p_u_ev;
-  user_event * u_prev = *p_u_ev;
-
-  while(u_iter){
-    if(u_iter->event == event ){
-      if(u_iter == *p_u_ev){
-        *p_u_ev = u_iter->next;
-      }else{
-        u_prev->next = u_iter->next;
+      if (worker->in_exec_status != CL_COMPLETE) {
+        CL_OBJECT_WAIT_ON_COND(queue);
+        continue;
       }
-      cl_free(u_iter);
+      break;
     }
-    u_prev = u_iter;
-    u_iter = u_iter->next;
+
+    event_num = 0;
+    depend_events = NULL;
+    if (!list_empty(&worker->enqueued_events)) {
+      depend_events = cl_command_queue_record_in_queue_events(queue, &event_num);
+    }
+
+    CL_OBJECT_UNLOCK(queue);
+
+    e = cl_event_create(queue->ctx, queue, event_num, depend_events, type, &err);
+
+    for (i = 0; i < event_num; i++) { //unref the temp
+      cl_event_delete(depend_events[i]);
+    }
+    if (depend_events)
+      cl_free(depend_events);
+
+    if (err != CL_SUCCESS) {
+      *error = err;
+      return NULL;
+    }
   }

-  return CL_SUCCESS;
+  e->exec_data.type = eq_type;
+  *error = CL_SUCCESS;
+  return e;
 }
```
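
One detail of the new cl_event_set_status() above worth noting: pending callbacks are spliced onto a private list while the event lock is held, and only invoked after the lock is released, so a callback may safely re-enter the event API without deadlocking. Below is a minimal sketch of that pattern using plain pthreads; the types and function names are hypothetical stand-ins, not Beignet's list_head helpers, and the status-threshold filtering of the real code is omitted:

```c
/* Sketch of the "splice under lock, call after unlock" pattern.
 * Build with: cc sketch.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct cb_node {
  struct cb_node *next;
  void (*fn)(int status, void *data);
  void *data;
} cb_node;

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static cb_node *callbacks; /* guarded by lock */

static void add_callback(void (*fn)(int, void *), void *data)
{
  cb_node *n = malloc(sizeof(*n));
  n->fn = fn;
  n->data = data;
  pthread_mutex_lock(&lock);
  n->next = callbacks;
  callbacks = n;
  pthread_mutex_unlock(&lock);
}

static void set_status(int status)
{
  pthread_mutex_lock(&lock);
  cb_node *pending = callbacks; /* splice the whole list out */
  callbacks = NULL;
  pthread_mutex_unlock(&lock);  /* drop the lock before calling out */

  while (pending) {
    cb_node *n = pending;
    pending = n->next;
    n->fn(status, n->data); /* may call add_callback() without deadlock */
    free(n);
  }
}

static void on_complete(int status, void *data)
{
  printf("callback: status=%d tag=%s\n", status, (const char *)data);
}

int main(void)
{
  add_callback(on_complete, "first");
  add_callback(on_complete, "second");
  set_status(0); /* CL_COMPLETE is 0 in OpenCL */
  return 0;
}
```

The same reasoning explains the do/while loop in the real function: because the lock is dropped while callbacks run, the event status may change underneath, so the code re-checks and re-splices until the status is stable.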