summaryrefslogtreecommitdiff
path: root/src/cl_event.c
diff options
context:
space:
mode:
authorJunyan He <junyan.he@intel.com>2016-09-26 16:00:07 +0800
committerYang Rong <rong.r.yang@intel.com>2016-09-28 15:59:47 +0800
commit4a61637a8dbb1d7ddea131a059afd02b33df4ce0 (patch)
tree705daadeb402fa24e859d0b0c5523c123cb44536 /src/cl_event.c
parentadb62811ea72bad4018b8e0af2cedca513a9eea4 (diff)
Modify all event related functions using new event handle.
Rewrite the cl_event, and modify all the event functions using this new event manner. Event will co-operate with command queue's thread together. v2: Fix a logic problem in event create failed. V3: Set enqueue default to do nothing, handle some enqueue has nothing to do. Signed-off-by: Junyan He <junyan.he@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src/cl_event.c')
-rw-r--r--src/cl_event.c1067
1 files changed, 466 insertions, 601 deletions
diff --git a/src/cl_event.c b/src/cl_event.c
index 6c7c2e0c..4acd619b 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -14,750 +14,615 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
- * Author: Rong Yang <rong.r.yang@intel.com>
*/
#include "cl_event.h"
#include "cl_context.h"
-#include "cl_utils.h"
-#include "cl_alloc.h"
-#include "cl_khr_icd.h"
-#include "cl_kernel.h"
#include "cl_command_queue.h"
-
-#include <assert.h>
+#include "cl_alloc.h"
+#include <string.h>
#include <stdio.h>
-void cl_event_update_last_events(cl_command_queue queue, int wait)
+LOCAL cl_int
+cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
{
- cl_event last_event = get_last_event(queue);
- if(!last_event) return;
- cl_event next, now;
- now = last_event;
- while(now){
- next = now->last_next;//get next first in case set status maintain it
- cl_event_update_status(now,wait);//update event status
- now = next;
- }
+ // TODO:
+ return CL_INVALID_VALUE;
}
-void cl_event_insert_last_events(cl_command_queue queue,cl_event event)
+LOCAL cl_ulong
+cl_event_get_timestamp_delta(cl_ulong start_timestamp, cl_ulong end_timestamp)
{
- if(!event) return;
- cl_event last_event = get_last_event(queue);
- if(last_event){
- cl_event now = last_event;
- while(now->last_next)
- now = now->last_next;
- now->last_next = event;
- event->last_prev = now;
+ cl_ulong ret_val;
+
+ if (end_timestamp > start_timestamp) {
+ ret_val = end_timestamp - start_timestamp;
+ } else {
+ /* If the start timestamp is greater than the end timestamp, set the return value to max. */
+ ret_val = ((cl_ulong)1 << 32);
}
- else set_last_event(queue,event);
+
+ return ret_val;
}
-static inline cl_bool
-cl_event_is_gpu_command_type(cl_command_type type)
+LOCAL cl_ulong
+cl_event_get_start_timestamp(cl_event event)
{
- switch(type) {
- case CL_COMMAND_COPY_BUFFER:
- case CL_COMMAND_FILL_BUFFER:
- case CL_COMMAND_COPY_IMAGE:
- case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
- case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
- case CL_COMMAND_COPY_BUFFER_RECT:
- case CL_COMMAND_TASK:
- case CL_COMMAND_NDRANGE_KERNEL:
- return CL_TRUE;
- default:
- return CL_FALSE;
- }
+ cl_ulong ret_val;
+
+ ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[2]);
+
+ return ret_val;
}
-int cl_event_flush(cl_event event)
+LOCAL cl_ulong
+cl_event_get_end_timestamp(cl_event event)
{
- int err = CL_SUCCESS;
- if(!event) {
- err = CL_INVALID_VALUE;
- return err;
- }
+ cl_ulong ret_val;
- assert(event->gpgpu_event != NULL);
- if (event->gpgpu) {
- err = cl_command_queue_flush_gpgpu(event->queue, event->gpgpu);
- cl_gpgpu_delete(event->gpgpu);
- event->gpgpu = NULL;
- }
- cl_gpgpu_event_flush(event->gpgpu_event);
- cl_event_insert_last_events(event->queue,event);
- return err;
+ ret_val = cl_event_get_timestamp_delta(event->timestamp[0], event->timestamp[3]);
+
+ return ret_val;
}
-cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
+LOCAL void
+cl_event_add_ref(cl_event event)
{
- cl_event event = NULL;
- GET_QUEUE_THREAD_GPGPU(queue);
+ assert(event);
+ CL_OBJECT_INC_REF(event);
+}
- /* Allocate and inialize the structure itself */
- TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event));
- CL_OBJECT_INIT_BASE(event, CL_OBJECT_EVENT_MAGIC);
+LOCAL cl_int
+cl_event_get_status(cl_event event)
+{
+ cl_int ret;
+
+ assert(event);
+ CL_OBJECT_LOCK(event);
+ ret = event->status;
+ CL_OBJECT_UNLOCK(event);
+ return ret;
+}
+
+static cl_event
+cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
+ cl_uint num_events, cl_event *event_list)
+{
+ cl_event e = cl_calloc(1, sizeof(_cl_event));
+ if (e == NULL)
+ return NULL;
+
+ CL_OBJECT_INIT_BASE(e, CL_OBJECT_EVENT_MAGIC);
/* Append the event in the context event list */
- cl_context_add_event(ctx, event);
-
- /* Initialize all members and create GPGPU event object */
- event->queue = queue;
- event->type = type;
- event->gpgpu_event = NULL;
- if(type == CL_COMMAND_USER) {
- event->status = CL_SUBMITTED;
+ cl_context_add_event(ctx, e);
+ e->queue = queue;
+
+ list_init(&e->callbacks);
+ list_init(&e->enqueue_node);
+
+ assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_FILL_IMAGE);
+ e->event_type = type;
+ if (type == CL_COMMAND_USER) {
+ e->status = CL_SUBMITTED;
+ } else {
+ e->status = CL_QUEUED;
}
- else {
- event->status = CL_QUEUED;
- if(cl_event_is_gpu_command_type(event->type))
- event->gpgpu_event = cl_gpgpu_event_new(gpgpu);
+
+ if (type == CL_COMMAND_USER) {
+ assert(queue == NULL);
}
- cl_event_add_ref(event); //dec when complete
- event->user_cb = NULL;
- event->enqueue_cb = NULL;
- event->waits_head = NULL;
- event->emplict = emplict;
-
-exit:
- return event;
-error:
- cl_event_delete(event);
- event = NULL;
- goto exit;
+
+ e->depend_events = event_list;
+ e->depend_event_num = num_events;
+ return e;
}
-void cl_event_delete(cl_event event)
+LOCAL void
+cl_event_delete(cl_event event)
{
+ int i;
+ cl_event_user_callback cb;
+
if (UNLIKELY(event == NULL))
return;
- cl_event_update_status(event, 0);
-
if (CL_OBJECT_DEC_REF(event) > 1)
return;
- /* Call all user's callback if haven't execute */
- cl_event_call_callback(event, CL_COMPLETE, CL_TRUE); // CL_COMPLETE status will force all callbacks that are not executed to run
+ cl_enqueue_delete(&event->exec_data);
- /* delete gpgpu event object */
- if(event->gpgpu_event)
- cl_gpgpu_event_delete(event->gpgpu_event);
+ assert(list_empty(&event->enqueue_node));
- /* Remove it from the list */
- cl_context_remove_event(event->ctx, event);
+ if (event->depend_events) {
+ assert(event->depend_event_num);
+ for (i = 0; i < event->depend_event_num; i++) {
+ cl_event_delete(event->depend_events[i]);
+ }
+ cl_free(event->depend_events);
+ }
- if (event->gpgpu) {
- fprintf(stderr, "Warning: a event is deleted with a pending enqueued task.\n");
- cl_gpgpu_delete(event->gpgpu);
- event->gpgpu = NULL;
+ /* Free all the callbacks. Last ref, no need to lock. */
+ while (!list_empty(&event->callbacks)) {
+ cb = list_entry(event->callbacks.next, _cl_event_user_callback, node);
+ list_del(&cb->node);
+ cl_free(cb);
}
+ /* Remove it from the list */
+ assert(event->ctx);
+ cl_context_remove_event(event->ctx, event);
+
CL_OBJECT_DESTROY_BASE(event);
cl_free(event);
}
-void cl_event_add_ref(cl_event event)
+LOCAL cl_event
+cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
+ const cl_event *event_list, cl_command_type type, cl_int *errcode_ret)
{
- assert(event);
- CL_OBJECT_INC_REF(event);
-}
+ cl_event e = NULL;
+ cl_event *depend_events = NULL;
+ cl_int err = CL_SUCCESS;
+ cl_uint total_events = 0;
+ int i;
-cl_int cl_event_set_callback(cl_event event ,
- cl_int command_exec_callback_type,
- EVENT_NOTIFY pfn_notify,
- void* user_data)
-{
- assert(event);
- assert(pfn_notify);
+ assert(ctx);
- cl_int err = CL_SUCCESS;
- user_callback *cb;
- TRY_ALLOC(cb, CALLOC(user_callback));
-
- cb->pfn_notify = pfn_notify;
- cb->user_data = user_data;
- cb->status = command_exec_callback_type;
- cb->executed = CL_FALSE;
-
-
- // It is possible that the event enqueued is already completed.
- // clEnqueueReadBuffer can be synchronous and when the callback
- // is registered after, it still needs to get executed.
- CL_OBJECT_LOCK(event); // Thread safety required: operations on the event->status can be made from many different threads
- if(event->status <= command_exec_callback_type) {
- /* Call user callback */
- CL_OBJECT_UNLOCK(event); // pfn_notify can call clFunctions that use the event_lock and from here it's not required
- cb->pfn_notify(event, event->status, cb->user_data);
- cl_free(cb);
- } else {
- // Enqueue to callback list
- cb->next = event->user_cb;
- event->user_cb = cb;
- CL_OBJECT_UNLOCK(event);
- }
+ do {
+ if (event_list)
+ assert(num_events);
-exit:
- return err;
-error:
- err = CL_OUT_OF_HOST_MEMORY;
- cl_free(cb);
- goto exit;
-};
-
-cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event *event,cl_context ctx)
-{
- cl_int err = CL_SUCCESS;
- cl_int i;
- /* check the event_wait_list and num_events_in_wait_list */
- if((event_wait_list == NULL) &&
- (num_events_in_wait_list > 0))
- goto error;
-
- if ((event_wait_list != NULL) &&
- (num_events_in_wait_list == 0)){
- goto error;
- }
+ if (queue == NULL) {
+ assert(type == CL_COMMAND_USER);
+ assert(event_list == NULL);
+ assert(num_events == 0);
- /* check the event and context */
- for(i=0; i<num_events_in_wait_list; i++) {
- CHECK_EVENT(event_wait_list[i]);
- if(event_wait_list[i]->status < CL_COMPLETE) {
- err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
- goto exit;
- }
- if(event && event == &event_wait_list[i])
- goto error;
- if(event_wait_list[i]->ctx != ctx) {
- err = CL_INVALID_CONTEXT;
- goto exit;
- }
- }
+ e = cl_event_new(ctx, queue, type, 0, NULL);
+ if (e == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
+ }
+ } else {
+ CL_OBJECT_LOCK(queue);
+ total_events = queue->barrier_events_num + num_events;
+
+ if (total_events) {
+ depend_events = cl_calloc(total_events, sizeof(cl_event));
+ if (depend_events == NULL) {
+ CL_OBJECT_UNLOCK(queue);
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
+ }
+ }
-exit:
- return err;
-error:
- err = CL_INVALID_EVENT_WAIT_LIST; //reset error
- goto exit;
-}
+ /* Add all the barrier events as depend events. */
+ for (i = 0; i < queue->barrier_events_num; i++) {
+ assert(CL_EVENT_IS_BARRIER(queue->barrier_events[i]));
+ cl_event_add_ref(queue->barrier_events[i]);
+ depend_events[num_events + i] = queue->barrier_events[i];
+ }
-cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
- cl_command_queue queue)
-{
- cl_int i;
+ CL_OBJECT_UNLOCK(queue);
- /* Check whether wait user events */
- for(i=0; i<num_events_in_wait_list; i++) {
- if(event_wait_list[i]->status <= CL_COMPLETE)
- continue;
+ for (i = 0; i < num_events; i++) {
+ assert(event_list[i]);
+ assert(event_list[i]->ctx == ctx);
+ assert(CL_OBJECT_IS_EVENT(event_list[i]));
+ cl_event_add_ref(event_list[i]);
+ depend_events[i] = event_list[i];
+ }
- /* Need wait on user event, return and do enqueue defer */
- if((event_wait_list[i]->type == CL_COMMAND_USER) ||
- (event_wait_list[i]->enqueue_cb &&
- (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
- return CL_ENQUEUE_EXECUTE_DEFER;
- }
- }
+ if (depend_events)
+ assert(total_events);
- if(queue && queue->barrier_events_num )
- return CL_ENQUEUE_EXECUTE_DEFER;
+ e = cl_event_new(ctx, queue, type, total_events, depend_events);
+ if (e == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
+ }
+ depend_events = NULL;
+ }
+ } while (0);
- /* Non user events or all user event finished, wait all enqueue events finish */
- for(i=0; i<num_events_in_wait_list; i++) {
- if(event_wait_list[i]->status <= CL_COMPLETE)
- continue;
+ if (err != CL_SUCCESS) {
+ if (depend_events) {
+ for (i = 0; i < total_events; i++) {
+ cl_event_delete(depend_events[i]);
+ }
+ cl_free(depend_events);
+ }
- //enqueue callback haven't finish, in another thread, wait
- if(event_wait_list[i]->enqueue_cb != NULL)
- return CL_ENQUEUE_EXECUTE_DEFER;
- if(event_wait_list[i]->gpgpu_event)
- cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
- cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback
+ // if set depend_events, must succeed.
+ assert(e->depend_events == NULL);
+ cl_event_delete(e);
}
- return CL_ENQUEUE_EXECUTE_IMM;
+
+ if (errcode_ret)
+ *errcode_ret = err;
+
+ return e;
}
-void cl_event_new_enqueue_callback(cl_event event,
- enqueue_data *data,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list)
+LOCAL cl_int
+cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_notify, void *user_data)
{
- enqueue_callback *cb, *node;
- user_event *user_events, *u_ev;
- cl_command_queue queue = event ? event->queue : NULL;
- cl_int i;
cl_int err = CL_SUCCESS;
+ cl_event_user_callback cb;
+ cl_bool exec_imm = CL_FALSE;
- /* Allocate and initialize the structure itself */
- TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
- cb->num_events = 0;
- TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list));
- for(i=0; i<num_events_in_wait_list; i++) {
- //user event will insert to cb->wait_user_events, need not in wait list, avoid ref twice
- if(event_wait_list[i]->type != CL_COMMAND_USER) {
- cb->wait_list[cb->num_events++] = event_wait_list[i];
- cl_event_add_ref(event_wait_list[i]); //add defer enqueue's wait event reference
- }
- }
- cb->event = event;
- cb->next = NULL;
- cb->wait_user_events = NULL;
-
- if(queue && queue->barrier_events_num > 0) {
- for(i=0; i<queue->barrier_events_num; i++) {
- /* Insert the enqueue_callback to user event list */
- node = queue->wait_events[i]->waits_head;
- if(node == NULL)
- queue->wait_events[i]->waits_head = cb;
- else{
- while((node != cb) && node->next)
- node = node->next;
- if(node == cb) //wait on dup user event
- continue;
- node->next = cb;
- }
+ assert(event);
+ assert(pfn_notify);
- /* Insert the user event to enqueue_callback's wait_user_events */
- TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]);
- cl_event_add_ref(queue->wait_events[i]);
+ do {
+ cb = cl_calloc(1, sizeof(_cl_event_user_callback));
+ if (cb == NULL) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ break;
}
- }
- /* Find out all user events that in event_wait_list wait */
- for(i=0; i<num_events_in_wait_list; i++) {
- if(event_wait_list[i]->status <= CL_COMPLETE)
- continue;
-
- if(event_wait_list[i]->type == CL_COMMAND_USER) {
- /* Insert the enqueue_callback to user event list */
- node = event_wait_list[i]->waits_head;
- if(node == NULL)
- event_wait_list[i]->waits_head = cb;
- else {
- while((node != cb) && node->next)
- node = node->next;
- if(node == cb) //wait on dup user event
- continue;
- node->next = cb;
- }
- /* Insert the user event to enqueue_callback's wait_user_events */
- TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]);
- cl_event_add_ref(event_wait_list[i]);
- if(queue)
- cl_command_queue_insert_event(queue, event_wait_list[i]);
- if(queue && data->type == EnqueueBarrier){
- cl_command_queue_insert_barrier_event(queue, event_wait_list[i]);
- }
- } else if(event_wait_list[i]->enqueue_cb != NULL) {
- user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
- while(user_events != NULL) {
- /* Insert the enqueue_callback to user event's waits_tail */
- node = user_events->event->waits_head;
- if(node == NULL)
- event_wait_list[i]->waits_head = cb;
- else{
- while((node != cb) && node->next)
- node = node->next;
- if(node == cb) { //wait on dup user event
- user_events = user_events->next;
- continue;
- }
- node->next = cb;
- }
-
- /* Insert the user event to enqueue_callback's wait_user_events */
- TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event);
- cl_event_add_ref(user_events->event);
- if(queue)
- cl_command_queue_insert_event(event->queue, user_events->event);
- if(queue && data->type == EnqueueBarrier){
- cl_command_queue_insert_barrier_event(event->queue, user_events->event);
- }
- user_events = user_events->next;
- }
- }
- }
- if(event != NULL && event->queue != NULL && event->gpgpu_event != NULL) {
- event->gpgpu = cl_thread_gpgpu_take(event->queue);
- data->ptr = (void *)event->gpgpu_event;
- }
- cb->data = *data;
- if(event)
- event->enqueue_cb = cb;
-
-exit:
- return;
-error:
- if(cb) {
- while(cb->wait_user_events) {
- u_ev = cb->wait_user_events;
- cb->wait_user_events = cb->wait_user_events->next;
- cl_event_delete(u_ev->event);
- cl_free(u_ev);
+ list_init(&cb->node);
+ cb->pfn_notify = pfn_notify;
+ cb->user_data = user_data;
+ cb->status = exec_type;
+ cb->executed = CL_FALSE;
+
+ CL_OBJECT_LOCK(event);
+ if (event->status > exec_type) {
+ list_add_tail(&cb->node, &event->callbacks);
+ cb = NULL;
+ } else {
+ /* The state has already OK, call it immediately. */
+ exec_imm = CL_TRUE;
}
- for(i=0; i<cb->num_events; i++) {
- if(cb->wait_list[i]) {
- cl_event_delete(cb->wait_list[i]);
- }
- }
- cl_free(cb);
- }
- goto exit;
-}
+ CL_OBJECT_UNLOCK(event);
-void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb) {
- user_callback *user_cb = NULL;
- user_callback *queue_cb = NULL; // For thread safety, we create a queue that holds user_callback's pfn_notify contents
- user_callback *temp_cb = NULL;
- user_cb = event->user_cb;
- CL_OBJECT_LOCK(event);
- while(user_cb) {
- if(user_cb->status >= status
- && user_cb->executed == CL_FALSE) { // Added check to not execute a callback when it was already handled
- user_cb->executed = CL_TRUE;
- temp_cb = cl_malloc(sizeof(user_callback));
- if(!temp_cb) {
- break; // Out of memory
- }
- temp_cb->pfn_notify = user_cb->pfn_notify; // Minor struct copy to call ppfn_notify out of the pthread_mutex
- temp_cb->user_data = user_cb->user_data;
- if(free_cb) {
- cl_free(user_cb);
- }
- if(!queue_cb) {
- queue_cb = temp_cb;
- queue_cb->next = NULL;
- } else { // Enqueue First
- temp_cb->next = queue_cb;
- queue_cb = temp_cb;
- }
+ if (exec_imm) {
+ cb->pfn_notify(event, event->status, cb->user_data);
}
- user_cb = user_cb->next;
- }
- CL_OBJECT_UNLOCK(event);
- // Calling the callbacks outside of the event_lock is required because the callback can call cl_api functions and get deadlocked
- while(queue_cb) { // For each callback queued, actually execute the callback
- queue_cb->pfn_notify(event, event->status, queue_cb->user_data);
- temp_cb = queue_cb;
- queue_cb = queue_cb->next;
- cl_free(temp_cb);
- }
+ } while (0);
+
+ if (cb)
+ cl_free(cb);
+
+ return err;
}
-void cl_event_set_status(cl_event event, cl_int status)
+LOCAL cl_int
+cl_event_set_status(cl_event event, cl_int status)
{
- cl_int ret, i;
- cl_event evt;
+ list_head tmp_callbacks;
+ list_head *n;
+ list_head *pos;
+ cl_bool notify_queue = CL_FALSE;
+ cl_event_user_callback cb;
+
+ assert(event);
CL_OBJECT_LOCK(event);
- if(status >= event->status) {
+ if (event->status <= CL_COMPLETE) { // Already set to error or completed
CL_OBJECT_UNLOCK(event);
- return;
+ return CL_INVALID_OPERATION;
}
- if(event->status <= CL_COMPLETE) {
- event->status = status; //have done enqueue before or doing in another thread
- CL_OBJECT_UNLOCK(event);
- return;
+
+ if (CL_EVENT_IS_USER(event)) {
+ assert(event->status != CL_RUNNING && event->status != CL_QUEUED);
+ } else {
+ assert(event->queue); // Must belong to some queue.
}
- if(status <= CL_COMPLETE) {
- if(event->enqueue_cb) {
- if(status == CL_COMPLETE) {
- cl_enqueue_handle(event, &event->enqueue_cb->data);
- if(event->gpgpu_event)
- cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complet, need refine
- } else {
- if(event->gpgpu_event) {
- // Error then cancel the enqueued event.
- cl_gpgpu_delete(event->gpgpu);
- event->gpgpu = NULL;
- }
- }
+ if (status >= event->status) { // Should never go back.
+ CL_OBJECT_UNLOCK(event);
+ return CL_INVALID_OPERATION;
+ }
- event->status = status; //Change the event status after enqueue and befor unlock
+ event->status = status;
+ /* Call all the callbacks. */
+ if (!list_empty(&event->callbacks)) {
+ do {
+ status = event->status;
+ list_init(&tmp_callbacks);
+ list_replace(&event->callbacks, &tmp_callbacks);
+ list_init(&event->callbacks);
+ /* Call all the callbacks without lock. */
CL_OBJECT_UNLOCK(event);
- for(i=0; i<event->enqueue_cb->num_events; i++)
- cl_event_delete(event->enqueue_cb->wait_list[i]);
+
+ list_for_each_safe(pos, n, &tmp_callbacks)
+ {
+ cb = list_entry(pos, _cl_event_user_callback, node);
+
+ assert(cb->executed == CL_FALSE);
+
+ if (cb->status < status)
+ continue;
+
+ list_del(&cb->node);
+ cb->executed = CL_TRUE;
+ cb->pfn_notify(event, status, cb->user_data);
+ cl_free(cb);
+ }
+
CL_OBJECT_LOCK(event);
- if(event->enqueue_cb->wait_list)
- cl_free(event->enqueue_cb->wait_list);
- cl_free(event->enqueue_cb);
- event->enqueue_cb = NULL;
- }
+ // Set back the uncalled callbacks.
+ list_splice_tail(&tmp_callbacks, &event->callbacks);
+
+ /* Status may have changed while the lock was released; check again. */
+ } while (status != event->status);
}
- if(event->status >= status) //maybe changed in other threads
- event->status = status;
+
+ /* Wake up all waiters blocked on a status change. */
+ CL_OBJECT_NOTIFY_COND(event);
+
+ if (event->status <= CL_COMPLETE) {
+ notify_queue = CL_TRUE;
+ }
+
CL_OBJECT_UNLOCK(event);
- /* Call user callback */
- cl_event_call_callback(event, status, CL_FALSE);
+ /* Need to notify all the command queue within the same context. */
+ if (notify_queue) {
+ cl_command_queue *q_list = NULL;
+ cl_uint queue_num = 0;
+ int i = 0;
+ int cookie = 0;
+
+ /* First, remove it from the queue's barrier list. */
+ if (CL_EVENT_IS_BARRIER(event)) {
+ assert(event->queue);
+ cl_command_queue_remove_barrier_event(event->queue, event);
+ }
- if(event->type == CL_COMMAND_USER) {
- /* Check all defer enqueue */
- enqueue_callback *cb, *enqueue_cb = event->waits_head;
- while(enqueue_cb) {
- /* Remove this user event in enqueue_cb, update the header if needed. */
- cl_event_remove_user_event(&enqueue_cb->wait_user_events, event);
- cl_event_delete(event);
+ /* Then, notify all the queues within the same context. */
+ CL_OBJECT_LOCK(event->ctx);
+ do {
+ queue_num = event->ctx->queue_num;
+ cookie = event->ctx->queue_cookie;
+
+ if (queue_num > 0) {
+ q_list = cl_calloc(queue_num, sizeof(cl_command_queue));
+ assert(q_list);
+ i = 0;
+ list_for_each(pos, &event->ctx->queues)
+ {
+ q_list[i] = (cl_command_queue)(list_entry(pos, _cl_base_object, node));
+ assert(i < queue_num);
+ i++;
+ }
- /* Still wait on other user events */
- if(enqueue_cb->wait_user_events != NULL) {
- enqueue_cb = enqueue_cb->next;
- continue;
- }
+ CL_OBJECT_UNLOCK(event->ctx); // Update status without context lock.
- //remove user event frome enqueue_cb's ctx
- cl_command_queue_remove_event(enqueue_cb->event->queue, event);
- cl_command_queue_remove_barrier_event(enqueue_cb->event->queue, event);
-
- /* All user events complete, now wait enqueue events */
- ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
- enqueue_cb->event->queue);
- assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
- ret = ~ret;
- cb = enqueue_cb;
- enqueue_cb = enqueue_cb->next;
-
- /* Call the pending operation */
- evt = cb->event;
- /* TODO: if this event wait on several events, one event's
- status is error, the others is complete, what's the status
- of this event? Can't find the description in OpenCL spec.
- Simply update to latest finish wait event.*/
- cl_event_set_status(cb->event, status);
- if(evt->emplict == CL_FALSE) {
- cl_event_delete(evt);
+ for (i = 0; i < queue_num; i++) {
+ cl_command_queue_notify(q_list[i]);
+ }
+
+ CL_OBJECT_LOCK(event->ctx); // Lock again.
+ } else {
+ /* No queue? Just do nothing. */
}
- }
- event->waits_head = NULL;
- }
- if(event->status <= CL_COMPLETE){
- /* Maintain the last_list when event completed*/
- if (event->last_prev)
- event->last_prev->last_next = event->last_next;
- if (event->last_next)
- event->last_next->last_prev = event->last_prev;
- if(event->queue && get_last_event(event->queue) == event)
- set_last_event(event->queue, event->last_next);
- event->last_prev = NULL;
- event->last_next = NULL;
- cl_event_delete(event);
+ } while (cookie != event->ctx->queue_cookie); // A queue may have been added while we were unlocked.
+ CL_OBJECT_UNLOCK(event->ctx);
+
+ if (q_list)
+ cl_free(q_list);
}
+
+ return CL_SUCCESS;
}
-void cl_event_update_status(cl_event event, int wait)
+LOCAL cl_int
+cl_event_wait_for_event_ready(const cl_event event)
{
- if(event->status <= CL_COMPLETE)
- return;
- if((event->gpgpu_event) &&
- (cl_gpgpu_event_update_status(event->gpgpu_event, wait) == command_complete))
- cl_event_set_status(event, CL_COMPLETE);
+ assert(CL_OBJECT_IS_EVENT(event));
+ return cl_event_wait_for_events_list(event->depend_event_num, event->depend_events);
}
-cl_int cl_event_marker_with_wait_list(cl_command_queue queue,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event* event)
+LOCAL cl_int
+cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list)
{
- enqueue_data data = { 0 };
+ int i;
cl_event e;
+ cl_int ret = CL_SUCCESS;
- e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
- if(e == NULL)
- return CL_OUT_OF_HOST_MEMORY;
-
- if(event != NULL ){
- *event = e;
- }
+ for (i = 0; i < num_events; i++) {
+ e = event_list[i];
+ assert(e);
+ assert(CL_OBJECT_IS_EVENT(e));
-//enqueues a marker command which waits for either a list of events to complete, or if the list is
-//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
- if(num_events_in_wait_list > 0){
- if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
- data.type = EnqueueMarker;
- cl_event_new_enqueue_callback(event?*event:NULL, &data, num_events_in_wait_list, event_wait_list);
- return CL_SUCCESS;
+ CL_OBJECT_LOCK(e);
+ while (e->status > CL_COMPLETE) {
+ CL_OBJECT_WAIT_ON_COND(e);
}
- } else if(queue->wait_events_num > 0) {
- data.type = EnqueueMarker;
- cl_event_new_enqueue_callback(event?*event:NULL, &data, queue->wait_events_num, queue->wait_events);
- return CL_SUCCESS;
+ /* If some error happened, return the error. */
+ if (e->status < CL_COMPLETE) {
+ ret = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+ }
+ CL_OBJECT_UNLOCK(e);
}
- cl_event_update_last_events(queue,1);
-
- cl_event_set_status(e, CL_COMPLETE);
- return CL_SUCCESS;
+ return ret;
}
-cl_int cl_event_barrier_with_wait_list(cl_command_queue queue,
- cl_uint num_events_in_wait_list,
- const cl_event *event_wait_list,
- cl_event* event)
+LOCAL cl_int
+cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+ cl_event *event, cl_context ctx)
{
- enqueue_data data = { 0 };
- cl_event e;
-
- e = cl_event_new(queue->ctx, queue, CL_COMMAND_BARRIER, CL_TRUE);
- if(e == NULL)
- return CL_OUT_OF_HOST_MEMORY;
+ cl_int err = CL_SUCCESS;
+ cl_int i;
- if(event != NULL ){
- *event = e;
- }
-//enqueues a barrier command which waits for either a list of events to complete, or if the list is
-//empty it waits for all commands previously enqueued in command_queue to complete before it completes.
- if(num_events_in_wait_list > 0){
- if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) {
- data.type = EnqueueBarrier;
- cl_event_new_enqueue_callback(e, &data, num_events_in_wait_list, event_wait_list);
- return CL_SUCCESS;
+ do {
+ /* check the event_wait_list and num_events_in_wait_list */
+ if ((event_wait_list == NULL) && (num_events_in_wait_list > 0)) {
+ err = CL_INVALID_EVENT_WAIT_LIST;
+ break;
}
- } else if(queue->wait_events_num > 0) {
- data.type = EnqueueBarrier;
- cl_event_new_enqueue_callback(e, &data, queue->wait_events_num, queue->wait_events);
- return CL_SUCCESS;
- }
- cl_event_update_last_events(queue,1);
+ if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)) {
+ err = CL_INVALID_EVENT_WAIT_LIST;
+ break;
+ }
- cl_event_set_status(e, CL_COMPLETE);
- return CL_SUCCESS;
-}
+ /* check the event and context */
+ for (i = 0; i < num_events_in_wait_list; i++) {
+ if (event_wait_list[i] == NULL || !CL_OBJECT_IS_EVENT(event_wait_list[i])) {
+ err = CL_INVALID_EVENT;
+ break;
+ }
-cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time)
-{
- struct timespec ts;
+ if (cl_event_get_status(event_wait_list[i]) < CL_COMPLETE) {
+ err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+ break;
+ }
- if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){
- printf("CPU Timmer error\n");
- return CL_FALSE;
- }
- *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec;
+ if (event == event_wait_list + i) { /* Pointer of element of the wait list */
+ err = CL_INVALID_EVENT_WAIT_LIST;
+ break;
+ }
- return CL_SUCCESS;
-}
+ /* check all belong to same context. */
+ if (ctx == NULL) {
+ ctx = event_wait_list[i]->ctx;
+ }
+ if (event_wait_list[i]->ctx != ctx) {
+ err = CL_INVALID_CONTEXT;
+ break;
+ }
+ }
-cl_int cl_event_get_queued_cpu_timestamp(cl_event event)
-{
- cl_int ret_val;
+ if (err != CL_SUCCESS)
+ break;
- ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp);
+ } while (0);
- return ret_val;
+ return err;
}
-cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp)
+LOCAL void
+cl_event_exec(cl_event event, cl_int exec_status)
{
- cl_ulong ret_val;
+ /* We are MT-safe here: no one should call this
+ at the same time, so no lock is needed. */
+ cl_int ret = CL_SUCCESS;
+ cl_int status = cl_event_get_status(event);
+ cl_int depend_status;
- if(end_timestamp > start_timestamp){
- ret_val = end_timestamp - start_timestamp;
- }
- else {
- /*if start time stamp is greater than end timstamp then set ret value to max*/
- ret_val = ((cl_ulong) 1 << 32);
+ if (status < CL_COMPLETE || status <= exec_status) {
+ return;
}
- return ret_val;
-}
-
-cl_ulong cl_event_get_start_timestamp(cl_event event)
-{
- cl_ulong ret_val;
+ depend_status = cl_event_is_ready(event);
+ assert(depend_status <= CL_COMPLETE);
+ if (depend_status < CL_COMPLETE) { // Error happened; cancel exec.
+ ret = cl_event_set_status(event, depend_status);
+ return;
+ }
- ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]);
+ /* Do the according thing based on event type. */
+ ret = cl_enqueue_handle(&event->exec_data, exec_status);
- return ret_val;
+ if (ret != CL_SUCCESS) {
+ assert(ret < 0);
+ DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error staus is %d",
+ event, event->event_type, ret);
+ ret = cl_event_set_status(event, ret);
+ assert(ret == CL_SUCCESS);
+ } else {
+ ret = cl_event_set_status(event, exec_status);
+ assert(ret == CL_SUCCESS);
+ }
}
-cl_ulong cl_event_get_end_timestamp(cl_event event)
+/* 0 means ready, >0 means not ready, <0 means error. */
+LOCAL cl_int
+cl_event_is_ready(cl_event event)
{
- cl_ulong ret_val;
-
- ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]);
+ int i;
+ int status;
- return ret_val;
-}
+ for (i = 0; i < event->depend_event_num; i++) {
+ status = cl_event_get_status(event->depend_events[i]);
-cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
-{
- cl_ulong ret_val = 0;
- GET_QUEUE_THREAD_GPGPU(event->queue);
-
- if (!event->gpgpu_event) {
- cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
+ if (status != CL_COMPLETE) {
+ return status;
+ }
}
- if(param_name == CL_PROFILING_COMMAND_SUBMIT ||
- param_name == CL_PROFILING_COMMAND_QUEUED) {
- cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
- } else if(param_name == CL_PROFILING_COMMAND_START) {
- cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 0, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
- } else if (param_name == CL_PROFILING_COMMAND_END) {
- cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 1, &ret_val);
- event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
- return CL_SUCCESS;
- }
- return CL_INVALID_VALUE;
+ return CL_COMPLETE;
}
-cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event)
+LOCAL cl_event
+cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list, cl_bool is_barrier, cl_int *error)
{
- user_event * u_iter = *p_u_ev;
- user_event * u_ev;
-
- while(u_iter)
- {
- if(u_iter->event == event)
- return CL_SUCCESS;
- u_iter = u_iter->next;
- }
+ cl_event e = NULL;
+ cl_int err = CL_SUCCESS;
+ cl_command_type type = CL_COMMAND_MARKER;
+ enqueue_type eq_type = EnqueueMarker;
- TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
- u_ev->event = event;
- u_ev->next = *p_u_ev;
- *p_u_ev = u_ev;
+ if (is_barrier) {
+ type = CL_COMMAND_BARRIER;
+ eq_type = EnqueueBarrier;
+ }
+ if (event_wait_list) {
+ assert(num_events_in_wait_list > 0);
- return CL_SUCCESS;
-error:
- return CL_FALSE;
-}
+ e = cl_event_create(queue->ctx, queue, num_events_in_wait_list,
+ event_wait_list, type, &err);
+ if (err != CL_SUCCESS) {
+ *error = err;
+ return NULL;
+ }
+ } else { /* The marker depends on all events in the queue now. */
+ cl_command_queue_enqueue_worker worker = &queue->worker;
+ cl_uint i;
+ cl_uint event_num;
+ cl_event *depend_events;
+
+ CL_OBJECT_LOCK(queue);
+
+ /* First, wait for the command queue to retire all currently executing events. */
+ while (1) {
+ if (worker->quit) { // already destroy the queue?
+ CL_OBJECT_UNLOCK(queue);
+ *error = CL_INVALID_COMMAND_QUEUE;
+ return NULL;
+ }
-cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event)
-{
- user_event * u_iter = *p_u_ev;
- user_event * u_prev = *p_u_ev;
-
- while(u_iter){
- if(u_iter->event == event ){
- if(u_iter == *p_u_ev){
- *p_u_ev = u_iter->next;
- }else{
- u_prev->next = u_iter->next;
+ if (worker->in_exec_status != CL_COMPLETE) {
+ CL_OBJECT_WAIT_ON_COND(queue);
+ continue;
}
- cl_free(u_iter);
+
break;
}
- u_prev = u_iter;
- u_iter = u_iter->next;
+
+ event_num = 0;
+ depend_events = NULL;
+ if (!list_empty(&worker->enqueued_events)) {
+ depend_events = cl_command_queue_record_in_queue_events(queue, &event_num);
+ }
+
+ CL_OBJECT_UNLOCK(queue);
+
+ e = cl_event_create(queue->ctx, queue, event_num, depend_events, type, &err);
+
+ for (i = 0; i < event_num; i++) { //unref the temp
+ cl_event_delete(depend_events[i]);
+ }
+ if (depend_events)
+ cl_free(depend_events);
+
+ if (err != CL_SUCCESS) {
+ *error = err;
+ return NULL;
+ }
}
- return CL_SUCCESS;
+ e->exec_data.type = eq_type;
+ *error = CL_SUCCESS;
+ return e;
}