/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Rong Yang
*/
#include "cl_event.h"
#include "cl_context.h"
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_khr_icd.h"
#include "cl_kernel.h"
#include "cl_command_queue.h"

#include <assert.h>
#include <stdio.h>
/*
 * Classify a command type.
 *
 * Returns CL_TRUE for the buffer/image copy commands, tasks and ND-range
 * kernels listed below, CL_FALSE for every other command type.
 * cl_event_new() uses this to decide whether to create a GPGPU event object.
 */
inline cl_bool
cl_event_is_gpu_command_type(cl_command_type type)
{
  cl_bool is_gpu_command = CL_FALSE;

  switch(type) {
    case CL_COMMAND_COPY_BUFFER:
    case CL_COMMAND_COPY_IMAGE:
    case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
    case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
    case CL_COMMAND_COPY_BUFFER_RECT:
    case CL_COMMAND_TASK:
    case CL_COMMAND_NDRANGE_KERNEL:
      is_gpu_command = CL_TRUE;
      break;
    default:
      break;
  }

  return is_gpu_command;
}
/*
 * Create a new event for a command of the given type.
 *
 * ctx      context that owns the event; a reference is taken on it and the
 *          event is pushed at the head of ctx->events.
 * queue    command queue the event belongs to (stored as-is; only
 *          dereferenced here when non-NULL).
 * type     command type; CL_COMMAND_USER events start as CL_SUBMITTED, all
 *          others start as CL_QUEUED.
 * emplict  stored verbatim in event->emplict.
 *
 * Returns the new event, or NULL when the allocation fails.
 * The event is returned with two references: the initial one plus one that
 * is meant to be dropped when the event completes.
 */
cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
{
  cl_event event = NULL;
  /* NOTE(review): presumably this macro provides the `gpgpu` handle used below - confirm */
  GET_QUEUE_THREAD_GPGPU(queue);

  /* Allocate and initialize the structure itself */
  TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event));
  SET_ICD(event->dispatch)
  event->magic = CL_MAGIC_EVENT_HEADER;
  event->ref_n = 1;

  /* Push the event at the head of the context event list */
  pthread_mutex_lock(&ctx->event_lock);
  if (ctx->events != NULL)
    ctx->events->prev = event;
  event->next = ctx->events;
  ctx->events = event;
  pthread_mutex_unlock(&ctx->event_lock);

  cl_context_add_ref(ctx);
  event->ctx = ctx;

  /* Initialize all members and create the GPGPU event object when needed */
  event->queue = queue;
  event->type = type;
  event->status = (type == CL_COMMAND_USER) ? CL_SUBMITTED : CL_QUEUED;
  event->gpgpu_event = NULL;
  if (type != CL_COMMAND_USER && cl_event_is_gpu_command_type(event->type))
    event->gpgpu_event = cl_gpgpu_event_new(gpgpu);

  cl_event_add_ref(event);  /* extra reference, dropped when the event completes */

  event->user_cb = NULL;
  event->enqueue_cb = NULL;
  event->waits_head = NULL;
  event->emplict = emplict;

  /* Only events backed by a GPGPU event become the queue's last event */
  if (queue && event->gpgpu_event)
    queue->last_event = event;

exit:
  return event;
error:
  cl_event_delete(event);
  event = NULL;
  goto exit;
}
/*
 * Drop one reference on an event and destroy it when no reference remains.
 *
 * A NULL event is ignored. Before the reference is dropped the event status
 * is refreshed through cl_event_update_status(). On destruction: the queue's
 * last_event is cleared if it points at this event, every user callback that
 * has not run yet is invoked and all callback nodes are freed, the GPGPU
 * event is deleted, the event is unlinked from the context list and the
 * context reference is released.
 */
void cl_event_delete(cl_event event)
{
  user_callback *pending;

  if (UNLIKELY(event == NULL))
    return;

  cl_event_update_status(event);

  /* NOTE(review): assumes atomic_dec() returns the value before the decrement - confirm */
  if (atomic_dec(&event->ref_n) > 1)
    return;

  if (event->queue != NULL && event->queue->last_event == event)
    event->queue->last_event = NULL;

  /* Run the user callbacks that were never executed, freeing every node */
  for (pending = event->user_cb; pending != NULL; pending = event->user_cb) {
    if (pending->executed == CL_FALSE)
      pending->pfn_notify(event, event->status, pending->user_data);
    event->user_cb = pending->next;
    cl_free(pending);
  }

  /* Delete the GPGPU event object */
  if (event->gpgpu_event != NULL)
    cl_gpgpu_event_delete(event->gpgpu_event);

  /* Unlink the event from the context's doubly linked list */
  assert(event->ctx);
  pthread_mutex_lock(&event->ctx->event_lock);
  if (event->ctx->events == event)   /* this was the head: advance it */
    event->ctx->events = event->next;
  if (event->next != NULL)
    event->next->prev = event->prev;
  if (event->prev != NULL)
    event->prev->next = event->next;
  pthread_mutex_unlock(&event->ctx->event_lock);

  cl_context_delete(event->ctx);
  cl_free(event);
}
/*
 * Take one additional reference on an event.
 * The event must not be NULL (checked with assert only).
 */
void cl_event_add_ref(cl_event event)
{
  assert(event != NULL);
  atomic_inc(&event->ref_n);
}
/*
 * Register a user callback on an event.
 *
 * event                       event to attach the callback to (must not be NULL).
 * command_exec_callback_type  execution status the callback is registered for;
 *                             stored in the callback node's `status` field.
 * pfn_notify                  function to call (must not be NULL).
 * user_data                   opaque pointer handed back to pfn_notify.
 *
 * The new node is pushed at the head of event->user_cb.
 * Returns CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY when the node cannot be
 * allocated.
 */
cl_int cl_event_set_callback(cl_event event ,
                             cl_int command_exec_callback_type,
                             EVENT_NOTIFY pfn_notify,
                             void* user_data)
{
  cl_int err = CL_SUCCESS;
  user_callback *cb = NULL;

  assert(event);
  assert(pfn_notify);

  TRY_ALLOC(cb, CALLOC(user_callback));

  cb->pfn_notify = pfn_notify;
  cb->user_data = user_data;
  cb->status = command_exec_callback_type;
  cb->executed = CL_FALSE;

  /* Push at the head of the event's callback list */
  cb->next = event->user_cb;
  event->user_cb = cb;

exit:
  return err;
error:
  err = CL_OUT_OF_HOST_MEMORY;
  cl_free(cb);
  goto exit;
}
/*
 * Validate an event wait list.
 *
 * num_events_in_wait_list / event_wait_list  the list to validate; they must
 *          be either both empty (0 / NULL) or both non-empty.
 * event    optional pointer to the event being produced; when non-NULL, *event
 *          must not appear in the wait list.
 * ctx      context every event of the wait list must belong to.
 *
 * Returns CL_SUCCESS when the list is valid,
 * CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST when an event of the list has a
 * negative (error) status, and CL_INVALID_EVENT_WAIT_LIST for every other
 * problem.
 */
cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
                               const cl_event *event_wait_list,
                               cl_event *event,cl_context ctx)
{
  cl_int err = CL_SUCCESS;
  cl_uint i;

  /* check the event_wait_list and num_events_in_wait_list */
  if((event_wait_list == NULL) &&
     (num_events_in_wait_list > 0))
    goto error;

  if((event_wait_list != NULL) &&
     (num_events_in_wait_list == 0))
    goto error;

  /* check the event and context */
  for(i = 0; i < num_events_in_wait_list; i++) {
    /* Reject NULL entries and objects that do not carry the event magic */
    if(event_wait_list[i] == NULL ||
       event_wait_list[i]->magic != CL_MAGIC_EVENT_HEADER)
      goto error;

    if(event_wait_list[i]->status < CL_COMPLETE) {
      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
      goto exit;
    }
    /* The produced event cannot wait on itself */
    if(event && *event == event_wait_list[i])
      goto error;
    if(event_wait_list[i]->ctx != ctx)
      goto error;
  }

exit:
  return err;
error:
  err = CL_INVALID_EVENT_WAIT_LIST;  //reset error
  goto exit;
}
/*
 * Decide whether a command can run immediately or has to be deferred.
 *
 * Returns CL_ENQUEUE_EXECUTE_DEFER when
 *   - an unfinished event of the wait list is a user event, or has an enqueue
 *     callback that itself still waits on user events (a reference is then
 *     taken on every event of the wait list), or
 *   - the queue has a barrier (barrier_index > 0; a reference is then taken
 *     on every event in queue->wait_events), or
 *   - an unfinished event still has a pending enqueue callback.
 * Otherwise every unfinished event of the wait list is forced to
 * CL_COMPLETE and CL_ENQUEUE_EXECUTE_IMM is returned.
 *
 * NOTE(review): the third defer case takes no extra references, unlike the
 * first two - confirm this is what callers expect.
 */
cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
                            cl_command_queue queue)
{
  cl_int i, j;

  /* Check whether wait user events */
  for(i=0; i<num_events_in_wait_list; i++) {
    if(event_wait_list[i]->status <= CL_COMPLETE)
      continue;

    /* Need wait on user event, return and do enqueue defer */
    if((event_wait_list[i]->type == CL_COMMAND_USER) ||
       (event_wait_list[i]->enqueue_cb &&
       (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
      for(j=0; j<num_events_in_wait_list; j++)
        cl_event_add_ref(event_wait_list[j]);  //add defer enqueue's wait event reference
      return CL_ENQUEUE_EXECUTE_DEFER;
    }
  }

  /* A barrier is pending on the queue: defer behind the queue's wait events */
  if(queue && queue->barrier_index > 0) {
    for(j=0; j<queue->wait_events_num; j++){
      cl_event_add_ref(queue->wait_events[j]);  //add defer enqueue's wait event reference
    }
    return CL_ENQUEUE_EXECUTE_DEFER;
  }

  /* Non user events or all user event finished, wait all enqueue events finish */
  for(i=0; i<num_events_in_wait_list; i++) {
    if(event_wait_list[i]->status <= CL_COMPLETE)
      continue;

    //enqueue callback has not finished yet (handled in another thread), wait
    if(event_wait_list[i]->enqueue_cb != NULL)
      return CL_ENQUEUE_EXECUTE_DEFER;
    if(event_wait_list[i]->gpgpu_event)
      cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
    cl_event_set_status(event_wait_list[i], CL_COMPLETE);  //Execute user's callback
  }
  return CL_ENQUEUE_EXECUTE_IMM;
}
void cl_event_new_enqueue_callback(cl_event event,
enqueue_data *data,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list)
{
enqueue_callback *cb, *node;
user_event *user_events, *u_ev;
cl_command_queue queue = event->queue;
cl_int i;
cl_int err = CL_SUCCESS;
GET_QUEUE_THREAD_GPGPU(data->queue);
/* Allocate and initialize the structure itself */
TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
cb->num_events = num_events_in_wait_list;
TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list));
for(i=0; iwait_list[i] = event_wait_list[i];
cb->event = event;
cb->next = NULL;
cb->wait_user_events = NULL;
if(queue && queue->barrier_index > 0) {
for(i=0; ibarrier_index; i++) {
/* Insert the enqueue_callback to user event list */
node = queue->wait_events[i]->waits_head;
if(node == NULL)
queue->wait_events[i]->waits_head = cb;
else{
while((node != cb) && node->next)
node = node->next;
if(node == cb) //wait on dup user event
continue;
node->next = cb;
}
/* Insert the user event to enqueue_callback's wait_user_events */
TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]);
}
}
/* Find out all user events that in event_wait_list wait */
for(i=0; istatus <= CL_COMPLETE)
continue;
if(event_wait_list[i]->type == CL_COMMAND_USER) {
/* Insert the enqueue_callback to user event list */
node = event_wait_list[i]->waits_head;
if(node == NULL)
event_wait_list[i]->waits_head = cb;
else {
while((node != cb) && node->next)
node = node->next;
if(node == cb) //wait on dup user event
continue;
node->next = cb;
}
/* Insert the user event to enqueue_callback's wait_user_events */
TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]);
cl_command_queue_insert_event(event->queue, event_wait_list[i]);
} else if(event_wait_list[i]->enqueue_cb != NULL) {
user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
while(user_events != NULL) {
/* Insert the enqueue_callback to user event's waits_tail */
node = user_events->event->waits_head;
if(node == NULL)
event_wait_list[i]->waits_head = cb;
else{
while((node != cb) && node->next)
node = node->next;
if(node == cb) { //wait on dup user event
user_events = user_events->next;
continue;
}
node->next = cb;
}
/* Insert the user event to enqueue_callback's wait_user_events */
TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event);
cl_command_queue_insert_event(event->queue, user_events->event);
user_events = user_events->next;
}
}
}
if(data->queue != NULL && event->gpgpu_event != NULL) {
cl_gpgpu_event_pending(gpgpu, event->gpgpu_event);
data->ptr = (void *)event->gpgpu_event;
}
cb->data = *data;
event->enqueue_cb = cb;
exit:
return;
error:
if(cb) {
while(cb->wait_user_events) {
u_ev = cb->wait_user_events;
cb->wait_user_events = cb->wait_user_events->next;
cl_free(u_ev);
}
if(cb->wait_list)
cl_free(cb->wait_list);
cl_free(cb);
}
goto exit;
}
/*
 * Move an event to a new execution status.
 *
 * Statuses only move downwards: a request that is not strictly lower than the
 * current status is ignored, and an event that is already complete (or in
 * error) only has its status value overwritten.
 *
 * When the new status is CL_COMPLETE or an error:
 *   - a pending deferred enqueue is executed through cl_enqueue_handle(), the
 *     references held on its wait list are released and the record is freed;
 *   - the completion reference of the event is dropped, as the very last step
 *     because the event is still read by everything that follows.
 * Then the registered user callbacks whose status has been reached are
 * invoked (each callback once), and, for user events, every deferred enqueue
 * that was only waiting on this event is executed.
 */
void cl_event_set_status(cl_event event, cl_int status)
{
  user_callback *user_cb;
  cl_int ret, i;
  cl_event evt;
  cl_bool drop_completion_ref;

  pthread_mutex_lock(&event->ctx->event_lock);
  if(status >= event->status) {
    pthread_mutex_unlock(&event->ctx->event_lock);
    return;
  }
  if(event->status <= CL_COMPLETE) {
    event->status = status;    //have done enqueue before or doing in another thread
    pthread_mutex_unlock(&event->ctx->event_lock);
    return;
  }

  if(status <= CL_COMPLETE) {
    if(event->enqueue_cb) {
      cl_enqueue_handle(event, &event->enqueue_cb->data);
      if(event->gpgpu_event)
        cl_gpgpu_event_update_status(event->gpgpu_event, 1);  //now set complete, need refine

      event->status = status;  //Change the event status after enqueue and before unlock

      /* Releasing a waited event takes the same lock again, so drop it first */
      pthread_mutex_unlock(&event->ctx->event_lock);
      for(i=0; i<event->enqueue_cb->num_events; i++)
        cl_event_delete(event->enqueue_cb->wait_list[i]);
      pthread_mutex_lock(&event->ctx->event_lock);

      if(event->enqueue_cb->wait_list)
        cl_free(event->enqueue_cb->wait_list);
      cl_free(event->enqueue_cb);
      event->enqueue_cb = NULL;
    }
  }
  if(event->status >= status)  //maybe changed in other threads
    event->status = status;
  pthread_mutex_unlock(&event->ctx->event_lock);

  /* Decide now whether the completion reference has to be dropped, but only
   * drop it at the end: the event is still used below. */
  drop_completion_ref = (event->status <= CL_COMPLETE) ? CL_TRUE : CL_FALSE;

  /* Call user callback, at most once per registration */
  for(user_cb = event->user_cb; user_cb; user_cb = user_cb->next) {
    if(user_cb->status >= status && user_cb->executed == CL_FALSE) {
      user_cb->executed = CL_TRUE;
      user_cb->pfn_notify(event, event->status, user_cb->user_data);
    }
  }

  if(event->type == CL_COMMAND_USER) {
    /* Check all defer enqueue */
    enqueue_callback *cb, *enqueue_cb = event->waits_head;
    while(enqueue_cb) {
      /* Remove this user event in enqueue_cb, update the header if needed. */
      cl_event_remove_user_event(&enqueue_cb->wait_user_events, event);

      /* Still wait on other user events */
      if(enqueue_cb->wait_user_events != NULL) {
        enqueue_cb = enqueue_cb->next;
        continue;
      }

      //remove user event from enqueue_cb's ctx
      cl_command_queue_remove_event(enqueue_cb->event->queue, event);

      /* All user events complete, now wait enqueue events */
      ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list,
                                 enqueue_cb->event->queue);
      (void)ret;  /* only inspected by the assert below */
      assert(ret != CL_ENQUEUE_EXECUTE_DEFER);

      /* Step to the next record first: completing cb->event frees cb */
      cb = enqueue_cb;
      enqueue_cb = enqueue_cb->next;

      /* Call the pending operation */
      evt = cb->event;
      cl_event_set_status(cb->event, CL_COMPLETE);
      if(evt->emplict == CL_FALSE) {
        cl_event_delete(evt);
      }
    }
    event->waits_head = NULL;
  }

  if(drop_completion_ref)
    cl_event_delete(event);
}
/*
 * Refresh the status of an event from its GPGPU event.
 *
 * Nothing is done when the event is already complete (or in error) or has no
 * GPGPU event. Otherwise the GPGPU event is polled and, when it reports
 * command_complete, the event is switched to CL_COMPLETE.
 */
void cl_event_update_status(cl_event event)
{
  if(event->status <= CL_COMPLETE)
    return;

  if(event->gpgpu_event == NULL)
    return;

  if(cl_gpgpu_event_update_status(event->gpgpu_event, 0) != command_complete)
    return;

  cl_event_set_status(event, CL_COMPLETE);
}
/*
 * Create a marker event on a queue.
 *
 * queue  queue the marker is enqueued on.
 * event  out parameter receiving the new marker event.
 *
 * When the queue currently has wait events the marker is deferred behind
 * them; otherwise the queue's last GPGPU event (if any) is updated and the
 * marker is completed immediately.
 *
 * Returns CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY when the event cannot be
 * created.
 */
cl_int cl_event_marker(cl_command_queue queue, cl_event* event)
{
  /* Zero-initialized: cl_event_new_enqueue_callback() reads data->queue and
   * copies the whole structure, so no field may be left indeterminate. */
  enqueue_data data = { 0 };

  *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE);
  if(*event == NULL)
    return CL_OUT_OF_HOST_MEMORY;

  //if wait_events_num>0, the marker event need wait queue->wait_events
  if(queue->wait_events_num > 0) {
    data.type = EnqueueMarker;
    cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events);
    return CL_SUCCESS;
  }

  if(queue->last_event && queue->last_event->gpgpu_event) {
    cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 1);
  }

  cl_event_set_status(*event, CL_COMPLETE);
  return CL_SUCCESS;
}
/*
 * Record one profiling timestamp of an event.
 *
 * event       event whose timestamp[] slot is filled.
 * param_name  one of CL_PROFILING_COMMAND_QUEUED / SUBMIT / START / END; it
 *             selects the slot (param_name - CL_PROFILING_COMMAND_QUEUED).
 *
 * QUEUED and SUBMIT always record the current GPU timestamp. START and END
 * record the execution timestamps of the GPGPU event when the event has one,
 * and the current GPU timestamp otherwise.
 *
 * Returns CL_SUCCESS, or CL_INVALID_VALUE (without touching the event) for
 * any other param_name.
 */
cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
{
  cl_ulong ret_val = 0;
  GET_QUEUE_THREAD_GPGPU(event->queue);

  switch(param_name) {
    case CL_PROFILING_COMMAND_QUEUED:
    case CL_PROFILING_COMMAND_SUBMIT:
      cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
      break;
    case CL_PROFILING_COMMAND_START:
    case CL_PROFILING_COMMAND_END:
      if(event->gpgpu_event) {
        /* third argument: 0 selects the start stamp, 1 the end stamp */
        cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event,
                                          (param_name == CL_PROFILING_COMMAND_END) ? 1 : 0,
                                          &ret_val);
      } else {
        cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val);
      }
      break;
    default:
      /* Validate on every path so the slot index below stays in range */
      return CL_INVALID_VALUE;
  }

  event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val;
  return CL_SUCCESS;
}
/*
 * Add an event to a user_event list unless it is already in it.
 *
 * p_u_ev  address of the list head; updated because new nodes are pushed at
 *         the head.
 * event   event to record.
 *
 * Returns CL_SUCCESS when the event is in the list on return (newly added or
 * already present), CL_OUT_OF_HOST_MEMORY when the node cannot be allocated.
 */
cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event)
{
  user_event *u_iter;
  user_event *u_ev;

  /* Nothing to do when the event is already recorded */
  for(u_iter = *p_u_ev; u_iter != NULL; u_iter = u_iter->next) {
    if(u_iter->event == event)
      return CL_SUCCESS;
  }

  u_ev = CALLOC(user_event);
  if(u_ev == NULL)
    return CL_OUT_OF_HOST_MEMORY;  /* must differ from CL_SUCCESS so TRY() callers see it */

  u_ev->event = event;
  u_ev->next = *p_u_ev;
  *p_u_ev = u_ev;
  return CL_SUCCESS;
}
/*
 * Remove the first node that records `event` from a user_event list.
 *
 * p_u_ev  address of the list head; updated when the head node is removed.
 * event   event to look for.
 *
 * The removed node is freed. Always returns CL_SUCCESS, whether or not the
 * event was found.
 */
cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event)
{
  /* `link` always points at the pointer that refers to the current node */
  user_event **link = p_u_ev;

  while(*link != NULL) {
    user_event *current = *link;

    if(current->event == event) {
      *link = current->next;
      cl_free(current);
      break;
    }
    link = &current->next;
  }

  return CL_SUCCESS;
}