/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "cl_event.h"
#include "cl_context.h"
#include "cl_command_queue.h"
#include "cl_alloc.h"
#include <string.h>
#include <stdio.h>

// TODO: Move this into a device-related file later.
static void
cl_event_update_timestamp_gen(cl_event event, cl_int status)
{
cl_ulong ts = 0;
  /* Commands that actually run on the GPU: their start/end stamps come from
     the hardware event; everything else samples the current GPU time. */
  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
(event->exec_data.type == EnqueueCopyBuffer) ||
(event->exec_data.type == EnqueueCopyImage) ||
(event->exec_data.type == EnqueueCopyBufferToImage) ||
(event->exec_data.type == EnqueueCopyImageToBuffer) ||
(event->exec_data.type == EnqueueNDRangeKernel) ||
(event->exec_data.type == EnqueueFillBuffer) ||
(event->exec_data.type == EnqueueFillImage)) {
if (status == CL_QUEUED || status == CL_SUBMITTED) {
cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, &ts);
      if (ts == CL_EVENT_INVALID_TIMESTAMP)
        ts++; /* Nudge past the invalid-timestamp marker. */
event->timestamp[CL_QUEUED - status] = ts;
return;
} else if (status == CL_RUNNING) {
assert(event->exec_data.gpgpu);
      return; // Wait until the event completes, then read the start and end timestamps together.
} else {
      assert(event->exec_data.gpgpu);
      /* Slot 2 holds CL_PROFILING_COMMAND_START, slot 3 CL_PROFILING_COMMAND_END. */
      cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 0, &ts);
      if (ts == CL_EVENT_INVALID_TIMESTAMP)
        ts++;
      event->timestamp[2] = ts;
      cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 1, &ts);
      if (ts == CL_EVENT_INVALID_TIMESTAMP)
        ts++;
      event->timestamp[3] = ts;
      /* Clamp the submit time to the start time if it is later, or if the
         delta is so large that the GPU counter must have wrapped. */
      if (event->timestamp[1] > event->timestamp[2] ||
          event->timestamp[2] - event->timestamp[1] > 0x0FFFFFFFFFF /* wrapped */)
event->timestamp[1] = event->timestamp[2];
return;
}
} else {
cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, &ts);
if (ts == CL_EVENT_INVALID_TIMESTAMP)
ts++;
event->timestamp[CL_QUEUED - status] = ts;
return;
}
}
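/*
 * Note on the indexing above: the OpenCL execution statuses are defined as
 * CL_QUEUED = 3, CL_SUBMITTED = 2, CL_RUNNING = 1 and CL_COMPLETE = 0, so
 * "CL_QUEUED - status" maps them onto timestamp slots 0..3; slot 4 is
 * reserved for CL_PROFILING_COMMAND_COMPLETE. A standalone sketch of the
 * mapping (illustrative only, not driver code):
 *
 *   #include <CL/cl.h>
 *   #include <stdio.h>
 *
 *   int main(void)
 *   {
 *     cl_int states[] = { CL_QUEUED, CL_SUBMITTED, CL_RUNNING, CL_COMPLETE };
 *     int i;
 *     for (i = 0; i < 4; i++)
 *       printf("status %d -> timestamp slot %d\n",
 *              states[i], CL_QUEUED - states[i]);
 *     return 0; // Prints slots 0, 1, 2, 3.
 *   }
 */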
LOCAL void
cl_event_update_timestamp(cl_event event, cl_int state)
{
int i;
cl_bool re_cal = CL_FALSE;
cl_ulong ts[4];
  assert(state >= CL_COMPLETE && state <= CL_QUEUED);
if (event->event_type == CL_COMMAND_USER)
return;
assert(event->queue);
if ((event->queue->props & CL_QUEUE_PROFILING_ENABLE) == 0)
return;
/* Should not record the timestamp twice. */
assert(event->timestamp[CL_QUEUED - state] == CL_EVENT_INVALID_TIMESTAMP);
cl_event_update_timestamp_gen(event, state);
if (state == CL_COMPLETE) {
    // TODO: Set CL_PROFILING_COMMAND_COMPLETE properly once child enqueue is
    // supported; for now it simply duplicates the command end time.
    event->timestamp[4] = event->timestamp[3];
    /* If any timestamp wrapped around, rebase the queued time to 0 and
       rebuild the others from the deltas. */
for (i = 0; i < 4; i++) {
if (event->timestamp[i + 1] < event->timestamp[i]) {
re_cal = CL_TRUE;
break;
}
}
if (re_cal) {
for (i = 3; i >= 0; i--) {
if (event->timestamp[i + 1] < event->timestamp[i]) { //overflow
ts[i] = event->timestamp[i + 1] + (CL_EVENT_INVALID_TIMESTAMP - event->timestamp[i]);
} else {
ts[i] = event->timestamp[i + 1] - event->timestamp[i];
}
}
event->timestamp[0] = 0;
for (i = 1; i < 5; i++) {
event->timestamp[i] = event->timestamp[i - 1] + ts[i - 1];
}
}
}
}
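/*
 * A self-contained sketch of the rebase logic above, with made-up 64-bit
 * counter values (hypothetical numbers, only to show the arithmetic; ~0ULL
 * stands in for CL_EVENT_INVALID_TIMESTAMP): the five absolute stamps are
 * turned into four deltas, treating a wrap as next + (MAX - prev), and then
 * rebuilt from a queued time of 0.
 *
 *   #include <stdio.h>
 *   typedef unsigned long long u64;
 *   #define INVALID_TS (~0ULL)
 *
 *   int main(void)
 *   {
 *     u64 t[5] = { INVALID_TS - 10, INVALID_TS - 5, 3, 9, 9 }; // wrap at [2]
 *     u64 d[4];
 *     int i;
 *     for (i = 3; i >= 0; i--)
 *       d[i] = (t[i + 1] < t[i]) ? t[i + 1] + (INVALID_TS - t[i])
 *                                : t[i + 1] - t[i];
 *     t[0] = 0;
 *     for (i = 1; i < 5; i++)
 *       t[i] = t[i - 1] + d[i - 1];
 *     for (i = 0; i < 5; i++)
 *       printf("t[%d] = %llu\n", i, t[i]); // 0, 5, 13, 19, 19: monotonic again
 *     return 0;
 *   }
 */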
LOCAL void
cl_event_add_ref(cl_event event)
{
assert(event);
CL_OBJECT_INC_REF(event);
}
LOCAL cl_int
cl_event_get_status(cl_event event)
{
cl_int ret;
assert(event);
CL_OBJECT_LOCK(event);
ret = event->status;
CL_OBJECT_UNLOCK(event);
return ret;
}
static cl_event
cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
cl_uint num_events, cl_event *event_list)
{
int i;
cl_event e = cl_calloc(1, sizeof(_cl_event));
if (e == NULL)
return NULL;
CL_OBJECT_INIT_BASE(e, CL_OBJECT_EVENT_MAGIC);
  /* Append the event to the context's event list. */
cl_context_add_event(ctx, e);
e->queue = queue;
list_init(&e->callbacks);
list_node_init(&e->enqueue_node);
assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_SVM_UNMAP);
e->event_type = type;
  if (type == CL_COMMAND_USER) {
    assert(queue == NULL);
    e->status = CL_SUBMITTED;
  } else {
    e->status = CL_EVENT_STATE_UNKNOWN;
  }
e->depend_events = event_list;
e->depend_event_num = num_events;
  /* Mark all five profiling slots (queued/submit/start/end/complete) invalid. */
  for (i = 0; i < 5; i++) {
    e->timestamp[i] = CL_EVENT_INVALID_TIMESTAMP;
  }
return e;
}
LOCAL void
cl_event_delete(cl_event event)
{
int i;
cl_event_user_callback cb;
if (UNLIKELY(event == NULL))
return;
if (CL_OBJECT_DEC_REF(event) > 1)
return;
cl_enqueue_delete(&event->exec_data);
assert(list_node_out_of_list(&event->enqueue_node));
if (event->depend_events) {
assert(event->depend_event_num);
for (i = 0; i < event->depend_event_num; i++) {
cl_event_delete(event->depend_events[i]);
}
cl_free(event->depend_events);
}
/* Free all the callbacks. Last ref, no need to lock. */
while (!list_empty(&event->callbacks)) {
cb = list_entry(event->callbacks.head_node.n, _cl_event_user_callback, node);
list_node_del(&cb->node);
cl_free(cb);
}
  /* Remove the event from the context's event list. */
assert(event->ctx);
cl_context_remove_event(event->ctx, event);
CL_OBJECT_DESTROY_BASE(event);
cl_free(event);
}
LOCAL cl_event
cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
const cl_event *event_list, cl_command_type type, cl_int *errcode_ret)
{
cl_event e = NULL;
cl_event *depend_events = NULL;
cl_int err = CL_SUCCESS;
cl_uint total_events = 0;
int i;
assert(ctx);
do {
if (event_list)
assert(num_events);
if (queue == NULL) {
assert(type == CL_COMMAND_USER);
assert(event_list == NULL);
assert(num_events == 0);
e = cl_event_new(ctx, queue, type, 0, NULL);
if (e == NULL) {
err = CL_OUT_OF_HOST_MEMORY;
break;
}
} else {
CL_OBJECT_LOCK(queue);
total_events = queue->barrier_events_num + num_events;
if (total_events) {
depend_events = cl_calloc(total_events, sizeof(cl_event));
if (depend_events == NULL) {
CL_OBJECT_UNLOCK(queue);
err = CL_OUT_OF_HOST_MEMORY;
break;
}
}
/* Add all the barrier events as depend events. */
for (i = 0; i < queue->barrier_events_num; i++) {
assert(CL_EVENT_IS_BARRIER(queue->barrier_events[i]));
cl_event_add_ref(queue->barrier_events[i]);
depend_events[num_events + i] = queue->barrier_events[i];
}
CL_OBJECT_UNLOCK(queue);
for (i = 0; i < num_events; i++) {
assert(event_list && event_list[i]);
assert(event_list[i]->ctx == ctx);
assert(CL_OBJECT_IS_EVENT(event_list[i]));
cl_event_add_ref(event_list[i]);
depend_events[i] = event_list[i];
}
if (depend_events)
assert(total_events);
e = cl_event_new(ctx, queue, type, total_events, depend_events);
if (e == NULL) {
err = CL_OUT_OF_HOST_MEMORY;
break;
}
depend_events = NULL;
}
} while (0);
if (err != CL_SUCCESS) {
if (depend_events) {
for (i = 0; i < total_events; i++) {
cl_event_delete(depend_events[i]);
}
cl_free(depend_events);
}
    /* Every failure path above breaks out before the event is created, so e
       is still NULL here and never took ownership of depend_events. */
    assert(e == NULL);
    cl_event_delete(e);
}
if (errcode_ret)
*errcode_ret = err;
return e;
}
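/*
 * Typical call pattern, mirroring cl_event_create_marker_or_barrier() below
 * (the other enqueue paths in the driver follow the same shape; the command
 * type shown here is just an example):
 *
 *   cl_int err = CL_SUCCESS;
 *   cl_event e = cl_event_create(queue->ctx, queue, num_events_in_wait_list,
 *                                event_wait_list, CL_COMMAND_NDRANGE_KERNEL,
 *                                &err);
 *   if (err != CL_SUCCESS)
 *     return err; // e is NULL on failure; the wait-list refs were released.
 */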
LOCAL cl_int
cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_notify, void *user_data)
{
cl_int err = CL_SUCCESS;
cl_event_user_callback cb;
cl_bool exec_imm = CL_FALSE;
assert(event);
assert(pfn_notify);
do {
cb = cl_calloc(1, sizeof(_cl_event_user_callback));
if (cb == NULL) {
err = CL_OUT_OF_HOST_MEMORY;
break;
}
list_node_init(&cb->node);
cb->pfn_notify = pfn_notify;
cb->user_data = user_data;
cb->status = exec_type;
cb->executed = CL_FALSE;
CL_OBJECT_LOCK(event);
if (event->status > exec_type) {
list_add_tail(&event->callbacks, &cb->node);
cb = NULL;
} else {
      /* The event has already reached this state; call the callback immediately. */
exec_imm = CL_TRUE;
}
CL_OBJECT_UNLOCK(event);
if (exec_imm) {
cb->pfn_notify(event, event->status, cb->user_data);
}
} while (0);
if (cb)
cl_free(cb);
return err;
}
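/*
 * This routine backs the public clSetEventCallback() entry point. A minimal
 * host-side usage sketch (standard OpenCL 1.1+ API; error checking omitted):
 *
 *   #include <CL/cl.h>
 *   #include <stdio.h>
 *
 *   static void CL_CALLBACK on_done(cl_event ev, cl_int status, void *data)
 *   {
 *     printf("event %p reached status %d\n", (void *)ev, status);
 *   }
 *
 *   // ... after an enqueue produced `ev`:
 *   clSetEventCallback(ev, CL_COMPLETE, on_done, NULL);
 *
 * Note the immediate-execution path above: if the event has already reached
 * the requested status, the callback runs synchronously on the calling
 * thread instead of being queued.
 */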
LOCAL cl_int
cl_event_set_status(cl_event event, cl_int status)
{
list_head tmp_callbacks;
list_node *n;
list_node *pos;
cl_bool notify_queue = CL_FALSE;
cl_event_user_callback cb;
assert(event);
CL_OBJECT_LOCK(event);
if (event->status <= CL_COMPLETE) { // Already set to error or completed
CL_OBJECT_UNLOCK(event);
return CL_INVALID_OPERATION;
}
if (CL_EVENT_IS_USER(event)) {
assert(event->status != CL_RUNNING && event->status != CL_QUEUED);
} else {
assert(event->queue); // Must belong to some queue.
}
  if (status >= event->status) { // Status may only move forward (toward CL_COMPLETE).
CL_OBJECT_UNLOCK(event);
return CL_INVALID_OPERATION;
}
event->status = status;
/* Call all the callbacks. */
if (!list_empty(&event->callbacks)) {
do {
status = event->status;
list_init(&tmp_callbacks);
list_move(&event->callbacks, &tmp_callbacks);
      /* Invoke the callbacks without holding the lock. */
CL_OBJECT_UNLOCK(event);
list_for_each_safe(pos, n, &tmp_callbacks)
{
cb = list_entry(pos, _cl_event_user_callback, node);
assert(cb->executed == CL_FALSE);
if (cb->status < status)
continue;
list_node_del(&cb->node);
cb->executed = CL_TRUE;
cb->pfn_notify(event, status, cb->user_data);
cl_free(cb);
}
CL_OBJECT_LOCK(event);
      // Put back the callbacks that did not fire.
      list_merge(&event->callbacks, &tmp_callbacks);
      /* The status may have changed while we were unlocked; check again. */
} while (status != event->status);
}
  /* Wake up everyone waiting on a status change. */
CL_OBJECT_NOTIFY_COND(event);
if (event->status <= CL_COMPLETE) {
notify_queue = CL_TRUE;
}
CL_OBJECT_UNLOCK(event);
  /* Notify all the command queues within the same context. */
if (notify_queue) {
cl_command_queue queue = NULL;
    /* First, remove it from the queue's barrier list. */
if (CL_EVENT_IS_BARRIER(event)) {
assert(event->queue);
cl_command_queue_remove_barrier_event(event->queue, event);
}
/* Then, notify all the queues within the same context. */
CL_OBJECT_LOCK(event->ctx);
    /* Temporarily forbid adding or removing queues on the context, so that
       every queue we visit below stays valid while we iterate. */
    event->ctx->queue_modify_disable++;
CL_OBJECT_UNLOCK(event->ctx);
list_for_each(pos, &event->ctx->queues)
{
queue = (cl_command_queue)(list_entry(pos, _cl_base_object, node));
assert(queue != NULL);
cl_command_queue_notify(queue);
}
CL_OBJECT_LOCK(event->ctx);
    /* Re-allow queue addition and removal on the context. */
    event->ctx->queue_modify_disable--;
CL_OBJECT_NOTIFY_COND(event->ctx);
CL_OBJECT_UNLOCK(event->ctx);
}
return CL_SUCCESS;
}
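/*
 * For user events this path is reached through clSetUserEventStatus(). A
 * short host-side sketch (standard OpenCL API; error checking omitted, and
 * the buffers/sizes are hypothetical):
 *
 *   cl_int err;
 *   cl_event user_ev = clCreateUserEvent(ctx, &err);
 *   clEnqueueCopyBuffer(queue, src, dst, 0, 0, size, 1, &user_ev, NULL);
 *   // The copy stays pending until the user event fires:
 *   clSetUserEventStatus(user_ev, CL_COMPLETE);
 *   // A second call returns CL_INVALID_OPERATION, matching the
 *   // "already set to error or completed" check at the top of this function.
 */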
LOCAL cl_int
cl_event_wait_for_event_ready(const cl_event event)
{
assert(CL_OBJECT_IS_EVENT(event));
return cl_event_wait_for_events_list(event->depend_event_num, event->depend_events);
}
LOCAL cl_int
cl_event_wait_for_events_list(cl_uint num_events, const cl_event *event_list)
{
int i;
cl_event e;
cl_int ret = CL_SUCCESS;
for (i = 0; i < num_events; i++) {
e = event_list[i];
assert(e);
assert(CL_OBJECT_IS_EVENT(e));
CL_OBJECT_LOCK(e);
while (e->status > CL_COMPLETE) {
CL_OBJECT_WAIT_ON_COND(e);
}
assert(e->status <= CL_COMPLETE);
    /* If any event ended in error, report it to the caller. */
if (e->status < CL_COMPLETE) {
ret = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
}
CL_OBJECT_UNLOCK(e);
}
return ret;
}
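/*
 * This loop ultimately services the public clWaitForEvents() call. Host-side
 * sketch (standard OpenCL API; the kernels and sizes are hypothetical):
 *
 *   cl_event evs[2];
 *   size_t g = 1024;
 *   clEnqueueNDRangeKernel(queue, k1, 1, NULL, &g, NULL, 0, NULL, &evs[0]);
 *   clEnqueueNDRangeKernel(queue, k2, 1, NULL, &g, NULL, 0, NULL, &evs[1]);
 *   cl_int err = clWaitForEvents(2, evs);
 *   // err is CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if any event
 *   // finished with a negative (error) status, CL_SUCCESS otherwise.
 */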
LOCAL cl_int
cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
cl_event *event, cl_context ctx)
{
cl_int err = CL_SUCCESS;
cl_int i;
do {
/* check the event_wait_list and num_events_in_wait_list */
if ((event_wait_list == NULL) && (num_events_in_wait_list > 0)) {
err = CL_INVALID_EVENT_WAIT_LIST;
break;
}
if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)) {
err = CL_INVALID_EVENT_WAIT_LIST;
break;
}
/* check the event and context */
for (i = 0; i < num_events_in_wait_list; i++) {
if (!CL_OBJECT_IS_EVENT(event_wait_list[i])) {
err = CL_INVALID_EVENT_WAIT_LIST;
break;
}
      if (event == event_wait_list + i) { /* The out-event must not point into the wait list itself. */
err = CL_INVALID_EVENT_WAIT_LIST;
break;
}
      /* Check that all events belong to the same context. */
if (ctx == NULL) {
ctx = event_wait_list[i]->ctx;
}
if (event_wait_list[i]->ctx != ctx) {
err = CL_INVALID_CONTEXT;
break;
}
}
if (err != CL_SUCCESS)
break;
} while (0);
return err;
}
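/*
 * The first two checks enforce the spec rule that event_wait_list and
 * num_events_in_wait_list must be both empty or both non-empty. Seen from
 * the host API (illustrative; evs is a hypothetical valid event array):
 *
 *   clEnqueueMarkerWithWaitList(queue, 1, NULL, NULL); // CL_INVALID_EVENT_WAIT_LIST
 *   clEnqueueMarkerWithWaitList(queue, 0, evs, NULL);  // CL_INVALID_EVENT_WAIT_LIST
 */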
/* When this is called, every event it depends on must already be ready,
   unless ignore_depends is set. */
LOCAL cl_int
cl_event_exec(cl_event event, cl_int exec_to_status, cl_bool ignore_depends)
{
  /* Callers serialize execution of a given event: no two threads run this on
     the same event at the same time, so no lock is needed. */
cl_int ret = CL_SUCCESS;
cl_int cur_status = cl_event_get_status(event);
cl_int depend_status;
cl_int s;
assert(exec_to_status >= CL_COMPLETE);
assert(exec_to_status <= CL_QUEUED);
if (cur_status < CL_COMPLETE) {
return cur_status;
}
depend_status = cl_event_is_ready(event);
assert(depend_status <= CL_COMPLETE || ignore_depends || exec_to_status == CL_QUEUED);
  if (depend_status < CL_COMPLETE) { // An error happened; cancel execution.
ret = cl_event_set_status(event, depend_status);
return depend_status;
}
if (cur_status <= exec_to_status) {
return ret;
}
/* Exec to the target status. */
for (s = cur_status - 1; s >= exec_to_status; s--) {
assert(s >= CL_COMPLETE);
ret = cl_enqueue_handle(&event->exec_data, s);
if (ret != CL_SUCCESS) {
assert(ret < 0);
DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error status is %d",
event, event->event_type, ret);
ret = cl_event_set_status(event, ret);
assert(ret == CL_SUCCESS);
      return ret; // Failed; do not advance any further.
} else {
assert(!CL_EVENT_IS_USER(event));
if ((event->queue->props & CL_QUEUE_PROFILING_ENABLE) != 0) {
        /* Record the profiling timestamp for this state before publishing it. */
cl_event_update_timestamp(event, s);
}
ret = cl_event_set_status(event, s);
assert(ret == CL_SUCCESS);
}
}
return ret;
}
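/*
 * The loop above walks the event down the status ladder one step at a time;
 * for example, driving a CL_QUEUED event to CL_COMPLETE handles the states
 * CL_SUBMITTED(2), CL_RUNNING(1), CL_COMPLETE(0) in that order. A standalone
 * sketch of just the ladder (illustrative only):
 *
 *   #include <CL/cl.h>
 *   #include <stdio.h>
 *
 *   int main(void)
 *   {
 *     cl_int cur = CL_QUEUED, target = CL_COMPLETE, s;
 *     for (s = cur - 1; s >= target; s--)
 *       printf("advance to status %d\n", s); // prints 2, 1, 0
 *     return 0;
 *   }
 */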
/* CL_COMPLETE (0) means ready, >0 means not ready yet, <0 means error. */
LOCAL cl_int
cl_event_is_ready(cl_event event)
{
int i;
int status;
int ret_status = CL_COMPLETE;
for (i = 0; i < event->depend_event_num; i++) {
status = cl_event_get_status(event->depend_events[i]);
    if (status > CL_COMPLETE) { // Found one not ready yet; report it right away.
      return status;
    }
    if (status < CL_COMPLETE) { // Remember the error, but keep scanning.
      ret_status = status;
    }
}
return ret_status;
}
LOCAL cl_event
cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_bool is_barrier, cl_int *error)
{
cl_event e = NULL;
cl_int err = CL_SUCCESS;
cl_command_type type = CL_COMMAND_MARKER;
enqueue_type eq_type = EnqueueMarker;
if (is_barrier) {
type = CL_COMMAND_BARRIER;
eq_type = EnqueueBarrier;
}
if (event_wait_list) {
assert(num_events_in_wait_list > 0);
e = cl_event_create(queue->ctx, queue, num_events_in_wait_list,
event_wait_list, type, &err);
if (err != CL_SUCCESS) {
*error = err;
return NULL;
}
  } else { /* The marker depends on every event currently in the queue. */
cl_command_queue_enqueue_worker worker = &queue->worker;
cl_uint i;
cl_uint event_num;
cl_event *depend_events;
CL_OBJECT_LOCK(queue);
    /* First, wait for the command queue to retire every event still executing. */
    while (1) {
      if (worker->quit) { // The queue is already being destroyed.
CL_OBJECT_UNLOCK(queue);
*error = CL_INVALID_COMMAND_QUEUE;
return NULL;
}
if (worker->in_exec_status != CL_COMPLETE) {
CL_OBJECT_WAIT_ON_COND(queue);
continue;
}
break;
}
event_num = 0;
depend_events = NULL;
if (!list_empty(&worker->enqueued_events)) {
depend_events = cl_command_queue_record_in_queue_events(queue, &event_num);
}
CL_OBJECT_UNLOCK(queue);
e = cl_event_create(queue->ctx, queue, event_num, depend_events, type, &err);
    for (i = 0; i < event_num; i++) { /* Drop the temporary references taken above. */
cl_event_delete(depend_events[i]);
}
if (depend_events)
cl_free(depend_events);
if (err != CL_SUCCESS) {
*error = err;
return NULL;
}
}
e->exec_data.type = eq_type;
*error = CL_SUCCESS;
return e;
}
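/*
 * Host-side view of the helper above (standard OpenCL 1.2 APIs, which this
 * driver routes through cl_event_create_marker_or_barrier; error checking
 * omitted and evs is a hypothetical event array):
 *
 *   cl_event marker, barrier;
 *   // Marker depending on everything enqueued so far:
 *   clEnqueueMarkerWithWaitList(queue, 0, NULL, &marker);
 *   // Barrier gating later commands on two explicit events:
 *   clEnqueueBarrierWithWaitList(queue, 2, evs, &barrier);
 */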