/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Segovia
*/
#include "cl_kernel.h"
#include "cl_program.h"
#include "cl_device_id.h"
#include "cl_context.h"
#include "cl_mem.h"
#include "cl_alloc.h"
#include "cl_utils.h"
#include "cl_khr_icd.h"
#include "CL/cl.h"
#include "cl_sampler.h"
#include
#include
#include
#include
#include
/*
 * Drop one reference on a kernel and tear it down once the counter says
 * nobody else holds it.
 *
 * k: kernel to release; NULL is accepted and ignored.
 *
 * Teardown releases the code and constant buffer objects, the program (only
 * when this kernel took a reference on it), the curbe, every memory object
 * still bound to an argument, the argument array and the image table.
 */
LOCAL void
cl_kernel_delete(cl_kernel k)
{
  uint32_t arg;

  if (k == NULL)
    return;

  /* Somebody else still holds the kernel: keep it alive */
  if (atomic_dec(&k->ref_n) > 1)
    return;

  /* Give back our reference on each buffer object we hold */
  if (k->bo != NULL)
    cl_buffer_unreference(k->bo);
  if (k->const_bo != NULL)
    cl_buffer_unreference(k->const_bo);

  /* Set for kernels created by clCreateKernel */
  if (k->ref_its_program)
    cl_program_delete(k->program);

  /* The curbe only exists when it was allocated */
  if (k->curbe != NULL)
    cl_free(k->curbe);

  /* Unbind the memory objects before dropping the argument array itself */
  if (k->args != NULL) {
    for (arg = 0; arg < k->arg_n; ++arg) {
      if (k->args[arg].mem == NULL)
        continue;
      cl_mem_delete(k->args[arg].mem);
    }
    cl_free(k->args);
  }

  if (k->image_sz)
    cl_free(k->images);

  /* Poison the header so a stale handle is recognisable */
  k->magic = CL_MAGIC_DEAD_HEADER;
  cl_free(k);
}
/*
 * Allocate an empty kernel object attached to program p.
 *
 * The new kernel starts with a reference count of 1 and the kernel magic
 * header. No reference is taken on p here.
 *
 * Returns the new kernel, or NULL when the allocation fails.
 */
LOCAL cl_kernel
cl_kernel_new(cl_program p)
{
  cl_kernel kernel = NULL;

  /* Jumps to the error label when the allocation fails */
  TRY_ALLOC_NO_ERR (kernel, CALLOC(struct _cl_kernel));
  SET_ICD(kernel->dispatch)
  kernel->program = p;
  kernel->magic = CL_MAGIC_KERNEL_HEADER;
  kernel->ref_n = 1;

exit:
  return kernel;

error:
  cl_kernel_delete(kernel);
  kernel = NULL;
  goto exit;
}
/*
 * Return the name of the kernel as reported by the compiler back-end for
 * k->opaque, or NULL when k itself is NULL.
 */
LOCAL const char*
cl_kernel_get_name(cl_kernel k)
{
  const char *name = NULL;

  if (UNLIKELY(k == NULL))
    return name;

  name = gbe_kernel_get_name(k->opaque);
  return name;
}
/*
 * Take one more reference on the kernel.
 *
 * k is dereferenced unconditionally, so it must not be NULL.
 */
LOCAL void
cl_kernel_add_ref(cl_kernel k)
{
  atomic_inc(&k->ref_n);
}
/*
 * Bind a value to argument number `index` of kernel k.
 *
 * k:     kernel whose argument is set (must not be NULL)
 * index: argument position; must be smaller than k->arg_n
 * sz:    size in bytes of the data behind `value`. For a __local pointer it
 *        is the amount of local memory requested and must be non-zero; for
 *        every other kind it must equal the size reported by the back-end.
 * value: - by-value argument: pointer to the bytes to copy into the curbe
 *        - __local pointer:   must be NULL
 *        - sampler:           pointer to a cl_sampler handle
 *        - image:             pointer to a cl_mem image handle
 *        - __global/__constant pointer: pointer to a cl_mem buffer handle;
 *          NULL, or a pointer to a NULL handle, leaves the pointer unbound
 *
 * Returns CL_SUCCESS, or one of CL_INVALID_ARG_INDEX, CL_INVALID_ARG_SIZE,
 * CL_INVALID_ARG_VALUE, CL_INVALID_SAMPLER, CL_INVALID_MEM_OBJECT.
 *
 * A pointer to a NULL cl_mem / cl_sampler handle is rejected (or, for
 * buffers, treated as "no buffer") instead of being dereferenced.
 */
LOCAL cl_int
cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
{
  uint32_t offset;            /* where to patch */
  enum gbe_arg_type arg_type; /* kind of argument */
  size_t arg_sz;              /* size of the argument */
  cl_mem mem = NULL;          /* for __global, __constant and image arguments */

  if (UNLIKELY(index >= k->arg_n))
    return CL_INVALID_ARG_INDEX;
  arg_type = gbe_kernel_get_arg_type(k->opaque, index);
  arg_sz = gbe_kernel_get_arg_size(k->opaque, index);

  /* A local pointer carries a user-chosen size; everything else must match */
  if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz))
    return CL_INVALID_ARG_SIZE;
  if (UNLIKELY(arg_type == GBE_ARG_LOCAL_PTR && sz == 0))
    return CL_INVALID_ARG_SIZE;

  /* Copy the structure or the value directly into the curbe */
  if (arg_type == GBE_ARG_VALUE) {
    if (UNLIKELY(value == NULL))
      return CL_INVALID_ARG_VALUE;
    offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
    assert(offset + sz <= k->curbe_sz);
    memcpy(k->curbe + offset, value, sz);
    k->args[index].local_sz = 0;
    k->args[index].is_set = 1;
    k->args[index].mem = NULL;
    return CL_SUCCESS;
  }

  /* For a local pointer just save the size */
  if (arg_type == GBE_ARG_LOCAL_PTR) {
    if (UNLIKELY(value != NULL))
      return CL_INVALID_ARG_VALUE;
    k->args[index].local_sz = sz;
    k->args[index].is_set = 1;
    k->args[index].mem = NULL;
    return CL_SUCCESS;
  }

  /* Is it a sampler */
  if (arg_type == GBE_ARG_SAMPLER) {
    cl_sampler sampler;

    if (UNLIKELY(value == NULL))
      return CL_INVALID_ARG_VALUE;
    /* Read the handle once so that the handle checked is the handle stored */
    sampler = *(const cl_sampler *)value;
    if (UNLIKELY(sampler == NULL || sampler->magic != CL_MAGIC_SAMPLER_HEADER))
      return CL_INVALID_SAMPLER;
    k->args[index].local_sz = 0;
    k->args[index].is_set = 1;
    k->args[index].mem = NULL;
    k->args[index].sampler = sampler;
    cl_set_sampler_arg_slot(k, index, sampler);
    return CL_SUCCESS;
  }

  /* What remains should be an image, a GLOBAL_PTR or a CONSTANT_PTR */
  if (value != NULL)
    mem = *(const cl_mem *)value;

  if (mem == NULL) {
    int32_t null_offset;

    /* An image argument always needs a real memory object */
    if (UNLIKELY(arg_type == GBE_ARG_IMAGE))
      return CL_INVALID_ARG_VALUE;

    /* For buffer object GLOBAL_PTR / CONSTANT_PTR, it may be NULL */
    assert(arg_type == GBE_ARG_GLOBAL_PTR || arg_type == GBE_ARG_CONSTANT_PTR);
    null_offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
    *((uint32_t *)(k->curbe + null_offset)) = 0;
    if (k->args[index].mem)
      cl_mem_delete(k->args[index].mem);
    k->args[index].mem = NULL;
    k->args[index].is_set = 1;
    k->args[index].local_sz = 0;
    return CL_SUCCESS;
  }

  if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER))
    return CL_INVALID_MEM_OBJECT;
  /* The object kind must agree with the kind the kernel declares */
  if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
            || (arg_type != GBE_ARG_IMAGE && mem->is_image)))
    return CL_INVALID_ARG_VALUE;

  if (arg_type == GBE_ARG_CONSTANT_PTR) {
    int32_t cbOffset;

    cbOffset = gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
    /* A non-negative result is written into the argument's curbe slot
     * (constant ptr's curbe offset changed, update it) */
    if (cbOffset >= 0) {
      offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
      *((uint32_t *)(k->curbe + offset)) = cbOffset; /* cb offset in curbe */
    }
  }

  /* Retain the new object before releasing the old one: they may be the same */
  cl_mem_add_ref(mem);
  if (k->args[index].mem)
    cl_mem_delete(k->args[index].mem);
  k->args[index].mem = mem;
  k->args[index].is_set = 1;
  k->args[index].local_sz = 0;
  return CL_SUCCESS;
}
/*
 * Return the SIMD width reported by the compiler back-end for k->opaque.
 *
 * k must not be NULL (checked with an assertion).
 */
LOCAL uint32_t
cl_kernel_get_simd_width(cl_kernel k)
{
  uint32_t width;

  assert(k != NULL);
  width = gbe_kernel_get_simd_width(k->opaque);
  return width;
}
/*
 * Attach the compiled kernel `opaque` to k and cache what the runtime needs
 * from it: a buffer object holding the code, the argument count, the curbe
 * size, the sampler table and the image table.
 *
 * k:      kernel to fill in; k->program->ctx supplies the buffer manager
 * opaque: back-end kernel handle, stored in k->opaque
 *
 * Any code buffer previously held by k is released first. The function
 * returns nothing; when the image table cannot be allocated the code buffer
 * is released again, k->bo is left NULL and k->image_sz is reset to 0.
 */
LOCAL void
cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
{
  cl_context ctx = k->program->ctx;
  cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);

  if (k->bo != NULL)
    cl_buffer_unreference(k->bo);

  /* Allocate the gen code here */
  const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
  const char *code = gbe_kernel_get_code(opaque);
  k->bo = cl_buffer_alloc(bufmgr, "CL kernel", code_sz, 64u);
  k->arg_n = gbe_kernel_get_arg_num(opaque);

  /* Upload the code */
  cl_buffer_subdata(k->bo, 0, code_sz, code);
  k->opaque = opaque;

  /* Create the curbe */
  k->curbe_sz = gbe_kernel_get_curbe_size(k->opaque);

  /* Get sampler data & size */
  k->sampler_sz = gbe_kernel_get_sampler_size(k->opaque);
  assert(k->sampler_sz <= GEN_MAX_SAMPLERS);
  if (k->sampler_sz > 0)
    gbe_kernel_get_sampler_data(k->opaque, k->samplers);

  /* Get image data & size; the surface limit applies to the image count */
  k->image_sz = gbe_kernel_get_image_size(k->opaque);
  assert(k->image_sz <= GEN_MAX_SURFACES);
  if (k->image_sz > 0) {
    TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
    gbe_kernel_get_image_data(k->opaque, k->images);
  } else {
    k->images = NULL;
  }
  return;

error:
  cl_buffer_unreference(k->bo);
  k->bo = NULL;
  /* No table was allocated: do not advertise images that are not there */
  k->image_sz = 0;
}
/*
 * Create a new kernel that shares the code, constant buffer, back-end handle
 * and program of `from`, with its own (zeroed) argument array and curbe and
 * its own copy of the sampler and image tables.
 *
 * from: kernel to duplicate; NULL yields NULL.
 *
 * Returns the new kernel with a reference count of 1, or NULL when an
 * allocation fails. On success the duplicate holds one reference on each
 * shared buffer object and one on the program (ref_its_program is set so
 * that cl_kernel_delete releases it).
 *
 * The buffer references are taken at the moment the pointers are copied:
 * the error path goes through cl_kernel_delete, which unreferences whatever
 * to->bo / to->const_bo point at, so the counts must already be balanced
 * when a later allocation fails.
 */
LOCAL cl_kernel
cl_kernel_dup(cl_kernel from)
{
  cl_kernel to = NULL;

  if (UNLIKELY(from == NULL))
    return NULL;

  TRY_ALLOC_NO_ERR (to, CALLOC(struct _cl_kernel));
  SET_ICD(to->dispatch)

  /* Share and retain the bos */
  to->bo = from->bo;
  if (to->bo)
    cl_buffer_reference(to->bo);
  to->const_bo = from->const_bo;
  if (to->const_bo)
    cl_buffer_reference(to->const_bo);

  to->opaque = from->opaque;
  to->ref_n = 1;
  to->magic = CL_MAGIC_KERNEL_HEADER;
  to->program = from->program;
  to->arg_n = from->arg_n;
  to->curbe_sz = from->curbe_sz;
  to->sampler_sz = from->sampler_sz;
  to->image_sz = from->image_sz;

  if (to->sampler_sz)
    memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));

  if (to->image_sz) {
    TRY_ALLOC_NO_ERR(to->images, cl_calloc(to->image_sz, sizeof(to->images[0])));
    memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0]));
  } else {
    to->images = NULL;
  }

  /* Arguments and curbe start out zeroed: nothing is carried over */
  TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
  if (to->curbe_sz) {
    TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
  }

  /* We retain the program destruction since this kernel (user allocated)
   * depends on the program for some of its pointers
   */
  assert(from->program);
  cl_program_add_ref(from->program);
  to->ref_its_program = CL_TRUE;

exit:
  return to;

error:
  cl_kernel_delete(to);
  to = NULL;
  goto exit;
}
/*
 * Validate a local work size against the kernel and the device, and compute
 * the number of work items in one work group.
 *
 * ker:         kernel being launched
 * local_wk_sz: local size per dimension; entry 0 is always read
 * wk_dim:      number of dimensions in local_wk_sz
 * wk_grp_sz:   optional output. Receives the product of the local sizes, or
 *              0 when a dimension disagrees with the size the kernel
 *              requires (the product is not computed in that case).
 *
 * Returns CL_SUCCESS, or CL_INVALID_WORK_ITEM_SIZE when a dimension differs
 * from a non-zero required size or when the product exceeds the device's
 * max_work_group_size.
 */
LOCAL cl_int
cl_kernel_work_group_sz(cl_kernel ker,
                        const size_t *local_wk_sz,
                        uint32_t wk_dim,
                        size_t *wk_grp_sz)
{
  cl_int status = CL_SUCCESS;
  size_t total = 0;
  cl_uint dim;

  /* A required size of 0 means the kernel imposes nothing on that dimension */
  for (dim = 0; dim < wk_dim; ++dim) {
    const uint32_t required = gbe_kernel_get_required_work_group_size(ker->opaque, dim);
    if (required == 0)
      continue;
    if (required != local_wk_sz[dim]) {
      status = CL_INVALID_WORK_ITEM_SIZE;
      goto done;
    }
  }

  /* Work group size is the product over all dimensions */
  total = local_wk_sz[0];
  for (dim = 1; dim < wk_dim; ++dim)
    total *= local_wk_sz[dim];

  if (total > ker->program->ctx->device->max_work_group_size)
    status = CL_INVALID_WORK_ITEM_SIZE;

done:
  if (wk_grp_sz != NULL)
    *wk_grp_sz = total;
  return status;
}