/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see .
*
* Author: Benjamin Segovia
*/
#ifndef __CL_KERNEL_H__
#define __CL_KERNEL_H__
#include "cl_internals.h"
#include "cl_base_object.h"
#include "cl_driver.h"
#include "cl_gbe_loader.h"
#include "CL/cl.h"
#include "CL/cl_ext.h"
#include
#include
/* This is the kernel as it is interfaced by the compiler */
struct _gbe_kernel;
/* We need to save buffer data for relocation and binding and we must figure out
* if all arguments are properly set
*/
typedef struct cl_argument {
cl_mem mem; /* For image and regular buffers */
cl_sampler sampler; /* For sampler. */
cl_accelerator_intel accel;
unsigned char bti;
void *ptr; /* SVM ptr value. */
uint32_t local_sz:30; /* For __local size specification */
uint32_t is_set:1; /* All args must be set before NDRange */
uint32_t is_svm:1; /* Indicate this argument is SVMPointer */
} cl_argument;
/* One OCL function */
struct _cl_kernel {
_cl_base_object base;
cl_buffer bo; /* The code itself */
cl_program program; /* Owns this structure (and pointers) */
gbe_kernel opaque; /* (Opaque) compiler structure for the OCL kernel */
cl_accelerator_intel accel; /* accelerator */
char *curbe; /* One curbe per kernel */
size_t curbe_sz; /* Size of it */
uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
size_t sampler_sz; /* sampler size defined in kernel & kernel args. */
struct ImageInfo *images; /* images defined in kernel args */
size_t image_sz; /* image count in kernel args */
cl_ulong local_mem_sz; /* local memory size specified in kernel args. */
size_t compile_wg_sz[3]; /* Required workgroup size by __attribute__((reqd_work_gro
up_size(X, Y, Z))) qualifier.*/
size_t global_work_sz[3]; /* maximum global size that can be used to execute a kernel
(i.e. global_work_size argument to clEnqueueNDRangeKernel.)*/
size_t stack_size; /* stack size per work item. */
cl_argument *args; /* To track argument setting */
uint32_t arg_n:30; /* Number of arguments */
uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
uint32_t vme:1; /* True only if it is a built-in kernel for VME */
void* cmrt_kernel; /* CmKernel* */
uint32_t exec_info_n; /* The kernel's exec info count */
void** exec_info; /* The kernel's exec info */
cl_bool useDeviceEnqueue; /* kernel use device enqueue */
void* device_enqueue_ptr; /* device_enqueue buffer*/
uint32_t device_enqueue_info_n; /* count of parent kernel's arguments buffers, as child enqueues' exec info */
void** device_enqueue_infos; /* parent kernel's arguments buffers, as child enqueues' exec info */
};
#define CL_OBJECT_KERNEL_MAGIC 0x1234567890abedefLL
#define CL_OBJECT_IS_KERNEL(obj) ((obj && \
((cl_base_object)obj)->magic == CL_OBJECT_KERNEL_MAGIC && \
CL_OBJECT_GET_REF(obj) >= 1))
/* Allocate an empty kernel */
extern cl_kernel cl_kernel_new(cl_program);
/* Destroy and deallocate an empty kernel */
extern void cl_kernel_delete(cl_kernel);
/* Setup the kernel with the given GBE Kernel */
extern void cl_kernel_setup(cl_kernel k, gbe_kernel opaque);
/* Get the kernel name */
extern const char *cl_kernel_get_name(cl_kernel k);
/* Get the kernel attributes*/
extern const char *cl_kernel_get_attributes(cl_kernel k);
/* Get the simd width as used in the code */
extern uint32_t cl_kernel_get_simd_width(cl_kernel k);
/* When a kernel is created from outside, we just duplicate the structure we
* have internally and give it back to the user
*/
extern cl_kernel cl_kernel_dup(cl_kernel);
/* Add one more reference on the kernel object */
extern void cl_kernel_add_ref(cl_kernel);
/* Set the argument before kernel execution */
extern int cl_kernel_set_arg(cl_kernel,
uint32_t arg_index,
size_t arg_size,
const void *arg_value);
extern int cl_kernel_set_arg_svm_pointer(cl_kernel,
uint32_t arg_index,
const void *arg_value);
extern cl_int cl_kernel_set_exec_info(cl_kernel k,
size_t n,
const void *value);
/* Get the argument information */
extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index,
cl_kernel_arg_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret);
/* Compute and check the work group size from the user provided local size */
extern cl_int
cl_kernel_work_group_sz(cl_kernel ker,
const size_t *local_wk_sz,
cl_uint wk_dim,
size_t *wk_grp_sz);
#endif /* __CL_KERNEL_H__ */