1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
#ifndef __CL_KERNEL_H__
#define __CL_KERNEL_H__
#include "cl_internals.h"
#include "cl_base_object.h"
#include "cl_driver.h"
#include "cl_gbe_loader.h"
#include "CL/cl.h"
#include "CL/cl_ext.h"
#include <stdint.h>
#include <stdlib.h>
/* This is the kernel as it is interfaced by the compiler */
struct _gbe_kernel;
/* We need to save buffer data for relocation and binding and we must figure out
* if all arguments are properly set
*/
typedef struct cl_argument {
cl_mem mem; /* For image and regular buffers */
cl_sampler sampler; /* For sampler. */
cl_accelerator_intel accel;
unsigned char bti;
void *ptr; /* SVM ptr value. */
uint32_t local_sz:30; /* For __local size specification */
uint32_t is_set:1; /* All args must be set before NDRange */
uint32_t is_svm:1; /* Indicate this argument is SVMPointer */
} cl_argument;
/* One OCL function */
struct _cl_kernel {
_cl_base_object base;
cl_buffer bo; /* The code itself */
cl_program program; /* Owns this structure (and pointers) */
gbe_kernel opaque; /* (Opaque) compiler structure for the OCL kernel */
cl_accelerator_intel accel; /* accelerator */
char *curbe; /* One curbe per kernel */
size_t curbe_sz; /* Size of it */
uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
size_t sampler_sz; /* sampler size defined in kernel & kernel args. */
struct ImageInfo *images; /* images defined in kernel args */
size_t image_sz; /* image count in kernel args */
cl_ulong local_mem_sz; /* local memory size specified in kernel args. */
size_t compile_wg_sz[3]; /* Required workgroup size by __attribute__((reqd_work_gro
up_size(X, Y, Z))) qualifier.*/
size_t global_work_sz[3]; /* maximum global size that can be used to execute a kernel
(i.e. global_work_size argument to clEnqueueNDRangeKernel.)*/
size_t stack_size; /* stack size per work item. */
cl_argument *args; /* To track argument setting */
uint32_t arg_n:30; /* Number of arguments */
uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
uint32_t vme:1; /* True only if it is a built-in kernel for VME */
void* cmrt_kernel; /* CmKernel* */
uint32_t exec_info_n; /* The kernel's exec info count */
void** exec_info; /* The kernel's exec info */
cl_bool useDeviceEnqueue; /* kernel use device enqueue */
void* device_enqueue_ptr; /* device_enqueue buffer*/
uint32_t device_enqueue_info_n; /* count of parent kernel's arguments buffers, as child enqueues' exec info */
void** device_enqueue_infos; /* parent kernel's arguments buffers, as child enqueues' exec info */
};
#define CL_OBJECT_KERNEL_MAGIC 0x1234567890abedefLL
#define CL_OBJECT_IS_KERNEL(obj) ((obj && \
((cl_base_object)obj)->magic == CL_OBJECT_KERNEL_MAGIC && \
CL_OBJECT_GET_REF(obj) >= 1))
/* Allocate an empty kernel */
extern cl_kernel cl_kernel_new(cl_program);
/* Destroy and deallocate an empty kernel */
extern void cl_kernel_delete(cl_kernel);
/* Setup the kernel with the given GBE Kernel */
extern void cl_kernel_setup(cl_kernel k, gbe_kernel opaque);
/* Get the kernel name */
extern const char *cl_kernel_get_name(cl_kernel k);
/* Get the kernel attributes*/
extern const char *cl_kernel_get_attributes(cl_kernel k);
/* Get the simd width as used in the code */
extern uint32_t cl_kernel_get_simd_width(cl_kernel k);
/* When a kernel is created from outside, we just duplicate the structure we
* have internally and give it back to the user
*/
extern cl_kernel cl_kernel_dup(cl_kernel);
/* Add one more reference on the kernel object */
extern void cl_kernel_add_ref(cl_kernel);
/* Set the argument before kernel execution */
extern int cl_kernel_set_arg(cl_kernel,
uint32_t arg_index,
size_t arg_size,
const void *arg_value);
extern int cl_kernel_set_arg_svm_pointer(cl_kernel,
uint32_t arg_index,
const void *arg_value);
extern cl_int cl_kernel_set_exec_info(cl_kernel k,
size_t n,
const void *value);
/* Get the argument information */
extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index,
cl_kernel_arg_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret);
/* Compute and check the work group size from the user provided local size */
extern cl_int
cl_kernel_work_group_sz(cl_kernel ker,
const size_t *local_wk_sz,
cl_uint wk_dim,
size_t *wk_grp_sz);
#endif /* __CL_KERNEL_H__ */
|