/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Segovia
*/
#ifndef __CL_DRIVER_H__
#define __CL_DRIVER_H__
#include <stdint.h>
#include <stdlib.h>
#include "cl_driver_type.h"
#include "CL/cl_ext.h"
/* Hard-coded upper bounds for surfaces, samplers and VME states. The original
 * author marks these as limitations that should eventually be removed. */
#define GEN_MAX_SURFACES 256
#define GEN_MAX_SAMPLERS 16
#define GEN_MAX_VME_STATES 8
/**************************************************************************
* cl_driver:
* Hide buffer allocation / deallocation, etc. behind callbacks. This makes it
* easier to use a software performance simulator and minimizes the code that
* is specific to the HW and to the simulator
**************************************************************************/
/* Driver-level callbacks. Each one is declared as a function type plus an
 * extern pointer to that type; the pointers are only declared here and must be
 * defined elsewhere. */
/* Create a new driver from a cl_context_prop and return its cl_driver handle */
typedef cl_driver (cl_driver_new_cb)(cl_context_prop);
extern cl_driver_new_cb *cl_driver_new;
/* Delete the driver */
typedef void (cl_driver_delete_cb)(cl_driver);
extern cl_driver_delete_cb *cl_driver_delete;
/* Get the buffer manager from the driver */
typedef cl_buffer_mgr (cl_driver_get_bufmgr_cb)(cl_driver);
extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr;
/* Get the Gen version from the driver (returned as a uint32_t) */
typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
extern cl_driver_get_ver_cb *cl_driver_get_ver;
/* Enlarge the stack size through the driver. The int32_t pointer is presumably
 * the stack size, updated in place -- TODO confirm against the implementation */
typedef void (cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*);
extern cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size;
/* Self test outcome: pass, or which category of check failed (going by the
 * enumerator names: SLM, atomics, anything else) */
typedef enum cl_self_test_res {
  SELF_TEST_PASS        = 0x0,
  SELF_TEST_SLM_FAIL    = 0x1,
  SELF_TEST_ATOMIC_FAIL = 0x2,
  SELF_TEST_OTHER_FAIL  = 0x3
} cl_self_test_res;
/* Set the atomic enable/disable flag in the driver. The int argument carries
 * the flag; which value means "enabled" is not visible from this header. */
typedef void (cl_driver_set_atomic_flag_cb)(cl_driver, int);
extern cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag;
/**************************************************************************
* GPGPU command streamer
**************************************************************************/
/* Texture tiling mode: untiled, X-tiled or Y-tiled */
typedef enum cl_gpgpu_tiling {
  GPGPU_NO_TILE = 0x0,
  GPGPU_TILE_X  = 0x1,
  GPGPU_TILE_Y  = 0x2
} cl_gpgpu_tiling;
/* Cache control options for gen7.
 * Numerically cc_llc_l3 equals (cc_l3 | cc_llc). */
typedef enum cl_cache_control {
  cc_gtt    = 0,
  cc_l3     = 1,
  cc_llc    = 2,
  cc_llc_l3 = 3
} cl_cache_control;
/* L3 cache control options for gen75 (a single bit, at bit 0) */
typedef enum cl_l3_cache_control {
  l3cc_uc = 0,
  l3cc_ec = 1
} cl_l3_cache_control;
/* LLCCC cache control options for gen75.
 * A two-bit selector (0..3) stored at bit 1, i.e. occupying bits 1-2. */
typedef enum cl_llccc_cache_control {
  llccc_pte   = (0 << 1),
  llccc_uc    = (1 << 1),
  llccc_ec    = (2 << 1),
  llccc_ucllc = (3 << 1)
} cl_llccc_cache_control;
/* Target cache control options for gen8.
 * A two-bit selector (0..3) stored at bit 3, i.e. occupying bits 3-4. */
typedef enum cl_target_cache_control {
  tcc_ec_only   = (0 << 3),
  tcc_llc_only  = (1 << 3),
  tcc_llc_ec    = (2 << 3),
  tcc_llc_ec_l3 = (3 << 3)
} cl_target_cache_control;
/* Memory type LLC/ELLC cache control options for gen8.
 * A two-bit selector (0..3) stored at bit 5, i.e. occupying bits 5-6. */
typedef enum cl_mtllc_cache_control {
  mtllc_pte  = (0 << 5),
  mtllc_none = (1 << 5),
  mtllc_wt   = (2 << 5),
  mtllc_wb   = (3 << 5)
} cl_mtllc_cache_control;
/* Status of a GPU command. The numeric value counts down as the command
 * progresses, from queued (3) to complete (0).
 * NOTE(review): these values equal OpenCL's CL_COMPLETE / CL_RUNNING /
 * CL_SUBMITTED / CL_QUEUED -- presumably intentional, confirm before changing. */
typedef enum gpu_command_status {
  command_complete  = 0,
  command_running   = 1,
  command_submitted = 2,
  command_queued    = 3
} gpu_command_status;
/* Use this structure to bind kernels in the gpgpu state. A pointer to it is
 * the kernel argument of cl_gpgpu_states_setup_cb. */
typedef struct cl_gpgpu_kernel {
const char *name; /* kernel name and bo name */
uint32_t grf_blocks; /* register blocks kernel wants (in 8 reg blocks) */
uint32_t curbe_sz; /* total size of all curbes */
cl_buffer bo; /* kernel code in the proper addr space */
int32_t barrierID; /* barrierID for _this_ kernel */
uint32_t use_slm:1; /* 1-bit flag. For gen7 (automatic barrier management) */
uint32_t thread_n:15; /* 15-bit field, so at most 32767. For gen7 (automatic barrier management) */
uint32_t slm_sz; /* For gen7 (automatic SLM allocation) */
} cl_gpgpu_kernel;
/* Create a new gpgpu state for the given driver */
typedef cl_gpgpu (cl_gpgpu_new_cb)(cl_driver);
extern cl_gpgpu_new_cb *cl_gpgpu_new;
/* Delete the gpgpu state */
typedef void (cl_gpgpu_delete_cb)(cl_gpgpu);
extern cl_gpgpu_delete_cb *cl_gpgpu_delete;
/* Synchronize GPU with CPU. The argument is an untyped pointer rather than a
 * cl_gpgpu. NOTE(review): presumably a batch buffer handle such as the one
 * cl_gpgpu_ref_batch_buf returns -- confirm against the implementation */
typedef void (cl_gpgpu_sync_cb)(void*);
extern cl_gpgpu_sync_cb *cl_gpgpu_sync;
/* Bind a regular unformatted buffer to the gpgpu state.
 * NOTE(review): bti is presumably the binding table index, and the difference
 * between offset and internal_offset is not visible here -- confirm */
typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, size_t size, uint8_t bti);
extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
/* Set an opaque (void *) kernel pointer on the gpgpu state; what it points to
 * is not visible from this header */
typedef void (cl_gpgpu_set_kernel_cb)(cl_gpgpu, void *);
extern cl_gpgpu_set_kernel_cb *cl_gpgpu_set_kernel;
/* Get the opaque (void *) kernel pointer back from the gpgpu state */
typedef void* (cl_gpgpu_get_kernel_cb)(cl_gpgpu);
extern cl_gpgpu_get_kernel_cb *cl_gpgpu_get_kernel;
/* Bind samplers defined in both kernel and kernel args. samplers points to
 * uint32_t entries; sampler_sz is presumably their count -- TODO confirm */
typedef void (cl_gpgpu_bind_sampler_cb)(cl_gpgpu, uint32_t *samplers, size_t sampler_sz);
extern cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler;
/* Bind the VME state of an Intel accelerator object (cl_accelerator_intel) to
 * the gpgpu state. NOTE(review): undocumented in the original; description is
 * derived from the name and signature only */
typedef void (cl_gpgpu_bind_vme_state_cb)(cl_gpgpu, cl_accelerator_intel accel);
extern cl_gpgpu_bind_vme_state_cb *cl_gpgpu_bind_vme_state;
/* Get the default cache control value (returned as a uint32_t).
 * The callback takes no arguments. The parameter list is spelled (void)
 * because an empty list "()" leaves the parameters unspecified in C11, so the
 * compiler would silently accept calls that pass arguments. */
typedef uint32_t (cl_gpgpu_get_cache_ctrl_cb)(void);
extern cl_gpgpu_get_cache_ctrl_cb *cl_gpgpu_get_cache_ctrl;
/* Set a 2d texture: bind the image stored in obj_bo (starting at
 * obj_bo_offset) to the gpgpu state under the given id.
 * NOTE(review): the signature also carries depth and slice_pitch, so this may
 * cover more than plain 2D images -- confirm against the implementation */
typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
uint32_t id,
cl_buffer obj_bo,
uint32_t obj_bo_offset,
uint32_t format,
uint32_t bpp,
uint32_t type,
int32_t w,
int32_t h,
int32_t depth,
int pitch,
int32_t slice_pitch,
cl_gpgpu_tiling tiling);
extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image;
/* Bind an image for VME use. The parameter list is identical to
 * cl_gpgpu_bind_image_cb. NOTE(review): undocumented in the original; how the
 * binding differs from the regular one is not visible from this header */
typedef void (cl_gpgpu_bind_image_for_vme_cb)(cl_gpgpu state,
uint32_t id,
cl_buffer obj_bo,
uint32_t obj_bo_offset,
uint32_t format,
uint32_t bpp,
uint32_t type,
int32_t w,
int32_t h,
int32_t depth,
int pitch,
int32_t slice_pitch,
cl_gpgpu_tiling tiling);
extern cl_gpgpu_bind_image_for_vme_cb *cl_gpgpu_bind_image_for_vme;
/* Set up a stack at the given offset and size.
 * NOTE(review): cchint is presumably a cache-control hint -- confirm */
typedef void (cl_gpgpu_set_stack_cb)(cl_gpgpu, uint32_t offset, uint32_t size, uint32_t cchint);
extern cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack;
/* Set up scratch space, sized per thread. Returns an int (presumably a status
 * code -- the convention is not visible from this header) */
typedef int (cl_gpgpu_set_scratch_cb)(cl_gpgpu, uint32_t per_thread_size);
extern cl_gpgpu_set_scratch_cb *cl_gpgpu_set_scratch;
/* Configure internal state. Returns an int (presumably a status code) */
typedef int (cl_gpgpu_state_init_cb)(cl_gpgpu, uint32_t max_threads, uint32_t size_cs_entry, int profiling);
extern cl_gpgpu_state_init_cb *cl_gpgpu_state_init;
/* Set the buffer object where to report performance counters */
typedef void (cl_gpgpu_set_perf_counters_cb)(cl_gpgpu, cl_buffer perf);
extern cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters;
/* Fill the current curbe buffer with data. Returns an int (presumably a status
 * code) */
typedef int (cl_gpgpu_upload_curbes_cb)(cl_gpgpu, const void* data, uint32_t size);
extern cl_gpgpu_upload_curbes_cb *cl_gpgpu_upload_curbes;
/* Allocate a constant buffer of the given size and return it as a cl_buffer.
 * NOTE(review): undocumented in the original; bti is presumably the binding
 * table index -- confirm */
typedef cl_buffer (cl_gpgpu_alloc_constant_buffer_cb)(cl_gpgpu, uint32_t size, uint8_t bti);
extern cl_gpgpu_alloc_constant_buffer_cb *cl_gpgpu_alloc_constant_buffer;
/* Set up all indirect states for the given kernel description */
typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel *kernel);
extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup;
/* Upload the constant samplers as specified inside the OCL kernel.
 * NOTE(review): this is the only callback in this header that takes a
 * `cl_gpgpu *` instead of a `cl_gpgpu` -- check whether the extra level of
 * indirection is intended before relying on it */
typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n);
extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers;
/* Set the sampler at the given index; non_normalized selects the coordinate
 * mode (exact encoding not visible from this header) */
typedef void (cl_gpgpu_set_sampler_cb)(cl_gpgpu, uint32_t index, uint32_t non_normalized);
extern cl_gpgpu_set_sampler_cb *cl_gpgpu_set_sampler;
/* Allocate the batch buffer (sz is its requested size).
 * NOTE(review): the original comment said this returns the BO used for the
 * batch buffer, but the signature returns an int -- presumably a status code;
 * confirm against the implementation */
typedef int (cl_gpgpu_batch_reset_cb)(cl_gpgpu, size_t sz);
extern cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset;
/* Atomic begin, pipeline select, urb, pipeline state and constant buffer */
typedef void (cl_gpgpu_batch_start_cb)(cl_gpgpu);
extern cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start;
/* Atomic end with possibly inserted flush (selected through flush_mode) */
typedef void (cl_gpgpu_batch_end_cb)(cl_gpgpu, int32_t flush_mode);
extern cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end;
/* Flush the command buffer. Returns an int (presumably a status code) */
typedef int (cl_gpgpu_flush_cb)(cl_gpgpu);
extern cl_gpgpu_flush_cb *cl_gpgpu_flush;
/* Create a new event for a batch buffer */
typedef cl_gpgpu_event (cl_gpgpu_event_new_cb)(cl_gpgpu);
extern cl_gpgpu_event_new_cb *cl_gpgpu_event_new;
/* Update the batch buffer of this event (going by the name, its status).
 * The meaning of the int argument and of the int return value is not visible
 * from this header */
typedef int (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int);
extern cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status;
/* Flush the batch buffer of this event */
typedef void (cl_gpgpu_event_flush_cb)(cl_gpgpu_event);
extern cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush;
/* Cancel execution of the batch buffer of this event */
typedef void (cl_gpgpu_event_cancel_cb)(cl_gpgpu_event);
extern cl_gpgpu_event_cancel_cb *cl_gpgpu_event_cancel;
/* Delete a gpgpu event */
typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event);
extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete;
/* Get an event time stamp, written through the uint64_t pointer. Unlike the
 * other event callbacks this one takes a cl_gpgpu, not a cl_gpgpu_event; the
 * int presumably selects which time stamp to read -- TODO confirm */
typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu, int, uint64_t*);
extern cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp;
/* Get the current GPU time stamp from the driver, written through the
 * uint64_t pointer */
typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_driver, uint64_t*);
extern cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp;
/* Get the current batch buffer handle, returned as an untyped pointer. Going
 * by the ref/unref naming this takes a reference on it -- TODO confirm */
typedef void* (cl_gpgpu_ref_batch_buf_cb)(cl_gpgpu);
extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
/* Release a batch buffer handle (the untyped pointer form that
 * cl_gpgpu_ref_batch_buf_cb returns) */
typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
/* Set the profiling buffer. Returns an int (presumably a status code).
 * NOTE(review): the parameters are unnamed; by analogy with the other
 * callbacks they are presumably a size, an offset and a binding table index --
 * confirm against the implementation */
typedef int (cl_gpgpu_set_profiling_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint8_t);
extern cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer;
/* Store an opaque (void *) profiling info pointer on the gpgpu state */
typedef int (cl_gpgpu_set_profiling_info_cb)(cl_gpgpu, void *);
extern cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info;
/* Get the opaque (void *) profiling info pointer back */
typedef void* (cl_gpgpu_get_profiling_info_cb)(cl_gpgpu);
extern cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info;
/* Map the profiling buffer; returns a pointer (presumably to the mapping) */
typedef void* (cl_gpgpu_map_profiling_buffer_cb)(cl_gpgpu);
extern cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer;
/* Unmap the profiling buffer */
typedef void (cl_gpgpu_unmap_profiling_buffer_cb)(cl_gpgpu);
extern cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer;
/* Set the printf buffer. Returns an int (presumably a status code).
 * NOTE(review): the uint32_t / uint8_t parameters are unnamed; presumably a
 * size and a binding table index -- confirm */
typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint8_t);
extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer;
/* Get the printf buffer offset in the aperture */
typedef unsigned long (cl_gpgpu_reloc_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t);
extern cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer;
/* Map the printf buffer; returns a pointer (presumably to the mapping) */
typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu);
extern cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer;
/* Unmap the printf buffer */
typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu);
extern cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer;
/* Release the printf buffer. Returns an unsigned long whose meaning is not
 * documented in this header */
typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu);
extern cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer;
/* Set the last printfset pointer (stored as an opaque void *) */
typedef int (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *);
extern cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info;
/* Get the last printfset pointer */
typedef void* (cl_gpgpu_get_printf_info_cb)(cl_gpgpu);
extern cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info;
/* Will spawn all threads. simd_sz and thread_n describe the dispatch; the four
 * arrays each hold three size_t entries (one per dimension) giving the global
 * work offset, the global dimension offset, the global work size and the local
 * work size. None of the arrays is written through (all are const). */
typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
uint32_t simd_sz,
uint32_t thread_n,
const size_t global_wk_off[3],
const size_t global_dim_off[3],
const size_t global_wk_sz[3],
const size_t local_wk_sz[3]);
extern cl_gpgpu_walker_cb *cl_gpgpu_walker;
/**************************************************************************
* Buffer
**************************************************************************/
/* Allocate a buffer from the buffer manager.
 * NOTE(review): the parameters are unnamed; the const char* is presumably a
 * name for the buffer and the two size_t values a size and an alignment --
 * confirm against the implementation */
typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t);
extern cl_buffer_alloc_cb *cl_buffer_alloc;
/* Allocate a buffer around a caller-supplied pointer (the void * argument).
 * NOTE(review): undocumented in the original; the trailing size_t and
 * unsigned long are presumably a size and flags -- confirm */
typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr;
/* Set the softpin offset (a uint64_t) of a buffer. Returns an int (presumably
 * a status code) */
typedef int (cl_buffer_set_softpin_offset_cb)(cl_buffer, uint64_t);
extern cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset;
/* Set whether the buffer object may use the full address range; the uint32_t
 * is presumably an on/off flag -- TODO confirm */
typedef int (cl_buffer_set_bo_use_full_range_cb)(cl_buffer, uint32_t);
extern cl_buffer_set_bo_use_full_range_cb *cl_buffer_set_bo_use_full_range;
/* Disable reuse of this buffer. Returns an int (presumably a status code) */
typedef int (cl_buffer_disable_reuse_cb)(cl_buffer);
extern cl_buffer_disable_reuse_cb *cl_buffer_disable_reuse;
/* Set a buffer's tiling mode and stride. Note that tiling is a plain int here,
 * not a cl_gpgpu_tiling */
typedef int (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;
#include "cl_context.h"
#include "cl_mem.h"
/* Allocate a buffer from a texture and describe it through gl_image.
 * NOTE(review): the three integer parameters are unnamed; presumably they
 * identify the texture (target, mip level, texture name) -- confirm */
typedef cl_buffer (cl_buffer_alloc_from_texture_cb)(cl_context, unsigned int, int, unsigned int,
struct _cl_mem_image *gl_image);
extern cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture;
/* Release a buffer obtained from a texture.
 * NOTE(review): this takes a `struct _cl_mem_gl_image *` whereas the matching
 * alloc callback takes a `struct _cl_mem_image *` -- confirm the asymmetry is
 * intended */
typedef void (cl_buffer_release_from_texture_cb)(cl_context, struct _cl_mem_gl_image *);
extern cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture;
/* Get a buffer from libva by buffer object name; sz is an output pointer
 * (presumably receiving the buffer size -- confirm) */
typedef cl_buffer (cl_buffer_get_buffer_from_libva_cb)(cl_context ctx, unsigned int bo_name, size_t *sz);
extern cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva;
/* Get an image's buffer from libva by buffer object name; image is the image
 * description associated with it */
typedef cl_buffer (cl_buffer_get_image_from_libva_cb)(cl_context ctx, unsigned int bo_name, struct _cl_mem_image *image);
extern cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva;
/* Drop one reference on a buffer and destroy it if no reference remains.
 * Returns an int whose meaning is not documented in this header */
typedef int (cl_buffer_unreference_cb)(cl_buffer);
extern cl_buffer_unreference_cb *cl_buffer_unreference;
/* Add one more reference on a buffer */
typedef void (cl_buffer_reference_cb)(cl_buffer);
extern cl_buffer_reference_cb *cl_buffer_reference;
/* Map a buffer; write_enable requests write access. Returns an int (presumably
 * a status code) */
typedef int (cl_buffer_map_cb)(cl_buffer, uint32_t write_enable);
extern cl_buffer_map_cb *cl_buffer_map;
/* Unmap a buffer */
typedef int (cl_buffer_unmap_cb)(cl_buffer);
extern cl_buffer_unmap_cb *cl_buffer_unmap;
/* Map a buffer in the GTT domain */
typedef int (cl_buffer_map_gtt_cb)(cl_buffer);
extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt;
/* Map a buffer in the GTT domain without waiting for pending GPU reads or
 * writes */
typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer);
extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync;
/* Unmap a buffer in the GTT domain */
typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer);
extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt;
/* Get the virtual address of the buffer (meaningful when it is mapped) */
typedef void* (cl_buffer_get_virtual_cb)(cl_buffer);
extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual;
/* Get the size of the buffer */
typedef size_t (cl_buffer_get_size_cb)(cl_buffer);
extern cl_buffer_get_size_cb *cl_buffer_get_size;
/* Pin a buffer with the requested alignment. Returns an int (presumably a
 * status code) */
typedef int (cl_buffer_pin_cb)(cl_buffer, uint32_t alignment);
extern cl_buffer_pin_cb *cl_buffer_pin;
/* Unpin a buffer */
typedef int (cl_buffer_unpin_cb)(cl_buffer);
extern cl_buffer_unpin_cb *cl_buffer_unpin;
/* Fill data in the buffer from the const void* source.
 * NOTE(review): the two unsigned long parameters are unnamed; presumably an
 * offset and a size -- confirm */
typedef int (cl_buffer_subdata_cb)(cl_buffer, unsigned long, unsigned long, const void*);
extern cl_buffer_subdata_cb *cl_buffer_subdata;
/* Get data from the buffer into the void* destination; same parameter layout
 * as cl_buffer_subdata_cb, with a writable data pointer */
typedef int (cl_buffer_get_subdata_cb)(cl_buffer, unsigned long, unsigned long, void*);
extern cl_buffer_get_subdata_cb *cl_buffer_get_subdata;
/* Wait for all pending rendering for this buffer to complete */
typedef int (cl_buffer_wait_rendering_cb) (cl_buffer);
extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
/* Get a file descriptor for the buffer, written through fd. Returns an int
 * (presumably a status code) */
typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd);
extern cl_buffer_get_fd_cb *cl_buffer_get_fd;
/* Get the tiling alignment for a tiling mode and a dimension index.
 * NOTE(review): undocumented in the original; the encoding of tiling_mode and
 * dim is not visible from this header */
typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim);
extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
/* Get a buffer from a file descriptor; size is passed as an int */
typedef cl_buffer (cl_buffer_get_buffer_from_fd_cb)(cl_context ctx, int fd, int size);
extern cl_buffer_get_buffer_from_fd_cb *cl_buffer_get_buffer_from_fd;
/* Get an image's buffer from a file descriptor; image is the image description
 * associated with it */
typedef cl_buffer (cl_buffer_get_image_from_fd_cb)(cl_context ctx, int fd, int size, struct _cl_mem_image *image);
extern cl_buffer_get_image_from_fd_cb *cl_buffer_get_image_from_fd;
/* Get the device id (returned as an int); takes no arguments */
typedef int (cl_driver_get_device_id_cb)(void);
extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
/* Update the device info of the given cl_device_id */
typedef void (cl_driver_update_device_info_cb)(cl_device_id device);
extern cl_driver_update_device_info_cb *cl_driver_update_device_info;
#endif /* __CL_DRIVER_H__ */