/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

#ifndef __CL_INTERNALS_H__
#define __CL_INTERNALS_H__

#include <stdio.h>
#include <assert.h>

#include "CL/cl.h"
#include "cl_extension.h"
#include "cl_driver.h"

/* INLINE is forceinline */
#define INLINE __attribute__((always_inline)) inline

/* 32 bits atomic variable */
static INLINE int atomic_add(volatile int* v, const int c)
{
  register int i = c;
  __asm__ __volatile__("lock ; xaddl %0, %1;"
      : "+r"(i), "+m"(*v)
      : "m"(*v), "r"(i));
  return i;
}

static INLINE int atomic_inc(volatile int *v) { return atomic_add(v, 1); }
static INLINE int atomic_dec(volatile int *v) { return atomic_add(v, -1); }

static INLINE int atomic_cmpxchg_w(volatile short* ptr, short oldv, short newv)
{
  int ret;
  __asm__ __volatile__("lock cmpxchgw %2,%1"
      : "=a" (ret), "+m" (*ptr)
      : "r" (newv), "0" (oldv)
      : "memory");
  return ret;
}

static INLINE int cl_ref_inc(volatile int *v)
{
  volatile short* lockptr = (short*)v;
  volatile short* valptr = lockptr + 1;
  int tmp;
  int ret;

  while (atomic_cmpxchg_w(lockptr, 0, 1)) { } // loop to get the lock
  ret = *valptr;
  assert(ret >= 0);
  *valptr = ret + 1;
  tmp = atomic_cmpxchg_w(lockptr, 1, 0);
  assert(tmp == 1);
  return ret;
}

static INLINE int cl_ref_get_val(volatile int *v)
{
  volatile short* lockptr = (short*)v;
  volatile short* valptr = lockptr + 1;
  int tmp;
  int ret;

  while (atomic_cmpxchg_w(lockptr, 0, 1)) { } // loop to get the lock
  ret = *valptr;
  assert(ret >= 0);
  tmp = atomic_cmpxchg_w(lockptr, 1, 0);
  assert(tmp == 1);
  return ret;
}

static INLINE void cl_ref_set_val(volatile int *v, int value)
{
  volatile short* lockptr = (short*)v;
  volatile short* valptr = lockptr + 1;
  int tmp;

  while (atomic_cmpxchg_w(lockptr, 0, 1)) { } // loop to get the lock
  *valptr = value;
  assert(value >= 0);
  tmp = atomic_cmpxchg_w(lockptr, 1, 0);
  assert(tmp == 1);
}
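/*
 * The cl_ref_* helpers above treat the 32-bit word as two 16-bit halves: on a
 * little-endian target the low half acts as a spinlock and the high half
 * stores the reference count, so the count is only read and updated while the
 * lock is held. A minimal usage sketch follows; it is illustrative only and
 * the `my_obj` type, its `ref_n` field and `my_obj_delete()` are hypothetical
 * names, not part of this header.
 */
#if 0
typedef struct my_obj { volatile int ref_n; /* ... payload ... */ } my_obj;

static void my_obj_delete(my_obj *o);                  /* hypothetical destructor */
static void my_obj_init(my_obj *o)   { cl_ref_set_val(&o->ref_n, 1); }
static void my_obj_retain(my_obj *o) { cl_ref_inc(&o->ref_n); }
static void my_obj_release(my_obj *o)
{
  /* cl_ref_dec returns the value seen *before* the decrement: 1 means the
     last reference was just dropped and the object can be destroyed. */
  if (cl_ref_dec(&o->ref_n) == 1)
    my_obj_delete(o);
}
#endif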
/* We may try to add a reference to an object whose count has already reached
 * 0. Such an object has already begun to destroy itself and must never be
 * used, so the increment is only performed while the count is still positive. */
static INLINE int cl_ref_inc_if_positive(volatile int *v)
{
  volatile short* lockptr = (short*)v;
  volatile short* valptr = lockptr + 1;
  int tmp;
  int ret;

  while (atomic_cmpxchg_w(lockptr, 0, 1)) { } // loop to get the lock
  ret = *valptr;
  if (ret > 0)
    *valptr = ret + 1;
  tmp = atomic_cmpxchg_w(lockptr, 1, 0);
  assert(tmp == 1);
  assert(ret >= 0);
  return ret;
}

static INLINE int cl_ref_dec(volatile int *v)
{
  volatile short* lockptr = (short*)v;
  volatile short* valptr = lockptr + 1;
  int tmp;
  int ret;

  while (atomic_cmpxchg_w(lockptr, 0, 1)) { } // loop to get the lock
  ret = *valptr;
  *valptr = ret - 1;
  tmp = atomic_cmpxchg_w(lockptr, 1, 0);
  assert(tmp == 1);
  assert(ret >= 0);
  return ret;
}

/* Branch hint */
#define LIKELY(x)   __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0)

/* Stringify macros */
#define JOIN(X, Y) _DO_JOIN(X, Y)
#define _DO_JOIN(X, Y) _DO_JOIN2(X, Y)
#define _DO_JOIN2(X, Y) X##Y

enum DEBUGP_LEVEL
{
  DL_INFO,
  DL_WARNING,
  DL_ERROR
};

#ifdef NDEBUG
  #define DEBUGP(...)
#else
  //TODO: decide whether to print based on the level value taken from the environment
  #define DEBUGP(level, fmt, ...)                      \
  do {                                                 \
    fprintf(stderr, "Beignet: "#fmt, ##__VA_ARGS__);   \
    fprintf(stderr, "\n");                             \
  } while (0)
#endif

/* Check compile time errors */
#define STATIC_ASSERT(value)                           \
struct JOIN(__,JOIN(__,__LINE__)) {                    \
  int x[(value) ? 1 : -1];                             \
}

/* Throw errors */
#ifdef NDEBUG
  #define ERR(ERROR, ...)                              \
  do {                                                 \
    err = ERROR;                                       \
    goto error;                                        \
  } while (0)
#else
  #define ERR(ERROR, ...)                              \
  do {                                                 \
    fprintf(stderr, "error in %s line %i\n", __FILE__, __LINE__); \
    fprintf(stderr, __VA_ARGS__);                      \
    fprintf(stderr, "\n");                             \
    err = ERROR;                                       \
    goto error;                                        \
  } while (0)
#endif

#define DO_ALLOC_ERR                                   \
do {                                                   \
  ERR(CL_OUT_OF_HOST_MEMORY, "Out of memory");         \
} while (0)

#define ERR_IF(COND, ERROR, ...)                       \
do {                                                   \
  if (UNLIKELY(COND)) ERR (ERROR, __VA_ARGS__);        \
} while (0)

#define INVALID_VALUE_IF(COND)                         \
do {                                                   \
  ERR_IF(COND, CL_INVALID_VALUE, "Invalid value");     \
} while (0)

#define INVALID_DEVICE_IF(COND)                        \
do {                                                   \
  ERR_IF(COND, CL_INVALID_DEVICE, "Invalid device");   \
} while (0)

#define MAX(x0, x1) ((x0) > (x1) ? (x0) : (x1))
#define MIN(x0, x1) ((x0) < (x1) ? (x0) : (x1))
#define ALIGN(A, B) (((A) % (B)) ? (A) + (B) - ((A) % (B)) : (A))

#define DO_ALLOC_ERROR                                 \
do {                                                   \
  err = CL_OUT_OF_HOST_MEMORY;                         \
  goto error;                                          \
} while (0)

#define FATAL(...)                                     \
do {                                                   \
  fprintf(stderr, "error: ");                          \
  fprintf(stderr, __VA_ARGS__);                        \
  fprintf(stderr, "\n");                               \
  assert(0);                                           \
  exit(-1);                                            \
} while (0)

#define FATAL_IF(COND, ...)                            \
do {                                                   \
  if (UNLIKELY(COND)) FATAL(__VA_ARGS__);              \
} while (0)
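/*
 * ERR, ERR_IF, INVALID_VALUE_IF and the related macros assume the enclosing
 * function declares a local `cl_int err` and provides an `error:` label to
 * jump to. A minimal sketch of that convention is shown below; the function
 * name and arguments are hypothetical and only illustrate the pattern.
 */
#if 0
static cl_int check_buffer_args(const void *ptr, size_t size)
{
  cl_int err = CL_SUCCESS;
  INVALID_VALUE_IF(ptr == NULL);                         /* sets err, jumps to error */
  ERR_IF(size == 0, CL_INVALID_BUFFER_SIZE, "Zero-sized buffer");
error:
  return err;
}
#endif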
#define NOT_IMPLEMENTED FATAL ("Not implemented")

#define CHECK_PLATFORM(PLATFORM)                       \
do {                                                   \
  if (UNLIKELY(PLATFORM == NULL)) {                    \
    err = CL_INVALID_PLATFORM;                         \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(PLATFORM->magic != CL_MAGIC_PLATFORM_HEADER)) { \
    err = CL_INVALID_PLATFORM;                         \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_DEVICE(DEVICE)                           \
do {                                                   \
  if (UNLIKELY(DEVICE == NULL)) {                      \
    err = CL_INVALID_DEVICE;                           \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(DEVICE->magic != CL_MAGIC_DEVICE_HEADER)) { \
    err = CL_INVALID_DEVICE;                           \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_CONTEXT(CTX)                             \
do {                                                   \
  if (UNLIKELY(CTX == NULL)) {                         \
    err = CL_INVALID_CONTEXT;                          \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(CTX->magic != CL_MAGIC_CONTEXT_HEADER)) { \
    err = CL_INVALID_CONTEXT;                          \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_QUEUE(QUEUE)                             \
do {                                                   \
  if (UNLIKELY(QUEUE == NULL)) {                       \
    err = CL_INVALID_COMMAND_QUEUE;                    \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(QUEUE->magic != CL_MAGIC_QUEUE_HEADER)) { \
    err = CL_INVALID_COMMAND_QUEUE;                    \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_MEM(MEM)                                 \
do {                                                   \
  if (UNLIKELY(MEM == NULL)) {                         \
    err = CL_INVALID_MEM_OBJECT;                       \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(MEM->magic != CL_MAGIC_MEM_HEADER)) {   \
    err = CL_INVALID_MEM_OBJECT;                       \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_IMAGE(MEM)                               \
CHECK_MEM(MEM);                                        \
do {                                                   \
  if (UNLIKELY(!IS_IMAGE(MEM))) {                      \
    err = CL_INVALID_MEM_OBJECT;                       \
    goto error;                                        \
  }                                                    \
} while (0);

#define FIXUP_IMAGE_REGION(IMAGE, PREGION, REGION)     \
const size_t *REGION;                                  \
size_t REGION ##_REC[3];                               \
do {                                                   \
  if (IMAGE->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { \
    REGION ##_REC[0] = PREGION[0];                     \
    REGION ##_REC[1] = 1;                              \
    REGION ##_REC[2] = PREGION[1];                     \
    REGION = REGION ##_REC;                            \
  } else {                                             \
    REGION = PREGION;                                  \
  }                                                    \
} while(0)

#define FIXUP_IMAGE_ORIGIN(IMAGE, PREGION, REGION)     \
const size_t *REGION;                                  \
size_t REGION ##_REC[3];                               \
do {                                                   \
  if (IMAGE->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { \
    REGION ##_REC[0] = PREGION[0];                     \
    REGION ##_REC[1] = 0;                              \
    REGION ##_REC[2] = PREGION[1];                     \
    REGION = REGION ##_REC;                            \
  } else {                                             \
    REGION = PREGION;                                  \
  }                                                    \
} while(0)

#define CHECK_EVENT(EVENT)                             \
do {                                                   \
  if (UNLIKELY(EVENT == NULL)) {                       \
    err = CL_INVALID_EVENT;                            \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(EVENT->magic != CL_MAGIC_EVENT_HEADER)) { \
    err = CL_INVALID_EVENT;                            \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_SAMPLER(SAMPLER)                         \
do {                                                   \
  if (UNLIKELY(SAMPLER == NULL)) {                     \
    err = CL_INVALID_SAMPLER;                          \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(SAMPLER->magic != CL_MAGIC_SAMPLER_HEADER)) { \
    err = CL_INVALID_SAMPLER;                          \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_KERNEL(KERNEL)                           \
do {                                                   \
  if (UNLIKELY(KERNEL == NULL)) {                      \
    err = CL_INVALID_KERNEL;                           \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(KERNEL->magic != CL_MAGIC_KERNEL_HEADER)) { \
    err = CL_INVALID_KERNEL;                           \
    goto error;                                        \
  }                                                    \
} while (0)

#define CHECK_PROGRAM(PROGRAM)                         \
do {                                                   \
  if (UNLIKELY(PROGRAM == NULL)) {                     \
    err = CL_INVALID_PROGRAM;                          \
    goto error;                                        \
  }                                                    \
  if (UNLIKELY(PROGRAM->magic != CL_MAGIC_PROGRAM_HEADER)) { \
    err = CL_INVALID_PROGRAM;                          \
    goto error;                                        \
  }                                                    \
} while (0)

#define ELEMENTS(x) (sizeof(x)/sizeof(*(x)))
#define MEMZERO(x) do { memset((x),0,sizeof(*(x))); } while (0)

/* Run some code and catch errors */
#define TRY(fn,...)                                    \
do {                                                   \
  if (UNLIKELY((err = fn(__VA_ARGS__)) != CL_SUCCESS)) \
    goto error;                                        \
} while (0)
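/*
 * The CHECK_* handle validators and TRY follow the same `err`/`error:`
 * convention as the ERR macros above. A typical API entry point validates its
 * handles up front and forwards failures through the label, roughly as
 * sketched below; `clDoSomething` and `cl_do_something` are hypothetical names
 * used only for illustration.
 */
#if 0
cl_int clDoSomething(cl_context context)
{
  cl_int err = CL_SUCCESS;
  CHECK_CONTEXT(context);            /* rejects NULL handles or a bad magic */
  TRY(cl_do_something, context);     /* stores the result in err, jumps on failure */
error:
  return err;
}
#endif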
#define TRY_NO_ERR(fn,...)                             \
do {                                                   \
  if (UNLIKELY(fn(__VA_ARGS__) != CL_SUCCESS))         \
    goto error;                                        \
} while (0)

#define TRY_ALLOC(dst, EXPR)                           \
do {                                                   \
  if (UNLIKELY((dst = EXPR) == NULL))                  \
    DO_ALLOC_ERROR;                                    \
} while (0)

#define TRY_ALLOC_NO_ERR(dst, EXPR)                    \
do {                                                   \
  if (UNLIKELY((dst = EXPR) == NULL))                  \
    goto error;                                        \
} while (0)

#define TRY_ALLOC_NO_RET(EXPR)                         \
do {                                                   \
  if (UNLIKELY((EXPR) == NULL))                        \
    DO_ALLOC_ERROR;                                    \
} while (0)

/* Break Point Definitions */
#if !defined(NDEBUG)

#define BREAK                                          \
do {                                                   \
  __asm__("int3");                                     \
} while(0)

#define BREAK_IF(value)                                \
do {                                                   \
  if (UNLIKELY(!(value))) BREAK;                       \
} while(0)

#else

#define BREAK do { } while(0)
#define BREAK_IF(value) do { } while(0)
#define BREAKPOINT() do { } while(0)
#define ASSERT(value) do { } while(0)

#endif

/* For all internal functions */
#define LOCAL __attribute__ ((visibility ("internal")))

/* Align a structure or a variable */
#define ALIGNED(X) __attribute__ ((aligned (X)))

/* Number of DWORDS */
#define SIZEOF32(X) (sizeof(X) / sizeof(uint32_t))

/* Memory quantity */
#define KB 1024
#define MB (KB*KB)

/* To help bitfield definitions */
#define BITFIELD_BIT(X) 1
#define BITFIELD_RANGE(X,Y) ((Y) - (X) + 1)

#define FILL_GETINFO_RET(TYPE, ELT, VAL, RET)          \
do {                                                   \
  if (param_value && param_value_size < sizeof(TYPE)*ELT) \
    return CL_INVALID_VALUE;                           \
  if (param_value) {                                   \
    memset(param_value, 0, param_value_size);          \
    memcpy(param_value, (VAL), sizeof(TYPE)*ELT);      \
  }                                                    \
                                                       \
  if (param_value_size_ret)                            \
    *param_value_size_ret = sizeof(TYPE)*ELT;          \
  return RET;                                          \
} while(0)
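/*
 * FILL_GETINFO_RET expects the standard get-info parameter names
 * (param_value, param_value_size, param_value_size_ret) to be in scope and
 * returns from the enclosing function itself. A sketch of a single query case
 * is given below; the function name and the hard-coded vendor id are
 * hypothetical and only show how the macro is meant to be used.
 */
#if 0
static cl_int example_get_device_info(cl_device_id device,
                                      cl_device_info param_name,
                                      size_t param_value_size,
                                      void *param_value,
                                      size_t *param_value_size_ret)
{
  const cl_uint vendor_id = 0x8086;  /* illustrative value only */
  (void) device;
  switch (param_name) {
  case CL_DEVICE_VENDOR_ID:
    /* copies one cl_uint into param_value and returns CL_SUCCESS */
    FILL_GETINFO_RET(cl_uint, 1, &vendor_id, CL_SUCCESS);
  default:
    return CL_INVALID_VALUE;
  }
}
#endif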
extern void cl_release_context(cl_context ctx);
extern cl_int cl_retain_context(cl_context ctx);
extern cl_int cl_context_get_device_index(cl_context ctx, const cl_device_id device);
extern void cl_retain_device_id(cl_device_id device);
extern void cl_release_device_id(cl_device_id device);
extern cl_int cl_get_device_info(cl_device_id device,
                                 cl_device_info param_name,
                                 size_t param_value_size,
                                 void *param_value,
                                 size_t *param_value_size_ret);
extern cl_mem cl_mem_new(cl_mem_object_type type, cl_context ctx, cl_mem_flags flags, size_t sz);
extern void cl_release_mem(cl_mem mem);
extern cl_int cl_retain_mem(cl_mem mem);
extern cl_int cl_retain_program(cl_program p);
extern void cl_release_program(cl_program p);
extern cl_kernel cl_create_kernel(cl_program p, const char* kernel_name, cl_int* err);
extern cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
                                      const cl_event *event_wait_list,
                                      cl_event *event,
                                      cl_context ctx);
extern void cl_release_kernel(cl_kernel k);
extern void cl_release_event(cl_event e);
extern cl_event* cl_check_context_barrier_events(cl_context context, cl_int* event_num, cl_int* err);
extern cl_event cl_create_event(cl_context context,
                                cl_command_queue queue,
                                cl_bool user_event,
                                cl_uint num_events,
                                const cl_event *event_list,
                                cl_int *errcode_ret);
extern void cl_platform_extension_init(cl_extensions extensions, char* ext_str, int str_max);
extern cl_int cl_driver_check(cl_driver drv);
extern cl_int cl_retain_command_queue(cl_command_queue queue);
extern void cl_release_command_queue(cl_command_queue queue);
extern cl_int cl_retain_event(cl_event e);
extern cl_int cl_event_get_status(cl_event event);
extern cl_int cl_event_set_status(cl_event event, cl_int status);
extern cl_int cl_enqueue_insert_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_int cl_enqueue_queue_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_int cl_enqueue_submit_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_int cl_enqueue_run_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_int cl_enqueue_complete_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_command_queue_work_item cl_enqueue_create_work_item(cl_command_queue queue,
                                                              cl_uint num_events_in_wait_list,
                                                              const cl_event *event_wait_list);
extern void cl_enqueue_set_work_item_event(cl_command_queue_work_item it, cl_event event);
extern void cl_enqueue_destroy_work_item(cl_command_queue queue, cl_command_queue_work_item item);
extern cl_int cl_command_queue_worker_init(cl_command_queue queue);
extern void cl_command_queue_worker_destroy(cl_command_queue queue);
extern cl_int cl_enqueue_wait_for_flush(cl_command_queue queue);
extern cl_int cl_enqueue_wait_for_finish(cl_command_queue queue);
extern void cl_enqueue_notify_event_changed(cl_command_queue queue);

#endif /* __CL_INTERNALS_H__ */