summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: bsegovia <devnull@localhost> 2011-10-15 04:06:29 +0000
committer: Keith Packard <keithp@keithp.com> 2012-08-10 16:14:59 -0700
commit: 716af9adf3b4d28919b8fe2925cf920759ab2140 (patch)
tree: a2e6b2a6032547f165bcd6cdf80b36127252d913 /src
parent: f7f12f6a69b73c51c95a96ed484dfb42a37790f4 (diff)
Finished first support for image2D. Right now, we only support linear formats
Diffstat (limited to 'src')
-rw-r--r-- src/cl_api.c 36
-rw-r--r-- src/cl_command_queue.c 3
-rw-r--r-- src/cl_image.c 119
-rw-r--r-- src/cl_image.h 13
-rw-r--r-- src/cl_mem.c 11
-rw-r--r-- src/cl_mem.h 2
-rw-r--r-- src/cl_utils.h 3
-rw-r--r-- src/intel/intel_gpgpu.c 6
8 files changed, 177 insertions, 16 deletions
diff --git a/src/cl_api.c b/src/cl_api.c
index e6c7dfba..01fe8c89 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -24,6 +24,7 @@
#include "cl_program.h"
#include "cl_kernel.h"
#include "cl_mem.h"
+#include "cl_image.h"
#include "cl_alloc.h"
#include "cl_utils.h"
@@ -302,15 +303,32 @@ error:
}
+/* API entry point: validate the arguments, then delegate the enumeration to
+ * cl_image_get_supported_fmt().
+ *
+ * Returns CL_INVALID_VALUE when a destination array is given with
+ * num_entries == 0, or when image_type is neither CL_MEM_OBJECT_IMAGE2D nor
+ * CL_MEM_OBJECT_IMAGE3D; otherwise returns what cl_image_get_supported_fmt()
+ * returns.
+ * NOTE(review): flags is accepted but never read in this body.
+ */
cl_int
-clGetSupportedImageFormats(cl_context context,
- cl_mem_flags flags,
- cl_mem_type image_type,
- cl_uint num_entries,
- cl_image_format * image_formats,
- cl_uint * num_image_formats)
+clGetSupportedImageFormats(cl_context ctx,
+ cl_mem_flags flags,
+ cl_mem_type image_type,
+ cl_uint num_entries,
+ cl_image_format *image_formats,
+ cl_uint * num_image_formats)
{
- NOT_IMPLEMENTED;
- return 0;
+ cl_int err = CL_SUCCESS;
+ /* CHECK_CONTEXT is a project macro not shown here; presumably it sets err
+ * and jumps to the error label for an invalid context -- TODO confirm. */
+ CHECK_CONTEXT (ctx);
+ /* A destination array combined with a zero capacity is rejected. */
+ if (UNLIKELY(num_entries == 0 && image_formats != NULL)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ /* Only 2D and 3D image object types are accepted. */
+ if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
+ image_type != CL_MEM_OBJECT_IMAGE3D)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ err = cl_image_get_supported_fmt(ctx,
+ image_type,
+ num_entries,
+ image_formats,
+ num_image_formats);
+
+error:
+ return err;
}
cl_int
@@ -1061,7 +1079,7 @@ clEnqueueBarrier(cl_command_queue command_queue)
}
void*
-clGetExtensionFunctionAddress(const char * func_name)
+clGetExtensionFunctionAddress(const char *func_name)
{
NOT_IMPLEMENTED;
return NULL;
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 459dd0b5..fc23c060 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -151,8 +151,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
bo = mem->bo;
assert(bo);
if (mem->is_image) {
- const int fmt =0x0CB; // I965_SURFACEFORMAT_R8G8B8A8_UINT
- gpgpu_bind_image2D(gpgpu, index, bo, fmt, mem->w, mem->h, 4, cc_llc_l3);
+ gpgpu_bind_image2D(gpgpu, index, bo, mem->intel_fmt, mem->w, mem->h, mem->bpp, cc_llc_l3);
cl_kernel_copy_image_parameters(k, mem, index, curbe);
} else
gpgpu_bind_buf(gpgpu, index, bo, bo->size, cc_llc_l3);
diff --git a/src/cl_image.c b/src/cl_image.c
index 7e6e700a..bba741dd 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -19,6 +19,7 @@
#include "cl_image.h"
#include "cl_utils.h"
+#include "intel/intel_defines.h"
#include <assert.h>
@@ -93,3 +94,121 @@ cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
return CL_SUCCESS;
}
+/* Translate an OpenCL image format (channel order + channel data type) into
+ * the matching I965_SURFACEFORMAT_* value.
+ *
+ * Returns INTEL_UNSUPPORTED_FORMAT for every combination that has no case
+ * below. Every inner switch returns on all paths, so control never falls
+ * from one channel-order group into the next.
+ *
+ * NOTE(review): CL_RGx is listed in cl_image_order[] but has no case here, so
+ * it is always reported as unsupported. CL_Rx is grouped with the two-channel
+ * orders; the OpenCL channel-order table appears to group CL_Rx with CL_R and
+ * CL_RGx with CL_RG -- confirm against cl_image_byte_per_pixel() before
+ * changing the grouping.
+ */
+LOCAL uint32_t
+cl_image_get_intel_format(const cl_image_format *fmt)
+{
+ const uint32_t type = fmt->image_channel_data_type;
+ const uint32_t order = fmt->image_channel_order;
+ switch (order) {
+ /* These orders all map to single-channel (R-only) surface formats */
+ case CL_R:
+ case CL_A:
+ case CL_INTENSITY:
+ case CL_LUMINANCE:
+ switch (type) {
+ case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16_FLOAT;
+ case CL_FLOAT: return I965_SURFACEFORMAT_R32_FLOAT;
+ case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16_SNORM;
+ case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8_SNORM;
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16_UNORM;
+ case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8_SINT;
+ case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16_SINT;
+ case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32_SINT;
+ case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16_UINT;
+ case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32_UINT;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ /* These orders all map to two-channel (RG) surface formats */
+ case CL_RG:
+ case CL_RA:
+ case CL_Rx:
+ switch (type) {
+ case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16_FLOAT;
+ case CL_FLOAT: return I965_SURFACEFORMAT_R32G32_FLOAT;
+ case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16_SNORM;
+ case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8_SNORM;
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16_UNORM;
+ case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8_SINT;
+ case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16_SINT;
+ case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32_SINT;
+ case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT;
+ case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32_UINT;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ /* Of the packed RGB types only 10-10-10 is mapped; 565 and 555 share the
+ * default label and are therefore reported as unsupported */
+ case CL_RGB:
+ case CL_RGBx:
+ switch (type) {
+ case CL_UNORM_INT_101010: return I965_SURFACEFORMAT_R10G10B10A2_UNORM;
+ case CL_UNORM_SHORT_565:
+ case CL_UNORM_SHORT_555:
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ case CL_RGBA:
+ switch (type) {
+ case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16B16A16_FLOAT;
+ case CL_FLOAT: return I965_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SNORM;
+ case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SNORM;
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UNORM;
+ case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SINT;
+ case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SINT;
+ case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_SINT;
+ case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UINT;
+ case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_UINT;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ case CL_ARGB: return INTEL_UNSUPPORTED_FORMAT;
+ /* BGRA is only mapped for 8-bit unsigned normalized data */
+ case CL_BGRA:
+ switch (type) {
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
+}
+
+/* Channel orders probed by cl_image_get_supported_fmt() */
+static const uint32_t cl_image_order[] = {
+ CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB,
+ CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx
+};
+
+/* Channel data types probed by cl_image_get_supported_fmt() */
+static const uint32_t cl_image_type[] = {
+ CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16,
+ CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
+ CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32,
+ CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32,
+ CL_HALF_FLOAT, CL_FLOAT
+};
+
+/* SIZEOF32 divides by sizeof(uint32_t); it yields the element count here only
+ * because both tables hold uint32_t elements */
+static const size_t cl_image_order_n = SIZEOF32(cl_image_order);
+static const size_t cl_image_type_n = SIZEOF32(cl_image_type);
+
+/* Enumerate every (channel order, channel data type) pair that
+ * cl_image_get_intel_format() can map to an Intel surface format.
+ *
+ * At most num_entries formats are stored in image_formats (when it is not
+ * NULL). The total number of supported formats is written to
+ * *num_image_formats (when it is not NULL), even if it exceeds num_entries.
+ * ctx and image_type are not used to filter the list here.
+ * Always returns CL_SUCCESS.
+ */
+cl_int
+cl_image_get_supported_fmt(cl_context ctx,
+                           cl_mem_type image_type,
+                           cl_uint num_entries,
+                           cl_image_format *image_formats,
+                           cl_uint *num_image_formats)
+{
+  size_t i, j, n = 0;
+
+  /* image_formats may legitimately be NULL: clGetSupportedImageFormats()
+   * forwards NULL (with num_entries == 0) when the caller only wants the
+   * number of supported formats, so it must not be asserted on. The store
+   * below is already guarded against NULL. */
+  for (i = 0; i < cl_image_order_n; ++i) {
+    for (j = 0; j < cl_image_type_n; ++j) {
+      const cl_image_format fmt = {
+        .image_channel_order = cl_image_order[i],
+        .image_channel_data_type = cl_image_type[j]
+      };
+      const uint32_t intel_fmt = cl_image_get_intel_format(&fmt);
+      if (intel_fmt == INTEL_UNSUPPORTED_FORMAT)
+        continue;
+      /* Keep counting past num_entries so the caller learns the full size */
+      if (image_formats != NULL && n < num_entries)
+        image_formats[n] = fmt;
+      n++;
+    }
+  }
+  if (num_image_formats != NULL)
+    *num_image_formats = (cl_uint) n;
+  return CL_SUCCESS;
+}
+
diff --git a/src/cl_image.h b/src/cl_image.h
index 923bf1da..4afb4d4f 100644
--- a/src/cl_image.h
+++ b/src/cl_image.h
@@ -24,8 +24,21 @@
#include "CL/cl.h"
#include <stdint.h>
+/* Returned when the OCL format is not supported */
+#define INTEL_UNSUPPORTED_FORMAT ((uint32_t) ~0x0u)
+
/* Compute the number of bytes per pixel if the format is supported */
extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp);
+/* Return the intel format for the given OCL format */
+extern uint32_t cl_image_get_intel_format(const cl_image_format *fmt);
+
+/* Return the list of formats supported by the API */
+extern cl_int cl_image_get_supported_fmt(cl_context context,
+ cl_mem_type image_type,
+ cl_uint num_entries,
+ cl_image_format *image_formats,
+ cl_uint *num_image_formats);
+
#endif /* __CL_IMAGE_H__ */
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f2fad25d..a07414d2 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -136,7 +136,7 @@ cl_mem_new_image2D(cl_context ctx,
{
cl_int err = CL_SUCCESS;
cl_mem mem = NULL;
- uint32_t bpp = 0;
+ uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
size_t sz = 0;
/* Check flags consistency */
@@ -149,6 +149,13 @@ cl_mem_new_image2D(cl_context ctx,
if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
goto error;
+ /* Only a sub-set of the formats are supported */
+ intel_fmt = cl_image_get_intel_format(fmt);
+ if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
+ err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ goto error;
+ }
+
/* See if the user parameters match */
#define DO_IMAGE_ERROR \
do { \
@@ -188,7 +195,9 @@ cl_mem_new_image2D(cl_context ctx,
mem->w = w;
mem->h = h;
mem->fmt = *fmt;
+ mem->intel_fmt = intel_fmt;
mem->pitch = w * bpp;
+ mem->bpp = bpp;
mem->is_image = 1;
exit:
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 9a9ceb36..420e869d 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -37,6 +37,8 @@ struct _cl_mem {
uint32_t is_image; /* Indicate if this is an image or not */
cl_image_format fmt; /* only for images */
size_t w,h,depth,pitch; /* only for images (depth is only for 3d images) */
+ uint32_t intel_fmt; /* format to provide in the surface state */
+ uint32_t bpp; /* number of bytes per pixel */
};
/* Create a new memory object and initialize it with possible user data */
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 944e0443..29268c60 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -311,6 +311,9 @@ do { \
/* Align a structure or a variable */
#define ALIGNED(X) __attribute__ ((aligned (X)))
+/* Number of DWORDS */
+#define SIZEOF32(X) (sizeof(X) / sizeof(uint32_t))
+
/* 32 bits atomic variable */
typedef volatile int atomic_t;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 040e4aaa..eb5909a0 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -256,8 +256,6 @@ static const uint32_t gpgpu_l3_config_reg2[] =
#define L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
#define L3_CNTL_REG3_ADDRESS_OFFSET (0xB024)
-#define sizeof32(X) (sizeof(X) / sizeof(uint32_t))
-
enum INSTRUCTION_PIPELINE
{
PIPE_COMMON = 0x0,
@@ -310,11 +308,11 @@ enum GFX3DSTATE_PIPELINED_SUBOPCODE
static void
gpgpu_pipe_control(intel_gpgpu_t *state)
{
- BEGIN_BATCH(state->batch, sizeof32(gen6_pipe_control_t));
+ BEGIN_BATCH(state->batch, SIZEOF32(gen6_pipe_control_t));
gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
intel_batchbuffer_alloc_space(state->batch, 0);
memset(pc, 0, sizeof(*pc));
- pc->dw0.length = sizeof32(gen6_pipe_control_t) - 2;
+ pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
pc->dw0.instruction_subopcode = GFX3DSUBOP_3DCONTROL;
pc->dw0.instruction_opcode = GFX3DOP_3DCONTROL;
pc->dw0.instruction_pipeline = PIPE_3D;