summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xkernels/Mandelbrot_Kernels.cl243
-rw-r--r--kernels/test_copy_image.cl14
-rw-r--r--src/cl_api.c19
-rw-r--r--src/cl_command_queue_gen6.c29
-rw-r--r--src/cl_gt_device.h10
-rw-r--r--src/cl_kernel.c59
-rw-r--r--src/cl_mem.c207
-rw-r--r--src/cl_mem.h15
-rw-r--r--src/intel/intel_gpgpu.c259
-rw-r--r--src/intel/intel_structs.h324
10 files changed, 479 insertions, 700 deletions
diff --git a/kernels/Mandelbrot_Kernels.cl b/kernels/Mandelbrot_Kernels.cl
deleted file mode 100755
index 97e21e05..00000000
--- a/kernels/Mandelbrot_Kernels.cl
+++ /dev/null
@@ -1,243 +0,0 @@
-/**
-* A fractal generator that calculates the mandlebrot set
-* http://en.wikipedia.org/wiki/Mandelbrot_set
-* @param mandelbrotImage mandelbrot images is stored in this
-* @param scale Represents the distance from which the fractal
-* is being seen if this is greater more area and
-* less detail is seen
-* @param maxIterations More iterations gives more accurate mandelbrot image
-* @param width size of the image
-*/
-
-__kernel void mandelbrot_vector (__global uchar4 * mandelbrotImage,
- const float posx,
- const float posy,
- const float stepSizeX,
- const float stepSizeY,
- const uint maxIterations,
- const int width,
- const int bench
- )
-{
- int tid = get_global_id(0);
-
- int i = tid % (width / 4);
- int j = tid / (width / 4);
-
- int4 veci = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3};
- int4 vecj = {j, j, j, j};
-
- float4 x0;
- x0.s0 = (float)(posx + stepSizeX * (float)veci.s0);
- x0.s1 = (float)(posx + stepSizeX * (float)veci.s1);
- x0.s2 = (float)(posx + stepSizeX * (float)veci.s2);
- x0.s3 = (float)(posx + stepSizeX * (float)veci.s3);
- float4 y0;
- y0.s0 = (float)(posy + stepSizeY * (float)vecj.s0);
- y0.s1 = (float)(posy + stepSizeY * (float)vecj.s1);
- y0.s2 = (float)(posy + stepSizeY * (float)vecj.s2);
- y0.s3 = (float)(posy + stepSizeY * (float)vecj.s3);
-
- float4 x = x0;
- float4 y = y0;
-
- uint iter=0;
- float4 tmp;
- int4 stay;
- int4 ccount = 0;
-
- stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f;
- stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f;
- stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f;
- stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f;
- float4 savx = x;
- float4 savy = y;
- for(iter=0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < maxIterations); iter+= 16)
- {
- x = savx;
- y = savy;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- // Two iterations
- tmp = x * x + x0 - y * y;
- y = 2.0f * x * y + y0;
- x = tmp * tmp + x0 - y * y;
- y = 2.0f * tmp * y + y0;
-
- stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f;
- stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f;
- stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f;
- stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f;
-
- savx.s0 = (stay.s0 ? x.s0 : savx.s0);
- savx.s1 = (stay.s1 ? x.s1 : savx.s1);
- savx.s2 = (stay.s2 ? x.s2 : savx.s2);
- savx.s3 = (stay.s3 ? x.s3 : savx.s3);
- savy.s0 = (stay.s0 ? y.s0 : savy.s0);
- savy.s1 = (stay.s1 ? y.s1 : savy.s1);
- savy.s2 = (stay.s2 ? y.s2 : savy.s2);
- savy.s3 = (stay.s3 ? y.s3 : savy.s3);
- ccount += stay*16;
- }
- // Handle remainder
- if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))
- {
- iter = 16;
- do
- {
- x = savx;
- y = savy;
- stay.s0 = ((x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f) &&
- (ccount.s0 < maxIterations);
- stay.s1 = ((x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f) &&
- (ccount.s1 < maxIterations);
- stay.s2 = ((x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f) &&
- (ccount.s2 < maxIterations);
- stay.s3 = ((x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f) &&
- (ccount.s3 < maxIterations);
- tmp = x;
- x = x * x + x0 - y * y;
- y = 2.0f * tmp * y + y0;
- ccount += stay;
- iter--;
- savx.s0 = (stay.s0 ? x.s0 : savx.s0);
- savx.s1 = (stay.s1 ? x.s1 : savx.s1);
- savx.s2 = (stay.s2 ? x.s2 : savx.s2);
- savx.s3 = (stay.s3 ? x.s3 : savx.s3);
- savy.s0 = (stay.s0 ? y.s0 : savy.s0);
- savy.s1 = (stay.s1 ? y.s1 : savy.s1);
- savy.s2 = (stay.s2 ? y.s2 : savy.s2);
- savy.s3 = (stay.s3 ? y.s3 : savy.s3);
- } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);
- }
- x = savx;
- y = savy;
- float4 fc = convert_float4(ccount);
- fc.s0 = (float)ccount.s0 + 1 -
- native_log2(native_log2(x.s0 * x.s0 + y.s0 * y.s0));
- fc.s1 = (float)ccount.s1 + 1 -
- native_log2(native_log2(x.s1 * x.s1 + y.s1 * y.s1));
- fc.s2 = (float)ccount.s2 + 1 -
- native_log2(native_log2(x.s2 * x.s2 + y.s2 * y.s2));
- fc.s3 = (float)ccount.s3 + 1 -
- native_log2(native_log2(x.s3 * x.s3 + y.s3 * y.s3));
-
- float c = fc.s0 * 2.0f * 3.1416f / 256.0f;
- uchar4 color[4];
- color[0].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
- color[0].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[0].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[0].s3 = 0xff;
- if (ccount.s0 == maxIterations)
- {
- color[0].s0 = 0;
- color[0].s1 = 0;
- color[0].s2 = 0;
- }
- if (bench)
- {
- color[0].s0 = ccount.s0 & 0xff;
- color[0].s1 = (ccount.s0 & 0xff00) >> 8;
- color[0].s2 = (ccount.s0 & 0xff0000) >> 16;
- color[0].s3 = (ccount.s0 & 0xff000000) >> 24;
- }
- mandelbrotImage[4 * tid] = color[0];
- c = fc.s1 * 2.0f * 3.1416f / 256.0f;
- color[1].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
- color[1].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[1].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[1].s3 = 0xff;
- if (ccount.s1 == maxIterations)
- {
- color[1].s0 = 0;
- color[1].s1 = 0;
- color[1].s2 = 0;
- }
- if (bench)
- {
- color[1].s0 = ccount.s1 & 0xff;
- color[1].s1 = (ccount.s1 & 0xff00) >> 8;
- color[1].s2 = (ccount.s1 & 0xff0000) >> 16;
- color[1].s3 = (ccount.s1 & 0xff000000) >> 24;
- }
- mandelbrotImage[4 * tid + 1] = color[1];
- c = fc.s2 * 2.0f * 3.1416f / 256.0f;
- color[2].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
- color[2].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[2].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[2].s3 = 0xff;
- if (ccount.s2 == maxIterations)
- {
- color[2].s0 = 0;
- color[2].s1 = 0;
- color[2].s2 = 0;
- }
- if (bench)
- {
- color[2].s0 = ccount.s2 & 0xff;
- color[2].s1 = (ccount.s2 & 0xff00) >> 8;
- color[2].s2 = (ccount.s2 & 0xff0000) >> 16;
- color[2].s3 = (ccount.s2 & 0xff000000) >> 24;
- }
- mandelbrotImage[4 * tid + 2] = color[2];
- c = fc.s3 * 2.0f * 3.1416f / 256.0f;
- color[3].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
- color[3].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[3].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
- color[3].s3 = 0xff;
- if (ccount.s3 == maxIterations)
- {
- color[3].s0 = 0;
- color[3].s1 = 0;
- color[3].s2 = 0;
- }
- if (bench)
- {
- color[3].s0 = ccount.s3 & 0xff;
- color[3].s1 = (ccount.s3 & 0xff00) >> 8;
- color[3].s2 = (ccount.s3 & 0xff0000) >> 16;
- color[3].s3 = (ccount.s3 & 0xff000000) >> 24;
- }
- mandelbrotImage[4 * tid + 3] = color[3];
-}
diff --git a/kernels/test_copy_image.cl b/kernels/test_copy_image.cl
index e14ce682..1ccc581d 100644
--- a/kernels/test_copy_image.cl
+++ b/kernels/test_copy_image.cl
@@ -1,9 +1,6 @@
-__constant sampler_t s0 = CLK_NORMALIZED_COORDS_FALSE |
- CLK_ADDRESS_CLAMP |
- CLK_FILTER_NEAREST;
-__constant sampler_t s1 = CLK_NORMALIZED_COORDS_TRUE |
- CLK_ADDRESS_CLAMP |
- CLK_FILTER_NEAREST;
+__constant sampler_t s = CLK_NORMALIZED_COORDS_FALSE |
+ CLK_ADDRESS_CLAMP |
+ CLK_FILTER_NEAREST;
__kernel void
test_copy_image(__read_only image2d_t src,
__global uchar4 *dst)
@@ -11,8 +8,7 @@ test_copy_image(__read_only image2d_t src,
const int x = (int) get_global_id(0);
const int y = (int) get_global_id(1);
const int id = x + y * get_image_width(src);
- const uchar4 from = convert_uchar4(read_imageui(src, s0, (int2)(x,y)));
- const uchar4 from0 = convert_uchar4(read_imageui(src, s1, (int2)(x,y)));
- dst[id] = from + from0;
+ const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y)));
+ dst[id] = from;
}
diff --git a/src/cl_api.c b/src/cl_api.c
index 5d7d0ea1..e6c7dfba 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -213,6 +213,7 @@ clCreateBuffer(cl_context context,
cl_mem mem = NULL;
cl_int err = CL_SUCCESS;
CHECK_CONTEXT (context);
+
mem = cl_mem_new(context, flags, size, host_ptr, &err);
error:
if (errcode_ret)
@@ -246,8 +247,22 @@ clCreateImage2D(cl_context context,
void * host_ptr,
cl_int * errcode_ret)
{
- NOT_IMPLEMENTED;
- return NULL;
+ cl_mem mem = NULL;
+ cl_int err = CL_SUCCESS;
+ CHECK_CONTEXT (context);
+
+ mem = cl_mem_new_image2D(context,
+ flags,
+ image_format,
+ image_width,
+ image_height,
+ image_row_pitch,
+ host_ptr,
+ errcode_ret);
+error:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
}
cl_mem
diff --git a/src/cl_command_queue_gen6.c b/src/cl_command_queue_gen6.c
index 78583698..d8464446 100644
--- a/src/cl_command_queue_gen6.c
+++ b/src/cl_command_queue_gen6.c
@@ -61,10 +61,10 @@ cl_kernel_compute_batch_sz(cl_kernel k, size_t wk_grp_n, size_t thread_n)
static INLINE void
cl_command_queue_enqueue_wk_grp(cl_command_queue queue,
- cl_local_id_t **ids,
- const cl_inline_header_t *header,
- uint32_t thread_n,
- uint32_t barrierID)
+ cl_local_id_t **ids,
+ const cl_inline_header_t *header,
+ uint32_t thread_n,
+ uint32_t barrierID)
{
intel_gpgpu_t *gpgpu = queue->gpgpu;
uint32_t i;
@@ -85,10 +85,10 @@ cl_command_queue_enqueue_wk_grp(cl_command_queue queue,
LOCAL cl_int
cl_command_queue_ND_range_gen6(cl_command_queue queue,
- cl_kernel ker,
- const size_t *global_wk_off,
- const size_t *global_wk_sz,
- const size_t *local_wk_sz)
+ cl_kernel ker,
+ const size_t *global_wk_off,
+ const size_t *global_wk_sz,
+ const size_t *local_wk_sz)
{
cl_context ctx = queue->ctx;
intel_gpgpu_t *gpgpu = queue->gpgpu;
@@ -180,7 +180,7 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue,
/* Start a new batch buffer */
gpgpu_batch_reset(gpgpu, batch_sz);
gpgpu_batch_start(gpgpu);
-#if 1
+
/* Push all media objects. We implement three paths to make it (a bit) faster.
* Local IDs are shared from work group to work group. We allocate once the
* buffers and reuse them
@@ -201,16 +201,13 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue,
cl_command_queue_enqueue_wk_grp(queue, ids, &header, thread_n, barrierID);
barrierID = (barrierID + 1) % 16;
}
-#endif
+
gpgpu_batch_end(gpgpu, 0);
gpgpu_flush(gpgpu);
- if (slm_bo)
- drm_intel_bo_unreference(slm_bo);
- if (private_bo)
- drm_intel_bo_unreference(private_bo);
- if (scratch_bo)
- drm_intel_bo_unreference(scratch_bo);
+ if (slm_bo) drm_intel_bo_unreference(slm_bo);
+ if (private_bo) drm_intel_bo_unreference(private_bo);
+ if (scratch_bo) drm_intel_bo_unreference(scratch_bo);
error:
cl_free(ids[0]);
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index d66d6ead..8717a166 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -40,11 +40,11 @@
.image_support = CL_FALSE,
.max_read_image_args = 0,
.max_write_image_args = 0,
-.image2d_max_width = 0,
-.image2d_max_height = 0,
-.image3d_max_width = 0,
-.image3d_max_height = 0,
-.image3d_max_depth = 0,
+.image2d_max_width = 8192,
+.image2d_max_height = 8192,
+.image3d_max_width = 8192,
+.image3d_max_height = 8192,
+.image3d_max_depth = 8192,
.max_samplers = 0,
.mem_base_addr_align = sizeof(cl_uint) * 8,
.min_data_type_align_size = sizeof(cl_uint),
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 990cb0b6..c1997863 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -352,6 +352,7 @@ cl_kernel_setup_patch_list(cl_kernel k, const char *patch, size_t sz)
info->curbe.sz = *(uint32_t *) (patch + sizeof(cl_patch_item_header_t));
info->curbe.offset = 0;
break;
+ case PATCH_TOKEN_IMAGE_MEMORY_KERNEL_ARGUMENT:
case PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT:
case PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT:
{
@@ -360,7 +361,13 @@ cl_kernel_setup_patch_list(cl_kernel k, const char *patch, size_t sz)
TRY_ALLOC (arg_info, CALLOC(cl_arg_info_t));
arg_info->arg_index = from->index;
arg_info->offset = from->offset;
- arg_info->type = OCLRT_ARG_TYPE_BUFFER;
+ if (item->token == PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT)
+ arg_info->type = OCLRT_ARG_TYPE_BUFFER;
+ else if (item->token == PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT)
+ arg_info->type = OCLRT_ARG_TYPE_CONST;
+ else
+ arg_info->type = OCLRT_ARG_TYPE_IMAGE;
+
arg_info->sz = sizeof(cl_mem);
arg_info->is_patched = CL_FALSE;
@@ -444,6 +451,54 @@ error:
}
#undef ASSOC_ITEM
+typedef struct i965_sampler_state
+{
+ struct
+ {
+ uint32_t shadow_function:3;
+ uint32_t lod_bias:11;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t min_mag_neq:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct
+ {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t cube_control_mode:1;
+ uint32_t pad:2;
+ uint32_t max_lod:10;
+ uint32_t min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ uint32_t non_normalized_coord:1;
+ uint32_t pad:12;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t monochrome_filter_width:3;
+ uint32_t monochrome_filter_height:3;
+ } ss3;
+} i965_sampler_state_t;
LOCAL int
cl_kernel_setup(cl_kernel k, const char *ker)
@@ -668,6 +723,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
/* Is it a buffer / image / sampler to set */
if ((arg_info = cl_kernel_get_arg_info(k, index)) != NULL) {
switch (arg_info->type) {
+ case OCLRT_ARG_TYPE_CONST:
+ case OCLRT_ARG_TYPE_IMAGE:
case OCLRT_ARG_TYPE_BUFFER:
{
/* Check the buffer consistency */
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f37c846f..7151df72 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -21,6 +21,7 @@
#include "cl_context.h"
#include "cl_utils.h"
#include "cl_alloc.h"
+#include "cl_device_id.h"
#include "intel_bufmgr.h" /* libdrm_intel */
@@ -29,12 +30,8 @@
#include <assert.h>
#include <stdio.h>
-LOCAL cl_mem
-cl_mem_new(cl_context ctx,
- cl_mem_flags flags,
- size_t sz,
- void *data,
- cl_int *errcode_ret)
+static cl_mem
+cl_mem_allocate(cl_context ctx, cl_mem_flags flags, size_t sz, cl_int *errcode)
{
drm_intel_bufmgr *bufmgr = NULL;
cl_mem mem = NULL;
@@ -50,10 +47,6 @@ cl_mem_new(cl_context ctx,
err = CL_INVALID_BUFFER_SIZE;
goto error;
}
- if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
- err = CL_INVALID_HOST_PTR;
- goto error;
- }
/* Allocate and inialize the structure itself */
TRY_ALLOC (mem, CALLOC(struct _cl_mem));
@@ -73,16 +66,6 @@ cl_mem_new(cl_context ctx,
err = CL_MEM_ALLOCATION_FAILURE;
goto error;
}
- /* Copy the data if required */
- if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
- drm_intel_bo_subdata(mem->bo, 0, sz, data);
- #if 0
- if (UNLIKELY(drm_intel_bo_subdata(mem->bo, 0, sz, data) != 0)) {
- err = CL_MEM_ALLOCATION_FAILURE;
- goto error;
- }
- #endif
-
/* Append the buffer in the context buffer list */
pthread_mutex_lock(&ctx->buffer_lock);
@@ -95,6 +78,190 @@ cl_mem_new(cl_context ctx,
cl_context_add_ref(ctx);
exit:
+ if (errcode)
+ *errcode = err;
+ return mem;
+error:
+ cl_mem_delete(mem);
+ mem = NULL;
+ goto exit;
+
+}
+
+LOCAL cl_mem
+cl_mem_new(cl_context ctx,
+ cl_mem_flags flags,
+ size_t sz,
+ void *data,
+ cl_int *errcode_ret)
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+
+ /* Check flags consistency */
+ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+
+ /* Create the buffer in video memory */
+ mem = cl_mem_allocate(ctx, flags, sz, &err);
+ if (mem == NULL || err != CL_SUCCESS)
+ goto error;
+
+ /* Copy the data if required */
+ if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
+ drm_intel_bo_subdata(mem->bo, 0, sz, data);
+
+exit:
+ if (errcode_ret)
+ *errcode_ret = err;
+ return mem;
+error:
+ cl_mem_delete(mem);
+ mem = NULL;
+ goto exit;
+}
+
+static cl_int
+cl_mem_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
+{
+ assert(bpp);
+
+ const uint32_t type = fmt->image_channel_data_type;
+ const uint32_t order = fmt->image_channel_order;
+ switch (type) {
+#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE;
+ DECL_BPP(CL_SNORM_INT8, 1); break;
+ DECL_BPP(CL_SNORM_INT16, 2); break;
+ DECL_BPP(CL_UNORM_INT8, 1); break;
+ DECL_BPP(CL_UNORM_INT16, 2); break;
+ DECL_BPP(CL_UNORM_SHORT_565, 2);
+ if (order != CL_RGBx && order != CL_RGB)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ DECL_BPP(CL_UNORM_SHORT_555, 2);
+ if (order != CL_RGBx && order != CL_RGB)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ DECL_BPP(CL_UNORM_INT_101010, 4);
+ if (order != CL_RGBx && order != CL_RGB)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ DECL_BPP(CL_SIGNED_INT8, 1); break;
+ DECL_BPP(CL_SIGNED_INT16, 2); break;
+ DECL_BPP(CL_SIGNED_INT32, 4); break;
+ DECL_BPP(CL_UNSIGNED_INT8, 1); break;
+ DECL_BPP(CL_UNSIGNED_INT16, 2); break;
+ DECL_BPP(CL_UNSIGNED_INT32, 4); break;
+ DECL_BPP(CL_HALF_FLOAT, 2); break;
+ DECL_BPP(CL_FLOAT, 4); break;
+#undef DECL_BPP
+ default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ };
+
+ switch (order) {
+ case CL_R: break;
+ case CL_A: break;
+ case CL_RA: *bpp *= 2; break;
+ case CL_RG: *bpp *= 2; break;
+ case CL_Rx: *bpp *= 2; break;
+ case CL_INTENSITY:
+ case CL_LUMINANCE:
+ if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 &&
+ type != CL_SNORM_INT8 && type != CL_SNORM_INT16 &&
+ type != CL_HALF_FLOAT && type != CL_FLOAT)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ case CL_RGB:
+ case CL_RGBx:
+ if (type != CL_UNORM_SHORT_555 &&
+ type != CL_UNORM_SHORT_565 &&
+ type != CL_UNORM_INT_101010)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ break;
+ case CL_RGBA: *bpp *= 4; break;
+ case CL_ARGB:
+ case CL_BGRA:
+ if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 &&
+ type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8)
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ *bpp *= 4;
+ break;
+ default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ };
+
+ return CL_SUCCESS;
+}
+
+LOCAL cl_mem
+cl_mem_new_image2D(cl_context ctx,
+ cl_mem_flags flags,
+ const cl_image_format *fmt,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ void *data,
+ cl_int *errcode_ret)
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+ uint32_t bpp = 0;
+ size_t sz = 0;
+
+ /* Check flags consistency */
+ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
+ err = CL_INVALID_HOST_PTR;
+ goto error;
+ }
+
+ /* Get the size of each pixel */
+ if (UNLIKELY((err = cl_mem_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
+ goto error;
+
+ /* See if the user parameters match */
+#define DO_IMAGE_ERROR \
+ do { \
+ err = CL_INVALID_IMAGE_SIZE; \
+ goto error; \
+ } while (0);
+ if (UNLIKELY(w == 0)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h == 0)) DO_IMAGE_ERROR;
+ if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
+ if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
+ if (UNLIKELY(bpp*w > pitch)) DO_IMAGE_ERROR;
+#undef DO_IMAGE_ERROR
+
+ /* Create the buffer in video memory */
+ sz = w * h * bpp;
+ mem = cl_mem_allocate(ctx, flags, sz, &err);
+ if (mem == NULL || err != CL_SUCCESS)
+ goto error;
+
+ /* Copy the data if required */
+ if (flags & CL_MEM_COPY_HOST_PTR) {/* TODO check other flags too */
+ size_t x, y, p;
+ char *dst;
+ drm_intel_bo_map(mem->bo, 1);
+ dst = mem->bo->virtual;
+ for (y = 0; y < h; ++y) {
+ char *src = (char*) data + pitch * y;
+ for (x = 0; x < w; ++x) {
+ for (p = 0; p < bpp; ++p)
+ dst[p] = src[p];
+ dst += bpp;
+ src += bpp;
+ }
+ }
+ drm_intel_bo_unmap(mem->bo);
+ }
+ mem->w = w;
+ mem->h = h;
+ mem->fmt = *fmt;
+ mem->pitch = w * bpp;
+ mem->is_image = 1;
+
+exit:
if (errcode_ret)
*errcode_ret = err;
return mem;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 96ff8da0..9a9ceb36 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -33,12 +33,25 @@ struct _cl_mem {
struct _drm_intel_bo *bo; /* Data in GPU memory */
cl_mem prev, next; /* We chain the memory buffers together */
cl_context ctx; /* Context it belongs to */
- cl_mem_flags flags; /* Flags specified at the creation time */
+ cl_mem_flags flags; /* Flags specified at the creation time */
+ uint32_t is_image; /* Indicate if this is an image or not */
+ cl_image_format fmt; /* only for images */
+ size_t w,h,depth,pitch; /* only for images (depth is only for 3d images) */
};
/* Create a new memory object and initialize it with possible user data */
extern cl_mem cl_mem_new(cl_context, cl_mem_flags, size_t, void*, cl_int*);
+/* Idem but this is an image */
+extern cl_mem cl_mem_new_image2D(cl_context,
+ cl_mem_flags,
+ const cl_image_format*,
+ size_t w,
+ size_t h,
+ size_t pitch,
+ void *,
+ cl_int *);
+
/* Unref the object and delete it if no more reference */
extern void cl_mem_delete(cl_mem);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index a4b476be..f9e7db1d 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -42,256 +42,8 @@
#define MO_RETAIN_BIT (1 << 28)
#define SAMPLER_STATE_SIZE (16)
-typedef struct gen6_surface_state
-{
- struct {
- uint32_t cube_pos_z:1;
- uint32_t cube_neg_z:1;
- uint32_t cube_pos_y:1;
- uint32_t cube_neg_y:1;
- uint32_t cube_pos_x:1;
- uint32_t cube_neg_x:1;
- uint32_t pad:2;
- uint32_t render_cache_read_mode:1;
- uint32_t cube_map_corner_mode:1;
- uint32_t mipmap_layout_mode:1;
- uint32_t vert_line_stride_ofs:1;
- uint32_t vert_line_stride:1;
- uint32_t color_blend:1;
- uint32_t writedisable_blue:1;
- uint32_t writedisable_green:1;
- uint32_t writedisable_red:1;
- uint32_t writedisable_alpha:1;
- uint32_t surface_format:9;
- uint32_t data_return_format:1;
- uint32_t pad0:1;
- uint32_t surface_type:3;
- } ss0;
-
- struct {
- uint32_t base_addr;
- } ss1;
-
- struct {
- uint32_t render_target_rotation:2;
- uint32_t mip_count:4;
- uint32_t width:13;
- uint32_t height:13;
- } ss2;
-
- struct {
- uint32_t tile_walk:1;
- uint32_t tiled_surface:1;
- uint32_t pad:1;
- uint32_t pitch:18;
- uint32_t depth:11;
- } ss3;
-
- struct {
- uint32_t multisample_pos_index:3;
- uint32_t pad:1;
- uint32_t multisample_count:3;
- uint32_t pad1:1;
- uint32_t rt_view_extent:9;
- uint32_t min_array_elt:11;
- uint32_t min_lod:4;
- } ss4;
-
- struct {
- uint32_t pad:16;
- uint32_t cache_control:2; /* different values for GT and IVB */
- uint32_t gfdt:1; /* allows selective flushing of LLC (e.g. for scanout) */
- uint32_t encrypted_data:1;
- uint32_t y_offset:4;
- uint32_t vertical_alignment:1;
- uint32_t x_offset:7;
- } ss5;
-
- uint32_t ss6; /* unused */
- uint32_t ss7; /* unused */
-} gen6_surface_state_t;
-
-typedef struct gen7_surface_state
-{
- struct {
- uint32_t cube_pos_z:1;
- uint32_t cube_neg_z:1;
- uint32_t cube_pos_y:1;
- uint32_t cube_neg_y:1;
- uint32_t cube_pos_x:1;
- uint32_t cube_neg_x:1;
- uint32_t media_boundary_pixel_mode:2;
- uint32_t render_cache_rw_mode:1;
- uint32_t pad1:1;
- uint32_t surface_array_spacing:1;
- uint32_t vertical_line_stride_offset:1;
- uint32_t vertical_line_stride:1;
- uint32_t tile_walk:1;
- uint32_t tiled_surface:1;
- uint32_t horizontal_alignment:1;
- uint32_t vertical_alignment:2;
- uint32_t surface_format:9;
- uint32_t pad0:1;
- uint32_t surface_array:1;
- uint32_t surface_type:3;
- } ss0;
-
- struct {
- uint32_t base_addr;
- } ss1;
-
- struct {
- uint32_t width:14;
- uint32_t pad1:2;
- uint32_t height:14;
- uint32_t pad0:2;
- } ss2;
-
- struct {
- uint32_t pitch:18;
- uint32_t pad0:3;
- uint32_t depth:11;
- } ss3;
-
- uint32_t ss4;
-
- struct {
- uint32_t mip_count:4;
- uint32_t surface_min_load:4;
- uint32_t pad2:6;
- uint32_t coherence_type:1;
- uint32_t stateless_force_write_thru:1;
- uint32_t surface_object_control_state:4;
- uint32_t y_offset:4;
- uint32_t pad0:1;
- uint32_t x_offset:7;
- } ss5;
-
- uint32_t ss6; /* unused */
- uint32_t ss7; /* unused */
-
-} gen7_surface_state_t;
-
#define GEN7_CACHED_IN_LLC 3
-STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
-static const size_t surface_state_sz = sizeof(gen6_surface_state_t);
-
-typedef struct gen6_vfe_state_inline
-{
- struct {
- uint32_t per_thread_scratch_space:4;
- uint32_t pad3:3;
- uint32_t extend_vfe_state_present:1;
- uint32_t pad2:2;
- uint32_t scratch_base:22;
- } vfe0;
-
- struct {
- uint32_t debug_counter_control:2;
- uint32_t gpgpu_mode:1; /* 0 for SNB!!! */
- uint32_t gateway_mmio_access:2;
- uint32_t fast_preempt:1;
- uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */
- uint32_t reset_gateway_timer:1;
- uint32_t urb_entries:8;
- uint32_t max_threads:16;
- } vfe1;
-
- struct {
- uint32_t pad8:8;
- uint32_t debug_object_id:24;
- } vfe2;
-
- struct {
- uint32_t curbe_size:16; /* in GRFs */
- uint32_t urbe_size:16; /* in GRFs */
- } vfe3;
-
- struct {
- uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */
- } vfe4;
-
- struct {
- uint32_t scoreboard0_dx:4;
- uint32_t scoreboard0_dy:4;
- uint32_t scoreboard1_dx:4;
- uint32_t scoreboard1_dy:4;
- uint32_t scoreboard2_dx:4;
- uint32_t scoreboard2_dy:4;
- uint32_t scoreboard3_dx:4;
- uint32_t scoreboard3_dy:4;
- } vfe5;
-
- struct {
- uint32_t scoreboard4_dx:4;
- uint32_t scoreboard4_dy:4;
- uint32_t scoreboard5_dx:4;
- uint32_t scoreboard5_dy:4;
- uint32_t scoreboard6_dx:4;
- uint32_t scoreboard6_dy:4;
- uint32_t scoreboard7_dx:4;
- uint32_t scoreboard7_dy:4;
- } vfe6;
-} gen6_vfe_state_inline_t;
-
-typedef struct gen6_interface_descriptor
-{
- struct {
- uint32_t pad6:6;
- uint32_t kernel_start_pointer:26;
- } desc0;
-
- struct {
- uint32_t pad:7;
- uint32_t software_exception:1;
- uint32_t pad2:3;
- uint32_t maskstack_exception:1;
- uint32_t pad3:1;
- uint32_t illegal_opcode_exception:1;
- uint32_t pad4:2;
- uint32_t floating_point_mode:1;
- uint32_t thread_priority:1;
- uint32_t single_program_flow:1;
- uint32_t pad5:1;
- uint32_t pad6:6;
- uint32_t pad7:6;
- } desc1;
-
- struct {
- uint32_t pad:2;
- uint32_t sampler_count:3;
- uint32_t sampler_state_pointer:27;
- } desc2;
-
- struct {
- uint32_t binding_table_entry_count:5; /* prefetch entries only */
- uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */
- } desc3;
-
- struct {
- uint32_t curbe_read_offset:16; /* in GRFs */
- uint32_t curbe_read_len:16; /* in GRFs */
- } desc4;
-
- struct {
- uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */
- uint32_t barrier_return_byte:8;
- uint32_t slm_sz:5; /* 0..16 - 0K..64K */
- uint32_t barrier_enable:1;
- uint32_t rounding_mode:2;
- uint32_t barrier_return_grf_offset:8;
- } desc5;
-
- struct {
- uint32_t reserved_mbz;
- } desc6;
-
- struct {
- uint32_t reserved_mbz;
- } desc7;
-} gen6_interface_descriptor_t;
-
/* No dependency on Gen specific structures */
struct opaque_sampler_state {
char opaque[SAMPLER_STATE_SIZE];
@@ -337,7 +89,7 @@ struct intel_gpgpu
};
/* Be sure that the size is still valid */
-STATIC_ASSERT(sizeof(struct opaque_sampler_state) == sizeof(struct i965_sampler_state));
+STATIC_ASSERT(sizeof(struct opaque_sampler_state) == 16);//sizeof(struct i965_sampler_state));
LOCAL intel_gpgpu_t*
intel_gpgpu_new(intel_driver_t *drv)
@@ -560,11 +312,11 @@ enum GFX3DSTATE_PIPELINED_SUBOPCODE
static void
gpgpu_pipe_control(intel_gpgpu_t *state)
{
- BEGIN_BATCH(state->batch, sizeof32(i965_pipe_control_t));
- i965_pipe_control_t* pc = (i965_pipe_control_t*)
+ BEGIN_BATCH(state->batch, sizeof32(gen6_pipe_control_t));
+ gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
intel_batchbuffer_alloc_space(state->batch, 0);
memset(pc, 0, sizeof(*pc));
- pc->dw0.length = sizeof32(i965_pipe_control_t) - 2;
+ pc->dw0.length = sizeof32(gen6_pipe_control_t) - 2;
pc->dw0.instruction_subopcode = GFX3DSUBOP_3DCONTROL;
pc->dw0.instruction_opcode = GFX3DOP_3DCONTROL;
pc->dw0.instruction_pipeline = PIPE_3D;
@@ -715,7 +467,8 @@ gpgpu_state_init(intel_gpgpu_t *state,
dri_bo_unreference(state->sampler_state_b.bo);
bo = dri_bo_alloc(state->drv->bufmgr,
"sample states",
- MAX_SAMPLERS * sizeof(struct i965_sampler_state),
+ //MAX_SAMPLERS * sizeof(struct i965_sampler_state),
+ MAX_SAMPLERS * 16,
32);
assert(bo);
state->sampler_state_b.bo = bo;
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index f19c1b90..134b2709 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -22,142 +22,10 @@
#include <stdint.h>
-struct i965_vfe_state
-{
- struct {
- uint32_t per_thread_scratch_space:4;
- uint32_t pad3:3;
- uint32_t extend_vfe_state_present:1;
- uint32_t pad2:2;
- uint32_t scratch_base:22;
- } vfe0;
-
- struct {
- uint32_t debug_counter_control:2;
- uint32_t children_present:1;
- uint32_t vfe_mode:4;
- uint32_t pad2:2;
- uint32_t num_urb_entries:7;
- uint32_t urb_entry_alloc_size:9;
- uint32_t max_threads:7;
- } vfe1;
-
- struct {
- uint32_t pad4:4;
- uint32_t interface_descriptor_base:28;
- } vfe2;
-};
-
-struct i965_vfe_state_ex
-{
- struct {
- uint32_t pad:8;
- uint32_t obj_id:24;
- } vfex0;
-
- struct {
- uint32_t residual_grf_offset:5;
- uint32_t pad0:3;
- uint32_t weight_grf_offset:5;
- uint32_t pad1:3;
- uint32_t residual_data_offset:8;
- uint32_t sub_field_present_flag:2;
- uint32_t residual_data_fix_offset:1;
- uint32_t pad2:5;
- }vfex1;
-
- struct {
- uint32_t remap_index_0:4;
- uint32_t remap_index_1:4;
- uint32_t remap_index_2:4;
- uint32_t remap_index_3:4;
- uint32_t remap_index_4:4;
- uint32_t remap_index_5:4;
- uint32_t remap_index_6:4;
- uint32_t remap_index_7:4;
- }remap_table0;
-
- struct {
- uint32_t remap_index_8:4;
- uint32_t remap_index_9:4;
- uint32_t remap_index_10:4;
- uint32_t remap_index_11:4;
- uint32_t remap_index_12:4;
- uint32_t remap_index_13:4;
- uint32_t remap_index_14:4;
- uint32_t remap_index_15:4;
- } remap_table1;
-
- struct {
- uint32_t scoreboard_mask:8;
- uint32_t pad:22;
- uint32_t type:1;
- uint32_t enable:1;
- } scoreboard0;
-
- struct {
- uint32_t ignore;
- } scoreboard1;
-
- struct {
- uint32_t ignore;
- } scoreboard2;
-
- uint32_t pad;
-};
-
-struct i965_vld_state
+typedef struct gen6_interface_descriptor
{
struct {
uint32_t pad6:6;
- uint32_t scan_order:1;
- uint32_t intra_vlc_format:1;
- uint32_t quantizer_scale_type:1;
- uint32_t concealment_motion_vector:1;
- uint32_t frame_predict_frame_dct:1;
- uint32_t top_field_first:1;
- uint32_t picture_structure:2;
- uint32_t intra_dc_precision:2;
- uint32_t f_code_0_0:4;
- uint32_t f_code_0_1:4;
- uint32_t f_code_1_0:4;
- uint32_t f_code_1_1:4;
- } vld0;
-
- struct {
- uint32_t pad2:9;
- uint32_t picture_coding_type:2;
- uint32_t pad:21;
- } vld1;
-
- struct {
- uint32_t index_0:4;
- uint32_t index_1:4;
- uint32_t index_2:4;
- uint32_t index_3:4;
- uint32_t index_4:4;
- uint32_t index_5:4;
- uint32_t index_6:4;
- uint32_t index_7:4;
- } desc_remap_table0;
-
- struct {
- uint32_t index_8:4;
- uint32_t index_9:4;
- uint32_t index_10:4;
- uint32_t index_11:4;
- uint32_t index_12:4;
- uint32_t index_13:4;
- uint32_t index_14:4;
- uint32_t index_15:4;
- } desc_remap_table1;
-};
-
-struct i965_interface_descriptor
-{
- struct {
- uint32_t grf_reg_blocks:4;
- uint32_t pad:2;
uint32_t kernel_start_pointer:26;
} desc0;
@@ -173,8 +41,8 @@ struct i965_interface_descriptor
uint32_t thread_priority:1;
uint32_t single_program_flow:1;
uint32_t pad5:1;
- uint32_t const_urb_entry_read_offset:6;
- uint32_t const_urb_entry_read_len:6;
+ uint32_t pad6:6;
+ uint32_t pad7:6;
} desc1;
struct {
@@ -184,12 +52,34 @@ struct i965_interface_descriptor
} desc2;
struct {
- uint32_t binding_table_entry_count:5;
- uint32_t binding_table_pointer:27;
+ uint32_t binding_table_entry_count:5; /* prefetch entries only */
+ uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */
} desc3;
-};
-struct i965_surface_state
+ struct {
+ uint32_t curbe_read_offset:16; /* in GRFs */
+ uint32_t curbe_read_len:16; /* in GRFs */
+ } desc4;
+
+ struct {
+ uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */
+ uint32_t barrier_return_byte:8;
+ uint32_t slm_sz:5; /* 0..16 - 0K..64K */
+ uint32_t barrier_enable:1;
+ uint32_t rounding_mode:2;
+ uint32_t barrier_return_grf_offset:8;
+ } desc5;
+
+ struct {
+ uint32_t reserved_mbz;
+ } desc6;
+
+ struct {
+ uint32_t reserved_mbz;
+ } desc7;
+} gen6_interface_descriptor_t;
+
+typedef struct gen6_surface_state
{
struct {
uint32_t cube_pos_z:1;
@@ -198,8 +88,9 @@ struct i965_surface_state
uint32_t cube_neg_y:1;
uint32_t cube_pos_x:1;
uint32_t cube_neg_x:1;
- uint32_t pad:3;
+ uint32_t pad:2;
uint32_t render_cache_read_mode:1;
+ uint32_t cube_map_corner_mode:1;
uint32_t mipmap_layout_mode:1;
uint32_t vert_line_stride_ofs:1;
uint32_t vert_line_stride:1;
@@ -234,23 +125,155 @@ struct i965_surface_state
} ss3;
struct {
- uint32_t pad:19;
- uint32_t min_array_elt:9;
+ uint32_t multisample_pos_index:3;
+ uint32_t pad:1;
+ uint32_t multisample_count:3;
+ uint32_t pad1:1;
+ uint32_t rt_view_extent:9;
+ uint32_t min_array_elt:11;
uint32_t min_lod:4;
} ss4;
struct {
- uint32_t pad:20;
+ uint32_t pad:16;
+ uint32_t cache_control:2; /* different values for GT and IVB */
+ uint32_t gfdt:1; /* allows selective flushing of LLC (e.g. for scanout) */
+ uint32_t encrypted_data:1;
uint32_t y_offset:4;
- uint32_t pad2:1;
+ uint32_t vertical_alignment:1;
uint32_t x_offset:7;
} ss5;
-};
+
+ uint32_t ss6; /* unused */
+ uint32_t ss7; /* unused */
+} gen6_surface_state_t;
+
+typedef struct gen7_surface_state
+{
+ struct {
+ uint32_t cube_pos_z:1;
+ uint32_t cube_neg_z:1;
+ uint32_t cube_pos_y:1;
+ uint32_t cube_neg_y:1;
+ uint32_t cube_pos_x:1;
+ uint32_t cube_neg_x:1;
+ uint32_t media_boundary_pixel_mode:2;
+ uint32_t render_cache_rw_mode:1;
+ uint32_t pad1:1;
+ uint32_t surface_array_spacing:1;
+ uint32_t vertical_line_stride_offset:1;
+ uint32_t vertical_line_stride:1;
+ uint32_t tile_walk:1;
+ uint32_t tiled_surface:1;
+ uint32_t horizontal_alignment:1;
+ uint32_t vertical_alignment:2;
+ uint32_t surface_format:9;
+ uint32_t pad0:1;
+ uint32_t surface_array:1;
+ uint32_t surface_type:3;
+ } ss0;
+
+ struct {
+ uint32_t base_addr;
+ } ss1;
+
+ struct {
+ uint32_t width:14;
+ uint32_t pad1:2;
+ uint32_t height:14;
+ uint32_t pad0:2;
+ } ss2;
+
+ struct {
+ uint32_t pitch:18;
+ uint32_t pad0:3;
+ uint32_t depth:11;
+ } ss3;
+
+ uint32_t ss4;
+
+ struct {
+ uint32_t mip_count:4;
+ uint32_t surface_min_load:4;
+ uint32_t pad2:6;
+ uint32_t coherence_type:1;
+ uint32_t stateless_force_write_thru:1;
+ uint32_t surface_object_control_state:4;
+ uint32_t y_offset:4;
+ uint32_t pad0:1;
+ uint32_t x_offset:7;
+ } ss5;
+
+ uint32_t ss6; /* unused */
+ uint32_t ss7; /* unused */
+
+} gen7_surface_state_t;
+
+STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t));
+static const size_t surface_state_sz = sizeof(gen6_surface_state_t);
+
+typedef struct gen6_vfe_state_inline
+{
+ struct {
+ uint32_t per_thread_scratch_space:4;
+ uint32_t pad3:3;
+ uint32_t extend_vfe_state_present:1;
+ uint32_t pad2:2;
+ uint32_t scratch_base:22;
+ } vfe0;
+
+ struct {
+ uint32_t debug_counter_control:2;
+ uint32_t gpgpu_mode:1; /* 0 for SNB!!! */
+ uint32_t gateway_mmio_access:2;
+ uint32_t fast_preempt:1;
+ uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */
+ uint32_t reset_gateway_timer:1;
+ uint32_t urb_entries:8;
+ uint32_t max_threads:16;
+ } vfe1;
+
+ struct {
+ uint32_t pad8:8;
+ uint32_t debug_object_id:24;
+ } vfe2;
+
+ struct {
+ uint32_t curbe_size:16; /* in GRFs */
+ uint32_t urbe_size:16; /* in GRFs */
+ } vfe3;
+
+ struct {
+ uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */
+ } vfe4;
+
+ struct {
+ uint32_t scoreboard0_dx:4;
+ uint32_t scoreboard0_dy:4;
+ uint32_t scoreboard1_dx:4;
+ uint32_t scoreboard1_dy:4;
+ uint32_t scoreboard2_dx:4;
+ uint32_t scoreboard2_dy:4;
+ uint32_t scoreboard3_dx:4;
+ uint32_t scoreboard3_dy:4;
+ } vfe5;
+
+ struct {
+ uint32_t scoreboard4_dx:4;
+ uint32_t scoreboard4_dy:4;
+ uint32_t scoreboard5_dx:4;
+ uint32_t scoreboard5_dy:4;
+ uint32_t scoreboard6_dx:4;
+ uint32_t scoreboard6_dy:4;
+ uint32_t scoreboard7_dx:4;
+ uint32_t scoreboard7_dy:4;
+ } vfe6;
+} gen6_vfe_state_inline_t;
#define BITFIELD_BIT(X) 1
#define BITFIELD_RANGE(X,Y) ((Y) - (X) + 1)
-typedef struct i965_pipe_control
+typedef struct gen6_pipe_control
{
struct
{
@@ -300,9 +323,9 @@ typedef struct i965_pipe_control
{
uint64_t data;
} qw0;
-} i965_pipe_control_t;
-
-typedef struct i965_sampler_state
+} gen6_pipe_control_t;
+#if 0
+typedef struct gen6_sampler_state
{
struct
{
@@ -349,7 +372,8 @@ typedef struct i965_sampler_state
uint32_t monochrome_filter_width:3;
uint32_t monochrome_filter_height:3;
} ss3;
-} i965_sampler_state_t;
+} gen6_sampler_state_t;
+#endif
typedef struct gen7_sampler_state
{