diff options
-rwxr-xr-x | kernels/Mandelbrot_Kernels.cl | 243 | ||||
-rw-r--r-- | kernels/test_copy_image.cl | 14 | ||||
-rw-r--r-- | src/cl_api.c | 19 | ||||
-rw-r--r-- | src/cl_command_queue_gen6.c | 29 | ||||
-rw-r--r-- | src/cl_gt_device.h | 10 | ||||
-rw-r--r-- | src/cl_kernel.c | 59 | ||||
-rw-r--r-- | src/cl_mem.c | 207 | ||||
-rw-r--r-- | src/cl_mem.h | 15 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 259 | ||||
-rw-r--r-- | src/intel/intel_structs.h | 324 |
10 files changed, 479 insertions, 700 deletions
diff --git a/kernels/Mandelbrot_Kernels.cl b/kernels/Mandelbrot_Kernels.cl deleted file mode 100755 index 97e21e05..00000000 --- a/kernels/Mandelbrot_Kernels.cl +++ /dev/null @@ -1,243 +0,0 @@ -/** -* A fractal generator that calculates the mandlebrot set -* http://en.wikipedia.org/wiki/Mandelbrot_set -* @param mandelbrotImage mandelbrot images is stored in this -* @param scale Represents the distance from which the fractal -* is being seen if this is greater more area and -* less detail is seen -* @param maxIterations More iterations gives more accurate mandelbrot image -* @param width size of the image -*/ - -__kernel void mandelbrot_vector (__global uchar4 * mandelbrotImage, - const float posx, - const float posy, - const float stepSizeX, - const float stepSizeY, - const uint maxIterations, - const int width, - const int bench - ) -{ - int tid = get_global_id(0); - - int i = tid % (width / 4); - int j = tid / (width / 4); - - int4 veci = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3}; - int4 vecj = {j, j, j, j}; - - float4 x0; - x0.s0 = (float)(posx + stepSizeX * (float)veci.s0); - x0.s1 = (float)(posx + stepSizeX * (float)veci.s1); - x0.s2 = (float)(posx + stepSizeX * (float)veci.s2); - x0.s3 = (float)(posx + stepSizeX * (float)veci.s3); - float4 y0; - y0.s0 = (float)(posy + stepSizeY * (float)vecj.s0); - y0.s1 = (float)(posy + stepSizeY * (float)vecj.s1); - y0.s2 = (float)(posy + stepSizeY * (float)vecj.s2); - y0.s3 = (float)(posy + stepSizeY * (float)vecj.s3); - - float4 x = x0; - float4 y = y0; - - uint iter=0; - float4 tmp; - int4 stay; - int4 ccount = 0; - - stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f; - stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f; - stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f; - stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f; - float4 savx = x; - float4 savy = y; - for(iter=0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < maxIterations); iter+= 16) - { - x = savx; - y = savy; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - // Two iterations - tmp = x * x + x0 - y * y; - y = 2.0f * x * y + y0; - x = tmp * tmp + x0 - y * y; - y = 2.0f * tmp * y + y0; - - stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f; - stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f; - stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f; - stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f; - - savx.s0 = (stay.s0 ? x.s0 : savx.s0); - savx.s1 = (stay.s1 ? x.s1 : savx.s1); - savx.s2 = (stay.s2 ? x.s2 : savx.s2); - savx.s3 = (stay.s3 ? x.s3 : savx.s3); - savy.s0 = (stay.s0 ? y.s0 : savy.s0); - savy.s1 = (stay.s1 ? y.s1 : savy.s1); - savy.s2 = (stay.s2 ? y.s2 : savy.s2); - savy.s3 = (stay.s3 ? y.s3 : savy.s3); - ccount += stay*16; - } - // Handle remainder - if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3)) - { - iter = 16; - do - { - x = savx; - y = savy; - stay.s0 = ((x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f) && - (ccount.s0 < maxIterations); - stay.s1 = ((x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f) && - (ccount.s1 < maxIterations); - stay.s2 = ((x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f) && - (ccount.s2 < maxIterations); - stay.s3 = ((x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f) && - (ccount.s3 < maxIterations); - tmp = x; - x = x * x + x0 - y * y; - y = 2.0f * tmp * y + y0; - ccount += stay; - iter--; - savx.s0 = (stay.s0 ? x.s0 : savx.s0); - savx.s1 = (stay.s1 ? x.s1 : savx.s1); - savx.s2 = (stay.s2 ? x.s2 : savx.s2); - savx.s3 = (stay.s3 ? x.s3 : savx.s3); - savy.s0 = (stay.s0 ? y.s0 : savy.s0); - savy.s1 = (stay.s1 ? y.s1 : savy.s1); - savy.s2 = (stay.s2 ? y.s2 : savy.s2); - savy.s3 = (stay.s3 ? y.s3 : savy.s3); - } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter); - } - x = savx; - y = savy; - float4 fc = convert_float4(ccount); - fc.s0 = (float)ccount.s0 + 1 - - native_log2(native_log2(x.s0 * x.s0 + y.s0 * y.s0)); - fc.s1 = (float)ccount.s1 + 1 - - native_log2(native_log2(x.s1 * x.s1 + y.s1 * y.s1)); - fc.s2 = (float)ccount.s2 + 1 - - native_log2(native_log2(x.s2 * x.s2 + y.s2 * y.s2)); - fc.s3 = (float)ccount.s3 + 1 - - native_log2(native_log2(x.s3 * x.s3 + y.s3 * y.s3)); - - float c = fc.s0 * 2.0f * 3.1416f / 256.0f; - uchar4 color[4]; - color[0].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255; - color[0].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[0].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[0].s3 = 0xff; - if (ccount.s0 == maxIterations) - { - color[0].s0 = 0; - color[0].s1 = 0; - color[0].s2 = 0; - } - if (bench) - { - color[0].s0 = ccount.s0 & 0xff; - color[0].s1 = (ccount.s0 & 0xff00) >> 8; - color[0].s2 = (ccount.s0 & 0xff0000) >> 16; - color[0].s3 = (ccount.s0 & 0xff000000) >> 24; - } - mandelbrotImage[4 * tid] = color[0]; - c = fc.s1 * 2.0f * 3.1416f / 256.0f; - color[1].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255; - color[1].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[1].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[1].s3 = 0xff; - if (ccount.s1 == maxIterations) - { - color[1].s0 = 0; - color[1].s1 = 0; - color[1].s2 = 0; - } - if (bench) - { - color[1].s0 = ccount.s1 & 0xff; - color[1].s1 = (ccount.s1 & 0xff00) >> 8; - color[1].s2 = (ccount.s1 & 0xff0000) >> 16; - color[1].s3 = (ccount.s1 & 0xff000000) >> 24; - } - mandelbrotImage[4 * tid + 1] = color[1]; - c = fc.s2 * 2.0f * 3.1416f / 256.0f; - color[2].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255; - color[2].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[2].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[2].s3 = 0xff; - if (ccount.s2 == maxIterations) - { - color[2].s0 = 0; - color[2].s1 = 0; - color[2].s2 = 0; - } - if (bench) - { - color[2].s0 = ccount.s2 & 0xff; - color[2].s1 = (ccount.s2 & 0xff00) >> 8; - color[2].s2 = (ccount.s2 & 0xff0000) >> 16; - color[2].s3 = (ccount.s2 & 0xff000000) >> 24; - } - mandelbrotImage[4 * tid + 2] = color[2]; - c = fc.s3 * 2.0f * 3.1416f / 256.0f; - color[3].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255; - color[3].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[3].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255; - color[3].s3 = 0xff; - if (ccount.s3 == maxIterations) - { - color[3].s0 = 0; - color[3].s1 = 0; - color[3].s2 = 0; - } - if (bench) - { - color[3].s0 = ccount.s3 & 0xff; - color[3].s1 = (ccount.s3 & 0xff00) >> 8; - color[3].s2 = (ccount.s3 & 0xff0000) >> 16; - color[3].s3 = (ccount.s3 & 0xff000000) >> 24; - } - mandelbrotImage[4 * tid + 3] = color[3]; -} diff --git a/kernels/test_copy_image.cl b/kernels/test_copy_image.cl index e14ce682..1ccc581d 100644 --- a/kernels/test_copy_image.cl +++ b/kernels/test_copy_image.cl @@ -1,9 +1,6 @@ -__constant sampler_t s0 = CLK_NORMALIZED_COORDS_FALSE | - CLK_ADDRESS_CLAMP | - CLK_FILTER_NEAREST; -__constant sampler_t s1 = CLK_NORMALIZED_COORDS_TRUE | - CLK_ADDRESS_CLAMP | - CLK_FILTER_NEAREST; +__constant sampler_t s = CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_NEAREST; __kernel void test_copy_image(__read_only image2d_t src, __global uchar4 *dst) @@ -11,8 +8,7 @@ test_copy_image(__read_only image2d_t src, const int x = (int) get_global_id(0); const int y = (int) get_global_id(1); const int id = x + y * get_image_width(src); - const uchar4 from = convert_uchar4(read_imageui(src, s0, (int2)(x,y))); - const uchar4 from0 = convert_uchar4(read_imageui(src, s1, (int2)(x,y))); - dst[id] = from + from0; + const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y))); + dst[id] = from; } diff --git a/src/cl_api.c b/src/cl_api.c index 5d7d0ea1..e6c7dfba 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -213,6 +213,7 @@ clCreateBuffer(cl_context context, cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); + mem = cl_mem_new(context, flags, size, host_ptr, &err); error: if (errcode_ret) @@ -246,8 +247,22 @@ clCreateImage2D(cl_context context, void * host_ptr, cl_int * errcode_ret) { - NOT_IMPLEMENTED; - return NULL; + cl_mem mem = NULL; + cl_int err = CL_SUCCESS; + CHECK_CONTEXT (context); + + mem = cl_mem_new_image2D(context, + flags, + image_format, + image_width, + image_height, + image_row_pitch, + host_ptr, + errcode_ret); +error: + if (errcode_ret) + *errcode_ret = err; + return mem; } cl_mem diff --git a/src/cl_command_queue_gen6.c b/src/cl_command_queue_gen6.c index 78583698..d8464446 100644 --- a/src/cl_command_queue_gen6.c +++ b/src/cl_command_queue_gen6.c @@ -61,10 +61,10 @@ cl_kernel_compute_batch_sz(cl_kernel k, size_t wk_grp_n, size_t thread_n) static INLINE void cl_command_queue_enqueue_wk_grp(cl_command_queue queue, - cl_local_id_t **ids, - const cl_inline_header_t *header, - uint32_t thread_n, - uint32_t barrierID) + cl_local_id_t **ids, + const cl_inline_header_t *header, + uint32_t thread_n, + uint32_t barrierID) { intel_gpgpu_t *gpgpu = queue->gpgpu; uint32_t i; @@ -85,10 +85,10 @@ cl_command_queue_enqueue_wk_grp(cl_command_queue queue, LOCAL cl_int cl_command_queue_ND_range_gen6(cl_command_queue queue, - cl_kernel ker, - const size_t *global_wk_off, - const size_t *global_wk_sz, - const size_t *local_wk_sz) + cl_kernel ker, + const size_t *global_wk_off, + const size_t *global_wk_sz, + const size_t *local_wk_sz) { cl_context ctx = queue->ctx; intel_gpgpu_t *gpgpu = queue->gpgpu; @@ -180,7 +180,7 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue, /* Start a new batch buffer */ gpgpu_batch_reset(gpgpu, batch_sz); gpgpu_batch_start(gpgpu); -#if 1 + /* Push all media objects. We implement three paths to make it (a bit) faster. * Local IDs are shared from work group to work group. We allocate once the * buffers and reuse them @@ -201,16 +201,13 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue, cl_command_queue_enqueue_wk_grp(queue, ids, &header, thread_n, barrierID); barrierID = (barrierID + 1) % 16; } -#endif + gpgpu_batch_end(gpgpu, 0); gpgpu_flush(gpgpu); - if (slm_bo) - drm_intel_bo_unreference(slm_bo); - if (private_bo) - drm_intel_bo_unreference(private_bo); - if (scratch_bo) - drm_intel_bo_unreference(scratch_bo); + if (slm_bo) drm_intel_bo_unreference(slm_bo); + if (private_bo) drm_intel_bo_unreference(private_bo); + if (scratch_bo) drm_intel_bo_unreference(scratch_bo); error: cl_free(ids[0]); diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index d66d6ead..8717a166 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -40,11 +40,11 @@ .image_support = CL_FALSE, .max_read_image_args = 0, .max_write_image_args = 0, -.image2d_max_width = 0, -.image2d_max_height = 0, -.image3d_max_width = 0, -.image3d_max_height = 0, -.image3d_max_depth = 0, +.image2d_max_width = 8192, +.image2d_max_height = 8192, +.image3d_max_width = 8192, +.image3d_max_height = 8192, +.image3d_max_depth = 8192, .max_samplers = 0, .mem_base_addr_align = sizeof(cl_uint) * 8, .min_data_type_align_size = sizeof(cl_uint), diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 990cb0b6..c1997863 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -352,6 +352,7 @@ cl_kernel_setup_patch_list(cl_kernel k, const char *patch, size_t sz) info->curbe.sz = *(uint32_t *) (patch + sizeof(cl_patch_item_header_t)); info->curbe.offset = 0; break; + case PATCH_TOKEN_IMAGE_MEMORY_KERNEL_ARGUMENT: case PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT: case PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT: { @@ -360,7 +361,13 @@ cl_kernel_setup_patch_list(cl_kernel k, const char *patch, size_t sz) TRY_ALLOC (arg_info, CALLOC(cl_arg_info_t)); arg_info->arg_index = from->index; arg_info->offset = from->offset; - arg_info->type = OCLRT_ARG_TYPE_BUFFER; + if (item->token == PATCH_TOKEN_GLOBAL_MEMORY_KERNEL_ARGUMENT) + arg_info->type = OCLRT_ARG_TYPE_BUFFER; + else if (item->token == PATCH_TOKEN_CONSTANT_MEMORY_KERNEL_ARGUMENT) + arg_info->type = OCLRT_ARG_TYPE_CONST; + else + arg_info->type = OCLRT_ARG_TYPE_IMAGE; + arg_info->sz = sizeof(cl_mem); arg_info->is_patched = CL_FALSE; @@ -444,6 +451,54 @@ error: } #undef ASSOC_ITEM +typedef struct i965_sampler_state +{ + struct + { + uint32_t shadow_function:3; + uint32_t lod_bias:11; + uint32_t min_filter:3; + uint32_t mag_filter:3; + uint32_t mip_filter:2; + uint32_t base_level:5; + uint32_t min_mag_neq:1; + uint32_t lod_preclamp:1; + uint32_t default_color_mode:1; + uint32_t pad0:1; + uint32_t disable:1; + } ss0; + + struct + { + uint32_t r_wrap_mode:3; + uint32_t t_wrap_mode:3; + uint32_t s_wrap_mode:3; + uint32_t cube_control_mode:1; + uint32_t pad:2; + uint32_t max_lod:10; + uint32_t min_lod:10; + } ss1; + + + struct + { + uint32_t pad:5; + uint32_t default_color_pointer:27; + } ss2; + + struct + { + uint32_t non_normalized_coord:1; + uint32_t pad:12; + uint32_t address_round:6; + uint32_t max_aniso:3; + uint32_t chroma_key_mode:1; + uint32_t chroma_key_index:2; + uint32_t chroma_key_enable:1; + uint32_t monochrome_filter_width:3; + uint32_t monochrome_filter_height:3; + } ss3; +} i965_sampler_state_t; LOCAL int cl_kernel_setup(cl_kernel k, const char *ker) @@ -668,6 +723,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) /* Is it a buffer / image / sampler to set */ if ((arg_info = cl_kernel_get_arg_info(k, index)) != NULL) { switch (arg_info->type) { + case OCLRT_ARG_TYPE_CONST: + case OCLRT_ARG_TYPE_IMAGE: case OCLRT_ARG_TYPE_BUFFER: { /* Check the buffer consistency */ diff --git a/src/cl_mem.c b/src/cl_mem.c index f37c846f..7151df72 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -21,6 +21,7 @@ #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" +#include "cl_device_id.h" #include "intel_bufmgr.h" /* libdrm_intel */ @@ -29,12 +30,8 @@ #include <assert.h> #include <stdio.h> -LOCAL cl_mem -cl_mem_new(cl_context ctx, - cl_mem_flags flags, - size_t sz, - void *data, - cl_int *errcode_ret) +static cl_mem +cl_mem_allocate(cl_context ctx, cl_mem_flags flags, size_t sz, cl_int *errcode) { drm_intel_bufmgr *bufmgr = NULL; cl_mem mem = NULL; @@ -50,10 +47,6 @@ cl_mem_new(cl_context ctx, err = CL_INVALID_BUFFER_SIZE; goto error; } - if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) { - err = CL_INVALID_HOST_PTR; - goto error; - } /* Allocate and inialize the structure itself */ TRY_ALLOC (mem, CALLOC(struct _cl_mem)); @@ -73,16 +66,6 @@ cl_mem_new(cl_context ctx, err = CL_MEM_ALLOCATION_FAILURE; goto error; } - /* Copy the data if required */ - if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */ - drm_intel_bo_subdata(mem->bo, 0, sz, data); - #if 0 - if (UNLIKELY(drm_intel_bo_subdata(mem->bo, 0, sz, data) != 0)) { - err = CL_MEM_ALLOCATION_FAILURE; - goto error; - } - #endif - /* Append the buffer in the context buffer list */ pthread_mutex_lock(&ctx->buffer_lock); @@ -95,6 +78,190 @@ cl_mem_new(cl_context ctx, cl_context_add_ref(ctx); exit: + if (errcode) + *errcode = err; + return mem; +error: + cl_mem_delete(mem); + mem = NULL; + goto exit; + +} + +LOCAL cl_mem +cl_mem_new(cl_context ctx, + cl_mem_flags flags, + size_t sz, + void *data, + cl_int *errcode_ret) +{ + cl_int err = CL_SUCCESS; + cl_mem mem = NULL; + + /* Check flags consistency */ + if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) { + err = CL_INVALID_HOST_PTR; + goto error; + } + + /* Create the buffer in video memory */ + mem = cl_mem_allocate(ctx, flags, sz, &err); + if (mem == NULL || err != CL_SUCCESS) + goto error; + + /* Copy the data if required */ + if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */ + drm_intel_bo_subdata(mem->bo, 0, sz, data); + +exit: + if (errcode_ret) + *errcode_ret = err; + return mem; +error: + cl_mem_delete(mem); + mem = NULL; + goto exit; +} + +static cl_int +cl_mem_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp) +{ + assert(bpp); + + const uint32_t type = fmt->image_channel_data_type; + const uint32_t order = fmt->image_channel_order; + switch (type) { +#define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE; + DECL_BPP(CL_SNORM_INT8, 1); break; + DECL_BPP(CL_SNORM_INT16, 2); break; + DECL_BPP(CL_UNORM_INT8, 1); break; + DECL_BPP(CL_UNORM_INT16, 2); break; + DECL_BPP(CL_UNORM_SHORT_565, 2); + if (order != CL_RGBx && order != CL_RGB) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + DECL_BPP(CL_UNORM_SHORT_555, 2); + if (order != CL_RGBx && order != CL_RGB) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + DECL_BPP(CL_UNORM_INT_101010, 4); + if (order != CL_RGBx && order != CL_RGB) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + DECL_BPP(CL_SIGNED_INT8, 1); break; + DECL_BPP(CL_SIGNED_INT16, 2); break; + DECL_BPP(CL_SIGNED_INT32, 4); break; + DECL_BPP(CL_UNSIGNED_INT8, 1); break; + DECL_BPP(CL_UNSIGNED_INT16, 2); break; + DECL_BPP(CL_UNSIGNED_INT32, 4); break; + DECL_BPP(CL_HALF_FLOAT, 2); break; + DECL_BPP(CL_FLOAT, 4); break; +#undef DECL_BPP + default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + }; + + switch (order) { + case CL_R: break; + case CL_A: break; + case CL_RA: *bpp *= 2; break; + case CL_RG: *bpp *= 2; break; + case CL_Rx: *bpp *= 2; break; + case CL_INTENSITY: + case CL_LUMINANCE: + if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && + type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && + type != CL_HALF_FLOAT && type != CL_FLOAT) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + case CL_RGB: + case CL_RGBx: + if (type != CL_UNORM_SHORT_555 && + type != CL_UNORM_SHORT_565 && + type != CL_UNORM_INT_101010) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + break; + case CL_RGBA: *bpp *= 4; break; + case CL_ARGB: + case CL_BGRA: + if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 && + type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8) + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + *bpp *= 4; + break; + default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + }; + + return CL_SUCCESS; +} + +LOCAL cl_mem +cl_mem_new_image2D(cl_context ctx, + cl_mem_flags flags, + const cl_image_format *fmt, + size_t w, + size_t h, + size_t pitch, + void *data, + cl_int *errcode_ret) +{ + cl_int err = CL_SUCCESS; + cl_mem mem = NULL; + uint32_t bpp = 0; + size_t sz = 0; + + /* Check flags consistency */ + if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) { + err = CL_INVALID_HOST_PTR; + goto error; + } + + /* Get the size of each pixel */ + if (UNLIKELY((err = cl_mem_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS)) + goto error; + + /* See if the user parameters match */ +#define DO_IMAGE_ERROR \ + do { \ + err = CL_INVALID_IMAGE_SIZE; \ + goto error; \ + } while (0); + if (UNLIKELY(w == 0)) DO_IMAGE_ERROR; + if (UNLIKELY(h == 0)) DO_IMAGE_ERROR; + if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; + if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; + if (UNLIKELY(bpp*w > pitch)) DO_IMAGE_ERROR; +#undef DO_IMAGE_ERROR + + /* Create the buffer in video memory */ + sz = w * h * bpp; + mem = cl_mem_allocate(ctx, flags, sz, &err); + if (mem == NULL || err != CL_SUCCESS) + goto error; + + /* Copy the data if required */ + if (flags & CL_MEM_COPY_HOST_PTR) {/* TODO check other flags too */ + size_t x, y, p; + char *dst; + drm_intel_bo_map(mem->bo, 1); + dst = mem->bo->virtual; + for (y = 0; y < h; ++y) { + char *src = (char*) data + pitch * y; + for (x = 0; x < w; ++x) { + for (p = 0; p < bpp; ++p) + dst[p] = src[p]; + dst += bpp; + src += bpp; + } + } + drm_intel_bo_unmap(mem->bo); + } + mem->w = w; + mem->h = h; + mem->fmt = *fmt; + mem->pitch = w * bpp; + mem->is_image = 1; + +exit: if (errcode_ret) *errcode_ret = err; return mem; diff --git a/src/cl_mem.h b/src/cl_mem.h index 96ff8da0..9a9ceb36 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -33,12 +33,25 @@ struct _cl_mem { struct _drm_intel_bo *bo; /* Data in GPU memory */ cl_mem prev, next; /* We chain the memory buffers together */ cl_context ctx; /* Context it belongs to */ - cl_mem_flags flags; /* Flags specified at the creation time */ + cl_mem_flags flags; /* Flags specified at the creation time */ + uint32_t is_image; /* Indicate if this is an image or not */ + cl_image_format fmt; /* only for images */ + size_t w,h,depth,pitch; /* only for images (depth is only for 3d images) */ }; /* Create a new memory object and initialize it with possible user data */ extern cl_mem cl_mem_new(cl_context, cl_mem_flags, size_t, void*, cl_int*); +/* Idem but this is an image */ +extern cl_mem cl_mem_new_image2D(cl_context, + cl_mem_flags, + const cl_image_format*, + size_t w, + size_t h, + size_t pitch, + void *, + cl_int *); + /* Unref the object and delete it if no more reference */ extern void cl_mem_delete(cl_mem); diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index a4b476be..f9e7db1d 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -42,256 +42,8 @@ #define MO_RETAIN_BIT (1 << 28) #define SAMPLER_STATE_SIZE (16) -typedef struct gen6_surface_state -{ - struct { - uint32_t cube_pos_z:1; - uint32_t cube_neg_z:1; - uint32_t cube_pos_y:1; - uint32_t cube_neg_y:1; - uint32_t cube_pos_x:1; - uint32_t cube_neg_x:1; - uint32_t pad:2; - uint32_t render_cache_read_mode:1; - uint32_t cube_map_corner_mode:1; - uint32_t mipmap_layout_mode:1; - uint32_t vert_line_stride_ofs:1; - uint32_t vert_line_stride:1; - uint32_t color_blend:1; - uint32_t writedisable_blue:1; - uint32_t writedisable_green:1; - uint32_t writedisable_red:1; - uint32_t writedisable_alpha:1; - uint32_t surface_format:9; - uint32_t data_return_format:1; - uint32_t pad0:1; - uint32_t surface_type:3; - } ss0; - - struct { - uint32_t base_addr; - } ss1; - - struct { - uint32_t render_target_rotation:2; - uint32_t mip_count:4; - uint32_t width:13; - uint32_t height:13; - } ss2; - - struct { - uint32_t tile_walk:1; - uint32_t tiled_surface:1; - uint32_t pad:1; - uint32_t pitch:18; - uint32_t depth:11; - } ss3; - - struct { - uint32_t multisample_pos_index:3; - uint32_t pad:1; - uint32_t multisample_count:3; - uint32_t pad1:1; - uint32_t rt_view_extent:9; - uint32_t min_array_elt:11; - uint32_t min_lod:4; - } ss4; - - struct { - uint32_t pad:16; - uint32_t cache_control:2; /* different values for GT and IVB */ - uint32_t gfdt:1; /* allows selective flushing of LLC (e.g. for scanout) */ - uint32_t encrypted_data:1; - uint32_t y_offset:4; - uint32_t vertical_alignment:1; - uint32_t x_offset:7; - } ss5; - - uint32_t ss6; /* unused */ - uint32_t ss7; /* unused */ -} gen6_surface_state_t; - -typedef struct gen7_surface_state -{ - struct { - uint32_t cube_pos_z:1; - uint32_t cube_neg_z:1; - uint32_t cube_pos_y:1; - uint32_t cube_neg_y:1; - uint32_t cube_pos_x:1; - uint32_t cube_neg_x:1; - uint32_t media_boundary_pixel_mode:2; - uint32_t render_cache_rw_mode:1; - uint32_t pad1:1; - uint32_t surface_array_spacing:1; - uint32_t vertical_line_stride_offset:1; - uint32_t vertical_line_stride:1; - uint32_t tile_walk:1; - uint32_t tiled_surface:1; - uint32_t horizontal_alignment:1; - uint32_t vertical_alignment:2; - uint32_t surface_format:9; - uint32_t pad0:1; - uint32_t surface_array:1; - uint32_t surface_type:3; - } ss0; - - struct { - uint32_t base_addr; - } ss1; - - struct { - uint32_t width:14; - uint32_t pad1:2; - uint32_t height:14; - uint32_t pad0:2; - } ss2; - - struct { - uint32_t pitch:18; - uint32_t pad0:3; - uint32_t depth:11; - } ss3; - - uint32_t ss4; - - struct { - uint32_t mip_count:4; - uint32_t surface_min_load:4; - uint32_t pad2:6; - uint32_t coherence_type:1; - uint32_t stateless_force_write_thru:1; - uint32_t surface_object_control_state:4; - uint32_t y_offset:4; - uint32_t pad0:1; - uint32_t x_offset:7; - } ss5; - - uint32_t ss6; /* unused */ - uint32_t ss7; /* unused */ - -} gen7_surface_state_t; - #define GEN7_CACHED_IN_LLC 3 -STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t)); -static const size_t surface_state_sz = sizeof(gen6_surface_state_t); - -typedef struct gen6_vfe_state_inline -{ - struct { - uint32_t per_thread_scratch_space:4; - uint32_t pad3:3; - uint32_t extend_vfe_state_present:1; - uint32_t pad2:2; - uint32_t scratch_base:22; - } vfe0; - - struct { - uint32_t debug_counter_control:2; - uint32_t gpgpu_mode:1; /* 0 for SNB!!! */ - uint32_t gateway_mmio_access:2; - uint32_t fast_preempt:1; - uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */ - uint32_t reset_gateway_timer:1; - uint32_t urb_entries:8; - uint32_t max_threads:16; - } vfe1; - - struct { - uint32_t pad8:8; - uint32_t debug_object_id:24; - } vfe2; - - struct { - uint32_t curbe_size:16; /* in GRFs */ - uint32_t urbe_size:16; /* in GRFs */ - } vfe3; - - struct { - uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */ - } vfe4; - - struct { - uint32_t scoreboard0_dx:4; - uint32_t scoreboard0_dy:4; - uint32_t scoreboard1_dx:4; - uint32_t scoreboard1_dy:4; - uint32_t scoreboard2_dx:4; - uint32_t scoreboard2_dy:4; - uint32_t scoreboard3_dx:4; - uint32_t scoreboard3_dy:4; - } vfe5; - - struct { - uint32_t scoreboard4_dx:4; - uint32_t scoreboard4_dy:4; - uint32_t scoreboard5_dx:4; - uint32_t scoreboard5_dy:4; - uint32_t scoreboard6_dx:4; - uint32_t scoreboard6_dy:4; - uint32_t scoreboard7_dx:4; - uint32_t scoreboard7_dy:4; - } vfe6; -} gen6_vfe_state_inline_t; - -typedef struct gen6_interface_descriptor -{ - struct { - uint32_t pad6:6; - uint32_t kernel_start_pointer:26; - } desc0; - - struct { - uint32_t pad:7; - uint32_t software_exception:1; - uint32_t pad2:3; - uint32_t maskstack_exception:1; - uint32_t pad3:1; - uint32_t illegal_opcode_exception:1; - uint32_t pad4:2; - uint32_t floating_point_mode:1; - uint32_t thread_priority:1; - uint32_t single_program_flow:1; - uint32_t pad5:1; - uint32_t pad6:6; - uint32_t pad7:6; - } desc1; - - struct { - uint32_t pad:2; - uint32_t sampler_count:3; - uint32_t sampler_state_pointer:27; - } desc2; - - struct { - uint32_t binding_table_entry_count:5; /* prefetch entries only */ - uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */ - } desc3; - - struct { - uint32_t curbe_read_offset:16; /* in GRFs */ - uint32_t curbe_read_len:16; /* in GRFs */ - } desc4; - - struct { - uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */ - uint32_t barrier_return_byte:8; - uint32_t slm_sz:5; /* 0..16 - 0K..64K */ - uint32_t barrier_enable:1; - uint32_t rounding_mode:2; - uint32_t barrier_return_grf_offset:8; - } desc5; - - struct { - uint32_t reserved_mbz; - } desc6; - - struct { - uint32_t reserved_mbz; - } desc7; -} gen6_interface_descriptor_t; - /* No dependency on Gen specific structures */ struct opaque_sampler_state { char opaque[SAMPLER_STATE_SIZE]; @@ -337,7 +89,7 @@ struct intel_gpgpu }; /* Be sure that the size is still valid */ -STATIC_ASSERT(sizeof(struct opaque_sampler_state) == sizeof(struct i965_sampler_state)); +STATIC_ASSERT(sizeof(struct opaque_sampler_state) == 16);//sizeof(struct i965_sampler_state)); LOCAL intel_gpgpu_t* intel_gpgpu_new(intel_driver_t *drv) @@ -560,11 +312,11 @@ enum GFX3DSTATE_PIPELINED_SUBOPCODE static void gpgpu_pipe_control(intel_gpgpu_t *state) { - BEGIN_BATCH(state->batch, sizeof32(i965_pipe_control_t)); - i965_pipe_control_t* pc = (i965_pipe_control_t*) + BEGIN_BATCH(state->batch, sizeof32(gen6_pipe_control_t)); + gen6_pipe_control_t* pc = (gen6_pipe_control_t*) intel_batchbuffer_alloc_space(state->batch, 0); memset(pc, 0, sizeof(*pc)); - pc->dw0.length = sizeof32(i965_pipe_control_t) - 2; + pc->dw0.length = sizeof32(gen6_pipe_control_t) - 2; pc->dw0.instruction_subopcode = GFX3DSUBOP_3DCONTROL; pc->dw0.instruction_opcode = GFX3DOP_3DCONTROL; pc->dw0.instruction_pipeline = PIPE_3D; @@ -715,7 +467,8 @@ gpgpu_state_init(intel_gpgpu_t *state, dri_bo_unreference(state->sampler_state_b.bo); bo = dri_bo_alloc(state->drv->bufmgr, "sample states", - MAX_SAMPLERS * sizeof(struct i965_sampler_state), + //MAX_SAMPLERS * sizeof(struct i965_sampler_state), + MAX_SAMPLERS * 16, 32); assert(bo); state->sampler_state_b.bo = bo; diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index f19c1b90..134b2709 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -22,142 +22,10 @@ #include <stdint.h> -struct i965_vfe_state -{ - struct { - uint32_t per_thread_scratch_space:4; - uint32_t pad3:3; - uint32_t extend_vfe_state_present:1; - uint32_t pad2:2; - uint32_t scratch_base:22; - } vfe0; - - struct { - uint32_t debug_counter_control:2; - uint32_t children_present:1; - uint32_t vfe_mode:4; - uint32_t pad2:2; - uint32_t num_urb_entries:7; - uint32_t urb_entry_alloc_size:9; - uint32_t max_threads:7; - } vfe1; - - struct { - uint32_t pad4:4; - uint32_t interface_descriptor_base:28; - } vfe2; -}; - -struct i965_vfe_state_ex -{ - struct { - uint32_t pad:8; - uint32_t obj_id:24; - } vfex0; - - struct { - uint32_t residual_grf_offset:5; - uint32_t pad0:3; - uint32_t weight_grf_offset:5; - uint32_t pad1:3; - uint32_t residual_data_offset:8; - uint32_t sub_field_present_flag:2; - uint32_t residual_data_fix_offset:1; - uint32_t pad2:5; - }vfex1; - - struct { - uint32_t remap_index_0:4; - uint32_t remap_index_1:4; - uint32_t remap_index_2:4; - uint32_t remap_index_3:4; - uint32_t remap_index_4:4; - uint32_t remap_index_5:4; - uint32_t remap_index_6:4; - uint32_t remap_index_7:4; - }remap_table0; - - struct { - uint32_t remap_index_8:4; - uint32_t remap_index_9:4; - uint32_t remap_index_10:4; - uint32_t remap_index_11:4; - uint32_t remap_index_12:4; - uint32_t remap_index_13:4; - uint32_t remap_index_14:4; - uint32_t remap_index_15:4; - } remap_table1; - - struct { - uint32_t scoreboard_mask:8; - uint32_t pad:22; - uint32_t type:1; - uint32_t enable:1; - } scoreboard0; - - struct { - uint32_t ignore; - } scoreboard1; - - struct { - uint32_t ignore; - } scoreboard2; - - uint32_t pad; -}; - -struct i965_vld_state +typedef struct gen6_interface_descriptor { struct { uint32_t pad6:6; - uint32_t scan_order:1; - uint32_t intra_vlc_format:1; - uint32_t quantizer_scale_type:1; - uint32_t concealment_motion_vector:1; - uint32_t frame_predict_frame_dct:1; - uint32_t top_field_first:1; - uint32_t picture_structure:2; - uint32_t intra_dc_precision:2; - uint32_t f_code_0_0:4; - uint32_t f_code_0_1:4; - uint32_t f_code_1_0:4; - uint32_t f_code_1_1:4; - } vld0; - - struct { - uint32_t pad2:9; - uint32_t picture_coding_type:2; - uint32_t pad:21; - } vld1; - - struct { - uint32_t index_0:4; - uint32_t index_1:4; - uint32_t index_2:4; - uint32_t index_3:4; - uint32_t index_4:4; - uint32_t index_5:4; - uint32_t index_6:4; - uint32_t index_7:4; - } desc_remap_table0; - - struct { - uint32_t index_8:4; - uint32_t index_9:4; - uint32_t index_10:4; - uint32_t index_11:4; - uint32_t index_12:4; - uint32_t index_13:4; - uint32_t index_14:4; - uint32_t index_15:4; - } desc_remap_table1; -}; - -struct i965_interface_descriptor -{ - struct { - uint32_t grf_reg_blocks:4; - uint32_t pad:2; uint32_t kernel_start_pointer:26; } desc0; @@ -173,8 +41,8 @@ struct i965_interface_descriptor uint32_t thread_priority:1; uint32_t single_program_flow:1; uint32_t pad5:1; - uint32_t const_urb_entry_read_offset:6; - uint32_t const_urb_entry_read_len:6; + uint32_t pad6:6; + uint32_t pad7:6; } desc1; struct { @@ -184,12 +52,34 @@ struct i965_interface_descriptor } desc2; struct { - uint32_t binding_table_entry_count:5; - uint32_t binding_table_pointer:27; + uint32_t binding_table_entry_count:5; /* prefetch entries only */ + uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */ } desc3; -}; -struct i965_surface_state + struct { + uint32_t curbe_read_offset:16; /* in GRFs */ + uint32_t curbe_read_len:16; /* in GRFs */ + } desc4; + + struct { + uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */ + uint32_t barrier_return_byte:8; + uint32_t slm_sz:5; /* 0..16 - 0K..64K */ + uint32_t barrier_enable:1; + uint32_t rounding_mode:2; + uint32_t barrier_return_grf_offset:8; + } desc5; + + struct { + uint32_t reserved_mbz; + } desc6; + + struct { + uint32_t reserved_mbz; + } desc7; +} gen6_interface_descriptor_t; + +typedef struct gen6_surface_state { struct { uint32_t cube_pos_z:1; @@ -198,8 +88,9 @@ struct i965_surface_state uint32_t cube_neg_y:1; uint32_t cube_pos_x:1; uint32_t cube_neg_x:1; - uint32_t pad:3; + uint32_t pad:2; uint32_t render_cache_read_mode:1; + uint32_t cube_map_corner_mode:1; uint32_t mipmap_layout_mode:1; uint32_t vert_line_stride_ofs:1; uint32_t vert_line_stride:1; @@ -234,23 +125,155 @@ struct i965_surface_state } ss3; struct { - uint32_t pad:19; - uint32_t min_array_elt:9; + uint32_t multisample_pos_index:3; + uint32_t pad:1; + uint32_t multisample_count:3; + uint32_t pad1:1; + uint32_t rt_view_extent:9; + uint32_t min_array_elt:11; uint32_t min_lod:4; } ss4; struct { - uint32_t pad:20; + uint32_t pad:16; + uint32_t cache_control:2; /* different values for GT and IVB */ + uint32_t gfdt:1; /* allows selective flushing of LLC (e.g. for scanout) */ + uint32_t encrypted_data:1; uint32_t y_offset:4; - uint32_t pad2:1; + uint32_t vertical_alignment:1; uint32_t x_offset:7; } ss5; -}; + + uint32_t ss6; /* unused */ + uint32_t ss7; /* unused */ +} gen6_surface_state_t; + +typedef struct gen7_surface_state +{ + struct { + uint32_t cube_pos_z:1; + uint32_t cube_neg_z:1; + uint32_t cube_pos_y:1; + uint32_t cube_neg_y:1; + uint32_t cube_pos_x:1; + uint32_t cube_neg_x:1; + uint32_t media_boundary_pixel_mode:2; + uint32_t render_cache_rw_mode:1; + uint32_t pad1:1; + uint32_t surface_array_spacing:1; + uint32_t vertical_line_stride_offset:1; + uint32_t vertical_line_stride:1; + uint32_t tile_walk:1; + uint32_t tiled_surface:1; + uint32_t horizontal_alignment:1; + uint32_t vertical_alignment:2; + uint32_t surface_format:9; + uint32_t pad0:1; + uint32_t surface_array:1; + uint32_t surface_type:3; + } ss0; + + struct { + uint32_t base_addr; + } ss1; + + struct { + uint32_t width:14; + uint32_t pad1:2; + uint32_t height:14; + uint32_t pad0:2; + } ss2; + + struct { + uint32_t pitch:18; + uint32_t pad0:3; + uint32_t depth:11; + } ss3; + + uint32_t ss4; + + struct { + uint32_t mip_count:4; + uint32_t surface_min_load:4; + uint32_t pad2:6; + uint32_t coherence_type:1; + uint32_t stateless_force_write_thru:1; + uint32_t surface_object_control_state:4; + uint32_t y_offset:4; + uint32_t pad0:1; + uint32_t x_offset:7; + } ss5; + + uint32_t ss6; /* unused */ + uint32_t ss7; /* unused */ + +} gen7_surface_state_t; + +STATIC_ASSERT(sizeof(gen6_surface_state_t) == sizeof(gen7_surface_state_t)); +static const size_t surface_state_sz = sizeof(gen6_surface_state_t); + +typedef struct gen6_vfe_state_inline +{ + struct { + uint32_t per_thread_scratch_space:4; + uint32_t pad3:3; + uint32_t extend_vfe_state_present:1; + uint32_t pad2:2; + uint32_t scratch_base:22; + } vfe0; + + struct { + uint32_t debug_counter_control:2; + uint32_t gpgpu_mode:1; /* 0 for SNB!!! */ + uint32_t gateway_mmio_access:2; + uint32_t fast_preempt:1; + uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */ + uint32_t reset_gateway_timer:1; + uint32_t urb_entries:8; + uint32_t max_threads:16; + } vfe1; + + struct { + uint32_t pad8:8; + uint32_t debug_object_id:24; + } vfe2; + + struct { + uint32_t curbe_size:16; /* in GRFs */ + uint32_t urbe_size:16; /* in GRFs */ + } vfe3; + + struct { + uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */ + } vfe4; + + struct { + uint32_t scoreboard0_dx:4; + uint32_t scoreboard0_dy:4; + uint32_t scoreboard1_dx:4; + uint32_t scoreboard1_dy:4; + uint32_t scoreboard2_dx:4; + uint32_t scoreboard2_dy:4; + uint32_t scoreboard3_dx:4; + uint32_t scoreboard3_dy:4; + } vfe5; + + struct { + uint32_t scoreboard4_dx:4; + uint32_t scoreboard4_dy:4; + uint32_t scoreboard5_dx:4; + uint32_t scoreboard5_dy:4; + uint32_t scoreboard6_dx:4; + uint32_t scoreboard6_dy:4; + uint32_t scoreboard7_dx:4; + uint32_t scoreboard7_dy:4; + } vfe6; +} gen6_vfe_state_inline_t; #define BITFIELD_BIT(X) 1 #define BITFIELD_RANGE(X,Y) ((Y) - (X) + 1) -typedef struct i965_pipe_control +typedef struct gen6_pipe_control { struct { @@ -300,9 +323,9 @@ typedef struct i965_pipe_control { uint64_t data; } qw0; -} i965_pipe_control_t; - -typedef struct i965_sampler_state +} gen6_pipe_control_t; +#if 0 +typedef struct gen6_sampler_state { struct { @@ -349,7 +372,8 @@ typedef struct i965_sampler_state uint32_t monochrome_filter_width:3; uint32_t monochrome_filter_height:3; } ss3; -} i965_sampler_state_t; +} gen6_sampler_state_t; +#endif typedef struct gen7_sampler_state { |