summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-12-11 18:45:04 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-12-18 15:42:11 +0800
commitc09acfac5ac91177166d358c45541b6ecc6b2dc1 (patch)
treefae0691a2ff99ddcc5671d2570f6f0e1ed94b541
parent92866a083d19343bfa47463523484286d5143def (diff)
Refactor all image builtin functions.
Refactor almost all the image builtin related functions to simplify the code and get rid of most of the awful macros. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r--backend/src/libocl/src/ocl_image.cl811
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp174
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx36
-rw-r--r--backend/src/llvm/llvm_scalarize.cpp13
4 files changed, 618 insertions, 416 deletions
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index fd421bf4..95b98ff4 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -20,29 +20,90 @@
#include "ocl_integer.h"
#include "ocl_common.h"
+///////////////////////////////////////////////////////////////////////////////
+// Beignet builtin functions.
+///////////////////////////////////////////////////////////////////////////////
+
// 1D read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int u, uint sampler_offset);
// 2D & 1D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float2 coord, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int2 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float2 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int2 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float2 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int2 coord, uint sampler_offset);
// 3D & 2D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float4 coord, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int4 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float4 coord, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int4 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float4 coord, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int4 coord, uint sampler_offset);
+
+// It is unclear why 3-component coordinates need to be supported, but the old
+// version supported them, so we keep supporting them here.
+INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ float3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagei(surface_id, sampler,
+ (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
+ int3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagei(surface_id, sampler,
+ (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ float3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imageui(surface_id, sampler,
+ (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
+ int3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imageui(surface_id, sampler,
+ (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ float3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagef(surface_id, sampler,
+ (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
+INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
+ int3 coord, uint sampler_offset)
+{
+ return __gen_ocl_read_imagef(surface_id, sampler,
+ (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
+}
// 1D write
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color);
@@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color);
// 2D & 1D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, float4 color);
// 3D & 2D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, float4 color);
+
+INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 coord, int4 color)
+{
+ __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
+INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3 coord, uint4 color)
+{
+ __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
+INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 coord, float4 color)
+{
+ __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
+}
int __gen_ocl_get_image_width(uint surface_id);
int __gen_ocl_get_image_height(uint surface_id);
@@ -65,225 +139,436 @@ int __gen_ocl_get_image_channel_data_type(uint surface_id);
int __gen_ocl_get_image_channel_order(uint surface_id);
int __gen_ocl_get_image_depth(uint surface_id);
-// 2D 3D Image Common Macro
-#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
-#define GEN_FIX_1 1
-#else
-#define GEN_FIX_1 0
-#endif
#define GET_IMAGE(cl_image, surface_id) \
uint surface_id = (uint)cl_image
-OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image)
+
+///////////////////////////////////////////////////////////////////////////////
+// helper functions to validate array index.
+///////////////////////////////////////////////////////////////////////////////
+INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image)
{
GET_IMAGE(image, surface_id);
float array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(rint(index), 0.f, array_size - 1.f);
+ coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
+ return coord;
}
-OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image)
+INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image)
{
GET_IMAGE(image, surface_id);
float array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(rint(index), 0.f, array_size - 1.f);
+ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+ return coord;
}
-OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image)
+INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ float array_size = __gen_ocl_get_image_depth(surface_id);
+ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+ return coord;
+}
+
+INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image)
{
GET_IMAGE(image, surface_id);
int array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(index, 0, array_size - 1);
+ coord.s1 = clamp(coord.s1, 0, array_size - 1);
+ return coord;
}
-OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image)
+INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image)
{
GET_IMAGE(image, surface_id);
int array_size = __gen_ocl_get_image_depth(surface_id);
- return clamp(index, 0, array_size - 1);
-}
-
-#define DECL_READ_IMAGE0(int_clamping_fix, \
- image_type, type, suffix, coord_type, n) \
- OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- const sampler_t sampler, \
- coord_type coord) \
- { \
- GET_IMAGE(cl_image, surface_id); \
- GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \
- if (int_clamping_fix && \
- ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && \
- ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORD(surface_id, sampler, coord)); \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \
- }
+ coord.s2 = clamp(coord.s2, 0, array_size - 1);
+ return coord;
+}
-#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, \
- image_type, type, suffix, coord_type, n) \
- OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- const sampler_t sampler, \
- coord_type coord) \
- { \
- GET_IMAGE(cl_image, surface_id); \
- GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \
- coord_type tmpCoord = coord; \
- if (float_coord_rounding_fix | int_clamping_fix) { \
- if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \
- && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \
- if (float_coord_rounding_fix \
- && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \
- FIXUP_FLOAT_COORD(tmpCoord); \
- } \
- if (int_clamping_fix) { \
- coord_type intCoord; \
- if (sampler & CLK_NORMALIZED_COORDS_TRUE) { \
- DENORMALIZE_COORD(surface_id, intCoord, tmpCoord); \
- } else \
- intCoord = tmpCoord; \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDI(surface_id, sampler, intCoord));\
- } \
- } \
- } \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\
- }
+INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ int array_size = __gen_ocl_get_image_depth(surface_id);
+ coord.s2 = clamp(coord.s2, 0, array_size - 1);
+ return coord;
+}
+
+// For non-array image types there is nothing to validate; return the coord as is.
+#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
+INLINE_OVERLOADABLE coord_type __gen_validate_array_index(coord_type coord, image_type image) \
+{ \
+ return coord; \
+}
+
+GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
+GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
+GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
+GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
+GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
+GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
+GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
+
+///////////////////////////////////////////////////////////////////////////////
+// Helper functions to work around some coordinate boundary issues.
+// The major issue on Gen7/Gen7.5 is that the sample message cannot sample
+// integer type surfaces correctly with CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
+// The workaround is to use an LD message instead of the normal sample message.
+///////////////////////////////////////////////////////////////////////////////
+bool __gen_sampler_need_fix(const sampler_t sampler)
+{
+ return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
+ ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
+}
+
+bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
+{
+ return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
+}
+
+
+INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
+{
+ if (tmpCoord < 0 && tmpCoord > -0x1p-20f)
+ tmpCoord += -0x1p-9f;
+ return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord)
+{
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+ tmpCoord.s0 += -0x1p-9f;
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+ tmpCoord.s1 += -0x1p-9f;
+ return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord)
+{
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+ tmpCoord.s0 += -0x1p-9f;
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+ tmpCoord.s1 += -0x1p-9f;
+ if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
+ tmpCoord.s2 += -0x1p-9f;
+ return tmpCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord)
+{
+ if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f)
+ tmpCoord.s0 += -0x1p-9f;
+ if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f)
+ tmpCoord.s1 += -0x1p-9f;
+ if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f)
+ tmpCoord.s2 += -0x1p-9f;
+ return tmpCoord;
+}
+
+// Functions to denormalize coordinates. This is needed when we have to use the LD
+// message (sampler offset is non-zero) and the coordinates are normalized
+// coordinates.
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t image, float srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ return srcCoord * __gen_ocl_get_image_width(surface_id);
+}
+
+INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image1d_array_t image, float2 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_buffer_t image, float srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ return srcCoord * __gen_ocl_get_image_width(surface_id);
+}
+
+INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t image, float2 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image2d_array_t image, float3 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t image, float3 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image2d_array_t image, float4 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ return srcCoord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t image, float4 srcCoord)
+{
+ GET_IMAGE(image, surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+ return srcCoord;
+}
+
+// After denormalizing, we have to fix up the negative boundary.
+INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord)
+{
+ return coord < 0 ? -1 : coord;
+}
+
+INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord)
+{
+ coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+ return coord;
+}
+
+INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord)
+{
+ coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+ coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
+ return coord;
+}
-#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) \
- OVERLOADABLE type read_image ##suffix(image_type cl_image, \
- coord_type coord) \
- { \
- GET_IMAGE(cl_image, surface_id); \
- GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \
- return __gen_ocl_read_image ##suffix( \
- EXPEND_READ_COORDF(surface_id, \
- CLK_NORMALIZED_COORDS_FALSE \
- | CLK_ADDRESS_NONE \
- | CLK_FILTER_NEAREST, (float)coord), 0); \
+INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
+{
+ coord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ coord.s1 = coord.s1 < 0 ? -1 : coord.s1;
+ coord.s2 = coord.s2 < 0 ? -1 : coord.s2;
+ return coord;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Built-in Image Read/Write Functions
+///////////////////////////////////////////////////////////////////////////////
+
+// 2D 3D Image Common Macro
+#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
+#define GEN_FIX_FLOAT_ROUNDING 1
+#define GEN_FIX_INT_CLAMPING 1
+#else
+#define GEN_FIX_FLOAT_ROUNDING 0
+#define GEN_FIX_INT_CLAMPING 0
+#endif
+
+// For integer coordinates
+#define DECL_READ_IMAGE0(int_clamping_fix, image_type, \
+ image_data_type, suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord = __gen_validate_array_index(coord, cl_image); \
+ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1); \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \
}
-#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \
- OVERLOADABLE void write_image ##suffix(image_type cl_image, coord_type coord, type color)\
- {\
- GET_IMAGE(cl_image, surface_id);\
- __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\
+// For float coordinates
+#define DECL_READ_IMAGE1(int_clamping_fix, image_type, \
+ image_data_type, suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \
+ if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
+ if (__gen_sampler_need_fix(sampler)) { \
+ if (GEN_FIX_FLOAT_ROUNDING && \
+ __gen_sampler_need_rounding_fix(sampler)) \
+ tmpCoord = __gen_fixup_float_coord(tmpCoord); \
+ if (int_clamping_fix) { \
+ if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
+ tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
+ tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \
+ return __gen_ocl_read_image ##suffix( \
+ surface_id, sampler, tmpCoord, 1); \
+ } \
+ } \
+ } \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \
}
-#define DECL_IMAGE_INFO_COMMON(image_type) \
- OVERLOADABLE int get_image_channel_data_type(image_type image)\
- { \
- GET_IMAGE(image, surface_id);\
- return __gen_ocl_get_image_channel_data_type(surface_id); \
- }\
- OVERLOADABLE int get_image_channel_order(image_type image)\
- { \
- GET_IMAGE(image, surface_id);\
- return __gen_ocl_get_image_channel_order(surface_id); \
- } \
- OVERLOADABLE int get_image_width(image_type image) \
- { \
- GET_IMAGE(image, surface_id); \
- return __gen_ocl_get_image_width(surface_id); \
+#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \
+ suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord = __gen_validate_array_index(coord, cl_image); \
+ return __gen_ocl_read_image ##suffix( \
+ surface_id, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+ | CLK_FILTER_NEAREST, coord, 0); \
}
-// 1D
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix) \
- DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1) \
- DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float, 1) \
- DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1) \
- DECL_WRITE_IMAGE(image_type, type, suffix, int) \
- DECL_WRITE_IMAGE(image_type, type, suffix, float)
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
-#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \
- tmpCoord += -0x1p-9f; \
+#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type) \
+ OVERLOADABLE void write_image ##suffix(image_type cl_image, \
+ coord_type coord, \
+ image_data_type color) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord_type fixedCoord = __gen_validate_array_index(coord, cl_image); \
+ __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); \
}
-DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
-DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
-DECL_IMAGE(0, image1d_t, float4, f)
-DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i)
-DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui)
-DECL_IMAGE(0, image1d_buffer_t, float4, f)
+#define int1 int
+#define float1 float
-// 1D Info
-DECL_IMAGE_INFO_COMMON(image1d_t)
-DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef DECL_IMAGE
-// End of 1D
-
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) \
- DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) \
- DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
- DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) \
- DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
- DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
-// 2D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
- dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
- tmpCoord.s1 += -0x1p-9f; \
+#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n) \
+ DECL_READ_IMAGE0(int_clamping_fix, image_type, \
+ image_data_type, suffix, int ##n) \
+ DECL_READ_IMAGE1(int_clamping_fix, image_type, \
+ image_data_type, suffix, float ##n) \
+ DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, int ##n) \
+ DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n) \
+
+// Declare the int4/uint4/float4 variants for one image type and coordinate width.
+#define DECL_IMAGE_TYPE(image_type, n) \
+ DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n) \
+ DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n) \
+ DECL_IMAGE(0, image_type, float4, f, n)
+
+DECL_IMAGE_TYPE(image1d_t, 1)
+DECL_IMAGE_TYPE(image1d_buffer_t, 1)
+DECL_IMAGE_TYPE(image2d_t, 2)
+DECL_IMAGE_TYPE(image3d_t, 4)
+DECL_IMAGE_TYPE(image3d_t, 3)
+DECL_IMAGE_TYPE(image2d_array_t, 4)
+DECL_IMAGE_TYPE(image2d_array_t, 3)
+
+// For 1D Array:
+// The fixup_1darray_coord functions convert a 1D array coord to a 2D array coord;
+// the caller must set the sampler offset to 2 when using the converted coord.
+// This works around a 1D image array restriction: the array index (ai) cannot be
+// set in the LD message. We solve it by faking the same image as a 2D array and
+// then accessing it via the LD message as a 3D surface, treating ai as the w coordinate.
+INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord, image1d_array_t image)
+{
+ float4 newCoord;
+ newCoord.s0 = coord.s0 < 0 ? -1 : coord.s0;
+ newCoord.s1 = 0;
+ newCoord.s2 = coord.s1;
+ newCoord.s3 = 0;
+ return newCoord;
+}
+
+INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t image)
+{
+ int4 newCoord;
+ newCoord.s0 = coord.s0;
+ newCoord.s1 = 0;
+ newCoord.s2 = coord.s1;
+ newCoord.s3 = 0;
+ return newCoord;
+}
+
+// For integer coordinates
+#define DECL_READ_IMAGE0_1DArray(int_clamping_fix, \
+ image_data_type, suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord = __gen_validate_array_index(coord, cl_image); \
+ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \
+ int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, 2); \
+ } \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \
}
-DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
-DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
-DECL_IMAGE(0, image2d_t, float4, f, 2)
-
-// 1D Array
-#undef GET_IMAGE_ARRAY_SIZE
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), 0, (int)ai, 2
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color
-#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
- coord_type ai = __gen_compute_array_index(coord.s1, image);
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
+// For float coordinates
+#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \
+ suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+ GET_IMAGE(cl_image, surface_id); \
+ coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \
+ if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
+ if (__gen_sampler_need_fix(sampler)) { \
+ if (GEN_FIX_FLOAT_ROUNDING && \
+ __gen_sampler_need_rounding_fix(sampler)) \
+ tmpCoord = __gen_fixup_float_coord(tmpCoord); \
+ if (int_clamping_fix) { \
+ if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
+ tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
+ float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); \
+ return __gen_ocl_read_image ##suffix( \
+ surface_id, sampler, newCoord, 2); \
+ } \
+ } \
+ } \
+ return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \
}
-DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2)
-DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2)
-DECL_IMAGE(0, image1d_array_t, float4, f, 2)
+#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) \
+ DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix, int2) \
+ DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \
+ suffix, float2) \
+ DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, suffix, int2) \
+ DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, int2) \
+
+DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i)
+DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui)
+DECL_IMAGE_1DArray(0, float4, f)
+
+///////////////////////////////////////////////////////////////////////////////
+// Built-in Image Query Functions
+///////////////////////////////////////////////////////////////////////////////
+#define DECL_IMAGE_INFO_COMMON(image_type) \
+ OVERLOADABLE int get_image_channel_data_type(image_type image) \
+ { \
+ GET_IMAGE(image, surface_id); \
+ return __gen_ocl_get_image_channel_data_type(surface_id); \
+ } \
+ OVERLOADABLE int get_image_channel_order(image_type image) \
+ { \
+ GET_IMAGE(image, surface_id); \
+ return __gen_ocl_get_image_channel_order(surface_id); \
+ } \
+ OVERLOADABLE int get_image_width(image_type image) \
+ { \
+ GET_IMAGE(image, surface_id); \
+ return __gen_ocl_get_image_width(surface_id); \
+ }
-// 2D Info
+DECL_IMAGE_INFO_COMMON(image1d_t)
+DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
+DECL_IMAGE_INFO_COMMON(image1d_array_t)
DECL_IMAGE_INFO_COMMON(image2d_t)
+DECL_IMAGE_INFO_COMMON(image3d_t)
+DECL_IMAGE_INFO_COMMON(image2d_array_t)
+
+// 2D extra Info
OVERLOADABLE int get_image_height(image2d_t image)
{
GET_IMAGE(image, surface_id);
@@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t image)
{
return (int2){get_image_width(image), get_image_height(image)};
}
+// End of 2D
-// 1D Array info
-DECL_IMAGE_INFO_COMMON(image1d_array_t)
-OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
-{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_depth(surface_id);
-}
-
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDI
-#undef EXPEND_READ_COORDF
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-// End of 2D and 1D Array
-
-// 3D
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)coord.s2
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
- dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \
- dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
- tmpCoord.s1 += -0x1p-9f; \
- if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \
- tmpCoord.s2 += -0x1p-9f; \
- }
-#define GET_IMAGE_ARRAY_SIZE(a,b,c,d)
-
-DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4)
-DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4)
-DECL_IMAGE(0, image3d_t, float4, f, 4)
-
-DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3)
-DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3)
-DECL_IMAGE(0, image3d_t, float4, f, 3)
-
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-
-#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1
-#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai
-#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
- (int)(coord.s1 < 0 ? -1 : coord.s1), (int)ai, 1
-#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \
- dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id);
-#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color
-
-#define FIXUP_FLOAT_COORD(tmpCoord) \
- { \
- if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \
- tmpCoord.s0 += -0x1p-9f; \
- if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \
- tmpCoord.s1 += -0x1p-9f; \
- }
-#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \
- coord_type ai = __gen_compute_array_index(coord.s2, image);
-
-// 2D Array
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4)
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4)
-DECL_IMAGE(0, image2d_array_t, float4, f, 4)
-
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3)
-DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3)
-DECL_IMAGE(0, image2d_array_t, float4, f, 3)
-
-// 3D Info
-DECL_IMAGE_INFO_COMMON(image3d_t)
+// 3D extra Info
OVERLOADABLE int get_image_height(image3d_t image)
{
GET_IMAGE(image, surface_id);
@@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t image)
}
OVERLOADABLE int4 get_image_dim(image3d_t image)
{
- return (int4){get_image_width(image), get_image_height(image), get_image_depth(image), 0};
+ // Result is (width, height, depth, 0), written with the OpenCL C vector
+ // literal form (type)(a, b, c, d); the fourth component is always 0.
+ return (int4) (get_image_width(image),
+ get_image_height(image),
+ get_image_depth(image),
+ 0);
}
-// 2D Array Info
-DECL_IMAGE_INFO_COMMON(image2d_array_t)
+// 2D Array extra Info
OVERLOADABLE int get_image_height(image2d_array_t image)
{
GET_IMAGE(image, surface_id);
@@ -409,21 +615,10 @@ OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
return __gen_ocl_get_image_depth(surface_id);
}
-#undef EXPEND_READ_COORD
-#undef EXPEND_READ_COORDF
-#undef EXPEND_READ_COORDI
-#undef DENORMALIZE_COORD
-#undef EXPEND_WRITE_COORD
-#undef FIXUP_FLOAT_COORD
-#undef GET_IMAGE_ARRAY_SIZE
-// End of 3D and 2D Array
-
-#undef DECL_IMAGE
-#undef DECL_READ_IMAGE
-#undef DECL_READ_IMAGE_NOSAMPLER
-#undef DECL_WRITE_IMAGE
-#undef GEN_FIX_1
-// End of Image
-
-
-#undef GET_IMAGE
+// 1D Array info
+// The array size of a 1D image array is obtained from the surface's depth
+// query (__gen_ocl_get_image_depth), the same builtin the 2D array variant
+// of get_image_array_size uses.
+OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
+{
+ GET_IMAGE(image, surface_id);
+ return __gen_ocl_get_image_depth(surface_id);
+}
+// End of 1DArray
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 1ea1f339..167b8f02 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -257,9 +257,10 @@ namespace gbe
/*! Get number of element to process dealing either with a vector or a scalar
* value
*/
- static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum, bool useUnsigned = false)
+ static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t &elemNum, bool useUnsigned = false)
{
ir::Type type;
+ Type *llvmType = value->getType();
if (llvmType->isVectorTy() == true) {
VectorType *vectorType = cast<VectorType>(llvmType);
Type *elementType = vectorType->getElementType();
@@ -629,6 +630,7 @@ namespace gbe
void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
uint8_t appendSampler(CallSite::arg_iterator AI);
+ uint8_t getImageID(CallInst &I);
// These instructions are not supported at all
void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
@@ -2507,8 +2509,8 @@ error:
Value *srcValue = I.getOperand(0);
Value *dstValue = &I;
uint32_t srcElemNum = 0, dstElemNum = 0 ;
- ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue, srcElemNum);
- ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue, dstElemNum);
+ ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum);
+ ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum);
// As long and double are not compatible in register storage
// and we do not support double yet, simply put an assert here
GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType == ir::TYPE_DOUBLE));
@@ -2908,7 +2910,7 @@ error:
{
// dst is a 4 elements vector. We allocate all 4 registers here.
uint32_t elemNum;
- (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
+ (void)getVectorInfo(ctx, &I, elemNum);
GBE_ASSERT(elemNum == 4);
this->newRegister(&I);
break;
@@ -3036,6 +3038,15 @@ error:
return index;
}
+ uint8_t GenWriter::getImageID(CallInst &I) {
+   // Find the origin values recorded for this call in pointerOrigMap.
+   PtrOrigMapIter originIt = pointerOrigMap.find(&I);
+   GBE_ASSERT(originIt != pointerOrigMap.end());
+   // Exactly one origin is required; anything else trips the assert.
+   SmallVectorImpl<Value *> &imageOrigins = originIt->second;
+   GBE_ASSERT(imageOrigins.size() == 1);
+   // Map that origin's register to its index in the function's image set.
+   const ir::Register originReg = getRegister(imageOrigins[0]);
+   return ctx.getFunction().getImageSet()->getIdx(originReg);
+ }
+
void GenWriter::emitCallInst(CallInst &I) {
if (Function *F = I.getCalledFunction()) {
if (F->getIntrinsicID() != 0) {
@@ -3199,7 +3210,6 @@ error:
default: NOT_IMPLEMENTED;
}
} else {
- int image_dim;
// Get the name of the called function and handle it
Value *Callee = I.getCalledValue();
const std::string fnName = Callee->getName();
@@ -3315,13 +3325,13 @@ error:
case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE:
case GEN_OCL_GET_IMAGE_CHANNEL_ORDER:
{
- GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
+ const uint8_t imageID = getImageID(I);
+ GBE_ASSERT(AI != AE); ++AI;
const ir::Register reg = this->getRegister(&I, 0);
int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
- const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
- ir::ImageInfoKey key(surfaceID, infoType);
+ ir::ImageInfoKey key(imageID, infoType);
const ir::Register infoReg = ctx.getFunction().getImageSet()->appendInfo(key, &ctx);
- ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
+ ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
break;
}
@@ -3331,69 +3341,75 @@ error:
case GEN_OCL_READ_IMAGE_I_1D_I:
case GEN_OCL_READ_IMAGE_UI_1D_I:
case GEN_OCL_READ_IMAGE_F_1D_I:
- image_dim = 1;
- goto handle_read_image;
case GEN_OCL_READ_IMAGE_I_2D:
case GEN_OCL_READ_IMAGE_UI_2D:
case GEN_OCL_READ_IMAGE_F_2D:
case GEN_OCL_READ_IMAGE_I_2D_I:
case GEN_OCL_READ_IMAGE_UI_2D_I:
case GEN_OCL_READ_IMAGE_F_2D_I:
- image_dim = 2;
- goto handle_read_image;
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
case GEN_OCL_READ_IMAGE_I_3D_I:
case GEN_OCL_READ_IMAGE_UI_3D_I:
case GEN_OCL_READ_IMAGE_F_3D_I:
- image_dim = 3;
-handle_read_image:
{
- GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
- const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
+ const uint8_t imageID = getImageID(I);
+ GBE_ASSERT(AI != AE); ++AI;
GBE_ASSERT(AI != AE);
const uint8_t sampler = this->appendSampler(AI);
- ++AI;
-
- ir::Register ucoord;
- ir::Register vcoord;
- ir::Register wcoord;
-
- GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
- if (image_dim > 1) {
- GBE_ASSERT(AI != AE);
- vcoord = this->getRegister(*AI);
- ++AI;
- } else {
- vcoord = ir::ocl::invalid;
- }
-
- if (image_dim > 2) {
- GBE_ASSERT(AI != AE);
- wcoord = this->getRegister(*AI);
- ++AI;
- } else {
- wcoord = ir::ocl::invalid;
- }
+ ++AI; GBE_ASSERT(AI != AE);
+ uint32_t coordNum;
+ (void)getVectorInfo(ctx, *AI, coordNum);
+ if (coordNum == 4)
+ coordNum = 3;
+ const uint32_t imageDim = coordNum;
+ GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
- vector<ir::Register> dstTupleData, srcTupleData;
- const uint32_t elemNum = 4;
- for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
- const ir::Register reg = this->getRegister(&I, elemID);
- dstTupleData.push_back(reg);
- }
- srcTupleData.push_back(ucoord);
- srcTupleData.push_back(vcoord);
- srcTupleData.push_back(wcoord);
uint8_t samplerOffset = 0;
+ Value *coordVal = *AI;
+ ++AI; GBE_ASSERT(AI != AE);
+ Value *samplerOffsetVal = *AI;
#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
- GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
+ Constant *CPV = dyn_cast<Constant>(samplerOffsetVal);
assert(CPV);
const ir::Immediate &x = processConstantImm(CPV);
GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type");
samplerOffset = x.getIntegerValue();
#endif
+ bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
+ bool requiredFloatCoord = samplerOffset == 0;
+
+ vector<ir::Register> dstTupleData, srcTupleData;
+ for (uint32_t elemID = 0; elemID < 3; elemID++) {
+ ir::Register reg;
+
+ if (elemID < imageDim)
+ reg = this->getRegister(coordVal, elemID);
+ else
+ reg = ir::ocl::invalid;
+
+ if (isFloatCoord == requiredFloatCoord)
+ srcTupleData.push_back(reg);
+ else if (!requiredFloatCoord) {
+ ir::Register intCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg);
+ srcTupleData.push_back(intCoordReg);
+ } else {
+ ir::Register floatCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD);
+ ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg);
+ srcTupleData.push_back(floatCoordReg);
+ }
+ }
+
+ uint32_t elemNum;
+ (void)getVectorInfo(ctx, &I, elemNum);
+ GBE_ASSERT(elemNum == 4);
+
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register reg = this->getRegister(&I, elemID);
+ dstTupleData.push_back(reg);
+ }
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
@@ -3426,58 +3442,46 @@ handle_read_image:
GBE_ASSERT(0); // never been here.
}
- bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
-
- ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
- isFloatCoord, sampler, samplerOffset);
+ ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
+ requiredFloatCoord, sampler, samplerOffset);
break;
}
case GEN_OCL_WRITE_IMAGE_I_1D:
case GEN_OCL_WRITE_IMAGE_UI_1D:
case GEN_OCL_WRITE_IMAGE_F_1D:
- image_dim = 1;
- goto handle_write_image;
case GEN_OCL_WRITE_IMAGE_I_2D:
case GEN_OCL_WRITE_IMAGE_UI_2D:
case GEN_OCL_WRITE_IMAGE_F_2D:
- image_dim = 2;
- goto handle_write_image;
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
- image_dim = 3;
-handle_write_image:
{
- GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
- const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
- ir::Register ucoord, vcoord, wcoord;
-
- GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+ const uint8_t imageID = getImageID(I);
+ GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
+ uint32_t coordNum;
+ (void)getVectorInfo(ctx, *AI, coordNum);
+ if (coordNum == 4)
+ coordNum = 3;
+ const uint32_t imageDim = coordNum;
+ vector<ir::Register> srcTupleData;
+ GBE_ASSERT(imageDim >= 1 && imageDim <= 3);
- if (image_dim > 1) {
- GBE_ASSERT(AI != AE);
- vcoord = this->getRegister(*AI);
- ++AI;
- } else
- vcoord = ir::ocl::invalid;
-
- if (image_dim > 2) {
- GBE_ASSERT(AI != AE);
- wcoord = this->getRegister(*AI);
- ++AI;
- } else {
- wcoord = ir::ocl::invalid;
- }
+ for (uint32_t elemID = 0; elemID < 3; elemID++) {
+ ir::Register reg;
- GBE_ASSERT(AI != AE);
- vector<ir::Register> srcTupleData;
+ if (elemID < imageDim)
+ reg = this->getRegister(*AI, elemID);
+ else
+ reg = ir::ocl::invalid;
- srcTupleData.push_back(ucoord);
- srcTupleData.push_back(vcoord);
- srcTupleData.push_back(wcoord);
+ srcTupleData.push_back(reg);
+ }
+ ++AI; GBE_ASSERT(AI != AE);
+ uint32_t elemNum;
+ (void)getVectorInfo(ctx, *AI, elemNum);
+ GBE_ASSERT(elemNum == 4);
- const uint32_t elemNum = 4;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
const ir::Register reg = this->getRegister(*AI, elemID);
srcTupleData.push_back(reg);
@@ -3504,7 +3508,7 @@ handle_write_image:
GBE_ASSERT(0); // never been here.
}
- ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
+ ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
break;
}
case GEN_OCL_MUL_HI_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 7434c783..8d55c3f5 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtDv2_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtDv4_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtDv4_fj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtDv4_fj)
// work around read image with the LD message. The coords are integer type.
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtDv2_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtDv4_ij)
// To write_image functions.
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijDv2_iDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjiiiDv4_f)
+// Itanium-mangled names: j = uint, Dv4_i / Dv4_j / Dv4_f = int4 / uint4 /
+// float4. The 3D variants take an int4 coordinate. In the signed-int variant
+// the color is int4 as well, so the mangler emits the substitution S_
+// instead of repeating Dv4_i -- the different-looking suffix is intentional.
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijDv4_iS_)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
// To get image info function
DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index 5450a2b2..baf526b6 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -648,7 +648,7 @@ namespace gbe {
// Get the function arguments
CallSite CS(call);
- CallSite::arg_iterator CI = CS.arg_begin() + 2;
+ CallSite::arg_iterator CI = CS.arg_begin() + 1;
switch (it->second) {
default: break;
@@ -661,8 +661,7 @@ namespace gbe {
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
-
- case GEN_OCL_READ_IMAGE_I_1D_I:
+ case GEN_OCL_READ_IMAGE_I_1D_I:
case GEN_OCL_READ_IMAGE_UI_1D_I:
case GEN_OCL_READ_IMAGE_F_1D_I:
case GEN_OCL_READ_IMAGE_I_2D_I:
@@ -674,6 +673,9 @@ namespace gbe {
case GEN_OCL_GET_IMAGE_WIDTH:
case GEN_OCL_GET_IMAGE_HEIGHT:
{
+ ++CI;
+ if ((*CI)->getType()->isVectorTy())
+ *CI = InsertToVector(call, *CI);
setAppendPoint(call);
extractFromVector(call);
break;
@@ -681,15 +683,16 @@ namespace gbe {
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
- CI++;
case GEN_OCL_WRITE_IMAGE_I_2D:
case GEN_OCL_WRITE_IMAGE_UI_2D:
case GEN_OCL_WRITE_IMAGE_F_2D:
- CI++;
case GEN_OCL_WRITE_IMAGE_I_1D:
case GEN_OCL_WRITE_IMAGE_UI_1D:
case GEN_OCL_WRITE_IMAGE_F_1D:
{
+ if ((*CI)->getType()->isVectorTy())
+ *CI = InsertToVector(call, *CI);
+ ++CI;
*CI = InsertToVector(call, *CI);
break;
}