diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-11 18:45:04 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-18 15:42:11 +0800 |
commit | c09acfac5ac91177166d358c45541b6ecc6b2dc1 (patch) | |
tree | fae0691a2ff99ddcc5671d2570f6f0e1ed94b541 | |
parent | 92866a083d19343bfa47463523484286d5143def (diff) |
Refactor all image builtin functions.
Refactor almost all the image builtin related functions to simplify the code
and get rid of most of the awful macros.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/libocl/src/ocl_image.cl | 811 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 174 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 36 | ||||
-rw-r--r-- | backend/src/llvm/llvm_scalarize.cpp | 13 |
4 files changed, 618 insertions, 416 deletions
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl index fd421bf4..95b98ff4 100644 --- a/backend/src/libocl/src/ocl_image.cl +++ b/backend/src/libocl/src/ocl_image.cl @@ -20,29 +20,90 @@ #include "ocl_integer.h" #include "ocl_common.h" +/////////////////////////////////////////////////////////////////////////////// +// Beignet builtin functions. +/////////////////////////////////////////////////////////////////////////////// + // 1D read -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, uint sampler_offset); +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + float u, uint sampler_offset); +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + int u, uint sampler_offset); +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + float u, uint sampler_offset); +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + int u, uint sampler_offset); +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + float u, uint sampler_offset); +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + int u, uint sampler_offset); // 2D & 1D Array read -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t 
sampler, int u, int v, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + float2 coord, uint sampler_offset); +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + int2 coord, uint sampler_offset); +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + float2 coord, uint sampler_offset); +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + int2 coord, uint sampler_offset); +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + float2 coord, uint sampler_offset); +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + int2 coord, uint sampler_offset); // 3D & 2D Array read -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); +OVERLOADABLE int4 
__gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + float4 coord, uint sampler_offset); +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + int4 coord, uint sampler_offset); +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + float4 coord, uint sampler_offset); +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + int4 coord, uint sampler_offset); +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + float4 coord, uint sampler_offset); +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + int4 coord, uint sampler_offset); + +// Don't know why we need to support 3 component coordinates, but it's in the old +// version, let's keep to support it. +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + float3 coord, uint sampler_offset) +{ + return __gen_ocl_read_imagei(surface_id, sampler, + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); +} +INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, + int3 coord, uint sampler_offset) +{ + return __gen_ocl_read_imagei(surface_id, sampler, + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); +} +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + float3 coord, uint sampler_offset) +{ + return __gen_ocl_read_imageui(surface_id, sampler, + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); +} +INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, + int3 coord, uint sampler_offset) +{ + return __gen_ocl_read_imageui(surface_id, sampler, + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); +} +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + float3 coord, uint sampler_offset) +{ + return __gen_ocl_read_imagef(surface_id, sampler, + (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); 
+} +INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, + int3 coord, uint sampler_offset) +{ + return __gen_ocl_read_imagef(surface_id, sampler, + (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); +} // 1D write OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color); @@ -50,14 +111,27 @@ OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color); OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color); // 2D & 1D Array write -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color); -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color); -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color); +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, int4 color); +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, uint4 color); +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, float4 color); // 3D & 2D Array write -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color); -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color); -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color); +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, int4 color); +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, uint4 color); +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, float4 color); + +INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 coord, int4 color) +{ + __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color); +} +INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3 coord, uint4 color) +{ + __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color); +} 
+INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 coord, float4 color) +{ + __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color); +} int __gen_ocl_get_image_width(uint surface_id); int __gen_ocl_get_image_height(uint surface_id); @@ -65,225 +139,436 @@ int __gen_ocl_get_image_channel_data_type(uint surface_id); int __gen_ocl_get_image_channel_order(uint surface_id); int __gen_ocl_get_image_depth(uint surface_id); -// 2D 3D Image Common Macro -#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND -#define GEN_FIX_1 1 -#else -#define GEN_FIX_1 0 -#endif #define GET_IMAGE(cl_image, surface_id) \ uint surface_id = (uint)cl_image -OVERLOADABLE float __gen_compute_array_index(const float index, image1d_array_t image) + +/////////////////////////////////////////////////////////////////////////////// +// helper functions to validate array index. +/////////////////////////////////////////////////////////////////////////////// +INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image) { GET_IMAGE(image, surface_id); float array_size = __gen_ocl_get_image_depth(surface_id); - return clamp(rint(index), 0.f, array_size - 1.f); + coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f); + return coord; } -OVERLOADABLE float __gen_compute_array_index(float index, image2d_array_t image) +INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image) { GET_IMAGE(image, surface_id); float array_size = __gen_ocl_get_image_depth(surface_id); - return clamp(rint(index), 0.f, array_size - 1.f); + coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); + return coord; } -OVERLOADABLE int __gen_compute_array_index(int index, image1d_array_t image) +INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image) +{ + GET_IMAGE(image, surface_id); + float array_size = __gen_ocl_get_image_depth(surface_id); + coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); 
+ return coord; +} + +INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image) { GET_IMAGE(image, surface_id); int array_size = __gen_ocl_get_image_depth(surface_id); - return clamp(index, 0, array_size - 1); + coord.s1 = clamp(coord.s1, 0, array_size - 1); + return coord; } -OVERLOADABLE int __gen_compute_array_index(int index, image2d_array_t image) +INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image) { GET_IMAGE(image, surface_id); int array_size = __gen_ocl_get_image_depth(surface_id); - return clamp(index, 0, array_size - 1); -} - -#define DECL_READ_IMAGE0(int_clamping_fix, \ - image_type, type, suffix, coord_type, n) \ - OVERLOADABLE type read_image ##suffix(image_type cl_image, \ - const sampler_t sampler, \ - coord_type coord) \ - { \ - GET_IMAGE(cl_image, surface_id); \ - GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai); \ - if (int_clamping_fix && \ - ((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && \ - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) \ - return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORD(surface_id, sampler, coord)); \ - return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORDF(surface_id, sampler, coord), 0); \ - } + coord.s2 = clamp(coord.s2, 0, array_size - 1); + return coord; +} -#define DECL_READ_IMAGE1(float_coord_rounding_fix, int_clamping_fix, \ - image_type, type, suffix, coord_type, n) \ - OVERLOADABLE type read_image ##suffix(image_type cl_image, \ - const sampler_t sampler, \ - coord_type coord) \ - { \ - GET_IMAGE(cl_image, surface_id); \ - GET_IMAGE_ARRAY_SIZE(cl_image, coord, float, ai) \ - coord_type tmpCoord = coord; \ - if (float_coord_rounding_fix | int_clamping_fix) { \ - if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \ - && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \ - if (float_coord_rounding_fix \ - && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \ - FIXUP_FLOAT_COORD(tmpCoord); \ - } \ - if 
(int_clamping_fix) { \ - coord_type intCoord; \ - if (sampler & CLK_NORMALIZED_COORDS_TRUE) { \ - DENORMALIZE_COORD(surface_id, intCoord, tmpCoord); \ - } else \ - intCoord = tmpCoord; \ - return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORDI(surface_id, sampler, intCoord));\ - } \ - } \ - } \ - return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORDF(surface_id, sampler, tmpCoord), 0);\ - } +INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image) +{ + GET_IMAGE(image, surface_id); + int array_size = __gen_ocl_get_image_depth(surface_id); + coord.s2 = clamp(coord.s2, 0, array_size - 1); + return coord; +} + +// For non array image type, we need to do nothing. +#define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \ +INLINE_OVERLOADABLE coord_type __gen_validate_array_index(coord_type coord, image_type image) \ +{ \ + return coord; \ +} + +GEN_VALIDATE_ARRAY_INDEX(float, image1d_t) +GEN_VALIDATE_ARRAY_INDEX(int, image1d_t) +GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t) +GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t) +GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t) +GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t) +GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t) +GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t) +GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t) +GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t) + +/////////////////////////////////////////////////////////////////////////////// +// Helper functions to work around some coordiate boundary issues. +// The major issue on Gen7/Gen7.5 are the sample message could not sampling +// integer type surfaces correctly with CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST. +// The work around is to use a LD message instead of normal sample message. 
+/////////////////////////////////////////////////////////////////////////////// +bool __gen_sampler_need_fix(const sampler_t sampler) +{ + return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) && + ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)); +} + +bool __gen_sampler_need_rounding_fix(const sampler_t sampler) +{ + return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0); +} + + +INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) +{ + if (tmpCoord < 0 && tmpCoord > -0x1p-20f) + tmpCoord += -0x1p-9f; + return tmpCoord; +} + +INLINE_OVERLOADABLE float2 __gen_fixup_float_coord(float2 tmpCoord) +{ + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) + tmpCoord.s0 += -0x1p-9f; + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) + tmpCoord.s1 += -0x1p-9f; + return tmpCoord; +} + +INLINE_OVERLOADABLE float3 __gen_fixup_float_coord(float3 tmpCoord) +{ + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) + tmpCoord.s0 += -0x1p-9f; + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) + tmpCoord.s1 += -0x1p-9f; + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) + tmpCoord.s2 += -0x1p-9f; + return tmpCoord; +} + +INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord) +{ + if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) + tmpCoord.s0 += -0x1p-9f; + if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) + tmpCoord.s1 += -0x1p-9f; + if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) + tmpCoord.s2 += -0x1p-9f; + return tmpCoord; +} + +// Functions to denormalize coordiates, it's needed when we need to use LD +// message (sampler offset is non-zero) and the coordiates are normalized +// coordiates. 
+INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t image, float srcCoord) +{ + GET_IMAGE(image, surface_id); + return srcCoord * __gen_ocl_get_image_width(surface_id); +} + +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image1d_array_t image, float2 srcCoord) +{ + GET_IMAGE(image, surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + return srcCoord; +} + +INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_buffer_t image, float srcCoord) +{ + GET_IMAGE(image, surface_id); + return srcCoord * __gen_ocl_get_image_width(surface_id); +} + +INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t image, float2 srcCoord) +{ + GET_IMAGE(image, surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + return srcCoord; +} + +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image2d_array_t image, float3 srcCoord) +{ + GET_IMAGE(image, surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + return srcCoord; +} + +INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t image, float3 srcCoord) +{ + GET_IMAGE(image, surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id); + return srcCoord; +} + +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image2d_array_t image, float4 srcCoord) +{ + GET_IMAGE(image, surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + return srcCoord; +} + +INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t image, float4 srcCoord) +{ + GET_IMAGE(image, surface_id); + srcCoord.s0 = 
srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id); + return srcCoord; +} + +// After denormalize, we have to fixup the negative boundary. +INLINE_OVERLOADABLE float __gen_fixup_neg_boundary(float coord) +{ + return coord < 0 ? -1 : coord; +} + +INLINE_OVERLOADABLE float2 __gen_fixup_neg_boundary(float2 coord) +{ + coord.s0 = coord.s0 < 0 ? -1 : coord.s0; + coord.s1 = coord.s1 < 0 ? -1 : coord.s1; + return coord; +} + +INLINE_OVERLOADABLE float4 __gen_fixup_neg_boundary(float4 coord) +{ + coord.s0 = coord.s0 < 0 ? -1 : coord.s0; + coord.s1 = coord.s1 < 0 ? -1 : coord.s1; + coord.s2 = coord.s2 < 0 ? -1 : coord.s2; + return coord; +} -#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type, n) \ - OVERLOADABLE type read_image ##suffix(image_type cl_image, \ - coord_type coord) \ - { \ - GET_IMAGE(cl_image, surface_id); \ - GET_IMAGE_ARRAY_SIZE(cl_image, coord, int, ai) \ - return __gen_ocl_read_image ##suffix( \ - EXPEND_READ_COORDF(surface_id, \ - CLK_NORMALIZED_COORDS_FALSE \ - | CLK_ADDRESS_NONE \ - | CLK_FILTER_NEAREST, (float)coord), 0); \ +INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) +{ + coord.s0 = coord.s0 < 0 ? -1 : coord.s0; + coord.s1 = coord.s1 < 0 ? -1 : coord.s1; + coord.s2 = coord.s2 < 0 ? 
-1 : coord.s2; + return coord; +} + +/////////////////////////////////////////////////////////////////////////////// +// Built-in Image Read/Write Functions +/////////////////////////////////////////////////////////////////////////////// + +// 2D 3D Image Common Macro +#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND +#define GEN_FIX_FLOAT_ROUNDING 1 +#define GEN_FIX_INT_CLAMPING 1 +#else +#define GEN_FIX_FLOAT_ROUNDING 0 +#define GEN_FIX_INT_CLAMPING 0 +#endif + +// For integer coordinates +#define DECL_READ_IMAGE0(int_clamping_fix, image_type, \ + image_data_type, suffix, coord_type) \ + OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ + const sampler_t sampler, \ + coord_type coord) \ + { \ + GET_IMAGE(cl_image, surface_id); \ + coord = __gen_validate_array_index(coord, cl_image); \ + if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \ + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1); \ + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \ } -#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \ - OVERLOADABLE void write_image ##suffix(image_type cl_image, coord_type coord, type color)\ - {\ - GET_IMAGE(cl_image, surface_id);\ - __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\ +// For float coordinates +#define DECL_READ_IMAGE1(int_clamping_fix, image_type, \ + image_data_type, suffix, coord_type) \ + OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ + const sampler_t sampler, \ + coord_type coord) \ + { \ + GET_IMAGE(cl_image, surface_id); \ + coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \ + if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \ + if (__gen_sampler_need_fix(sampler)) { \ + if (GEN_FIX_FLOAT_ROUNDING && \ + __gen_sampler_need_rounding_fix(sampler)) \ + tmpCoord = __gen_fixup_float_coord(tmpCoord); \ + if (int_clamping_fix) { \ + if (sampler & CLK_NORMALIZED_COORDS_TRUE) \ + tmpCoord = 
__gen_denormalize_coord(cl_image, tmpCoord); \ + tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \ + return __gen_ocl_read_image ##suffix( \ + surface_id, sampler, tmpCoord, 1); \ + } \ + } \ + } \ + return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \ } -#define DECL_IMAGE_INFO_COMMON(image_type) \ - OVERLOADABLE int get_image_channel_data_type(image_type image)\ - { \ - GET_IMAGE(image, surface_id);\ - return __gen_ocl_get_image_channel_data_type(surface_id); \ - }\ - OVERLOADABLE int get_image_channel_order(image_type image)\ - { \ - GET_IMAGE(image, surface_id);\ - return __gen_ocl_get_image_channel_order(surface_id); \ - } \ - OVERLOADABLE int get_image_width(image_type image) \ - { \ - GET_IMAGE(image, surface_id); \ - return __gen_ocl_get_image_width(surface_id); \ +#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \ + suffix, coord_type) \ + OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ + coord_type coord) \ + { \ + GET_IMAGE(cl_image, surface_id); \ + coord = __gen_validate_array_index(coord, cl_image); \ + return __gen_ocl_read_image ##suffix( \ + surface_id, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \ + | CLK_FILTER_NEAREST, coord, 0); \ } -// 1D -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix) \ - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1) \ - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float, 1) \ - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1) \ - DECL_WRITE_IMAGE(image_type, type, suffix, int) \ - DECL_WRITE_IMAGE(image_type, type, suffix, float) - -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord, 1 -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord < 0 ? 
-1 : coord), 1 -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id); -#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) - -#define FIXUP_FLOAT_COORD(tmpCoord) \ - { \ - if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \ - tmpCoord += -0x1p-9f; \ +#define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type) \ + OVERLOADABLE void write_image ##suffix(image_type cl_image, \ + coord_type coord, \ + image_data_type color) \ + { \ + GET_IMAGE(cl_image, surface_id); \ + coord_type fixedCoord = __gen_validate_array_index(coord, cl_image); \ + __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); \ } -DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i) -DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui) -DECL_IMAGE(0, image1d_t, float4, f) -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, int4, i) -DECL_IMAGE(GEN_FIX_1, image1d_buffer_t, uint4, ui) -DECL_IMAGE(0, image1d_buffer_t, float4, f) +#define int1 int +#define float1 float -// 1D Info -DECL_IMAGE_INFO_COMMON(image1d_t) -DECL_IMAGE_INFO_COMMON(image1d_buffer_t) -#undef EXPEND_READ_COORD -#undef EXPEND_READ_COORDF -#undef EXPEND_READ_COORDI -#undef DENORMALIZE_COORD -#undef EXPEND_WRITE_COORD -#undef FIXUP_FLOAT_COORD -#undef DECL_IMAGE -// End of 1D - -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) \ - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) \ - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \ - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) \ - DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \ - DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) -// 2D -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, 1 -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1 -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? 
-1 : coord.s0), \ - (int)(coord.s1 < 0 ? -1 : coord.s1), 1 -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \ - dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color - -#define FIXUP_FLOAT_COORD(tmpCoord) \ - { \ - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ - tmpCoord.s0 += -0x1p-9f; \ - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ - tmpCoord.s1 += -0x1p-9f; \ +#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n) \ + DECL_READ_IMAGE0(int_clamping_fix, image_type, \ + image_data_type, suffix, int ##n) \ + DECL_READ_IMAGE1(int_clamping_fix, image_type, \ + image_data_type, suffix, float ##n) \ + DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, suffix, int ##n) \ + DECL_WRITE_IMAGE(image_type, image_data_type, suffix, int ## n) \ + +// 1D +#define DECL_IMAGE_TYPE(image_type, n) \ + DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, int4, i, n) \ + DECL_IMAGE(GEN_FIX_INT_CLAMPING, image_type, uint4, ui, n) \ + DECL_IMAGE(0, image_type, float4, f, n) + +DECL_IMAGE_TYPE(image1d_t, 1) +DECL_IMAGE_TYPE(image1d_buffer_t, 1) +DECL_IMAGE_TYPE(image2d_t, 2) +DECL_IMAGE_TYPE(image3d_t, 4) +DECL_IMAGE_TYPE(image3d_t, 3) +DECL_IMAGE_TYPE(image2d_array_t, 4) +DECL_IMAGE_TYPE(image2d_array_t, 3) + +// For 1D Array: +// fixup_1darray_coord functions are to convert 1d array coord to 2d array coord +// and the caller must set the sampler offset to 2 by using this converted coord. +// It is used to work around an image 1d array restrication which could not set +// ai in the LD message. We solve it by fake the same image as a 2D array, and +// then access it by LD message as a 3D sufface, treat the ai as the w coordinate. +INLINE_OVERLOADABLE float4 __gen_fixup_1darray_coord(float2 coord, image1d_array_t image) +{ + float4 newCoord; + newCoord.s0 = coord.s0 < 0 ? 
-1 : coord.s0; + newCoord.s1 = 0; + newCoord.s2 = coord.s1; + newCoord.s3 = 0; + return newCoord; +} + +INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t image) +{ + int4 newCoord; + newCoord.s0 = coord.s0; + newCoord.s1 = 0; + newCoord.s2 = coord.s1; + newCoord.s3 = 0; + return newCoord; +} + +// For integer coordinates +#define DECL_READ_IMAGE0_1DArray(int_clamping_fix, \ + image_data_type, suffix, coord_type) \ + OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image, \ + const sampler_t sampler, \ + coord_type coord) \ + { \ + GET_IMAGE(cl_image, surface_id); \ + coord = __gen_validate_array_index(coord, cl_image); \ + if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \ + int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \ + return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, 2); \ + } \ + return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \ } -DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2) -DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2) -DECL_IMAGE(0, image2d_t, float4, f, 2) - -// 1D Array -#undef GET_IMAGE_ARRAY_SIZE -#undef EXPEND_READ_COORD -#undef EXPEND_READ_COORDF -#undef EXPEND_READ_COORDI -#undef DENORMALIZE_COORD -#undef EXPEND_WRITE_COORD -#undef FIXUP_FLOAT_COORD - -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, (int)0, ai, 2 -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)ai -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? 
-1 : coord.s0), 0, (int)ai, 2 -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, __gen_compute_array_index(coord.s1, cl_image), color -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ - coord_type ai = __gen_compute_array_index(coord.s1, image); - -#define FIXUP_FLOAT_COORD(tmpCoord) \ - { \ - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ - tmpCoord.s0 += -0x1p-9f; \ +// For float coordiates +#define DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \ + suffix, coord_type) \ + OVERLOADABLE image_data_type read_image ##suffix(image1d_array_t cl_image, \ + const sampler_t sampler, \ + coord_type coord) \ + { \ + GET_IMAGE(cl_image, surface_id); \ + coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \ + if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \ + if (__gen_sampler_need_fix(sampler)) { \ + if (GEN_FIX_FLOAT_ROUNDING && \ + __gen_sampler_need_rounding_fix(sampler)) \ + tmpCoord = __gen_fixup_float_coord(tmpCoord); \ + if (int_clamping_fix) { \ + if (sampler & CLK_NORMALIZED_COORDS_TRUE) \ + tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \ + float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); \ + return __gen_ocl_read_image ##suffix( \ + surface_id, sampler, newCoord, 2); \ + } \ + } \ + } \ + return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \ } -DECL_IMAGE(GEN_FIX_1, image1d_array_t, int4, i, 2) -DECL_IMAGE(GEN_FIX_1, image1d_array_t, uint4, ui, 2) -DECL_IMAGE(0, image1d_array_t, float4, f, 2) +#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) \ + DECL_READ_IMAGE0_1DArray(int_clamping_fix, image_data_type, suffix, int2) \ + DECL_READ_IMAGE1_1DArray(int_clamping_fix, image_data_type, \ + suffix, float2) \ + DECL_READ_IMAGE_NOSAMPLER(image1d_array_t, image_data_type, suffix, int2) \ + DECL_WRITE_IMAGE(image1d_array_t, image_data_type, suffix, 
int2) \ + +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, int4, i) +DECL_IMAGE_1DArray(GEN_FIX_INT_CLAMPING, uint4, ui) +DECL_IMAGE_1DArray(0, float4, f) + +/////////////////////////////////////////////////////////////////////////////// +// Built-in Image Query Functions +/////////////////////////////////////////////////////////////////////////////// +#define DECL_IMAGE_INFO_COMMON(image_type) \ + OVERLOADABLE int get_image_channel_data_type(image_type image) \ + { \ + GET_IMAGE(image, surface_id); \ + return __gen_ocl_get_image_channel_data_type(surface_id); \ + } \ + OVERLOADABLE int get_image_channel_order(image_type image) \ + { \ + GET_IMAGE(image, surface_id); \ + return __gen_ocl_get_image_channel_order(surface_id); \ + } \ + OVERLOADABLE int get_image_width(image_type image) \ + { \ + GET_IMAGE(image, surface_id); \ + return __gen_ocl_get_image_width(surface_id); \ + } -// 2D Info +DECL_IMAGE_INFO_COMMON(image1d_t) +DECL_IMAGE_INFO_COMMON(image1d_buffer_t) +DECL_IMAGE_INFO_COMMON(image1d_array_t) DECL_IMAGE_INFO_COMMON(image2d_t) +DECL_IMAGE_INFO_COMMON(image3d_t) +DECL_IMAGE_INFO_COMMON(image2d_array_t) + +// 2D extra Info OVERLOADABLE int get_image_height(image2d_t image) { GET_IMAGE(image, surface_id); @@ -293,90 +578,9 @@ OVERLOADABLE int2 get_image_dim(image2d_t image) { return (int2){get_image_width(image), get_image_height(image)}; } +// End of 2D -// 1D Array info -DECL_IMAGE_INFO_COMMON(image1d_array_t) -OVERLOADABLE size_t get_image_array_size(image1d_array_t image) -{ - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_depth(surface_id); -} - -#undef EXPEND_READ_COORD -#undef EXPEND_READ_COORDI -#undef EXPEND_READ_COORDF -#undef DENORMALIZE_COORD -#undef EXPEND_WRITE_COORD -#undef FIXUP_FLOAT_COORD -#undef GET_IMAGE_ARRAY_SIZE -// End of 2D and 1D Array - -// 3D -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2, 1 -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, 
(float)coord.s1, (float)coord.s2 -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \ - (int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2), 1 -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \ - dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); \ - dstCoord.z = srcCoord.z * __gen_ocl_get_image_depth(id); -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color - -#define FIXUP_FLOAT_COORD(tmpCoord) \ - { \ - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ - tmpCoord.s0 += -0x1p-9f; \ - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ - tmpCoord.s1 += -0x1p-9f; \ - if (tmpCoord.s2 < 0 && tmpCoord.s2 > -0x1p-20f) \ - tmpCoord.s2 += -0x1p-9f; \ - } -#define GET_IMAGE_ARRAY_SIZE(a,b,c,d) - -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 4) -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 4) -DECL_IMAGE(0, image3d_t, float4, f, 4) - -DECL_IMAGE(GEN_FIX_1, image3d_t, int4, i, 3) -DECL_IMAGE(GEN_FIX_1, image3d_t, uint4, ui, 3) -DECL_IMAGE(0, image3d_t, float4, f, 3) - -#undef EXPEND_READ_COORD -#undef EXPEND_READ_COORDF -#undef EXPEND_READ_COORDI -#undef DENORMALIZE_COORD -#undef EXPEND_WRITE_COORD -#undef FIXUP_FLOAT_COORD -#undef GET_IMAGE_ARRAY_SIZE - -#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, ai, 1 -#define EXPEND_READ_COORDF(id, sampler, coord) id, sampler, (float)coord.s0, (float)coord.s1, (float)ai -#define EXPEND_READ_COORDI(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \ - (int)(coord.s1 < 0 ? 
-1 : coord.s1), (int)ai, 1 -#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord.x = srcCoord.x * __gen_ocl_get_image_width(id); \ - dstCoord.y = srcCoord.y * __gen_ocl_get_image_height(id); -#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, __gen_compute_array_index(coord.s2, cl_image), color - -#define FIXUP_FLOAT_COORD(tmpCoord) \ - { \ - if (tmpCoord.s0 < 0 && tmpCoord.s0 > -0x1p-20f) \ - tmpCoord.s0 += -0x1p-9f; \ - if (tmpCoord.s1 < 0 && tmpCoord.s1 > -0x1p-20f) \ - tmpCoord.s1 += -0x1p-9f; \ - } -#define GET_IMAGE_ARRAY_SIZE(image, coord, coord_type, ai) \ - coord_type ai = __gen_compute_array_index(coord.s2, image); - -// 2D Array -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 4) -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 4) -DECL_IMAGE(0, image2d_array_t, float4, f, 4) - -DECL_IMAGE(GEN_FIX_1, image2d_array_t, int4, i, 3) -DECL_IMAGE(GEN_FIX_1, image2d_array_t, uint4, ui, 3) -DECL_IMAGE(0, image2d_array_t, float4, f, 3) - -// 3D Info -DECL_IMAGE_INFO_COMMON(image3d_t) +// 3D extra Info OVERLOADABLE int get_image_height(image3d_t image) { GET_IMAGE(image, surface_id); @@ -389,11 +593,13 @@ OVERLOADABLE int get_image_depth(image3d_t image) } OVERLOADABLE int4 get_image_dim(image3d_t image) { - return (int4){get_image_width(image), get_image_height(image), get_image_depth(image), 0}; + return (int4) (get_image_width(image), + get_image_height(image), + get_image_depth(image), + 0); } -// 2D Array Info -DECL_IMAGE_INFO_COMMON(image2d_array_t) +// 2D Array extra Info OVERLOADABLE int get_image_height(image2d_array_t image) { GET_IMAGE(image, surface_id); @@ -409,21 +615,10 @@ OVERLOADABLE size_t get_image_array_size(image2d_array_t image) return __gen_ocl_get_image_depth(surface_id); } -#undef EXPEND_READ_COORD -#undef EXPEND_READ_COORDF -#undef EXPEND_READ_COORDI -#undef DENORMALIZE_COORD -#undef EXPEND_WRITE_COORD -#undef FIXUP_FLOAT_COORD -#undef GET_IMAGE_ARRAY_SIZE -// End of 3D and 2D Array - -#undef DECL_IMAGE -#undef 
DECL_READ_IMAGE -#undef DECL_READ_IMAGE_NOSAMPLER -#undef DECL_WRITE_IMAGE -#undef GEN_FIX_1 -// End of Image - - -#undef GET_IMAGE +// 1D Array info +OVERLOADABLE size_t get_image_array_size(image1d_array_t image) +{ + GET_IMAGE(image, surface_id); + return __gen_ocl_get_image_depth(surface_id); +} +// End of 1DArray diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 1ea1f339..167b8f02 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -257,9 +257,10 @@ namespace gbe /*! Get number of element to process dealing either with a vector or a scalar * value */ - static ir::Type getVectorInfo(ir::Context &ctx, Type *llvmType, Value *value, uint32_t &elemNum, bool useUnsigned = false) + static ir::Type getVectorInfo(ir::Context &ctx, Value *value, uint32_t &elemNum, bool useUnsigned = false) { ir::Type type; + Type *llvmType = value->getType(); if (llvmType->isVectorTy() == true) { VectorType *vectorType = cast<VectorType>(llvmType); Type *elementType = vectorType->getElementType(); @@ -629,6 +630,7 @@ namespace gbe void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode); uint8_t appendSampler(CallSite::arg_iterator AI); + uint8_t getImageID(CallInst &I); // These instructions are not supported at all void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;} @@ -2507,8 +2509,8 @@ error: Value *srcValue = I.getOperand(0); Value *dstValue = &I; uint32_t srcElemNum = 0, dstElemNum = 0 ; - ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue, srcElemNum); - ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue, dstElemNum); + ir::Type srcType = getVectorInfo(ctx, srcValue, srcElemNum); + ir::Type dstType = getVectorInfo(ctx, dstValue, dstElemNum); // As long and double are not compatible in register storage // and we do not support double yet, simply put an assert here GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType == ir::TYPE_DOUBLE)); @@ -2908,7 
+2910,7 @@ error: { // dst is a 4 elements vector. We allocate all 4 registers here. uint32_t elemNum; - (void)getVectorInfo(ctx, I.getType(), &I, elemNum); + (void)getVectorInfo(ctx, &I, elemNum); GBE_ASSERT(elemNum == 4); this->newRegister(&I); break; @@ -3036,6 +3038,15 @@ error: return index; } + uint8_t GenWriter::getImageID(CallInst &I) { + PtrOrigMapIter iter = pointerOrigMap.find(&I); + GBE_ASSERT(iter != pointerOrigMap.end()); + SmallVectorImpl<Value *> &origins = iter->second; + GBE_ASSERT(origins.size() == 1); + const ir::Register imageReg = this->getRegister(origins[0]); + return ctx.getFunction().getImageSet()->getIdx(imageReg); + } + void GenWriter::emitCallInst(CallInst &I) { if (Function *F = I.getCalledFunction()) { if (F->getIntrinsicID() != 0) { @@ -3199,7 +3210,6 @@ error: default: NOT_IMPLEMENTED; } } else { - int image_dim; // Get the name of the called function and handle it Value *Callee = I.getCalledValue(); const std::string fnName = Callee->getName(); @@ -3315,13 +3325,13 @@ error: case GEN_OCL_GET_IMAGE_CHANNEL_DATA_TYPE: case GEN_OCL_GET_IMAGE_CHANNEL_ORDER: { - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI; + const uint8_t imageID = getImageID(I); + GBE_ASSERT(AI != AE); ++AI; const ir::Register reg = this->getRegister(&I, 0); int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH; - const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg); - ir::ImageInfoKey key(surfaceID, infoType); + ir::ImageInfoKey key(imageID, infoType); const ir::Register infoReg = ctx.getFunction().getImageSet()->appendInfo(key, &ctx); - ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg); + ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg); break; } @@ -3331,69 +3341,75 @@ error: case GEN_OCL_READ_IMAGE_I_1D_I: case GEN_OCL_READ_IMAGE_UI_1D_I: case GEN_OCL_READ_IMAGE_F_1D_I: - image_dim = 1; - goto handle_read_image; case GEN_OCL_READ_IMAGE_I_2D: case GEN_OCL_READ_IMAGE_UI_2D: case 
GEN_OCL_READ_IMAGE_F_2D: case GEN_OCL_READ_IMAGE_I_2D_I: case GEN_OCL_READ_IMAGE_UI_2D_I: case GEN_OCL_READ_IMAGE_F_2D_I: - image_dim = 2; - goto handle_read_image; case GEN_OCL_READ_IMAGE_I_3D: case GEN_OCL_READ_IMAGE_UI_3D: case GEN_OCL_READ_IMAGE_F_3D: case GEN_OCL_READ_IMAGE_I_3D_I: case GEN_OCL_READ_IMAGE_UI_3D_I: case GEN_OCL_READ_IMAGE_F_3D_I: - image_dim = 3; -handle_read_image: { - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI; - const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg); + const uint8_t imageID = getImageID(I); + GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE); const uint8_t sampler = this->appendSampler(AI); - ++AI; - - ir::Register ucoord; - ir::Register vcoord; - ir::Register wcoord; - - GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI; - if (image_dim > 1) { - GBE_ASSERT(AI != AE); - vcoord = this->getRegister(*AI); - ++AI; - } else { - vcoord = ir::ocl::invalid; - } - - if (image_dim > 2) { - GBE_ASSERT(AI != AE); - wcoord = this->getRegister(*AI); - ++AI; - } else { - wcoord = ir::ocl::invalid; - } + ++AI; GBE_ASSERT(AI != AE); + uint32_t coordNum; + (void)getVectorInfo(ctx, *AI, coordNum); + if (coordNum == 4) + coordNum = 3; + const uint32_t imageDim = coordNum; + GBE_ASSERT(imageDim >= 1 && imageDim <= 3); - vector<ir::Register> dstTupleData, srcTupleData; - const uint32_t elemNum = 4; - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { - const ir::Register reg = this->getRegister(&I, elemID); - dstTupleData.push_back(reg); - } - srcTupleData.push_back(ucoord); - srcTupleData.push_back(vcoord); - srcTupleData.push_back(wcoord); uint8_t samplerOffset = 0; + Value *coordVal = *AI; + ++AI; GBE_ASSERT(AI != AE); + Value *samplerOffsetVal = *AI; #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND - GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI); + Constant *CPV = dyn_cast<Constant>(samplerOffsetVal); assert(CPV); const ir::Immediate &x = 
processConstantImm(CPV); GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type"); samplerOffset = x.getIntegerValue(); #endif + bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D; + bool requiredFloatCoord = samplerOffset == 0; + + vector<ir::Register> dstTupleData, srcTupleData; + for (uint32_t elemID = 0; elemID < 3; elemID++) { + ir::Register reg; + + if (elemID < imageDim) + reg = this->getRegister(coordVal, elemID); + else + reg = ir::ocl::invalid; + + if (isFloatCoord == requiredFloatCoord) + srcTupleData.push_back(reg); + else if (!requiredFloatCoord) { + ir::Register intCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); + ctx.CVT(ir::TYPE_S32, ir::TYPE_FLOAT, intCoordReg, reg); + srcTupleData.push_back(intCoordReg); + } else { + ir::Register floatCoordReg = ctx.reg(ir::RegisterFamily::FAMILY_DWORD); + ctx.CVT(ir::TYPE_FLOAT, ir::TYPE_S32, floatCoordReg, reg); + srcTupleData.push_back(floatCoordReg); + } + } + + uint32_t elemNum; + (void)getVectorInfo(ctx, &I, elemNum); + GBE_ASSERT(elemNum == 4); + + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + const ir::Register reg = this->getRegister(&I, elemID); + dstTupleData.push_back(reg); + } const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum); const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3); @@ -3426,58 +3442,46 @@ handle_read_image: GBE_ASSERT(0); // never been here. 
} - bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D; - - ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT, - isFloatCoord, sampler, samplerOffset); + ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT, + requiredFloatCoord, sampler, samplerOffset); break; } case GEN_OCL_WRITE_IMAGE_I_1D: case GEN_OCL_WRITE_IMAGE_UI_1D: case GEN_OCL_WRITE_IMAGE_F_1D: - image_dim = 1; - goto handle_write_image; case GEN_OCL_WRITE_IMAGE_I_2D: case GEN_OCL_WRITE_IMAGE_UI_2D: case GEN_OCL_WRITE_IMAGE_F_2D: - image_dim = 2; - goto handle_write_image; case GEN_OCL_WRITE_IMAGE_I_3D: case GEN_OCL_WRITE_IMAGE_UI_3D: case GEN_OCL_WRITE_IMAGE_F_3D: - image_dim = 3; -handle_write_image: { - GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI; - const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg); - ir::Register ucoord, vcoord, wcoord; - - GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI; + const uint8_t imageID = getImageID(I); + GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE); + uint32_t coordNum; + (void)getVectorInfo(ctx, *AI, coordNum); + if (coordNum == 4) + coordNum = 3; + const uint32_t imageDim = coordNum; + vector<ir::Register> srcTupleData; + GBE_ASSERT(imageDim >= 1 && imageDim <= 3); - if (image_dim > 1) { - GBE_ASSERT(AI != AE); - vcoord = this->getRegister(*AI); - ++AI; - } else - vcoord = ir::ocl::invalid; - - if (image_dim > 2) { - GBE_ASSERT(AI != AE); - wcoord = this->getRegister(*AI); - ++AI; - } else { - wcoord = ir::ocl::invalid; - } + for (uint32_t elemID = 0; elemID < 3; elemID++) { + ir::Register reg; - GBE_ASSERT(AI != AE); - vector<ir::Register> srcTupleData; + if (elemID < imageDim) + reg = this->getRegister(*AI, elemID); + else + reg = ir::ocl::invalid; - srcTupleData.push_back(ucoord); - srcTupleData.push_back(vcoord); - srcTupleData.push_back(wcoord); + srcTupleData.push_back(reg); + } + ++AI; GBE_ASSERT(AI != AE); + uint32_t elemNum; + 
(void)getVectorInfo(ctx, *AI, elemNum); + GBE_ASSERT(elemNum == 4); - const uint32_t elemNum = 4; for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { const ir::Register reg = this->getRegister(*AI, elemID); srcTupleData.push_back(reg); @@ -3504,7 +3508,7 @@ handle_write_image: GBE_ASSERT(0); // never been here. } - ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32); + ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32); break; } case GEN_OCL_MUL_HI_INT: diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 7434c783..8d55c3f5 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -49,35 +49,35 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtDv2_fj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtDv2_fj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtDv2_fj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtDv4_fj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtDv4_fj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtDv4_fj) // work around read image with the LD message. The coords are integer type. 
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij) DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtDv2_ij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtDv2_ij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtDv2_ij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtDv4_ij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtDv4_ij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtDv4_ij) // To write_image functions. 
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j) DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijiiDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijDv2_iDv4_i) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijDv2_iDv4_j) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjDv2_iDv4_f) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjiiiDv4_f) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijDv4_iS_) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijDv4_iDv4_j) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjDv4_iDv4_f) // To get image info function DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width) diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp index 5450a2b2..baf526b6 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -648,7 +648,7 @@ namespace gbe { // Get the function arguments CallSite CS(call); - CallSite::arg_iterator CI = CS.arg_begin() + 2; + CallSite::arg_iterator CI = CS.arg_begin() + 1; switch (it->second) { default: break; @@ -661,8 +661,7 @@ namespace gbe { case GEN_OCL_READ_IMAGE_I_3D: case GEN_OCL_READ_IMAGE_UI_3D: case GEN_OCL_READ_IMAGE_F_3D: - - case GEN_OCL_READ_IMAGE_I_1D_I: + case GEN_OCL_READ_IMAGE_I_1D_I: case GEN_OCL_READ_IMAGE_UI_1D_I: case GEN_OCL_READ_IMAGE_F_1D_I: case 
GEN_OCL_READ_IMAGE_I_2D_I: @@ -674,6 +673,9 @@ namespace gbe { case GEN_OCL_GET_IMAGE_WIDTH: case GEN_OCL_GET_IMAGE_HEIGHT: { + ++CI; + if ((*CI)->getType()->isVectorTy()) + *CI = InsertToVector(call, *CI); setAppendPoint(call); extractFromVector(call); break; @@ -681,15 +683,16 @@ namespace gbe { case GEN_OCL_WRITE_IMAGE_I_3D: case GEN_OCL_WRITE_IMAGE_UI_3D: case GEN_OCL_WRITE_IMAGE_F_3D: - CI++; case GEN_OCL_WRITE_IMAGE_I_2D: case GEN_OCL_WRITE_IMAGE_UI_2D: case GEN_OCL_WRITE_IMAGE_F_2D: - CI++; case GEN_OCL_WRITE_IMAGE_I_1D: case GEN_OCL_WRITE_IMAGE_UI_1D: case GEN_OCL_WRITE_IMAGE_F_1D: { + if ((*CI)->getType()->isVectorTy()) + *CI = InsertToVector(call, *CI); + ++CI; *CI = InsertToVector(call, *CI); break; } |