diff options
author | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-12 15:41:57 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-12-18 15:42:15 +0800 |
commit | 7ae159dfd17089e04991617cfc2e021f498c6e61 (patch) | |
tree | 92cbd3536829bf12ec004718bee8300dd40cc68a | |
parent | c09acfac5ac91177166d358c45541b6ecc6b2dc1 (diff) |
GBE: switch to use CLANG native image types.
Clang has provided all of the native image types since version 3.3, so there
is no longer any need to keep the original hacky implementation.
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/ir/function.hpp | 24 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 1 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 1 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl_types.h | 21 | ||||
-rw-r--r-- | backend/src/libocl/src/ocl_image.cl | 275 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 170 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.hpp | 24 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 36 | ||||
-rw-r--r-- | backend/src/llvm/llvm_scalarize.cpp | 47 |
9 files changed, 175 insertions, 424 deletions
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 1163a195..0f86fefd 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -180,6 +180,30 @@ namespace ir { std::string accessQual; std::string typeQual; std::string argName; // My different from arg->getName() + + bool isImage1dT() const { + return typeName.compare("image1d_t") == 0; + } + bool isImage1dArrayT() const { + return typeName.compare("image1d_array_t") == 0; + } + bool isImage1dBufferT() const { + return typeName.compare("image1d_buffer_t") == 0; + } + bool isImage2dT() const { + return typeName.compare("image2d_t") == 0; + } + bool isImage2dArrayT() const { + return typeName.compare("image2d_array_t") == 0; + } + bool isImage3dT() const { + return typeName.compare("image3d_t") == 0; + } + + bool isImageType() const { + return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() || + isImage2dT() || isImage2dArrayT() || isImage3dT(); + } }; /*! Create a function input argument */ diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 2bd00616..82e7ddaa 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1255,7 +1255,6 @@ namespace ir { case MEM_LOCAL: return out << "local"; case MEM_CONSTANT: return out << "constant"; case MEM_PRIVATE: return out << "private"; - case IMAGE: return out << "image"; case MEM_INVALID: return out << "invalid"; }; return out; diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 11e95092..47312f5b 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -58,7 +58,6 @@ namespace ir { MEM_LOCAL, //!< Local memory (thread group memory) MEM_CONSTANT, //!< Immutable global memory MEM_PRIVATE, //!< Per thread private memory - IMAGE, //!< For texture image. 
MEM_INVALID }; diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h index 63478c97..49ac9070 100644 --- a/backend/src/libocl/include/ocl_types.h +++ b/backend/src/libocl/include/ocl_types.h @@ -87,27 +87,8 @@ DEF(double); // FIXME: // This is a transitional hack to bypass the LLVM 3.3 built-in types. // See the Khronos SPIR specification for handling of these types. -#define __texture __attribute__((address_space(4))) -struct _image1d_t; -typedef __texture struct _image1d_t* __image1d_t; -struct _image1d_buffer_t; -typedef __texture struct _image1d_buffer_t* __image1d_buffer_t; -struct _image1d_array_t; -typedef __texture struct _image1d_array_t* __image1d_array_t; -struct _image2d_t; -typedef __texture struct _image2d_t* __image2d_t; -struct _image2d_array_t; -typedef __texture struct _image2d_array_t* __image2d_array_t; -struct _image3d_t; -typedef __texture struct _image3d_t* __image3d_t; -typedef const ushort __sampler_t; -#define image1d_t __image1d_t -#define image1d_buffer_t __image1d_buffer_t -#define image1d_array_t __image1d_array_t -#define image2d_t __image2d_t -#define image2d_array_t __image2d_array_t -#define image3d_t __image3d_t #define sampler_t __sampler_t +typedef const ushort __sampler_t; ///////////////////////////////////////////////////////////////////////////// // OpenCL built-in event types diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl index 95b98ff4..c4ca2f83 100644 --- a/backend/src/libocl/src/ocl_image.cl +++ b/backend/src/libocl/src/ocl_image.cl @@ -20,176 +20,94 @@ #include "ocl_integer.h" #include "ocl_common.h" +#define int1 int +#define float1 float + /////////////////////////////////////////////////////////////////////////////// // Beignet builtin functions. 
/////////////////////////////////////////////////////////////////////////////// -// 1D read -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - float u, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - int u, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - float u, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - int u, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - float u, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - int u, uint sampler_offset); - -// 2D & 1D Array read -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - float2 coord, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - int2 coord, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - float2 coord, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - int2 coord, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - float2 coord, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - int2 coord, uint sampler_offset); - -// 3D & 2D Array read -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - float4 coord, uint sampler_offset); -OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - int4 coord, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - float4 coord, uint sampler_offset); -OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - int4 coord, uint sampler_offset); -OVERLOADABLE float4 
__gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - float4 coord, uint sampler_offset); -OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - int4 coord, uint sampler_offset); - -// Don't know why we need to support 3 component coordinates, but it's in the old -// version, let's keep to support it. -INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - float3 coord, uint sampler_offset) -{ - return __gen_ocl_read_imagei(surface_id, sampler, - (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); -} -INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, - int3 coord, uint sampler_offset) -{ - return __gen_ocl_read_imagei(surface_id, sampler, - (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); -} -INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - float3 coord, uint sampler_offset) -{ - return __gen_ocl_read_imageui(surface_id, sampler, - (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); -} -INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, - int3 coord, uint sampler_offset) -{ - return __gen_ocl_read_imageui(surface_id, sampler, - (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); -} -INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - float3 coord, uint sampler_offset) -{ - return __gen_ocl_read_imagef(surface_id, sampler, - (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); -} -INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, - int3 coord, uint sampler_offset) -{ - return __gen_ocl_read_imagef(surface_id, sampler, - (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset); -} - -// 1D write -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color); -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color); -OVERLOADABLE void 
__gen_ocl_write_imagef(uint surface_id, int u, float4 color); - -// 2D & 1D Array write -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, int4 color); -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, uint4 color); -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, float4 color); - -// 3D & 2D Array write -OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, int4 color); -OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, uint4 color); -OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, float4 color); - -INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 coord, int4 color) -{ - __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color); -} -INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3 coord, uint4 color) -{ - __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color); -} -INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 coord, float4 color) -{ - __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color); -} - -int __gen_ocl_get_image_width(uint surface_id); -int __gen_ocl_get_image_height(uint surface_id); -int __gen_ocl_get_image_channel_data_type(uint surface_id); -int __gen_ocl_get_image_channel_order(uint surface_id); -int __gen_ocl_get_image_depth(uint surface_id); - - -#define GET_IMAGE(cl_image, surface_id) \ - uint surface_id = (uint)cl_image - +#define DECL_GEN_OCL_RW_IMAGE(image_type, n) \ + OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler, \ + float ##n coord, uint sampler_offset); \ + OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler, \ + int ##n coord, uint sampler_offset); \ + OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler, \ + float ##n coord, uint sampler_offset); \ + OVERLOADABLE uint4 
__gen_ocl_read_imageui(image_type image, sampler_t sampler, \ + int ##n coord, uint sampler_offset); \ + OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler, \ + float ##n coord, uint sampler_offset); \ + OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler, \ + int ##n coord, uint sampler_offset); \ + OVERLOADABLE void __gen_ocl_write_imagei(image_type image, int ##n coord , int4 color); \ + OVERLOADABLE void __gen_ocl_write_imageui(image_type image, int ##n coord, uint4 color);\ + OVERLOADABLE void __gen_ocl_write_imagef(image_type image, int ##n coord, float4 color); + +#define DECL_GEN_OCL_QUERY_IMAGE(image_type) \ + OVERLOADABLE int __gen_ocl_get_image_width(image_type image); \ + OVERLOADABLE int __gen_ocl_get_image_height(image_type image); \ + OVERLOADABLE int __gen_ocl_get_image_channel_data_type(image_type image); \ + OVERLOADABLE int __gen_ocl_get_image_channel_order(image_type image); \ + OVERLOADABLE int __gen_ocl_get_image_depth(image_type image); \ + +DECL_GEN_OCL_RW_IMAGE(image1d_t, 1) +DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 1) +DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 2) +DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 4) +DECL_GEN_OCL_RW_IMAGE(image2d_t, 2) +DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 3) +DECL_GEN_OCL_RW_IMAGE(image3d_t, 3) +DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 4) +DECL_GEN_OCL_RW_IMAGE(image3d_t, 4) + +DECL_GEN_OCL_QUERY_IMAGE(image1d_t) +DECL_GEN_OCL_QUERY_IMAGE(image1d_buffer_t) +DECL_GEN_OCL_QUERY_IMAGE(image1d_array_t) +DECL_GEN_OCL_QUERY_IMAGE(image2d_t) +DECL_GEN_OCL_QUERY_IMAGE(image2d_array_t) +DECL_GEN_OCL_QUERY_IMAGE(image3d_t) /////////////////////////////////////////////////////////////////////////////// // helper functions to validate array index. 
/////////////////////////////////////////////////////////////////////////////// INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image) { - GET_IMAGE(image, surface_id); - float array_size = __gen_ocl_get_image_depth(surface_id); + float array_size = __gen_ocl_get_image_depth(image); coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f); return coord; } INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image) { - GET_IMAGE(image, surface_id); - float array_size = __gen_ocl_get_image_depth(surface_id); + float array_size = __gen_ocl_get_image_depth(image); coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); return coord; } INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image) { - GET_IMAGE(image, surface_id); - float array_size = __gen_ocl_get_image_depth(surface_id); + float array_size = __gen_ocl_get_image_depth(image); coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); return coord; } INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image) { - GET_IMAGE(image, surface_id); - int array_size = __gen_ocl_get_image_depth(surface_id); + int array_size = __gen_ocl_get_image_depth(image); coord.s1 = clamp(coord.s1, 0, array_size - 1); return coord; } INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image) { - GET_IMAGE(image, surface_id); - int array_size = __gen_ocl_get_image_depth(surface_id); + int array_size = __gen_ocl_get_image_depth(image); coord.s2 = clamp(coord.s2, 0, array_size - 1); return coord; } INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image) { - GET_IMAGE(image, surface_id); - int array_size = __gen_ocl_get_image_depth(surface_id); + int array_size = __gen_ocl_get_image_depth(image); coord.s2 = clamp(coord.s2, 0, array_size - 1); return coord; } @@ -273,62 +191,54 @@ INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord) // 
coordiates. INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t image, float srcCoord) { - GET_IMAGE(image, surface_id); - return srcCoord * __gen_ocl_get_image_width(surface_id); + return srcCoord * __gen_ocl_get_image_width(image); } INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image1d_array_t image, float2 srcCoord) { - GET_IMAGE(image, surface_id); - srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image); return srcCoord; } INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_buffer_t image, float srcCoord) { - GET_IMAGE(image, surface_id); - return srcCoord * __gen_ocl_get_image_width(surface_id); + return srcCoord * __gen_ocl_get_image_width(image); } INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t image, float2 srcCoord) { - GET_IMAGE(image, surface_id); - srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); - srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image); return srcCoord; } INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image2d_array_t image, float3 srcCoord) { - GET_IMAGE(image, surface_id); - srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); - srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image); return srcCoord; } INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t image, float3 srcCoord) { - GET_IMAGE(image, surface_id); - srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); - srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); - srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id); + srcCoord.s0 = srcCoord.s0 * 
__gen_ocl_get_image_width(image); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image); + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(image); return srcCoord; } INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image2d_array_t image, float4 srcCoord) { - GET_IMAGE(image, surface_id); - srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); - srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image); return srcCoord; } INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t image, float4 srcCoord) { - GET_IMAGE(image, surface_id); - srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id); - srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id); - srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id); + srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image); + srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image); + srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(image); return srcCoord; } @@ -381,11 +291,10 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) const sampler_t sampler, \ coord_type coord) \ { \ - GET_IMAGE(cl_image, surface_id); \ coord = __gen_validate_array_index(coord, cl_image); \ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \ - return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1); \ - return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 1); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0); \ } // For float coordinates @@ -395,7 +304,6 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) const sampler_t sampler, \ coord_type coord) \ { \ - GET_IMAGE(cl_image, surface_id); \ coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \ if 
(GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \ if (__gen_sampler_need_fix(sampler)) { \ @@ -407,11 +315,11 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \ tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \ return __gen_ocl_read_image ##suffix( \ - surface_id, sampler, tmpCoord, 1); \ + cl_image, sampler, tmpCoord, 1); \ } \ } \ } \ - return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); \ } #define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \ @@ -419,10 +327,9 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ coord_type coord) \ { \ - GET_IMAGE(cl_image, surface_id); \ coord = __gen_validate_array_index(coord, cl_image); \ return __gen_ocl_read_image ##suffix( \ - surface_id, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \ + cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \ | CLK_FILTER_NEAREST, coord, 0); \ } @@ -431,15 +338,10 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord) coord_type coord, \ image_data_type color) \ { \ - GET_IMAGE(cl_image, surface_id); \ coord_type fixedCoord = __gen_validate_array_index(coord, cl_image); \ - __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); \ + __gen_ocl_write_image ##suffix(cl_image, fixedCoord, color); \ } -#define int1 int -#define float1 float - - #define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n) \ DECL_READ_IMAGE0(int_clamping_fix, image_type, \ image_data_type, suffix, int ##n) \ @@ -495,13 +397,12 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i const sampler_t sampler, \ coord_type coord) \ { \ - GET_IMAGE(cl_image, surface_id); \ coord = __gen_validate_array_index(coord, cl_image); \ if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \ 
int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \ - return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, 2); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, newCoord, 2); \ } \ - return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0); \ } // For float coordiates @@ -511,7 +412,6 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i const sampler_t sampler, \ coord_type coord) \ { \ - GET_IMAGE(cl_image, surface_id); \ coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \ if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \ if (__gen_sampler_need_fix(sampler)) { \ @@ -523,11 +423,11 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \ float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); \ return __gen_ocl_read_image ##suffix( \ - surface_id, sampler, newCoord, 2); \ + cl_image, sampler, newCoord, 2); \ } \ } \ } \ - return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \ + return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); \ } #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) \ @@ -547,18 +447,15 @@ DECL_IMAGE_1DArray(0, float4, f) #define DECL_IMAGE_INFO_COMMON(image_type) \ OVERLOADABLE int get_image_channel_data_type(image_type image) \ { \ - GET_IMAGE(image, surface_id); \ - return __gen_ocl_get_image_channel_data_type(surface_id); \ + return __gen_ocl_get_image_channel_data_type(image); \ } \ OVERLOADABLE int get_image_channel_order(image_type image) \ { \ - GET_IMAGE(image, surface_id); \ - return __gen_ocl_get_image_channel_order(surface_id); \ + return __gen_ocl_get_image_channel_order(image); \ } \ OVERLOADABLE int get_image_width(image_type image) \ { \ - GET_IMAGE(image, surface_id); \ - return __gen_ocl_get_image_width(surface_id); \ + 
return __gen_ocl_get_image_width(image); \ } DECL_IMAGE_INFO_COMMON(image1d_t) @@ -571,8 +468,7 @@ DECL_IMAGE_INFO_COMMON(image2d_array_t) // 2D extra Info OVERLOADABLE int get_image_height(image2d_t image) { - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_height(surface_id); + return __gen_ocl_get_image_height(image); } OVERLOADABLE int2 get_image_dim(image2d_t image) { @@ -583,13 +479,11 @@ OVERLOADABLE int2 get_image_dim(image2d_t image) // 3D extra Info OVERLOADABLE int get_image_height(image3d_t image) { - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_height(surface_id); + return __gen_ocl_get_image_height(image); } OVERLOADABLE int get_image_depth(image3d_t image) { - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_depth(surface_id); + return __gen_ocl_get_image_depth(image); } OVERLOADABLE int4 get_image_dim(image3d_t image) { @@ -602,8 +496,7 @@ OVERLOADABLE int4 get_image_dim(image3d_t image) // 2D Array extra Info OVERLOADABLE int get_image_height(image2d_array_t image) { - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_height(surface_id); + return __gen_ocl_get_image_height(image); } OVERLOADABLE int2 get_image_dim(image2d_array_t image) { @@ -611,14 +504,12 @@ OVERLOADABLE int2 get_image_dim(image2d_array_t image) } OVERLOADABLE size_t get_image_array_size(image2d_array_t image) { - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_depth(surface_id); + return __gen_ocl_get_image_depth(image); } // 1D Array info OVERLOADABLE size_t get_image_array_size(image1d_array_t image) { - GET_IMAGE(image, surface_id); - return __gen_ocl_get_image_depth(surface_id); + return __gen_ocl_get_image_depth(image); } // End of 1DArray diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 167b8f02..a62f46d9 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -286,7 +286,6 @@ namespace gbe case 1: return ir::MEM_GLOBAL; case 2: 
return ir::MEM_CONSTANT; case 3: return ir::MEM_LOCAL; - case 4: return ir::IMAGE; } GBE_ASSERT(false); return ir::MEM_GLOBAL; @@ -1538,18 +1537,13 @@ error: llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode->getOperand(1 + argID)))->getZExtValue(); llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(1 + argID)))->getString(); - if (llvmInfo.typeName.find("image") != std::string::npos && - llvmInfo.typeName.find("*") != std::string::npos) { - uint32_t start = llvmInfo.typeName.find("image"); - uint32_t end = llvmInfo.typeName.find("*"); - llvmInfo.typeName = llvmInfo.typeName.substr(start, end - start); - } llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(1 + argID)))->getString(); llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(1 + argID)))->getString(); llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(1 + argID)))->getString(); // function arguments are uniform values. this->newRegister(I, NULL, true); + // add support for vector argument. 
if(type->isVectorTy()) { VectorType *vectorType = cast<VectorType>(type); @@ -1572,6 +1566,12 @@ error: GBE_ASSERTM(isScalarType(type) == true, "vector type in the function argument is not supported yet"); const ir::Register reg = getRegister(I); + if (llvmInfo.isImageType()) { + ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, 4, 4, 0); + ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase()); + continue; + } + if (type->isPointerTy() == false) ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0); else { @@ -1606,10 +1606,6 @@ error: case ir::MEM_CONSTANT: ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, llvmInfo, ptrSize, align, 0x2); break; - case ir::IMAGE: - ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, ptrSize, align, 0x0); - ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase()); - break; default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE); } } @@ -2772,16 +2768,8 @@ error: // Get the name of the called function and handle it const std::string fnName = Callee->getName(); - auto it = instrinsicMap.map.find(fnName); - // FIXME, should create a complete error reporting mechanism - // when found error in beignet managed passes including Gen pass. - if (it == instrinsicMap.map.end()) { - std::cerr << "Unresolved symbol: " << fnName << std::endl; - std::cerr << "Aborting..." 
<< std::endl; - exit(-1); - } - GBE_ASSERT(it != instrinsicMap.map.end()); - switch (it->second) { + auto genIntrinsicID = intrinsicMap.find(fnName); + switch (genIntrinsicID) { case GEN_OCL_GET_GROUP_ID0: regTranslator.newScalarProxy(ir::ocl::groupid0, dst); break; case GEN_OCL_GET_GROUP_ID1: @@ -2878,35 +2866,13 @@ error: case GEN_OCL_LGBARRIER: ctx.getFunction().setUseSLM(true); break; - case GEN_OCL_WRITE_IMAGE_I_1D: - case GEN_OCL_WRITE_IMAGE_UI_1D: - case GEN_OCL_WRITE_IMAGE_F_1D: - case GEN_OCL_WRITE_IMAGE_I_2D: - case GEN_OCL_WRITE_IMAGE_UI_2D: - case GEN_OCL_WRITE_IMAGE_F_2D: - case GEN_OCL_WRITE_IMAGE_I_3D: - case GEN_OCL_WRITE_IMAGE_UI_3D: - case GEN_OCL_WRITE_IMAGE_F_3D: + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_F: break; - case GEN_OCL_READ_IMAGE_I_1D: - case GEN_OCL_READ_IMAGE_UI_1D: - case GEN_OCL_READ_IMAGE_F_1D: - case GEN_OCL_READ_IMAGE_I_2D: - case GEN_OCL_READ_IMAGE_UI_2D: - case GEN_OCL_READ_IMAGE_F_2D: - case GEN_OCL_READ_IMAGE_I_3D: - case GEN_OCL_READ_IMAGE_UI_3D: - case GEN_OCL_READ_IMAGE_F_3D: - - case GEN_OCL_READ_IMAGE_I_1D_I: - case GEN_OCL_READ_IMAGE_UI_1D_I: - case GEN_OCL_READ_IMAGE_F_1D_I: - case GEN_OCL_READ_IMAGE_I_2D_I: - case GEN_OCL_READ_IMAGE_UI_2D_I: - case GEN_OCL_READ_IMAGE_F_2D_I: - case GEN_OCL_READ_IMAGE_I_3D_I: - case GEN_OCL_READ_IMAGE_UI_3D_I: - case GEN_OCL_READ_IMAGE_F_3D_I: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_F: { // dst is a 4 elements vector. We allocate all 4 registers here. 
uint32_t elemNum; @@ -3039,11 +3005,7 @@ error: } uint8_t GenWriter::getImageID(CallInst &I) { - PtrOrigMapIter iter = pointerOrigMap.find(&I); - GBE_ASSERT(iter != pointerOrigMap.end()); - SmallVectorImpl<Value *> &origins = iter->second; - GBE_ASSERT(origins.size() == 1); - const ir::Register imageReg = this->getRegister(origins[0]); + const ir::Register imageReg = this->getRegister(I.getOperand(0)); return ctx.getFunction().getImageSet()->getIdx(imageReg); } @@ -3213,8 +3175,7 @@ error: // Get the name of the called function and handle it Value *Callee = I.getCalledValue(); const std::string fnName = Callee->getName(); - auto it = instrinsicMap.map.find(fnName); - GBE_ASSERT(it != instrinsicMap.map.end()); + auto genIntrinsicID = intrinsicMap.find(fnName); // Get the function arguments CallSite CS(&I); @@ -3223,7 +3184,7 @@ error: CallSite::arg_iterator AE = CS.arg_end(); #endif /* GBE_DEBUG */ - switch (it->second) { + switch (genIntrinsicID) { case GEN_OCL_POW: { const ir::Register src0 = this->getRegister(*AI); ++AI; @@ -3328,31 +3289,16 @@ error: const uint8_t imageID = getImageID(I); GBE_ASSERT(AI != AE); ++AI; const ir::Register reg = this->getRegister(&I, 0); - int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH; + int infoType = genIntrinsicID - GEN_OCL_GET_IMAGE_WIDTH; ir::ImageInfoKey key(imageID, infoType); const ir::Register infoReg = ctx.getFunction().getImageSet()->appendInfo(key, &ctx); ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg); break; } - case GEN_OCL_READ_IMAGE_I_1D: - case GEN_OCL_READ_IMAGE_UI_1D: - case GEN_OCL_READ_IMAGE_F_1D: - case GEN_OCL_READ_IMAGE_I_1D_I: - case GEN_OCL_READ_IMAGE_UI_1D_I: - case GEN_OCL_READ_IMAGE_F_1D_I: - case GEN_OCL_READ_IMAGE_I_2D: - case GEN_OCL_READ_IMAGE_UI_2D: - case GEN_OCL_READ_IMAGE_F_2D: - case GEN_OCL_READ_IMAGE_I_2D_I: - case GEN_OCL_READ_IMAGE_UI_2D_I: - case GEN_OCL_READ_IMAGE_F_2D_I: - case GEN_OCL_READ_IMAGE_I_3D: - case GEN_OCL_READ_IMAGE_UI_3D: - case GEN_OCL_READ_IMAGE_F_3D: - case 
GEN_OCL_READ_IMAGE_I_3D_I: - case GEN_OCL_READ_IMAGE_UI_3D_I: - case GEN_OCL_READ_IMAGE_F_3D_I: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_F: { const uint8_t imageID = getImageID(I); GBE_ASSERT(AI != AE); ++AI; @@ -3360,7 +3306,7 @@ error: const uint8_t sampler = this->appendSampler(AI); ++AI; GBE_ASSERT(AI != AE); uint32_t coordNum; - (void)getVectorInfo(ctx, *AI, coordNum); + const ir::Type coordType = getVectorInfo(ctx, *AI, coordNum); if (coordNum == 4) coordNum = 3; const uint32_t imageDim = coordNum; @@ -3377,7 +3323,7 @@ error: GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type"); samplerOffset = x.getIntegerValue(); #endif - bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D; + bool isFloatCoord = coordType == ir::TYPE_FLOAT; bool requiredFloatCoord = samplerOffset == 0; vector<ir::Register> dstTupleData, srcTupleData; @@ -3403,7 +3349,7 @@ error: } uint32_t elemNum; - (void)getVectorInfo(ctx, &I, elemNum); + ir::Type dstType = getVectorInfo(ctx, &I, elemNum); GBE_ASSERT(elemNum == 4); for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { @@ -3413,49 +3359,14 @@ error: const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum); const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3); - ir::Type dstType = ir::TYPE_U32; - - switch(it->second) { - case GEN_OCL_READ_IMAGE_I_1D: - case GEN_OCL_READ_IMAGE_UI_1D: - case GEN_OCL_READ_IMAGE_I_2D: - case GEN_OCL_READ_IMAGE_UI_2D: - case GEN_OCL_READ_IMAGE_I_3D: - case GEN_OCL_READ_IMAGE_UI_3D: - case GEN_OCL_READ_IMAGE_I_1D_I: - case GEN_OCL_READ_IMAGE_UI_1D_I: - case GEN_OCL_READ_IMAGE_I_2D_I: - case GEN_OCL_READ_IMAGE_UI_2D_I: - case GEN_OCL_READ_IMAGE_I_3D_I: - case GEN_OCL_READ_IMAGE_UI_3D_I: - dstType = ir::TYPE_U32; - break; - case GEN_OCL_READ_IMAGE_F_1D: - case GEN_OCL_READ_IMAGE_F_2D: - case GEN_OCL_READ_IMAGE_F_3D: - case GEN_OCL_READ_IMAGE_F_1D_I: - case GEN_OCL_READ_IMAGE_F_2D_I: - case 
GEN_OCL_READ_IMAGE_F_3D_I: - dstType = ir::TYPE_FLOAT; - break; - default: - GBE_ASSERT(0); // never been here. - } - ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT, requiredFloatCoord, sampler, samplerOffset); break; } - case GEN_OCL_WRITE_IMAGE_I_1D: - case GEN_OCL_WRITE_IMAGE_UI_1D: - case GEN_OCL_WRITE_IMAGE_F_1D: - case GEN_OCL_WRITE_IMAGE_I_2D: - case GEN_OCL_WRITE_IMAGE_UI_2D: - case GEN_OCL_WRITE_IMAGE_F_2D: - case GEN_OCL_WRITE_IMAGE_I_3D: - case GEN_OCL_WRITE_IMAGE_UI_3D: - case GEN_OCL_WRITE_IMAGE_F_3D: + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_F: { const uint8_t imageID = getImageID(I); GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE); @@ -3479,7 +3390,7 @@ error: } ++AI; GBE_ASSERT(AI != AE); uint32_t elemNum; - (void)getVectorInfo(ctx, *AI, elemNum); + ir::Type srcType = getVectorInfo(ctx, *AI, elemNum); GBE_ASSERT(elemNum == 4); for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { @@ -3487,27 +3398,6 @@ error: srcTupleData.push_back(reg); } const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7); - - ir::Type srcType = ir::TYPE_U32; - - switch(it->second) { - case GEN_OCL_WRITE_IMAGE_I_1D: - case GEN_OCL_WRITE_IMAGE_UI_1D: - case GEN_OCL_WRITE_IMAGE_I_2D: - case GEN_OCL_WRITE_IMAGE_UI_2D: - case GEN_OCL_WRITE_IMAGE_I_3D: - case GEN_OCL_WRITE_IMAGE_UI_3D: - srcType = ir::TYPE_U32; - break; - case GEN_OCL_WRITE_IMAGE_F_1D: - case GEN_OCL_WRITE_IMAGE_F_2D: - case GEN_OCL_WRITE_IMAGE_F_3D: - srcType = ir::TYPE_FLOAT; - break; - default: - GBE_ASSERT(0); // never been here. - } - ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32); break; } @@ -3646,7 +3536,7 @@ error: //Becasue cmp's sources are same as sel's source, so cmp instruction and sel //instruction will be merged to one sel_cmp instruction in the gen selection //Add two intruction here for simple. 
- if(it->second == GEN_OCL_FMAX) + if(genIntrinsicID == GEN_OCL_FMAX) ctx.GE(getType(ctx, I.getType()), cmp, src0, src1); else ctx.LT(getType(ctx, I.getType()), cmp, src0, src1); diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp index 528b3c8b..926c4935 100644 --- a/backend/src/llvm/llvm_gen_backend.hpp +++ b/backend/src/llvm/llvm_gen_backend.hpp @@ -26,6 +26,7 @@ #ifndef __GBE_LLVM_GEN_BACKEND_HPP__ #define __GBE_LLVM_GEN_BACKEND_HPP__ +#include <cxxabi.h> #include "llvm/Config/llvm-config.h" #include "llvm/Pass.h" #include "llvm/Analysis/LoopPass.h" @@ -65,10 +66,31 @@ namespace gbe } /*! Sort intrinsics with their names */ hash_map<std::string, OCLInstrinsic> map; + OCLInstrinsic find(const std::string symbol) const { + auto it = map.find(symbol); + + if (it == map.end()) { + int status; + const char *realName = abi::__cxa_demangle(symbol.c_str(), NULL, NULL, &status); + if (status == 0) { + std::string realFnName(realName), stripName; + stripName = realFnName.substr(0, realFnName.find("(")); + it = map.find(stripName); + } + } + // FIXME, should create a complete error reporting mechanism + // when found error in beignet managed passes including Gen pass. + if (it == map.end()) { + std::cerr << "Unresolved symbol: " << symbol << std::endl; + std::cerr << "Aborting..." << std::endl; + exit(-1); + } + return it->second; + } }; /*! Sort the OCL Gen instrinsic functions (built on pre-main) */ - static const OCLIntrinsicMap instrinsicMap; + static const OCLIntrinsicMap intrinsicMap; /*! 
Pad the offset */ int32_t getPadding(int32_t offset, int32_t align); diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 8d55c3f5..8ec8336c 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -46,38 +46,14 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8) DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16) // To read_image functions. -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtDv2_fj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtDv2_fj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtDv2_fj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtDv4_fj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtDv4_fj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtDv4_fj) -// work around read image with the LD message. The coords are integer type. 
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtDv2_ij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtDv2_ij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtDv2_ij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtDv4_ij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtDv4_ij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtDv4_ij) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, __gen_ocl_read_imagei) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, __gen_ocl_read_imageui) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, __gen_ocl_read_imagef) // To write_image functions. -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f) - -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijDv2_iDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijDv2_iDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjDv2_iDv4_f) - -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijDv4_iS_) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijDv4_iDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjDv4_iDv4_f) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, __gen_ocl_write_imagei) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, __gen_ocl_write_imageui) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, __gen_ocl_write_imagef) // To get image info function DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width) diff --git a/backend/src/llvm/llvm_scalarize.cpp 
b/backend/src/llvm/llvm_scalarize.cpp index baf526b6..cf2939dc 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -636,42 +636,17 @@ namespace gbe { } else { Value *Callee = call->getCalledValue(); const std::string fnName = Callee->getName(); - auto it = instrinsicMap.map.find(fnName); - // FIXME, should create a complete error reporting mechanism - // when found error in beignet managed passes including Gen pass. - if (it == instrinsicMap.map.end()) { - std::cerr << "Unresolved symbol: " << fnName << std::endl; - std::cerr << "Aborting..." << std::endl; - exit(-1); - } - GBE_ASSERT(it != instrinsicMap.map.end()); + auto genIntrinsicID = intrinsicMap.find(fnName); // Get the function arguments CallSite CS(call); CallSite::arg_iterator CI = CS.arg_begin() + 1; - switch (it->second) { + switch (genIntrinsicID) { default: break; - case GEN_OCL_READ_IMAGE_I_1D: - case GEN_OCL_READ_IMAGE_UI_1D: - case GEN_OCL_READ_IMAGE_F_1D: - case GEN_OCL_READ_IMAGE_I_2D: - case GEN_OCL_READ_IMAGE_UI_2D: - case GEN_OCL_READ_IMAGE_F_2D: - case GEN_OCL_READ_IMAGE_I_3D: - case GEN_OCL_READ_IMAGE_UI_3D: - case GEN_OCL_READ_IMAGE_F_3D: - case GEN_OCL_READ_IMAGE_I_1D_I: - case GEN_OCL_READ_IMAGE_UI_1D_I: - case GEN_OCL_READ_IMAGE_F_1D_I: - case GEN_OCL_READ_IMAGE_I_2D_I: - case GEN_OCL_READ_IMAGE_UI_2D_I: - case GEN_OCL_READ_IMAGE_F_2D_I: - case GEN_OCL_READ_IMAGE_I_3D_I: - case GEN_OCL_READ_IMAGE_UI_3D_I: - case GEN_OCL_READ_IMAGE_F_3D_I: - case GEN_OCL_GET_IMAGE_WIDTH: - case GEN_OCL_GET_IMAGE_HEIGHT: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_F: { ++CI; if ((*CI)->getType()->isVectorTy()) @@ -680,15 +655,9 @@ namespace gbe { extractFromVector(call); break; } - case GEN_OCL_WRITE_IMAGE_I_3D: - case GEN_OCL_WRITE_IMAGE_UI_3D: - case GEN_OCL_WRITE_IMAGE_F_3D: - case GEN_OCL_WRITE_IMAGE_I_2D: - case GEN_OCL_WRITE_IMAGE_UI_2D: - case GEN_OCL_WRITE_IMAGE_F_2D: - case GEN_OCL_WRITE_IMAGE_I_1D: - case 
GEN_OCL_WRITE_IMAGE_UI_1D: - case GEN_OCL_WRITE_IMAGE_F_1D: + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_F: { if ((*CI)->getType()->isVectorTy()) *CI = InsertToVector(call, *CI); |