summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Zhigang Gong <zhigang.gong@intel.com> 2014-12-12 15:41:57 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2014-12-18 15:42:15 +0800
commit 7ae159dfd17089e04991617cfc2e021f498c6e61 (patch)
tree 92cbd3536829bf12ec004718bee8300dd40cc68a
parent c09acfac5ac91177166d358c45541b6ecc6b2dc1 (diff)
GBE: switch to use CLANG native image types.
CLANG has all native image types since 3.3. There is no need to keep the original hacky implementation now.

Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r-- backend/src/ir/function.hpp 24
-rw-r--r-- backend/src/ir/instruction.cpp 1
-rw-r--r-- backend/src/ir/instruction.hpp 1
-rw-r--r-- backend/src/libocl/include/ocl_types.h 21
-rw-r--r-- backend/src/libocl/src/ocl_image.cl 275
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp 170
-rw-r--r-- backend/src/llvm/llvm_gen_backend.hpp 24
-rw-r--r-- backend/src/llvm/llvm_gen_ocl_function.hxx 36
-rw-r--r-- backend/src/llvm/llvm_scalarize.cpp 47
9 files changed, 175 insertions, 424 deletions
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 1163a195..0f86fefd 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -180,6 +180,30 @@ namespace ir {
std::string accessQual;
std::string typeQual;
std::string argName; // My different from arg->getName()
+
+ bool isImage1dT() const {
+ return typeName.compare("image1d_t") == 0;
+ }
+ bool isImage1dArrayT() const {
+ return typeName.compare("image1d_array_t") == 0;
+ }
+ bool isImage1dBufferT() const {
+ return typeName.compare("image1d_buffer_t") == 0;
+ }
+ bool isImage2dT() const {
+ return typeName.compare("image2d_t") == 0;
+ }
+ bool isImage2dArrayT() const {
+ return typeName.compare("image2d_array_t") == 0;
+ }
+ bool isImage3dT() const {
+ return typeName.compare("image3d_t") == 0;
+ }
+
+ bool isImageType() const {
+ return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
+ isImage2dT() || isImage2dArrayT() || isImage3dT();
+ }
};
/*! Create a function input argument */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 2bd00616..82e7ddaa 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1255,7 +1255,6 @@ namespace ir {
case MEM_LOCAL: return out << "local";
case MEM_CONSTANT: return out << "constant";
case MEM_PRIVATE: return out << "private";
- case IMAGE: return out << "image";
case MEM_INVALID: return out << "invalid";
};
return out;
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 11e95092..47312f5b 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -58,7 +58,6 @@ namespace ir {
MEM_LOCAL, //!< Local memory (thread group memory)
MEM_CONSTANT, //!< Immutable global memory
MEM_PRIVATE, //!< Per thread private memory
- IMAGE, //!< For texture image.
MEM_INVALID
};
diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
index 63478c97..49ac9070 100644
--- a/backend/src/libocl/include/ocl_types.h
+++ b/backend/src/libocl/include/ocl_types.h
@@ -87,27 +87,8 @@ DEF(double);
// FIXME:
// This is a transitional hack to bypass the LLVM 3.3 built-in types.
// See the Khronos SPIR specification for handling of these types.
-#define __texture __attribute__((address_space(4)))
-struct _image1d_t;
-typedef __texture struct _image1d_t* __image1d_t;
-struct _image1d_buffer_t;
-typedef __texture struct _image1d_buffer_t* __image1d_buffer_t;
-struct _image1d_array_t;
-typedef __texture struct _image1d_array_t* __image1d_array_t;
-struct _image2d_t;
-typedef __texture struct _image2d_t* __image2d_t;
-struct _image2d_array_t;
-typedef __texture struct _image2d_array_t* __image2d_array_t;
-struct _image3d_t;
-typedef __texture struct _image3d_t* __image3d_t;
-typedef const ushort __sampler_t;
-#define image1d_t __image1d_t
-#define image1d_buffer_t __image1d_buffer_t
-#define image1d_array_t __image1d_array_t
-#define image2d_t __image2d_t
-#define image2d_array_t __image2d_array_t
-#define image3d_t __image3d_t
#define sampler_t __sampler_t
+typedef const ushort __sampler_t;
/////////////////////////////////////////////////////////////////////////////
// OpenCL built-in event types
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index 95b98ff4..c4ca2f83 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -20,176 +20,94 @@
#include "ocl_integer.h"
#include "ocl_common.h"
+#define int1 int
+#define float1 float
+
///////////////////////////////////////////////////////////////////////////////
// Beignet builtin functions.
///////////////////////////////////////////////////////////////////////////////
-// 1D read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- float u, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- int u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- float u, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- int u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- float u, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- int u, uint sampler_offset);
-
-// 2D & 1D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- float2 coord, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- int2 coord, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- float2 coord, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- int2 coord, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- float2 coord, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- int2 coord, uint sampler_offset);
-
-// 3D & 2D Array read
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- float4 coord, uint sampler_offset);
-OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- int4 coord, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- float4 coord, uint sampler_offset);
-OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- int4 coord, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- float4 coord, uint sampler_offset);
-OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- int4 coord, uint sampler_offset);
-
-// Don't know why we need to support 3 component coordinates, but it's in the old
-// version, let's keep to support it.
-INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- float3 coord, uint sampler_offset)
-{
- return __gen_ocl_read_imagei(surface_id, sampler,
- (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
-}
-INLINE_OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler,
- int3 coord, uint sampler_offset)
-{
- return __gen_ocl_read_imagei(surface_id, sampler,
- (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
-}
-INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- float3 coord, uint sampler_offset)
-{
- return __gen_ocl_read_imageui(surface_id, sampler,
- (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
-}
-INLINE_OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler,
- int3 coord, uint sampler_offset)
-{
- return __gen_ocl_read_imageui(surface_id, sampler,
- (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
-}
-INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- float3 coord, uint sampler_offset)
-{
- return __gen_ocl_read_imagef(surface_id, sampler,
- (float4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
-}
-INLINE_OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler,
- int3 coord, uint sampler_offset)
-{
- return __gen_ocl_read_imagef(surface_id, sampler,
- (int4)(coord.s0, coord.s1, coord.s2, 0), sampler_offset);
-}
-
-// 1D write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color);
-
-// 2D & 1D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int2 coord, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int2 coord, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int2 coord, float4 color);
-
-// 3D & 2D Array write
-OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int4 coord, int4 color);
-OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int4 coord, uint4 color);
-OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int4 coord, float4 color);
-
-INLINE_OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int3 coord, int4 color)
-{
- __gen_ocl_write_imagei(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
-}
-INLINE_OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int3 coord, uint4 color)
-{
- __gen_ocl_write_imageui(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
-}
-INLINE_OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int3 coord, float4 color)
-{
- __gen_ocl_write_imagef(surface_id, (int4)(coord.s0, coord.s1, coord.s2, 0), color);
-}
-
-int __gen_ocl_get_image_width(uint surface_id);
-int __gen_ocl_get_image_height(uint surface_id);
-int __gen_ocl_get_image_channel_data_type(uint surface_id);
-int __gen_ocl_get_image_channel_order(uint surface_id);
-int __gen_ocl_get_image_depth(uint surface_id);
-
-
-#define GET_IMAGE(cl_image, surface_id) \
- uint surface_id = (uint)cl_image
-
+#define DECL_GEN_OCL_RW_IMAGE(image_type, n) \
+ OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler, \
+ float ##n coord, uint sampler_offset); \
+ OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler, \
+ int ##n coord, uint sampler_offset); \
+ OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler, \
+ float ##n coord, uint sampler_offset); \
+ OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler, \
+ int ##n coord, uint sampler_offset); \
+ OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler, \
+ float ##n coord, uint sampler_offset); \
+ OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler, \
+ int ##n coord, uint sampler_offset); \
+ OVERLOADABLE void __gen_ocl_write_imagei(image_type image, int ##n coord , int4 color); \
+ OVERLOADABLE void __gen_ocl_write_imageui(image_type image, int ##n coord, uint4 color);\
+ OVERLOADABLE void __gen_ocl_write_imagef(image_type image, int ##n coord, float4 color);
+
+#define DECL_GEN_OCL_QUERY_IMAGE(image_type) \
+ OVERLOADABLE int __gen_ocl_get_image_width(image_type image); \
+ OVERLOADABLE int __gen_ocl_get_image_height(image_type image); \
+ OVERLOADABLE int __gen_ocl_get_image_channel_data_type(image_type image); \
+ OVERLOADABLE int __gen_ocl_get_image_channel_order(image_type image); \
+ OVERLOADABLE int __gen_ocl_get_image_depth(image_type image); \
+
+DECL_GEN_OCL_RW_IMAGE(image1d_t, 1)
+DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 1)
+DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 2)
+DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 4)
+DECL_GEN_OCL_RW_IMAGE(image2d_t, 2)
+DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 3)
+DECL_GEN_OCL_RW_IMAGE(image3d_t, 3)
+DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 4)
+DECL_GEN_OCL_RW_IMAGE(image3d_t, 4)
+
+DECL_GEN_OCL_QUERY_IMAGE(image1d_t)
+DECL_GEN_OCL_QUERY_IMAGE(image1d_buffer_t)
+DECL_GEN_OCL_QUERY_IMAGE(image1d_array_t)
+DECL_GEN_OCL_QUERY_IMAGE(image2d_t)
+DECL_GEN_OCL_QUERY_IMAGE(image2d_array_t)
+DECL_GEN_OCL_QUERY_IMAGE(image3d_t)
///////////////////////////////////////////////////////////////////////////////
// helper functions to validate array index.
///////////////////////////////////////////////////////////////////////////////
INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image)
{
- GET_IMAGE(image, surface_id);
- float array_size = __gen_ocl_get_image_depth(surface_id);
+ float array_size = __gen_ocl_get_image_depth(image);
coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
return coord;
}
INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image)
{
- GET_IMAGE(image, surface_id);
- float array_size = __gen_ocl_get_image_depth(surface_id);
+ float array_size = __gen_ocl_get_image_depth(image);
coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
return coord;
}
INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image)
{
- GET_IMAGE(image, surface_id);
- float array_size = __gen_ocl_get_image_depth(surface_id);
+ float array_size = __gen_ocl_get_image_depth(image);
coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
return coord;
}
INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image)
{
- GET_IMAGE(image, surface_id);
- int array_size = __gen_ocl_get_image_depth(surface_id);
+ int array_size = __gen_ocl_get_image_depth(image);
coord.s1 = clamp(coord.s1, 0, array_size - 1);
return coord;
}
INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image)
{
- GET_IMAGE(image, surface_id);
- int array_size = __gen_ocl_get_image_depth(surface_id);
+ int array_size = __gen_ocl_get_image_depth(image);
coord.s2 = clamp(coord.s2, 0, array_size - 1);
return coord;
}
INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image)
{
- GET_IMAGE(image, surface_id);
- int array_size = __gen_ocl_get_image_depth(surface_id);
+ int array_size = __gen_ocl_get_image_depth(image);
coord.s2 = clamp(coord.s2, 0, array_size - 1);
return coord;
}
@@ -273,62 +191,54 @@ INLINE_OVERLOADABLE float4 __gen_fixup_float_coord(float4 tmpCoord)
// coordiates.
INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_t image, float srcCoord)
{
- GET_IMAGE(image, surface_id);
- return srcCoord * __gen_ocl_get_image_width(surface_id);
+ return srcCoord * __gen_ocl_get_image_width(image);
}
INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image1d_array_t image, float2 srcCoord)
{
- GET_IMAGE(image, surface_id);
- srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
return srcCoord;
}
INLINE_OVERLOADABLE float __gen_denormalize_coord(const image1d_buffer_t image, float srcCoord)
{
- GET_IMAGE(image, surface_id);
- return srcCoord * __gen_ocl_get_image_width(surface_id);
+ return srcCoord * __gen_ocl_get_image_width(image);
}
INLINE_OVERLOADABLE float2 __gen_denormalize_coord(const image2d_t image, float2 srcCoord)
{
- GET_IMAGE(image, surface_id);
- srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
- srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
return srcCoord;
}
INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image2d_array_t image, float3 srcCoord)
{
- GET_IMAGE(image, surface_id);
- srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
- srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
return srcCoord;
}
INLINE_OVERLOADABLE float3 __gen_denormalize_coord(const image3d_t image, float3 srcCoord)
{
- GET_IMAGE(image, surface_id);
- srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
- srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
- srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
+ srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(image);
return srcCoord;
}
INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image2d_array_t image, float4 srcCoord)
{
- GET_IMAGE(image, surface_id);
- srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
- srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
return srcCoord;
}
INLINE_OVERLOADABLE float4 __gen_denormalize_coord(const image3d_t image, float4 srcCoord)
{
- GET_IMAGE(image, surface_id);
- srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(surface_id);
- srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(surface_id);
- srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(surface_id);
+ srcCoord.s0 = srcCoord.s0 * __gen_ocl_get_image_width(image);
+ srcCoord.s1 = srcCoord.s1 * __gen_ocl_get_image_height(image);
+ srcCoord.s2 = srcCoord.s2 * __gen_ocl_get_image_depth(image);
return srcCoord;
}
@@ -381,11 +291,10 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
const sampler_t sampler, \
coord_type coord) \
{ \
- GET_IMAGE(cl_image, surface_id); \
coord = __gen_validate_array_index(coord, cl_image); \
if (int_clamping_fix && __gen_sampler_need_fix(sampler)) \
- return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 1); \
- return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \
+ return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 1); \
+ return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0); \
}
// For float coordinates
@@ -395,7 +304,6 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
const sampler_t sampler, \
coord_type coord) \
{ \
- GET_IMAGE(cl_image, surface_id); \
coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \
if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
if (__gen_sampler_need_fix(sampler)) { \
@@ -407,11 +315,11 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \
return __gen_ocl_read_image ##suffix( \
- surface_id, sampler, tmpCoord, 1); \
+ cl_image, sampler, tmpCoord, 1); \
} \
} \
} \
- return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \
+ return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); \
}
#define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \
@@ -419,10 +327,9 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
coord_type coord) \
{ \
- GET_IMAGE(cl_image, surface_id); \
coord = __gen_validate_array_index(coord, cl_image); \
return __gen_ocl_read_image ##suffix( \
- surface_id, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+ cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
| CLK_FILTER_NEAREST, coord, 0); \
}
@@ -431,15 +338,10 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
coord_type coord, \
image_data_type color) \
{ \
- GET_IMAGE(cl_image, surface_id); \
coord_type fixedCoord = __gen_validate_array_index(coord, cl_image); \
- __gen_ocl_write_image ##suffix(surface_id, fixedCoord, color); \
+ __gen_ocl_write_image ##suffix(cl_image, fixedCoord, color); \
}
-#define int1 int
-#define float1 float
-
-
#define DECL_IMAGE(int_clamping_fix, image_type, image_data_type, suffix, n) \
DECL_READ_IMAGE0(int_clamping_fix, image_type, \
image_data_type, suffix, int ##n) \
@@ -495,13 +397,12 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
const sampler_t sampler, \
coord_type coord) \
{ \
- GET_IMAGE(cl_image, surface_id); \
coord = __gen_validate_array_index(coord, cl_image); \
if (int_clamping_fix && __gen_sampler_need_fix(sampler)) { \
int4 newCoord = __gen_fixup_1darray_coord(coord, cl_image); \
- return __gen_ocl_read_image ##suffix(surface_id, sampler, newCoord, 2); \
+ return __gen_ocl_read_image ##suffix(cl_image, sampler, newCoord, 2); \
} \
- return __gen_ocl_read_image ##suffix(surface_id, sampler, coord, 0); \
+ return __gen_ocl_read_image ##suffix(cl_image, sampler, coord, 0); \
}
// For float coordiates
@@ -511,7 +412,6 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
const sampler_t sampler, \
coord_type coord) \
{ \
- GET_IMAGE(cl_image, surface_id); \
coord_type tmpCoord = __gen_validate_array_index(coord, cl_image); \
if (GEN_FIX_FLOAT_ROUNDING | int_clamping_fix) { \
if (__gen_sampler_need_fix(sampler)) { \
@@ -523,11 +423,11 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image); \
return __gen_ocl_read_image ##suffix( \
- surface_id, sampler, newCoord, 2); \
+ cl_image, sampler, newCoord, 2); \
} \
} \
} \
- return __gen_ocl_read_image ##suffix(surface_id, sampler, tmpCoord, 0); \
+ return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0); \
}
#define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix) \
@@ -547,18 +447,15 @@ DECL_IMAGE_1DArray(0, float4, f)
#define DECL_IMAGE_INFO_COMMON(image_type) \
OVERLOADABLE int get_image_channel_data_type(image_type image) \
{ \
- GET_IMAGE(image, surface_id); \
- return __gen_ocl_get_image_channel_data_type(surface_id); \
+ return __gen_ocl_get_image_channel_data_type(image); \
} \
OVERLOADABLE int get_image_channel_order(image_type image) \
{ \
- GET_IMAGE(image, surface_id); \
- return __gen_ocl_get_image_channel_order(surface_id); \
+ return __gen_ocl_get_image_channel_order(image); \
} \
OVERLOADABLE int get_image_width(image_type image) \
{ \
- GET_IMAGE(image, surface_id); \
- return __gen_ocl_get_image_width(surface_id); \
+ return __gen_ocl_get_image_width(image); \
}
DECL_IMAGE_INFO_COMMON(image1d_t)
@@ -571,8 +468,7 @@ DECL_IMAGE_INFO_COMMON(image2d_array_t)
// 2D extra Info
OVERLOADABLE int get_image_height(image2d_t image)
{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_height(surface_id);
+ return __gen_ocl_get_image_height(image);
}
OVERLOADABLE int2 get_image_dim(image2d_t image)
{
@@ -583,13 +479,11 @@ OVERLOADABLE int2 get_image_dim(image2d_t image)
// 3D extra Info
OVERLOADABLE int get_image_height(image3d_t image)
{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_height(surface_id);
+ return __gen_ocl_get_image_height(image);
}
OVERLOADABLE int get_image_depth(image3d_t image)
{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_depth(surface_id);
+ return __gen_ocl_get_image_depth(image);
}
OVERLOADABLE int4 get_image_dim(image3d_t image)
{
@@ -602,8 +496,7 @@ OVERLOADABLE int4 get_image_dim(image3d_t image)
// 2D Array extra Info
OVERLOADABLE int get_image_height(image2d_array_t image)
{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_height(surface_id);
+ return __gen_ocl_get_image_height(image);
}
OVERLOADABLE int2 get_image_dim(image2d_array_t image)
{
@@ -611,14 +504,12 @@ OVERLOADABLE int2 get_image_dim(image2d_array_t image)
}
OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_depth(surface_id);
+ return __gen_ocl_get_image_depth(image);
}
// 1D Array info
OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
{
- GET_IMAGE(image, surface_id);
- return __gen_ocl_get_image_depth(surface_id);
+ return __gen_ocl_get_image_depth(image);
}
// End of 1DArray
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 167b8f02..a62f46d9 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -286,7 +286,6 @@ namespace gbe
case 1: return ir::MEM_GLOBAL;
case 2: return ir::MEM_CONSTANT;
case 3: return ir::MEM_LOCAL;
- case 4: return ir::IMAGE;
}
GBE_ASSERT(false);
return ir::MEM_GLOBAL;
@@ -1538,18 +1537,13 @@ error:
llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode->getOperand(1 + argID)))->getZExtValue();
llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(1 + argID)))->getString();
- if (llvmInfo.typeName.find("image") != std::string::npos &&
- llvmInfo.typeName.find("*") != std::string::npos) {
- uint32_t start = llvmInfo.typeName.find("image");
- uint32_t end = llvmInfo.typeName.find("*");
- llvmInfo.typeName = llvmInfo.typeName.substr(start, end - start);
- }
llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(1 + argID)))->getString();
llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(1 + argID)))->getString();
llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(1 + argID)))->getString();
// function arguments are uniform values.
this->newRegister(I, NULL, true);
+
// add support for vector argument.
if(type->isVectorTy()) {
VectorType *vectorType = cast<VectorType>(type);
@@ -1572,6 +1566,12 @@ error:
GBE_ASSERTM(isScalarType(type) == true,
"vector type in the function argument is not supported yet");
const ir::Register reg = getRegister(I);
+ if (llvmInfo.isImageType()) {
+ ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, 4, 4, 0);
+ ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase());
+ continue;
+ }
+
if (type->isPointerTy() == false)
ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
else {
@@ -1606,10 +1606,6 @@ error:
case ir::MEM_CONSTANT:
ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, llvmInfo, ptrSize, align, 0x2);
break;
- case ir::IMAGE:
- ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, ptrSize, align, 0x0);
- ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase());
- break;
default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
}
}
@@ -2772,16 +2768,8 @@ error:
// Get the name of the called function and handle it
const std::string fnName = Callee->getName();
- auto it = instrinsicMap.map.find(fnName);
- // FIXME, should create a complete error reporting mechanism
- // when found error in beignet managed passes including Gen pass.
- if (it == instrinsicMap.map.end()) {
- std::cerr << "Unresolved symbol: " << fnName << std::endl;
- std::cerr << "Aborting..." << std::endl;
- exit(-1);
- }
- GBE_ASSERT(it != instrinsicMap.map.end());
- switch (it->second) {
+ auto genIntrinsicID = intrinsicMap.find(fnName);
+ switch (genIntrinsicID) {
case GEN_OCL_GET_GROUP_ID0:
regTranslator.newScalarProxy(ir::ocl::groupid0, dst); break;
case GEN_OCL_GET_GROUP_ID1:
@@ -2878,35 +2866,13 @@ error:
case GEN_OCL_LGBARRIER:
ctx.getFunction().setUseSLM(true);
break;
- case GEN_OCL_WRITE_IMAGE_I_1D:
- case GEN_OCL_WRITE_IMAGE_UI_1D:
- case GEN_OCL_WRITE_IMAGE_F_1D:
- case GEN_OCL_WRITE_IMAGE_I_2D:
- case GEN_OCL_WRITE_IMAGE_UI_2D:
- case GEN_OCL_WRITE_IMAGE_F_2D:
- case GEN_OCL_WRITE_IMAGE_I_3D:
- case GEN_OCL_WRITE_IMAGE_UI_3D:
- case GEN_OCL_WRITE_IMAGE_F_3D:
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_F:
break;
- case GEN_OCL_READ_IMAGE_I_1D:
- case GEN_OCL_READ_IMAGE_UI_1D:
- case GEN_OCL_READ_IMAGE_F_1D:
- case GEN_OCL_READ_IMAGE_I_2D:
- case GEN_OCL_READ_IMAGE_UI_2D:
- case GEN_OCL_READ_IMAGE_F_2D:
- case GEN_OCL_READ_IMAGE_I_3D:
- case GEN_OCL_READ_IMAGE_UI_3D:
- case GEN_OCL_READ_IMAGE_F_3D:
-
- case GEN_OCL_READ_IMAGE_I_1D_I:
- case GEN_OCL_READ_IMAGE_UI_1D_I:
- case GEN_OCL_READ_IMAGE_F_1D_I:
- case GEN_OCL_READ_IMAGE_I_2D_I:
- case GEN_OCL_READ_IMAGE_UI_2D_I:
- case GEN_OCL_READ_IMAGE_F_2D_I:
- case GEN_OCL_READ_IMAGE_I_3D_I:
- case GEN_OCL_READ_IMAGE_UI_3D_I:
- case GEN_OCL_READ_IMAGE_F_3D_I:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_F:
{
// dst is a 4 elements vector. We allocate all 4 registers here.
uint32_t elemNum;
@@ -3039,11 +3005,7 @@ error:
}
uint8_t GenWriter::getImageID(CallInst &I) {
- PtrOrigMapIter iter = pointerOrigMap.find(&I);
- GBE_ASSERT(iter != pointerOrigMap.end());
- SmallVectorImpl<Value *> &origins = iter->second;
- GBE_ASSERT(origins.size() == 1);
- const ir::Register imageReg = this->getRegister(origins[0]);
+ const ir::Register imageReg = this->getRegister(I.getOperand(0));
return ctx.getFunction().getImageSet()->getIdx(imageReg);
}
@@ -3213,8 +3175,7 @@ error:
// Get the name of the called function and handle it
Value *Callee = I.getCalledValue();
const std::string fnName = Callee->getName();
- auto it = instrinsicMap.map.find(fnName);
- GBE_ASSERT(it != instrinsicMap.map.end());
+ auto genIntrinsicID = intrinsicMap.find(fnName);
// Get the function arguments
CallSite CS(&I);
@@ -3223,7 +3184,7 @@ error:
CallSite::arg_iterator AE = CS.arg_end();
#endif /* GBE_DEBUG */
- switch (it->second) {
+ switch (genIntrinsicID) {
case GEN_OCL_POW:
{
const ir::Register src0 = this->getRegister(*AI); ++AI;
@@ -3328,31 +3289,16 @@ error:
const uint8_t imageID = getImageID(I);
GBE_ASSERT(AI != AE); ++AI;
const ir::Register reg = this->getRegister(&I, 0);
- int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
+ int infoType = genIntrinsicID - GEN_OCL_GET_IMAGE_WIDTH;
ir::ImageInfoKey key(imageID, infoType);
const ir::Register infoReg = ctx.getFunction().getImageSet()->appendInfo(key, &ctx);
ctx.GET_IMAGE_INFO(infoType, reg, imageID, infoReg);
break;
}
- case GEN_OCL_READ_IMAGE_I_1D:
- case GEN_OCL_READ_IMAGE_UI_1D:
- case GEN_OCL_READ_IMAGE_F_1D:
- case GEN_OCL_READ_IMAGE_I_1D_I:
- case GEN_OCL_READ_IMAGE_UI_1D_I:
- case GEN_OCL_READ_IMAGE_F_1D_I:
- case GEN_OCL_READ_IMAGE_I_2D:
- case GEN_OCL_READ_IMAGE_UI_2D:
- case GEN_OCL_READ_IMAGE_F_2D:
- case GEN_OCL_READ_IMAGE_I_2D_I:
- case GEN_OCL_READ_IMAGE_UI_2D_I:
- case GEN_OCL_READ_IMAGE_F_2D_I:
- case GEN_OCL_READ_IMAGE_I_3D:
- case GEN_OCL_READ_IMAGE_UI_3D:
- case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_I_3D_I:
- case GEN_OCL_READ_IMAGE_UI_3D_I:
- case GEN_OCL_READ_IMAGE_F_3D_I:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_F:
{
const uint8_t imageID = getImageID(I);
GBE_ASSERT(AI != AE); ++AI;
@@ -3360,7 +3306,7 @@ error:
const uint8_t sampler = this->appendSampler(AI);
++AI; GBE_ASSERT(AI != AE);
uint32_t coordNum;
- (void)getVectorInfo(ctx, *AI, coordNum);
+ const ir::Type coordType = getVectorInfo(ctx, *AI, coordNum);
if (coordNum == 4)
coordNum = 3;
const uint32_t imageDim = coordNum;
@@ -3377,7 +3323,7 @@ error:
GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type");
samplerOffset = x.getIntegerValue();
#endif
- bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
+ bool isFloatCoord = coordType == ir::TYPE_FLOAT;
bool requiredFloatCoord = samplerOffset == 0;
vector<ir::Register> dstTupleData, srcTupleData;
@@ -3403,7 +3349,7 @@ error:
}
uint32_t elemNum;
- (void)getVectorInfo(ctx, &I, elemNum);
+ ir::Type dstType = getVectorInfo(ctx, &I, elemNum);
GBE_ASSERT(elemNum == 4);
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
@@ -3413,49 +3359,14 @@ error:
const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
- ir::Type dstType = ir::TYPE_U32;
-
- switch(it->second) {
- case GEN_OCL_READ_IMAGE_I_1D:
- case GEN_OCL_READ_IMAGE_UI_1D:
- case GEN_OCL_READ_IMAGE_I_2D:
- case GEN_OCL_READ_IMAGE_UI_2D:
- case GEN_OCL_READ_IMAGE_I_3D:
- case GEN_OCL_READ_IMAGE_UI_3D:
- case GEN_OCL_READ_IMAGE_I_1D_I:
- case GEN_OCL_READ_IMAGE_UI_1D_I:
- case GEN_OCL_READ_IMAGE_I_2D_I:
- case GEN_OCL_READ_IMAGE_UI_2D_I:
- case GEN_OCL_READ_IMAGE_I_3D_I:
- case GEN_OCL_READ_IMAGE_UI_3D_I:
- dstType = ir::TYPE_U32;
- break;
- case GEN_OCL_READ_IMAGE_F_1D:
- case GEN_OCL_READ_IMAGE_F_2D:
- case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_F_1D_I:
- case GEN_OCL_READ_IMAGE_F_2D_I:
- case GEN_OCL_READ_IMAGE_F_3D_I:
- dstType = ir::TYPE_FLOAT;
- break;
- default:
- GBE_ASSERT(0); // never been here.
- }
-
ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
requiredFloatCoord, sampler, samplerOffset);
break;
}
- case GEN_OCL_WRITE_IMAGE_I_1D:
- case GEN_OCL_WRITE_IMAGE_UI_1D:
- case GEN_OCL_WRITE_IMAGE_F_1D:
- case GEN_OCL_WRITE_IMAGE_I_2D:
- case GEN_OCL_WRITE_IMAGE_UI_2D:
- case GEN_OCL_WRITE_IMAGE_F_2D:
- case GEN_OCL_WRITE_IMAGE_I_3D:
- case GEN_OCL_WRITE_IMAGE_UI_3D:
- case GEN_OCL_WRITE_IMAGE_F_3D:
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_F:
{
const uint8_t imageID = getImageID(I);
GBE_ASSERT(AI != AE); ++AI; GBE_ASSERT(AI != AE);
@@ -3479,7 +3390,7 @@ error:
}
++AI; GBE_ASSERT(AI != AE);
uint32_t elemNum;
- (void)getVectorInfo(ctx, *AI, elemNum);
+ ir::Type srcType = getVectorInfo(ctx, *AI, elemNum);
GBE_ASSERT(elemNum == 4);
for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
@@ -3487,27 +3398,6 @@ error:
srcTupleData.push_back(reg);
}
const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
-
- ir::Type srcType = ir::TYPE_U32;
-
- switch(it->second) {
- case GEN_OCL_WRITE_IMAGE_I_1D:
- case GEN_OCL_WRITE_IMAGE_UI_1D:
- case GEN_OCL_WRITE_IMAGE_I_2D:
- case GEN_OCL_WRITE_IMAGE_UI_2D:
- case GEN_OCL_WRITE_IMAGE_I_3D:
- case GEN_OCL_WRITE_IMAGE_UI_3D:
- srcType = ir::TYPE_U32;
- break;
- case GEN_OCL_WRITE_IMAGE_F_1D:
- case GEN_OCL_WRITE_IMAGE_F_2D:
- case GEN_OCL_WRITE_IMAGE_F_3D:
- srcType = ir::TYPE_FLOAT;
- break;
- default:
- GBE_ASSERT(0); // never been here.
- }
-
ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32);
break;
}
@@ -3646,7 +3536,7 @@ error:
//Because cmp's sources are the same as sel's sources, the cmp instruction and the sel
//instruction will be merged into one sel_cmp instruction in the gen selection.
//Add two instructions here for simplicity.
- if(it->second == GEN_OCL_FMAX)
+ if(genIntrinsicID == GEN_OCL_FMAX)
ctx.GE(getType(ctx, I.getType()), cmp, src0, src1);
else
ctx.LT(getType(ctx, I.getType()), cmp, src0, src1);
diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
index 528b3c8b..926c4935 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -26,6 +26,7 @@
#ifndef __GBE_LLVM_GEN_BACKEND_HPP__
#define __GBE_LLVM_GEN_BACKEND_HPP__
+#include <cxxabi.h>
#include "llvm/Config/llvm-config.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/LoopPass.h"
@@ -65,10 +66,31 @@ namespace gbe
}
/*! Sort intrinsics with their names */
hash_map<std::string, OCLInstrinsic> map;
+ /*! Look up the Gen intrinsic ID registered for a callee symbol name.
+  *  The raw symbol is tried first. On a miss the symbol is demangled with
+  *  abi::__cxa_demangle and the text preceding the first '(' (i.e. the
+  *  function name without its parameter list) is looked up instead.
+  *  If neither name is present in the map, a diagnostic is written to
+  *  std::cerr and the process exits with status -1.
+  *  \param symbol callee name as it appears in the LLVM module
+  *  \return the intrinsic ID stored in the map for that name */
+ OCLInstrinsic find(const std::string &symbol) const {
+   auto it = map.find(symbol);
+
+   if (it == map.end()) {
+     int status = 0;
+     // __cxa_demangle returns a malloc()ed buffer (or NULL on failure) that
+     // the caller owns; it must be released with free() on every path.
+     char *realName = abi::__cxa_demangle(symbol.c_str(), NULL, NULL, &status);
+     if (status == 0 && realName != NULL) {
+       const std::string realFnName(realName);
+       // If there is no '(' the whole demangled name is used (npos length).
+       const std::string stripName = realFnName.substr(0, realFnName.find('('));
+       it = map.find(stripName);
+     }
+     free(realName);
+   }
+   // FIXME, should create a complete error reporting mechanism
+   // when found error in beignet managed passes including Gen pass.
+   if (it == map.end()) {
+     std::cerr << "Unresolved symbol: " << symbol << std::endl;
+     std::cerr << "Aborting..." << std::endl;
+     exit(-1);
+   }
+   return it->second;
+ }
};
/*! Sort the OCL Gen intrinsic functions (built on pre-main) */
- static const OCLIntrinsicMap instrinsicMap;
+ static const OCLIntrinsicMap intrinsicMap;
/*! Pad the offset */
int32_t getPadding(int32_t offset, int32_t align);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 8d55c3f5..8ec8336c 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -46,38 +46,14 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8)
DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
// To read_image functions.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtDv2_fj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtDv2_fj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtDv2_fj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtDv4_fj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtDv4_fj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtDv4_fj)
-// work around read image with the LD message. The coords are integer type.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtDv2_ij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtDv2_ij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtDv2_ij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtDv4_ij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtDv4_ij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtDv4_ij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, __gen_ocl_read_imagei)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, __gen_ocl_read_imageui)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, __gen_ocl_read_imagef)
// To write_image functions.
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f)
-
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijDv2_iDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijDv2_iDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjDv2_iDv4_f)
-
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijDv4_iS_)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijDv4_iDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjDv4_iDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, __gen_ocl_write_imagei)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, __gen_ocl_write_imageui)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, __gen_ocl_write_imagef)
// To get image info function
DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width)
diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp
index baf526b6..cf2939dc 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -636,42 +636,17 @@ namespace gbe {
} else {
Value *Callee = call->getCalledValue();
const std::string fnName = Callee->getName();
- auto it = instrinsicMap.map.find(fnName);
- // FIXME, should create a complete error reporting mechanism
- // when found error in beignet managed passes including Gen pass.
- if (it == instrinsicMap.map.end()) {
- std::cerr << "Unresolved symbol: " << fnName << std::endl;
- std::cerr << "Aborting..." << std::endl;
- exit(-1);
- }
- GBE_ASSERT(it != instrinsicMap.map.end());
+ auto genIntrinsicID = intrinsicMap.find(fnName);
// Get the function arguments
CallSite CS(call);
CallSite::arg_iterator CI = CS.arg_begin() + 1;
- switch (it->second) {
+ switch (genIntrinsicID) {
default: break;
- case GEN_OCL_READ_IMAGE_I_1D:
- case GEN_OCL_READ_IMAGE_UI_1D:
- case GEN_OCL_READ_IMAGE_F_1D:
- case GEN_OCL_READ_IMAGE_I_2D:
- case GEN_OCL_READ_IMAGE_UI_2D:
- case GEN_OCL_READ_IMAGE_F_2D:
- case GEN_OCL_READ_IMAGE_I_3D:
- case GEN_OCL_READ_IMAGE_UI_3D:
- case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_I_1D_I:
- case GEN_OCL_READ_IMAGE_UI_1D_I:
- case GEN_OCL_READ_IMAGE_F_1D_I:
- case GEN_OCL_READ_IMAGE_I_2D_I:
- case GEN_OCL_READ_IMAGE_UI_2D_I:
- case GEN_OCL_READ_IMAGE_F_2D_I:
- case GEN_OCL_READ_IMAGE_I_3D_I:
- case GEN_OCL_READ_IMAGE_UI_3D_I:
- case GEN_OCL_READ_IMAGE_F_3D_I:
- case GEN_OCL_GET_IMAGE_WIDTH:
- case GEN_OCL_GET_IMAGE_HEIGHT:
+ case GEN_OCL_READ_IMAGE_I:
+ case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_F:
{
++CI;
if ((*CI)->getType()->isVectorTy())
@@ -680,15 +655,9 @@ namespace gbe {
extractFromVector(call);
break;
}
- case GEN_OCL_WRITE_IMAGE_I_3D:
- case GEN_OCL_WRITE_IMAGE_UI_3D:
- case GEN_OCL_WRITE_IMAGE_F_3D:
- case GEN_OCL_WRITE_IMAGE_I_2D:
- case GEN_OCL_WRITE_IMAGE_UI_2D:
- case GEN_OCL_WRITE_IMAGE_F_2D:
- case GEN_OCL_WRITE_IMAGE_I_1D:
- case GEN_OCL_WRITE_IMAGE_UI_1D:
- case GEN_OCL_WRITE_IMAGE_F_1D:
+ case GEN_OCL_WRITE_IMAGE_I:
+ case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_F:
{
if ((*CI)->getType()->isVectorTy())
*CI = InsertToVector(call, *CI);