summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/runtime/stdlib.c423
1 files changed, 214 insertions, 209 deletions
diff --git a/src/runtime/stdlib.c b/src/runtime/stdlib.c
index 7a17cc5..90b994a 100644
--- a/src/runtime/stdlib.c
+++ b/src/runtime/stdlib.c
@@ -222,231 +222,236 @@ float4 f2f_floor(float4 value)
return __builtin_ia32_cvtdq2ps(f2i_floor(value));
}
-#define LINEAR_3D(a, b, c, V0, V1, one) \
- (one - a) * (one - b) - (one - c) * \
- read_imagef(image, sampler, \
+/*
+ * LINEAR_3D(t_max, suf): weighted sum of the eight texels surrounding a
+ * sample position in a 3D image.
+ *
+ * t_max  value that stands for a full weight (1.0f in the float overload).
+ * suf    suffix pasted onto read_image to select the texel-fetch overload.
+ *
+ * Besides its two parameters the macro uses `a`, `b`, `c` (per-axis weights),
+ * `V0`, `V1` (the two corner coordinate vectors), `image` and `sampler`
+ * straight from the scope where it is expanded.  Each corner is assembled
+ * with __builtin_shufflevector: indices 0-3 take a lane from V0, indices
+ * 4-7 take the matching lane from V1.
+ *
+ * Every term is the product of three per-axis weights; the first term must
+ * therefore multiply (t_max - c) as well, not subtract it.
+ *
+ * The expansion is a bare expression without outer parentheses, so use it
+ * only as the complete right-hand side of an assignment.
+ */
+#define LINEAR_3D(t_max, suf) \
+ (t_max - a) * (t_max - b) * (t_max - c) * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 0, 1, 2, 3)) + \
- a * (one - b) * (one - c) * \
- read_imagef(image, sampler, \
+ a * (t_max - b) * (t_max - c) * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 4, 1, 2, 3)) + \
- (one - a) * b * (one - c) * \
- read_imagef(image, sampler, \
+ (t_max - a) * b * (t_max - c) * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 0, 5, 2, 3)) + \
- a * b * (one - c) * \
- read_imagef(image, sampler, \
+ a * b * (t_max - c) * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 4, 5, 2, 3)) + \
- (one - a) * (one - b) * c * \
- read_imagef(image, sampler, \
+ (t_max - a) * (t_max - b) * c * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 0, 1, 6, 3)) + \
- a * (one - b) * c * \
- read_imagef(image, sampler, \
+ a * (t_max - b) * c * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 4, 1, 6, 3)) + \
- (one - a) * b * c * \
- read_imagef(image, sampler, \
+ (t_max - a) * b * c * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 0, 5, 6, 3)) + \
a * b * c * \
- read_imagef(image, sampler, \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 4, 5, 6, 3))
-#define LINEAR_2D(a, b, V0, V1, one) \
- (one - a) * (one - b) * \
- read_imagef(image, sampler, \
+/*
+ * LINEAR_2D(t_max, suf): weighted sum of the four texels surrounding a
+ * sample position in a 2D image.
+ *
+ * t_max  value that stands for a full weight (1.0f in the float overload).
+ * suf    suffix pasted onto read_image to select the texel-fetch overload.
+ *
+ * `a`, `b` (per-axis weights), `V0`, `V1` (corner coordinate vectors),
+ * `image` and `sampler` are taken from the scope where the macro is
+ * expanded.  __builtin_shufflevector indices 0-3 pick a lane of V0 and
+ * indices 4-7 the matching lane of V1; lane 2 of V0 is reused for the last
+ * two components of every corner.
+ *
+ * The expansion is a bare expression: it carries no trailing semicolon (the
+ * expansion site supplies it, exactly as for LINEAR_3D) and no outer
+ * parentheses, so use it only as the complete right-hand side of an
+ * assignment.
+ */
+#define LINEAR_2D(t_max, suf) \
+ (t_max - a) * (t_max - b) * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 0, 1, 2, 2)) + \
- a * (one - b) * \
- read_imagef(image, sampler, \
+ a * (t_max - b) * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 4, 1, 2, 2)) + \
- (one - a) * b * \
- read_imagef(image, sampler, \
+ (t_max - a) * b * \
+ read_image##suf(image, sampler, \
__builtin_shufflevector(V0, V1, 0, 5, 2, 2)) + \
a * b * \
- read_imagef(image, sampler, \
+ read_image##suf(image, sampler, \
- __builtin_shufflevector(V0, V1, 4, 5, 2, 2));
+ __builtin_shufflevector(V0, V1, 4, 5, 2, 2))
+/*
+ * READ_IMAGE(type, suf, type_max): complete body of a
+ * read_image<suf>(image3d_t image, sampler_t sampler, float4 coord) overload.
+ *
+ * type      scalar type of the interpolation weights; type##4 is the result.
+ * suf       suffix pasted onto read_image to reach the overload that takes
+ *           integer texel coordinates.
+ * type_max  value that stands for a full weight (1.0f in the float overload).
+ *
+ * The expansion declares `result`, uses `image`, `sampler` and `coord` from
+ * the enclosing function and finishes with `return result;`, so it must be
+ * the entire body of that function.  Nearest filtering returns directly
+ * from inside the switch; linear filtering falls out to the final return.
+ *
+ * The sampler word is decoded as: bits 0x00f normalized-coordinates flag,
+ * bits 0x0f0 addressing mode, bits 0xf00 filter mode.
+ */
+#define READ_IMAGE(type, suf, type_max) \
+    /* Zero-filled so an unmatched addressing/filter mode returns a defined value. */ \
+    type##4 result = (type)0; \
+    \
+    switch (sampler & 0xf0) \
+    { \
+        case CLK_ADDRESS_NONE: \
+        case CLK_ADDRESS_CLAMP: \
+        case CLK_ADDRESS_CLAMP_TO_EDGE: \
+            /* Denormalize coords */ \
+            if ((sampler & 0xf) == CLK_NORMALIZED_COORDS_TRUE) \
+                coord *= __builtin_ia32_cvtdq2ps(get_image_dim(image)); \
+            \
+            switch (sampler & 0xf00) \
+            { \
+                case CLK_FILTER_NEAREST: \
+                { \
+                    int4 c = f2i_floor(coord); \
+                    \
+                    return read_image##suf(image, sampler, c); \
+                } \
+                case CLK_FILTER_LINEAR: \
+                { \
+                    /* a, b, c, V0, V1 are picked up by name in LINEAR_3D / LINEAR_2D */ \
+                    type a, b, c; \
+                    \
+                    coord -= 0.5f; \
+                    \
+                    int4 V0, V1; \
+                    \
+                    V0 = f2i_floor(coord); \
+                    V1 = f2i_floor(coord) + 1; \
+                    \
+                    /* Keep only the fractional part: it becomes the blend weight */ \
+                    coord -= f2f_floor(coord); \
+                    \
+                    a = (type)(coord.x * type_max); \
+                    b = (type)(coord.y * type_max); \
+                    c = (type)(coord.z * type_max); \
+                    \
+                    if (__cpu_is_image_3d(image)) \
+                    { \
+                        result = LINEAR_3D(type_max, suf); \
+                    } \
+                    else \
+                    { \
+                        result = LINEAR_2D(type_max, suf); \
+                    } \
+                } \
+            } \
+            break; \
+        case CLK_ADDRESS_REPEAT: \
+            switch (sampler & 0xf00) \
+            { \
+                case CLK_FILTER_NEAREST: \
+                { \
+                    int4 dim = get_image_dim(image); \
+                    coord = (coord - f2f_floor(coord)) * \
+                            __builtin_ia32_cvtdq2ps(dim); \
+                    \
+                    int4 c = f2i_floor(coord); \
+                    \
+                    /* if (c > dim - 1) c = c - dim */ \
+                    int4 mask = __builtin_ia32_pcmpgtd128(c, dim - 1); \
+                    int4 repl = c - dim; \
+                    c = (repl & mask) | (c & ~mask); \
+                    \
+                    return read_image##suf(image, sampler, c); \
+                } \
+                case CLK_FILTER_LINEAR: \
+                { \
+                    /* a, b, c, V0, V1 are picked up by name in LINEAR_3D / LINEAR_2D */ \
+                    type a, b, c; \
+                    \
+                    int4 dim = get_image_dim(image); \
+                    coord = (coord - f2f_floor(coord)) * \
+                            __builtin_ia32_cvtdq2ps(dim); \
+                    \
+                    float4 tmp = coord; \
+                    tmp -= 0.5f; \
+                    tmp -= f2f_floor(tmp); \
+                    \
+                    a = (type)(tmp.x * type_max); \
+                    b = (type)(tmp.y * type_max); \
+                    c = (type)(tmp.z * type_max); \
+                    \
+                    int4 V0, V1; \
+                    \
+                    V0 = f2i_floor(coord - 0.5f); \
+                    V1 = V0 + 1; \
+                    \
+                    /* if (0 > V0) V0 = dim + V0 */ \
+                    int4 zero = 0; \
+                    int4 mask = __builtin_ia32_pcmpgtd128(zero, V0); \
+                    int4 repl = dim + V0; \
+                    V0 = (repl & mask) | (V0 & ~mask); \
+                    \
+                    /* if (V1 > dim - 1) V1 = V1 - dim */ \
+                    /* Compare against dim - 1 and blend V1 with itself, so  */ \
+                    /* V1 == dim wraps to 0 and other lanes keep their value */ \
+                    mask = __builtin_ia32_pcmpgtd128(V1, dim - 1); \
+                    repl = V1 - dim; \
+                    V1 = (repl & mask) | (V1 & ~mask); \
+                    \
+                    if (__cpu_is_image_3d(image)) \
+                    { \
+                        result = LINEAR_3D(type_max, suf); \
+                    } \
+                    else \
+                    { \
+                        result = LINEAR_2D(type_max, suf); \
+                    } \
+                } \
+            } \
+            break; \
+        case CLK_ADDRESS_MIRRORED_REPEAT: \
+            switch (sampler & 0xf00) \
+            { \
+                case CLK_FILTER_NEAREST: \
+                { \
+                    int4 dim = get_image_dim(image); \
+                    float4 two = 2.0f; \
+                    float4 prim = two * __builtin_ia32_cvtdq2ps( \
+                        __builtin_ia32_cvtps2dq(0.5f * coord)); \
+                    prim -= coord; \
+                    \
+                    /* abs(x) = x & ~{-0, -0, -0, -0} */ \
+                    float4 nzeroes = -0.0f; \
+                    prim = (float4)((int4)prim & ~(int4)nzeroes); \
+                    \
+                    coord = prim * __builtin_ia32_cvtdq2ps(dim); \
+                    int4 c = f2i_floor(coord); \
+                    \
+                    /* if (c > dim - 1) c = dim - 1 */ \
+                    int4 repl = dim - 1; \
+                    int4 mask = __builtin_ia32_pcmpgtd128(c, repl); \
+                    c = (repl & mask) | (c & ~mask); \
+                    \
+                    return read_image##suf(image, sampler, c); \
+                } \
+                case CLK_FILTER_LINEAR: \
+                { \
+                    /* a, b, c, V0, V1 are picked up by name in LINEAR_3D / LINEAR_2D */ \
+                    type a, b, c; \
+                    \
+                    int4 dim = get_image_dim(image); \
+                    float4 two = 2.0f; \
+                    float4 prim = two * __builtin_ia32_cvtdq2ps( \
+                        __builtin_ia32_cvtps2dq(0.5f * coord)); \
+                    prim -= coord; \
+                    \
+                    /* abs(x) = x & ~{-0, -0, -0, -0} */ \
+                    float4 nzeroes = -0.0f; \
+                    prim = (float4)((int4)prim & ~(int4)nzeroes); \
+                    \
+                    coord = prim * __builtin_ia32_cvtdq2ps(dim); \
+                    \
+                    float4 tmp = coord; \
+                    tmp -= 0.5f; \
+                    tmp -= f2f_floor(tmp); \
+                    \
+                    a = (type)(tmp.x * type_max); \
+                    b = (type)(tmp.y * type_max); \
+                    c = (type)(tmp.z * type_max); \
+                    \
+                    int4 V0, V1, zero = 0; \
+                    \
+                    V0 = f2i_floor(coord - 0.5f); \
+                    V1 = V0 + 1; \
+                    \
+                    /* if (0 > V0) V0 = 0 */ \
+                    /* The mask must flag the negative lanes (0 > V0); with  */ \
+                    /* swapped operands every positive index would be zeroed */ \
+                    int4 mask = __builtin_ia32_pcmpgtd128(zero, V0); \
+                    V0 &= ~mask; \
+                    \
+                    /* if (V1 > dim - 1) V1 = dim - 1 */ \
+                    int4 repl = dim - 1; \
+                    mask = __builtin_ia32_pcmpgtd128(V1, repl); \
+                    V1 = (repl & mask) | (V1 & ~mask); \
+                    \
+                    if (__cpu_is_image_3d(image)) \
+                    { \
+                        result = LINEAR_3D(type_max, suf); \
+                    } \
+                    else \
+                    { \
+                        result = LINEAR_2D(type_max, suf); \
+                    } \
+                } \
+            } \
+            break; \
+    } \
+    \
+    return result;
+
float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord)
{
- float4 result;
-
- switch (sampler & 0xf0)
- {
- case CLK_ADDRESS_NONE:
- case CLK_ADDRESS_CLAMP:
- case CLK_ADDRESS_CLAMP_TO_EDGE:
- // Denormalize coords
- if ((sampler & 0xf) == CLK_NORMALIZED_COORDS_TRUE)
- coord *= __builtin_ia32_cvtdq2ps(get_image_dim(image));
-
- switch (sampler & 0xf00)
- {
- case CLK_FILTER_NEAREST:
- {
- int4 c = f2i_floor(coord);
-
- return read_imagef(image, sampler, c);
- }
- case CLK_FILTER_LINEAR:
- {
- float a, b, c;
-
- coord -= 0.5f;
-
- int4 V0, V1;
-
- V0 = f2i_floor(coord);
- V1 = f2i_floor(coord) + 1;
-
- coord -= f2f_floor(coord);
-
- a = coord.x;
- b = coord.y;
- c = coord.z;
-
- if (__cpu_is_image_3d(image))
- {
- result = LINEAR_3D(a, b, c, V0, V1, 1.0f);
- }
- else
- {
- result = LINEAR_2D(a, b, V0, V1, 1.0f);
- }
- }
- }
- break;
- case CLK_ADDRESS_REPEAT:
- switch (sampler & 0xf00)
- {
- case CLK_FILTER_NEAREST:
- {
- int4 dim = get_image_dim(image);
- coord = (coord - f2f_floor(coord)) *
- __builtin_ia32_cvtdq2ps(dim);
-
- int4 c = f2i_floor(coord);
-
- // if (c > dim - 1) c = c - dim
- int4 mask = __builtin_ia32_pcmpgtd128(c, dim - 1);
- int4 repl = c - dim;
- c = (repl & mask) | (c & ~mask);
-
- return read_imagef(image, sampler, c);
- }
- case CLK_FILTER_LINEAR:
- {
- float a, b, c;
-
- int4 dim = get_image_dim(image);
- coord = (coord - f2f_floor(coord)) *
- __builtin_ia32_cvtdq2ps(dim);
-
- float4 tmp = coord;
- tmp -= 0.5f;
- tmp -= f2f_floor(tmp);
-
- a = tmp.x;
- b = tmp.y;
- c = tmp.z;
-
- int4 V0, V1;
-
- V0 = f2i_floor(coord - 0.5f);
- V1 = V0 + 1;
-
- // if (0 > V0) V0 = dim + V0
- int4 zero = 0;
- int4 mask = __builtin_ia32_pcmpgtd128(zero, V0);
- int4 repl = dim + V0;
- V0 = (repl & mask) | (V0 & ~mask);
-
- // if (V1 > dim - 1) V1 = V1 - dim
- mask = __builtin_ia32_pcmpgtd128(V1, dim);
- repl = V1 - dim;
- V1 = (repl & mask) | (V0 & ~mask);
-
- if (__cpu_is_image_3d(image))
- {
- result = LINEAR_3D(a, b, c, V0, V1, 1.0f);
- }
- else
- {
- result = LINEAR_2D(a, b, V0, V1, 1.0f);
- }
- }
- }
- break;
- case CLK_ADDRESS_MIRRORED_REPEAT:
- switch (sampler & 0xf00)
- {
- case CLK_FILTER_NEAREST:
- {
- int4 dim = get_image_dim(image);
- float4 two = 2.0f;
- float4 prim = two * __builtin_ia32_cvtdq2ps(
- __builtin_ia32_cvtps2dq(0.5f * coord));
- prim -= coord;
-
- // abs(x) = x & ~{-0, -0, -0, -0}
- float4 nzeroes = -0.0f;
- prim = (float4)((int4)prim & ~(int4)nzeroes);
-
- coord = prim * __builtin_ia32_cvtdq2ps(dim);
- int4 c = f2i_floor(coord);
-
- // if (c > dim - 1) c = dim - 1
- int4 repl = dim - 1;
- int4 mask = __builtin_ia32_pcmpgtd128(c, repl);
- c = (repl & mask) | (c & ~mask);
-
- return read_imagef(image, sampler, c);
- }
- case CLK_FILTER_LINEAR:
- {
- float a, b, c;
-
- int4 dim = get_image_dim(image);
- float4 two = 2.0f;
- float4 prim = two * __builtin_ia32_cvtdq2ps(
- __builtin_ia32_cvtps2dq(0.5f * coord));
- prim -= coord;
-
- // abs(x) = x & ~{-0, -0, -0, -0}
- float4 nzeroes = -0.0f;
- prim = (float4)((int4)prim & ~(int4)nzeroes);
-
- coord = prim * __builtin_ia32_cvtdq2ps(dim);
-
- float4 tmp = coord;
- tmp -= 0.5f;
- tmp -= f2f_floor(tmp);
-
- a = tmp.x;
- b = tmp.y;
- c = tmp.z;
-
- int4 V0, V1, zero = 0;
-
- V0 = f2i_floor(coord - 0.5f);
- V1 = V0 + 1;
-
- // if (0 > V0) V0 = 0
- int4 mask = __builtin_ia32_pcmpgtd128(V0, zero);
- V0 &= ~mask;
-
- // if (V1 > dim - 1) V1 = dim - 1
- int4 repl = dim - 1;
- mask = __builtin_ia32_pcmpgtd128(V1, repl);
- V1 = (repl & mask) | (V1 & ~mask);
-
- if (__cpu_is_image_3d(image))
- {
- result = LINEAR_3D(a, b, c, V0, V1, 1.0f);
- }
- else
- {
- result = LINEAR_2D(a, b, V0, V1, 1.0f);
- }
- }
- }
- break;
- }
+ READ_IMAGE(float, f, 1.0f) /* Whole body, including `return result;`, comes
+  * from the READ_IMAGE expansion: float weights, read_imagef texel fetches
+  * with integer coordinates, and 1.0f as the value of a full weight. */
}
#define UNSWIZZLE_8(source, data, m) \
@@ -595,7 +600,7 @@ int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord)
int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord)
{
-
+ READ_IMAGE(int, i, 0x7fffffff) /* Whole body, including `return result;`,
+  * comes from the READ_IMAGE expansion: int weights scaled by 0x7fffffff and
+  * read_imagei texel fetches with integer coordinates.
+  * NOTE(review): on the CLK_FILTER_LINEAR path the weights are multiplied
+  * together as int and the sum is never divided back by the scale, so that
+  * path looks prone to overflow; OpenCL presumably only defines read_imagei
+  * for CLK_FILTER_NEAREST - TODO confirm the linear path is intentional. */
}
uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord)
@@ -702,7 +707,7 @@ uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord)
uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord)
{
-
+ READ_IMAGE(uint, ui, 0xffffffff) /* Whole body, including `return result;`,
+  * comes from the READ_IMAGE expansion: uint weights scaled by 0xffffffff
+  * and read_imageui texel fetches with integer coordinates.
+  * NOTE(review): on the CLK_FILTER_LINEAR path the weights are multiplied
+  * together as uint and the sum is never divided back by the scale, so the
+  * products wrap; OpenCL presumably only defines read_imageui for
+  * CLK_FILTER_NEAREST - TODO confirm the linear path is intentional. */
}
#undef UNSWIZZLE_8