diff options
-rw-r--r-- | src/core/cpu/builtins.cpp | 3 | ||||
-rw-r--r-- | src/runtime/stdlib.c | 459 |
2 files changed, 453 insertions, 9 deletions
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index 2399acc..fb11ada 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -289,6 +289,9 @@ int get_image_height(Image2D *image)
 
 int get_image_depth(Image3D *image)
 {
+    if (image->type() != MemObject::Image3D)
+        return 0;
+
     return image->depth();
 }
 
diff --git a/src/runtime/stdlib.c b/src/runtime/stdlib.c
index 1433a93..79da778 100644
--- a/src/runtime/stdlib.c
+++ b/src/runtime/stdlib.c
@@ -10,6 +10,12 @@ int debug(const char *format, ...);
  * Image functions
  */
 
+/* Clamp a to the inclusive range [b, c]; the caller must ensure b <= c. */
+int clamp(int a, int b, int c)
+{
+    return (a < b) ? b : ((a > c) ? c : a);
+}
+
 int __cpu_get_image_width(void *image);
 int __cpu_get_image_height(void *image);
 int __cpu_get_image_depth(void *image);
@@ -17,19 +23,217 @@ int __cpu_get_image_channel_data_type(void *image);
 int __cpu_get_image_channel_order(void *image);
 void *__cpu_image_data(void *image, int x, int y, int z, int *order, int *type);
 
+/*
+ * Apply the addressing mode of sampler to integer texel coordinates.
+ *
+ * The returned coord.w is an out-of-range flag: 0 means the texel at
+ * coord.xyz must be read, non-zero means the border color must be
+ * returned instead. The incoming coord.w is always discarded.
+ */
+int4 handle_address_mode(image3d_t image, int4 coord, sampler_t sampler)
+{
+    // The incoming w component is caller data, not a border flag, so it
+    // is cleared before any return path (including CLK_ADDRESS_NONE).
+    coord.w = 0;
+
+    if ((sampler & 0xf0) == CLK_ADDRESS_NONE)
+        return coord;
+
+    int w = get_image_width(image),
+        h = get_image_height(image),
+        d = get_image_depth(image);
+
+    // 2D images are passed here cast to image3d_t and may report a depth
+    // of 0; treat them as one slice deep so that z = 0 stays in range.
+    if (d < 1)
+        d = 1;
+
+    // Handle address mode
+    if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP_TO_EDGE)
+    {
+        coord.x = clamp(coord.x, 0, w - 1);
+        coord.y = clamp(coord.y, 0, h - 1);
+        coord.z = clamp(coord.z, 0, d - 1);
+    }
+    else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP)
+    {
+        // Coordinates outside the image on either side select the border
+        // color; in-range coordinates are left untouched.
+        if (coord.x < 0 || coord.x >= w ||
+            coord.y < 0 || coord.y >= h ||
+            coord.z < 0 || coord.z >= d)
+        {
+            coord.w = 1;
+        }
+    }
+
+    return coord;
+}
+
 float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, int2 coord)
 {
-    
+    // A 2D read is a 3D read in slice z = 0.
+    int4 c;
+    c.xy = coord;
+    c.zw = 0;
+
+    return read_imagef((image3d_t)image, sampler, c);
 }
 
 float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, int4 coord)
 {
-    
+    float4 result;
+
+    // Start from zero so that channel types or orders not handled below
+    // never return uninitialized data.
+    result.xyzw = 0.0f;
+
+    // Handle address mode
+    coord = handle_address_mode(image, coord, sampler);
+
+    if (coord.w != 0)
+    {
+        // Border color
+        switch (get_image_channel_order(image))
+        {
+            case CLK_R:
+            case CLK_RG:
+            case CLK_RGB:
+            case CLK_LUMINANCE:
+                result.xyz = 0.0f;
+                result.w = 1.0f;
+                return result;
+            default:
+                result.xyzw = 0.0f;
+                return result;
+        }
+    }
+
+    int order, type;
+    void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+
+// Copy *source into data according to the channel order; m is the value
+// stored in alpha when the order has no alpha channel.
+#define UNSWIZZLE(order, source, data, m)\
+    switch (order) \
+    { \
+        case CLK_R: \
+        case CLK_Rx: \
+            data.x = (*source).x; \
+            data.yz = 0; \
+            data.w = m; \
+            break; \
+        case CLK_A: \
+            data.w = (*source).x; \
+            data.xyz = 0; \
+            break; \
+        case CLK_RG: \
+        case CLK_RGx: \
+            data.xy = (*source).xy; \
+            data.z = 0; \
+            data.w = m; \
+            break; \
+        case CLK_RA: \
+            data.xw = (*source).xy; \
+            data.yz = 0; \
+            break; \
+        case CLK_RGBA: \
+            data = *source; \
+            break; \
+        case CLK_BGRA: \
+            data.zyxw = (*source).xyzw; \
+            break; \
+        case CLK_ARGB: \
+            data.wxyz = (*source).xyzw; \
+            break; \
+        case CLK_INTENSITY: \
+            data.xyzw = (*source).x; \
+            break; \
+        case CLK_LUMINANCE: \
+            data.xyz = (*source).x; \
+            data.w = m; \
+            break; \
+        default: \
+            data.xyzw = 0; \
+            break; \
+    }
+
+    switch (type)
+    {
+        case CLK_UNORM_INT8:
+        {
+            uchar4 *source = v_source;
+            uchar4 data;
+
+            UNSWIZZLE(order, source, data, 0xff)
+
+            result.x = (float)data.x / 255.0f;
+            result.y = (float)data.y / 255.0f;
+            result.z = (float)data.z / 255.0f;
+            result.w = (float)data.w / 255.0f;
+            break;
+        }
+        case CLK_UNORM_INT16:
+        {
+            ushort4 *source = v_source;
+            ushort4 data;
+
+            UNSWIZZLE(order, source, data, 0xffff)
+
+            result.x = (float)data.x / 65535.0f;
+            result.y = (float)data.y / 65535.0f;
+            result.z = (float)data.z / 65535.0f;
+            result.w = (float)data.w / 65535.0f;
+            break;
+        }
+        case CLK_SNORM_INT8:
+        {
+            char4 *source = v_source;
+            char4 data;
+
+            UNSWIZZLE(order, source, data, 0x7f)
+
+            result.x = (float)data.x / 127.0f;
+            result.y = (float)data.y / 127.0f;
+            result.z = (float)data.z / 127.0f;
+            result.w = (float)data.w / 127.0f;
+            break;
+        }
+        case CLK_SNORM_INT16:
+        {
+            short4 *source = v_source;
+            short4 data;
+
+            UNSWIZZLE(order, source, data, 0x7fff)
+
+            result.x = (float)data.x / 32767.0f;
+            result.y = (float)data.y / 32767.0f;
+            result.z = (float)data.z / 32767.0f;
+            result.w = (float)data.w / 32767.0f;
+            break;
+        }
+        case CLK_FLOAT:
+        {
+            float4 *source = v_source;
+            UNSWIZZLE(order, source, result, 1.0f)
+            break;
+        }
+    }
+
+#undef UNSWIZZLE
+
+    return result;
 }
 
 float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, float2 coord)
 {
-    
+    // A 2D read is a 3D read in slice z = 0.
+    float4 c;
+
+    c.xy = coord;
+    c.zw = 0;
+
+    return read_imagef((image3d_t)image, sampler, c);
 }
 
 float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord)
@@ -37,19 +241,163 @@ float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord)
     
 }
 
+/*
+ * Case labels for a switch on the channel order: each copies *source into
+ * data, storing m in alpha when the order has no alpha channel. All three
+ * macros are expanded together inside the same switch; UNSWIZZLE_32 also
+ * supplies the default case, which zeroes data for any other order.
+ */
+#define UNSWIZZLE_8(source, data, m) \
+    case CLK_ARGB: \
+        data.wxyz = (*source).xyzw; \
+        break; \
+    case CLK_BGRA: \
+        data.zyxw = (*source).xyzw; \
+        break;
+
+#define UNSWIZZLE_16(source, data, m) \
+    case CLK_INTENSITY: \
+        data.xyzw = (*source).x; \
+        break; \
+    case CLK_LUMINANCE: \
+        data.xyz = (*source).x; \
+        data.w = m; \
+        break;
+
+#define UNSWIZZLE_32(source, data, m) \
+    case CLK_R: \
+    case CLK_Rx: \
+        data.x = (*source).x; \
+        data.yz = 0; \
+        data.w = m; \
+        break; \
+    case CLK_A: \
+        data.w = (*source).x; \
+        data.xyz = 0; \
+        break; \
+    case CLK_RG: \
+    case CLK_RGx: \
+        data.xy = (*source).xy; \
+        data.z = 0; \
+        data.w = m; \
+        break; \
+    case CLK_RA: \
+        data.xw = (*source).xy; \
+        data.yz = 0; \
+        break; \
+    case CLK_RGBA: \
+        data = *source; \
+        break; \
+    default: \
+        data.xyzw = 0; \
+        break;
+
 int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, int2 coord)
 {
-    
+    // A 2D read is a 3D read in slice z = 0.
+    int4 c;
+    c.xy = coord;
+    c.zw = 0;
+
+    return read_imagei((image3d_t)image, sampler, c);
 }
 
 int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, int4 coord)
 {
-    
+    int4 result;
+
+    // Start from zero so that channel types not handled below never
+    // return uninitialized data.
+    result.xyzw = 0;
+
+    // Handle address mode
+    coord = handle_address_mode(image, coord, sampler);
+
+    if (coord.w != 0)
+    {
+        // Border color
+        switch (get_image_channel_order(image))
+        {
+            case CLK_R:
+            case CLK_RG:
+            case CLK_RGB:
+            case CLK_LUMINANCE:
+                result.xyz = 0;
+                result.w = 0x7fffffff;
+                return result;
+            default:
+                result.xyzw = 0;
+                return result;
+        }
+    }
+
+    int order, type;
+    void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+
+    switch (type)
+    {
+        case CLK_SIGNED_INT8:
+        {
+            char4 *source = v_source;
+            char4 data;
+
+            switch (order)
+            {
+                UNSWIZZLE_8(source, data, 0x7f)
+                UNSWIZZLE_16(source, data, 0x7f)
+                UNSWIZZLE_32(source, data, 0x7f)
+            }
+
+            result.x = data.x;
+            result.y = data.y;
+            result.z = data.z;
+            result.w = data.w;
+            break;
+        }
+        case CLK_SIGNED_INT16:
+        {
+            short4 *source = v_source;
+            short4 data;
+
+            switch (order)
+            {
+                UNSWIZZLE_8(source, data, 0x7fff)
+                UNSWIZZLE_16(source, data, 0x7fff)
+                UNSWIZZLE_32(source, data, 0x7fff)
+            }
+
+            result.x = data.x;
+            result.y = data.y;
+            result.z = data.z;
+            result.w = data.w;
+            break;
+        }
+        case CLK_SIGNED_INT32:
+        {
+            int4 *source = v_source;
+
+            switch (order)
+            {
+                UNSWIZZLE_8(source, result, 0x7fffffff)
+                UNSWIZZLE_16(source, result, 0x7fffffff)
+                UNSWIZZLE_32(source, result, 0x7fffffff)
+            }
+            break;
+        }
+    }
+
+    return result;
 }
 
 int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord)
 {
-    
+    // A 2D read is a 3D read in slice z = 0.
+    float4 c;
+
+    c.xy = coord;
+    c.zw = 0;
+
+    return read_imagei((image3d_t)image, sampler, c);
 }
 
 int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord)
@@ -59,17 +407,110 @@ int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord)
 
 uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord)
 {
-    
+    // A 2D read is a 3D read in slice z = 0.
+    int4 c;
+    c.xy = coord;
+    c.zw = 0;
+
+    return read_imageui((image3d_t)image, sampler, c);
 }
 
 uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, int4 coord)
 {
-    
+    uint4 result;
+
+    // Start from zero so that channel types not handled below never
+    // return uninitialized data.
+    result.xyzw = 0;
+
+    // Handle address mode
+    coord = handle_address_mode(image, coord, sampler);
+
+    if (coord.w != 0)
+    {
+        // Border color
+        switch (get_image_channel_order(image))
+        {
+            case CLK_R:
+            case CLK_RG:
+            case CLK_RGB:
+            case CLK_LUMINANCE:
+                result.xyz = 0;
+                result.w = 0xffffffff;
+                return result;
+            default:
+                result.xyzw = 0;
+                return result;
+        }
+    }
+
+    int order, type;
+    void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+
+    switch (type)
+    {
+        case CLK_UNSIGNED_INT8:
+        {
+            uchar4 *source = v_source;
+            uchar4 data;
+
+            switch (order)
+            {
+                UNSWIZZLE_8(source, data, 0xff)
+                UNSWIZZLE_16(source, data, 0xff)
+                UNSWIZZLE_32(source, data, 0xff)
+            }
+
+            result.x = data.x;
+            result.y = data.y;
+            result.z = data.z;
+            result.w = data.w;
+            break;
+        }
+        case CLK_UNSIGNED_INT16:
+        {
+            ushort4 *source = v_source;
+            ushort4 data;
+
+            switch (order)
+            {
+                UNSWIZZLE_8(source, data, 0xffff)
+                UNSWIZZLE_16(source, data, 0xffff)
+                UNSWIZZLE_32(source, data, 0xffff)
+            }
+
+            result.x = data.x;
+            result.y = data.y;
+            result.z = data.z;
+            result.w = data.w;
+            break;
+        }
+        case CLK_UNSIGNED_INT32:
+        {
+            uint4 *source = v_source;
+
+            switch (order)
+            {
+                UNSWIZZLE_8(source, result, 0xffffffff)
+                UNSWIZZLE_16(source, result, 0xffffffff)
+                UNSWIZZLE_32(source, result, 0xffffffff)
+            }
+            break;
+        }
+    }
+
+    return result;
 }
 
 uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord)
 {
-    
+    // A 2D read is a 3D read in slice z = 0.
+    float4 c;
+
+    c.xy = coord;
+    c.zw = 0;
+
+    return read_imageui((image3d_t)image, sampler, c);
 }
 
 uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord)