Reimplement read_image* in native C++ instead of OpenCL.

This new implementation is smaller and more readable. It is also architecure-independent and not tied to SSE.
author: Denis Steckelmacher <steckdenis@yahoo.fr> 2011-08-20 14:32:33 +0200
committer: Denis Steckelmacher <steckdenis@yahoo.fr> 2011-08-20 14:32:33 +0200
commit: 97917f6c1898a6eeb68c54ed640263bb435fc0ec (patch)
tree: 7aee7beced995227b0c5051ba0d153637c5bbb54
parent: 4c887fde686489e3e8cfee11f7e366146674627e (diff)
6 files changed, 347 insertions, 276 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 232e964..348d4bc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -41,6 +41,7 @@ set(COAL_SRC_FILES
     core/cpu/program.cpp
     core/cpu/worker.cpp
     core/cpu/builtins.cpp
+    core/cpu/sampler.cpp
 
     ${CMAKE_CURRENT_BINARY_DIR}/runtime/stdlib.h.embed.h
     ${CMAKE_CURRENT_BINARY_DIR}/runtime/stdlib.c.bc.embed.h
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index aacd649..081717c 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -239,18 +239,6 @@ void CPUKernelWorkGroup::barrier(unsigned int flags)
     // a barrier and that we returned to this one. We can continue.
 }
 
-void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const
-{
-    CPUBuffer *buffer =
-        (CPUBuffer *)image->deviceBuffer((DeviceInterface *)p_kernel->device());
-
-    return imageData((unsigned char *)buffer->data(),
-                     x, y, z,
-                     image->row_pitch(),
-                     image->slice_pitch(),
-                     image->pixel_size());
-}
-
 void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const
 {
     std::cout << "OpenCL: Non-existant builtin function " << name
@@ -309,17 +297,17 @@ static void barrier(unsigned int flags)
 
 // Images
 
-int get_image_width(Image2D *image)
+static int get_image_width(Image2D *image)
 {
     return image->width();
 }
 
-int get_image_height(Image2D *image)
+static int get_image_height(Image2D *image)
 {
     return image->height();
 }
 
-int get_image_depth(Image3D *image)
+static int get_image_depth(Image3D *image)
 {
     if (image->type() != MemObject::Image3D)
         return 1;
@@ -327,17 +315,17 @@ int get_image_depth(Image3D *image)
     return image->depth();
 }
 
-int get_image_channel_data_type(Image2D *image)
+static int get_image_channel_data_type(Image2D *image)
 {
     return image->format().image_channel_data_type;
 }
 
-int get_image_channel_order(Image2D *image)
+static int get_image_channel_order(Image2D *image)
 {
     return image->format().image_channel_order;
 }
 
-void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
+static void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
 {
     *order = image->format().image_channel_order;
     *type = image->format().image_channel_data_type;
@@ -345,11 +333,26 @@ void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
     return g_work_group->getImageData(image, x, y, z);
 }
 
-bool is_image_3d(Image3D *image)
+static bool is_image_3d(Image3D *image)
 {
     return (image->type() == MemObject::Image3D ? 1 : 0);
 }
 
+static void write_imagef(Image2D *image, int x, int y, int z, float *color)
+{
+    g_work_group->writeImage(image, x, y, z, color);
+}
+
+static void write_imagei(Image2D *image, int x, int y, int z, int32_t *color)
+{
+    g_work_group->writeImage(image, x, y, z, color);
+}
+
+static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color)
+{
+    g_work_group->writeImage(image, x, y, z, color);
+}
+
 /*
  * Bridge between LLVM and us
  */
@@ -392,6 +395,12 @@ void *getBuiltin(const std::string &name)
         return (void *)&image_data;
     else if (name == "__cpu_is_image_3d")
         return (void *)&is_image_3d;
+    else if (name == "__cpu_write_imagef")
+        return (void *)&write_imagef;
+    else if (name == "__cpu_write_imagei")
+        return (void *)&write_imagei;
+    else if (name == "__cpu_write_imageui")
+        return (void *)&write_imageui;
 
     else if (name == "debug")
         return (void *)&printf;
diff --git a/src/core/cpu/kernel.h b/src/core/cpu/kernel.h
index b26e072..dec42e6 100644
--- a/src/core/cpu/kernel.h
+++ b/src/core/cpu/kernel.h
@@ -37,6 +37,7 @@
 
 #include <ucontext.h>
 #include <pthread.h>
+#include <stdint.h>
 
 namespace llvm
 {
@@ -100,8 +101,13 @@ class CPUKernelWorkGroup
         size_t getNumGroups(cl_uint dimindx) const;
         size_t getGroupID(cl_uint dimindx) const;
         size_t getGlobalOffset(cl_uint dimindx) const;
+
         void barrier(unsigned int flags);
+
         void *getImageData(Image2D *image, int x, int y, int z) const;
+        void writeImage(Image2D *image, int x, int y, int z, float *color) const;
+        void writeImage(Image2D *image, int x, int y, int z, int32_t *color) const;
+        void writeImage(Image2D *image, int x, int y, int z, uint32_t *color) const;
 
         void builtinNotFound(const std::string &name) const;
 
diff --git a/src/core/cpu/sampler.cpp b/src/core/cpu/sampler.cpp
new file mode 100644
index 0000000..c173004
--- /dev/null
+++ b/src/core/cpu/sampler.cpp
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the copyright holder nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/sampler.cpp
+ * \brief OpenCL C image access functions
+ *
+ * It is recommended to compile this file using Clang as it supports the
+ * \c __builtin_shufflevector() built-in function, providing SSE or
+ * NEON-accelerated code.
+ */
+
+#include "../memobject.h"
+#include "kernel.h"
+#include "buffer.h"
+#include "builtins.h"
+
+#include <cstdlib>
+#include <immintrin.h>
+
+using namespace Coal;
+
+/*
+ * Macros or functions used to accelerate the functions
+ */
+#ifndef __has_builtin
+    #define __has_builtin(x) 0
+#endif
+
+static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b,
+                          int x, int y, int z, int w)
+{
+    rs[0] = (x < 4 ? a[x] : b[x - 4]);
+    rs[1] = (y < 4 ? a[y] : b[y - 4]);
+    rs[2] = (z < 4 ? a[z] : b[z - 4]);
+    rs[3] = (w < 4 ? a[w] : b[w - 4]);
+}
+
+static void slow_convert_to_format4f(float *data, cl_channel_type type)
+{
+    // Convert always the four components of source to target
+    if (type == CL_FLOAT)
+        return;
+
+    // NOTE: We can read and write at the same time in data because
+    //       we always begin wy reading 4 bytes (float) and never write
+    //       more than 4 bytes, so no data is corrupted
+    for (unsigned int i=0; i<3; ++i)
+    {
+        switch (type)
+        {
+            case CL_SNORM_INT8:
+                ((int8_t *)data)[i] = data[i] * 128.0f;
+                break;
+            case CL_SNORM_INT16:
+                ((int16_t *)data)[i] = data[i] * 32767.0f;
+                break;
+            case CL_UNORM_INT8:
+                ((uint8_t *)data)[i] = data[i] * 256.0f;
+                break;
+            case CL_UNORM_INT16:
+                ((uint16_t *)data)[i] = data[i] * 65535.0f;
+                break;
+        }
+    }
+}
+
+static void slow_convert_to_format4i(int *data, cl_channel_type type)
+{
+    // Convert always the four components of source to target
+    if (type == CL_SIGNED_INT32)
+        return;
+
+    for (unsigned int i=0; i<3; ++i)
+    {
+        switch (type)
+        {
+            case CL_SIGNED_INT8:
+                ((int8_t *)data)[i] = data[i];
+                break;
+            case CL_SIGNED_INT16:
+                ((int16_t *)data)[i] = data[i];
+                break;
+        }
+    }
+}
+
+static void slow_convert_to_format4ui(uint32_t *data, cl_channel_type type)
+{
+    // Convert always the four components of source to target
+    if (type == CL_UNSIGNED_INT32)
+        return;
+
+    for (unsigned int i=0; i<3; ++i)
+    {
+        switch (type)
+        {
+            case CL_UNSIGNED_INT8:
+                ((uint8_t *)data)[i] = data[i];
+                break;
+            case CL_UNSIGNED_INT16:
+                ((uint16_t *)data)[i] = data[i];
+                break;
+        }
+    }
+}
+
+#if __has_builtin(__builtin_shufflevector)
+    #define shuffle4(rs, a, b, x, y, z, w) \
+        *(__v4sf *)rs = __builtin_shufflevector(*(__v4sf *)a, *(__v4sf *)b, \
+                                                x, y, z, w)
+#else
+    #define shuffle4(rs, a, b, x, y, z, w) \
+        slow_shuffle4(rs, a, b, x, y, z, w)
+#endif
+
+    #define convert_to_format4f(data, type) \
+        slow_convert_to_format4f(data, type)
+
+    #define convert_to_format4i(data, type) \
+        slow_convert_to_format4i(data, type)
+
+    #define convert_to_format4ui(data, type) \
+        slow_convert_to_format4ui(data, type)
+
+static void swizzle(uint32_t *target, uint32_t *source,
+                    cl_channel_order order, bool reading, uint32_t t_max)
+{
+    uint32_t special[4] = {0, t_max, 0, 0 };
+
+    if (reading)
+    {
+        switch (order)
+        {
+            case CL_R:
+            case CL_Rx:
+                // target = {source->x, 0, 0, t_max}
+                shuffle4(target, source, special, 0, 4, 4, 5);
+                break;
+            case CL_A:
+                // target = {0, 0, 0, source->x}
+                shuffle4(target, source, special, 4, 4, 4, 0);
+                break;
+            case CL_INTENSITY:
+                // target = {source->x, source->x, source->x, source->x}
+                shuffle4(target, source, source, 0, 0, 0, 0);
+                break;
+            case CL_LUMINANCE:
+                // target = {source->x, source->x, source->x, t_max}
+                shuffle4(target, source, special, 0, 0, 0, 5);
+                break;
+            case CL_RG:
+            case CL_RGx:
+                // target = {source->x, source->y, 0, t_max}
+                shuffle4(target, source, special, 0, 1, 4, 5);
+                break;
+            case CL_RA:
+                // target = {source->x, 0, 0, source->y}
+                shuffle4(target, source, special, 0, 4, 4, 1);
+                break;
+            case CL_RGB:
+            case CL_RGBx:
+            case CL_RGBA:
+                // Nothing to do, already the good order
+                std::memcpy(target, source, 16);
+                break;
+            case CL_ARGB:
+                // target = {source->y, source->z, source->w, source->x}
+                shuffle4(target, source, source, 1, 2, 3, 0);
+                break;
+            case CL_BGRA:
+                // target = {source->z, source->y, source->x, source->w}
+                shuffle4(target, source, source, 2, 1, 0, 3);
+                break;
+        }
+    }
+    else
+    {
+        switch (order)
+        {
+            case CL_A:
+                // target = {source->w, undef, undef, undef}
+                shuffle4(target, source, source, 3, 3, 3, 3);
+                break;
+            case CL_RA:
+                // target = {source->x, source->w, undef, undef}
+                shuffle4(target, source, source, 0, 3, 3, 3);
+                break;
+            case CL_ARGB:
+                // target = {source->w, source->x, source->y, source->z}
+                shuffle4(target, source, source, 3, 0, 1, 2);
+                break;
+            case CL_BGRA:
+                // target = {source->z, source->y, source->x, source->w}
+                shuffle4(target, source, source, 2, 1, 0, 3);
+                break;
+            default:
+                std::memcpy(target, source, 16);
+        }
+    }
+}
+
+/*
+ * Actual implementation of the built-ins
+ */
+
+void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const
+{
+    CPUBuffer *buffer =
+        (CPUBuffer *)image->deviceBuffer((DeviceInterface *)p_kernel->device());
+
+    return imageData((unsigned char *)buffer->data(),
+                     x, y, z,
+                     image->row_pitch(),
+                     image->slice_pitch(),
+                     image->pixel_size());
+}
+
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+                                    float *color) const
+{
+    float converted[4];
+
+    // Swizzle to the correct order (float, int and uint are 32-bit, so the
+    // type has no importance
+    swizzle((uint32_t *)converted, (uint32_t *)color,
+            image->format().image_channel_order, false, 0);
+
+    // Convert color to the correct format
+    convert_to_format4f(converted, image->format().image_channel_data_type);
+
+    // Get a pointer in the image where to write the data
+    void *target = getImageData(image, x, y, z);
+
+    // Copy the converted data to the image
+    std::memcpy(target, converted, image->pixel_size());
+}
+
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+                                    int32_t *color) const
+{
+    int32_t converted[4];
+
+    // Swizzle to the correct order (float, int and uint are 32-bit, so the
+    // type has no importance
+    swizzle((uint32_t *)converted, (uint32_t *)color,
+            image->format().image_channel_order, false, 0);
+
+    // Convert color to the correct format
+    convert_to_format4i(converted, image->format().image_channel_data_type);
+
+    // Get a pointer in the image where to write the data
+    void *target = getImageData(image, x, y, z);
+
+    // Copy the converted data to the image
+    std::memcpy(target, converted, image->pixel_size());
+}
+
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+                                    uint32_t *color) const
+{
+    uint32_t converted[4];
+
+    // Swizzle to the correct order (float, int and uint are 32-bit, so the
+    // type has no importance
+    swizzle((uint32_t *)converted, (uint32_t *)color,
+            image->format().image_channel_order, false, 0);
+
+    // Convert color to the correct format
+    convert_to_format4ui(converted, image->format().image_channel_data_type);
+
+    // Get a pointer in the image where to write the data
+    void *target = getImageData(image, x, y, z);
+
+    // Copy the converted data to the image
+    std::memcpy(target, converted, image->pixel_size());
+}
+\ No newline at end of file
diff --git a/src/core/sampler.cpp b/src/core/sampler.cpp
index 8895bd0..558b84a 100644
--- a/src/core/sampler.cpp
+++ b/src/core/sampler.cpp
@@ -26,7 +26,7 @@
  */
 
 /**
- * \file sampler.cpp
+ * \file core/sampler.cpp
  * \brief Sampler
  */
 
diff --git a/src/runtime/stdlib.c b/src/runtime/stdlib.c
index 2610a98..cbb3ec8 100644
--- a/src/runtime/stdlib.c
+++ b/src/runtime/stdlib.c
@@ -50,6 +50,10 @@ int __cpu_get_image_channel_order(void *image);
 int __cpu_is_image_3d(void *image);
 void *__cpu_image_data(void *image, int x, int y, int z, int *order, int *type);
 
+void __cpu_write_imagef(void *image, int x, int y, int z, float4 *color);
+void __cpu_write_imagei(void *image, int x, int y, int z, int4 *color);
+void __cpu_write_imageui(void *image, int x, int y, int z, uint4 *color);
+
 int4 handle_address_mode(image3d_t image, int4 coord, sampler_t sampler)
 {
     coord.w = 0;
@@ -743,284 +747,34 @@ uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord)
 
 void OVERLOAD write_imagef(image2d_t image, int2 coord, float4 color)
 {
-    int4 c;
-    c.xy = coord;
-    c.zw = 0;
-
-    write_imagef((image3d_t)image, c, color);
+    __cpu_write_imagef(image, coord.x, coord.y, 0, &color);
 }
 
 void OVERLOAD write_imagef(image3d_t image, int4 coord, float4 color)
 {
-    int order, type;
-    void *v_target = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
-
-#define SWIZZLE(order, target, data)    \
-    switch (order)                      \
-    {                                   \
-        case CLK_R:                     \
-        case CLK_Rx:                    \
-            (*target).x = data.x;       \
-            break;                      \
-        case CLK_A:                     \
-            (*target).x = data.w;       \
-            break;                      \
-        case CLK_RG:                    \
-        case CLK_RGx:                   \
-            (*target).xy = data.xy;     \
-            break;                      \
-        case CLK_RA:                    \
-            (*target).xy = data.xw;     \
-            break;                      \
-        case CLK_RGBA:                  \
-            *target = data;             \
-            break;                      \
-        case CLK_BGRA:                  \
-            (*target).zyxw = data.xyzw; \
-            break;                      \
-        case CLK_ARGB:                  \
-            (*target).wxyz = data.xyzw; \
-            break;                      \
-        case CLK_INTENSITY:             \
-        case CLK_LUMINANCE:             \
-            (*target).x = data.x;       \
-            break;                      \
-    }
-
-    switch (type)
-    {
-        case CLK_SNORM_INT8:
-        {
-            char4 *target = v_target;
-            char4 data;
-
-            // Denormalize
-            data.x = (color.x * 127.0f);
-            data.y = (color.y * 127.0f);
-            data.z = (color.z * 127.0f);
-            data.w = (color.w * 127.0f);
-
-            SWIZZLE(order, target, data)
-            break;
-        }
-        case CLK_UNORM_INT8:
-        {
-            uchar4 *target = v_target;
-            uchar4 data;
-
-            // Denormalize
-            data.x = (color.x * 255.0f);
-            data.y = (color.y * 255.0f);
-            data.z = (color.z * 255.0f);
-            data.w = (color.w * 255.0f);
-
-            SWIZZLE(order, target, data)
-            break;
-        }
-        case CLK_SNORM_INT16:
-        {
-            short4 *target = v_target;
-            short4 data;
-
-            // Denormalize
-            data.x = (color.x * 32767.0f);
-            data.y = (color.y * 32767.0f);
-            data.z = (color.z * 32767.0f);
-            data.w = (color.w * 32767.0f);
-
-            SWIZZLE(order, target, data)
-            break;
-        }
-        case CLK_UNORM_INT16:
-        {
-            ushort4 *target = v_target;
-            ushort4 data;
-
-            data.x = (color.x * 65535.0f);
-            data.y = (color.y * 65535.0f);
-            data.z = (color.z * 65535.0f);
-            data.w = (color.w * 65535.0f);
-
-            SWIZZLE(order, target, data)
-            break;
-        }
-        case CLK_FLOAT:
-        {
-            float4 *target = v_target;
-
-            SWIZZLE(order, target, color)
-            break;
-        }
-    }
-
-#undef SWIZZLE
+    __cpu_write_imagef(image, coord.x, coord.y, coord.z, &color);
 }
 
-#define SWIZZLE_8(target, data)         \
-        case CLK_ARGB:                  \
-            (*target).wxyz = data.xyzw; \
-            break;                      \
-        case CLK_BGRA:                  \
-            (*target).zyxw = data.xyzw; \
-            break;
-
-#define SWIZZLE_16(target, data)        \
-        case CLK_LUMINANCE:             \
-        case CLK_INTENSITY:             \
-            (*target).x = data.x;       \
-            break;
-
-#define SWIZZLE_32(target, data)        \
-        case CLK_R:                     \
-        case CLK_Rx:                    \
-            (*target).x = data.x;       \
-            break;                      \
-        case CLK_A:                     \
-            (*target).x = data.w;       \
-            break;                      \
-        case CLK_RG:                    \
-        case CLK_RGx:                   \
-            (*target).xy = data.xy;     \
-            break;                      \
-        case CLK_RA:                    \
-            (*target).xy = data.xw;     \
-            break;                      \
-        case CLK_RGBA:                  \
-            *target = data;             \
-            break;
-
 void OVERLOAD write_imagei(image2d_t image, int2 coord, int4 color)
 {
-    int4 c;
-    c.xy = coord;
-    c.zw = 0;
-
-    write_imagei((image3d_t)image, c, color);
+    __cpu_write_imagei(image, coord.x, coord.y, 0, &color);
 }
 
 void OVERLOAD write_imagei(image3d_t image, int4 coord, int4 color)
 {
-    int order, type;
-    void *v_target = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
-
-    switch (type)
-    {
-        case CLK_SIGNED_INT8:
-        {
-            char4 *target = v_target;
-            char4 data;
-
-            data.x = color.x;
-            data.y = color.y;
-            data.z = color.z;
-            data.w = color.w;
-
-            switch (order)
-            {
-                SWIZZLE_8(target, data)
-                SWIZZLE_16(target, data)
-                SWIZZLE_32(target, data)
-            }
-
-            break;
-        }
-        case CLK_SIGNED_INT16:
-        {
-            short4 *target = v_target;
-            short4 data;
-
-            data.x = color.x;
-            data.y = color.y;
-            data.z = color.z;
-            data.w = color.w;
-
-            switch (order)
-            {
-                SWIZZLE_16(target, data)
-                SWIZZLE_32(target, data)
-            }
-
-            break;
-        }
-        case CLK_SIGNED_INT32:
-        {
-            int4 *target = v_target;
-
-            switch (order)
-            {
-                SWIZZLE_32(target, color)
-            }
-
-            break;
-        }
-    }
+    __cpu_write_imagei(image, coord.x, coord.y, coord.z, &color);
 }
 
 void OVERLOAD write_imageui(image2d_t image, int2 coord, uint4 color)
 {
-    int4 c;
-    c.xy = coord;
-    c.zw = 0;
-
-    write_imageui((image3d_t)image, c, color);
+   __cpu_write_imageui(image, coord.x, coord.y, 0, &color);
 }
 
 void OVERLOAD write_imageui(image3d_t image, int4 coord, uint4 color)
 {
-    int order, type;
-    void *v_target = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
-
-    switch (type)
-    {
-        case CLK_UNSIGNED_INT8:
-        {
-            uchar4 *target = v_target;
-            uchar4 data;
-
-            data.x = color.x;
-            data.y = color.y;
-            data.z = color.z;
-            data.w = color.w;
-
-            switch (order)
-            {
-                SWIZZLE_8(target, data)
-                SWIZZLE_16(target, data)
-                SWIZZLE_32(target, data)
-            }
-        }
-        case CLK_UNSIGNED_INT16:
-        {
-            ushort4 *target = v_target;
-            ushort4 data;
-
-            data.x = color.x;
-            data.y = color.y;
-            data.z = color.z;
-            data.w = color.w;
-
-            switch (order)
-            {
-                SWIZZLE_16(target, data)
-                SWIZZLE_32(target, data)
-            }
-        }
-        case CLK_UNSIGNED_INT32:
-        {
-            uint4 *target = v_target;
-
-            switch (order)
-            {
-                SWIZZLE_32(target, color)
-            }
-        }
-    }
+    __cpu_write_imageui(image, coord.x, coord.y, coord.z, &color);
 }
 
-#undef SWIZZLE_8
-#undef SWIZZLE_16
-#undef SWIZZLE_32
-
 int2 OVERLOAD get_image_dim(image2d_t image)
 {
     int2 result;
author	Denis Steckelmacher <steckdenis@yahoo.fr>	2011-08-20 14:32:33 +0200
committer	Denis Steckelmacher <steckdenis@yahoo.fr>	2011-08-20 14:32:33 +0200
commit	97917f6c1898a6eeb68c54ed640263bb435fc0ec (patch)
tree	7aee7beced995227b0c5051ba0d153637c5bbb54
parent	4c887fde686489e3e8cfee11f7e366146674627e (diff)