summaryrefslogtreecommitdiff
path: root/src/kernels
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2013-10-10 12:28:41 +0800
committerZhigang Gong <zhigang.gong@linux.intel.com>2013-10-10 13:33:23 +0800
commite735049c614473ae505221f3f5efdd5a7f5bbeeb (patch)
tree4b4079a4a781373c9ce00536c4488f51edce0e59 /src/kernels
parentf3dc4b9d1996c8caee2c412c3d10dffc20abfb59 (diff)
Add the internal used kernels for buffer copy
Add internally used kernels for buffer copy. The align 1, 4 and 16 cases are separated into three kernels to improve performance. The CMakeList is also updated. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src/kernels')
-rw-r--r--src/kernels/cl_internal_copy_buf_align1.cl8
-rw-r--r--src/kernels/cl_internal_copy_buf_align16.cl12
-rw-r--r--src/kernels/cl_internal_copy_buf_align4.cl8
3 files changed, 28 insertions, 0 deletions
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl
new file mode 100644
index 00000000..cd3ec7bf
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align1.cl
@@ -0,0 +1,8 @@
+kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset, /* each work-item copies one char: */
+ global char* dst, unsigned int dst_offset, /* dst[dst_offset + id] = src[src_offset + id] */
+ unsigned int size) /* number of chars to copy; offsets are in char elements */
+{
+ int gid = get_global_id(0); /* work-item id in dimension 0 picks the element */
+ if (!(gid < size)) return; /* ids at or beyond size copy nothing */
+ dst[gid+dst_offset] = src[gid+src_offset];
+}
diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl
new file mode 100644
index 00000000..75b1a4a1
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align16.cl
@@ -0,0 +1,12 @@
+kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset, /* each work-item copies four consecutive floats */
+ global float* dst, unsigned int dst_offset, /* offsets are in float elements */
+ unsigned int size) /* number of 4-float groups to copy */
+{
+ size_t gid = get_global_id(0); /* keep the id at full width instead of narrowing it to int */
+ if (gid < size) { /* compare group counts: a size*4 bound would wrap in 32 bits for size >= 2^30 */
+ dst[gid*4+dst_offset] = src[gid*4+src_offset]; /* gid*4 is the first float of this work-item's group */
+ dst[gid*4+dst_offset + 1] = src[gid*4+src_offset + 1];
+ dst[gid*4+dst_offset + 2] = src[gid*4+src_offset + 2];
+ dst[gid*4+dst_offset + 3] = src[gid*4+src_offset + 3];
+ }
+}
diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl
new file mode 100644
index 00000000..44a0f81d
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align4.cl
@@ -0,0 +1,8 @@
+kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset, /* each work-item copies one float: */
+ global float* dst, unsigned int dst_offset, /* dst[dst_offset + id] = src[src_offset + id] */
+ unsigned int size) /* number of floats to copy; offsets are in float elements */
+{
+ int gid = get_global_id(0); /* work-item id in dimension 0 picks the element */
+ if (!(gid < size)) return; /* ids at or beyond size copy nothing */
+ dst[gid+dst_offset] = src[gid+src_offset];
+}