diff options
author | Junyan He <junyan.he@linux.intel.com> | 2013-10-10 12:28:41 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-10-10 13:33:23 +0800 |
commit | e735049c614473ae505221f3f5efdd5a7f5bbeeb (patch) | |
tree | 4b4079a4a781373c9ce00536c4488f51edce0e59 /src/kernels | |
parent | f3dc4b9d1996c8caee2c412c3d10dffc20abfb59 (diff) |
Add the internal used kernels for buffer copy
Add internally used kernels for buffer copy. The align
1, 4 and 16 cases are separated into three kernels to improve
performance. The CMakeLists file is also updated.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/cl_internal_copy_buf_align1.cl | 8 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_align16.cl | 12 | ||||
-rw-r--r-- | src/kernels/cl_internal_copy_buf_align4.cl | 8 |
3 files changed, 28 insertions, 0 deletions
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl
new file mode 100644
index 00000000..cd3ec7bf
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align1.cl
@@ -0,0 +1,8 @@
+kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset,
+                                     global char* dst, unsigned int dst_offset,
+                                     unsigned int size)
+{
+    int i = get_global_id(0);
+    if (i < size)
+        dst[i+dst_offset] = src[i+src_offset];
+}
diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl
new file mode 100644
index 00000000..75b1a4a1
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align16.cl
@@ -0,0 +1,12 @@
+kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset,
+                                      global float* dst, unsigned int dst_offset,
+                                      unsigned int size)
+{
+    int i = get_global_id(0) * 4;
+    if (i < size*4) {
+        dst[i+dst_offset] = src[i+src_offset];
+        dst[i+dst_offset + 1] = src[i+src_offset + 1];
+        dst[i+dst_offset + 2] = src[i+src_offset + 2];
+        dst[i+dst_offset + 3] = src[i+src_offset + 3];
+    }
+}
diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl
new file mode 100644
index 00000000..44a0f81d
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align4.cl
@@ -0,0 +1,8 @@
+kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset,
+                                     global float* dst, unsigned int dst_offset,
+                                     unsigned int size)
+{
+    int i = get_global_id(0);
+    if (i < size)
+        dst[i+dst_offset] = src[i+src_offset];
+}