diff options
-rw-r--r-- | Makefile    |  3
-rw-r--r-- | vec_load.c  | 71
-rw-r--r-- | vec_load.cl | 11
3 files changed, 85 insertions, 0 deletions
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -35,3 +35,6 @@ use-host-ptr: use_host_ptr.o $(COMMON_OBJECTS)
 
 copy-host-ptr: copy_host_ptr.o $(COMMON_OBJECTS)
 	gcc -o $@ $^ $(LDFLAGS)
+
+vec-load: vec_load.o $(COMMON_OBJECTS)
+	gcc -o $@ $^ $(LDFLAGS)
diff --git a/vec_load.c b/vec_load.c
new file mode 100644
index 0000000..cbca8b3
--- /dev/null
+++ b/vec_load.c
@@ -0,0 +1,71 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <CL/cl.h>
+
+#include "cl_simple.h"
+
+
+int main (int argc, char ** argv)
+{
+	struct cl_simple_context context;
+	int out[4];
+	unsigned out_size = sizeof(out);
+
+	cl_mem input_buffer;
+	int in[4] = {1, 2, 3, 4};
+	unsigned in_size = sizeof(in);
+
+	size_t global_work_size = 1;
+
+	unsigned i, pass;
+
+	if (!clSimpleSimpleInit(&context, "vec_load")) {
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleCreateBuffer(&input_buffer, context.cl_ctx, CL_MEM_READ_ONLY,
+			in_size)) {
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleEnqueueWriteBuffer(context.command_queue, input_buffer,
+			in_size, in)) {
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleSetOutputBuffer(&context, out_size)) {
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleKernelSetArg(context.kernel, 1, sizeof(cl_mem), &input_buffer)){
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleEnqueueNDRangeKernel(context.command_queue,
+			context.kernel,
+			1, &global_work_size,
+			&global_work_size)) {
+		return EXIT_FAILURE;
+	}
+
+	if (!clSimpleReadOutput(&context, out, out_size)) {
+		return EXIT_FAILURE;
+	}
+
+	pass = 1;
+	for (i = 0; i < 4; i++) {
+		if (out[i] != in[i]) {
+			fprintf(stderr, "Index %u: expected %d, found %d\n", i, in[i], out[i]);
+			pass = 0;
+		}
+	}
+
+	if (pass) {
+		fprintf(stderr, "Pass\n");
+		return EXIT_SUCCESS;
+	} else {
+		fprintf(stderr, "Fail\n");
+		return EXIT_FAILURE;
+	}
+}
diff --git a/vec_load.cl b/vec_load.cl
new file mode 100644
index 0000000..1b955a0
--- /dev/null
+++ b/vec_load.cl
@@ -0,0 +1,11 @@
+
+__kernel void vec_load(__global int * out, __global int4* in)
+{
+	int4 value = in[0];
+
+
+	out[0] = value.s0;
+	out[1] = value.s1;
+	out[2] = value.s2;
+	out[3] = value.s3;
+}