diff options
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | memset.c | 89 |
2 files changed, 92 insertions, 0 deletions
@@ -26,3 +26,6 @@ get-global-id-3d: get_global_id_3d.o $(COMMON_OBJECTS)
 
 mat-mul: mat_mul.o $(COMMON_OBJECTS)
 	gcc -o $@ $^ $(LDFLAGS)
+
+memset: memset.o $(COMMON_OBJECTS)
+	gcc -o $@ $^ $(LDFLAGS)
diff --git a/memset.c b/memset.c
new file mode 100644
index 0000000..2cb1510
--- /dev/null
+++ b/memset.c
@@ -0,0 +1,228 @@
+#include <CL/cl.h>
+
+#include <CL/cl_platform.h>
+#include "cl_simple.h"
+#include "cl_util.h"
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * OpenCL test program that fills an int buffer with a single value.
+ *
+ * Usage: memset value items global_size local_size
+ *
+ * The kernel source is generated at run time: each work item performs
+ * items / global_size unrolled stores, "out[id*stores + n] = value;".
+ * The buffer is then read back and printed to stderr, ten values per line.
+ */
+
+/*
+ * Expected length of one generated store statement.  Each "%15" conversion
+ * pads to 15 characters, so a line is 7 + 15 + 3 + 15 + 4 + 15 + 2 = 61
+ * characters as long as no number needs more than 15 of them; the builder
+ * verifies this for every line it writes.
+ */
+#define LINE_SIZE 61
+
+/*
+ * Parse text as a non-negative decimal number that fits in an unsigned.
+ * Returns 1 and stores the number in *result on success, 0 otherwise.
+ */
+static int parse_unsigned(const char * text, unsigned * result)
+{
+	char * end;
+	unsigned long parsed;
+
+	/* strtoul() accepts a leading '-' and negates the result; refuse it. */
+	if (strchr(text, '-') != NULL) {
+		return 0;
+	}
+
+	errno = 0;
+	parsed = strtoul(text, &end, 10);
+	if (end == text || *end != '\0' || errno == ERANGE || parsed > UINT_MAX) {
+		return 0;
+	}
+
+	*result = (unsigned)parsed;
+	return 1;
+}
+
+/*
+ * Parse text as a decimal number that fits in an int.
+ * Returns 1 and stores the number in *result on success, 0 otherwise.
+ */
+static int parse_int(const char * text, int * result)
+{
+	char * end;
+	long parsed;
+
+	errno = 0;
+	parsed = strtol(text, &end, 10);
+	if (end == text || *end != '\0' || errno == ERANGE
+	    || parsed < INT_MIN || parsed > INT_MAX) {
+		return 0;
+	}
+
+	*result = (int)parsed;
+	return 1;
+}
+
+/*
+ * Build the kernel source that stores value into "stores" consecutive
+ * elements per work item.  Returns a malloc()ed string that the caller must
+ * free(), or NULL if the source would be too large or allocation fails.
+ */
+static char * build_program_source(int value, unsigned stores)
+{
+	static const char prefix[] =
+		"__kernel void memset(__global int * out) { unsigned id = get_global_id(0); ";
+	static const char suffix[] = " }";
+	const size_t prefix_len = sizeof prefix - 1;
+	const size_t suffix_len = sizeof suffix - 1;
+	size_t size, offset;
+	char * src;
+	unsigned n;
+
+	if (stores > (SIZE_MAX - prefix_len - suffix_len - 1) / LINE_SIZE) {
+		return NULL;
+	}
+	/* One extra byte for the terminating NUL after the last statement. */
+	size = prefix_len + (size_t)stores * LINE_SIZE + suffix_len + 1;
+
+	src = malloc(size);
+	if (src == NULL) {
+		return NULL;
+	}
+
+	memcpy(src, prefix, prefix_len);
+	offset = prefix_len;
+	for (n = 0; n < stores; n++) {
+		int written = snprintf(src + offset, size - offset,
+				       "out[id*%15u + %15u] = %15i;\n",
+				       stores, n, value);
+		if (written != LINE_SIZE) {
+			free(src);
+			return NULL;
+		}
+		offset += LINE_SIZE;
+	}
+	/* Copies the suffix together with its terminating NUL. */
+	memcpy(src + offset, suffix, suffix_len + 1);
+
+	return src;
+}
+
+/*
+ * Entry point: parse the four arguments, generate and run the kernel, then
+ * dump the buffer.  Returns EXIT_SUCCESS, or EXIT_FAILURE on a bad
+ * argument, allocation failure, or a failed OpenCL step.
+ */
+int main (int argc, char ** argv)
+{
+	cl_int error;
+	int * out_data = NULL;
+	char * program_src = NULL;
+	int value;
+	unsigned items, global_size, local_size, out_size, i;
+	size_t global_work_size;
+	size_t local_work_size;
+	struct cl_simple_context context;
+	int status = EXIT_FAILURE;
+
+	if (argc != 5) {
+		fprintf(stderr, "Usage: memset value items global_size local_size \n");
+		return EXIT_FAILURE;
+	}
+
+	if (!parse_int(argv[1], &value)
+	    || !parse_unsigned(argv[2], &items)
+	    || !parse_unsigned(argv[3], &global_size)
+	    || !parse_unsigned(argv[4], &local_size)) {
+		fprintf(stderr, "memset: arguments must be decimal integers\n");
+		return EXIT_FAILURE;
+	}
+
+	/* Zero sizes would divide by zero below and yield an empty buffer. */
+	if (items == 0 || global_size == 0 || local_size == 0) {
+		fprintf(stderr, "memset: items, global_size and local_size must be positive\n");
+		return EXIT_FAILURE;
+	}
+
+	/* Checked explicitly because assert() vanishes when NDEBUG is defined. */
+	if (items % global_size != 0 || global_size % local_size != 0) {
+		fprintf(stderr, "memset: items must be a multiple of global_size, "
+			"and global_size a multiple of local_size\n");
+		return EXIT_FAILURE;
+	}
+
+	/* out_size is an unsigned byte count, so items * sizeof(int) must fit. */
+	if (items > UINT_MAX / sizeof(int)) {
+		fprintf(stderr, "memset: too many items\n");
+		return EXIT_FAILURE;
+	}
+
+	global_work_size = global_size;
+	local_work_size = local_size;
+	out_size = items * (unsigned)sizeof(int);
+
+	out_data = malloc(out_size);
+	program_src = build_program_source(value, items / global_size);
+	if (out_data == NULL || program_src == NULL) {
+		fprintf(stderr, "memset: cannot allocate buffers\n");
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): the results of these setup helpers are not checked;
+	 * confirm in cl_simple.h whether they report failure.
+	 */
+	clSimpleInitGpuDevice(&context.device_id);
+
+	clSimpleCreateContext(&context.cl_ctx, context.device_id);
+
+	clSimpleCreateCommandQueue(&context.command_queue, context.cl_ctx,
+					context.device_id);
+
+	clSimpleCreateKernelString(context.cl_ctx, context.device_id,
+					&context.kernel, "memset", program_src,
+					strlen(program_src));
+
+	if (!clSimpleSetOutputBuffer(&context, out_size)) {
+		goto out;
+	}
+
+	error = clEnqueueNDRangeKernel(context.command_queue,
+					context.kernel,
+					1, NULL,
+					&global_work_size, &local_work_size,
+					0, NULL, NULL);
+
+	if (error != CL_SUCCESS) {
+		fprintf(stderr, "clEnqueueNDRangeKernel() failed: %s\n",
+			clUtilErrorString(error));
+		/* Do not read back a buffer the kernel never wrote. */
+		goto out;
+	}
+
+	if (!clSimpleReadOutput(&context, out_data, out_size)) {
+		goto out;
+	}
+
+	for (i = 0; i < items; i++) {
+		fprintf(stderr, "%d ", out_data[i]);
+		if (i % 10 == 9) {
+			fprintf(stderr, "\n");
+		}
+	}
+	status = EXIT_SUCCESS;
+
+out:
+	free(program_src);
+	free(out_data);
+	return status;
+}