#include #include #include "cl_simple.h" #include "cl_util.h" #include #include #include #include int main (int argc, char ** argv) { cl_int error; cl_mem out_buffer; int * out_data; unsigned out_size, value, items; unsigned line_size = 61; struct cl_simple_context context; const char * program_template = "__kernel void memset(__global int * out) { unsigned id = get_global_id(0); %s }"; char * memset_src; char * program_src; size_t global_work_size; size_t local_work_size; unsigned i; if (argc != 5) { fprintf(stderr, "Usage: memset value items global_size local_size \n"); return EXIT_FAILURE; } value = atoi(argv[1]); items = atoi(argv[2]); global_work_size = atoi(argv[3]); local_work_size = atoi(argv[4]); assert(items % global_work_size == 0); assert(global_work_size % local_work_size == 0); out_size = items * sizeof(int); out_data = malloc(out_size); memset_src = malloc(items * line_size); program_src = malloc(items * line_size + 100); for (i = 0; i < (items / global_work_size); i++) { sprintf(memset_src + (i * line_size), "out[id*%15i + %15i] = %15i;\n", items / global_work_size, i, value); } sprintf(program_src, program_template, memset_src); clSimpleInitGpuDevice(&context.device_id); clSimpleCreateContext(&context.cl_ctx, context.device_id); clSimpleCreateCommandQueue(&context.command_queue, context.cl_ctx, context.device_id); clSimpleCreateKernelString(context.cl_ctx, context.device_id, &context.kernel, "memset", program_src, strlen(program_src)); if (!clSimpleSetOutputBuffer(&context, out_size)) { return EXIT_FAILURE; } error = clEnqueueNDRangeKernel(context.command_queue, context.kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL); if (error != CL_SUCCESS) { fprintf(stderr, "clEnqueueNDRangeKernel() failed: %s\n", clUtilErrorString(error)); } if (!clSimpleReadOutput(&context, out_data, out_size)) { return EXIT_FAILURE; } for (i = 0; i < items; i++) { fprintf(stderr, "%d ", out_data[i]); if (i % 10 == 9) { fprintf(stderr, "\n"); } } return EXIT_SUCCESS; }