summaryrefslogtreecommitdiff
path: root/memset.c
blob: 2cb15108515a64b20b8e752ea871056a80386687 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <CL/cl.h>

#include <CL/cl_platform.h>
#include "cl_simple.h"
#include "cl_util.h"
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse `text` as a complete base-10 integer that fits in an int.
 *
 * Returns 1 and stores the value in *result on success; returns 0 (leaving
 * *result untouched) when the string is empty, has trailing characters, or
 * is out of range for int.
 */
static int parse_int_arg(const char * text, int * result)
{
   char * end;
   long parsed;

   errno = 0;
   parsed = strtol(text, &end, 10);
   if (end == text || *end != '\0' || errno == ERANGE ||
       parsed < INT_MIN || parsed > INT_MAX) {
      return 0;
   }

   *result = (int)parsed;
   return 1;
}

/*
 * Like parse_int_arg(), but additionally requires the value to be strictly
 * positive. Used for the item count and the work sizes, which are later used
 * as divisors and allocation sizes.
 */
static int parse_count_arg(const char * text, unsigned * result)
{
   int parsed;

   if (!parse_int_arg(text, &parsed) || parsed <= 0) {
      return 0;
   }

   *result = (unsigned)parsed;
   return 1;
}

/*
 * Usage: memset value items global_size local_size
 *
 * Generates an OpenCL kernel in which every work item stores `value` into
 * (items / global_size) consecutive elements of the output buffer, runs it
 * with the requested global/local work sizes, reads the buffer back and
 * prints its contents to stderr, ten values per line.
 *
 * `items` must be a positive multiple of `global_size`, and `global_size`
 * must be a positive multiple of `local_size`.
 *
 * Returns EXIT_SUCCESS when the kernel was enqueued and the output was read
 * back, EXIT_FAILURE on invalid arguments, allocation failure, or when
 * enqueueing the kernel, creating the output buffer or reading it back fails.
 */
int main (int argc, char ** argv)
{
   cl_int error;
   int * out_data = NULL;
   int value;
   unsigned out_size, items, global_arg, local_arg, lines;
   /* Every generated statement is exactly 61 characters long:
    * "out[id*" (7) + 15 + " + " (3) + 15 + "] = " (4) + 15 + ";\n" (2). */
   const size_t line_size = 61;
   struct cl_simple_context context;
   const char * program_template =
   "__kernel void memset(__global int * out) { unsigned id = get_global_id(0); %s }";
   char * memset_src = NULL;
   char * program_src = NULL;
   size_t template_len;
   size_t memset_src_size;
   size_t program_src_size;
   size_t global_work_size;
   size_t local_work_size;
   unsigned i;
   int status = EXIT_FAILURE;

   if (argc != 5) {
      fprintf(stderr, "Usage: memset value items global_size local_size \n");
      return EXIT_FAILURE;
   }

   if (!parse_int_arg(argv[1], &value)) {
      fprintf(stderr, "Invalid value: %s\n", argv[1]);
      return EXIT_FAILURE;
   }
   if (!parse_count_arg(argv[2], &items)) {
      fprintf(stderr, "Invalid items (must be a positive integer): %s\n",
              argv[2]);
      return EXIT_FAILURE;
   }
   if (!parse_count_arg(argv[3], &global_arg)) {
      fprintf(stderr, "Invalid global_size (must be a positive integer): %s\n",
              argv[3]);
      return EXIT_FAILURE;
   }
   if (!parse_count_arg(argv[4], &local_arg)) {
      fprintf(stderr, "Invalid local_size (must be a positive integer): %s\n",
              argv[4]);
      return EXIT_FAILURE;
   }

   global_work_size = global_arg;
   local_work_size = local_arg;

   /* Explicit checks instead of assert() so they survive NDEBUG builds.
    * Both divisors are known to be non-zero at this point. */
   if (items % global_arg != 0) {
      fprintf(stderr, "items must be a multiple of global_size\n");
      return EXIT_FAILURE;
   }
   if (global_arg % local_arg != 0) {
      fprintf(stderr, "global_size must be a multiple of local_size\n");
      return EXIT_FAILURE;
   }

   /* out_size is an unsigned byte count; make sure it cannot wrap. */
   if (items > UINT_MAX / sizeof(int)) {
      fprintf(stderr, "items is too large\n");
      return EXIT_FAILURE;
   }
   out_size = items * (unsigned)sizeof(int);

   /* Number of stores each work item performs; at least 1 because items is
    * a positive multiple of global_size. */
   lines = items / global_arg;

   /* Guard the size computations below against size_t wrap-around. */
   template_len = strlen(program_template);
   if (lines > ((size_t)-1 - template_len - 1) / line_size) {
      fprintf(stderr, "items / global_size is too large\n");
      return EXIT_FAILURE;
   }

   /* One extra byte for the terminating NUL written after the last line. */
   memset_src_size = (size_t)lines * line_size + 1;
   /* The "%s" in the template is replaced by the generated statements, so
    * template length + statements size is always sufficient. */
   program_src_size = template_len + memset_src_size;

   out_data = malloc(out_size);
   memset_src = malloc(memset_src_size);
   program_src = malloc(program_src_size);
   if (out_data == NULL || memset_src == NULL || program_src == NULL) {
      fprintf(stderr, "Out of memory\n");
      goto cleanup;
   }

   /* Emit one fixed-width store statement per element handled by a work
    * item. Each line overwrites the NUL left by the previous one. */
   for (i = 0; i < lines; i++) {
      int written = snprintf(memset_src + (size_t)i * line_size,
                             line_size + 1,
                             "out[id*%15u + %15u] = %15i;\n",
                             lines, i, value);
      if (written != (int)line_size) {
         fprintf(stderr, "Failed to generate kernel source\n");
         goto cleanup;
      }
   }

   snprintf(program_src, program_src_size, program_template, memset_src);

   /* NOTE(review): the return values of the four setup helpers below are not
    * checked, matching the original code; confirm against cl_simple.h
    * whether they report failure. */
   clSimpleInitGpuDevice(&context.device_id);

   clSimpleCreateContext(&context.cl_ctx, context.device_id);

   clSimpleCreateCommandQueue(&context.command_queue, context.cl_ctx,
                              context.device_id);

   clSimpleCreateKernelString(context.cl_ctx, context.device_id,
                              &context.kernel, "memset", program_src,
                              strlen(program_src));

   if (!clSimpleSetOutputBuffer(&context, out_size)) {
      goto cleanup;
   }

   error = clEnqueueNDRangeKernel(context.command_queue,
                                  context.kernel,
                                  1, NULL,
                                  &global_work_size, &local_work_size,
                                  0, NULL, NULL);

   if (error != CL_SUCCESS) {
      fprintf(stderr, "clEnqueueNDRangeKernel() failed: %s\n",
              clUtilErrorString(error));
      /* The kernel never ran, so there is nothing meaningful to read back. */
      goto cleanup;
   }

   if (!clSimpleReadOutput(&context, out_data, out_size)) {
      goto cleanup;
   }

   for (i = 0; i < items; i++) {
      fprintf(stderr, "%d ", out_data[i]);
      if (i % 10 == 9) {
         fprintf(stderr, "\n");
      }
   }

   status = EXIT_SUCCESS;

cleanup:
   free(program_src);
   free(memset_src);
   free(out_data);
   return status;
}