1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include "cl_simple.h"
#include "cl_util.h"
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Length of one generated store statement, "out[id*%15u + %15u] = %15i;\n":
 * 7 + 15 + 3 + 15 + 4 + 15 + 2 characters, NOT counting the terminating NUL
 * that the formatting functions append.
 */
enum { LINE_SIZE = 61 };

/*
 * Parse TEXT as a complete base-10 integer that fits in an int.
 * Returns 1 and stores the result in *OUT on success; returns 0 (leaving *OUT
 * untouched) on empty input, trailing garbage, or out-of-range values.
 */
static int parse_int(const char *text, int *out)
{
    char *end;
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        parsed < INT_MIN || parsed > INT_MAX) {
        return 0;
    }
    *out = (int)parsed;
    return 1;
}

/*
 * Parse TEXT as a strictly positive integer (1..INT_MAX).
 * Returns 1 and stores the result in *OUT on success, 0 otherwise.
 */
static int parse_positive(const char *text, unsigned *out)
{
    int parsed;

    if (!parse_int(text, &parsed) || parsed <= 0) {
        return 0;
    }
    *out = (unsigned)parsed;
    return 1;
}

/*
 * Build the OpenCL C source of the "memset" kernel.
 *
 * The kernel body is fully unrolled: it consists of STORES_PER_ITEM
 * statements of the form "out[id*STORES_PER_ITEM + k] = VALUE;" for
 * k = 0 .. STORES_PER_ITEM-1, so each work-item fills a contiguous run of
 * STORES_PER_ITEM elements.
 *
 * Returns a malloc'd, NUL-terminated string that the caller must free(), or
 * NULL if the size computation would overflow, an allocation fails, or a
 * statement does not format to exactly LINE_SIZE characters.
 */
static char *build_program_source(unsigned stores_per_item, int value)
{
    static const char program_template[] =
        "__kernel void memset(__global int * out) { unsigned id = get_global_id(0); %s }";
    size_t body_size;
    size_t program_size;
    char *body;
    char *program;
    unsigned i;

    /* Guard stores_per_item * LINE_SIZE + 1 + sizeof template against wrap. */
    if (stores_per_item > (SIZE_MAX - sizeof program_template - 1) / LINE_SIZE) {
        return NULL;
    }

    /* +1: every statement is written with a trailing NUL; the last one stays. */
    body_size = (size_t)stores_per_item * LINE_SIZE + 1;
    /* sizeof the template counts its NUL and the 2-char "%s", so this is ample. */
    program_size = body_size + sizeof program_template;

    body = malloc(body_size);
    if (body == NULL) {
        return NULL;
    }
    body[0] = '\0';

    for (i = 0; i < stores_per_item; i++) {
        size_t offset = (size_t)i * LINE_SIZE;
        int written = snprintf(body + offset, body_size - offset,
                               "out[id*%15u + %15u] = %15i;\n",
                               stores_per_item, i, value);

        /* Each statement must occupy exactly one LINE_SIZE slot. */
        if (written != LINE_SIZE) {
            free(body);
            return NULL;
        }
    }

    program = malloc(program_size);
    if (program != NULL) {
        snprintf(program, program_size, program_template, body);
    }
    free(body);
    return program;
}

/*
 * Usage: memset value items global_size local_size
 *
 * Generates an unrolled OpenCL kernel that stores VALUE into every element of
 * an ITEMS-element int buffer, runs it with the given global/local work
 * sizes, reads the buffer back and prints it to stderr, ten values per line.
 *
 * ITEMS must be a multiple of GLOBAL_SIZE and GLOBAL_SIZE a multiple of
 * LOCAL_SIZE; all three must be positive. Returns EXIT_SUCCESS on success and
 * EXIT_FAILURE on bad arguments, allocation failure, or a failed
 * enqueue/readback.
 */
int main (int argc, char ** argv)
{
    cl_int error;
    int status = EXIT_FAILURE;
    int value;
    int * out_data = NULL;
    char * program_src = NULL;
    unsigned out_size, items, global_items, local_items;
    struct cl_simple_context context;
    size_t global_work_size;
    size_t local_work_size;
    unsigned i;

    if (argc != 5) {
        fprintf(stderr, "Usage: memset value items global_size local_size \n");
        return EXIT_FAILURE;
    }

    if (!parse_int(argv[1], &value)) {
        fprintf(stderr, "memset: invalid value '%s'\n", argv[1]);
        return EXIT_FAILURE;
    }
    if (!parse_positive(argv[2], &items) ||
        !parse_positive(argv[3], &global_items) ||
        !parse_positive(argv[4], &local_items)) {
        fprintf(stderr,
                "memset: items, global_size and local_size must be positive integers\n");
        return EXIT_FAILURE;
    }

    /* Explicit checks instead of assert(): these must survive NDEBUG builds. */
    if (items % global_items != 0) {
        fprintf(stderr, "memset: items must be a multiple of global_size\n");
        return EXIT_FAILURE;
    }
    if (global_items % local_items != 0) {
        fprintf(stderr, "memset: global_size must be a multiple of local_size\n");
        return EXIT_FAILURE;
    }
    /* out_size is an unsigned byte count; make sure it cannot wrap. */
    if (items > UINT_MAX / sizeof(int)) {
        fprintf(stderr, "memset: items is too large\n");
        return EXIT_FAILURE;
    }

    global_work_size = global_items;
    local_work_size = local_items;
    out_size = items * (unsigned)sizeof(int);

    out_data = malloc(out_size);
    program_src = build_program_source(items / global_items, value);
    if (out_data == NULL || program_src == NULL) {
        fprintf(stderr, "memset: failed to allocate host buffers\n");
        goto out;
    }

    /*
     * NOTE(review): the results of these setup helpers are not checked, as in
     * the original; their return types are declared in cl_simple.h, which is
     * not visible here -- confirm and add checks if they report failure.
     */
    clSimpleInitGpuDevice(&context.device_id);
    clSimpleCreateContext(&context.cl_ctx, context.device_id);
    clSimpleCreateCommandQueue(&context.command_queue, context.cl_ctx,
                               context.device_id);
    clSimpleCreateKernelString(context.cl_ctx, context.device_id,
                               &context.kernel, "memset", program_src,
                               strlen(program_src));

    if (!clSimpleSetOutputBuffer(&context, out_size)) {
        goto out;
    }

    error = clEnqueueNDRangeKernel(context.command_queue,
                                   context.kernel,
                                   1, NULL,
                                   &global_work_size, &local_work_size,
                                   0, NULL, NULL);
    if (error != CL_SUCCESS) {
        fprintf(stderr, "clEnqueueNDRangeKernel() failed: %s\n",
                clUtilErrorString(error));
        /* The kernel never ran, so there is no meaningful output to read. */
        goto out;
    }

    if (!clSimpleReadOutput(&context, out_data, out_size)) {
        goto out;
    }

    for (i = 0; i < items; i++) {
        fprintf(stderr, "%d ", out_data[i]);
        if (i % 10 == 9) {
            fprintf(stderr, "\n");
        }
    }
    status = EXIT_SUCCESS;

out:
    free(program_src);
    free(out_data);
    return status;
}
|