summaryrefslogtreecommitdiff
path: root/mat_mul.c
blob: 802bfad2e59b2b8649cc1b0bc8392a06e01c7f9a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

#include <CL/cl.h>
#include "cl_simple.h"

#include <stdlib.h>
#include <stdio.h>

/*
 * Host driver for the "mat_mul" kernel: multiplies the 2 x 4 integer matrix A
 * by the 4 x 3 integer matrix B and prints the heightA x widthB result to
 * stderr, one matrix row per line.
 *
 * argc/argv are unused.
 *
 * Returns EXIT_SUCCESS once the result has been printed, or EXIT_FAILURE if
 * the host allocation or any clSimple* helper call fails.
 */
int main(int argc, char ** argv)
{
   size_t i;

   struct cl_simple_context context;

   /* The dimensions are handed to the kernel as int-sized scalar arguments,
    * so they are stored as int: the size given to clSimpleKernelSetArg() must
    * match the object the pointer refers to.  (Passing sizeof(int) together
    * with the address of a size_t only copies part of the object, which
    * yields the wrong value on big-endian 64-bit hosts.) */
   int widthA, heightA, widthB, heightB;
   int * out;

   size_t out_size;
   size_t out_cols;
   size_t out_bytes;

   cl_mem inputA, inputB;

   size_t global_work_size[2];

   size_t a_bytes;
   size_t b_bytes;

   int status = EXIT_FAILURE;

   /* 2 x 4 */
   int A[8] = {1, 3, 5, 7,
               2, 4, 6, 8};

   /* 4 x 3 */
   int B[12] = {1, 8,  9,
                2, 7, 10,
                3, 6, 11,
                4, 5, 12};

   (void)argc;
   (void)argv;

   heightA = 2;
   widthA = 4;

   heightB = 4;
   widthB = 3;

   /* One work item per element of the result matrix. */
   global_work_size[0] = (size_t)widthB;
   global_work_size[1] = (size_t)heightA;

   out_cols = (size_t)widthB;
   out_size = (size_t)heightA * out_cols;
   out_bytes = sizeof(int) * out_size;

   a_bytes = sizeof(int) * (size_t)widthA * (size_t)heightA;
   b_bytes = sizeof(int) * (size_t)widthB * (size_t)heightB;

   out = malloc(out_bytes);
   if (out == NULL) {
      fprintf(stderr, "mat_mul: out of memory\n");
      return EXIT_FAILURE;
   }

   if (!clSimpleSimpleInit(&context, "mat_mul")) {
      goto cleanup;
   }

   if (   !clSimpleCreateBuffer(&inputA, context.cl_ctx, CL_MEM_READ_ONLY,
                                                         a_bytes)
       || !clSimpleCreateBuffer(&inputB, context.cl_ctx, CL_MEM_READ_ONLY,
                                                         b_bytes)) {
      goto cleanup;
   }

   if (   !clSimpleEnqueueWriteBuffer(context.command_queue, inputA,
                                      sizeof(A), A)
       || !clSimpleEnqueueWriteBuffer(context.command_queue, inputB,
                                      sizeof(B), B)) {
      goto cleanup;
   }

   /* NOTE(review): presumably this also binds the output buffer as kernel
    * argument 0, since the explicit arguments below start at index 1 --
    * confirm against cl_simple. */
   if (!clSimpleSetOutputBuffer(&context, out_bytes)) {
      goto cleanup;
   }

   if (   !clSimpleKernelSetArg(context.kernel, 1, sizeof(widthA), &widthA)
       || !clSimpleKernelSetArg(context.kernel, 2, sizeof(heightA), &heightA)
       || !clSimpleKernelSetArg(context.kernel, 3, sizeof(widthB), &widthB)
       || !clSimpleKernelSetArg(context.kernel, 4, sizeof(heightB), &heightB)
       || !clSimpleKernelSetArg(context.kernel, 5, sizeof(cl_mem), &inputA)
       || !clSimpleKernelSetArg(context.kernel, 6, sizeof(cl_mem), &inputB)) {
      goto cleanup;
   }

   /* The same array is passed for both size parameters, as in the original
    * call.  NOTE(review): if the last parameter is the local work size, this
    * runs the whole problem as a single work group -- confirm. */
   if (!clSimpleEnqueueNDRangeKernel(context.command_queue,
                                     context.kernel,
                                     2, global_work_size, global_work_size)) {
      goto cleanup;
   }

   if (!clSimpleReadOutput(&context, out, out_bytes)) {
      goto cleanup;
   }

   /* Print the result, breaking the line after every widthB values. */
   for (i = 0; i < out_size; i++) {
      fprintf(stderr, "%d ", out[i]);
      if ((i + 1) % out_cols == 0) {
         fprintf(stderr, "\n");
      }
   }

   status = EXIT_SUCCESS;

cleanup:
   free(out);
   return status;
}